From f53c9a6b25e7b55920660bc711d1a1bdd1d1f787 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Mon, 1 Feb 2021 21:02:36 +0300
Subject: [PATCH 0001/1238] Fix "Block structure mismatch" for INSERT into
 Distributed

Add missing conversion (via ConvertingBlockInputStream) for INSERT into
remote nodes (for sync insert, async insert and async batch insert),
like for local nodes (in DistributedBlockOutputStream::writeBlockConverted).

This is required when the structure of the Distributed table differs
from the structure of the local table.

And also add a warning message, to highlight this in logs (since this
works slower).

Fixes: #19888
---
 src/Storages/Distributed/DirectoryMonitor.cpp | 45 +++++++++++++++----
 .../DistributedBlockOutputStream.cpp          | 42 +++++++++--------
 ..._INSERT_block_structure_mismatch.reference |  4 ++
 ...3_dist_INSERT_block_structure_mismatch.sql | 23 ++++++++++
 4 files changed, 86 insertions(+), 28 deletions(-)
 create mode 100644 tests/queries/0_stateless/01683_dist_INSERT_block_structure_mismatch.reference
 create mode 100644 tests/queries/0_stateless/01683_dist_INSERT_block_structure_mismatch.sql

diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp
index 8d1b9103357..bf15ca22ca9 100644
--- a/src/Storages/Distributed/DirectoryMonitor.cpp
+++ b/src/Storages/Distributed/DirectoryMonitor.cpp
@@ -1,5 +1,7 @@
 #include <DataStreams/RemoteBlockOutputStream.h>
 #include <DataStreams/NativeBlockInputStream.h>
+#include <DataStreams/ConvertingBlockInputStream.h>
+#include <DataStreams/OneBlockInputStream.h>
 #include <Common/escapeForFileName.h>
 #include <Common/CurrentMetrics.h>
 #include <Common/StringUtils/StringUtils.h>
@@ -184,6 +186,37 @@ namespace
             return disk->getDirectorySyncGuard(path);
         return nullptr;
     }
+
+    void writeRemoteConvert(const DistributedHeader & header, RemoteBlockOutputStream & remote, ReadBufferFromFile & in, Poco::Logger * log)
+    {
+        if (remote.getHeader() && header.header != remote.getHeader().dumpStructure())
+        {
+            LOG_WARNING(log,
+                "Structure does not match (remote: {}, local: {}), implicit conversion will be done",
+                remote.getHeader().dumpStructure(), header.header);
+
+            CompressedReadBuffer decompressing_in(in);
+            /// Lack of header, requires to read blocks
+            NativeBlockInputStream block_in(decompressing_in, DBMS_TCP_PROTOCOL_VERSION);
+
+            block_in.readPrefix();
+            while (Block block = block_in.read())
+            {
+                ConvertingBlockInputStream convert(
+                    std::make_shared<OneBlockInputStream>(block),
+                    remote.getHeader(),
+                    ConvertingBlockInputStream::MatchColumnsMode::Name);
+                auto adopted_block = convert.read();
+                remote.write(adopted_block);
+            }
+            block_in.readSuffix();
+        }
+        else
+        {
+            CheckingCompressedReadBuffer checking_in(in);
+            remote.writePrepared(checking_in);
+        }
+    }
 }
 
 
@@ -438,11 +471,8 @@ void StorageDistributedDirectoryMonitor::processFile(const std::string & file_pa
         auto connection = pool->get(timeouts, &header.insert_settings);
         RemoteBlockOutputStream remote{*connection, timeouts,
             header.insert_query, header.insert_settings, header.client_info};
-
-        CheckingCompressedReadBuffer checking_in(in);
-
         remote.writePrefix();
-        remote.writePrepared(checking_in);
+        writeRemoteConvert(header, remote, in, log);
         remote.writeSuffix();
     }
     catch (const Exception & e)
@@ -560,7 +590,6 @@ struct StorageDistributedDirectoryMonitor::Batch
         try
         {
             std::unique_ptr<RemoteBlockOutputStream> remote;
-            bool first = true;
 
             for (UInt64 file_idx : file_indices)
             {
@@ -575,16 +604,14 @@ struct StorageDistributedDirectoryMonitor::Batch
                 ReadBufferFromFile in(file_path->second);
                 const auto & header = readDistributedHeader(in, parent.log);
 
-                if (first)
+                if (!remote)
                 {
-                    first = false;
                     remote = std::make_unique<RemoteBlockOutputStream>(*connection, timeouts,
                         header.insert_query, header.insert_settings, header.client_info);
                     remote->writePrefix();
                 }
 
-                CheckingCompressedReadBuffer checking_in(in);
-                remote->writePrepared(checking_in);
+                writeRemoteConvert(header, *remote, in, parent.log);
             }
 
             if (remote)
diff --git a/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/src/Storages/Distributed/DistributedBlockOutputStream.cpp
index d21764bbb7d..c698c0b18d5 100644
--- a/src/Storages/Distributed/DistributedBlockOutputStream.cpp
+++ b/src/Storages/Distributed/DistributedBlockOutputStream.cpp
@@ -60,24 +60,26 @@ namespace ErrorCodes
     extern const int TIMEOUT_EXCEEDED;
 }
 
-static void writeBlockConvert(const BlockOutputStreamPtr & out, const Block & block, const size_t repeats)
+static Block adoptBlock(const Block & header, const Block & block, Poco::Logger * log)
 {
-    if (!blocksHaveEqualStructure(out->getHeader(), block))
-    {
-        ConvertingBlockInputStream convert(
-            std::make_shared<OneBlockInputStream>(block),
-            out->getHeader(),
-            ConvertingBlockInputStream::MatchColumnsMode::Name);
-        auto adopted_block = convert.read();
+    if (blocksHaveEqualStructure(header, block))
+        return block;
 
-        for (size_t i = 0; i < repeats; ++i)
-            out->write(adopted_block);
-    }
-    else
-    {
-        for (size_t i = 0; i < repeats; ++i)
-            out->write(block);
-    }
+    LOG_WARNING(log,
+        "Structure does not match (remote: {}, local: {}), implicit conversion will be done.",
+        header.dumpStructure(), block.dumpStructure());
+
+    ConvertingBlockInputStream convert(
+        std::make_shared<OneBlockInputStream>(block),
+        header,
+        ConvertingBlockInputStream::MatchColumnsMode::Name);
+    return convert.read();
+}
+static void writeBlockConvert(const BlockOutputStreamPtr & out, const Block & block, const size_t repeats, Poco::Logger * log)
+{
+    Block adopted_block = adoptBlock(out->getHeader(), block, log);
+    for (size_t i = 0; i < repeats; ++i)
+        out->write(adopted_block);
 }
 
 
@@ -343,7 +345,9 @@ DistributedBlockOutputStream::runWritingJob(DistributedBlockOutputStream::JobRep
             }
 
             CurrentMetrics::Increment metric_increment{CurrentMetrics::DistributedSend};
-            job.stream->write(shard_block);
+
+            Block adopted_shard_block = adoptBlock(job.stream->getHeader(), shard_block, log);
+            job.stream->write(adopted_shard_block);
         }
         else // local
         {
@@ -367,7 +371,7 @@ DistributedBlockOutputStream::runWritingJob(DistributedBlockOutputStream::JobRep
                 job.stream->writePrefix();
             }
 
-            writeBlockConvert(job.stream, shard_block, shard_info.getLocalNodeCount());
+            writeBlockConvert(job.stream, shard_block, shard_info.getLocalNodeCount(), log);
         }
 
         job.blocks_written += 1;
@@ -589,7 +593,7 @@ void DistributedBlockOutputStream::writeToLocal(const Block & block, const size_
     auto block_io = interp.execute();
 
     block_io.out->writePrefix();
-    writeBlockConvert(block_io.out, block, repeats);
+    writeBlockConvert(block_io.out, block, repeats, log);
     block_io.out->writeSuffix();
 }
 
diff --git a/tests/queries/0_stateless/01683_dist_INSERT_block_structure_mismatch.reference b/tests/queries/0_stateless/01683_dist_INSERT_block_structure_mismatch.reference
new file mode 100644
index 00000000000..be589c9ceb0
--- /dev/null
+++ b/tests/queries/0_stateless/01683_dist_INSERT_block_structure_mismatch.reference
@@ -0,0 +1,4 @@
+1
+1
+2
+2
diff --git a/tests/queries/0_stateless/01683_dist_INSERT_block_structure_mismatch.sql b/tests/queries/0_stateless/01683_dist_INSERT_block_structure_mismatch.sql
new file mode 100644
index 00000000000..eaf15ed9fd8
--- /dev/null
+++ b/tests/queries/0_stateless/01683_dist_INSERT_block_structure_mismatch.sql
@@ -0,0 +1,23 @@
+DROP TABLE IF EXISTS tmp_01683;
+DROP TABLE IF EXISTS dist_01683;
+
+SET prefer_localhost_replica=0;
+-- To suppress "Structure does not match (remote: n Int8 Int8(size = 0), local: n UInt64 UInt64(size = 1)), implicit conversion will be done."
+SET send_logs_level='error';
+
+CREATE TABLE tmp_01683 (n Int8) ENGINE=Memory;
+CREATE TABLE dist_01683 (n UInt64) Engine=Distributed(test_cluster_two_shards, currentDatabase(), tmp_01683, n);
+
+SET insert_distributed_sync=1;
+INSERT INTO dist_01683 VALUES (1),(2);
+
+SET insert_distributed_sync=0;
+INSERT INTO dist_01683 VALUES (1),(2);
+SYSTEM FLUSH DISTRIBUTED dist_01683;
+
+-- TODO: cover distributed_directory_monitor_batch_inserts=1
+
+SELECT * FROM tmp_01683 ORDER BY n;
+
+DROP TABLE tmp_01683;
+DROP TABLE dist_01683;

From 594c6b0dd4471117629a848a686a2dcb6fb4095e Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Mon, 1 Feb 2021 21:02:36 +0300
Subject: [PATCH 0002/1238] Suppress warnings in
 00967_insert_into_distributed_different_types

---
 .../00967_insert_into_distributed_different_types.sql          | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/queries/0_stateless/00967_insert_into_distributed_different_types.sql b/tests/queries/0_stateless/00967_insert_into_distributed_different_types.sql
index 455fab694cd..6324c6a6c10 100644
--- a/tests/queries/0_stateless/00967_insert_into_distributed_different_types.sql
+++ b/tests/queries/0_stateless/00967_insert_into_distributed_different_types.sql
@@ -1,6 +1,9 @@
 DROP TABLE IF EXISTS dist_00967;
 DROP TABLE IF EXISTS underlying_00967;
 
+-- To suppress "Structure does not match (...), implicit conversion will be done." message
+SET send_logs_level='error';
+
 CREATE TABLE dist_00967 (key UInt64) Engine=Distributed('test_shard_localhost', currentDatabase(), underlying_00967);
 -- fails for TinyLog()/MergeTree()/... but not for Memory()
 CREATE TABLE underlying_00967 (key Nullable(UInt64)) Engine=TinyLog();

From edd79e3fafe79da1c124474c4b4d368c2e77d9cd Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Tue, 2 Feb 2021 01:10:08 +0300
Subject: [PATCH 0003/1238] Suppress warnings in
 01457_create_as_table_function_structure

---
 .../0_stateless/01457_create_as_table_function_structure.sql   | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/queries/0_stateless/01457_create_as_table_function_structure.sql b/tests/queries/0_stateless/01457_create_as_table_function_structure.sql
index 1c9c1e1ef44..9399f06220b 100644
--- a/tests/queries/0_stateless/01457_create_as_table_function_structure.sql
+++ b/tests/queries/0_stateless/01457_create_as_table_function_structure.sql
@@ -18,6 +18,9 @@ DROP TABLE tmp;
 DETACH DATABASE test_01457;
 ATTACH DATABASE test_01457;
 
+-- To suppress "Structure does not match (...), implicit conversion will be done." message
+SET send_logs_level='error';
+
 CREATE TABLE tmp (n Int8) ENGINE=Memory;
 INSERT INTO test_01457.tf_remote_explicit_structure VALUES ('42');
 SELECT * FROM tmp;

From 5070b8a76a68d690ea2bb0f378d9c2837ba356ae Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Tue, 2 Feb 2021 21:38:29 +0300
Subject: [PATCH 0004/1238] Update test_insert_distributed_async_send for
 recent block conversion changes

After the implicit conversion had been added, String and
Nullable(String) successfully converted, let's use UInt64 over
Nullable(String).
---
 .../test.py                                   | 49 ++++++++++---------
 1 file changed, 27 insertions(+), 22 deletions(-)

diff --git a/tests/integration/test_insert_distributed_async_send/test.py b/tests/integration/test_insert_distributed_async_send/test.py
index 7f6a2887c3b..b469da4e2e1 100644
--- a/tests/integration/test_insert_distributed_async_send/test.py
+++ b/tests/integration/test_insert_distributed_async_send/test.py
@@ -175,38 +175,43 @@ def test_insert_distributed_async_send_different_header(batch):
     create_tables('insert_distributed_async_send_cluster_two_shards')
 
     node = get_node(batch)
-    node.query("INSERT INTO dist VALUES (0, '')", settings={
+    node.query("INSERT INTO dist VALUES (0, 'f')", settings={
         'prefer_localhost_replica': 0,
     })
-    node.query('ALTER TABLE dist MODIFY COLUMN value Nullable(String)')
-    node.query("INSERT INTO dist VALUES (2, '')", settings={
+    node.query('ALTER TABLE dist MODIFY COLUMN value UInt64')
+    node.query("INSERT INTO dist VALUES (2, 1)", settings={
         'prefer_localhost_replica': 0,
     })
 
+    n1.query('ALTER TABLE data MODIFY COLUMN value UInt64', settings={
+        'mutations_sync': 1,
+    })
+
     if batch:
-        # first batch with Nullable(String)
-        n1.query('ALTER TABLE data MODIFY COLUMN value Nullable(String)', settings={
-            'mutations_sync': 1,
-        })
-        # but only one batch will be sent
-        with pytest.raises(QueryRuntimeException, match=r"DB::Exception: Cannot convert: String to Nullable\(String\)\. Stack trace:"):
+        # but only one batch will be sent, and first is with UInt64 column, so
+        # one rows inserted, and for string ('f') exception will be throw.
+        with pytest.raises(QueryRuntimeException, match=r"DB::Exception: Cannot parse string 'f' as UInt64: syntax error at begin of string"):
             node.query('SYSTEM FLUSH DISTRIBUTED dist')
         assert int(n1.query('SELECT count() FROM data')) == 1
-        # second batch with String
-        n1.query('ALTER TABLE data MODIFY COLUMN value String', settings={
-            'mutations_sync': 1,
-        })
+        # but once underlying column String, implicit conversion will do the
+        # thing, and insert left batch.
+        n1.query("""
+        DROP TABLE data SYNC;
+        CREATE TABLE data (key Int, value String) Engine=MergeTree() ORDER BY key;
+        """)
         node.query('SYSTEM FLUSH DISTRIBUTED dist')
-        assert int(n1.query('SELECT count() FROM data')) == 2
-    else:
-        # first send with String
-        with pytest.raises(QueryRuntimeException, match=r"DB::Exception: Cannot convert: Nullable\(String\) to String\. Stack trace:"):
-            node.query('SYSTEM FLUSH DISTRIBUTED dist')
         assert int(n1.query('SELECT count() FROM data')) == 1
-        # second send with Nullable(String)
-        n1.query('ALTER TABLE data MODIFY COLUMN value Nullable(String)', settings={
-            'mutations_sync': 1,
-        })
+    else:
+        # first send with String ('f'), so zero rows will be inserted
+        with pytest.raises(QueryRuntimeException, match=r"DB::Exception: Cannot parse string 'f' as UInt64: syntax error at begin of string"):
+            node.query('SYSTEM FLUSH DISTRIBUTED dist')
+        assert int(n1.query('SELECT count() FROM data')) == 0
+        # but once underlying column String, implicit conversion will do the
+        # thing, and insert 2 rows (mixed UInt64 and String).
+        n1.query("""
+        DROP TABLE data SYNC;
+        CREATE TABLE data (key Int, value String) Engine=MergeTree() ORDER BY key;
+        """)
         node.query('SYSTEM FLUSH DISTRIBUTED dist')
         assert int(n1.query('SELECT count() FROM data')) == 2
 

From 22bedec33ebc84b768fddcae63c17ca2928ba547 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Wed, 3 Feb 2021 08:00:37 +0300
Subject: [PATCH 0005/1238] Add 01683_dist_INSERT_block_structure_mismatch into
 arcadia_skip_list

---
 tests/queries/0_stateless/arcadia_skip_list.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/queries/0_stateless/arcadia_skip_list.txt b/tests/queries/0_stateless/arcadia_skip_list.txt
index 5b8256bb5af..76be95863cb 100644
--- a/tests/queries/0_stateless/arcadia_skip_list.txt
+++ b/tests/queries/0_stateless/arcadia_skip_list.txt
@@ -199,3 +199,4 @@
 01675_data_type_coroutine
 01671_aggregate_function_group_bitmap_data
 01674_executable_dictionary_implicit_key
+01683_dist_INSERT_block_structure_mismatch

From d4210d04c16861ed6cbccd589e9c19fd4511c97d Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Sun, 5 Apr 2020 15:18:51 +0300
Subject: [PATCH 0006/1238] databasereplicated constructor scratch

---
 src/Databases/DatabaseReplicated.cpp | 215 +++++++++++++++++++++++++++
 src/Databases/DatabaseReplicated.h   |  61 ++++++++
 2 files changed, 276 insertions(+)
 create mode 100644 src/Databases/DatabaseReplicated.cpp
 create mode 100644 src/Databases/DatabaseReplicated.h

diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
new file mode 100644
index 00000000000..fd5f53a596c
--- /dev/null
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -0,0 +1,215 @@
+#include <iomanip>
+
+#include <Core/Settings.h>
+#include <Databases/DatabaseOnDisk.h>
+#include <Databases/DatabaseReplicated.h>
+#include <Databases/DatabasesCommon.h>
+#include <IO/ReadBufferFromFile.h>
+#include <IO/ReadHelpers.h>
+#include <IO/WriteBufferFromFile.h>
+#include <IO/WriteHelpers.h>
+#include <Interpreters/Context.h>
+#include <Interpreters/InterpreterCreateQuery.h>
+#include <Parsers/ASTCreateQuery.h>
+#include <Parsers/ASTSetQuery.h>
+#include <Parsers/ParserCreateQuery.h>
+#include <Parsers/formatAST.h>
+#include <Parsers/parseQuery.h>
+#include <Storages/StorageFactory.h>
+#include <TableFunctions/TableFunctionFactory.h>
+
+#include <Parsers/queryToString.h>
+
+#include <Poco/DirectoryIterator.h>
+#include <Poco/Event.h>
+#include <Common/Stopwatch.h>
+#include <Common/ThreadPool.h>
+#include <Common/escapeForFileName.h>
+#include <Common/quoteString.h>
+#include <Common/typeid_cast.h>
+#include <common/logger_useful.h>
+
+#include <Common/ZooKeeper/KeeperException.h>
+#include <Common/ZooKeeper/Types.h>
+#include <Common/ZooKeeper/ZooKeeper.h>
+
+#include <ext/scope_guard.h>
+
+namespace DB
+{
+
+
+namespace ErrorCodes
+{
+    extern const int NO_ZOOKEEPER;
+}
+
+void DatabaseReplicated::setZooKeeper(zkutil::ZooKeeperPtr zookeeper)
+{
+    std::lock_guard lock(current_zookeeper_mutex);
+    current_zookeeper = zookeeper;
+}
+
+zkutil::ZooKeeperPtr DatabaseReplicated::tryGetZooKeeper() const
+{
+    std::lock_guard lock(current_zookeeper_mutex);
+    return current_zookeeper;
+}
+
+zkutil::ZooKeeperPtr DatabaseReplicated::getZooKeeper() const
+{
+    auto res = tryGetZooKeeper();
+    if (!res)
+        throw Exception("Cannot get ZooKeeper", ErrorCodes::NO_ZOOKEEPER);
+    return res;
+}
+
+
+DatabaseReplicated::DatabaseReplicated(
+    const String & name_,
+    const String & metadata_path_,
+    const String & zookeeper_path_,
+    const String & replica_name_,
+    const Context & context_)
+    : DatabaseOrdinary(name_, metadata_path_, context_)
+    , zookeeper_path(zookeeper_path_)
+    , replica_name(replica_name_)
+{
+
+    if (!zookeeper_path.empty() && zookeeper_path.back() == '/')
+        zookeeper_path.resize(zookeeper_path.size() - 1);
+    /// If zookeeper chroot prefix is used, path should start with '/', because chroot concatenates without it.
+    if (!zookeeper_path.empty() && zookeeper_path.front() != '/')
+        zookeeper_path = "/" + zookeeper_path;
+    replica_path = zookeeper_path + "/replicas/" + replica_name;
+
+    if (context_.hasZooKeeper()) {
+        current_zookeeper = context_.getZooKeeper();
+    }
+
+    if (!current_zookeeper)
+    {
+        // TODO wtf is attach
+        // if (!attach)
+            throw Exception("Can't create replicated table without ZooKeeper", ErrorCodes::NO_ZOOKEEPER);
+
+        /// Do not activate the replica. It will be readonly.
+        // TODO is it relevant for engines?
+        // LOG_ERROR(log, "No ZooKeeper: database will be in readonly mode.");
+        // TODO is_readonly = true;
+        // return;
+    }
+
+    // can the zk path exist and no metadata on disk be available at the same moment? if so, in such a case, the db instance must be restored.
+
+    current_zookeeper->createIfNotExists(zookeeper_path, String());
+    current_zookeeper->createIfNotExists(replica_path, String());
+    // TODO what to do?
+    // TODO createDatabaseIfNotExists ?
+    // TODO check database structure ?
+}
+
+void DatabaseReplicated::createTable(
+    const Context & context,
+    const String & table_name,
+    const StoragePtr & table,
+    const ASTPtr & query)
+{
+    // try
+    DatabaseOnDisk::createTable(context, table_name, table, query);
+
+    // replicated stuff
+    String statement = getObjectDefinitionFromCreateQuery(query);
+    auto zookeeper = getZooKeeper();
+    // TODO в чем прикол именно так создавать зиноды?
+    Coordination::Requests ops;
+    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path, "",
+        zkutil::CreateMode::Persistent));
+    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/metadata", metadata,
+        zkutil::CreateMode::Persistent));
+//    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/columns", getColumns().toString(),
+//        zkutil::CreateMode::Persistent));
+    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/log", "",
+        zkutil::CreateMode::Persistent));
+//    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/blocks", "",
+//        zkutil::CreateMode::Persistent));
+//    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/block_numbers", "",
+//        zkutil::CreateMode::Persistent));
+//    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/nonincrement_block_numbers", "",
+//        zkutil::CreateMode::Persistent)); /// /nonincrement_block_numbers dir is unused, but is created nonetheless for backwards compatibility.
+    // TODO do we need a leader here? (probably yes) what is it gonna do?       
+    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/leader_election", "",
+        zkutil::CreateMode::Persistent));
+    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/temp", "",
+        zkutil::CreateMode::Persistent));
+    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/replicas", "",
+        zkutil::CreateMode::Persistent));
+
+    Coordination::Responses responses;
+    auto code = zookeeper->tryMulti(ops, responses);
+    if (code && code != Coordination::ZNODEEXISTS)
+        throw Coordination::Exception(code);
+
+    // ...
+
+}
+
+
+void DatabaseReplicated::renameTable(
+        const Context & context,
+        const String & table_name,
+        IDatabase & to_database,
+        const String & to_table_name,
+        TableStructureWriteLockHolder & lock)
+{
+    // try
+    DatabaseOnDisk::renameTable(context, table_name, to_database, to_table_name, lock);
+    // replicated stuff
+    String statement = getObjectDefinitionFromCreateQuery(query);
+    // this one is fairly more complex
+}
+
+void DatabaseReplicated::removeTable(
+        const Context & context,
+        const String & table_name)
+{
+    // try
+    DatabaseOnDisk::removeTable(context, table_name);
+    // replicated stuff
+    String statement = getObjectDefinitionFromCreateQuery(query);
+    // ...
+}
+
+void DatabaseReplicated::drop(const Context & context)
+{
+    DatabaseOnDisk::drop(context);
+    // replicated stuff
+    String statement = getObjectDefinitionFromCreateQuery(query);
+    // should it be possible to recover after a drop. 
+    // if not, we can just delete all the zookeeper nodes starting from
+    // zookeeper path. does it work recursively? hope so...
+}
+
+void DatabaseOrdinary::loadStoredObjects(
+    Context & context,
+    bool has_force_restore_data_flag)
+{
+    syncReplicaState(context);
+    updateMetadata(context);
+
+    DatabaseOrdinary::loadStoredObjects(context, has_force_restore_data_flag);
+
+}
+
+// sync replica's zookeeper metadata
+void syncReplicaState(Context & context) {
+
+}
+
+// get the up to date metadata from zookeeper to local metadata dir
+// for replicated (only?) tables
+void updateMetadata(Context & context) {
+
+}
+
+}
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
new file mode 100644
index 00000000000..51f7763bb5a
--- /dev/null
+++ b/src/Databases/DatabaseReplicated.h
@@ -0,0 +1,61 @@
+#pragma once
+
+#include <Databases/DatabaseOrdinary.h>
+#include <Common/randomSeed.h>
+#include <Common/ZooKeeper/ZooKeeper.h>
+
+namespace DB
+{
+/** Replicated database engine.
+  * It stores tables list using list of .sql files,
+  *  that contain declaration of table represented by SQL ATTACH TABLE query
+  *  and operation log in zookeeper
+  */
+class DatabaseReplicated : public DatabaseOrdinary
+{
+public:
+    DatabaseReplicated(const String & name_, const String & metadata_path_, const String & zookeeper_path_, const String & replica_name_, const Context & context);
+
+    String getEngineName() const override { return "Replicated"; }
+
+    void createTable(
+        const Context & context,
+        const String & table_name,
+        const StoragePtr & table,
+        const ASTPtr & query) override;
+
+    void removeTable(
+        const Context & context,
+        const String & table_name) override;
+
+    void renameTable(
+        const Context & context,
+        const String & table_name,
+        IDatabase & to_database,
+        const String & to_table_name,
+        TableStructureWriteLockHolder & lock) override;
+
+    void drop(const Context & context) override;
+
+    void loadStoredObjects(
+        Context & context,
+        bool has_force_restore_data_flag) override;
+
+private:
+    String zookeeper_path;
+    String replica_name;
+    String replica_path;
+
+    zkutil::ZooKeeperPtr current_zookeeper;        /// Use only the methods below.
+    mutable std::mutex current_zookeeper_mutex;    /// To recreate the session in the background thread.
+
+    zkutil::ZooKeeperPtr tryGetZooKeeper() const;
+    zkutil::ZooKeeperPtr getZooKeeper() const;
+    void setZooKeeper(zkutil::ZooKeeperPtr zookeeper);
+
+    void syncReplicaState(Context & context);
+
+    void updateMetadata(Context & context);
+};
+
+}

From 272e31188d9b76bc4680fccf3502e459c89d5956 Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Sun, 5 Apr 2020 16:06:21 +0300
Subject: [PATCH 0007/1238] databasereplicated add table functions prototype

---
 dbms/src/Databases/DatabaseReplicated.cpp | 156 ++++++++++++++++++++++
 1 file changed, 156 insertions(+)
 create mode 100644 dbms/src/Databases/DatabaseReplicated.cpp

diff --git a/dbms/src/Databases/DatabaseReplicated.cpp b/dbms/src/Databases/DatabaseReplicated.cpp
new file mode 100644
index 00000000000..704c678f366
--- /dev/null
+++ b/dbms/src/Databases/DatabaseReplicated.cpp
@@ -0,0 +1,156 @@
+#include <iomanip>
+
+#include <Core/Settings.h>
+#include <Databases/DatabaseOnDisk.h>
+#include <Databases/DatabaseReplicated.h>
+#include <Databases/DatabasesCommon.h>
+#include <IO/ReadBufferFromFile.h>
+#include <IO/ReadHelpers.h>
+#include <IO/WriteBufferFromFile.h>
+#include <IO/WriteHelpers.h>
+#include <Interpreters/Context.h>
+#include <Interpreters/InterpreterCreateQuery.h>
+#include <Parsers/ASTCreateQuery.h>
+#include <Parsers/ASTSetQuery.h>
+#include <Parsers/ParserCreateQuery.h>
+#include <Parsers/formatAST.h>
+#include <Parsers/parseQuery.h>
+#include <Storages/StorageFactory.h>
+#include <TableFunctions/TableFunctionFactory.h>
+
+#include <Parsers/queryToString.h>
+
+#include <Poco/DirectoryIterator.h>
+#include <Poco/Event.h>
+#include <Common/Stopwatch.h>
+#include <Common/ThreadPool.h>
+#include <Common/escapeForFileName.h>
+#include <Common/quoteString.h>
+#include <Common/typeid_cast.h>
+#include <common/logger_useful.h>
+
+#include <Common/ZooKeeper/KeeperException.h>
+#include <Common/ZooKeeper/Types.h>
+#include <Common/ZooKeeper/ZooKeeper.h>
+
+#include <ext/scope_guard.h>
+
+namespace DB
+{
+
+
+namespace ErrorCodes
+{
+    extern const int NO_ZOOKEEPER;
+}
+
+void DatabaseReplicated::setZooKeeper(zkutil::ZooKeeperPtr zookeeper)
+{
+    std::lock_guard lock(current_zookeeper_mutex);
+    current_zookeeper = zookeeper;
+}
+
+zkutil::ZooKeeperPtr DatabaseReplicated::tryGetZooKeeper() const
+{
+    std::lock_guard lock(current_zookeeper_mutex);
+    return current_zookeeper;
+}
+
+zkutil::ZooKeeperPtr DatabaseReplicated::getZooKeeper() const
+{
+    auto res = tryGetZooKeeper();
+    if (!res)
+        throw Exception("Cannot get ZooKeeper", ErrorCodes::NO_ZOOKEEPER);
+    return res;
+}
+
+
+DatabaseReplicated::DatabaseReplicated(
+    const String & name_,
+    const String & metadata_path_,
+    const String & zookeeper_path_,
+    const String & replica_name_,
+    const Context & context_)
+    : DatabaseOrdinary(name_, metadata_path_, context_)
+    , zookeeper_path(zookeeper_path_)
+    , replica_name(replica_name_)
+{
+
+    if (!zookeeper_path.empty() && zookeeper_path.back() == '/')
+        zookeeper_path.resize(zookeeper_path.size() - 1);
+    /// If zookeeper chroot prefix is used, path should start with '/', because chroot concatenates without it.
+    if (!zookeeper_path.empty() && zookeeper_path.front() != '/')
+        zookeeper_path = "/" + zookeeper_path;
+    replica_path = zookeeper_path + "/replicas/" + replica_name;
+
+    if (context_.hasZooKeeper()) {
+        current_zookeeper = context_.getZooKeeper();
+    }
+
+    if (!current_zookeeper)
+    {
+        // TODO wtf is attach
+        // if (!attach)
+            throw Exception("Can't create replicated table without ZooKeeper", ErrorCodes::NO_ZOOKEEPER);
+
+        /// Do not activate the replica. It will be readonly.
+        // TODO is it relevant for engines?
+        // LOG_ERROR(log, "No ZooKeeper: database will be in readonly mode.");
+        // TODO is_readonly = true;
+        // return;
+    }
+    // getObjectDefinitionFromCreateQuery
+    // TODO what to do?
+    // TODO createDatabaseIfNotExists ?
+    // TODO check database structure ?
+}
+
+void DatabaseReplicated::createTable(
+    const Context & context,
+    const String & table_name,
+    const StoragePtr & table,
+    const ASTPtr & query)
+{
+    // try
+    DatabaseOnDisk::createTable(context, table_name, table, query);
+    // replicated stuff
+    String statement = getObjectDefinitionFromCreateQuery(query);
+    // ...
+
+}
+
+
+void DatabaseReplicated::renameTable(
+        const Context & context,
+        const String & table_name,
+        IDatabase & to_database,
+        const String & to_table_name,
+        TableStructureWriteLockHolder & lock)
+{
+    // try
+    DatabaseOnDisk::renameTable(context, table_name, to_database, to_table_name, lock);
+    // replicated stuff
+    String statement = getObjectDefinitionFromCreateQuery(query);
+    // ...
+}
+
+void DatabaseReplicated::removeTable(
+        const Context & context,
+        const String & table_name)
+{
+    // try
+    DatabaseOnDisk::removeTable(context, table_name);
+    // replicated stuff
+    String statement = getObjectDefinitionFromCreateQuery(query);
+    // ...
+}
+
+void DatabaseReplicated::drop(const Context & context)
+{
+    DatabaseOnDisk::drop(context);
+    // replicated stuff
+    String statement = getObjectDefinitionFromCreateQuery(query);
+    // ...
+}
+
+}

From edb871979a66ecd5d07346003360344e5fb51ff0 Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Mon, 6 Apr 2020 14:29:45 +0300
Subject: [PATCH 0008/1238] add some zookeeper into the logic

---
 dbms/src/Databases/DatabaseReplicated.cpp | 40 +++++++++++++++++++++--
 1 file changed, 37 insertions(+), 3 deletions(-)

diff --git a/dbms/src/Databases/DatabaseReplicated.cpp b/dbms/src/Databases/DatabaseReplicated.cpp
index 704c678f366..31e28c320cb 100644
--- a/dbms/src/Databases/DatabaseReplicated.cpp
+++ b/dbms/src/Databases/DatabaseReplicated.cpp
@@ -99,7 +99,9 @@ DatabaseReplicated::DatabaseReplicated(
         // TODO is_readonly = true;
         // return;
     }
-    // getObjectDefinitionFromCreateQuery
+
+    current_zookeeper->createIfNotExists(zookeeper_path, String());
+    current_zookeeper->createIfNotExists(replica_path, String());
     // TODO what to do?
     // TODO createDatabaseIfNotExists ?
     // TODO check database structure ?
@@ -115,6 +117,36 @@ void DatabaseReplicated::createTable(
     DatabaseOnDisk::createTable(context, table_name, table, query);
     // replicated stuff
     String statement = getObjectDefinitionFromCreateQuery(query);
+    auto zookeeper = getZooKeeper();
+    // TODO в чем прикол именно так создавать зиноды?
+    Coordination::Requests ops;
+    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path, "",
+        zkutil::CreateMode::Persistent));
+    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/metadata", metadata,
+        zkutil::CreateMode::Persistent));
+//    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/columns", getColumns().toString(),
+//        zkutil::CreateMode::Persistent));
+    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/log", "",
+        zkutil::CreateMode::Persistent));
+//    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/blocks", "",
+//        zkutil::CreateMode::Persistent));
+//    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/block_numbers", "",
+//        zkutil::CreateMode::Persistent));
+//    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/nonincrement_block_numbers", "",
+//        zkutil::CreateMode::Persistent)); /// /nonincrement_block_numbers dir is unused, but is created nonetheless for backwards compatibility.
+    // TODO do we need a leader here? (probably yes) what is it gonna do?       
+    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/leader_election", "",
+        zkutil::CreateMode::Persistent));
+    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/temp", "",
+        zkutil::CreateMode::Persistent));
+    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/replicas", "",
+        zkutil::CreateMode::Persistent));
+
+    Coordination::Responses responses;
+    auto code = zookeeper->tryMulti(ops, responses);
+    if (code && code != Coordination::ZNODEEXISTS)
+        throw Coordination::Exception(code);
+
     // ...
 
 }
@@ -131,7 +163,7 @@ void DatabaseReplicated::renameTable(
     DatabaseOnDisk::renameTable(context, table_name, to_database, to_table_name, lock);
     // replicated stuff
     String statement = getObjectDefinitionFromCreateQuery(query);
-    // ...
+    // this one is fairly more complex
 }
 
 void DatabaseReplicated::removeTable(
@@ -150,7 +182,9 @@ void DatabaseReplicated::drop(const Context & context)
     DatabaseOnDisk::drop(context);
     // replicated stuff
     String statement = getObjectDefinitionFromCreateQuery(query);
-    // ...
+    // should it be possible to recover after a drop. 
+    // if not, we can just delete all the zookeeper nodes starting from
+    // zookeeper path. does it work recursively? hope so...
 }
 
 }

From e0f52965e5ebfbb01e7a502190bea17918e22754 Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Fri, 24 Apr 2020 16:49:14 +0300
Subject: [PATCH 0009/1238] Add a comment with some thoughts

---
 dbms/src/Databases/DatabaseReplicated.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/dbms/src/Databases/DatabaseReplicated.cpp b/dbms/src/Databases/DatabaseReplicated.cpp
index 31e28c320cb..e18fc1db5f4 100644
--- a/dbms/src/Databases/DatabaseReplicated.cpp
+++ b/dbms/src/Databases/DatabaseReplicated.cpp
@@ -100,6 +100,8 @@ DatabaseReplicated::DatabaseReplicated(
         // return;
     }
 
+    // can the zk path exist and no metadata on disk be available at the same moment? if so, in such a case, the db instance must be restored.
+
     current_zookeeper->createIfNotExists(zookeeper_path, String());
     current_zookeeper->createIfNotExists(replica_path, String());
     // TODO what to do?
@@ -115,6 +117,7 @@ void DatabaseReplicated::createTable(
 {
     // try
     DatabaseOnDisk::createTable(context, table_name, table, query);
+
     // replicated stuff
     String statement = getObjectDefinitionFromCreateQuery(query);
     auto zookeeper = getZooKeeper();

From c1c132502c64d52e5867e3cc4ed6e3b2523567d8 Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Fri, 24 Apr 2020 17:12:54 +0300
Subject: [PATCH 0010/1238] add prototypes of loadStoredObject and some
 relevant helpers in replicateddb

---
 dbms/src/Databases/DatabaseReplicated.cpp | 22 ++++++++
 dbms/src/Databases/DatabaseReplicated.h   | 61 +++++++++++++++++++++++
 2 files changed, 83 insertions(+)
 create mode 100644 dbms/src/Databases/DatabaseReplicated.h

diff --git a/dbms/src/Databases/DatabaseReplicated.cpp b/dbms/src/Databases/DatabaseReplicated.cpp
index e18fc1db5f4..fd5f53a596c 100644
--- a/dbms/src/Databases/DatabaseReplicated.cpp
+++ b/dbms/src/Databases/DatabaseReplicated.cpp
@@ -190,4 +190,26 @@ void DatabaseReplicated::drop(const Context & context)
     // zookeeper path. does it work recursively? hope so...
 }
 
+void DatabaseOrdinary::loadStoredObjects(
+    Context & context,
+    bool has_force_restore_data_flag)
+{
+    syncReplicaState(context);
+    updateMetadata(context);
+
+    DatabaseOrdinary::loadStoredObjects(context, has_force_restore_data_flag);
+
+}
+
+// sync replica's zookeeper metadata
+void syncReplicaState(Context & context) {
+
+}
+
+// get the up to date metadata from zookeeper to local metadata dir
+// for replicated (only?) tables
+void updateMetadata(Context & context) {
+
+}
+
 }
diff --git a/dbms/src/Databases/DatabaseReplicated.h b/dbms/src/Databases/DatabaseReplicated.h
new file mode 100644
index 00000000000..51f7763bb5a
--- /dev/null
+++ b/dbms/src/Databases/DatabaseReplicated.h
@@ -0,0 +1,61 @@
+#pragma once
+
+#include <Databases/DatabaseOrdinary.h>
+#include <Common/randomSeed.h>
+#include <Common/ZooKeeper/ZooKeeper.h>
+
+namespace DB
+{
+/** Replicated database engine.
+  * It stores tables list using list of .sql files,
+  *  that contain declaration of table represented by SQL ATTACH TABLE query
+  *  and operation log in zookeeper
+  */
+class DatabaseReplicated : public DatabaseOrdinary
+{
+public:
+    DatabaseReplicated(const String & name_, const String & metadata_path_, const String & zookeeper_path_, const String & replica_name_, const Context & context);
+
+    String getEngineName() const override { return "Replicated"; }
+
+    void createTable(
+        const Context & context,
+        const String & table_name,
+        const StoragePtr & table,
+        const ASTPtr & query) override;
+
+    void removeTable(
+        const Context & context,
+        const String & table_name) override;
+
+    void renameTable(
+        const Context & context,
+        const String & table_name,
+        IDatabase & to_database,
+        const String & to_table_name,
+        TableStructureWriteLockHolder & lock) override;
+
+    void drop(const Context & context) override;
+
+    void loadStoredObjects(
+        Context & context,
+        bool has_force_restore_data_flag) override;
+
+private:
+    String zookeeper_path;
+    String replica_name;
+    String replica_path;
+
+    zkutil::ZooKeeperPtr current_zookeeper;        /// Use only the methods below.
+    mutable std::mutex current_zookeeper_mutex;    /// To recreate the session in the background thread.
+
+    zkutil::ZooKeeperPtr tryGetZooKeeper() const;
+    zkutil::ZooKeeperPtr getZooKeeper() const;
+    void setZooKeeper(zkutil::ZooKeeperPtr zookeeper);
+
+    void syncReplicaState(Context & context);
+
+    void updateMetadata(Context & context);
+};
+
+}

From 0d392bbb34c142f6871a2bd2ab699f5baa768780 Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Wed, 29 Apr 2020 14:19:16 +0300
Subject: [PATCH 0011/1238] fix after rebase

---
 src/Databases/DatabaseFactory.cpp    | 17 +++++++++-
 src/Databases/DatabaseReplicated.cpp | 49 +++++++++++++++-------------
 src/Databases/DatabaseReplicated.h   |  7 ++--
 3 files changed, 47 insertions(+), 26 deletions(-)

diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp
index f27bc509ebe..0d7a711b530 100644
--- a/src/Databases/DatabaseFactory.cpp
+++ b/src/Databases/DatabaseFactory.cpp
@@ -1,4 +1,5 @@
 #include <Databases/DatabaseAtomic.h>
+#include <Databases/DatabaseReplicated.h>
 #include <Databases/DatabaseDictionary.h>
 #include <Databases/DatabaseFactory.h>
 #include <Databases/DatabaseLazy.h>
@@ -69,7 +70,7 @@ DatabasePtr DatabaseFactory::getImpl(
 {
     String engine_name = engine_define->engine->name;
 
-    if (engine_name != "MySQL" && engine_name != "Lazy" && engine_define->engine->arguments)
+    if (engine_name != "MySQL" && engine_name != "Lazy" && engine_name != "Replicated" && engine_define->engine->arguments)
         throw Exception("Database engine " + engine_name + " cannot have arguments", ErrorCodes::BAD_ARGUMENTS);
 
     if (engine_define->engine->parameters || engine_define->partition_by || engine_define->primary_key || engine_define->order_by ||
@@ -138,6 +139,20 @@ DatabasePtr DatabaseFactory::getImpl(
         return std::make_shared<DatabaseLazy>(database_name, metadata_path, cache_expiration_time_seconds, context);
     }
 
+    else if (engine_name == "Replicated")
+    {
+        const ASTFunction * engine = engine_define->engine;
+
+        if (!engine->arguments || engine->arguments->children.size() != 2)
+            throw Exception("Replicated database requires zoo_path and replica_name arguments", ErrorCodes::BAD_ARGUMENTS);
+
+        const auto & arguments = engine->arguments->children;
+
+        const auto zoo_path = arguments[0]->as<ASTLiteral>()->value.safeGet<String>();
+        const auto replica_name = arguments[1]->as<ASTLiteral>()->value.safeGet<String>();
+        return std::make_shared<DatabaseReplicated>(database_name, metadata_path, zoo_path, replica_name, context);
+    }
+
     throw Exception("Unknown database engine: " + engine_name, ErrorCodes::UNKNOWN_DATABASE_ENGINE);
 }
 
diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index fd5f53a596c..92af1c890c2 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -125,8 +125,8 @@ void DatabaseReplicated::createTable(
     Coordination::Requests ops;
     ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path, "",
         zkutil::CreateMode::Persistent));
-    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/metadata", metadata,
-        zkutil::CreateMode::Persistent));
+    //ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/metadata", metadata,
+        //zkutil::CreateMode::Persistent));
 //    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/columns", getColumns().toString(),
 //        zkutil::CreateMode::Persistent));
     ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/log", "",
@@ -160,23 +160,24 @@ void DatabaseReplicated::renameTable(
         const String & table_name,
         IDatabase & to_database,
         const String & to_table_name,
-        TableStructureWriteLockHolder & lock)
+        bool exchange)
 {
     // try
-    DatabaseOnDisk::renameTable(context, table_name, to_database, to_table_name, lock);
-    // replicated stuff
-    String statement = getObjectDefinitionFromCreateQuery(query);
+    DatabaseOnDisk::renameTable(context, table_name, to_database, to_table_name, exchange);
+    // replicated stuff; what to put to a znode
+    // String statement = getObjectDefinitionFromCreateQuery(query);
     // this one is fairly more complex
 }
 
-void DatabaseReplicated::removeTable(
+void DatabaseReplicated::dropTable(
         const Context & context,
-        const String & table_name)
+        const String & table_name,
+        bool no_delay)
 {
     // try
-    DatabaseOnDisk::removeTable(context, table_name);
+    DatabaseOnDisk::dropTable(context, table_name, no_delay);
     // replicated stuff
-    String statement = getObjectDefinitionFromCreateQuery(query);
+    //String statement = getObjectDefinitionFromCreateQuery(query);
     // ...
 }
 
@@ -184,13 +185,26 @@ void DatabaseReplicated::drop(const Context & context)
 {
     DatabaseOnDisk::drop(context);
     // replicated stuff
-    String statement = getObjectDefinitionFromCreateQuery(query);
+    //String statement = getObjectDefinitionFromCreateQuery(query);
     // should it be possible to recover after a drop. 
     // if not, we can just delete all the zookeeper nodes starting from
     // zookeeper path. does it work recursively? hope so...
 }
 
-void DatabaseOrdinary::loadStoredObjects(
+// sync replica's zookeeper metadata
+void DatabaseReplicated::syncReplicaState(Context & context) {
+    auto c = context; // fixes unuser parameter error
+    return;
+}
+
+// get the up to date metadata from zookeeper to local metadata dir
+// for replicated (only?) tables
+void DatabaseReplicated::updateMetadata(Context & context) {
+    auto c = context; // fixes unuser parameter error
+    return;
+}
+
+void DatabaseReplicated::loadStoredObjects(
     Context & context,
     bool has_force_restore_data_flag)
 {
@@ -201,15 +215,6 @@ void DatabaseOrdinary::loadStoredObjects(
 
 }
 
-// sync replica's zookeeper metadata
-void syncReplicaState(Context & context) {
-
-}
-
-// get the up to date metadata from zookeeper to local metadata dir
-// for replicated (only?) tables
-void updateMetadata(Context & context) {
-
-}
+
 
 }
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index 51f7763bb5a..bc1af923277 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -24,16 +24,17 @@ public:
         const StoragePtr & table,
         const ASTPtr & query) override;
 
-    void removeTable(
+    void dropTable(
         const Context & context,
-        const String & table_name) override;
+        const String & table_name,
+        bool no_delay) override;
 
     void renameTable(
         const Context & context,
         const String & table_name,
         IDatabase & to_database,
         const String & to_table_name,
-        TableStructureWriteLockHolder & lock) override;
+        bool exchange) override;
 
     void drop(const Context & context) override;
 

From 1cb96bf1762cc8b111f0cb58ed651059156442e2 Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Wed, 29 Apr 2020 14:21:12 +0300
Subject: [PATCH 0012/1238] rm old files from nonexistant dir since the rebase

---
 dbms/src/Databases/DatabaseReplicated.cpp | 215 ----------------------
 dbms/src/Databases/DatabaseReplicated.h   |  61 ------
 2 files changed, 276 deletions(-)
 delete mode 100644 dbms/src/Databases/DatabaseReplicated.cpp
 delete mode 100644 dbms/src/Databases/DatabaseReplicated.h

diff --git a/dbms/src/Databases/DatabaseReplicated.cpp b/dbms/src/Databases/DatabaseReplicated.cpp
deleted file mode 100644
index fd5f53a596c..00000000000
--- a/dbms/src/Databases/DatabaseReplicated.cpp
+++ /dev/null
@@ -1,215 +0,0 @@
-#include <iomanip>
-
-#include <Core/Settings.h>
-#include <Databases/DatabaseOnDisk.h>
-#include <Databases/DatabaseReplicated.h>
-#include <Databases/DatabasesCommon.h>
-#include <IO/ReadBufferFromFile.h>
-#include <IO/ReadHelpers.h>
-#include <IO/WriteBufferFromFile.h>
-#include <IO/WriteHelpers.h>
-#include <Interpreters/Context.h>
-#include <Interpreters/InterpreterCreateQuery.h>
-#include <Parsers/ASTCreateQuery.h>
-#include <Parsers/ASTSetQuery.h>
-#include <Parsers/ParserCreateQuery.h>
-#include <Parsers/formatAST.h>
-#include <Parsers/parseQuery.h>
-#include <Storages/StorageFactory.h>
-#include <TableFunctions/TableFunctionFactory.h>
-
-#include <Parsers/queryToString.h>
-
-#include <Poco/DirectoryIterator.h>
-#include <Poco/Event.h>
-#include <Common/Stopwatch.h>
-#include <Common/ThreadPool.h>
-#include <Common/escapeForFileName.h>
-#include <Common/quoteString.h>
-#include <Common/typeid_cast.h>
-#include <common/logger_useful.h>
-
-#include <Common/ZooKeeper/KeeperException.h>
-#include <Common/ZooKeeper/Types.h>
-#include <Common/ZooKeeper/ZooKeeper.h>
-
-#include <ext/scope_guard.h>
-
-namespace DB
-{
-
-
-namespace ErrorCodes
-{
-    extern const int NO_ZOOKEEPER;
-}
-
-void DatabaseReplicated::setZooKeeper(zkutil::ZooKeeperPtr zookeeper)
-{
-    std::lock_guard lock(current_zookeeper_mutex);
-    current_zookeeper = zookeeper;
-}
-
-zkutil::ZooKeeperPtr DatabaseReplicated::tryGetZooKeeper() const
-{
-    std::lock_guard lock(current_zookeeper_mutex);
-    return current_zookeeper;
-}
-
-zkutil::ZooKeeperPtr DatabaseReplicated::getZooKeeper() const
-{
-    auto res = tryGetZooKeeper();
-    if (!res)
-        throw Exception("Cannot get ZooKeeper", ErrorCodes::NO_ZOOKEEPER);
-    return res;
-}
-
-
-DatabaseReplicated::DatabaseReplicated(
-    const String & name_,
-    const String & metadata_path_,
-    const String & zookeeper_path_,
-    const String & replica_name_,
-    const Context & context_)
-    : DatabaseOrdinary(name_, metadata_path_, context_)
-    , zookeeper_path(zookeeper_path_)
-    , replica_name(replica_name_)
-{
-
-    if (!zookeeper_path.empty() && zookeeper_path.back() == '/')
-        zookeeper_path.resize(zookeeper_path.size() - 1);
-    /// If zookeeper chroot prefix is used, path should start with '/', because chroot concatenates without it.
-    if (!zookeeper_path.empty() && zookeeper_path.front() != '/')
-        zookeeper_path = "/" + zookeeper_path;
-    replica_path = zookeeper_path + "/replicas/" + replica_name;
-
-    if (context_.hasZooKeeper()) {
-        current_zookeeper = context_.getZooKeeper();
-    }
-
-    if (!current_zookeeper)
-    {
-        // TODO wtf is attach
-        // if (!attach)
-            throw Exception("Can't create replicated table without ZooKeeper", ErrorCodes::NO_ZOOKEEPER);
-
-        /// Do not activate the replica. It will be readonly.
-        // TODO is it relevant for engines?
-        // LOG_ERROR(log, "No ZooKeeper: database will be in readonly mode.");
-        // TODO is_readonly = true;
-        // return;
-    }
-
-    // can the zk path exist and no metadata on disk be available at the same moment? if so, in such a case, the db instance must be restored.
-
-    current_zookeeper->createIfNotExists(zookeeper_path, String());
-    current_zookeeper->createIfNotExists(replica_path, String());
-    // TODO what to do?
-    // TODO createDatabaseIfNotExists ?
-    // TODO check database structure ?
-}
-
-void DatabaseReplicated::createTable(
-    const Context & context,
-    const String & table_name,
-    const StoragePtr & table,
-    const ASTPtr & query)
-{
-    // try
-    DatabaseOnDisk::createTable(context, table_name, table, query);
-
-    // replicated stuff
-    String statement = getObjectDefinitionFromCreateQuery(query);
-    auto zookeeper = getZooKeeper();
-    // TODO в чем прикол именно так создавать зиноды?
-    Coordination::Requests ops;
-    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path, "",
-        zkutil::CreateMode::Persistent));
-    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/metadata", metadata,
-        zkutil::CreateMode::Persistent));
-//    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/columns", getColumns().toString(),
-//        zkutil::CreateMode::Persistent));
-    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/log", "",
-        zkutil::CreateMode::Persistent));
-//    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/blocks", "",
-//        zkutil::CreateMode::Persistent));
-//    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/block_numbers", "",
-//        zkutil::CreateMode::Persistent));
-//    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/nonincrement_block_numbers", "",
-//        zkutil::CreateMode::Persistent)); /// /nonincrement_block_numbers dir is unused, but is created nonetheless for backwards compatibility.
-    // TODO do we need a leader here? (probably yes) what is it gonna do?       
-    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/leader_election", "",
-        zkutil::CreateMode::Persistent));
-    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/temp", "",
-        zkutil::CreateMode::Persistent));
-    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/replicas", "",
-        zkutil::CreateMode::Persistent));
-
-    Coordination::Responses responses;
-    auto code = zookeeper->tryMulti(ops, responses);
-    if (code && code != Coordination::ZNODEEXISTS)
-        throw Coordination::Exception(code);
-
-    // ...
-
-}
-
-
-void DatabaseReplicated::renameTable(
-        const Context & context,
-        const String & table_name,
-        IDatabase & to_database,
-        const String & to_table_name,
-        TableStructureWriteLockHolder & lock)
-{
-    // try
-    DatabaseOnDisk::renameTable(context, table_name, to_database, to_table_name, lock);
-    // replicated stuff
-    String statement = getObjectDefinitionFromCreateQuery(query);
-    // this one is fairly more complex
-}
-
-void DatabaseReplicated::removeTable(
-        const Context & context,
-        const String & table_name)
-{
-    // try
-    DatabaseOnDisk::removeTable(context, table_name);
-    // replicated stuff
-    String statement = getObjectDefinitionFromCreateQuery(query);
-    // ...
-}
-
-void DatabaseReplicated::drop(const Context & context)
-{
-    DatabaseOnDisk::drop(context);
-    // replicated stuff
-    String statement = getObjectDefinitionFromCreateQuery(query);
-    // should it be possible to recover after a drop. 
-    // if not, we can just delete all the zookeeper nodes starting from
-    // zookeeper path. does it work recursively? hope so...
-}
-
-void DatabaseOrdinary::loadStoredObjects(
-    Context & context,
-    bool has_force_restore_data_flag)
-{
-    syncReplicaState(context);
-    updateMetadata(context);
-
-    DatabaseOrdinary::loadStoredObjects(context, has_force_restore_data_flag);
-
-}
-
-// sync replica's zookeeper metadata
-void syncReplicaState(Context & context) {
-
-}
-
-// get the up to date metadata from zookeeper to local metadata dir
-// for replicated (only?) tables
-void updateMetadata(Context & context) {
-
-}
-
-}
diff --git a/dbms/src/Databases/DatabaseReplicated.h b/dbms/src/Databases/DatabaseReplicated.h
deleted file mode 100644
index 51f7763bb5a..00000000000
--- a/dbms/src/Databases/DatabaseReplicated.h
+++ /dev/null
@@ -1,61 +0,0 @@
-#pragma once
-
-#include <Databases/DatabaseOrdinary.h>
-#include <Common/randomSeed.h>
-#include <Common/ZooKeeper/ZooKeeper.h>
-
-namespace DB
-{
-/** Replicated database engine.
-  * It stores tables list using list of .sql files,
-  *  that contain declaration of table represented by SQL ATTACH TABLE query
-  *  and operation log in zookeeper
-  */
-class DatabaseReplicated : public DatabaseOrdinary
-{
-public:
-    DatabaseReplicated(const String & name_, const String & metadata_path_, const String & zookeeper_path_, const String & replica_name_, const Context & context);
-
-    String getEngineName() const override { return "Replicated"; }
-
-    void createTable(
-        const Context & context,
-        const String & table_name,
-        const StoragePtr & table,
-        const ASTPtr & query) override;
-
-    void removeTable(
-        const Context & context,
-        const String & table_name) override;
-
-    void renameTable(
-        const Context & context,
-        const String & table_name,
-        IDatabase & to_database,
-        const String & to_table_name,
-        TableStructureWriteLockHolder & lock) override;
-
-    void drop(const Context & context) override;
-
-    void loadStoredObjects(
-        Context & context,
-        bool has_force_restore_data_flag) override;
-
-private:
-    String zookeeper_path;
-    String replica_name;
-    String replica_path;
-
-    zkutil::ZooKeeperPtr current_zookeeper;        /// Use only the methods below.
-    mutable std::mutex current_zookeeper_mutex;    /// To recreate the session in the background thread.
-
-    zkutil::ZooKeeperPtr tryGetZooKeeper() const;
-    zkutil::ZooKeeperPtr getZooKeeper() const;
-    void setZooKeeper(zkutil::ZooKeeperPtr zookeeper);
-
-    void syncReplicaState(Context & context);
-
-    void updateMetadata(Context & context);
-};
-
-}

From 8b0366ff4ff08d47b9ca7451ce33ca07683b0012 Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Thu, 30 Apr 2020 19:15:27 +0300
Subject: [PATCH 0013/1238] an attempt to make something meaningful

---
 src/Databases/DatabaseReplicated.cpp | 91 ++++++++++++----------------
 1 file changed, 40 insertions(+), 51 deletions(-)

diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index 92af1c890c2..d6bbec24791 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -71,7 +71,7 @@ DatabaseReplicated::DatabaseReplicated(
     const String & zookeeper_path_,
     const String & replica_name_,
     const Context & context_)
-    : DatabaseOrdinary(name_, metadata_path_, context_)
+    : DatabaseOrdinary(name_, metadata_path_, "data/", "DatabaseReplicated (" + name_ + ")", context_)
     , zookeeper_path(zookeeper_path_)
     , replica_name(replica_name_)
 {
@@ -89,24 +89,31 @@ DatabaseReplicated::DatabaseReplicated(
 
     if (!current_zookeeper)
     {
-        // TODO wtf is attach
-        // if (!attach)
-            throw Exception("Can't create replicated table without ZooKeeper", ErrorCodes::NO_ZOOKEEPER);
+            throw Exception("Can't create replicated database without ZooKeeper", ErrorCodes::NO_ZOOKEEPER);
+
 
-        /// Do not activate the replica. It will be readonly.
-        // TODO is it relevant for engines?
-        // LOG_ERROR(log, "No ZooKeeper: database will be in readonly mode.");
-        // TODO is_readonly = true;
-        // return;
     }
 
-    // can the zk path exist and no metadata on disk be available at the same moment? if so, in such a case, the db instance must be restored.
+    // test without this fancy mess (prob wont work)
+    current_zookeeper->createAncestors(replica_path);
+    current_zookeeper->createOrUpdate(replica_path, String(), zkutil::CreateMode::Persistent);
 
-    current_zookeeper->createIfNotExists(zookeeper_path, String());
-    current_zookeeper->createIfNotExists(replica_path, String());
-    // TODO what to do?
-    // TODO createDatabaseIfNotExists ?
-    // TODO check database structure ?
+//    if (!current_zookeeper->exists(zookeeper_path)) {
+//
+//        LOG_DEBUG(log, "Creating database " << zookeeper_path);
+//        current_zookeeper->createAncestors(zookeeper_path);
+
+        // Coordination::Requests ops;
+        // ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path, "",
+        //     zkutil::CreateMode::Persistent));
+        // ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/replicas", "",
+        //     zkutil::CreateMode::Persistent));
+
+        // Coordination::Responses responses;
+        // auto code = current_zookeeper->tryMulti(ops, responses);
+        // if (code && code != Coordination::ZNODEEXISTS)
+        //     throw Coordination::Exception(code);
+        // }
 }
 
 void DatabaseReplicated::createTable(
@@ -115,43 +122,16 @@ void DatabaseReplicated::createTable(
     const StoragePtr & table,
     const ASTPtr & query)
 {
-    // try
+    // try?
     DatabaseOnDisk::createTable(context, table_name, table, query);
 
-    // replicated stuff
+    // suppose it worked
     String statement = getObjectDefinitionFromCreateQuery(query);
-    auto zookeeper = getZooKeeper();
-    // TODO в чем прикол именно так создавать зиноды?
-    Coordination::Requests ops;
-    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path, "",
-        zkutil::CreateMode::Persistent));
-    //ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/metadata", metadata,
-        //zkutil::CreateMode::Persistent));
-//    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/columns", getColumns().toString(),
-//        zkutil::CreateMode::Persistent));
-    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/log", "",
-        zkutil::CreateMode::Persistent));
-//    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/blocks", "",
-//        zkutil::CreateMode::Persistent));
-//    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/block_numbers", "",
-//        zkutil::CreateMode::Persistent));
-//    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/nonincrement_block_numbers", "",
-//        zkutil::CreateMode::Persistent)); /// /nonincrement_block_numbers dir is unused, but is created nonetheless for backwards compatibility.
-    // TODO do we need a leader here? (probably yes) what is it gonna do?       
-    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/leader_election", "",
-        zkutil::CreateMode::Persistent));
-    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/temp", "",
-        zkutil::CreateMode::Persistent));
-    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/replicas", "",
-        zkutil::CreateMode::Persistent));
-
-    Coordination::Responses responses;
-    auto code = zookeeper->tryMulti(ops, responses);
-    if (code && code != Coordination::ZNODEEXISTS)
-        throw Coordination::Exception(code);
-
-    // ...
+    LOG_DEBUG(log, "CREATE TABLE STATEMENT " << statement);
 
+    // let's do dumb write to zk at the first iteration
+    current_zookeeper = getZooKeeper();
+    current_zookeeper->createOrUpdate(replica_path + "/" + table_name, statement, zkutil::CreateMode::Persistent);
 }
 
 
@@ -167,6 +147,14 @@ void DatabaseReplicated::renameTable(
     // replicated stuff; what to put to a znode
     // String statement = getObjectDefinitionFromCreateQuery(query);
     // this one is fairly more complex
+    current_zookeeper = getZooKeeper();
+
+    // no need for now to have stat
+    Coordination::Stat metadata_stat;
+    auto statement = current_zookeeper->get(replica_path + "/" + table_name, &metadata_stat);
+    current_zookeeper->createOrUpdate(replica_path + "/" + to_table_name, statement, zkutil::CreateMode::Persistent);
+    current_zookeeper->remove(replica_path + "/" + table_name);
+    // TODO add rename statement to the log
 }
 
 void DatabaseReplicated::dropTable(
@@ -176,9 +164,10 @@ void DatabaseReplicated::dropTable(
 {
     // try
     DatabaseOnDisk::dropTable(context, table_name, no_delay);
-    // replicated stuff
-    //String statement = getObjectDefinitionFromCreateQuery(query);
-    // ...
+
+    // let's do dumb remove from zk at the first iteration
+    current_zookeeper = getZooKeeper();
+    current_zookeeper->remove(replica_path + "/" + table_name);
 }
 
 void DatabaseReplicated::drop(const Context & context)

From 948bd1c5cc3f069aa621055611b81f484de49dad Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Thu, 30 Apr 2020 19:16:53 +0300
Subject: [PATCH 0014/1238] database replicated basic test (create and drop)

---
 .../01267_replicated_database_engine_zookeeper.sql   | 12 ++++++++++++
 1 file changed, 12 insertions(+)
 create mode 100644 tests/queries/0_stateless/01267_replicated_database_engine_zookeeper.sql

diff --git a/tests/queries/0_stateless/01267_replicated_database_engine_zookeeper.sql b/tests/queries/0_stateless/01267_replicated_database_engine_zookeeper.sql
new file mode 100644
index 00000000000..94b461e2f93
--- /dev/null
+++ b/tests/queries/0_stateless/01267_replicated_database_engine_zookeeper.sql
@@ -0,0 +1,12 @@
+DROP DATABASE IF EXISTS test_db1;
+DROP DATABASE IF EXISTS test_db2;
+DROP TABLE IF EXISTS test_table1;
+DROP TABLE IF EXISTS test_table2;
+
+CREATE DATABASE test_db1 ENGINE = Replicated('/clickhouse/databases/test1', 'id1');
+USE test_db1;
+CREATE TABLE test_table1  (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree('/clickhouse/tables/test1', 'id1', d, k, 8192);
+
+CREATE DATABASE test_db2 ENGINE = Replicated('/clickhouse/databases/test1', 'id2');
+USE test_db2;
+CREATE TABLE test_table2 (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree('/clickhouse/tables/test1', 'id2', d, k, 8192);

From 0a4c1783a1ef45edc189e1cf19e2fdef1712e140 Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Fri, 1 May 2020 16:16:02 +0300
Subject: [PATCH 0015/1238] Make drop work by fixing namespace bug

data dir wasn't set right. now it's fixed.
add non-replicated table to test sql
---
 src/Databases/DatabaseReplicated.cpp          | 19 ++++++++++---------
 ...7_replicated_database_engine_zookeeper.sql | 10 ++++------
 2 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index d6bbec24791..61bcfc8d5a9 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -71,22 +71,24 @@ DatabaseReplicated::DatabaseReplicated(
     const String & zookeeper_path_,
     const String & replica_name_,
     const Context & context_)
-    : DatabaseOrdinary(name_, metadata_path_, "data/", "DatabaseReplicated (" + name_ + ")", context_)
+    : DatabaseOrdinary(name_, metadata_path_, "data/" + escapeForFileName(name_) + "/", "DatabaseReplicated (" + name_ + ")", context_)
     , zookeeper_path(zookeeper_path_)
     , replica_name(replica_name_)
 {
+    LOG_DEBUG(log, "METADATA PATH ARGUMENT " << metadata_path_);
+    LOG_DEBUG(log, "METADATA PATH ACTUAL " << getMetadataPath());
 
     if (!zookeeper_path.empty() && zookeeper_path.back() == '/')
         zookeeper_path.resize(zookeeper_path.size() - 1);
-    /// If zookeeper chroot prefix is used, path should start with '/', because chroot concatenates without it.
+    // If zookeeper chroot prefix is used, path should start with '/', because chroot concatenates without it.
     if (!zookeeper_path.empty() && zookeeper_path.front() != '/')
         zookeeper_path = "/" + zookeeper_path;
+
     replica_path = zookeeper_path + "/replicas/" + replica_name;
 
     if (context_.hasZooKeeper()) {
         current_zookeeper = context_.getZooKeeper();
     }
-
     if (!current_zookeeper)
     {
             throw Exception("Can't create replicated database without ZooKeeper", ErrorCodes::NO_ZOOKEEPER);
@@ -95,6 +97,7 @@ DatabaseReplicated::DatabaseReplicated(
     }
 
     // test without this fancy mess (prob wont work)
+    // it works
     current_zookeeper->createAncestors(replica_path);
     current_zookeeper->createOrUpdate(replica_path, String(), zkutil::CreateMode::Persistent);
 
@@ -172,12 +175,10 @@ void DatabaseReplicated::dropTable(
 
 void DatabaseReplicated::drop(const Context & context)
 {
-    DatabaseOnDisk::drop(context);
-    // replicated stuff
-    //String statement = getObjectDefinitionFromCreateQuery(query);
-    // should it be possible to recover after a drop. 
-    // if not, we can just delete all the zookeeper nodes starting from
-    // zookeeper path. does it work recursively? hope so...
+    current_zookeeper = getZooKeeper();
+    current_zookeeper->remove(replica_path);
+
+    DatabaseOnDisk::drop(context); // no throw
 }
 
 // sync replica's zookeeper metadata
diff --git a/tests/queries/0_stateless/01267_replicated_database_engine_zookeeper.sql b/tests/queries/0_stateless/01267_replicated_database_engine_zookeeper.sql
index 94b461e2f93..c70de9a50d2 100644
--- a/tests/queries/0_stateless/01267_replicated_database_engine_zookeeper.sql
+++ b/tests/queries/0_stateless/01267_replicated_database_engine_zookeeper.sql
@@ -1,12 +1,10 @@
 DROP DATABASE IF EXISTS test_db1;
 DROP DATABASE IF EXISTS test_db2;
-DROP TABLE IF EXISTS test_table1;
-DROP TABLE IF EXISTS test_table2;
 
 CREATE DATABASE test_db1 ENGINE = Replicated('/clickhouse/databases/test1', 'id1');
-USE test_db1;
-CREATE TABLE test_table1  (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree('/clickhouse/tables/test1', 'id1', d, k, 8192);
+CREATE TABLE test_db1.replicated_table  (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree('/clickhouse/tables/test1', 'id1', d, k, 8192);
+CREATE TABLE test_db1.basic_table (EventDate Date, CounterID Int) engine=MergeTree(EventDate, (CounterID, EventDate), 8192);
 
 CREATE DATABASE test_db2 ENGINE = Replicated('/clickhouse/databases/test1', 'id2');
-USE test_db2;
-CREATE TABLE test_table2 (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree('/clickhouse/tables/test1', 'id2', d, k, 8192);
+CREATE TABLE test_db2.replicated_table (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree('/clickhouse/tables/test1', 'id2', d, k, 8192);
+CREATE TABLE test_db2.basic_table (EventDate Date, CounterID Int) engine=MergeTree(EventDate, (CounterID, EventDate), 8192);

From 319256ef4f29b0e4d4d0f5034874961fbb64813d Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Tue, 5 May 2020 17:16:59 +0300
Subject: [PATCH 0016/1238] an attempt to replicated create query from create
 query

---
 src/Databases/DatabaseReplicated.cpp        | 198 +++++++++-----------
 src/Databases/DatabaseReplicated.h          |  60 +++---
 src/Databases/IDatabase.h                   |   4 +
 src/Interpreters/InterpreterCreateQuery.cpp |  15 +-
 4 files changed, 143 insertions(+), 134 deletions(-)

diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index 61bcfc8d5a9..a1eb910dedf 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -2,6 +2,7 @@
 
 #include <Core/Settings.h>
 #include <Databases/DatabaseOnDisk.h>
+#include <Databases/DatabaseOrdinary.h>
 #include <Databases/DatabaseReplicated.h>
 #include <Databases/DatabasesCommon.h>
 #include <IO/ReadBufferFromFile.h>
@@ -70,8 +71,11 @@ DatabaseReplicated::DatabaseReplicated(
     const String & metadata_path_,
     const String & zookeeper_path_,
     const String & replica_name_,
-    const Context & context_)
-    : DatabaseOrdinary(name_, metadata_path_, "data/" + escapeForFileName(name_) + "/", "DatabaseReplicated (" + name_ + ")", context_)
+    Context & context_)
+//    : DatabaseOrdinary(name_, metadata_path_, "data/" + escapeForFileName(name_) + "/", "DatabaseReplicated (" + name_ + ")", context_)
+    // TODO add constructor to Atomic and call it here with path and logger name specification
+    // TODO ask why const and & are ommited in Atomic
+    : DatabaseAtomic(name_, metadata_path_, context_)
     , zookeeper_path(zookeeper_path_)
     , replica_name(replica_name_)
 {
@@ -96,115 +100,97 @@ DatabaseReplicated::DatabaseReplicated(
 
     }
 
-    // test without this fancy mess (prob wont work)
-    // it works
-    current_zookeeper->createAncestors(replica_path);
-    current_zookeeper->createOrUpdate(replica_path, String(), zkutil::CreateMode::Persistent);
+    current_zookeeper->createAncestors(zookeeper_path);
+    current_zookeeper->createOrUpdate(zookeeper_path, String(), zkutil::CreateMode::Persistent);
 
-//    if (!current_zookeeper->exists(zookeeper_path)) {
-//
-//        LOG_DEBUG(log, "Creating database " << zookeeper_path);
-//        current_zookeeper->createAncestors(zookeeper_path);
-
-        // Coordination::Requests ops;
-        // ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path, "",
-        //     zkutil::CreateMode::Persistent));
-        // ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/replicas", "",
-        //     zkutil::CreateMode::Persistent));
-
-        // Coordination::Responses responses;
-        // auto code = current_zookeeper->tryMulti(ops, responses);
-        // if (code && code != Coordination::ZNODEEXISTS)
-        //     throw Coordination::Exception(code);
-        // }
-}
-
-void DatabaseReplicated::createTable(
-    const Context & context,
-    const String & table_name,
-    const StoragePtr & table,
-    const ASTPtr & query)
-{
-    // try?
-    DatabaseOnDisk::createTable(context, table_name, table, query);
-
-    // suppose it worked
-    String statement = getObjectDefinitionFromCreateQuery(query);
-    LOG_DEBUG(log, "CREATE TABLE STATEMENT " << statement);
-
-    // let's do dumb write to zk at the first iteration
-    current_zookeeper = getZooKeeper();
-    current_zookeeper->createOrUpdate(replica_path + "/" + table_name, statement, zkutil::CreateMode::Persistent);
+    // TODO launch a worker here
 }
 
 
-void DatabaseReplicated::renameTable(
-        const Context & context,
-        const String & table_name,
-        IDatabase & to_database,
-        const String & to_table_name,
-        bool exchange)
-{
-    // try
-    DatabaseOnDisk::renameTable(context, table_name, to_database, to_table_name, exchange);
-    // replicated stuff; what to put to a znode
-    // String statement = getObjectDefinitionFromCreateQuery(query);
-    // this one is fairly more complex
-    current_zookeeper = getZooKeeper();
-
-    // no need for now to have stat
-    Coordination::Stat metadata_stat;
-    auto statement = current_zookeeper->get(replica_path + "/" + table_name, &metadata_stat);
-    current_zookeeper->createOrUpdate(replica_path + "/" + to_table_name, statement, zkutil::CreateMode::Persistent);
-    current_zookeeper->remove(replica_path + "/" + table_name);
-    // TODO add rename statement to the log
+void DatabaseReplicated::propose(const ASTPtr & query) {
+    LOG_DEBUG(log, "PROPOSING\n" << queryToString(query));
 }
 
-void DatabaseReplicated::dropTable(
-        const Context & context,
-        const String & table_name,
-        bool no_delay)
-{
-    // try
-    DatabaseOnDisk::dropTable(context, table_name, no_delay);
-
-    // let's do dumb remove from zk at the first iteration
-    current_zookeeper = getZooKeeper();
-    current_zookeeper->remove(replica_path + "/" + table_name);
-}
-
-void DatabaseReplicated::drop(const Context & context)
-{
-    current_zookeeper = getZooKeeper();
-    current_zookeeper->remove(replica_path);
-
-    DatabaseOnDisk::drop(context); // no throw
-}
-
-// sync replica's zookeeper metadata
-void DatabaseReplicated::syncReplicaState(Context & context) {
-    auto c = context; // fixes unuser parameter error
-    return;
-}
-
-// get the up to date metadata from zookeeper to local metadata dir
-// for replicated (only?) tables
-void DatabaseReplicated::updateMetadata(Context & context) {
-    auto c = context; // fixes unuser parameter error
-    return;
-}
-
-void DatabaseReplicated::loadStoredObjects(
-    Context & context,
-    bool has_force_restore_data_flag)
-{
-    syncReplicaState(context);
-    updateMetadata(context);
-
-    DatabaseOrdinary::loadStoredObjects(context, has_force_restore_data_flag);
-
-}
-
-
+// void DatabaseReplicated::createTable(
+//     const Context & context,
+//     const String & table_name,
+//     const StoragePtr & table,
+//     const ASTPtr & query)
+// {
+//     LOG_DEBUG(log, "CREATE TABLE");
+// 
+// 
+//     DatabaseOnDisk::createTable(context, table_name, table, query);
+// 
+//     // String statement = getObjectDefinitionFromCreateQuery(query);
+// 
+//     // current_zookeeper = getZooKeeper();
+//     // current_zookeeper->createOrUpdate(replica_path + "/" + table_name + ".sql", statement, zkutil::CreateMode::Persistent);
+//     return;
+// }
+// 
+// 
+// void DatabaseReplicated::renameTable(
+//         const Context & context,
+//         const String & table_name,
+//         IDatabase & to_database,
+//         const String & to_table_name,
+//         bool exchange)
+// {
+//     LOG_DEBUG(log, "RENAME TABLE");
+//     DatabaseAtomic::renameTable(context, table_name, to_database, to_table_name, exchange);
+//     // try
+//     // DatabaseOnDisk::renameTable(context, table_name, to_database, to_table_name, exchange);
+//     // replicated stuff; what to put to a znode
+//     // String statement = getObjectDefinitionFromCreateQuery(query);
+//     // this one is fairly more complex
+//     // current_zookeeper = getZooKeeper();
+// 
+//     // no need for now to have stat
+//     // Coordination::Stat metadata_stat;
+//     // auto statement = current_zookeeper->get(replica_path + "/" + table_name, &metadata_stat);
+//     // current_zookeeper->createOrUpdate(replica_path + "/" + to_table_name, statement, zkutil::CreateMode::Persistent);
+//     // current_zookeeper->remove(replica_path + "/" + table_name);
+//     // TODO add rename statement to the log
+//     return;
+// }
+// 
+// void DatabaseReplicated::dropTable(
+//         const Context & context,
+//         const String & table_name,
+//         bool no_delay)
+// {
+//     LOG_DEBUG(log, "DROP TABLE");
+//     DatabaseAtomic::dropTable(context, table_name, no_delay);
+//     // try
+//     // DatabaseOnDisk::dropTable(context, table_name, no_delay);
+// 
+//     // let's do dumb remove from zk at the first iteration
+//     // current_zookeeper = getZooKeeper();
+//     // current_zookeeper->remove(replica_path + "/" + table_name);
+//     return;
+// }
+// 
+// void DatabaseReplicated::drop(const Context & context)
+// {
+//     LOG_DEBUG(log, "DROP");
+//     DatabaseAtomic::drop(context);
+//     // current_zookeeper = getZooKeeper();
+//     // current_zookeeper->remove(replica_path);
+// 
+//     // DatabaseOnDisk::drop(context); // no throw
+//     return;
+// }
+// 
+// void DatabaseReplicated::loadStoredObjects(
+//     Context & context,
+//     bool has_force_restore_data_flag)
+// {
+//     DatabaseOrdinary::loadStoredObjects(context, has_force_restore_data_flag);
+//     // launch a worker maybe. i don't know
+//     // DatabaseAtomic::loadStoredObjects(context, has_force_restore_data_flag);
+// 
+//     return;
+// }
 
 }
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index bc1af923277..df6f86c1491 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include <Databases/DatabaseOrdinary.h>
+#include <Databases/DatabaseAtomic.h>
 #include <Common/randomSeed.h>
 #include <Common/ZooKeeper/ZooKeeper.h>
 
@@ -11,36 +11,47 @@ namespace DB
   *  that contain declaration of table represented by SQL ATTACH TABLE query
   *  and operation log in zookeeper
   */
-class DatabaseReplicated : public DatabaseOrdinary
+class DatabaseReplicated : public DatabaseAtomic
 {
 public:
-    DatabaseReplicated(const String & name_, const String & metadata_path_, const String & zookeeper_path_, const String & replica_name_, const Context & context);
+    DatabaseReplicated(const String & name_, const String & metadata_path_, const String & zookeeper_path_, const String & replica_name_, Context & context);
+
+//    void drop(const Context & context) override;
 
     String getEngineName() const override { return "Replicated"; }
 
-    void createTable(
-        const Context & context,
-        const String & table_name,
-        const StoragePtr & table,
-        const ASTPtr & query) override;
+    void propose(const ASTPtr & query) override;
 
-    void dropTable(
-        const Context & context,
-        const String & table_name,
-        bool no_delay) override;
+//    void createTable(
+//        const Context & context,
+//        const String & table_name,
+//        const StoragePtr & table,
+//        const ASTPtr & query) override;
+//
+//    void dropTable(
+//        const Context & context,
+//        const String & table_name,
+//        bool no_delay) override;
+//
+//    void renameTable(
+//        const Context & context,
+//        const String & table_name,
+//        IDatabase & to_database,
+//        const String & to_table_name,
+//        bool exchange) override;
+//
+//    void alterTable(
+//        const Context & context,
+//        const StorageID & table_id,
+//        const StorageInMemoryMetadata & metadata) override;
 
-    void renameTable(
-        const Context & context,
-        const String & table_name,
-        IDatabase & to_database,
-        const String & to_table_name,
-        bool exchange) override;
+//    void attachTable(const String & name, const StoragePtr & table, const String & relative_table_path) override;
+//
+//    StoragePtr detachTable(const String & name) override;
 
-    void drop(const Context & context) override;
-
-    void loadStoredObjects(
-        Context & context,
-        bool has_force_restore_data_flag) override;
+//    void loadStoredObjects(
+//        Context & context,
+//        bool has_force_restore_data_flag) override;
 
 private:
     String zookeeper_path;
@@ -54,9 +65,6 @@ private:
     zkutil::ZooKeeperPtr getZooKeeper() const;
     void setZooKeeper(zkutil::ZooKeeperPtr zookeeper);
 
-    void syncReplicaState(Context & context);
-
-    void updateMetadata(Context & context);
 };
 
 }
diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h
index 26b27045be6..18265b153cf 100644
--- a/src/Databases/IDatabase.h
+++ b/src/Databases/IDatabase.h
@@ -161,6 +161,10 @@ public:
     /// Is the database empty.
     virtual bool empty() const = 0;
 
+    virtual void propose(const ASTPtr & /*query*/) {
+        throw Exception("There is no propose query method for Database" + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
+    }
+
     /// Add the table to the database. Record its presence in the metadata.
     virtual void createTable(
         const Context & /*context*/,
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index 3e09d728c4c..99c021a72fa 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -622,7 +622,7 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
     if (need_add_to_database)
     {
         database = DatabaseCatalog::instance().getDatabase(create.database);
-        if (database->getEngineName() == "Atomic")
+        if (database->getEngineName() == "Atomic" || database->getEngineName() == "Replicated")
         {
             /// TODO implement ATTACH FROM 'path/to/data': generate UUID and move table data to store/
             if (create.attach && create.uuid == UUIDHelpers::Nil)
@@ -696,7 +696,18 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
             false);
     }
 
-    database->createTable(context, table_name, res, query_ptr);
+    
+    if (database->getEngineName() == "Replicated") {
+        // propose
+        // try to 
+        database->propose(query_ptr);
+        database->createTable(context, table_name, res, query_ptr);
+        // catch
+        // throw and remove proposal
+        // otherwise 
+        // proceed (commit to zk)
+    } else
+        database->createTable(context, table_name, res, query_ptr);
 
     /// We must call "startup" and "shutdown" while holding DDLGuard.
     /// Because otherwise method "shutdown" (from InterpreterDropQuery) can be called before startup

From 0a860c0c2ba760bf8c6ea45378acc0f00cb2bcff Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Mon, 11 May 2020 15:55:17 +0300
Subject: [PATCH 0017/1238] log based replicated

---
 src/Databases/DatabaseReplicated.cpp        | 177 ++++++++++----------
 src/Databases/DatabaseReplicated.h          |  57 +++----
 src/Interpreters/ClientInfo.h               |   1 +
 src/Interpreters/Context.h                  |   3 +
 src/Interpreters/DDLWorker.cpp              |   3 +-
 src/Interpreters/InterpreterAlterQuery.cpp  |   9 +
 src/Interpreters/InterpreterCreateQuery.cpp |   4 +-
 src/Interpreters/InterpreterDropQuery.cpp   |   6 +
 src/Interpreters/InterpreterRenameQuery.cpp |   6 +-
 9 files changed, 142 insertions(+), 124 deletions(-)

diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index a1eb910dedf..1bc954bfb76 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -6,11 +6,13 @@
 #include <Databases/DatabaseReplicated.h>
 #include <Databases/DatabasesCommon.h>
 #include <IO/ReadBufferFromFile.h>
+#include <IO/ReadBufferFromString.h>
 #include <IO/ReadHelpers.h>
 #include <IO/WriteBufferFromFile.h>
 #include <IO/WriteHelpers.h>
 #include <Interpreters/Context.h>
 #include <Interpreters/InterpreterCreateQuery.h>
+#include <Interpreters/executeQuery.h>
 #include <Parsers/ASTCreateQuery.h>
 #include <Parsers/ASTSetQuery.h>
 #include <Parsers/ParserCreateQuery.h>
@@ -24,6 +26,7 @@
 #include <Poco/DirectoryIterator.h>
 #include <Poco/Event.h>
 #include <Common/Stopwatch.h>
+#include <Common/setThreadName.h>
 #include <Common/ThreadPool.h>
 #include <Common/escapeForFileName.h>
 #include <Common/quoteString.h>
@@ -33,8 +36,10 @@
 #include <Common/ZooKeeper/KeeperException.h>
 #include <Common/ZooKeeper/Types.h>
 #include <Common/ZooKeeper/ZooKeeper.h>
+#include <Common/ZooKeeper/Lock.h>
 
 #include <ext/scope_guard.h>
+#include <common/sleep.h>
 
 namespace DB
 {
@@ -75,13 +80,11 @@ DatabaseReplicated::DatabaseReplicated(
 //    : DatabaseOrdinary(name_, metadata_path_, "data/" + escapeForFileName(name_) + "/", "DatabaseReplicated (" + name_ + ")", context_)
     // TODO add constructor to Atomic and call it here with path and logger name specification
     // TODO ask why const and & are ommited in Atomic
-    : DatabaseAtomic(name_, metadata_path_, context_)
+    : DatabaseOrdinary(name_, metadata_path_, context_)
+    , context(context_)
     , zookeeper_path(zookeeper_path_)
     , replica_name(replica_name_)
 {
-    LOG_DEBUG(log, "METADATA PATH ARGUMENT " << metadata_path_);
-    LOG_DEBUG(log, "METADATA PATH ACTUAL " << getMetadataPath());
-
     if (!zookeeper_path.empty() && zookeeper_path.back() == '/')
         zookeeper_path.resize(zookeeper_path.size() - 1);
     // If zookeeper chroot prefix is used, path should start with '/', because chroot concatenates without it.
@@ -103,94 +106,96 @@ DatabaseReplicated::DatabaseReplicated(
     current_zookeeper->createAncestors(zookeeper_path);
     current_zookeeper->createOrUpdate(zookeeper_path, String(), zkutil::CreateMode::Persistent);
 
+    // TODO if no last_entry then make it equal to 0 in zk;
+
     // TODO launch a worker here
+
+    main_thread = ThreadFromGlobalPool(&DatabaseReplicated::runMainThread, this);
+}
+
+DatabaseReplicated::~DatabaseReplicated()
+{
+    stop_flag = true;
+    main_thread.join();
+}
+
+void DatabaseReplicated::runMainThread() {
+    setThreadName("ReplctdWorker"); // ok whatever. 15 bytes // + database_name);
+    LOG_DEBUG(log, "Started " << database_name << " database worker thread\n Replica: " << replica_name);
+
+    while (!stop_flag) {
+        attachToThreadGroup();
+
+        sleepForSeconds(10);
+        current_zookeeper = getZooKeeper();
+        String last_n = current_zookeeper->get(zookeeper_path + "/last_entry", {}, NULL);
+        size_t last_n_parsed = parse<size_t>(last_n);
+        while (current_log_entry_n < last_n_parsed) {
+            current_log_entry_n++;
+            executeLog(current_log_entry_n);
+        }
+        break; // debug purpose 
+    }
+}
+
+void DatabaseReplicated::executeLog(size_t n) {
+
+        LOG_DEBUG(log, "EXECUTING LOG! DB: " << database_name << "\n Replica: " << replica_name << "LOG N" << n);
+        current_context = std::make_unique<Context>(context);
+        current_context->from_replicated_log = true;
+        current_context->setCurrentQueryId(""); // generate random query_id
+        current_zookeeper = getZooKeeper();
+
+        String query_to_execute = current_zookeeper->get(zookeeper_path + "/log." + std::to_string(n), {}, NULL);
+        ReadBufferFromString istr(query_to_execute);
+        String dummy_string;
+        WriteBufferFromString ostr(dummy_string);
+        executeQuery(istr, ostr, false, context, {});
+}
+
+// TODO we might not need it here at all
+void DatabaseReplicated::attachToThreadGroup() {
+    if (thread_group)
+    {
+        /// Put all threads to one thread pool
+        CurrentThread::attachToIfDetached(thread_group);
+    }
+    else
+    {
+        CurrentThread::initializeQuery();
+        thread_group = CurrentThread::getGroup();
+    }
+}
+
+// taken from ddlworker
+static std::unique_ptr<zkutil::Lock> createSimpleZooKeeperLock(
+    const std::shared_ptr<zkutil::ZooKeeper> & zookeeper, const String & lock_prefix, const String & lock_name, const String & lock_message)
+{
+    auto zookeeper_holder = std::make_shared<zkutil::ZooKeeperHolder>();
+    zookeeper_holder->initFromInstance(zookeeper);
+    return std::make_unique<zkutil::Lock>(std::move(zookeeper_holder), lock_prefix, lock_name, lock_message);
 }
 
 
 void DatabaseReplicated::propose(const ASTPtr & query) {
+    // TODO if source is zk then omit propose. Throw?
+    
+    // TODO remove that log message i think
     LOG_DEBUG(log, "PROPOSING\n" << queryToString(query));
+
+    current_zookeeper = getZooKeeper();
+    auto lock = createSimpleZooKeeperLock(current_zookeeper, zookeeper_path, "lock", replica_name);
+
+    // TODO check that last_entry is the same as current_log_entry_n for the replica
+
+    current_log_entry_n++; // starting from 1
+    String log_entry = zookeeper_path + "/log." + std::to_string(current_log_entry_n);
+    current_zookeeper->createOrUpdate(log_entry, queryToString(query), zkutil::CreateMode::Persistent);
+
+    current_zookeeper->createOrUpdate(zookeeper_path + "/last_entry", std::to_string(current_log_entry_n), zkutil::CreateMode::Persistent);
+
+    lock->unlock();
+    // write to metastore the last entry?
 }
 
-// void DatabaseReplicated::createTable(
-//     const Context & context,
-//     const String & table_name,
-//     const StoragePtr & table,
-//     const ASTPtr & query)
-// {
-//     LOG_DEBUG(log, "CREATE TABLE");
-// 
-// 
-//     DatabaseOnDisk::createTable(context, table_name, table, query);
-// 
-//     // String statement = getObjectDefinitionFromCreateQuery(query);
-// 
-//     // current_zookeeper = getZooKeeper();
-//     // current_zookeeper->createOrUpdate(replica_path + "/" + table_name + ".sql", statement, zkutil::CreateMode::Persistent);
-//     return;
-// }
-// 
-// 
-// void DatabaseReplicated::renameTable(
-//         const Context & context,
-//         const String & table_name,
-//         IDatabase & to_database,
-//         const String & to_table_name,
-//         bool exchange)
-// {
-//     LOG_DEBUG(log, "RENAME TABLE");
-//     DatabaseAtomic::renameTable(context, table_name, to_database, to_table_name, exchange);
-//     // try
-//     // DatabaseOnDisk::renameTable(context, table_name, to_database, to_table_name, exchange);
-//     // replicated stuff; what to put to a znode
-//     // String statement = getObjectDefinitionFromCreateQuery(query);
-//     // this one is fairly more complex
-//     // current_zookeeper = getZooKeeper();
-// 
-//     // no need for now to have stat
-//     // Coordination::Stat metadata_stat;
-//     // auto statement = current_zookeeper->get(replica_path + "/" + table_name, &metadata_stat);
-//     // current_zookeeper->createOrUpdate(replica_path + "/" + to_table_name, statement, zkutil::CreateMode::Persistent);
-//     // current_zookeeper->remove(replica_path + "/" + table_name);
-//     // TODO add rename statement to the log
-//     return;
-// }
-// 
-// void DatabaseReplicated::dropTable(
-//         const Context & context,
-//         const String & table_name,
-//         bool no_delay)
-// {
-//     LOG_DEBUG(log, "DROP TABLE");
-//     DatabaseAtomic::dropTable(context, table_name, no_delay);
-//     // try
-//     // DatabaseOnDisk::dropTable(context, table_name, no_delay);
-// 
-//     // let's do dumb remove from zk at the first iteration
-//     // current_zookeeper = getZooKeeper();
-//     // current_zookeeper->remove(replica_path + "/" + table_name);
-//     return;
-// }
-// 
-// void DatabaseReplicated::drop(const Context & context)
-// {
-//     LOG_DEBUG(log, "DROP");
-//     DatabaseAtomic::drop(context);
-//     // current_zookeeper = getZooKeeper();
-//     // current_zookeeper->remove(replica_path);
-// 
-//     // DatabaseOnDisk::drop(context); // no throw
-//     return;
-// }
-// 
-// void DatabaseReplicated::loadStoredObjects(
-//     Context & context,
-//     bool has_force_restore_data_flag)
-// {
-//     DatabaseOrdinary::loadStoredObjects(context, has_force_restore_data_flag);
-//     // launch a worker maybe. i don't know
-//     // DatabaseAtomic::loadStoredObjects(context, has_force_restore_data_flag);
-// 
-//     return;
-// }
-
 }
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index df6f86c1491..d61f0a00ef8 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -1,9 +1,12 @@
 #pragma once
 
-#include <Databases/DatabaseAtomic.h>
+#include <Databases/DatabaseOrdinary.h>
 #include <Common/randomSeed.h>
 #include <Common/ZooKeeper/ZooKeeper.h>
 
+#include <atomic>
+#include <thread>
+
 namespace DB
 {
 /** Replicated database engine.
@@ -11,49 +14,35 @@ namespace DB
   *  that contain declaration of table represented by SQL ATTACH TABLE query
   *  and operation log in zookeeper
   */
-class DatabaseReplicated : public DatabaseAtomic
+class DatabaseReplicated : public DatabaseOrdinary
 {
 public:
     DatabaseReplicated(const String & name_, const String & metadata_path_, const String & zookeeper_path_, const String & replica_name_, Context & context);
 
-//    void drop(const Context & context) override;
+    ~DatabaseReplicated();
 
     String getEngineName() const override { return "Replicated"; }
 
     void propose(const ASTPtr & query) override;
 
-//    void createTable(
-//        const Context & context,
-//        const String & table_name,
-//        const StoragePtr & table,
-//        const ASTPtr & query) override;
-//
-//    void dropTable(
-//        const Context & context,
-//        const String & table_name,
-//        bool no_delay) override;
-//
-//    void renameTable(
-//        const Context & context,
-//        const String & table_name,
-//        IDatabase & to_database,
-//        const String & to_table_name,
-//        bool exchange) override;
-//
-//    void alterTable(
-//        const Context & context,
-//        const StorageID & table_id,
-//        const StorageInMemoryMetadata & metadata) override;
-
-//    void attachTable(const String & name, const StoragePtr & table, const String & relative_table_path) override;
-//
-//    StoragePtr detachTable(const String & name) override;
-
-//    void loadStoredObjects(
-//        Context & context,
-//        bool has_force_restore_data_flag) override;
-
 private:
+
+    void runMainThread();
+    void runCleanupThread();
+
+    void attachToThreadGroup();
+    
+    void executeLog(size_t n);
+
+    Context & context; // is it overkiill?
+    std::unique_ptr<Context> current_context; // to run executeQuery
+
+    size_t current_log_entry_n = 0;
+    std::atomic<bool> stop_flag{false};
+
+    ThreadFromGlobalPool main_thread;
+    ThreadGroupStatusPtr thread_group;
+
     String zookeeper_path;
     String replica_name;
     String replica_path;
diff --git a/src/Interpreters/ClientInfo.h b/src/Interpreters/ClientInfo.h
index 704fba3b3ef..2dff30e40a2 100644
--- a/src/Interpreters/ClientInfo.h
+++ b/src/Interpreters/ClientInfo.h
@@ -38,6 +38,7 @@ public:
         NO_QUERY = 0,            /// Uninitialized object.
         INITIAL_QUERY = 1,
         SECONDARY_QUERY = 2,    /// Query that was initiated by another query for distributed or ON CLUSTER query execution.
+        REPLICATED_LOG_QUERY = 3, /// TODO add comment
     };
 
 
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index 5a4e959229f..66ea6f6914c 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -214,6 +214,9 @@ private:
     Context();
 
 public:
+    ///testing
+    bool from_replicated_log = false;
+
     /// Create initial Context with ContextShared and etc.
     static Context createGlobal(ContextShared * shared);
     static SharedContextHolder createShared();
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 28436f192b0..65f984924a3 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -585,7 +585,8 @@ bool DDLWorker::tryExecuteQuery(const String & query, const DDLTask & task, Exec
     try
     {
         current_context = std::make_unique<Context>(context);
-        current_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY;
+        //current_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY;
+        current_context->from_replicated_log = true;
         current_context->setCurrentQueryId(""); // generate random query_id
         executeQuery(istr, ostr, false, *current_context, {});
     }
diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp
index 61277b8160c..ad79bd68fed 100644
--- a/src/Interpreters/InterpreterAlterQuery.cpp
+++ b/src/Interpreters/InterpreterAlterQuery.cpp
@@ -15,6 +15,8 @@
 #include <Common/typeid_cast.h>
 #include <boost/range/algorithm_ext/push_back.hpp>
 #include <algorithm>
+#include <Databases/IDatabase.h>
+#include <Databases/DatabaseFactory.h>
 
 
 namespace DB
@@ -37,6 +39,7 @@ BlockIO InterpreterAlterQuery::execute()
 {
     const auto & alter = query_ptr->as<ASTAlterQuery &>();
 
+
     if (!alter.cluster.empty())
         return executeDDLQueryOnCluster(query_ptr, context, getRequiredAccess());
 
@@ -46,6 +49,12 @@ BlockIO InterpreterAlterQuery::execute()
     auto alter_lock = table->lockForAlter(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
     auto metadata_snapshot = table->getInMemoryMetadataPtr();
 
+    // TODO it's dirty. need to add database to parsing stage
+    DatabasePtr database = DatabaseCatalog::instance().getDatabase(table_id.database_name);
+    if (database->getEngineName() == "Replicated" && !context.from_replicated_log) {
+        database->propose(query_ptr);
+    }
+
     /// Add default database to table identifiers that we can encounter in e.g. default expressions,
     /// mutation expression, etc.
     AddDefaultDatabaseVisitor visitor(table_id.getDatabaseName());
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index 99c021a72fa..5698c370fa1 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -622,7 +622,7 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
     if (need_add_to_database)
     {
         database = DatabaseCatalog::instance().getDatabase(create.database);
-        if (database->getEngineName() == "Atomic" || database->getEngineName() == "Replicated")
+        if (database->getEngineName() == "Atomic") // || database->getEngineName() == "Replicated")
         {
             /// TODO implement ATTACH FROM 'path/to/data': generate UUID and move table data to store/
             if (create.attach && create.uuid == UUIDHelpers::Nil)
@@ -697,7 +697,7 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
     }
 
     
-    if (database->getEngineName() == "Replicated") {
+    if (database->getEngineName() == "Replicated" && !context.from_replicated_log) {
         // propose
         // try to 
         database->propose(query_ptr);
diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp
index e6853a8af4c..bae1b796016 100644
--- a/src/Interpreters/InterpreterDropQuery.cpp
+++ b/src/Interpreters/InterpreterDropQuery.cpp
@@ -97,6 +97,9 @@ BlockIO InterpreterDropQuery::executeToTable(
             if (database->getEngineName() != "Atomic")
                 table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
             /// Drop table from memory, don't touch data and metadata
+            if (database->getEngineName() == "Replicated" && !context.from_replicated_log) {
+                database->propose(query_ptr);
+            }
             database->detachTable(table_id.table_name);
         }
         else if (query.kind == ASTDropQuery::Kind::Truncate)
@@ -120,6 +123,9 @@ BlockIO InterpreterDropQuery::executeToTable(
             if (database->getEngineName() != "Atomic")
                 table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
 
+            if (database->getEngineName() == "Replicated" && !context.from_replicated_log) {
+                database->propose(query_ptr);
+            }
             database->dropTable(context, table_id.table_name, query.no_delay);
         }
     }
diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp
index de2b6bb0c1c..d93b14a6bc2 100644
--- a/src/Interpreters/InterpreterRenameQuery.cpp
+++ b/src/Interpreters/InterpreterRenameQuery.cpp
@@ -80,7 +80,11 @@ BlockIO InterpreterRenameQuery::execute()
         if (!rename.exchange)
             database_catalog.assertTableDoesntExist(StorageID(elem.to_database_name, elem.to_table_name), context);
 
-        database_catalog.getDatabase(elem.from_database_name)->renameTable(
+        DatabasePtr database = database_catalog.getDatabase(elem.from_database_name);
+        if (database->getEngineName() == "Replicated" && !context.from_replicated_log) {
+            database->propose(query_ptr);
+        }
+        database->renameTable(
             context,
             elem.from_table_name,
             *database_catalog.getDatabase(elem.to_database_name),

From 5eea58039c6f78a93eabd65792e8ed5c47615127 Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Mon, 11 May 2020 16:31:14 +0300
Subject: [PATCH 0018/1238] fix not initialized last entry in zk

---
 src/Databases/DatabaseReplicated.cpp | 14 ++++++++------
 src/Databases/DatabaseReplicated.h   |  2 +-
 src/Interpreters/DDLWorker.cpp       |  3 +--
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index 1bc954bfb76..36c95f68c2c 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -99,8 +99,6 @@ DatabaseReplicated::DatabaseReplicated(
     if (!current_zookeeper)
     {
             throw Exception("Can't create replicated database without ZooKeeper", ErrorCodes::NO_ZOOKEEPER);
-
-
     }
 
     current_zookeeper->createAncestors(zookeeper_path);
@@ -109,7 +107,6 @@ DatabaseReplicated::DatabaseReplicated(
     // TODO if no last_entry then make it equal to 0 in zk;
 
     // TODO launch a worker here
-
     main_thread = ThreadFromGlobalPool(&DatabaseReplicated::runMainThread, this);
 }
 
@@ -126,15 +123,20 @@ void DatabaseReplicated::runMainThread() {
     while (!stop_flag) {
         attachToThreadGroup();
 
-        sleepForSeconds(10);
+        sleepForSeconds(2);
         current_zookeeper = getZooKeeper();
-        String last_n = current_zookeeper->get(zookeeper_path + "/last_entry", {}, NULL);
+        String last_n;
+        if (!current_zookeeper->tryGet(zookeeper_path + "/last_entry", last_n, {}, NULL)) {
+            continue;
+        }
         size_t last_n_parsed = parse<size_t>(last_n);
+        LOG_DEBUG(log, "PARSED " << last_n_parsed);
+        LOG_DEBUG(log, "LOCAL CURRENT " << current_log_entry_n);
         while (current_log_entry_n < last_n_parsed) {
             current_log_entry_n++;
             executeLog(current_log_entry_n);
         }
-        break; // debug purpose 
+        // break; // debug purpose 
     }
 }
 
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index d61f0a00ef8..7700d17d9e4 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -37,7 +37,7 @@ private:
     Context & context; // is it overkiill?
     std::unique_ptr<Context> current_context; // to run executeQuery
 
-    size_t current_log_entry_n = 0;
+    std::atomic<size_t> current_log_entry_n = 0;
     std::atomic<bool> stop_flag{false};
 
     ThreadFromGlobalPool main_thread;
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 65f984924a3..28436f192b0 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -585,8 +585,7 @@ bool DDLWorker::tryExecuteQuery(const String & query, const DDLTask & task, Exec
     try
     {
         current_context = std::make_unique<Context>(context);
-        //current_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY;
-        current_context->from_replicated_log = true;
+        current_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY;
         current_context->setCurrentQueryId(""); // generate random query_id
         executeQuery(istr, ostr, false, *current_context, {});
     }

From d61259cd7b2f9f49c8a1e6da6a431a97d6616f45 Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Tue, 12 May 2020 16:35:05 +0300
Subject: [PATCH 0019/1238] ddl replication works

---
 src/Databases/DatabaseReplicated.cpp | 23 ++++++++++++++++-------
 src/Databases/DatabaseReplicated.h   |  1 -
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index 36c95f68c2c..2c7f6facf71 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -32,6 +32,7 @@
 #include <Common/quoteString.h>
 #include <Common/typeid_cast.h>
 #include <common/logger_useful.h>
+#include <Common/Exception.h>
 
 #include <Common/ZooKeeper/KeeperException.h>
 #include <Common/ZooKeeper/Types.h>
@@ -81,7 +82,6 @@ DatabaseReplicated::DatabaseReplicated(
     // TODO add constructor to Atomic and call it here with path and logger name specification
     // TODO ask why const and & are ommited in Atomic
     : DatabaseOrdinary(name_, metadata_path_, context_)
-    , context(context_)
     , zookeeper_path(zookeeper_path_)
     , replica_name(replica_name_)
 {
@@ -142,17 +142,26 @@ void DatabaseReplicated::runMainThread() {
 
 void DatabaseReplicated::executeLog(size_t n) {
 
-        LOG_DEBUG(log, "EXECUTING LOG! DB: " << database_name << "\n Replica: " << replica_name << "LOG N" << n);
-        current_context = std::make_unique<Context>(context);
-        current_context->from_replicated_log = true;
-        current_context->setCurrentQueryId(""); // generate random query_id
         current_zookeeper = getZooKeeper();
-
         String query_to_execute = current_zookeeper->get(zookeeper_path + "/log." + std::to_string(n), {}, NULL);
         ReadBufferFromString istr(query_to_execute);
         String dummy_string;
         WriteBufferFromString ostr(dummy_string);
-        executeQuery(istr, ostr, false, context, {});
+
+        try
+        {
+            current_context = std::make_unique<Context>(global_context);
+            current_context->from_replicated_log = true;
+            current_context->setCurrentQueryId(""); // generate random query_id
+            executeQuery(istr, ostr, false, *current_context, {});
+        }
+        catch (...)
+        {
+            tryLogCurrentException(log, "Query " + query_to_execute  + " wasn't finished successfully");
+    
+        }
+
+        LOG_DEBUG(log, "Executed query: " << query_to_execute);
 }
 
 // TODO we might not need it here at all
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index 7700d17d9e4..504be5a3ec5 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -34,7 +34,6 @@ private:
     
     void executeLog(size_t n);
 
-    Context & context; // is it overkiill?
     std::unique_ptr<Context> current_context; // to run executeQuery
 
     std::atomic<size_t> current_log_entry_n = 0;

From d7a354b24d20d2b78f91f5f745ded28e873a6b49 Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Tue, 12 May 2020 17:25:36 +0300
Subject: [PATCH 0020/1238] create query fix for replicated dbs

---
 src/Databases/DatabaseReplicated.cpp        | 1 +
 src/Interpreters/InterpreterCreateQuery.cpp | 5 +++++
 2 files changed, 6 insertions(+)

diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index 2c7f6facf71..e507894bd3e 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -152,6 +152,7 @@ void DatabaseReplicated::executeLog(size_t n) {
         {
             current_context = std::make_unique<Context>(global_context);
             current_context->from_replicated_log = true;
+            current_context->setCurrentDatabase(database_name);
             current_context->setCurrentQueryId(""); // generate random query_id
             executeQuery(istr, ostr, false, *current_context, {});
         }
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index 5698c370fa1..ed4095d63be 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -601,6 +601,11 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
     /// Set and retrieve list of columns, indices and constraints. Set table engine if needed. Rewrite query in canonical way.
     TableProperties properties = setProperties(create);
 
+    // testing
+    if (context.from_replicated_log) {
+        create.database = current_database;
+    }
+
     /// Actually creates table
     bool created = doCreateTable(create, properties);
     if (!created)   /// Table already exists

From c0924b5911ce165166a66c8f0055b34ad7dbd2ed Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Tue, 12 May 2020 17:55:24 +0300
Subject: [PATCH 0021/1238] create and alter test for replicated db

---
 ...icated_database_engine_zookeeper.reference | 34 ++++++++++++++++
 ...9_replicated_database_engine_zookeeper.sql | 39 +++++++++++++++++++
 2 files changed, 73 insertions(+)
 create mode 100644 tests/queries/0_stateless/01269_replicated_database_engine_zookeeper.reference
 create mode 100644 tests/queries/0_stateless/01269_replicated_database_engine_zookeeper.sql

diff --git a/tests/queries/0_stateless/01269_replicated_database_engine_zookeeper.reference b/tests/queries/0_stateless/01269_replicated_database_engine_zookeeper.reference
new file mode 100644
index 00000000000..58f951b1257
--- /dev/null
+++ b/tests/queries/0_stateless/01269_replicated_database_engine_zookeeper.reference
@@ -0,0 +1,34 @@
+CounterID	UInt32					
+StartDate	Date					
+UserID	UInt32					
+VisitID	UInt32					
+Added0	String					
+Added1	UInt32					
+Added2	UInt32					
+AddedNested1.A	Array(UInt32)					
+AddedNested1.C	Array(String)					
+AddedNested2.A	Array(UInt32)					
+AddedNested2.B	Array(UInt64)					
+CounterID	UInt32					
+StartDate	Date					
+UserID	UInt32					
+VisitID	UInt32					
+Added0	String					
+Added1	UInt32					
+Added2	UInt32					
+AddedNested1.A	Array(UInt32)					
+AddedNested1.C	Array(String)					
+AddedNested2.A	Array(UInt32)					
+AddedNested2.B	Array(UInt64)					
+CounterID	UInt32					
+StartDate	Date					
+UserID	UInt32					
+VisitID	UInt32					
+Added0	String					
+Added1	UInt32					
+Added2	UInt32					
+AddedNested1.A	Array(UInt32)					
+AddedNested1.C	Array(String)					
+AddedNested2.A	Array(UInt32)					
+AddedNested2.B	Array(UInt64)					
+
diff --git a/tests/queries/0_stateless/01269_replicated_database_engine_zookeeper.sql b/tests/queries/0_stateless/01269_replicated_database_engine_zookeeper.sql
new file mode 100644
index 00000000000..1acc9022014
--- /dev/null
+++ b/tests/queries/0_stateless/01269_replicated_database_engine_zookeeper.sql
@@ -0,0 +1,39 @@
+DROP DATABASE IF EXISTS rdbtest;
+DROP DATABASE IF EXISTS replicatwo;
+DROP DATABASE IF EXISTS replicathree;
+
+CREATE DATABASE rdbtest ENGINE = Replicated('/clickhouse/db/test1/', 'id1');
+CREATE DATABASE replicatwo ENGINE = Replicated('/clickhouse/db/test1/', 'id2');
+CREATE DATABASE replicathree ENGINE = Replicated('/clickhouse/db/test1/', 'id3');
+
+USE rdbtest;
+
+CREATE TABLE alter_test (CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192);
+
+ALTER TABLE alter_test ADD COLUMN Added0 UInt32;
+ALTER TABLE alter_test ADD COLUMN Added2 UInt32;
+ALTER TABLE alter_test ADD COLUMN Added1 UInt32 AFTER Added0;
+
+ALTER TABLE alter_test ADD COLUMN AddedNested1 Nested(A UInt32, B UInt64) AFTER Added2;
+ALTER TABLE alter_test ADD COLUMN AddedNested1.C Array(String) AFTER AddedNested1.B;
+ALTER TABLE alter_test ADD COLUMN AddedNested2 Nested(A UInt32, B UInt64) AFTER AddedNested1;
+
+ALTER TABLE alter_test DROP COLUMN ToDrop;
+
+ALTER TABLE alter_test MODIFY COLUMN Added0 String;
+
+ALTER TABLE alter_test DROP COLUMN NestedColumn.A;
+ALTER TABLE alter_test DROP COLUMN NestedColumn.S;
+
+ALTER TABLE alter_test DROP COLUMN AddedNested1.B;
+
+ALTER TABLE alter_test ADD COLUMN IF NOT EXISTS Added0 UInt32;
+ALTER TABLE alter_test ADD COLUMN IF NOT EXISTS AddedNested1 Nested(A UInt32, B UInt64);
+ALTER TABLE alter_test ADD COLUMN IF NOT EXISTS AddedNested1.C Array(String);
+ALTER TABLE alter_test MODIFY COLUMN IF EXISTS ToDrop UInt64;
+ALTER TABLE alter_test DROP COLUMN IF EXISTS ToDrop;
+ALTER TABLE alter_test COMMENT COLUMN IF EXISTS ToDrop 'new comment';
+
+DESC TABLE rdbtest.alter_test;
+DESC TABLE replicatwo.alter_test;
+DESC TABLE replicathree.alter_test;

From f103e24a09f475f4d66038b41667b63be01a94be Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Wed, 13 May 2020 17:44:01 +0300
Subject: [PATCH 0022/1238] make db replicated inherited from atomic

---
 src/Databases/DatabaseReplicated.cpp        |  6 ++----
 src/Databases/DatabaseReplicated.h          |  4 ++--
 src/Databases/DatabasesCommon.cpp           |  2 +-
 src/Interpreters/InterpreterCreateQuery.cpp | 18 ++++++++----------
 src/Interpreters/InterpreterDropQuery.cpp   |  9 +++++++--
 5 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index e507894bd3e..2b473c25ce2 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -81,7 +81,7 @@ DatabaseReplicated::DatabaseReplicated(
 //    : DatabaseOrdinary(name_, metadata_path_, "data/" + escapeForFileName(name_) + "/", "DatabaseReplicated (" + name_ + ")", context_)
     // TODO add constructor to Atomic and call it here with path and logger name specification
     // TODO ask why const and & are ommited in Atomic
-    : DatabaseOrdinary(name_, metadata_path_, context_)
+    : DatabaseAtomic(name_, metadata_path_, context_)
     , zookeeper_path(zookeeper_path_)
     , replica_name(replica_name_)
 {
@@ -122,8 +122,7 @@ void DatabaseReplicated::runMainThread() {
 
     while (!stop_flag) {
         attachToThreadGroup();
-
-        sleepForSeconds(2);
+        sleepForSeconds(1);// BURN CPU
         current_zookeeper = getZooKeeper();
         String last_n;
         if (!current_zookeeper->tryGet(zookeeper_path + "/last_entry", last_n, {}, NULL)) {
@@ -136,7 +135,6 @@ void DatabaseReplicated::runMainThread() {
             current_log_entry_n++;
             executeLog(current_log_entry_n);
         }
-        // break; // debug purpose 
     }
 }
 
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index 504be5a3ec5..0cb0c57c808 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include <Databases/DatabaseOrdinary.h>
+#include <Databases/DatabaseAtomic.h>
 #include <Common/randomSeed.h>
 #include <Common/ZooKeeper/ZooKeeper.h>
 
@@ -14,7 +14,7 @@ namespace DB
   *  that contain declaration of table represented by SQL ATTACH TABLE query
   *  and operation log in zookeeper
   */
-class DatabaseReplicated : public DatabaseOrdinary
+class DatabaseReplicated : public DatabaseAtomic
 {
 public:
     DatabaseReplicated(const String & name_, const String & metadata_path_, const String & zookeeper_path_, const String & replica_name_, Context & context);
diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp
index 47c54fae800..7925d812241 100644
--- a/src/Databases/DatabasesCommon.cpp
+++ b/src/Databases/DatabasesCommon.cpp
@@ -98,7 +98,7 @@ void DatabaseWithOwnTablesBase::attachTableUnlocked(const String & table_name, c
     auto table_id = table->getStorageID();
     if (table_id.hasUUID())
     {
-        assert(getDatabaseName() == DatabaseCatalog::TEMPORARY_DATABASE || getEngineName() == "Atomic");
+        assert(getDatabaseName() == DatabaseCatalog::TEMPORARY_DATABASE || getEngineName() == "Atomic" || getEngineName() == "Replicated");
         DatabaseCatalog::instance().addUUIDMapping(table_id.uuid, shared_from_this(), table);
     }
 }
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index ed4095d63be..648e41327ba 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -627,7 +627,7 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
     if (need_add_to_database)
     {
         database = DatabaseCatalog::instance().getDatabase(create.database);
-        if (database->getEngineName() == "Atomic") // || database->getEngineName() == "Replicated")
+        if (database->getEngineName() == "Atomic" || (database->getEngineName() == "Replicated" && !context.from_replicated_log))
         {
             /// TODO implement ATTACH FROM 'path/to/data': generate UUID and move table data to store/
             if (create.attach && create.uuid == UUIDHelpers::Nil)
@@ -635,6 +635,11 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
             if (!create.attach && create.uuid == UUIDHelpers::Nil)
                 create.uuid = UUIDHelpers::generateV4();
         }
+        else if (database->getEngineName() == "Replicated" && context.from_replicated_log) {
+            if (create.uuid == UUIDHelpers::Nil)
+                // change error to incorrect log or something
+                throw Exception("Table UUID is not specified in the replicated log", ErrorCodes::INCORRECT_QUERY);
+        }
         else
         {
             if (create.uuid != UUIDHelpers::Nil)
@@ -703,16 +708,9 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
 
     
     if (database->getEngineName() == "Replicated" && !context.from_replicated_log) {
-        // propose
-        // try to 
         database->propose(query_ptr);
-        database->createTable(context, table_name, res, query_ptr);
-        // catch
-        // throw and remove proposal
-        // otherwise 
-        // proceed (commit to zk)
-    } else
-        database->createTable(context, table_name, res, query_ptr);
+    }
+    database->createTable(context, table_name, res, query_ptr);
 
     /// We must call "startup" and "shutdown" while holding DDLGuard.
     /// Because otherwise method "shutdown" (from InterpreterDropQuery) can be called before startup
diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp
index bae1b796016..e9221fc273c 100644
--- a/src/Interpreters/InterpreterDropQuery.cpp
+++ b/src/Interpreters/InterpreterDropQuery.cpp
@@ -93,8 +93,8 @@ BlockIO InterpreterDropQuery::executeToTable(
         {
             context.checkAccess(table->isView() ? AccessType::DROP_VIEW : AccessType::DROP_TABLE, table_id);
             table->shutdown();
-            TableExclusiveLockHolder table_lock;
-            if (database->getEngineName() != "Atomic")
+            TableStructureWriteLockHolder table_lock;
+            if (database->getEngineName() != "Atomic" && database->getEngineName() != "Replicated")
                 table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
             /// Drop table from memory, don't touch data and metadata
             if (database->getEngineName() == "Replicated" && !context.from_replicated_log) {
@@ -119,8 +119,13 @@ BlockIO InterpreterDropQuery::executeToTable(
 
             table->shutdown();
 
+<<<<<<< HEAD
             TableExclusiveLockHolder table_lock;
             if (database->getEngineName() != "Atomic")
+=======
+            TableStructureWriteLockHolder table_lock;
+            if (database->getEngineName() != "Atomic" && database->getEngineName() != "Replicated")
+>>>>>>> 921e85e9c9... make db replicated inherited from atomic
                 table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
 
             if (database->getEngineName() == "Replicated" && !context.from_replicated_log) {

From 5e076b464ea79c4d27e38a55cfc141645ddc9884 Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Wed, 13 May 2020 20:00:47 +0300
Subject: [PATCH 0023/1238] add replicated db snapshot, integration test, repl
 alter queries, etc

add an option to create replicated tables within replicated db without specifying zk path and replica id
add replicated sch pool
disable replication of alter queries for replicated tables in replicated dbs
snapshot prototype. amend of replicated db workflow
add prototype of integration tests for replicated db
---
 src/Common/CurrentMetrics.cpp                 |  2 +
 src/Core/Settings.h                           |  1 +
 src/Databases/DatabaseLazy.cpp                |  2 +-
 src/Databases/DatabaseLazy.h                  |  2 +-
 src/Databases/DatabaseOnDisk.h                |  3 +-
 src/Databases/DatabaseOrdinary.cpp            |  2 +-
 src/Databases/DatabaseOrdinary.h              |  4 +-
 src/Databases/DatabaseReplicated.cpp          | 93 ++++++++++++-------
 src/Databases/DatabaseReplicated.h            | 16 ++--
 src/Databases/DatabaseWithDictionaries.cpp    |  2 +-
 src/Databases/DatabaseWithDictionaries.h      |  2 +-
 src/Interpreters/Context.cpp                  | 18 ++++
 src/Interpreters/Context.h                    |  1 +
 src/Interpreters/InterpreterAlterQuery.cpp    |  2 +-
 .../MergeTree/registerStorageMergeTree.cpp    | 35 ++++++-
 .../test_replicated_database/test.py          | 38 ++++++++
 16 files changed, 166 insertions(+), 57 deletions(-)
 create mode 100644 tests/integration/test_replicated_database/test.py

diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp
index 4bab9ef2844..36c65953a6f 100644
--- a/src/Common/CurrentMetrics.cpp
+++ b/src/Common/CurrentMetrics.cpp
@@ -14,6 +14,7 @@
     M(BackgroundSchedulePoolTask, "Number of active tasks in BackgroundSchedulePool. This pool is used for periodic ReplicatedMergeTree tasks, like cleaning old data parts, altering data parts, replica re-initialization, etc.") \
     M(BackgroundBufferFlushSchedulePoolTask, "Number of active tasks in BackgroundBufferFlushSchedulePool. This pool is used for periodic Buffer flushes") \
     M(BackgroundDistributedSchedulePoolTask, "Number of active tasks in BackgroundDistributedSchedulePool. This pool is used for distributed sends that is done in background.") \
+    M(BackgroundReplicatedSchedulePoolTask, "Number of active tasks in BackgroundReplicatedSchedulePoolTask. TODO.") \
     M(CacheDictionaryUpdateQueueBatches, "Number of 'batches' (a set of keys) in update queue in CacheDictionaries.") \
     M(CacheDictionaryUpdateQueueKeys, "Exact number of keys in update queue in CacheDictionaries.") \
     M(DiskSpaceReservedForMerge, "Disk space reserved for currently running background merges. It is slightly more than the total size of currently merging parts.") \
@@ -38,6 +39,7 @@
     M(MemoryTrackingInBackgroundSchedulePool, "Total amount of memory (bytes) allocated in background schedule pool (that is dedicated for bookkeeping tasks of Replicated tables).") \
     M(MemoryTrackingInBackgroundBufferFlushSchedulePool, "Total amount of memory (bytes) allocated in background buffer flushes pool (that is dedicated for background buffer flushes).") \
     M(MemoryTrackingInBackgroundDistributedSchedulePool, "Total amount of memory (bytes) allocated in background distributed schedule pool (that is dedicated for distributed sends).") \
+    M(MemoryTrackingInBackgroundReplicatedSchedulePool, "Total amount of memory (bytes) allocated in replicated schedule pool (TODO).") \
     M(MemoryTrackingForMerges, "Total amount of memory (bytes) allocated for background merges. Included in MemoryTrackingInBackgroundProcessingPool. Note that this value may include a drift when the memory was allocated in a context of background processing pool and freed in other context or vice-versa. This happens naturally due to caches for tables indexes and doesn't indicate memory leaks.") \
     M(EphemeralNode, "Number of ephemeral nodes hold in ZooKeeper.") \
     M(ZooKeeperSession, "Number of sessions (connections) to ZooKeeper. Should be no more than one, because using more than one connection to ZooKeeper may lead to bugs due to lack of linearizability (stale reads) that ZooKeeper consistency model allows.") \
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index f434132eccd..ea950afa70a 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -87,6 +87,7 @@ struct Settings : public SettingsCollection<Settings>
     M(SettingUInt64, background_move_pool_size, 8, "Number of threads performing background moves for tables. Only has meaning at server startup.", 0) \
     M(SettingUInt64, background_schedule_pool_size, 16, "Number of threads performing background tasks for replicated tables, kafka streaming, dns cache updates. Only has meaning at server startup.", 0) \
     M(SettingUInt64, background_distributed_schedule_pool_size, 16, "Number of threads performing background tasks for distributed sends. Only has meaning at server startup.", 0) \
+    M(SettingUInt64, background_replicated_schedule_pool_size, 16, "Number of threads performing background tasks in replicated databases. Only has meaning at server startup.", 0) \
     \
     M(SettingMilliseconds, distributed_directory_monitor_sleep_time_ms, 100, "Sleep time for StorageDistributed DirectoryMonitors, in case of any errors delay grows exponentially.", 0) \
     M(SettingMilliseconds, distributed_directory_monitor_max_sleep_time_ms, 30000, "Maximum sleep time for StorageDistributed DirectoryMonitors, it limits exponential growth too.", 0) \
diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp
index 11e5272110e..d1a6c191bfc 100644
--- a/src/Databases/DatabaseLazy.cpp
+++ b/src/Databases/DatabaseLazy.cpp
@@ -27,7 +27,7 @@ namespace ErrorCodes
 }
 
 
-DatabaseLazy::DatabaseLazy(const String & name_, const String & metadata_path_, time_t expiration_time_, const Context & context_)
+DatabaseLazy::DatabaseLazy(const String & name_, const String & metadata_path_, time_t expiration_time_, Context & context_)
     : DatabaseOnDisk(name_, metadata_path_, "data/" + escapeForFileName(name_) + "/", "DatabaseLazy (" + name_ + ")", context_)
     , expiration_time(expiration_time_)
 {
diff --git a/src/Databases/DatabaseLazy.h b/src/Databases/DatabaseLazy.h
index 2e24b687be5..adda103a21e 100644
--- a/src/Databases/DatabaseLazy.h
+++ b/src/Databases/DatabaseLazy.h
@@ -18,7 +18,7 @@ class Context;
 class DatabaseLazy final : public DatabaseOnDisk
 {
 public:
-    DatabaseLazy(const String & name_, const String & metadata_path_, time_t expiration_time_, const Context & context_);
+    DatabaseLazy(const String & name_, const String & metadata_path_, time_t expiration_time_, Context & context_);
 
     String getEngineName() const override { return "Lazy"; }
 
diff --git a/src/Databases/DatabaseOnDisk.h b/src/Databases/DatabaseOnDisk.h
index d4fb9b2aa17..dc347c99542 100644
--- a/src/Databases/DatabaseOnDisk.h
+++ b/src/Databases/DatabaseOnDisk.h
@@ -31,7 +31,7 @@ String getObjectDefinitionFromCreateQuery(const ASTPtr & query);
 class DatabaseOnDisk : public DatabaseWithOwnTablesBase
 {
 public:
-    DatabaseOnDisk(const String & name, const String & metadata_path_, const String & data_path_, const String & logger, const Context & context);
+    DatabaseOnDisk(const String & name, const String & metadata_path_, const String & data_path_, const String & logger, Context & context);
 
     void createTable(
         const Context & context,
@@ -86,6 +86,7 @@ protected:
 
     const String metadata_path;
     const String data_path;
+    Context & global_context;
 };
 
 }
diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp
index 9194558dffb..2f4f584b091 100644
--- a/src/Databases/DatabaseOrdinary.cpp
+++ b/src/Databases/DatabaseOrdinary.cpp
@@ -94,7 +94,7 @@ namespace
 }
 
 
-DatabaseOrdinary::DatabaseOrdinary(const String & name_, const String & metadata_path_, const Context & context_)
+DatabaseOrdinary::DatabaseOrdinary(const String & name_, const String & metadata_path_, Context & context_)
     : DatabaseOrdinary(name_, metadata_path_, "data/" + escapeForFileName(name_) + "/", "DatabaseOrdinary (" + name_ + ")", context_)
 {
 }
diff --git a/src/Databases/DatabaseOrdinary.h b/src/Databases/DatabaseOrdinary.h
index a9e53edfe28..4767ccdc123 100644
--- a/src/Databases/DatabaseOrdinary.h
+++ b/src/Databases/DatabaseOrdinary.h
@@ -14,8 +14,8 @@ namespace DB
 class DatabaseOrdinary : public DatabaseWithDictionaries
 {
 public:
-    DatabaseOrdinary(const String & name_, const String & metadata_path_, const Context & context);
-    DatabaseOrdinary(const String & name_, const String & metadata_path_, const String & data_path_, const String & logger, const Context & context_);
+    DatabaseOrdinary(const String & name_, const String & metadata_path_, Context & context);
+    DatabaseOrdinary(const String & name_, const String & metadata_path_, const String & data_path_, const String & logger, Context & context_);
 
     String getEngineName() const override { return "Ordinary"; }
 
diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index 2b473c25ce2..9dd8530fc46 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -101,43 +101,58 @@ DatabaseReplicated::DatabaseReplicated(
             throw Exception("Can't create replicated database without ZooKeeper", ErrorCodes::NO_ZOOKEEPER);
     }
 
-    current_zookeeper->createAncestors(zookeeper_path);
-    current_zookeeper->createOrUpdate(zookeeper_path, String(), zkutil::CreateMode::Persistent);
+    if (!current_zookeeper->exists(zookeeper_path, {}, NULL)) {
+        current_zookeeper->createAncestors(zookeeper_path);
+        current_zookeeper->createOrUpdate(zookeeper_path, String(), zkutil::CreateMode::Persistent);
+        current_zookeeper->createOrUpdate(zookeeper_path + "/last_entry", "0", zkutil::CreateMode::Persistent);
+        current_zookeeper->createAncestors(replica_path);
+    } else {
+    }
+    current_zookeeper->createOrUpdate(replica_path, String(), zkutil::CreateMode::Persistent);
 
-    // TODO if no last_entry then make it equal to 0 in zk;
-
-    // TODO launch a worker here
-    main_thread = ThreadFromGlobalPool(&DatabaseReplicated::runMainThread, this);
+    backgroundLogExecutor = global_context.getReplicatedSchedulePool().createTask(database_name + "(DatabaseReplicated::the_threeeed)", [this]{ runMainThread();} );
+    backgroundLogExecutor->schedule();
 }
 
 DatabaseReplicated::~DatabaseReplicated()
 {
     stop_flag = true;
-    main_thread.join();
 }
 
 void DatabaseReplicated::runMainThread() {
-    setThreadName("ReplctdWorker"); // ok whatever. 15 bytes // + database_name);
     LOG_DEBUG(log, "Started " << database_name << " database worker thread\n Replica: " << replica_name);
-
-    while (!stop_flag) {
-        attachToThreadGroup();
-        sleepForSeconds(1);// BURN CPU
+    if (!stop_flag) { // TODO is there a need for the flag?
         current_zookeeper = getZooKeeper();
-        String last_n;
-        if (!current_zookeeper->tryGet(zookeeper_path + "/last_entry", last_n, {}, NULL)) {
-            continue;
-        }
+        String last_n = current_zookeeper->get(zookeeper_path + "/last_entry", {}, NULL);
         size_t last_n_parsed = parse<size_t>(last_n);
         LOG_DEBUG(log, "PARSED " << last_n_parsed);
         LOG_DEBUG(log, "LOCAL CURRENT " << current_log_entry_n);
+
+        bool newEntries = current_log_entry_n < last_n_parsed;
         while (current_log_entry_n < last_n_parsed) {
             current_log_entry_n++;
             executeLog(current_log_entry_n);
         }
+        if (newEntries) {
+            saveState();
+        }
+        backgroundLogExecutor->scheduleAfter(500);
     }
 }
 
+void DatabaseReplicated::saveState() {
+    current_zookeeper->createOrUpdate(replica_path + "/last_entry", std::to_string(current_log_entry_n), zkutil::CreateMode::Persistent);
+    // TODO rename vars
+    String statement = std::to_string(current_log_entry_n);
+    String metadatafile = getMetadataPath() + ".last_entry";
+    WriteBufferFromFile out(metadatafile, statement.size(), O_WRONLY | O_CREAT);
+    writeString(statement, out);
+    out.next();
+    if (global_context.getSettingsRef().fsync_metadata)
+        out.sync();
+    out.close();
+}
+
 void DatabaseReplicated::executeLog(size_t n) {
 
         current_zookeeper = getZooKeeper();
@@ -163,21 +178,7 @@ void DatabaseReplicated::executeLog(size_t n) {
         LOG_DEBUG(log, "Executed query: " << query_to_execute);
 }
 
-// TODO we might not need it here at all
-void DatabaseReplicated::attachToThreadGroup() {
-    if (thread_group)
-    {
-        /// Put all threads to one thread pool
-        CurrentThread::attachToIfDetached(thread_group);
-    }
-    else
-    {
-        CurrentThread::initializeQuery();
-        thread_group = CurrentThread::getGroup();
-    }
-}
-
-// taken from ddlworker
+// TODO Move to ZooKeeper/Lock and remove it from here and ddlworker
 static std::unique_ptr<zkutil::Lock> createSimpleZooKeeperLock(
     const std::shared_ptr<zkutil::ZooKeeper> & zookeeper, const String & lock_prefix, const String & lock_name, const String & lock_message)
 {
@@ -188,15 +189,24 @@ static std::unique_ptr<zkutil::Lock> createSimpleZooKeeperLock(
 
 
 void DatabaseReplicated::propose(const ASTPtr & query) {
-    // TODO if source is zk then omit propose. Throw?
-    
     // TODO remove that log message i think
     LOG_DEBUG(log, "PROPOSING\n" << queryToString(query));
 
     current_zookeeper = getZooKeeper();
-    auto lock = createSimpleZooKeeperLock(current_zookeeper, zookeeper_path, "lock", replica_name);
+    auto lock = createSimpleZooKeeperLock(current_zookeeper, zookeeper_path, "propose_lock", replica_name);
 
-    // TODO check that last_entry is the same as current_log_entry_n for the replica
+
+    // schedule and deactive combo 
+    // ensures that replica is up to date
+    // and since propose lock is acquired,
+    // no other propose can happen from
+    // different replicas during this call
+    backgroundLogExecutor->schedule();
+    backgroundLogExecutor->deactivate();
+
+    if (current_log_entry_n > 5) { // make a settings variable
+        createSnapshot();
+    }
 
     current_log_entry_n++; // starting from 1
     String log_entry = zookeeper_path + "/log." + std::to_string(current_log_entry_n);
@@ -205,7 +215,18 @@ void DatabaseReplicated::propose(const ASTPtr & query) {
     current_zookeeper->createOrUpdate(zookeeper_path + "/last_entry", std::to_string(current_log_entry_n), zkutil::CreateMode::Persistent);
 
     lock->unlock();
-    // write to metastore the last entry?
+    saveState();
+}
+
+void DatabaseReplicated::createSnapshot() {
+    current_zookeeper->createAncestors(zookeeper_path + "/snapshot");
+    current_zookeeper->createOrUpdate(zookeeper_path + "/snapshot", std::to_string(current_log_entry_n), zkutil::CreateMode::Persistent);
+    for (auto iterator = getTablesIterator({}); iterator->isValid(); iterator->next()) {
+        String table_name = iterator->name();
+        auto query = getCreateQueryFromMetadata(getObjectMetadataPath(table_name), true);
+        String statement = queryToString(query);
+        current_zookeeper->createOrUpdate(zookeeper_path + "/snapshot/" + table_name, statement, zkutil::CreateMode::Persistent);
+    }
 }
 
 }
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index 0cb0c57c808..0b2d097caac 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -3,6 +3,7 @@
 #include <Databases/DatabaseAtomic.h>
 #include <Common/randomSeed.h>
 #include <Common/ZooKeeper/ZooKeeper.h>
+#include <Core/BackgroundSchedulePool.h>
 
 #include <atomic>
 #include <thread>
@@ -25,25 +26,26 @@ public:
 
     void propose(const ASTPtr & query) override;
 
+    String zookeeper_path;
+    String replica_name;
+
 private:
 
     void runMainThread();
-    void runCleanupThread();
 
-    void attachToThreadGroup();
-    
     void executeLog(size_t n);
 
+    void saveState();
+
+    void createSnapshot();
+
     std::unique_ptr<Context> current_context; // to run executeQuery
 
     std::atomic<size_t> current_log_entry_n = 0;
     std::atomic<bool> stop_flag{false};
 
-    ThreadFromGlobalPool main_thread;
-    ThreadGroupStatusPtr thread_group;
+    BackgroundSchedulePool::TaskHolder backgroundLogExecutor;
 
-    String zookeeper_path;
-    String replica_name;
     String replica_path;
 
     zkutil::ZooKeeperPtr current_zookeeper;        /// Use only the methods below.
diff --git a/src/Databases/DatabaseWithDictionaries.cpp b/src/Databases/DatabaseWithDictionaries.cpp
index e0f2aa9286b..37f5b51f4ed 100644
--- a/src/Databases/DatabaseWithDictionaries.cpp
+++ b/src/Databases/DatabaseWithDictionaries.cpp
@@ -317,7 +317,7 @@ void DatabaseWithDictionaries::shutdown()
 
 
 DatabaseWithDictionaries::DatabaseWithDictionaries(
-    const String & name, const String & metadata_path_, const String & data_path_, const String & logger, const Context & context)
+    const String & name, const String & metadata_path_, const String & data_path_, const String & logger, Context & context)
     : DatabaseOnDisk(name, metadata_path_, data_path_, logger, context)
     , external_loader(context.getExternalDictionariesLoader())
 {
diff --git a/src/Databases/DatabaseWithDictionaries.h b/src/Databases/DatabaseWithDictionaries.h
index eb9e105e31d..0e87ae686cf 100644
--- a/src/Databases/DatabaseWithDictionaries.h
+++ b/src/Databases/DatabaseWithDictionaries.h
@@ -37,7 +37,7 @@ public:
     ~DatabaseWithDictionaries() override;
 
 protected:
-    DatabaseWithDictionaries(const String & name, const String & metadata_path_, const String & data_path_, const String & logger, const Context & context);
+    DatabaseWithDictionaries(const String & name, const String & metadata_path_, const String & data_path_, const String & logger, Context & context);
 
     ASTPtr getCreateDictionaryQueryImpl(const String & dictionary_name, bool throw_on_error) const override;
 
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index b691e9aaf60..ccd489f6c45 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -82,6 +82,9 @@ namespace CurrentMetrics
 
     extern const Metric BackgroundDistributedSchedulePoolTask;
     extern const Metric MemoryTrackingInBackgroundDistributedSchedulePool;
+
+    extern const Metric BackgroundReplicatedSchedulePoolTask;
+    extern const Metric MemoryTrackingInBackgroundReplicatedSchedulePool;
 }
 
 
@@ -338,6 +341,8 @@ struct ContextShared
     std::optional<BackgroundProcessingPool> background_move_pool; /// The thread pool for the background moves performed by the tables.
     std::optional<BackgroundSchedulePool> schedule_pool;    /// A thread pool that can run different jobs in background (used in replicated tables)
     std::optional<BackgroundSchedulePool> distributed_schedule_pool; /// A thread pool that can run different jobs in background (used for distributed sends)
+    // TODO Rename replicated table pool or even both; adjust comments 
+    std::optional<BackgroundSchedulePool> replicated_schedule_pool; /// A thread pool that can run different jobs in background (used in replicated database engine)
     MultiVersion<Macros> macros;                            /// Substitutions extracted from config.
     std::unique_ptr<DDLWorker> ddl_worker;                  /// Process ddl commands from zk.
     /// Rules for selecting the compression settings, depending on the size of the part.
@@ -437,6 +442,7 @@ struct ContextShared
         background_move_pool.reset();
         schedule_pool.reset();
         distributed_schedule_pool.reset();
+        replicated_schedule_pool.reset();
         ddl_worker.reset();
 
         /// Stop trace collector if any
@@ -1415,6 +1421,18 @@ BackgroundSchedulePool & Context::getDistributedSchedulePool()
     return *shared->distributed_schedule_pool;
 }
 
+BackgroundSchedulePool & Context::getReplicatedSchedulePool()
+{
+    auto lock = getLock();
+    if (!shared->replicated_schedule_pool)
+        shared->replicated_schedule_pool.emplace(
+            settings.background_replicated_schedule_pool_size,
+            CurrentMetrics::BackgroundReplicatedSchedulePoolTask,
+            CurrentMetrics::MemoryTrackingInBackgroundReplicatedSchedulePool,
+            "BgRplSchPool");
+    return *shared->replicated_schedule_pool;
+}
+
 void Context::setDDLWorker(std::unique_ptr<DDLWorker> ddl_worker)
 {
     auto lock = getLock();
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index 66ea6f6914c..e9c78a175d4 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -502,6 +502,7 @@ public:
     BackgroundProcessingPool & getBackgroundMovePool();
     BackgroundSchedulePool & getSchedulePool();
     BackgroundSchedulePool & getDistributedSchedulePool();
+    BackgroundSchedulePool & getReplicatedSchedulePool();
 
     void setDDLWorker(std::unique_ptr<DDLWorker> ddl_worker);
     DDLWorker & getDDLWorker() const;
diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp
index ad79bd68fed..cef1ebd7469 100644
--- a/src/Interpreters/InterpreterAlterQuery.cpp
+++ b/src/Interpreters/InterpreterAlterQuery.cpp
@@ -51,7 +51,7 @@ BlockIO InterpreterAlterQuery::execute()
 
     // TODO it's dirty. need to add database to parsing stage
     DatabasePtr database = DatabaseCatalog::instance().getDatabase(table_id.database_name);
-    if (database->getEngineName() == "Replicated" && !context.from_replicated_log) {
+    if (database->getEngineName() == "Replicated" && !context.from_replicated_log && !table->supportsReplication()) {
         database->propose(query_ptr);
     }
 
diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp
index 1ecac8f413d..eb62c80cc49 100644
--- a/src/Storages/MergeTree/registerStorageMergeTree.cpp
+++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp
@@ -1,3 +1,6 @@
+#include <Databases/IDatabase.h>
+#include <Databases/DatabaseReplicated.h>
+
 #include <Storages/StorageFactory.h>
 #include <Storages/StorageMergeTree.h>
 #include <Storages/StorageReplicatedMergeTree.h>
@@ -277,10 +280,18 @@ static StoragePtr create(const StorageFactory::Arguments & args)
 
     String name_part = args.engine_name.substr(0, args.engine_name.size() - strlen("MergeTree"));
 
-    bool replicated = startsWith(name_part, "Replicated");
-    if (replicated)
+    bool replicatedStorage = startsWith(name_part, "Replicated");
+    if (replicatedStorage)
         name_part = name_part.substr(strlen("Replicated"));
 
+    String database_name = args.query.database;
+    auto database = DatabaseCatalog::instance().getDatabase(database_name);
+    bool replicatedDatabase = false;
+
+    if (database->getEngineName() == "Replicated") {
+        replicatedDatabase = true;
+    }
+
     MergeTreeData::MergingParams merging_params;
     merging_params.mode = MergeTreeData::MergingParams::Ordinary;
 
@@ -322,7 +333,7 @@ static StoragePtr create(const StorageFactory::Arguments & args)
         needed_params += "]";
     };
 
-    if (replicated)
+    if (replicatedStorage && !replicatedDatabase)
     {
         add_mandatory_param("path in ZooKeeper");
         add_mandatory_param("replica name");
@@ -392,7 +403,7 @@ static StoragePtr create(const StorageFactory::Arguments & args)
     String zookeeper_path;
     String replica_name;
 
-    if (replicated)
+    if (replicatedStorage && !replicatedDatabase)
     {
         const auto * ast = engine_args[arg_num]->as<ASTLiteral>();
         if (ast && ast->value.getType() == Field::Types::String)
@@ -418,6 +429,12 @@ static StoragePtr create(const StorageFactory::Arguments & args)
         ++arg_num;
     }
 
+    if (replicatedStorage && replicatedDatabase) {
+        auto * database_replicated = typeid_cast<DatabaseReplicated *>(database.get());
+        zookeeper_path = database_replicated->zookeeper_path + "/tables/" + toString(args.query.uuid);
+        replica_name   = database_replicated->replica_name;
+    }
+
     /// This merging param maybe used as part of sorting key
     std::optional<String> merging_param_key_arg;
 
@@ -617,7 +634,15 @@ static StoragePtr create(const StorageFactory::Arguments & args)
         throw Exception("You must set the setting `allow_experimental_data_skipping_indices` to 1 " \
                         "before using data skipping indices.", ErrorCodes::BAD_ARGUMENTS);
 
-    if (replicated)
+    StorageInMemoryMetadata metadata(args.columns, indices_description, args.constraints);
+    metadata.partition_by_ast = partition_by_ast;
+    metadata.order_by_ast = order_by_ast;
+    metadata.primary_key_ast = primary_key_ast;
+    metadata.ttl_for_table_ast = ttl_table_ast;
+    metadata.sample_by_ast = sample_by_ast;
+    metadata.settings_ast = settings_ast;
+
+    if (replicatedStorage)
         return StorageReplicatedMergeTree::create(
             zookeeper_path, replica_name, args.attach, args.table_id, args.relative_data_path,
             metadata, args.context, date_column_name,  merging_params, std::move(storage_settings),
diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py
new file mode 100644
index 00000000000..23268bcdfd8
--- /dev/null
+++ b/tests/integration/test_replicated_database/test.py
@@ -0,0 +1,38 @@
+import time
+import logging
+
+import pytest
+
+from helpers.cluster import ClickHouseCluster
+
+logging.getLogger().setLevel(logging.INFO)
+logging.getLogger().addHandler(logging.StreamHandler())
+
+cluster = ClickHouseCluster(__file__)
+
+node1 = cluster.add_instance('node1', macros={'replica': 'test1'}, with_zookeeper=True)
+node2 = cluster.add_instance('node2', macros={'replica': 'test2'}, with_zookeeper=True)
+
+all_nodes = [node1, node2]
+
+@pytest.fixture(scope="module")
+def started_cluster():
+    try:
+        cluster.start()
+
+        for node in all_nodes:
+            node.query("DROP DATABASE IF EXISTS testdb")
+            node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', '{replica}');")
+        yield cluster
+
+    finally:
+        cluster.shutdown()
+
+
+def test_db(started_cluster):
+    DURATION_SECONDS = 5
+    node1.query("CREATE TABLE testdb.replicated_table  (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree(d, k, 8192);")
+
+    time.sleep(DURATION_SECONDS)
+    logging.info(node2.query("desc table testdb.replicated_table"))
+    assert node1.query("desc table testdb.replicated_table") == node2.query("desc table testdb.replicated_table")

From 34f74ff7851fbb68fb740219f339ced64242636c Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Sun, 24 May 2020 20:12:24 +0300
Subject: [PATCH 0024/1238] add test cases for replicated db

---
 .../test_replicated_database/test.py          | 44 ++++++++++++++++---
 1 file changed, 38 insertions(+), 6 deletions(-)

diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py
index 23268bcdfd8..38977aa0bdb 100644
--- a/tests/integration/test_replicated_database/test.py
+++ b/tests/integration/test_replicated_database/test.py
@@ -12,15 +12,14 @@ cluster = ClickHouseCluster(__file__)
 
 node1 = cluster.add_instance('node1', macros={'replica': 'test1'}, with_zookeeper=True)
 node2 = cluster.add_instance('node2', macros={'replica': 'test2'}, with_zookeeper=True)
-
-all_nodes = [node1, node2]
+node3 = cluster.add_instance('node3', macros={'replica': 'test3'}, with_zookeeper=True)
 
 @pytest.fixture(scope="module")
 def started_cluster():
     try:
         cluster.start()
 
-        for node in all_nodes:
+        for node in [node1, node2]:
             node.query("DROP DATABASE IF EXISTS testdb")
             node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', '{replica}');")
         yield cluster
@@ -29,10 +28,43 @@ def started_cluster():
         cluster.shutdown()
 
 
-def test_db(started_cluster):
-    DURATION_SECONDS = 5
-    node1.query("CREATE TABLE testdb.replicated_table  (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree(d, k, 8192);")
+def test_create_replicated_table(started_cluster):
+    DURATION_SECONDS = 1
+    node1.query("CREATE TABLE testdb.replicated_table (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree(d, k, 8192);")
 
     time.sleep(DURATION_SECONDS)
     logging.info(node2.query("desc table testdb.replicated_table"))
     assert node1.query("desc table testdb.replicated_table") == node2.query("desc table testdb.replicated_table")
+
+def test_alter_table(started_cluster):
+    DURATION_SECONDS = 1
+    node1.query("CREATE TABLE testdb.alter_test (CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192);\
+        ALTER TABLE testdb.alter_test ADD COLUMN Added0 UInt32;\
+        ALTER TABLE testdb.alter_test ADD COLUMN Added2 UInt32;\
+        ALTER TABLE testdb.alter_test ADD COLUMN Added1 UInt32 AFTER Added0;\
+        ALTER TABLE testdb.alter_test ADD COLUMN AddedNested1 Nested(A UInt32, B UInt64) AFTER Added2;\
+        ALTER TABLE testdb.alter_test ADD COLUMN AddedNested1.C Array(String) AFTER AddedNested1.B;\
+        ALTER TABLE testdb.alter_test ADD COLUMN AddedNested2 Nested(A UInt32, B UInt64) AFTER AddedNested1;")
+
+    time.sleep(DURATION_SECONDS)
+    assert node1.query("desc table testdb.alter_test") == node2.query("desc table testdb.alter_test")
+
+def test_create_replica_from_snapshot(started_cluster):
+    DURATION_SECONDS = 3
+    """
+    right now snapshot's created every 6 proposes.
+    later on it must be configurable
+    for now let's check snapshot 
+    by creating a new node just after 10 log entries 
+    """
+    node1.query("ALTER TABLE testdb.alter_test ADD COLUMN Added3 UInt32 ;") #9
+    node1.query("ALTER TABLE testdb.alter_test ADD COLUMN Added4 UInt32 ;") #10
+    node1.query("ALTER TABLE testdb.alter_test ADD COLUMN Added5 UInt32 ;") #1
+    # by this moment snapshot must be created
+
+    node3.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', '{replica}');")
+
+    time.sleep(DURATION_SECONDS)
+
+    assert node3.query("desc table testdb.alter_test") == node1.query("desc table testdb.alter_test")
+

From 1f03839830c1ec92b912bab6cdcfba6908780ccf Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Sun, 24 May 2020 20:12:59 +0300
Subject: [PATCH 0025/1238] add zookeeper tryRemoveChildren method

---
 src/Common/ZooKeeper/ZooKeeper.cpp | 17 +++++++++++++++++
 src/Common/ZooKeeper/ZooKeeper.h   |  5 +++++
 2 files changed, 22 insertions(+)

diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp
index 476e88d7e72..541625149dd 100644
--- a/src/Common/ZooKeeper/ZooKeeper.cpp
+++ b/src/Common/ZooKeeper/ZooKeeper.cpp
@@ -579,6 +579,23 @@ void ZooKeeper::removeChildren(const std::string & path)
 }
 
 
+void ZooKeeper::tryRemoveChildren(const std::string & path)
+{
+    Strings children;
+    if (tryGetChildren(path, children) != Coordination::ZOK)
+        return;
+    while (!children.empty())
+    {
+        Coordination::Requests ops;
+        for (size_t i = 0; i < MULTI_BATCH_SIZE && !children.empty(); ++i)
+        {
+            ops.emplace_back(makeRemoveRequest(path + "/" + children.back(), -1));
+            children.pop_back();
+        }
+        multi(ops);
+    }
+}
+
 void ZooKeeper::removeChildrenRecursive(const std::string & path)
 {
     Strings children = getChildren(path);
diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h
index 416e40c2da4..cb28f442392 100644
--- a/src/Common/ZooKeeper/ZooKeeper.h
+++ b/src/Common/ZooKeeper/ZooKeeper.h
@@ -187,7 +187,12 @@ public:
     /// Remove all children nodes (non recursive).
     void removeChildren(const std::string & path);
 
+    /// Remove all children nodes (non recursive).
+    /// If there're no children, this method doesn't throw an exception
+    void tryRemoveChildren(const std::string & path);
+
     using WaitCondition = std::function<bool()>;
+
     /// Wait for the node to disappear or return immediately if it doesn't exist.
     /// If condition is speficied, it is used to return early (when condition returns false)
     /// The function returns true if waited and false if waiting was interrupted by condition.

From 4921dc6dab978d05bf16a5cf6bfd8572a5c0f12b Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Sun, 24 May 2020 20:13:53 +0300
Subject: [PATCH 0026/1238] db replicated refactoring

---
 src/Databases/DatabaseReplicated.cpp | 105 ++++++++++++++++-----------
 src/Databases/DatabaseReplicated.h   |  14 ++--
 2 files changed, 69 insertions(+), 50 deletions(-)

diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index 9dd8530fc46..ae5a8249202 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -80,7 +80,6 @@ DatabaseReplicated::DatabaseReplicated(
     Context & context_)
 //    : DatabaseOrdinary(name_, metadata_path_, "data/" + escapeForFileName(name_) + "/", "DatabaseReplicated (" + name_ + ")", context_)
     // TODO add constructor to Atomic and call it here with path and logger name specification
-    // TODO ask why const and & are ommited in Atomic
     : DatabaseAtomic(name_, metadata_path_, context_)
     , zookeeper_path(zookeeper_path_)
     , replica_name(replica_name_)
@@ -102,42 +101,50 @@ DatabaseReplicated::DatabaseReplicated(
     }
 
     if (!current_zookeeper->exists(zookeeper_path, {}, NULL)) {
-        current_zookeeper->createAncestors(zookeeper_path);
-        current_zookeeper->createOrUpdate(zookeeper_path, String(), zkutil::CreateMode::Persistent);
-        current_zookeeper->createOrUpdate(zookeeper_path + "/last_entry", "0", zkutil::CreateMode::Persistent);
+        createDatabaseZKNodes();
+    } 
+
+    // replica
+    if (!current_zookeeper->exists(replica_path, {}, NULL)) {
         current_zookeeper->createAncestors(replica_path);
-    } else {
+        current_zookeeper->createOrUpdate(replica_path, String(), zkutil::CreateMode::Persistent);
     }
-    current_zookeeper->createOrUpdate(replica_path, String(), zkutil::CreateMode::Persistent);
 
-    backgroundLogExecutor = global_context.getReplicatedSchedulePool().createTask(database_name + "(DatabaseReplicated::the_threeeed)", [this]{ runMainThread();} );
-    backgroundLogExecutor->schedule();
+    //loadMetadataFromSnapshot();
+
+    background_log_executor = global_context.getReplicatedSchedulePool().createTask(database_name + "(DatabaseReplicated::the_threeeed)", [this]{ runBackgroundLogExecutor();} );
+    background_log_executor->schedule();
 }
 
-DatabaseReplicated::~DatabaseReplicated()
-{
-    stop_flag = true;
+void DatabaseReplicated::createDatabaseZKNodes() {
+    current_zookeeper = getZooKeeper();
+
+    if (current_zookeeper->exists(zookeeper_path))
+        return;
+
+    current_zookeeper->createAncestors(zookeeper_path);
+
+    current_zookeeper->createIfNotExists(zookeeper_path, String());
+    current_zookeeper->createIfNotExists(zookeeper_path + "/last_entry", "0");
+    current_zookeeper->createIfNotExists(zookeeper_path + "/log", String());
+    current_zookeeper->createIfNotExists(zookeeper_path + "/snapshot", String());
 }
 
-void DatabaseReplicated::runMainThread() {
-    LOG_DEBUG(log, "Started " << database_name << " database worker thread\n Replica: " << replica_name);
-    if (!stop_flag) { // TODO is there a need for the flag?
-        current_zookeeper = getZooKeeper();
-        String last_n = current_zookeeper->get(zookeeper_path + "/last_entry", {}, NULL);
-        size_t last_n_parsed = parse<size_t>(last_n);
-        LOG_DEBUG(log, "PARSED " << last_n_parsed);
-        LOG_DEBUG(log, "LOCAL CURRENT " << current_log_entry_n);
+void DatabaseReplicated::runBackgroundLogExecutor() {
+    current_zookeeper = getZooKeeper();
+    String last_n = current_zookeeper->get(zookeeper_path + "/last_entry", {}, NULL);
+    size_t last_n_parsed = parse<size_t>(last_n);
 
-        bool newEntries = current_log_entry_n < last_n_parsed;
-        while (current_log_entry_n < last_n_parsed) {
-            current_log_entry_n++;
-            executeLog(current_log_entry_n);
-        }
-        if (newEntries) {
-            saveState();
-        }
-        backgroundLogExecutor->scheduleAfter(500);
+    bool newEntries = current_log_entry_n < last_n_parsed;
+    while (current_log_entry_n < last_n_parsed) {
+        current_log_entry_n++;
+        String log_path = zookeeper_path + "/log/log." + std::to_string(current_log_entry_n);
+        executeFromZK(log_path);
     }
+    if (newEntries) {
+        saveState();
+    }
+    background_log_executor->scheduleAfter(500);
 }
 
 void DatabaseReplicated::saveState() {
@@ -153,10 +160,9 @@ void DatabaseReplicated::saveState() {
     out.close();
 }
 
-void DatabaseReplicated::executeLog(size_t n) {
-
+void DatabaseReplicated::executeFromZK(String & path) {
         current_zookeeper = getZooKeeper();
-        String query_to_execute = current_zookeeper->get(zookeeper_path + "/log." + std::to_string(n), {}, NULL);
+        String query_to_execute = current_zookeeper->get(path, {}, NULL);
         ReadBufferFromString istr(query_to_execute);
         String dummy_string;
         WriteBufferFromString ostr(dummy_string);
@@ -171,7 +177,7 @@ void DatabaseReplicated::executeLog(size_t n) {
         }
         catch (...)
         {
-            tryLogCurrentException(log, "Query " + query_to_execute  + " wasn't finished successfully");
+            tryLogCurrentException(log, "Query from zookeeper " + query_to_execute + " wasn't finished successfully");
     
         }
 
@@ -195,21 +201,23 @@ void DatabaseReplicated::propose(const ASTPtr & query) {
     current_zookeeper = getZooKeeper();
     auto lock = createSimpleZooKeeperLock(current_zookeeper, zookeeper_path, "propose_lock", replica_name);
 
-
     // schedule and deactive combo 
     // ensures that replica is up to date
     // and since propose lock is acquired,
     // no other propose can happen from
     // different replicas during this call
-    backgroundLogExecutor->schedule();
-    backgroundLogExecutor->deactivate();
+    background_log_executor->schedule();
+    background_log_executor->deactivate();
 
-    if (current_log_entry_n > 5) { // make a settings variable
-        createSnapshot();
-    }
+//    if (current_log_entry_n > 5) { // make a settings variable
+//        // TODO check that all the replicas are up to date!
+//        updateSnapshot();
+//        current_log_entry_n = 0;
+//        current_zookeeper->removeChildren(zookeeper_path + "/log");
+//    }
 
     current_log_entry_n++; // starting from 1
-    String log_entry = zookeeper_path + "/log." + std::to_string(current_log_entry_n);
+    String log_entry = zookeeper_path + "/log/log." + std::to_string(current_log_entry_n);
     current_zookeeper->createOrUpdate(log_entry, queryToString(query), zkutil::CreateMode::Persistent);
 
     current_zookeeper->createOrUpdate(zookeeper_path + "/last_entry", std::to_string(current_log_entry_n), zkutil::CreateMode::Persistent);
@@ -218,9 +226,9 @@ void DatabaseReplicated::propose(const ASTPtr & query) {
     saveState();
 }
 
-void DatabaseReplicated::createSnapshot() {
-    current_zookeeper->createAncestors(zookeeper_path + "/snapshot");
-    current_zookeeper->createOrUpdate(zookeeper_path + "/snapshot", std::to_string(current_log_entry_n), zkutil::CreateMode::Persistent);
+void DatabaseReplicated::updateSnapshot() {
+    current_zookeeper = getZooKeeper();
+    current_zookeeper->tryRemoveChildren(zookeeper_path + "/snapshot");
     for (auto iterator = getTablesIterator({}); iterator->isValid(); iterator->next()) {
         String table_name = iterator->name();
         auto query = getCreateQueryFromMetadata(getObjectMetadataPath(table_name), true);
@@ -229,4 +237,17 @@ void DatabaseReplicated::createSnapshot() {
     }
 }
 
+void DatabaseReplicated::loadMetadataFromSnapshot() {
+    current_zookeeper = getZooKeeper();
+
+    Strings metadatas;
+    if (current_zookeeper->tryGetChildren(zookeeper_path + "/snapshot", metadatas) != Coordination::ZOK)
+        return;
+
+    for (auto t = metadatas.begin(); t != metadatas.end(); ++t) {
+        String path = zookeeper_path + "/snapshot/" + *t;
+        executeFromZK(path);
+    }
+}
+
 }
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index 0b2d097caac..bd2f11390d2 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -20,8 +20,6 @@ class DatabaseReplicated : public DatabaseAtomic
 public:
     DatabaseReplicated(const String & name_, const String & metadata_path_, const String & zookeeper_path_, const String & replica_name_, Context & context);
 
-    ~DatabaseReplicated();
-
     String getEngineName() const override { return "Replicated"; }
 
     void propose(const ASTPtr & query) override;
@@ -30,21 +28,21 @@ public:
     String replica_name;
 
 private:
+    void createDatabaseZKNodes();
 
-    void runMainThread();
+    void runBackgroundLogExecutor();
 
-    void executeLog(size_t n);
+    void executeFromZK(String & path);
 
     void saveState();
-
-    void createSnapshot();
+    void updateSnapshot();
+    void loadMetadataFromSnapshot();
 
     std::unique_ptr<Context> current_context; // to run executeQuery
 
     std::atomic<size_t> current_log_entry_n = 0;
-    std::atomic<bool> stop_flag{false};
 
-    BackgroundSchedulePool::TaskHolder backgroundLogExecutor;
+    BackgroundSchedulePool::TaskHolder background_log_executor;
 
     String replica_path;
 

From cbcd1bea0eef7ee647f1cdcca51612cecc4697d1 Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Tue, 26 May 2020 16:35:05 +0300
Subject: [PATCH 0027/1238] provide better comments and information

---
 src/Common/CurrentMetrics.cpp               |  4 ++--
 src/Common/ZooKeeper/ZooKeeper.h            |  3 ++-
 src/Core/Settings.h                         |  2 +-
 src/Databases/IDatabase.h                   | 22 ++++++++++-----------
 src/Interpreters/Context.cpp                |  1 -
 src/Interpreters/InterpreterCreateQuery.cpp |  8 +++++---
 src/Interpreters/InterpreterDropQuery.cpp   |  8 +++-----
 7 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp
index 36c65953a6f..a6a08897505 100644
--- a/src/Common/CurrentMetrics.cpp
+++ b/src/Common/CurrentMetrics.cpp
@@ -14,7 +14,7 @@
     M(BackgroundSchedulePoolTask, "Number of active tasks in BackgroundSchedulePool. This pool is used for periodic ReplicatedMergeTree tasks, like cleaning old data parts, altering data parts, replica re-initialization, etc.") \
     M(BackgroundBufferFlushSchedulePoolTask, "Number of active tasks in BackgroundBufferFlushSchedulePool. This pool is used for periodic Buffer flushes") \
     M(BackgroundDistributedSchedulePoolTask, "Number of active tasks in BackgroundDistributedSchedulePool. This pool is used for distributed sends that is done in background.") \
-    M(BackgroundReplicatedSchedulePoolTask, "Number of active tasks in BackgroundReplicatedSchedulePoolTask. TODO.") \
+    M(BackgroundReplicatedSchedulePoolTask, "Number of active tasks in BackgroundReplicatedSchedulePoolTask. The pool is used by replicated database for executing DDL log coming from other replicas. One task corresponds to one replicated database") \
     M(CacheDictionaryUpdateQueueBatches, "Number of 'batches' (a set of keys) in update queue in CacheDictionaries.") \
     M(CacheDictionaryUpdateQueueKeys, "Exact number of keys in update queue in CacheDictionaries.") \
     M(DiskSpaceReservedForMerge, "Disk space reserved for currently running background merges. It is slightly more than the total size of currently merging parts.") \
@@ -39,7 +39,7 @@
     M(MemoryTrackingInBackgroundSchedulePool, "Total amount of memory (bytes) allocated in background schedule pool (that is dedicated for bookkeeping tasks of Replicated tables).") \
     M(MemoryTrackingInBackgroundBufferFlushSchedulePool, "Total amount of memory (bytes) allocated in background buffer flushes pool (that is dedicated for background buffer flushes).") \
     M(MemoryTrackingInBackgroundDistributedSchedulePool, "Total amount of memory (bytes) allocated in background distributed schedule pool (that is dedicated for distributed sends).") \
-    M(MemoryTrackingInBackgroundReplicatedSchedulePool, "Total amount of memory (bytes) allocated in replicated schedule pool (TODO).") \
+    M(MemoryTrackingInBackgroundReplicatedSchedulePool, "Total amount of memory (bytes) allocated in background replicated schedule pool (that is dedicated for ddl log execution by replicated database replicas).") \
     M(MemoryTrackingForMerges, "Total amount of memory (bytes) allocated for background merges. Included in MemoryTrackingInBackgroundProcessingPool. Note that this value may include a drift when the memory was allocated in a context of background processing pool and freed in other context or vice-versa. This happens naturally due to caches for tables indexes and doesn't indicate memory leaks.") \
     M(EphemeralNode, "Number of ephemeral nodes hold in ZooKeeper.") \
     M(ZooKeeperSession, "Number of sessions (connections) to ZooKeeper. Should be no more than one, because using more than one connection to ZooKeeper may lead to bugs due to lack of linearizability (stale reads) that ZooKeeper consistency model allows.") \
diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h
index cb28f442392..47eaefa51fc 100644
--- a/src/Common/ZooKeeper/ZooKeeper.h
+++ b/src/Common/ZooKeeper/ZooKeeper.h
@@ -188,7 +188,8 @@ public:
     void removeChildren(const std::string & path);
 
     /// Remove all children nodes (non recursive).
-    /// If there're no children, this method doesn't throw an exception
+    /// If there're no children for the given path,
+    /// this method does not throw an exception.
     void tryRemoveChildren(const std::string & path);
 
     using WaitCondition = std::function<bool()>;
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index ea950afa70a..1351b752136 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -87,7 +87,7 @@ struct Settings : public SettingsCollection<Settings>
     M(SettingUInt64, background_move_pool_size, 8, "Number of threads performing background moves for tables. Only has meaning at server startup.", 0) \
     M(SettingUInt64, background_schedule_pool_size, 16, "Number of threads performing background tasks for replicated tables, kafka streaming, dns cache updates. Only has meaning at server startup.", 0) \
     M(SettingUInt64, background_distributed_schedule_pool_size, 16, "Number of threads performing background tasks for distributed sends. Only has meaning at server startup.", 0) \
-    M(SettingUInt64, background_replicated_schedule_pool_size, 16, "Number of threads performing background tasks in replicated databases. Only has meaning at server startup.", 0) \
+    M(SettingUInt64, background_replicated_schedule_pool_size, 4, "Number of threads performing background tasks in replicated databases. One task corresponds to one replicated database replica. Only has meaning at server startup.", 0) \
     \
     M(SettingMilliseconds, distributed_directory_monitor_sleep_time_ms, 100, "Sleep time for StorageDistributed DirectoryMonitors, in case of any errors delay grows exponentially.", 0) \
     M(SettingMilliseconds, distributed_directory_monitor_max_sleep_time_ms, 30000, "Maximum sleep time for StorageDistributed DirectoryMonitors, it limits exponential growth too.", 0) \
diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h
index 18265b153cf..5b3003f36b4 100644
--- a/src/Databases/IDatabase.h
+++ b/src/Databases/IDatabase.h
@@ -162,7 +162,7 @@ public:
     virtual bool empty() const = 0;
 
     virtual void propose(const ASTPtr & /*query*/) {
-        throw Exception("There is no propose query method for Database" + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
+        throw Exception(getEngineName() + ": propose() is not supported", ErrorCodes::NOT_IMPLEMENTED);
     }
 
     /// Add the table to the database. Record its presence in the metadata.
@@ -172,7 +172,7 @@ public:
         const StoragePtr & /*table*/,
         const ASTPtr & /*query*/)
     {
-        throw Exception("There is no CREATE TABLE query for Database" + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
+        throw Exception("There is no CREATE TABLE query for Database " + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
     }
 
     /// Add the dictionary to the database. Record its presence in the metadata.
@@ -181,7 +181,7 @@ public:
         const String & /*dictionary_name*/,
         const ASTPtr & /*query*/)
     {
-        throw Exception("There is no CREATE DICTIONARY query for Database" + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
+        throw Exception("There is no CREATE DICTIONARY query for Database " + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
     }
 
     /// Delete the table from the database, drop table and delete the metadata.
@@ -190,7 +190,7 @@ public:
         const String & /*name*/,
         [[maybe_unused]] bool no_delay = false)
     {
-        throw Exception("There is no DROP TABLE query for Database" + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
+        throw Exception("There is no DROP TABLE query for Database " + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
     }
 
     /// Delete the dictionary from the database. Delete the metadata.
@@ -198,32 +198,32 @@ public:
         const Context & /*context*/,
         const String & /*dictionary_name*/)
     {
-        throw Exception("There is no DROP DICTIONARY query for Database" + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
+        throw Exception("There is no DROP DICTIONARY query for Database " + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
     }
 
     /// Add a table to the database, but do not add it to the metadata. The database may not support this method.
     virtual void attachTable(const String & /*name*/, const StoragePtr & /*table*/, [[maybe_unused]] const String & relative_table_path = {})
     {
-        throw Exception("There is no ATTACH TABLE query for Database" + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
+        throw Exception("There is no ATTACH TABLE query for Database " + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
     }
 
     /// Add dictionary to the database, but do not add it to the metadata. The database may not support this method.
     /// If dictionaries_lazy_load is false it also starts loading the dictionary asynchronously.
     virtual void attachDictionary(const String & /* dictionary_name */, const DictionaryAttachInfo & /* attach_info */)
     {
-        throw Exception("There is no ATTACH DICTIONARY query for Database" + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
+        throw Exception("There is no ATTACH DICTIONARY query for Database " + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
     }
 
     /// Forget about the table without deleting it, and return it. The database may not support this method.
     virtual StoragePtr detachTable(const String & /*name*/)
     {
-        throw Exception("There is no DETACH TABLE query for Database" + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
+        throw Exception("There is no DETACH TABLE query for Database " + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
     }
 
     /// Forget about the dictionary without deleting it. The database may not support this method.
     virtual void detachDictionary(const String & /*name*/)
     {
-        throw Exception("There is no DETACH DICTIONARY query for Database" + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
+        throw Exception("There is no DETACH DICTIONARY query for Database " + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
     }
 
     /// Rename the table and possibly move the table to another database.
@@ -314,14 +314,14 @@ protected:
     virtual ASTPtr getCreateTableQueryImpl(const String & /*name*/, const Context & /*context*/, bool throw_on_error) const
     {
         if (throw_on_error)
-            throw Exception("There is no SHOW CREATE TABLE query for Database" + getEngineName(), ErrorCodes::CANNOT_GET_CREATE_TABLE_QUERY);
+            throw Exception("There is no SHOW CREATE TABLE query for Database " + getEngineName(), ErrorCodes::CANNOT_GET_CREATE_TABLE_QUERY);
         return nullptr;
     }
 
     virtual ASTPtr getCreateDictionaryQueryImpl(const String & /*name*/, bool throw_on_error) const
     {
         if (throw_on_error)
-            throw Exception("There is no SHOW CREATE DICTIONARY query for Database" + getEngineName(), ErrorCodes::CANNOT_GET_CREATE_DICTIONARY_QUERY);
+            throw Exception("There is no SHOW CREATE DICTIONARY query for Database " + getEngineName(), ErrorCodes::CANNOT_GET_CREATE_DICTIONARY_QUERY);
         return nullptr;
     }
 
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index ccd489f6c45..14ee5284bab 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -341,7 +341,6 @@ struct ContextShared
     std::optional<BackgroundProcessingPool> background_move_pool; /// The thread pool for the background moves performed by the tables.
     std::optional<BackgroundSchedulePool> schedule_pool;    /// A thread pool that can run different jobs in background (used in replicated tables)
     std::optional<BackgroundSchedulePool> distributed_schedule_pool; /// A thread pool that can run different jobs in background (used for distributed sends)
-    // TODO Rename replicated table pool or even both; adjust comments 
     std::optional<BackgroundSchedulePool> replicated_schedule_pool; /// A thread pool that can run different jobs in background (used in replicated database engine)
     MultiVersion<Macros> macros;                            /// Substitutions extracted from config.
     std::unique_ptr<DDLWorker> ddl_worker;                  /// Process ddl commands from zk.
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index 648e41327ba..6ff474e096f 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -601,7 +601,10 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
     /// Set and retrieve list of columns, indices and constraints. Set table engine if needed. Rewrite query in canonical way.
     TableProperties properties = setProperties(create);
 
-    // testing
+    /// DDL log for replicated databases can not
+    /// contain the right database name for every replica
+    /// therefore for such queries the AST database
+    /// field is modified right before an actual execution
     if (context.from_replicated_log) {
         create.database = current_database;
     }
@@ -637,8 +640,7 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
         }
         else if (database->getEngineName() == "Replicated" && context.from_replicated_log) {
             if (create.uuid == UUIDHelpers::Nil)
-                // change error to incorrect log or something
-                throw Exception("Table UUID is not specified in the replicated log", ErrorCodes::INCORRECT_QUERY);
+                throw Exception("Table UUID is not specified in DDL log", ErrorCodes::INCORRECT_QUERY);
         }
         else
         {
diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp
index e9221fc273c..fe94a394ba2 100644
--- a/src/Interpreters/InterpreterDropQuery.cpp
+++ b/src/Interpreters/InterpreterDropQuery.cpp
@@ -110,6 +110,9 @@ BlockIO InterpreterDropQuery::executeToTable(
             auto table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
             auto metadata_snapshot = table->getInMemoryMetadataPtr();
             /// Drop table data, don't touch metadata
+            if (database->getEngineName() == "Replicated" && !context.from_replicated_log) {
+                database->propose(query_ptr);
+            }
             table->truncate(query_ptr, metadata_snapshot, context, table_lock);
         }
         else if (query.kind == ASTDropQuery::Kind::Drop)
@@ -119,13 +122,8 @@ BlockIO InterpreterDropQuery::executeToTable(
 
             table->shutdown();
 
-<<<<<<< HEAD
             TableExclusiveLockHolder table_lock;
-            if (database->getEngineName() != "Atomic")
-=======
-            TableStructureWriteLockHolder table_lock;
             if (database->getEngineName() != "Atomic" && database->getEngineName() != "Replicated")
->>>>>>> 921e85e9c9... make db replicated inherited from atomic
                 table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
 
             if (database->getEngineName() == "Replicated" && !context.from_replicated_log) {

From 31910e9bf1a526a2bf3e8fdf167ff3447e37747f Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Tue, 26 May 2020 18:08:09 +0300
Subject: [PATCH 0028/1238] Use ClientInf::QueryKind to distinguish replicated
 db log queries

---
 src/Databases/DatabaseReplicated.cpp        | 2 +-
 src/Interpreters/ClientInfo.h               | 2 +-
 src/Interpreters/Context.h                  | 3 ---
 src/Interpreters/InterpreterAlterQuery.cpp  | 3 +--
 src/Interpreters/InterpreterCreateQuery.cpp | 8 ++++----
 src/Interpreters/InterpreterDropQuery.cpp   | 7 ++++---
 src/Interpreters/InterpreterRenameQuery.cpp | 2 +-
 7 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index ae5a8249202..c6840ac0d81 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -170,7 +170,7 @@ void DatabaseReplicated::executeFromZK(String & path) {
         try
         {
             current_context = std::make_unique<Context>(global_context);
-            current_context->from_replicated_log = true;
+            current_context->getClientInfo().query_kind = ClientInfo::QueryKind::REPLICATED_LOG_QUERY;
             current_context->setCurrentDatabase(database_name);
             current_context->setCurrentQueryId(""); // generate random query_id
             executeQuery(istr, ostr, false, *current_context, {});
diff --git a/src/Interpreters/ClientInfo.h b/src/Interpreters/ClientInfo.h
index 2dff30e40a2..42b3ab42bc1 100644
--- a/src/Interpreters/ClientInfo.h
+++ b/src/Interpreters/ClientInfo.h
@@ -38,7 +38,7 @@ public:
         NO_QUERY = 0,            /// Uninitialized object.
         INITIAL_QUERY = 1,
         SECONDARY_QUERY = 2,    /// Query that was initiated by another query for distributed or ON CLUSTER query execution.
-        REPLICATED_LOG_QUERY = 3, /// TODO add comment
+        REPLICATED_LOG_QUERY = 3, /// Query from replicated DDL log.
     };
 
 
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index e9c78a175d4..5d1fda03221 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -214,9 +214,6 @@ private:
     Context();
 
 public:
-    ///testing
-    bool from_replicated_log = false;
-
     /// Create initial Context with ContextShared and etc.
     static Context createGlobal(ContextShared * shared);
     static SharedContextHolder createShared();
diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp
index cef1ebd7469..134531d0cf0 100644
--- a/src/Interpreters/InterpreterAlterQuery.cpp
+++ b/src/Interpreters/InterpreterAlterQuery.cpp
@@ -49,9 +49,8 @@ BlockIO InterpreterAlterQuery::execute()
     auto alter_lock = table->lockForAlter(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
     auto metadata_snapshot = table->getInMemoryMetadataPtr();
 
-    // TODO it's dirty. need to add database to parsing stage
     DatabasePtr database = DatabaseCatalog::instance().getDatabase(table_id.database_name);
-    if (database->getEngineName() == "Replicated" && !context.from_replicated_log && !table->supportsReplication()) {
+    if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY && !table->supportsReplication()) {
         database->propose(query_ptr);
     }
 
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index 6ff474e096f..0b06fbfd874 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -605,7 +605,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
     /// contain the right database name for every replica
     /// therefore for such queries the AST database
     /// field is modified right before an actual execution
-    if (context.from_replicated_log) {
+    if (context.getClientInfo().query_kind == ClientInfo::QueryKind::REPLICATED_LOG_QUERY) {
         create.database = current_database;
     }
 
@@ -630,7 +630,7 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
     if (need_add_to_database)
     {
         database = DatabaseCatalog::instance().getDatabase(create.database);
-        if (database->getEngineName() == "Atomic" || (database->getEngineName() == "Replicated" && !context.from_replicated_log))
+        if (database->getEngineName() == "Atomic" || (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY))
         {
             /// TODO implement ATTACH FROM 'path/to/data': generate UUID and move table data to store/
             if (create.attach && create.uuid == UUIDHelpers::Nil)
@@ -638,7 +638,7 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
             if (!create.attach && create.uuid == UUIDHelpers::Nil)
                 create.uuid = UUIDHelpers::generateV4();
         }
-        else if (database->getEngineName() == "Replicated" && context.from_replicated_log) {
+        else if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind == ClientInfo::QueryKind::REPLICATED_LOG_QUERY) {
             if (create.uuid == UUIDHelpers::Nil)
                 throw Exception("Table UUID is not specified in DDL log", ErrorCodes::INCORRECT_QUERY);
         }
@@ -709,7 +709,7 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
     }
 
     
-    if (database->getEngineName() == "Replicated" && !context.from_replicated_log) {
+    if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY) {
         database->propose(query_ptr);
     }
     database->createTable(context, table_name, res, query_ptr);
diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp
index fe94a394ba2..afbf5d31fbf 100644
--- a/src/Interpreters/InterpreterDropQuery.cpp
+++ b/src/Interpreters/InterpreterDropQuery.cpp
@@ -97,7 +97,7 @@ BlockIO InterpreterDropQuery::executeToTable(
             if (database->getEngineName() != "Atomic" && database->getEngineName() != "Replicated")
                 table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
             /// Drop table from memory, don't touch data and metadata
-            if (database->getEngineName() == "Replicated" && !context.from_replicated_log) {
+            if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY) {
                 database->propose(query_ptr);
             }
             database->detachTable(table_id.table_name);
@@ -110,7 +110,8 @@ BlockIO InterpreterDropQuery::executeToTable(
             auto table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
             auto metadata_snapshot = table->getInMemoryMetadataPtr();
             /// Drop table data, don't touch metadata
-            if (database->getEngineName() == "Replicated" && !context.from_replicated_log) {
+            auto table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
+            if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY) {
                 database->propose(query_ptr);
             }
             table->truncate(query_ptr, metadata_snapshot, context, table_lock);
@@ -126,7 +127,7 @@ BlockIO InterpreterDropQuery::executeToTable(
             if (database->getEngineName() != "Atomic" && database->getEngineName() != "Replicated")
                 table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
 
-            if (database->getEngineName() == "Replicated" && !context.from_replicated_log) {
+            if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY) {
                 database->propose(query_ptr);
             }
             database->dropTable(context, table_id.table_name, query.no_delay);
diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp
index d93b14a6bc2..45003ab0d14 100644
--- a/src/Interpreters/InterpreterRenameQuery.cpp
+++ b/src/Interpreters/InterpreterRenameQuery.cpp
@@ -81,7 +81,7 @@ BlockIO InterpreterRenameQuery::execute()
             database_catalog.assertTableDoesntExist(StorageID(elem.to_database_name, elem.to_table_name), context);
 
         DatabasePtr database = database_catalog.getDatabase(elem.from_database_name);
-        if (database->getEngineName() == "Replicated" && !context.from_replicated_log) {
+        if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY) {
             database->propose(query_ptr);
         }
         database->renameTable(

From fbbccaf98ae02b5ed463b3c05fc79595743e817a Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Tue, 26 May 2020 18:10:15 +0300
Subject: [PATCH 0029/1238] remove stateless tests for replicated db

---
 ...7_replicated_database_engine_zookeeper.sql | 10 -----
 ...icated_database_engine_zookeeper.reference | 34 ----------------
 ...9_replicated_database_engine_zookeeper.sql | 39 -------------------
 3 files changed, 83 deletions(-)
 delete mode 100644 tests/queries/0_stateless/01267_replicated_database_engine_zookeeper.sql
 delete mode 100644 tests/queries/0_stateless/01269_replicated_database_engine_zookeeper.reference
 delete mode 100644 tests/queries/0_stateless/01269_replicated_database_engine_zookeeper.sql

diff --git a/tests/queries/0_stateless/01267_replicated_database_engine_zookeeper.sql b/tests/queries/0_stateless/01267_replicated_database_engine_zookeeper.sql
deleted file mode 100644
index c70de9a50d2..00000000000
--- a/tests/queries/0_stateless/01267_replicated_database_engine_zookeeper.sql
+++ /dev/null
@@ -1,10 +0,0 @@
-DROP DATABASE IF EXISTS test_db1;
-DROP DATABASE IF EXISTS test_db2;
-
-CREATE DATABASE test_db1 ENGINE = Replicated('/clickhouse/databases/test1', 'id1');
-CREATE TABLE test_db1.replicated_table  (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree('/clickhouse/tables/test1', 'id1', d, k, 8192);
-CREATE TABLE test_db1.basic_table (EventDate Date, CounterID Int) engine=MergeTree(EventDate, (CounterID, EventDate), 8192);
-
-CREATE DATABASE test_db2 ENGINE = Replicated('/clickhouse/databases/test1', 'id2');
-CREATE TABLE test_db2.replicated_table (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree('/clickhouse/tables/test1', 'id2', d, k, 8192);
-CREATE TABLE test_db2.basic_table (EventDate Date, CounterID Int) engine=MergeTree(EventDate, (CounterID, EventDate), 8192);
diff --git a/tests/queries/0_stateless/01269_replicated_database_engine_zookeeper.reference b/tests/queries/0_stateless/01269_replicated_database_engine_zookeeper.reference
deleted file mode 100644
index 58f951b1257..00000000000
--- a/tests/queries/0_stateless/01269_replicated_database_engine_zookeeper.reference
+++ /dev/null
@@ -1,34 +0,0 @@
-CounterID	UInt32					
-StartDate	Date					
-UserID	UInt32					
-VisitID	UInt32					
-Added0	String					
-Added1	UInt32					
-Added2	UInt32					
-AddedNested1.A	Array(UInt32)					
-AddedNested1.C	Array(String)					
-AddedNested2.A	Array(UInt32)					
-AddedNested2.B	Array(UInt64)					
-CounterID	UInt32					
-StartDate	Date					
-UserID	UInt32					
-VisitID	UInt32					
-Added0	String					
-Added1	UInt32					
-Added2	UInt32					
-AddedNested1.A	Array(UInt32)					
-AddedNested1.C	Array(String)					
-AddedNested2.A	Array(UInt32)					
-AddedNested2.B	Array(UInt64)					
-CounterID	UInt32					
-StartDate	Date					
-UserID	UInt32					
-VisitID	UInt32					
-Added0	String					
-Added1	UInt32					
-Added2	UInt32					
-AddedNested1.A	Array(UInt32)					
-AddedNested1.C	Array(String)					
-AddedNested2.A	Array(UInt32)					
-AddedNested2.B	Array(UInt64)					
-
diff --git a/tests/queries/0_stateless/01269_replicated_database_engine_zookeeper.sql b/tests/queries/0_stateless/01269_replicated_database_engine_zookeeper.sql
deleted file mode 100644
index 1acc9022014..00000000000
--- a/tests/queries/0_stateless/01269_replicated_database_engine_zookeeper.sql
+++ /dev/null
@@ -1,39 +0,0 @@
-DROP DATABASE IF EXISTS rdbtest;
-DROP DATABASE IF EXISTS replicatwo;
-DROP DATABASE IF EXISTS replicathree;
-
-CREATE DATABASE rdbtest ENGINE = Replicated('/clickhouse/db/test1/', 'id1');
-CREATE DATABASE replicatwo ENGINE = Replicated('/clickhouse/db/test1/', 'id2');
-CREATE DATABASE replicathree ENGINE = Replicated('/clickhouse/db/test1/', 'id3');
-
-USE rdbtest;
-
-CREATE TABLE alter_test (CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192);
-
-ALTER TABLE alter_test ADD COLUMN Added0 UInt32;
-ALTER TABLE alter_test ADD COLUMN Added2 UInt32;
-ALTER TABLE alter_test ADD COLUMN Added1 UInt32 AFTER Added0;
-
-ALTER TABLE alter_test ADD COLUMN AddedNested1 Nested(A UInt32, B UInt64) AFTER Added2;
-ALTER TABLE alter_test ADD COLUMN AddedNested1.C Array(String) AFTER AddedNested1.B;
-ALTER TABLE alter_test ADD COLUMN AddedNested2 Nested(A UInt32, B UInt64) AFTER AddedNested1;
-
-ALTER TABLE alter_test DROP COLUMN ToDrop;
-
-ALTER TABLE alter_test MODIFY COLUMN Added0 String;
-
-ALTER TABLE alter_test DROP COLUMN NestedColumn.A;
-ALTER TABLE alter_test DROP COLUMN NestedColumn.S;
-
-ALTER TABLE alter_test DROP COLUMN AddedNested1.B;
-
-ALTER TABLE alter_test ADD COLUMN IF NOT EXISTS Added0 UInt32;
-ALTER TABLE alter_test ADD COLUMN IF NOT EXISTS AddedNested1 Nested(A UInt32, B UInt64);
-ALTER TABLE alter_test ADD COLUMN IF NOT EXISTS AddedNested1.C Array(String);
-ALTER TABLE alter_test MODIFY COLUMN IF EXISTS ToDrop UInt64;
-ALTER TABLE alter_test DROP COLUMN IF EXISTS ToDrop;
-ALTER TABLE alter_test COMMENT COLUMN IF EXISTS ToDrop 'new comment';
-
-DESC TABLE rdbtest.alter_test;
-DESC TABLE replicatwo.alter_test;
-DESC TABLE replicathree.alter_test;

From 0e9f516738adad2a22cf95d92304c6ffe3c6e55a Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Wed, 27 May 2020 18:04:10 +0300
Subject: [PATCH 0030/1238] add comment for replicated db class

---
 src/Databases/DatabaseReplicated.h | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index bd2f11390d2..e81b78386f7 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -10,10 +10,27 @@
 
 namespace DB
 {
-/** Replicated database engine.
-  * It stores tables list using list of .sql files,
-  *  that contain declaration of table represented by SQL ATTACH TABLE query
-  *  and operation log in zookeeper
+/** DatabaseReplicated engine
+  * supports replication of metadata
+  * via DDL log being written to ZooKeeper
+  * and executed on all of the replicas
+  * for a given database.
+  *
+  * One Clickhouse server can have multiple
+  * replicated databases running and updating
+  * at the same time.
+  * 
+  * The engine has two parameters ZooKeeper path and 
+  * replica name.
+  * The same ZooKeeper path corresponds to the same
+  * database. Replica names must be different for all replicas
+  * of the same database.
+  *
+  * Using this engine, creation of Replicated tables
+  * requires no ZooKeeper path and replica name parameters.
+  * Table's replica name is the same as database replica name.
+  * Table's ZooKeeper path is a concatenation of database's
+  * ZooKeeper path, /tables/, and UUID of the table.
   */
 class DatabaseReplicated : public DatabaseAtomic
 {

From a0af67b636d4a2b47d0c0898833e8c1c86731561 Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Wed, 27 May 2020 21:33:37 +0300
Subject: [PATCH 0031/1238] Add one more test for db replicated and fix related
 bug

---
 src/Databases/DatabaseReplicated.cpp          |  8 +++
 .../test_replicated_database/test.py          | 52 ++++++++++++-------
 2 files changed, 40 insertions(+), 20 deletions(-)

diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index c6840ac0d81..202e46c3f82 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -201,6 +201,13 @@ void DatabaseReplicated::propose(const ASTPtr & query) {
     current_zookeeper = getZooKeeper();
     auto lock = createSimpleZooKeeperLock(current_zookeeper, zookeeper_path, "propose_lock", replica_name);
 
+    while (!lock->tryLock()) {
+        // TODO it seems that zk lock doesn't work at all
+        // need to find a different solution for proposal
+        pcg64 rng(randomSeed());
+        std::this_thread::sleep_for(std::chrono::milliseconds(std::uniform_int_distribution<int>(0, 1000)(rng)));
+    }
+
     // schedule and deactive combo 
     // ensures that replica is up to date
     // and since propose lock is acquired,
@@ -224,6 +231,7 @@ void DatabaseReplicated::propose(const ASTPtr & query) {
 
     lock->unlock();
     saveState();
+    background_log_executor->activateAndSchedule();
 }
 
 void DatabaseReplicated::updateSnapshot() {
diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py
index 38977aa0bdb..703690a7218 100644
--- a/tests/integration/test_replicated_database/test.py
+++ b/tests/integration/test_replicated_database/test.py
@@ -33,38 +33,50 @@ def test_create_replicated_table(started_cluster):
     node1.query("CREATE TABLE testdb.replicated_table (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree(d, k, 8192);")
 
     time.sleep(DURATION_SECONDS)
-    logging.info(node2.query("desc table testdb.replicated_table"))
     assert node1.query("desc table testdb.replicated_table") == node2.query("desc table testdb.replicated_table")
 
-def test_alter_table(started_cluster):
+def test_simple_alter_table(started_cluster):
     DURATION_SECONDS = 1
-    node1.query("CREATE TABLE testdb.alter_test (CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192);\
-        ALTER TABLE testdb.alter_test ADD COLUMN Added0 UInt32;\
-        ALTER TABLE testdb.alter_test ADD COLUMN Added2 UInt32;\
-        ALTER TABLE testdb.alter_test ADD COLUMN Added1 UInt32 AFTER Added0;\
-        ALTER TABLE testdb.alter_test ADD COLUMN AddedNested1 Nested(A UInt32, B UInt64) AFTER Added2;\
-        ALTER TABLE testdb.alter_test ADD COLUMN AddedNested1.C Array(String) AFTER AddedNested1.B;\
-        ALTER TABLE testdb.alter_test ADD COLUMN AddedNested2 Nested(A UInt32, B UInt64) AFTER AddedNested1;")
+    node1.query("CREATE TABLE testdb.alter_test (CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192);")
+    node1.query("ALTER TABLE testdb.alter_test ADD COLUMN Added0 UInt32;")
+    node1.query("ALTER TABLE testdb.alter_test ADD COLUMN Added2 UInt32;")
+    node1.query("ALTER TABLE testdb.alter_test ADD COLUMN Added1 UInt32 AFTER Added0;")
+    node1.query("ALTER TABLE testdb.alter_test ADD COLUMN AddedNested1 Nested(A UInt32, B UInt64) AFTER Added2;")
+    node1.query("ALTER TABLE testdb.alter_test ADD COLUMN AddedNested1.C Array(String) AFTER AddedNested1.B;")
+    node1.query("ALTER TABLE testdb.alter_test ADD COLUMN AddedNested2 Nested(A UInt32, B UInt64) AFTER AddedNested1;")
 
     time.sleep(DURATION_SECONDS)
     assert node1.query("desc table testdb.alter_test") == node2.query("desc table testdb.alter_test")
 
-def test_create_replica_from_snapshot(started_cluster):
+def test_create_replica_after_delay(started_cluster):
     DURATION_SECONDS = 3
-    """
-    right now snapshot's created every 6 proposes.
-    later on it must be configurable
-    for now let's check snapshot 
-    by creating a new node just after 10 log entries 
-    """
-    node1.query("ALTER TABLE testdb.alter_test ADD COLUMN Added3 UInt32 ;") #9
-    node1.query("ALTER TABLE testdb.alter_test ADD COLUMN Added4 UInt32 ;") #10
-    node1.query("ALTER TABLE testdb.alter_test ADD COLUMN Added5 UInt32 ;") #1
-    # by this moment snapshot must be created
 
     node3.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', '{replica}');")
 
+    node1.query("ALTER TABLE testdb.alter_test ADD COLUMN Added3 UInt32 ;")
+    node1.query("ALTER TABLE testdb.alter_test ADD COLUMN Added4 UInt32 ;")
+    node1.query("ALTER TABLE testdb.alter_test ADD COLUMN Added5 UInt32 ;")
+
     time.sleep(DURATION_SECONDS)
 
     assert node3.query("desc table testdb.alter_test") == node1.query("desc table testdb.alter_test")
 
+def test_alters_from_different_replicas(started_cluster):
+    DURATION_SECONDS = 1
+
+    node1.query("CREATE TABLE testdb.concurrent_test (CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192);")
+    time.sleep(DURATION_SECONDS)
+
+    node3.query("ALTER TABLE testdb.concurrent_test ADD COLUMN Added0 UInt32;")
+    time.sleep(DURATION_SECONDS)
+    node1.query("ALTER TABLE testdb.concurrent_test ADD COLUMN Added2 UInt32;")
+    time.sleep(DURATION_SECONDS)
+    node3.query("ALTER TABLE testdb.concurrent_test ADD COLUMN Added1 UInt32 AFTER Added0;")
+    time.sleep(DURATION_SECONDS)
+    node1.query("ALTER TABLE testdb.concurrent_test ADD COLUMN AddedNested1 Nested(A UInt32, B UInt64) AFTER Added2;")
+    time.sleep(DURATION_SECONDS)
+    node3.query("ALTER TABLE testdb.concurrent_test ADD COLUMN AddedNested1.C Array(String) AFTER AddedNested1.B;")
+    time.sleep(DURATION_SECONDS)
+    node1.query("ALTER TABLE testdb.concurrent_test ADD COLUMN AddedNested2 Nested(A UInt32, B UInt64) AFTER AddedNested1;")
+    time.sleep(DURATION_SECONDS)
+    assert node3.query("desc table testdb.concurrent_test") == node1.query("desc table testdb.concurrent_test")

From 469f9738dff25544a35c23da2f6e207355b5f16c Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Wed, 27 May 2020 21:40:00 +0300
Subject: [PATCH 0032/1238] refactor save state in db replicated

---
 src/Databases/DatabaseReplicated.cpp | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index 202e46c3f82..3dbacbaf33d 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -148,12 +148,14 @@ void DatabaseReplicated::runBackgroundLogExecutor() {
 }
 
 void DatabaseReplicated::saveState() {
-    current_zookeeper->createOrUpdate(replica_path + "/last_entry", std::to_string(current_log_entry_n), zkutil::CreateMode::Persistent);
-    // TODO rename vars
-    String statement = std::to_string(current_log_entry_n);
-    String metadatafile = getMetadataPath() + ".last_entry";
-    WriteBufferFromFile out(metadatafile, statement.size(), O_WRONLY | O_CREAT);
-    writeString(statement, out);
+    String state = std::to_string(current_log_entry_n);
+
+    current_zookeeper = getZooKeeper();
+    current_zookeeper->createOrUpdate(replica_path + "/last_entry", state, zkutil::CreateMode::Persistent);
+
+    String metadata_file = getMetadataPath() + ".last_entry";
+    WriteBufferFromFile out(metadata_file, state.size(), O_WRONLY | O_CREAT);
+    writeString(state, out);
     out.next();
     if (global_context.getSettingsRef().fsync_metadata)
         out.sync();

From f928c897cf68b4bf73bf7b6108e469ef87bb385d Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Sun, 7 Jun 2020 14:20:05 +0300
Subject: [PATCH 0033/1238] change replication algorithm, remove zk lock

In this version of the databaseReplicated sequential persistent zk nodes
are used to order DDL queries. Db replicated ddl queries are executed
in the backgrould pool no matter whether it's proposed by the same
replica or not.
---
 src/Databases/DatabaseReplicated.cpp        | 84 +++++++++------------
 src/Databases/DatabaseReplicated.h          |  2 +
 src/Interpreters/InterpreterAlterQuery.cpp  |  1 +
 src/Interpreters/InterpreterCreateQuery.cpp | 10 +--
 src/Interpreters/InterpreterDropQuery.cpp   |  9 ++-
 src/Interpreters/InterpreterRenameQuery.cpp | 14 ++--
 6 files changed, 55 insertions(+), 65 deletions(-)

diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index 3dbacbaf33d..2650bd46a58 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -132,19 +132,34 @@ void DatabaseReplicated::createDatabaseZKNodes() {
 
 void DatabaseReplicated::runBackgroundLogExecutor() {
     current_zookeeper = getZooKeeper();
-    String last_n = current_zookeeper->get(zookeeper_path + "/last_entry", {}, NULL);
-    size_t last_n_parsed = parse<size_t>(last_n);
+    Strings log_entry_names = current_zookeeper->getChildren(zookeeper_path + "/log");
 
-    bool newEntries = current_log_entry_n < last_n_parsed;
-    while (current_log_entry_n < last_n_parsed) {
-        current_log_entry_n++;
-        String log_path = zookeeper_path + "/log/log." + std::to_string(current_log_entry_n);
-        executeFromZK(log_path);
-    }
-    if (newEntries) {
-        saveState();
+    std::sort(log_entry_names.begin(), log_entry_names.end());
+    auto newest_entry_it = std::upper_bound(log_entry_names.begin(), log_entry_names.end(), last_executed_log_entry);
+
+    log_entry_names.erase(log_entry_names.begin(), newest_entry_it);
+
+    for (const String & log_entry_name : log_entry_names) {
+        String log_entry_path = zookeeper_path + "/log/" + log_entry_name;
+        executeFromZK(log_entry_path);
+        last_executed_log_entry = log_entry_name;
     }
+
     background_log_executor->scheduleAfter(500);
+
+    // String last_n = current_zookeeper->get(zookeeper_path + "/last_entry", {}, NULL);
+    // size_t last_n_parsed = parse<size_t>(last_n);
+
+    // bool newEntries = current_log_entry_n < last_n_parsed;
+    // while (current_log_entry_n < last_n_parsed) {
+    //     current_log_entry_n++;
+    //     String log_path = zookeeper_path + "/log/log." + std::to_string(current_log_entry_n);
+    //     executeFromZK(log_path);
+    // }
+    // if (newEntries) {
+    //     saveState();
+    // }
+    // background_log_executor->scheduleAfter(500);
 }
 
 void DatabaseReplicated::saveState() {
@@ -187,53 +202,22 @@ void DatabaseReplicated::executeFromZK(String & path) {
 }
 
 // TODO Move to ZooKeeper/Lock and remove it from here and ddlworker
-static std::unique_ptr<zkutil::Lock> createSimpleZooKeeperLock(
-    const std::shared_ptr<zkutil::ZooKeeper> & zookeeper, const String & lock_prefix, const String & lock_name, const String & lock_message)
-{
-    auto zookeeper_holder = std::make_shared<zkutil::ZooKeeperHolder>();
-    zookeeper_holder->initFromInstance(zookeeper);
-    return std::make_unique<zkutil::Lock>(std::move(zookeeper_holder), lock_prefix, lock_name, lock_message);
-}
+// static std::unique_ptr<zkutil::Lock> createSimpleZooKeeperLock(
+//     const std::shared_ptr<zkutil::ZooKeeper> & zookeeper, const String & lock_prefix, const String & lock_name, const String & lock_message)
+// {
+//     auto zookeeper_holder = std::make_shared<zkutil::ZooKeeperHolder>();
+//     zookeeper_holder->initFromInstance(zookeeper);
+//     return std::make_unique<zkutil::Lock>(std::move(zookeeper_holder), lock_prefix, lock_name, lock_message);
+// }
 
 
 void DatabaseReplicated::propose(const ASTPtr & query) {
-    // TODO remove that log message i think
-    LOG_DEBUG(log, "PROPOSING\n" << queryToString(query));
-
     current_zookeeper = getZooKeeper();
-    auto lock = createSimpleZooKeeperLock(current_zookeeper, zookeeper_path, "propose_lock", replica_name);
 
-    while (!lock->tryLock()) {
-        // TODO it seems that zk lock doesn't work at all
-        // need to find a different solution for proposal
-        pcg64 rng(randomSeed());
-        std::this_thread::sleep_for(std::chrono::milliseconds(std::uniform_int_distribution<int>(0, 1000)(rng)));
-    }
+    LOG_DEBUG(log, "PROPOSINGGG query: " << queryToString(query));
+    current_zookeeper->create(zookeeper_path + "/log/log-", queryToString(query), zkutil::CreateMode::PersistentSequential);
 
-    // schedule and deactive combo 
-    // ensures that replica is up to date
-    // and since propose lock is acquired,
-    // no other propose can happen from
-    // different replicas during this call
     background_log_executor->schedule();
-    background_log_executor->deactivate();
-
-//    if (current_log_entry_n > 5) { // make a settings variable
-//        // TODO check that all the replicas are up to date!
-//        updateSnapshot();
-//        current_log_entry_n = 0;
-//        current_zookeeper->removeChildren(zookeeper_path + "/log");
-//    }
-
-    current_log_entry_n++; // starting from 1
-    String log_entry = zookeeper_path + "/log/log." + std::to_string(current_log_entry_n);
-    current_zookeeper->createOrUpdate(log_entry, queryToString(query), zkutil::CreateMode::Persistent);
-
-    current_zookeeper->createOrUpdate(zookeeper_path + "/last_entry", std::to_string(current_log_entry_n), zkutil::CreateMode::Persistent);
-
-    lock->unlock();
-    saveState();
-    background_log_executor->activateAndSchedule();
 }
 
 void DatabaseReplicated::updateSnapshot() {
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index e81b78386f7..19a0ea09e11 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -59,6 +59,8 @@ private:
 
     std::atomic<size_t> current_log_entry_n = 0;
 
+    String last_executed_log_entry = "";
+
     BackgroundSchedulePool::TaskHolder background_log_executor;
 
     String replica_path;
diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp
index 134531d0cf0..6b4bcdde067 100644
--- a/src/Interpreters/InterpreterAlterQuery.cpp
+++ b/src/Interpreters/InterpreterAlterQuery.cpp
@@ -52,6 +52,7 @@ BlockIO InterpreterAlterQuery::execute()
     DatabasePtr database = DatabaseCatalog::instance().getDatabase(table_id.database_name);
     if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY && !table->supportsReplication()) {
         database->propose(query_ptr);
+        return {};
     }
 
     /// Add default database to table identifiers that we can encounter in e.g. default expressions,
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index 0b06fbfd874..6806679cb4d 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -688,6 +688,11 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
         return true;
     }
 
+    if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY) {
+        database->propose(query_ptr);
+        return true;
+    }
+
     StoragePtr res;
     /// NOTE: CREATE query may be rewritten by Storage creator or table function
     if (create.as_table_function)
@@ -707,11 +712,6 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
             properties.constraints,
             false);
     }
-
-    
-    if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY) {
-        database->propose(query_ptr);
-    }
     database->createTable(context, table_name, res, query_ptr);
 
     /// We must call "startup" and "shutdown" while holding DDLGuard.
diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp
index afbf5d31fbf..05418f275a2 100644
--- a/src/Interpreters/InterpreterDropQuery.cpp
+++ b/src/Interpreters/InterpreterDropQuery.cpp
@@ -99,8 +99,9 @@ BlockIO InterpreterDropQuery::executeToTable(
             /// Drop table from memory, don't touch data and metadata
             if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY) {
                 database->propose(query_ptr);
+            } else {
+                database->detachTable(table_id.table_name);
             }
-            database->detachTable(table_id.table_name);
         }
         else if (query.kind == ASTDropQuery::Kind::Truncate)
         {
@@ -113,8 +114,9 @@ BlockIO InterpreterDropQuery::executeToTable(
             auto table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
             if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY) {
                 database->propose(query_ptr);
+            } else {
+                table->truncate(query_ptr, metadata_snapshot, context, table_lock);
             }
-            table->truncate(query_ptr, metadata_snapshot, context, table_lock);
         }
         else if (query.kind == ASTDropQuery::Kind::Drop)
         {
@@ -129,8 +131,9 @@ BlockIO InterpreterDropQuery::executeToTable(
 
             if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY) {
                 database->propose(query_ptr);
+            } else {
+                database->dropTable(context, table_id.table_name, query.no_delay);
             }
-            database->dropTable(context, table_id.table_name, query.no_delay);
         }
     }
 
diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp
index 45003ab0d14..97206f6b364 100644
--- a/src/Interpreters/InterpreterRenameQuery.cpp
+++ b/src/Interpreters/InterpreterRenameQuery.cpp
@@ -83,15 +83,15 @@ BlockIO InterpreterRenameQuery::execute()
         DatabasePtr database = database_catalog.getDatabase(elem.from_database_name);
         if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY) {
             database->propose(query_ptr);
+        } else {
+            database->renameTable(
+                context,
+                elem.from_table_name,
+                *database_catalog.getDatabase(elem.to_database_name),
+                elem.to_table_name,
+                rename.exchange);
         }
-        database->renameTable(
-            context,
-            elem.from_table_name,
-            *database_catalog.getDatabase(elem.to_database_name),
-            elem.to_table_name,
-            rename.exchange);
     }
-
     return {};
 }
 

From f6de720f59e8bc8619fbf8684e6d80e8459ba432 Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Sun, 7 Jun 2020 14:26:42 +0300
Subject: [PATCH 0034/1238] speed up db replicated test

---
 tests/integration/test_replicated_database/test.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py
index 703690a7218..95ca5c1e138 100644
--- a/tests/integration/test_replicated_database/test.py
+++ b/tests/integration/test_replicated_database/test.py
@@ -49,7 +49,7 @@ def test_simple_alter_table(started_cluster):
     assert node1.query("desc table testdb.alter_test") == node2.query("desc table testdb.alter_test")
 
 def test_create_replica_after_delay(started_cluster):
-    DURATION_SECONDS = 3
+    DURATION_SECONDS = 2
 
     node3.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', '{replica}');")
 
@@ -65,18 +65,20 @@ def test_alters_from_different_replicas(started_cluster):
     DURATION_SECONDS = 1
 
     node1.query("CREATE TABLE testdb.concurrent_test (CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192);")
+
     time.sleep(DURATION_SECONDS)
 
     node3.query("ALTER TABLE testdb.concurrent_test ADD COLUMN Added0 UInt32;")
-    time.sleep(DURATION_SECONDS)
     node1.query("ALTER TABLE testdb.concurrent_test ADD COLUMN Added2 UInt32;")
-    time.sleep(DURATION_SECONDS)
     node3.query("ALTER TABLE testdb.concurrent_test ADD COLUMN Added1 UInt32 AFTER Added0;")
-    time.sleep(DURATION_SECONDS)
     node1.query("ALTER TABLE testdb.concurrent_test ADD COLUMN AddedNested1 Nested(A UInt32, B UInt64) AFTER Added2;")
-    time.sleep(DURATION_SECONDS)
     node3.query("ALTER TABLE testdb.concurrent_test ADD COLUMN AddedNested1.C Array(String) AFTER AddedNested1.B;")
-    time.sleep(DURATION_SECONDS)
     node1.query("ALTER TABLE testdb.concurrent_test ADD COLUMN AddedNested2 Nested(A UInt32, B UInt64) AFTER AddedNested1;")
+
     time.sleep(DURATION_SECONDS)
+
+    logging.info("NODE3")
+    logging.info(node3.query("desc table testdb.concurrent_test"))
+    logging.info("NODE1")
+    logging.info(node1.query("desc table testdb.concurrent_test"))
     assert node3.query("desc table testdb.concurrent_test") == node1.query("desc table testdb.concurrent_test")

From e8e4e4d21c559fc3548d791dea65aa7871e8d19f Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Sat, 20 Jun 2020 18:38:20 +0300
Subject: [PATCH 0035/1238] add tests for db replicated

---
 .../configs/disable_snapshots.xml             |  3 ++
 .../configs/snapshot_each_query.xml           |  3 ++
 .../test_replicated_database/test.py          | 40 ++++++++++++-------
 3 files changed, 31 insertions(+), 15 deletions(-)
 create mode 100644 tests/integration/test_replicated_database/configs/disable_snapshots.xml
 create mode 100644 tests/integration/test_replicated_database/configs/snapshot_each_query.xml

diff --git a/tests/integration/test_replicated_database/configs/disable_snapshots.xml b/tests/integration/test_replicated_database/configs/disable_snapshots.xml
new file mode 100644
index 00000000000..9a656bdcea1
--- /dev/null
+++ b/tests/integration/test_replicated_database/configs/disable_snapshots.xml
@@ -0,0 +1,3 @@
+<yandex>
+    <database_replicated_snapshot_period>0</database_replicated_snapshot_period>
+</yandex>
diff --git a/tests/integration/test_replicated_database/configs/snapshot_each_query.xml b/tests/integration/test_replicated_database/configs/snapshot_each_query.xml
new file mode 100644
index 00000000000..6eae1d9d992
--- /dev/null
+++ b/tests/integration/test_replicated_database/configs/snapshot_each_query.xml
@@ -0,0 +1,3 @@
+<yandex>
+    <database_replicated_snapshot_period>1</database_replicated_snapshot_period>
+</yandex>
diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py
index 95ca5c1e138..b557354b6ba 100644
--- a/tests/integration/test_replicated_database/test.py
+++ b/tests/integration/test_replicated_database/test.py
@@ -10,18 +10,16 @@ logging.getLogger().addHandler(logging.StreamHandler())
 
 cluster = ClickHouseCluster(__file__)
 
-node1 = cluster.add_instance('node1', macros={'replica': 'test1'}, with_zookeeper=True)
-node2 = cluster.add_instance('node2', macros={'replica': 'test2'}, with_zookeeper=True)
-node3 = cluster.add_instance('node3', macros={'replica': 'test3'}, with_zookeeper=True)
+node1 = cluster.add_instance('node1', main_configs=['configs/disable_snapshots.xml'], with_zookeeper=True, stay_alive=True)
+node2 = cluster.add_instance('node2', main_configs=['configs/disable_snapshots.xml'], with_zookeeper=True)
+node3 = cluster.add_instance('node3', main_configs=['configs/disable_snapshots.xml'], with_zookeeper=True)
 
 @pytest.fixture(scope="module")
 def started_cluster():
     try:
         cluster.start()
-
-        for node in [node1, node2]:
-            node.query("DROP DATABASE IF EXISTS testdb")
-            node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', '{replica}');")
+        node1.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'replica1');")
+        node2.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'replica2');")
         yield cluster
 
     finally:
@@ -49,15 +47,13 @@ def test_simple_alter_table(started_cluster):
     assert node1.query("desc table testdb.alter_test") == node2.query("desc table testdb.alter_test")
 
 def test_create_replica_after_delay(started_cluster):
-    DURATION_SECONDS = 2
-
-    node3.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', '{replica}');")
+    node3.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'replica3');")
 
     node1.query("ALTER TABLE testdb.alter_test ADD COLUMN Added3 UInt32 ;")
     node1.query("ALTER TABLE testdb.alter_test ADD COLUMN Added4 UInt32 ;")
     node1.query("ALTER TABLE testdb.alter_test ADD COLUMN Added5 UInt32 ;")
 
-    time.sleep(DURATION_SECONDS)
+    time.sleep(6)
 
     assert node3.query("desc table testdb.alter_test") == node1.query("desc table testdb.alter_test")
 
@@ -77,8 +73,22 @@ def test_alters_from_different_replicas(started_cluster):
 
     time.sleep(DURATION_SECONDS)
 
-    logging.info("NODE3")
-    logging.info(node3.query("desc table testdb.concurrent_test"))
-    logging.info("NODE1")
-    logging.info(node1.query("desc table testdb.concurrent_test"))
     assert node3.query("desc table testdb.concurrent_test") == node1.query("desc table testdb.concurrent_test")
+
+def test_drop_and_create_table(started_cluster):
+    node1.query("DROP TABLE testdb.concurrent_test")
+    node1.query("CREATE TABLE testdb.concurrent_test (CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192);")
+    time.sleep(5)
+    assert node3.query("desc table testdb.concurrent_test") == node1.query("desc table testdb.concurrent_test")
+
+def test_replica_restart(started_cluster):
+    node1.restart_clickhouse()
+    time.sleep(5)
+    assert node3.query("desc table testdb.concurrent_test") == node1.query("desc table testdb.concurrent_test")
+
+#def test_drop_and_create_replica(started_cluster):
+#    node1.query("DROP DATABASE testdb")
+#    node1.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'replica1');")
+#    time.sleep(6)
+#    assert node3.query("desc table testdb.concurrent_test") == node1.query("desc table testdb.concurrent_test")
+

From f57fd52e3b564072d7c2ae61ecaf06138c4201ed Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Sat, 20 Jun 2020 18:39:05 +0300
Subject: [PATCH 0036/1238] fix recursive propose for drop database db
 replicated query

---
 src/Interpreters/InterpreterDropQuery.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp
index 05418f275a2..368024da043 100644
--- a/src/Interpreters/InterpreterDropQuery.cpp
+++ b/src/Interpreters/InterpreterDropQuery.cpp
@@ -129,7 +129,8 @@ BlockIO InterpreterDropQuery::executeToTable(
             if (database->getEngineName() != "Atomic" && database->getEngineName() != "Replicated")
                 table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
 
-            if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY) {
+            // Prevents recursive drop from drop database query. The original query must specify a table.
+            if (!query_ptr->as<ASTDropQuery &>().table.empty() && database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY) {
                 database->propose(query_ptr);
             } else {
                 database->dropTable(context, table_id.table_name, query.no_delay);

From 4fc4b1d195bce04dfd08252eb6c0e3f58d0182f9 Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Sat, 20 Jun 2020 18:39:58 +0300
Subject: [PATCH 0037/1238] db replicated minor enhancements

---
 src/Databases/DatabaseAtomic.cpp     |   7 ++
 src/Databases/DatabaseAtomic.h       |   1 +
 src/Databases/DatabaseReplicated.cpp | 176 +++++++++++++++++++--------
 src/Databases/DatabaseReplicated.h   |  16 +--
 src/Databases/DatabasesCommon.cpp    |   4 +-
 5 files changed, 142 insertions(+), 62 deletions(-)

diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp
index ff30b95d139..85f6c70a07c 100644
--- a/src/Databases/DatabaseAtomic.cpp
+++ b/src/Databases/DatabaseAtomic.cpp
@@ -40,6 +40,13 @@ DatabaseAtomic::DatabaseAtomic(String name_, String metadata_path_, Context & co
     Poco::File(path_to_table_symlinks).createDirectories();
 }
 
+DatabaseAtomic::DatabaseAtomic(String name_, String metadata_path_, const String & data_path_, const String & logger, Context & context_)
+    : DatabaseOrdinary(name_, std::move(metadata_path_), data_path_, logger, context_)
+    , path_to_table_symlinks(context_.getPath() + "data/" + escapeForFileName(name_) + "/")
+{
+    Poco::File(path_to_table_symlinks).createDirectories();
+}
+
 String DatabaseAtomic::getTableDataPath(const String & table_name) const
 {
     std::lock_guard lock(mutex);
diff --git a/src/Databases/DatabaseAtomic.h b/src/Databases/DatabaseAtomic.h
index 71428fdb420..88a77da53a4 100644
--- a/src/Databases/DatabaseAtomic.h
+++ b/src/Databases/DatabaseAtomic.h
@@ -22,6 +22,7 @@ class DatabaseAtomic : public DatabaseOrdinary
 public:
 
     DatabaseAtomic(String name_, String metadata_path_, Context & context_);
+    DatabaseAtomic(String name_, String metadata_path_, const String & data_path_, const String & logger, Context & context_);
 
     String getEngineName() const override { return "Atomic"; }
 
diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index 2650bd46a58..4d16a5d05c0 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -49,6 +49,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int NO_ZOOKEEPER;
+    extern const int FILE_DOESNT_EXIST;
 }
 
 void DatabaseReplicated::setZooKeeper(zkutil::ZooKeeperPtr zookeeper)
@@ -78,9 +79,7 @@ DatabaseReplicated::DatabaseReplicated(
     const String & zookeeper_path_,
     const String & replica_name_,
     Context & context_)
-//    : DatabaseOrdinary(name_, metadata_path_, "data/" + escapeForFileName(name_) + "/", "DatabaseReplicated (" + name_ + ")", context_)
-    // TODO add constructor to Atomic and call it here with path and logger name specification
-    : DatabaseAtomic(name_, metadata_path_, context_)
+    : DatabaseAtomic(name_, metadata_path_, "store/", "DatabaseReplicated (" + name_ + ")", context_)
     , zookeeper_path(zookeeper_path_)
     , replica_name(replica_name_)
 {
@@ -90,8 +89,6 @@ DatabaseReplicated::DatabaseReplicated(
     if (!zookeeper_path.empty() && zookeeper_path.front() != '/')
         zookeeper_path = "/" + zookeeper_path;
 
-    replica_path = zookeeper_path + "/replicas/" + replica_name;
-
     if (context_.hasZooKeeper()) {
         current_zookeeper = context_.getZooKeeper();
     }
@@ -100,37 +97,101 @@ DatabaseReplicated::DatabaseReplicated(
             throw Exception("Can't create replicated database without ZooKeeper", ErrorCodes::NO_ZOOKEEPER);
     }
 
+    // New database
     if (!current_zookeeper->exists(zookeeper_path, {}, NULL)) {
         createDatabaseZKNodes();
-    } 
+    // Old replica recovery
+    } else if (current_zookeeper->exists(zookeeper_path + "/replicas/" + replica_name, {}, NULL)) {
+        String local_last_entry;
+        try
+        {
+            ReadBufferFromFile in(getMetadataPath() + ".last_entry", 16);
+            readStringUntilEOF(local_last_entry, in);
+        }
+        catch (const Exception & e)
+        {
+            if (e.code() == ErrorCodes::FILE_DOESNT_EXIST) {
+                // that is risky cause 
+                // if replica name is the same
+                // than the last one wins
+                saveState();
+            } else {
+                throw;
+            }
+        }
 
-    // replica
-    if (!current_zookeeper->exists(replica_path, {}, NULL)) {
-        current_zookeeper->createAncestors(replica_path);
-        current_zookeeper->createOrUpdate(replica_path, String(), zkutil::CreateMode::Persistent);
+        String remote_last_entry = current_zookeeper->get(zookeeper_path + "/replicas/" + replica_name, {}, NULL);
+        if (local_last_entry == remote_last_entry) {
+            last_executed_log_entry = local_last_entry;
+        } else {
+            LOG_DEBUG(log, "LOCAL: " << local_last_entry);
+            LOG_DEBUG(log, "ZK: " << remote_last_entry);
+            throw Exception("Can't create replicated database MISCONFIGURATION or something", ErrorCodes::NO_ZOOKEEPER);
+        }
     }
 
-    //loadMetadataFromSnapshot();
+    snapshot_period = context_.getConfigRef().getInt("database_replicated_snapshot_period", 10);
+    LOG_DEBUG(log, "Snapshot period is set to " << snapshot_period);
 
-    background_log_executor = global_context.getReplicatedSchedulePool().createTask(database_name + "(DatabaseReplicated::the_threeeed)", [this]{ runBackgroundLogExecutor();} );
-    background_log_executor->schedule();
+    background_log_executor = global_context.getReplicatedSchedulePool().createTask(database_name + "(DatabaseReplicated::background_executor)", [this]{ runBackgroundLogExecutor();} );
+
+    background_log_executor->scheduleAfter(500);
 }
 
 void DatabaseReplicated::createDatabaseZKNodes() {
     current_zookeeper = getZooKeeper();
 
-    if (current_zookeeper->exists(zookeeper_path))
-        return;
-
     current_zookeeper->createAncestors(zookeeper_path);
 
     current_zookeeper->createIfNotExists(zookeeper_path, String());
-    current_zookeeper->createIfNotExists(zookeeper_path + "/last_entry", "0");
     current_zookeeper->createIfNotExists(zookeeper_path + "/log", String());
-    current_zookeeper->createIfNotExists(zookeeper_path + "/snapshot", String());
+    current_zookeeper->createIfNotExists(zookeeper_path + "/snapshots", String());
+    current_zookeeper->createIfNotExists(zookeeper_path + "/replicas", String());
+}
+
+void DatabaseReplicated::RemoveOutdatedSnapshotsAndLog() {
+    // This method removes all snapshots and logged queries
+    // that no longer will be in use by current replicas or
+    // new coming ones.
+    // Each registered replica has its state in ZooKeeper.
+    // Therefore removed snapshots and logged queries are less
+    // than a least advanced replica.
+    // It does not interfere with a new coming replica
+    // metadata loading from snapshot
+    // because the replica will use the last snapshot available
+    // and this snapshot will set the last executed log query
+    // to a greater one than the least advanced current replica.
+    current_zookeeper = getZooKeeper();
+    Strings replica_states = current_zookeeper->getChildren(zookeeper_path + "/replicas");
+    auto least_advanced = std::min_element(replica_states.begin(), replica_states.end());
+    Strings snapshots = current_zookeeper->getChildren(zookeeper_path + "/snapshots");
+    
+    if (snapshots.size() < 2) {
+        return;
+    }
+
+    std::sort(snapshots.begin(), snapshots.end());
+    auto still_useful = std::lower_bound(snapshots.begin(), snapshots.end(), *least_advanced);
+    snapshots.erase(still_useful, snapshots.end());
+    for (const String & snapshot : snapshots) {
+        current_zookeeper->tryRemoveRecursive(zookeeper_path + "/snapshots/" + snapshot);
+    }
+
+    Strings log_entry_names = current_zookeeper->getChildren(zookeeper_path + "/log");
+    std::sort(log_entry_names.begin(), log_entry_names.end());
+    auto still_useful_log = std::upper_bound(log_entry_names.begin(), log_entry_names.end(), *still_useful);
+    log_entry_names.erase(still_useful_log, log_entry_names.end());
+    for (const String & log_entry_name : log_entry_names) {
+        String log_entry_path = zookeeper_path + "/log/" + log_entry_name;
+        current_zookeeper->tryRemove(log_entry_path);
+    }
 }
 
 void DatabaseReplicated::runBackgroundLogExecutor() {
+    if (last_executed_log_entry == "") {
+        loadMetadataFromSnapshot();
+    }
+
     current_zookeeper = getZooKeeper();
     Strings log_entry_names = current_zookeeper->getChildren(zookeeper_path + "/log");
 
@@ -143,34 +204,27 @@ void DatabaseReplicated::runBackgroundLogExecutor() {
         String log_entry_path = zookeeper_path + "/log/" + log_entry_name;
         executeFromZK(log_entry_path);
         last_executed_log_entry = log_entry_name;
+        saveState();
+
+        int log_n = parse<int>(log_entry_name.substr(4));
+        int last_log_n = parse<int>(log_entry_names.back().substr(4));
+
+        // The third condition gurantees at most one snapshot per batch
+        if (log_n > 0 && snapshot_period > 0 && (last_log_n - log_n) / snapshot_period == 0 && log_n % snapshot_period == 0) {
+            createSnapshot();
+        }
     }
 
     background_log_executor->scheduleAfter(500);
-
-    // String last_n = current_zookeeper->get(zookeeper_path + "/last_entry", {}, NULL);
-    // size_t last_n_parsed = parse<size_t>(last_n);
-
-    // bool newEntries = current_log_entry_n < last_n_parsed;
-    // while (current_log_entry_n < last_n_parsed) {
-    //     current_log_entry_n++;
-    //     String log_path = zookeeper_path + "/log/log." + std::to_string(current_log_entry_n);
-    //     executeFromZK(log_path);
-    // }
-    // if (newEntries) {
-    //     saveState();
-    // }
-    // background_log_executor->scheduleAfter(500);
 }
 
 void DatabaseReplicated::saveState() {
-    String state = std::to_string(current_log_entry_n);
-
     current_zookeeper = getZooKeeper();
-    current_zookeeper->createOrUpdate(replica_path + "/last_entry", state, zkutil::CreateMode::Persistent);
+    current_zookeeper->createOrUpdate(zookeeper_path + "/replicas/" + replica_name, last_executed_log_entry, zkutil::CreateMode::Persistent);
 
     String metadata_file = getMetadataPath() + ".last_entry";
-    WriteBufferFromFile out(metadata_file, state.size(), O_WRONLY | O_CREAT);
-    writeString(state, out);
+    WriteBufferFromFile out(metadata_file, last_executed_log_entry.size(), O_WRONLY | O_CREAT);
+    writeString(last_executed_log_entry, out);
     out.next();
     if (global_context.getSettingsRef().fsync_metadata)
         out.sync();
@@ -201,47 +255,63 @@ void DatabaseReplicated::executeFromZK(String & path) {
         LOG_DEBUG(log, "Executed query: " << query_to_execute);
 }
 
-// TODO Move to ZooKeeper/Lock and remove it from here and ddlworker
-// static std::unique_ptr<zkutil::Lock> createSimpleZooKeeperLock(
-//     const std::shared_ptr<zkutil::ZooKeeper> & zookeeper, const String & lock_prefix, const String & lock_name, const String & lock_message)
-// {
-//     auto zookeeper_holder = std::make_shared<zkutil::ZooKeeperHolder>();
-//     zookeeper_holder->initFromInstance(zookeeper);
-//     return std::make_unique<zkutil::Lock>(std::move(zookeeper_holder), lock_prefix, lock_name, lock_message);
-// }
-
-
 void DatabaseReplicated::propose(const ASTPtr & query) {
     current_zookeeper = getZooKeeper();
 
-    LOG_DEBUG(log, "PROPOSINGGG query: " << queryToString(query));
+    LOG_DEBUG(log, "Writing the query to log: " << queryToString(query));
     current_zookeeper->create(zookeeper_path + "/log/log-", queryToString(query), zkutil::CreateMode::PersistentSequential);
 
     background_log_executor->schedule();
 }
 
-void DatabaseReplicated::updateSnapshot() {
+void DatabaseReplicated::createSnapshot() {
     current_zookeeper = getZooKeeper();
-    current_zookeeper->tryRemoveChildren(zookeeper_path + "/snapshot");
+    String snapshot_path = zookeeper_path + "/snapshots/" + last_executed_log_entry;
+
+    if (Coordination::ZNODEEXISTS == current_zookeeper->tryCreate(snapshot_path, String(), zkutil::CreateMode::Persistent)) {
+        return;
+    }
+    
     for (auto iterator = getTablesIterator({}); iterator->isValid(); iterator->next()) {
         String table_name = iterator->name();
         auto query = getCreateQueryFromMetadata(getObjectMetadataPath(table_name), true);
         String statement = queryToString(query);
-        current_zookeeper->createOrUpdate(zookeeper_path + "/snapshot/" + table_name, statement, zkutil::CreateMode::Persistent);
+        current_zookeeper->createOrUpdate(snapshot_path + "/" + table_name, statement, zkutil::CreateMode::Persistent);
     }
+
+    RemoveOutdatedSnapshotsAndLog();
 }
 
 void DatabaseReplicated::loadMetadataFromSnapshot() {
     current_zookeeper = getZooKeeper();
 
+    Strings snapshots;
+    if (current_zookeeper->tryGetChildren(zookeeper_path + "/snapshots", snapshots) != Coordination::ZOK)
+        return;
+
+    if (snapshots.size() < 1) {
+        return;
+    }
+
+    auto latest_snapshot = std::max_element(snapshots.begin(), snapshots.end());
     Strings metadatas;
-    if (current_zookeeper->tryGetChildren(zookeeper_path + "/snapshot", metadatas) != Coordination::ZOK)
+    if (current_zookeeper->tryGetChildren(zookeeper_path + "/snapshots/" + *latest_snapshot, metadatas) != Coordination::ZOK)
         return;
 
     for (auto t = metadatas.begin(); t != metadatas.end(); ++t) {
-        String path = zookeeper_path + "/snapshot/" + *t;
+        String path = zookeeper_path + "/snapshots/" + *latest_snapshot + "/" + *t;
         executeFromZK(path);
     }
+
+    last_executed_log_entry = *latest_snapshot;
+    saveState();
+}
+
+void DatabaseReplicated::drop(const Context & context_)
+{
+    current_zookeeper = getZooKeeper();
+    current_zookeeper->tryRemove(zookeeper_path + "/replicas/" + replica_name);
+    DatabaseAtomic::drop(context_);
 }
 
 }
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index 19a0ea09e11..471365361b7 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -23,13 +23,13 @@ namespace DB
   * The engine has two parameters ZooKeeper path and 
   * replica name.
   * The same ZooKeeper path corresponds to the same
-  * database. Replica names must be different for all replicas
+  * database. Replica names MUST be different for all replicas
   * of the same database.
   *
   * Using this engine, creation of Replicated tables
   * requires no ZooKeeper path and replica name parameters.
   * Table's replica name is the same as database replica name.
-  * Table's ZooKeeper path is a concatenation of database's
+  * Table's ZooKeeper path is a concatenation of database
   * ZooKeeper path, /tables/, and UUID of the table.
   */
 class DatabaseReplicated : public DatabaseAtomic
@@ -37,6 +37,8 @@ class DatabaseReplicated : public DatabaseAtomic
 public:
     DatabaseReplicated(const String & name_, const String & metadata_path_, const String & zookeeper_path_, const String & replica_name_, Context & context);
 
+    void drop(const Context & /*context*/) override;
+
     String getEngineName() const override { return "Replicated"; }
 
     void propose(const ASTPtr & query) override;
@@ -48,23 +50,23 @@ private:
     void createDatabaseZKNodes();
 
     void runBackgroundLogExecutor();
-
+    
     void executeFromZK(String & path);
 
     void saveState();
-    void updateSnapshot();
+
     void loadMetadataFromSnapshot();
+    void createSnapshot();
+    void RemoveOutdatedSnapshotsAndLog();
 
     std::unique_ptr<Context> current_context; // to run executeQuery
 
-    std::atomic<size_t> current_log_entry_n = 0;
+    int snapshot_period;
 
     String last_executed_log_entry = "";
 
     BackgroundSchedulePool::TaskHolder background_log_executor;
 
-    String replica_path;
-
     zkutil::ZooKeeperPtr current_zookeeper;        /// Use only the methods below.
     mutable std::mutex current_zookeeper_mutex;    /// To recreate the session in the background thread.
 
diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp
index 7925d812241..4575e6da953 100644
--- a/src/Databases/DatabasesCommon.cpp
+++ b/src/Databases/DatabasesCommon.cpp
@@ -78,7 +78,7 @@ StoragePtr DatabaseWithOwnTablesBase::detachTableUnlocked(const String & table_n
     auto table_id = res->getStorageID();
     if (table_id.hasUUID())
     {
-        assert(getDatabaseName() == DatabaseCatalog::TEMPORARY_DATABASE || getEngineName() == "Atomic");
+        assert(getDatabaseName() == DatabaseCatalog::TEMPORARY_DATABASE || getEngineName() == "Atomic" || getEngineName() == "Replicated");
         DatabaseCatalog::instance().removeUUIDMapping(table_id.uuid);
     }
 
@@ -120,7 +120,7 @@ void DatabaseWithOwnTablesBase::shutdown()
         kv.second->shutdown();
         if (table_id.hasUUID())
         {
-            assert(getDatabaseName() == DatabaseCatalog::TEMPORARY_DATABASE || getEngineName() == "Atomic");
+            assert(getDatabaseName() == DatabaseCatalog::TEMPORARY_DATABASE || getEngineName() == "Atomic" || getEngineName() == "Replicated");
             DatabaseCatalog::instance().removeUUIDMapping(table_id.uuid);
         }
     }

From 82f5281cfe52ce4643ced3b4ad3f2c229b894014 Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Sun, 21 Jun 2020 18:03:04 +0300
Subject: [PATCH 0038/1238] remove redundant includes

---
 src/Databases/DatabaseReplicated.cpp | 28 ----------------------------
 src/Databases/DatabaseReplicated.h   |  4 ----
 2 files changed, 32 deletions(-)

diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index 4d16a5d05c0..5a42edd9f0d 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -1,46 +1,18 @@
-#include <iomanip>
-
-#include <Core/Settings.h>
-#include <Databases/DatabaseOnDisk.h>
-#include <Databases/DatabaseOrdinary.h>
 #include <Databases/DatabaseReplicated.h>
-#include <Databases/DatabasesCommon.h>
 #include <IO/ReadBufferFromFile.h>
 #include <IO/ReadBufferFromString.h>
 #include <IO/ReadHelpers.h>
 #include <IO/WriteBufferFromFile.h>
 #include <IO/WriteHelpers.h>
 #include <Interpreters/Context.h>
-#include <Interpreters/InterpreterCreateQuery.h>
 #include <Interpreters/executeQuery.h>
-#include <Parsers/ASTCreateQuery.h>
-#include <Parsers/ASTSetQuery.h>
-#include <Parsers/ParserCreateQuery.h>
-#include <Parsers/formatAST.h>
-#include <Parsers/parseQuery.h>
-#include <Storages/StorageFactory.h>
-#include <TableFunctions/TableFunctionFactory.h>
-
 #include <Parsers/queryToString.h>
-
-#include <Poco/DirectoryIterator.h>
-#include <Poco/Event.h>
-#include <Common/Stopwatch.h>
-#include <Common/setThreadName.h>
-#include <Common/ThreadPool.h>
-#include <Common/escapeForFileName.h>
-#include <Common/quoteString.h>
-#include <Common/typeid_cast.h>
-#include <common/logger_useful.h>
 #include <Common/Exception.h>
-
 #include <Common/ZooKeeper/KeeperException.h>
 #include <Common/ZooKeeper/Types.h>
 #include <Common/ZooKeeper/ZooKeeper.h>
 #include <Common/ZooKeeper/Lock.h>
 
-#include <ext/scope_guard.h>
-#include <common/sleep.h>
 
 namespace DB
 {
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index 471365361b7..ab7b596eb4e 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -1,13 +1,9 @@
 #pragma once
 
 #include <Databases/DatabaseAtomic.h>
-#include <Common/randomSeed.h>
 #include <Common/ZooKeeper/ZooKeeper.h>
 #include <Core/BackgroundSchedulePool.h>
 
-#include <atomic>
-#include <thread>
-
 namespace DB
 {
 /** DatabaseReplicated engine

From 67588edcf5c5fea7e29958329b38b6d3db2b9d0f Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Mon, 22 Jun 2020 17:19:26 +0300
Subject: [PATCH 0039/1238] clean up db replicated files and add more tests

---
 src/Databases/DatabaseReplicated.cpp          | 39 +++++----
 src/Databases/DatabaseReplicated.h            |  2 +-
 .../test_replicated_database/test.py          | 81 ++++++++++---------
 3 files changed, 65 insertions(+), 57 deletions(-)

diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index 5a42edd9f0d..6a137a2af0c 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -21,7 +21,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int NO_ZOOKEEPER;
-    extern const int FILE_DOESNT_EXIST;
+    extern const int LOGICAL_ERROR;
 }
 
 void DatabaseReplicated::setZooKeeper(zkutil::ZooKeeperPtr zookeeper)
@@ -74,6 +74,8 @@ DatabaseReplicated::DatabaseReplicated(
         createDatabaseZKNodes();
     // Old replica recovery
     } else if (current_zookeeper->exists(zookeeper_path + "/replicas/" + replica_name, {}, NULL)) {
+        String remote_last_entry = current_zookeeper->get(zookeeper_path + "/replicas/" + replica_name, {}, NULL);
+
         String local_last_entry;
         try
         {
@@ -82,28 +84,21 @@ DatabaseReplicated::DatabaseReplicated(
         }
         catch (const Exception & e)
         {
-            if (e.code() == ErrorCodes::FILE_DOESNT_EXIST) {
-                // that is risky cause 
-                // if replica name is the same
-                // than the last one wins
-                saveState();
-            } else {
-                throw;
-            }
+                // Metadata is corrupted.
+                // Replica erases the previous zk last executed log entry
+                // and behaves like a new clean replica.
+                writeLastExecutedToDiskAndZK();
         }
 
-        String remote_last_entry = current_zookeeper->get(zookeeper_path + "/replicas/" + replica_name, {}, NULL);
-        if (local_last_entry == remote_last_entry) {
+        if (!local_last_entry.empty() && local_last_entry == remote_last_entry) {
             last_executed_log_entry = local_last_entry;
         } else {
-            LOG_DEBUG(log, "LOCAL: " << local_last_entry);
-            LOG_DEBUG(log, "ZK: " << remote_last_entry);
-            throw Exception("Can't create replicated database MISCONFIGURATION or something", ErrorCodes::NO_ZOOKEEPER);
+            throw Exception("Replica name might be in use by a different node. Please check replica_name parameter. Remove .last_entry file from metadata to create a new replica.", ErrorCodes::LOGICAL_ERROR);
         }
     }
 
     snapshot_period = context_.getConfigRef().getInt("database_replicated_snapshot_period", 10);
-    LOG_DEBUG(log, "Snapshot period is set to " << snapshot_period);
+    LOG_DEBUG(log, "Snapshot period is set to " << snapshot_period << " log entries per one snapshot");
 
     background_log_executor = global_context.getReplicatedSchedulePool().createTask(database_name + "(DatabaseReplicated::background_executor)", [this]{ runBackgroundLogExecutor();} );
 
@@ -176,12 +171,12 @@ void DatabaseReplicated::runBackgroundLogExecutor() {
         String log_entry_path = zookeeper_path + "/log/" + log_entry_name;
         executeFromZK(log_entry_path);
         last_executed_log_entry = log_entry_name;
-        saveState();
+        writeLastExecutedToDiskAndZK();
 
         int log_n = parse<int>(log_entry_name.substr(4));
         int last_log_n = parse<int>(log_entry_names.back().substr(4));
 
-        // The third condition gurantees at most one snapshot per batch
+        // The third condition gurantees at most one snapshot creation per batch
         if (log_n > 0 && snapshot_period > 0 && (last_log_n - log_n) / snapshot_period == 0 && log_n % snapshot_period == 0) {
             createSnapshot();
         }
@@ -190,7 +185,7 @@ void DatabaseReplicated::runBackgroundLogExecutor() {
     background_log_executor->scheduleAfter(500);
 }
 
-void DatabaseReplicated::saveState() {
+void DatabaseReplicated::writeLastExecutedToDiskAndZK() {
     current_zookeeper = getZooKeeper();
     current_zookeeper->createOrUpdate(zookeeper_path + "/replicas/" + replica_name, last_executed_log_entry, zkutil::CreateMode::Persistent);
 
@@ -230,7 +225,7 @@ void DatabaseReplicated::executeFromZK(String & path) {
 void DatabaseReplicated::propose(const ASTPtr & query) {
     current_zookeeper = getZooKeeper();
 
-    LOG_DEBUG(log, "Writing the query to log: " << queryToString(query));
+    LOG_DEBUG(log, "Proposing query: " << queryToString(query));
     current_zookeeper->create(zookeeper_path + "/log/log-", queryToString(query), zkutil::CreateMode::PersistentSequential);
 
     background_log_executor->schedule();
@@ -255,6 +250,8 @@ void DatabaseReplicated::createSnapshot() {
 }
 
 void DatabaseReplicated::loadMetadataFromSnapshot() {
+    // Executes the latest snapshot.
+    // Used by new replicas only.
     current_zookeeper = getZooKeeper();
 
     Strings snapshots;
@@ -270,13 +267,15 @@ void DatabaseReplicated::loadMetadataFromSnapshot() {
     if (current_zookeeper->tryGetChildren(zookeeper_path + "/snapshots/" + *latest_snapshot, metadatas) != Coordination::ZOK)
         return;
 
+    LOG_DEBUG(log, "Executing " << *latest_snapshot << " snapshot");
     for (auto t = metadatas.begin(); t != metadatas.end(); ++t) {
         String path = zookeeper_path + "/snapshots/" + *latest_snapshot + "/" + *t;
+
         executeFromZK(path);
     }
 
     last_executed_log_entry = *latest_snapshot;
-    saveState();
+    writeLastExecutedToDiskAndZK();
 }
 
 void DatabaseReplicated::drop(const Context & context_)
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index ab7b596eb4e..1cdcc3e990c 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -49,7 +49,7 @@ private:
     
     void executeFromZK(String & path);
 
-    void saveState();
+    void writeLastExecutedToDiskAndZK();
 
     void loadMetadataFromSnapshot();
     void createSnapshot();
diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py
index b557354b6ba..0b7f8aadec2 100644
--- a/tests/integration/test_replicated_database/test.py
+++ b/tests/integration/test_replicated_database/test.py
@@ -10,16 +10,18 @@ logging.getLogger().addHandler(logging.StreamHandler())
 
 cluster = ClickHouseCluster(__file__)
 
-node1 = cluster.add_instance('node1', main_configs=['configs/disable_snapshots.xml'], with_zookeeper=True, stay_alive=True)
-node2 = cluster.add_instance('node2', main_configs=['configs/disable_snapshots.xml'], with_zookeeper=True)
-node3 = cluster.add_instance('node3', main_configs=['configs/disable_snapshots.xml'], with_zookeeper=True)
+main_node = cluster.add_instance('main_node', main_configs=['configs/disable_snapshots.xml'], with_zookeeper=True, stay_alive=True)
+dummy_node = cluster.add_instance('dummy_node', main_configs=['configs/disable_snapshots.xml'], with_zookeeper=True)
+competing_node = cluster.add_instance('competing_node', main_configs=['configs/disable_snapshots.xml'], with_zookeeper=True)
+snapshotting_node = cluster.add_instance('snapshotting_node', main_configs=['configs/snapshot_each_query.xml'], with_zookeeper=True)
+snapshot_recovering_node = cluster.add_instance('snapshot_recovering_node', main_configs=['configs/disable_snapshots.xml'], with_zookeeper=True)
 
 @pytest.fixture(scope="module")
 def started_cluster():
     try:
         cluster.start()
-        node1.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'replica1');")
-        node2.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'replica2');")
+        main_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'replica1');")
+        dummy_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'replica2');")
         yield cluster
 
     finally:
@@ -28,67 +30,74 @@ def started_cluster():
 
 def test_create_replicated_table(started_cluster):
     DURATION_SECONDS = 1
-    node1.query("CREATE TABLE testdb.replicated_table (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree(d, k, 8192);")
+    main_node.query("CREATE TABLE testdb.replicated_table (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree(d, k, 8192);")
 
     time.sleep(DURATION_SECONDS)
-    assert node1.query("desc table testdb.replicated_table") == node2.query("desc table testdb.replicated_table")
+    assert main_node.query("desc table testdb.replicated_table") == dummy_node.query("desc table testdb.replicated_table")
 
 def test_simple_alter_table(started_cluster):
     DURATION_SECONDS = 1
-    node1.query("CREATE TABLE testdb.alter_test (CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192);")
-    node1.query("ALTER TABLE testdb.alter_test ADD COLUMN Added0 UInt32;")
-    node1.query("ALTER TABLE testdb.alter_test ADD COLUMN Added2 UInt32;")
-    node1.query("ALTER TABLE testdb.alter_test ADD COLUMN Added1 UInt32 AFTER Added0;")
-    node1.query("ALTER TABLE testdb.alter_test ADD COLUMN AddedNested1 Nested(A UInt32, B UInt64) AFTER Added2;")
-    node1.query("ALTER TABLE testdb.alter_test ADD COLUMN AddedNested1.C Array(String) AFTER AddedNested1.B;")
-    node1.query("ALTER TABLE testdb.alter_test ADD COLUMN AddedNested2 Nested(A UInt32, B UInt64) AFTER AddedNested1;")
+    main_node.query("CREATE TABLE testdb.alter_test (CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192);")
+    main_node.query("ALTER TABLE testdb.alter_test ADD COLUMN Added0 UInt32;")
+    main_node.query("ALTER TABLE testdb.alter_test ADD COLUMN Added2 UInt32;")
+    main_node.query("ALTER TABLE testdb.alter_test ADD COLUMN Added1 UInt32 AFTER Added0;")
+    main_node.query("ALTER TABLE testdb.alter_test ADD COLUMN AddedNested1 Nested(A UInt32, B UInt64) AFTER Added2;")
+    main_node.query("ALTER TABLE testdb.alter_test ADD COLUMN AddedNested1.C Array(String) AFTER AddedNested1.B;")
+    main_node.query("ALTER TABLE testdb.alter_test ADD COLUMN AddedNested2 Nested(A UInt32, B UInt64) AFTER AddedNested1;")
 
     time.sleep(DURATION_SECONDS)
-    assert node1.query("desc table testdb.alter_test") == node2.query("desc table testdb.alter_test")
+    assert main_node.query("desc table testdb.alter_test") == dummy_node.query("desc table testdb.alter_test")
 
 def test_create_replica_after_delay(started_cluster):
-    node3.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'replica3');")
+    competing_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'replica3');")
 
-    node1.query("ALTER TABLE testdb.alter_test ADD COLUMN Added3 UInt32 ;")
-    node1.query("ALTER TABLE testdb.alter_test ADD COLUMN Added4 UInt32 ;")
-    node1.query("ALTER TABLE testdb.alter_test ADD COLUMN Added5 UInt32 ;")
+    main_node.query("ALTER TABLE testdb.alter_test ADD COLUMN Added3 UInt32 ;")
+    main_node.query("ALTER TABLE testdb.alter_test ADD COLUMN Added4 UInt32 ;")
+    main_node.query("ALTER TABLE testdb.alter_test ADD COLUMN Added5 UInt32 ;")
 
     time.sleep(6)
 
-    assert node3.query("desc table testdb.alter_test") == node1.query("desc table testdb.alter_test")
+    assert competing_node.query("desc table testdb.alter_test") == main_node.query("desc table testdb.alter_test")
 
 def test_alters_from_different_replicas(started_cluster):
     DURATION_SECONDS = 1
 
-    node1.query("CREATE TABLE testdb.concurrent_test (CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192);")
+    main_node.query("CREATE TABLE testdb.concurrent_test (CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192);")
 
     time.sleep(DURATION_SECONDS)
 
-    node3.query("ALTER TABLE testdb.concurrent_test ADD COLUMN Added0 UInt32;")
-    node1.query("ALTER TABLE testdb.concurrent_test ADD COLUMN Added2 UInt32;")
-    node3.query("ALTER TABLE testdb.concurrent_test ADD COLUMN Added1 UInt32 AFTER Added0;")
-    node1.query("ALTER TABLE testdb.concurrent_test ADD COLUMN AddedNested1 Nested(A UInt32, B UInt64) AFTER Added2;")
-    node3.query("ALTER TABLE testdb.concurrent_test ADD COLUMN AddedNested1.C Array(String) AFTER AddedNested1.B;")
-    node1.query("ALTER TABLE testdb.concurrent_test ADD COLUMN AddedNested2 Nested(A UInt32, B UInt64) AFTER AddedNested1;")
+    competing_node.query("ALTER TABLE testdb.concurrent_test ADD COLUMN Added0 UInt32;")
+    main_node.query("ALTER TABLE testdb.concurrent_test ADD COLUMN Added2 UInt32;")
+    competing_node.query("ALTER TABLE testdb.concurrent_test ADD COLUMN Added1 UInt32 AFTER Added0;")
+    main_node.query("ALTER TABLE testdb.concurrent_test ADD COLUMN AddedNested1 Nested(A UInt32, B UInt64) AFTER Added2;")
+    competing_node.query("ALTER TABLE testdb.concurrent_test ADD COLUMN AddedNested1.C Array(String) AFTER AddedNested1.B;")
+    main_node.query("ALTER TABLE testdb.concurrent_test ADD COLUMN AddedNested2 Nested(A UInt32, B UInt64) AFTER AddedNested1;")
 
     time.sleep(DURATION_SECONDS)
 
-    assert node3.query("desc table testdb.concurrent_test") == node1.query("desc table testdb.concurrent_test")
+    assert competing_node.query("desc table testdb.concurrent_test") == main_node.query("desc table testdb.concurrent_test")
 
 def test_drop_and_create_table(started_cluster):
-    node1.query("DROP TABLE testdb.concurrent_test")
-    node1.query("CREATE TABLE testdb.concurrent_test (CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192);")
+    main_node.query("DROP TABLE testdb.concurrent_test")
+    main_node.query("CREATE TABLE testdb.concurrent_test (CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192);")
     time.sleep(5)
-    assert node3.query("desc table testdb.concurrent_test") == node1.query("desc table testdb.concurrent_test")
+    assert competing_node.query("desc table testdb.concurrent_test") == main_node.query("desc table testdb.concurrent_test")
 
 def test_replica_restart(started_cluster):
-    node1.restart_clickhouse()
+    main_node.restart_clickhouse()
     time.sleep(5)
-    assert node3.query("desc table testdb.concurrent_test") == node1.query("desc table testdb.concurrent_test")
+    assert competing_node.query("desc table testdb.concurrent_test") == main_node.query("desc table testdb.concurrent_test")
+
+def test_snapshot_and_snapshot_recover(started_cluster):
+    snapshotting_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'replica4');")
+    time.sleep(5)
+    snapshot_recovering_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'replica5');")
+    time.sleep(5)
+    assert snapshotting_node.query("desc table testdb.alter_test") == snapshot_recovering_node.query("desc table testdb.alter_test")
 
 #def test_drop_and_create_replica(started_cluster):
-#    node1.query("DROP DATABASE testdb")
-#    node1.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'replica1');")
+#    main_node.query("DROP DATABASE testdb")
+#    main_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'replica1');")
 #    time.sleep(6)
-#    assert node3.query("desc table testdb.concurrent_test") == node1.query("desc table testdb.concurrent_test")
+#    assert competing_node.query("desc table testdb.concurrent_test") == main_node.query("desc table testdb.concurrent_test")
 

From 16e50e33d76f4c4e4ccd167f2354c41782fcf76a Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Mon, 22 Jun 2020 17:22:26 +0300
Subject: [PATCH 0040/1238] fix typo

---
 src/Databases/DatabaseReplicated.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index 6a137a2af0c..bf974901e41 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -121,11 +121,11 @@ void DatabaseReplicated::RemoveOutdatedSnapshotsAndLog() {
     // that no longer will be in use by current replicas or
     // new coming ones.
     // Each registered replica has its state in ZooKeeper.
-    // Therefore removed snapshots and logged queries are less
-    // than a least advanced replica.
+    // Therefore, snapshots and logged queries that are less
+    // than a least advanced replica are removed.
     // It does not interfere with a new coming replica
     // metadata loading from snapshot
-    // because the replica will use the last snapshot available
+    // because the replica will use the latest snapshot available
     // and this snapshot will set the last executed log query
     // to a greater one than the least advanced current replica.
     current_zookeeper = getZooKeeper();

From d293e002a7251f58eee5601749169435d25136ba Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Wed, 24 Jun 2020 15:45:42 +0300
Subject: [PATCH 0041/1238] address pr comments

---
 src/Databases/DatabaseReplicated.cpp        | 24 +++++++++++++++------
 src/Interpreters/InterpreterCreateQuery.cpp |  2 +-
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index bf974901e41..adfd28f8914 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -22,6 +22,7 @@ namespace ErrorCodes
 {
     extern const int NO_ZOOKEEPER;
     extern const int LOGICAL_ERROR;
+    extern const int BAD_ARGUMENTS;
 }
 
 void DatabaseReplicated::setZooKeeper(zkutil::ZooKeeperPtr zookeeper)
@@ -55,10 +56,14 @@ DatabaseReplicated::DatabaseReplicated(
     , zookeeper_path(zookeeper_path_)
     , replica_name(replica_name_)
 {
-    if (!zookeeper_path.empty() && zookeeper_path.back() == '/')
+    if (zookeeper_path.empty() || replica_name.empty()) {
+        throw Exception("ZooKeeper path and replica name must be non-empty", ErrorCodes::BAD_ARGUMENTS);
+    }
+
+    if (zookeeper_path.back() == '/')
         zookeeper_path.resize(zookeeper_path.size() - 1);
     // If zookeeper chroot prefix is used, path should start with '/', because chroot concatenates without it.
-    if (!zookeeper_path.empty() && zookeeper_path.front() != '/')
+    if (zookeeper_path.front() != '/')
         zookeeper_path = "/" + zookeeper_path;
 
     if (context_.hasZooKeeper()) {
@@ -70,10 +75,10 @@ DatabaseReplicated::DatabaseReplicated(
     }
 
     // New database
-    if (!current_zookeeper->exists(zookeeper_path, {}, NULL)) {
+    if (!current_zookeeper->exists(zookeeper_path)) {
         createDatabaseZKNodes();
     // Old replica recovery
-    } else if (current_zookeeper->exists(zookeeper_path + "/replicas/" + replica_name, {}, NULL)) {
+    } else if (current_zookeeper->exists(zookeeper_path + "/replicas/" + replica_name)) {
         String remote_last_entry = current_zookeeper->get(zookeeper_path + "/replicas/" + replica_name, {}, NULL);
 
         String local_last_entry;
@@ -243,8 +248,9 @@ void DatabaseReplicated::createSnapshot() {
         String table_name = iterator->name();
         auto query = getCreateQueryFromMetadata(getObjectMetadataPath(table_name), true);
         String statement = queryToString(query);
-        current_zookeeper->createOrUpdate(snapshot_path + "/" + table_name, statement, zkutil::CreateMode::Persistent);
+        current_zookeeper->createIfNotExists(snapshot_path + "/" + table_name, statement);
     }
+    current_zookeeper->createIfNotExists(snapshot_path + "/.completed", String());
 
     RemoveOutdatedSnapshotsAndLog();
 }
@@ -258,11 +264,17 @@ void DatabaseReplicated::loadMetadataFromSnapshot() {
     if (current_zookeeper->tryGetChildren(zookeeper_path + "/snapshots", snapshots) != Coordination::ZOK)
         return;
 
+    auto latest_snapshot = std::max_element(snapshots.begin(), snapshots.end());
+    while (snapshots.size() > 0 && !current_zookeeper->exists(zookeeper_path + "/snapshots/" + *latest_snapshot + "/.completed")) {
+        snapshots.erase(latest_snapshot);
+        latest_snapshot = std::max_element(snapshots.begin(), snapshots.end());
+    }
+
     if (snapshots.size() < 1) {
         return;
     }
 
-    auto latest_snapshot = std::max_element(snapshots.begin(), snapshots.end());
+
     Strings metadatas;
     if (current_zookeeper->tryGetChildren(zookeeper_path + "/snapshots/" + *latest_snapshot, metadatas) != Coordination::ZOK)
         return;
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index 6806679cb4d..9d3abf2c8a6 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -640,7 +640,7 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
         }
         else if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind == ClientInfo::QueryKind::REPLICATED_LOG_QUERY) {
             if (create.uuid == UUIDHelpers::Nil)
-                throw Exception("Table UUID is not specified in DDL log", ErrorCodes::INCORRECT_QUERY);
+                throw Exception("Table UUID is not specified in DDL log", ErrorCodes::LOGICAL_ERROR);
         }
         else
         {

From 9635ea64bed93a587a147a21fbeda27cc08cf43d Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Wed, 24 Jun 2020 15:50:23 +0300
Subject: [PATCH 0042/1238] Add desc of propose idatabase method

---
 src/Databases/IDatabase.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h
index 5b3003f36b4..b80e73be108 100644
--- a/src/Databases/IDatabase.h
+++ b/src/Databases/IDatabase.h
@@ -161,6 +161,7 @@ public:
     /// Is the database empty.
     virtual bool empty() const = 0;
 
+    /// Submit query to log. Currently used by DatabaseReplicated engine only.
     virtual void propose(const ASTPtr & /*query*/) {
         throw Exception(getEngineName() + ": propose() is not supported", ErrorCodes::NOT_IMPLEMENTED);
     }

From dde293fc3d10470bbe65b5ef4f58a5c2cd2d851e Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Wed, 24 Jun 2020 16:37:29 +0300
Subject: [PATCH 0043/1238] check schema after alters in test

---
 .../test_replicated_database/test.py          | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py
index 0b7f8aadec2..346114cb8c4 100644
--- a/tests/integration/test_replicated_database/test.py
+++ b/tests/integration/test_replicated_database/test.py
@@ -46,6 +46,28 @@ def test_simple_alter_table(started_cluster):
     main_node.query("ALTER TABLE testdb.alter_test ADD COLUMN AddedNested2 Nested(A UInt32, B UInt64) AFTER AddedNested1;")
 
     time.sleep(DURATION_SECONDS)
+
+    schema = main_node.query("show create table testdb.alter_test")
+    fields = [
+        "`CounterID`",
+        "`StartDate`",
+        "`UserID`",
+        "`VisitID`",
+        "`NestedColumn.A`",
+        "`NestedColumn.S`",
+        "`ToDrop`",
+        "`Added0`",
+        "`Added1`",
+        "`Added2`",
+        "`AddedNested1.A`",
+        "`AddedNested1.B`",
+        "`AddedNested1.C`",
+        "`AddedNested2.A`",
+        "`AddedNested2.B`"]
+
+    for field in fields:
+        assert field in schema
+
     assert main_node.query("desc table testdb.alter_test") == dummy_node.query("desc table testdb.alter_test")
 
 def test_create_replica_after_delay(started_cluster):

From e23c7a313eaafa174b3e0404469c152c1ff08c00 Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Fri, 26 Jun 2020 17:05:27 +0300
Subject: [PATCH 0044/1238] address pr comments

---
 src/Databases/DatabaseOnDisk.h       |  2 +-
 src/Databases/DatabaseReplicated.cpp | 15 ++++++++-------
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/src/Databases/DatabaseOnDisk.h b/src/Databases/DatabaseOnDisk.h
index dc347c99542..00689900edf 100644
--- a/src/Databases/DatabaseOnDisk.h
+++ b/src/Databases/DatabaseOnDisk.h
@@ -86,7 +86,7 @@ protected:
 
     const String metadata_path;
     const String data_path;
-    Context & global_context;
+    const Context & global_context;
 };
 
 }
diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index adfd28f8914..0ddc976d8d0 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -105,7 +105,7 @@ DatabaseReplicated::DatabaseReplicated(
     snapshot_period = context_.getConfigRef().getInt("database_replicated_snapshot_period", 10);
     LOG_DEBUG(log, "Snapshot period is set to " << snapshot_period << " log entries per one snapshot");
 
-    background_log_executor = global_context.getReplicatedSchedulePool().createTask(database_name + "(DatabaseReplicated::background_executor)", [this]{ runBackgroundLogExecutor();} );
+    background_log_executor = context_.getReplicatedSchedulePool().createTask(database_name + "(DatabaseReplicated::background_executor)", [this]{ runBackgroundLogExecutor();} );
 
     background_log_executor->scheduleAfter(500);
 }
@@ -206,9 +206,9 @@ void DatabaseReplicated::writeLastExecutedToDiskAndZK() {
 void DatabaseReplicated::executeFromZK(String & path) {
         current_zookeeper = getZooKeeper();
         String query_to_execute = current_zookeeper->get(path, {}, NULL);
-        ReadBufferFromString istr(query_to_execute);
-        String dummy_string;
-        WriteBufferFromString ostr(dummy_string);
+        //ReadBufferFromString istr(query_to_execute);
+        //String dummy_string;
+        //WriteBufferFromString ostr(dummy_string);
 
         try
         {
@@ -216,7 +216,8 @@ void DatabaseReplicated::executeFromZK(String & path) {
             current_context->getClientInfo().query_kind = ClientInfo::QueryKind::REPLICATED_LOG_QUERY;
             current_context->setCurrentDatabase(database_name);
             current_context->setCurrentQueryId(""); // generate random query_id
-            executeQuery(istr, ostr, false, *current_context, {});
+            //executeQuery(istr, ostr, false, *current_context, {});
+            executeQuery(query_to_execute, *current_context);
         }
         catch (...)
         {
@@ -248,9 +249,9 @@ void DatabaseReplicated::createSnapshot() {
         String table_name = iterator->name();
         auto query = getCreateQueryFromMetadata(getObjectMetadataPath(table_name), true);
         String statement = queryToString(query);
-        current_zookeeper->createIfNotExists(snapshot_path + "/" + table_name, statement);
+        current_zookeeper->create(snapshot_path + "/" + table_name, statement, zkutil::CreateMode::Persistent);
     }
-    current_zookeeper->createIfNotExists(snapshot_path + "/.completed", String());
+    current_zookeeper->create(snapshot_path + "/.completed", String(), zkutil::CreateMode::Persistent);
 
     RemoveOutdatedSnapshotsAndLog();
 }

From 8273248c4e3cc8431ee30b71729a9da369f54a7a Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Sat, 27 Jun 2020 16:39:41 +0300
Subject: [PATCH 0045/1238] add log_name_to_exec to dbreplicated

---
 src/Databases/DatabaseFactory.cpp             |  5 +-
 src/Databases/DatabaseOnDisk.cpp              |  2 +-
 src/Databases/DatabaseOnDisk.h                |  1 -
 src/Databases/DatabaseOrdinary.cpp            |  2 +-
 src/Databases/DatabaseReplicated.cpp          | 47 ++++++++++++-------
 src/Databases/DatabaseReplicated.h            |  9 +++-
 src/Interpreters/InterpreterDropQuery.cpp     |  3 +-
 .../MergeTree/registerStorageMergeTree.cpp    |  8 ----
 tests/integration/runner                      |  4 +-
 9 files changed, 46 insertions(+), 35 deletions(-)

diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp
index 0d7a711b530..752eeba4e81 100644
--- a/src/Databases/DatabaseFactory.cpp
+++ b/src/Databases/DatabaseFactory.cpp
@@ -148,8 +148,9 @@ DatabasePtr DatabaseFactory::getImpl(
 
         const auto & arguments = engine->arguments->children;
 
-        const auto zoo_path = arguments[0]->as<ASTLiteral>()->value.safeGet<String>();
-        const auto replica_name = arguments[1]->as<ASTLiteral>()->value.safeGet<String>();
+        const auto & zoo_path = safeGetLiteralValue<String>(arguments[0], "Replicated");
+        const auto & replica_name  = safeGetLiteralValue<String>(arguments[1], "Replicated");
+
         return std::make_shared<DatabaseReplicated>(database_name, metadata_path, zoo_path, replica_name, context);
     }
 
diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp
index 0a16b6eacff..6c72773fb69 100644
--- a/src/Databases/DatabaseOnDisk.cpp
+++ b/src/Databases/DatabaseOnDisk.cpp
@@ -127,7 +127,7 @@ DatabaseOnDisk::DatabaseOnDisk(
     const String & metadata_path_,
     const String & data_path_,
     const String & logger,
-    const Context & context)
+    Context & context)
     : DatabaseWithOwnTablesBase(name, logger, context)
     , metadata_path(metadata_path_)
     , data_path(data_path_)
diff --git a/src/Databases/DatabaseOnDisk.h b/src/Databases/DatabaseOnDisk.h
index 00689900edf..4e7b2ab1709 100644
--- a/src/Databases/DatabaseOnDisk.h
+++ b/src/Databases/DatabaseOnDisk.h
@@ -86,7 +86,6 @@ protected:
 
     const String metadata_path;
     const String data_path;
-    const Context & global_context;
 };
 
 }
diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp
index 2f4f584b091..69fbbce8b7d 100644
--- a/src/Databases/DatabaseOrdinary.cpp
+++ b/src/Databases/DatabaseOrdinary.cpp
@@ -100,7 +100,7 @@ DatabaseOrdinary::DatabaseOrdinary(const String & name_, const String & metadata
 }
 
 DatabaseOrdinary::DatabaseOrdinary(
-    const String & name_, const String & metadata_path_, const String & data_path_, const String & logger, const Context & context_)
+    const String & name_, const String & metadata_path_, const String & data_path_, const String & logger, Context & context_)
     : DatabaseWithDictionaries(name_, metadata_path_, data_path_, logger, context_)
 {
 }
diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index 0ddc976d8d0..47298996236 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -13,6 +13,8 @@
 #include <Common/ZooKeeper/ZooKeeper.h>
 #include <Common/ZooKeeper/Lock.h>
 
+#include <common/sleep.h>
+
 
 namespace DB
 {
@@ -103,13 +105,15 @@ DatabaseReplicated::DatabaseReplicated(
     }
 
     snapshot_period = context_.getConfigRef().getInt("database_replicated_snapshot_period", 10);
-    LOG_DEBUG(log, "Snapshot period is set to " << snapshot_period << " log entries per one snapshot");
+    LOG_DEBUG(log, "Snapshot period is set to {} log entries per one snapshot", snapshot_period);
 
     background_log_executor = context_.getReplicatedSchedulePool().createTask(database_name + "(DatabaseReplicated::background_executor)", [this]{ runBackgroundLogExecutor();} );
 
     background_log_executor->scheduleAfter(500);
 }
 
+DatabaseReplicated::~DatabaseReplicated() = default;
+
 void DatabaseReplicated::createDatabaseZKNodes() {
     current_zookeeper = getZooKeeper();
 
@@ -174,7 +178,13 @@ void DatabaseReplicated::runBackgroundLogExecutor() {
 
     for (const String & log_entry_name : log_entry_names) {
         String log_entry_path = zookeeper_path + "/log/" + log_entry_name;
-        executeFromZK(log_entry_path);
+        bool yield = false;
+        {
+            std::lock_guard lock(log_name_mutex);
+            if (log_name_to_exec_with_result == log_entry_name)
+                yield = true;
+        }
+        executeFromZK(log_entry_path, yield);
         last_executed_log_entry = log_entry_name;
         writeLastExecutedToDiskAndZK();
 
@@ -203,12 +213,9 @@ void DatabaseReplicated::writeLastExecutedToDiskAndZK() {
     out.close();
 }
 
-void DatabaseReplicated::executeFromZK(String & path) {
+void DatabaseReplicated::executeFromZK(String & path, bool yield) {
         current_zookeeper = getZooKeeper();
         String query_to_execute = current_zookeeper->get(path, {}, NULL);
-        //ReadBufferFromString istr(query_to_execute);
-        //String dummy_string;
-        //WriteBufferFromString ostr(dummy_string);
 
         try
         {
@@ -216,23 +223,29 @@ void DatabaseReplicated::executeFromZK(String & path) {
             current_context->getClientInfo().query_kind = ClientInfo::QueryKind::REPLICATED_LOG_QUERY;
             current_context->setCurrentDatabase(database_name);
             current_context->setCurrentQueryId(""); // generate random query_id
-            //executeQuery(istr, ostr, false, *current_context, {});
             executeQuery(query_to_execute, *current_context);
         }
         catch (...)
         {
-            tryLogCurrentException(log, "Query from zookeeper " + query_to_execute + " wasn't finished successfully");
+            if (yield)
+                tryLogCurrentException(log, "Query from zookeeper " + query_to_execute + " wasn't finished successfully");
     
         }
 
-        LOG_DEBUG(log, "Executed query: " << query_to_execute);
+        std::lock_guard lock(log_name_mutex);
+        log_name_to_exec_with_result.clear();
+        LOG_DEBUG(log, "Executed query: {}", query_to_execute);
 }
 
 void DatabaseReplicated::propose(const ASTPtr & query) {
     current_zookeeper = getZooKeeper();
 
-    LOG_DEBUG(log, "Proposing query: " << queryToString(query));
-    current_zookeeper->create(zookeeper_path + "/log/log-", queryToString(query), zkutil::CreateMode::PersistentSequential);
+    LOG_DEBUG(log, "Proposing query: {}", queryToString(query));
+
+    {
+        std::lock_guard lock(log_name_mutex);
+        log_name_to_exec_with_result = current_zookeeper->create(zookeeper_path + "/log/log-", queryToString(query), zkutil::CreateMode::PersistentSequential);
+    }
 
     background_log_executor->schedule();
 }
@@ -241,11 +254,11 @@ void DatabaseReplicated::createSnapshot() {
     current_zookeeper = getZooKeeper();
     String snapshot_path = zookeeper_path + "/snapshots/" + last_executed_log_entry;
 
-    if (Coordination::ZNODEEXISTS == current_zookeeper->tryCreate(snapshot_path, String(), zkutil::CreateMode::Persistent)) {
+    if (Coordination::Error::ZNODEEXISTS == current_zookeeper->tryCreate(snapshot_path, String(), zkutil::CreateMode::Persistent)) {
         return;
     }
     
-    for (auto iterator = getTablesIterator({}); iterator->isValid(); iterator->next()) {
+    for (auto iterator = getTablesIterator(global_context, {}); iterator->isValid(); iterator->next()) {
         String table_name = iterator->name();
         auto query = getCreateQueryFromMetadata(getObjectMetadataPath(table_name), true);
         String statement = queryToString(query);
@@ -262,7 +275,7 @@ void DatabaseReplicated::loadMetadataFromSnapshot() {
     current_zookeeper = getZooKeeper();
 
     Strings snapshots;
-    if (current_zookeeper->tryGetChildren(zookeeper_path + "/snapshots", snapshots) != Coordination::ZOK)
+    if (current_zookeeper->tryGetChildren(zookeeper_path + "/snapshots", snapshots) != Coordination::Error::ZOK)
         return;
 
     auto latest_snapshot = std::max_element(snapshots.begin(), snapshots.end());
@@ -277,14 +290,14 @@ void DatabaseReplicated::loadMetadataFromSnapshot() {
 
 
     Strings metadatas;
-    if (current_zookeeper->tryGetChildren(zookeeper_path + "/snapshots/" + *latest_snapshot, metadatas) != Coordination::ZOK)
+    if (current_zookeeper->tryGetChildren(zookeeper_path + "/snapshots/" + *latest_snapshot, metadatas) != Coordination::Error::ZOK)
         return;
 
-    LOG_DEBUG(log, "Executing " << *latest_snapshot << " snapshot");
+    LOG_DEBUG(log, "Executing {} snapshot", *latest_snapshot);
     for (auto t = metadatas.begin(); t != metadatas.end(); ++t) {
         String path = zookeeper_path + "/snapshots/" + *latest_snapshot + "/" + *t;
 
-        executeFromZK(path);
+        executeFromZK(path, false);
     }
 
     last_executed_log_entry = *latest_snapshot;
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index 1cdcc3e990c..2aa6c0d9a68 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -4,6 +4,7 @@
 #include <Common/ZooKeeper/ZooKeeper.h>
 #include <Core/BackgroundSchedulePool.h>
 
+
 namespace DB
 {
 /** DatabaseReplicated engine
@@ -33,6 +34,8 @@ class DatabaseReplicated : public DatabaseAtomic
 public:
     DatabaseReplicated(const String & name_, const String & metadata_path_, const String & zookeeper_path_, const String & replica_name_, Context & context);
 
+    ~DatabaseReplicated();
+
     void drop(const Context & /*context*/) override;
 
     String getEngineName() const override { return "Replicated"; }
@@ -47,7 +50,7 @@ private:
 
     void runBackgroundLogExecutor();
     
-    void executeFromZK(String & path);
+    void executeFromZK(String & path, bool yield);
 
     void writeLastExecutedToDiskAndZK();
 
@@ -57,6 +60,10 @@ private:
 
     std::unique_ptr<Context> current_context; // to run executeQuery
 
+    //BlockIO execution_result;
+    std::mutex log_name_mutex;
+    String log_name_to_exec_with_result;
+
     int snapshot_period;
 
     String last_executed_log_entry = "";
diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp
index 368024da043..8eef9059f69 100644
--- a/src/Interpreters/InterpreterDropQuery.cpp
+++ b/src/Interpreters/InterpreterDropQuery.cpp
@@ -93,7 +93,7 @@ BlockIO InterpreterDropQuery::executeToTable(
         {
             context.checkAccess(table->isView() ? AccessType::DROP_VIEW : AccessType::DROP_TABLE, table_id);
             table->shutdown();
-            TableStructureWriteLockHolder table_lock;
+            TableExclusiveLockHolder table_lock;
             if (database->getEngineName() != "Atomic" && database->getEngineName() != "Replicated")
                 table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
             /// Drop table from memory, don't touch data and metadata
@@ -111,7 +111,6 @@ BlockIO InterpreterDropQuery::executeToTable(
             auto table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
             auto metadata_snapshot = table->getInMemoryMetadataPtr();
             /// Drop table data, don't touch metadata
-            auto table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
             if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY) {
                 database->propose(query_ptr);
             } else {
diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp
index eb62c80cc49..9836cd2ee23 100644
--- a/src/Storages/MergeTree/registerStorageMergeTree.cpp
+++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp
@@ -634,14 +634,6 @@ static StoragePtr create(const StorageFactory::Arguments & args)
         throw Exception("You must set the setting `allow_experimental_data_skipping_indices` to 1 " \
                         "before using data skipping indices.", ErrorCodes::BAD_ARGUMENTS);
 
-    StorageInMemoryMetadata metadata(args.columns, indices_description, args.constraints);
-    metadata.partition_by_ast = partition_by_ast;
-    metadata.order_by_ast = order_by_ast;
-    metadata.primary_key_ast = primary_key_ast;
-    metadata.ttl_for_table_ast = ttl_table_ast;
-    metadata.sample_by_ast = sample_by_ast;
-    metadata.settings_ast = settings_ast;
-
     if (replicatedStorage)
         return StorageReplicatedMergeTree::create(
             zookeeper_path, replica_name, args.attach, args.table_id, args.relative_data_path,
diff --git a/tests/integration/runner b/tests/integration/runner
index 399c87dcf06..058badcee66 100755
--- a/tests/integration/runner
+++ b/tests/integration/runner
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python2
 #-*- coding: utf-8 -*-
 import subprocess
 import os
@@ -105,7 +105,7 @@ if __name__ == "__main__":
         bridge_bin=args.bridge_binary,
         cfg=args.configs_dir,
         pth=args.clickhouse_root,
-        opts=' '.join(args.pytest_args),
+        opts='-vv ' + ' '.join(args.pytest_args),
         img=DIND_INTEGRATION_TESTS_IMAGE_NAME,
         name=CONTAINER_NAME,
         command=args.command

From 147fa9fed92c6b35061091971590e3243522bb84 Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Sat, 4 Jul 2020 16:39:17 +0300
Subject: [PATCH 0046/1238] fix type error in zookeeper

---
 src/Common/ZooKeeper/ZooKeeper.cpp | 2 +-
 tests/integration/runner           | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp
index 541625149dd..e09533874e3 100644
--- a/src/Common/ZooKeeper/ZooKeeper.cpp
+++ b/src/Common/ZooKeeper/ZooKeeper.cpp
@@ -582,7 +582,7 @@ void ZooKeeper::removeChildren(const std::string & path)
 void ZooKeeper::tryRemoveChildren(const std::string & path)
 {
     Strings children;
-    if (tryGetChildren(path, children) != Coordination::ZOK)
+    if (tryGetChildren(path, children) != Coordination::Error::ZOK)
         return;
     while (!children.empty())
     {
diff --git a/tests/integration/runner b/tests/integration/runner
index 058badcee66..399c87dcf06 100755
--- a/tests/integration/runner
+++ b/tests/integration/runner
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python
 #-*- coding: utf-8 -*-
 import subprocess
 import os
@@ -105,7 +105,7 @@ if __name__ == "__main__":
         bridge_bin=args.bridge_binary,
         cfg=args.configs_dir,
         pth=args.clickhouse_root,
-        opts='-vv ' + ' '.join(args.pytest_args),
+        opts=' '.join(args.pytest_args),
         img=DIND_INTEGRATION_TESTS_IMAGE_NAME,
         name=CONTAINER_NAME,
         command=args.command

From e591fe501412cce7bf2c9105ba7b572cc3b89ddb Mon Sep 17 00:00:00 2001
From: Val <valeryxbaturin@gmail.com>
Date: Sat, 4 Jul 2020 19:32:23 +0300
Subject: [PATCH 0047/1238] database replicated feedback mechanism prototype

---
 src/Databases/DatabaseReplicated.cpp        | 77 ++++++++++++++++-----
 src/Databases/DatabaseReplicated.h          | 10 +--
 src/Interpreters/InterpreterAlterQuery.cpp  |  4 +-
 src/Interpreters/InterpreterCreateQuery.cpp | 11 ++-
 src/Interpreters/InterpreterDropQuery.cpp   |  6 ++
 src/Interpreters/InterpreterRenameQuery.cpp |  8 +++
 6 files changed, 92 insertions(+), 24 deletions(-)

diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index 47298996236..fb64a005320 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -7,11 +7,13 @@
 #include <Interpreters/Context.h>
 #include <Interpreters/executeQuery.h>
 #include <Parsers/queryToString.h>
+#include <Common/Stopwatch.h>
 #include <Common/Exception.h>
 #include <Common/ZooKeeper/KeeperException.h>
 #include <Common/ZooKeeper/Types.h>
 #include <Common/ZooKeeper/ZooKeeper.h>
 #include <Common/ZooKeeper/Lock.h>
+#include <DataTypes/DataTypeString.h>
 
 #include <common/sleep.h>
 
@@ -105,6 +107,7 @@ DatabaseReplicated::DatabaseReplicated(
     }
 
     snapshot_period = context_.getConfigRef().getInt("database_replicated_snapshot_period", 10);
+    feedback_timeout = context_.getConfigRef().getInt("database_replicated_feedback_timeout", 0);
     LOG_DEBUG(log, "Snapshot period is set to {} log entries per one snapshot", snapshot_period);
 
     background_log_executor = context_.getReplicatedSchedulePool().createTask(database_name + "(DatabaseReplicated::background_executor)", [this]{ runBackgroundLogExecutor();} );
@@ -177,14 +180,7 @@ void DatabaseReplicated::runBackgroundLogExecutor() {
     log_entry_names.erase(log_entry_names.begin(), newest_entry_it);
 
     for (const String & log_entry_name : log_entry_names) {
-        String log_entry_path = zookeeper_path + "/log/" + log_entry_name;
-        bool yield = false;
-        {
-            std::lock_guard lock(log_name_mutex);
-            if (log_name_to_exec_with_result == log_entry_name)
-                yield = true;
-        }
-        executeFromZK(log_entry_path, yield);
+        executeLogName(log_entry_name);
         last_executed_log_entry = log_entry_name;
         writeLastExecutedToDiskAndZK();
 
@@ -213,7 +209,8 @@ void DatabaseReplicated::writeLastExecutedToDiskAndZK() {
     out.close();
 }
 
-void DatabaseReplicated::executeFromZK(String & path, bool yield) {
+void DatabaseReplicated::executeLogName(const String & log_entry_name) {
+        String path = zookeeper_path + "/log/" + log_entry_name;
         current_zookeeper = getZooKeeper();
         String query_to_execute = current_zookeeper->get(path, {}, NULL);
 
@@ -225,15 +222,12 @@ void DatabaseReplicated::executeFromZK(String & path, bool yield) {
             current_context->setCurrentQueryId(""); // generate random query_id
             executeQuery(query_to_execute, *current_context);
         }
-        catch (...)
+        catch (const Exception & e)
         {
-            if (yield)
-                tryLogCurrentException(log, "Query from zookeeper " + query_to_execute + " wasn't finished successfully");
-    
+            tryLogCurrentException(log, "Query from zookeeper " + query_to_execute + " wasn't finished successfully");
+            current_zookeeper->create(zookeeper_path + "/replicas/" + replica_name + "/errors/" + log_entry_name, e.what(), zkutil::CreateMode::Persistent);
         }
 
-        std::lock_guard lock(log_name_mutex);
-        log_name_to_exec_with_result.clear();
         LOG_DEBUG(log, "Executed query: {}", query_to_execute);
 }
 
@@ -250,6 +244,48 @@ void DatabaseReplicated::propose(const ASTPtr & query) {
     background_log_executor->schedule();
 }
 
+BlockIO DatabaseReplicated::getFeedback() {
+    BlockIO res;
+    if (feedback_timeout == 0)
+        return res;
+
+    Stopwatch watch;
+
+    NamesAndTypes block_structure = {
+        {"replica_name", std::make_shared<DataTypeString>()},
+        {"execution_feedback", std::make_shared<DataTypeString>()},
+    };
+    auto replica_name_column = block_structure[0].type->createColumn();
+    auto feedback_column = block_structure[1].type->createColumn();
+
+    current_zookeeper = getZooKeeper();
+    Strings replica_states = current_zookeeper->getChildren(zookeeper_path + "/replicas");
+    auto replica_iter = replica_states.begin();
+
+    while (!replica_states.empty() && watch.elapsedSeconds() < feedback_timeout) {
+        String last_executed = current_zookeeper->get(zookeeper_path + "/replicas/" + *replica_iter);
+        if (last_executed > log_name_to_exec_with_result) {
+            replica_name_column->insert(*replica_iter);
+            String err_path = zookeeper_path + "/replicas/" + *replica_iter + "/errors/" + log_name_to_exec_with_result;
+            if (!current_zookeeper->exists(err_path)) {
+                feedback_column->insert("OK");
+            } else {
+                String feedback = current_zookeeper->get(err_path, {}, NULL);
+                feedback_column->insert(feedback);
+            }
+        replica_states.erase(replica_iter);
+        replica_iter = replica_states.begin();
+        }
+    }
+
+    Block block = Block({
+        {std::move(replica_name_column), block_structure[0].type, block_structure[0].name},
+        {std::move(feedback_column), block_structure[1].type, block_structure[1].name}});
+
+    res.in = std::make_shared<OneBlockInputStream>(block);
+    return res;
+}
+
 void DatabaseReplicated::createSnapshot() {
     current_zookeeper = getZooKeeper();
     String snapshot_path = zookeeper_path + "/snapshots/" + last_executed_log_entry;
@@ -288,16 +324,23 @@ void DatabaseReplicated::loadMetadataFromSnapshot() {
         return;
     }
 
-
     Strings metadatas;
     if (current_zookeeper->tryGetChildren(zookeeper_path + "/snapshots/" + *latest_snapshot, metadatas) != Coordination::Error::ZOK)
         return;
 
     LOG_DEBUG(log, "Executing {} snapshot", *latest_snapshot);
+
     for (auto t = metadatas.begin(); t != metadatas.end(); ++t) {
         String path = zookeeper_path + "/snapshots/" + *latest_snapshot + "/" + *t;
 
-        executeFromZK(path, false);
+        String query_to_execute = current_zookeeper->get(path, {}, NULL);
+
+        current_context = std::make_unique<Context>(global_context);
+        current_context->getClientInfo().query_kind = ClientInfo::QueryKind::REPLICATED_LOG_QUERY;
+        current_context->setCurrentDatabase(database_name);
+        current_context->setCurrentQueryId(""); // generate random query_id
+
+        executeQuery(query_to_execute, *current_context);
     }
 
     last_executed_log_entry = *latest_snapshot;
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index 2aa6c0d9a68..0f448b8061c 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -3,6 +3,8 @@
 #include <Databases/DatabaseAtomic.h>
 #include <Common/ZooKeeper/ZooKeeper.h>
 #include <Core/BackgroundSchedulePool.h>
+#include <DataStreams/BlockIO.h>
+#include <DataStreams/OneBlockInputStream.h>
 
 
 namespace DB
@@ -42,6 +44,8 @@ public:
 
     void propose(const ASTPtr & query) override;
 
+    BlockIO getFeedback();
+
     String zookeeper_path;
     String replica_name;
 
@@ -49,9 +53,7 @@ private:
     void createDatabaseZKNodes();
 
     void runBackgroundLogExecutor();
-    
-    void executeFromZK(String & path, bool yield);
-
+    void executeLogName(const String &);
     void writeLastExecutedToDiskAndZK();
 
     void loadMetadataFromSnapshot();
@@ -60,11 +62,11 @@ private:
 
     std::unique_ptr<Context> current_context; // to run executeQuery
 
-    //BlockIO execution_result;
     std::mutex log_name_mutex;
     String log_name_to_exec_with_result;
 
     int snapshot_period;
+    int feedback_timeout;
 
     String last_executed_log_entry = "";
 
diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp
index 6b4bcdde067..96f3628b637 100644
--- a/src/Interpreters/InterpreterAlterQuery.cpp
+++ b/src/Interpreters/InterpreterAlterQuery.cpp
@@ -16,6 +16,7 @@
 #include <boost/range/algorithm_ext/push_back.hpp>
 #include <algorithm>
 #include <Databases/IDatabase.h>
+#include <Databases/DatabaseReplicated.h>
 #include <Databases/DatabaseFactory.h>
 
 
@@ -52,7 +53,8 @@ BlockIO InterpreterAlterQuery::execute()
     DatabasePtr database = DatabaseCatalog::instance().getDatabase(table_id.database_name);
     if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY && !table->supportsReplication()) {
         database->propose(query_ptr);
-        return {};
+        auto * database_replicated = typeid_cast<DatabaseReplicated *>(database.get());
+        return database_replicated->getFeedback();
     }
 
     /// Add default database to table identifiers that we can encounter in e.g. default expressions,
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index 9d3abf2c8a6..0c312cfc863 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -46,6 +46,7 @@
 #include <DataTypes/DataTypeNullable.h>
 
 #include <Databases/DatabaseFactory.h>
+#include <Databases/DatabaseReplicated.h>
 #include <Databases/IDatabase.h>
 
 #include <Dictionaries/getDictionaryConfigurationFromAST.h>
@@ -571,12 +572,12 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
             ErrorCodes::BAD_DATABASE_FOR_TEMPORARY_TABLE);
 
     String current_database = context.getCurrentDatabase();
+    auto database_name = create.database.empty() ? current_database : create.database;
+    auto database = DatabaseCatalog::instance().getDatabase(database_name);
 
     // If this is a stub ATTACH query, read the query definition from the database
     if (create.attach && !create.storage && !create.columns_list)
     {
-        auto database_name = create.database.empty() ? current_database : create.database;
-        auto database = DatabaseCatalog::instance().getDatabase(database_name);
         bool if_not_exists = create.if_not_exists;
 
         // Table SQL definition is available even if the table is detached
@@ -611,6 +612,12 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
 
     /// Actually creates table
     bool created = doCreateTable(create, properties);
+
+    if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY) {
+        auto * database_replicated = typeid_cast<DatabaseReplicated *>(database.get());
+        return database_replicated->getFeedback();
+    }
+
     if (!created)   /// Table already exists
         return {};
 
diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp
index 8eef9059f69..d5ac832e46c 100644
--- a/src/Interpreters/InterpreterDropQuery.cpp
+++ b/src/Interpreters/InterpreterDropQuery.cpp
@@ -12,6 +12,7 @@
 #include <Common/quoteString.h>
 #include <Common/typeid_cast.h>
 #include <Databases/DatabaseAtomic.h>
+#include <Databases/DatabaseReplicated.h>
 
 
 namespace DB
@@ -137,6 +138,11 @@ BlockIO InterpreterDropQuery::executeToTable(
         }
     }
 
+    if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY) {
+        auto * database_replicated = typeid_cast<DatabaseReplicated *>(database.get());
+        return database_replicated->getFeedback();
+    }
+
     return {};
 }
 
diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp
index 97206f6b364..b950edac5bc 100644
--- a/src/Interpreters/InterpreterRenameQuery.cpp
+++ b/src/Interpreters/InterpreterRenameQuery.cpp
@@ -6,6 +6,7 @@
 #include <Interpreters/DDLWorker.h>
 #include <Access/AccessRightsElement.h>
 #include <Common/typeid_cast.h>
+#include <Databases/DatabaseReplicated.h>
 
 
 namespace DB
@@ -91,7 +92,14 @@ BlockIO InterpreterRenameQuery::execute()
                 elem.to_table_name,
                 rename.exchange);
         }
+
+        // TODO it can't work
+        if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY) {
+            auto * database_replicated = typeid_cast<DatabaseReplicated *>(database.get());
+            return database_replicated->getFeedback();
+        }
     }
+
     return {};
 }
 

From 64d130f8a2b8614d78177c5a9381489a915814b5 Mon Sep 17 00:00:00 2001
From: Vitaliy Zakaznikov <vzakaznikov@protonmail.com>
Date: Mon, 14 Sep 2020 15:39:33 -0400
Subject: [PATCH 0048/1238] Adding support for `[PERIODIC] REFRESH [value_sec]`
 clause when creating LIVE VIEW tables.

---
 src/Core/Defines.h                            |   1 +
 src/Core/Settings.h                           |   1 +
 src/Parsers/ASTCreateQuery.cpp                |  21 ++-
 src/Parsers/ASTCreateQuery.h                  |   2 +
 src/Parsers/ParserCreateQuery.cpp             |  38 ++++-
 .../LiveView/LiveViewBlockOutputStream.h      |  10 ++
 src/Storages/LiveView/StorageLiveView.cpp     | 133 +++++++++++++-----
 src/Storages/LiveView/StorageLiveView.h       |  36 ++++-
 8 files changed, 196 insertions(+), 46 deletions(-)

diff --git a/src/Core/Defines.h b/src/Core/Defines.h
index e244581c339..8920d44fdb4 100644
--- a/src/Core/Defines.h
+++ b/src/Core/Defines.h
@@ -36,6 +36,7 @@
 #define DEFAULT_MERGE_BLOCK_SIZE 8192
 
 #define DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC 5
+#define DEFAULT_PERIODIC_LIVE_VIEW_REFRESH_SEC 60
 #define SHOW_CHARS_ON_SYNTAX_ERROR ptrdiff_t(160)
 #define DEFAULT_LIVE_VIEW_HEARTBEAT_INTERVAL_SEC 15
 #define DBMS_DEFAULT_DISTRIBUTED_CONNECTIONS_POOL_SIZE 1024
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index b39c223a5e9..f7ecab5fecb 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -369,6 +369,7 @@ class IColumn;
     M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \
     M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \
     M(Seconds, temporary_live_view_timeout, DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC, "Timeout after which temporary live view is deleted.", 0) \
+    M(Seconds, periodic_live_view_refresh, DEFAULT_PERIODIC_LIVE_VIEW_REFRESH_SEC, "Interval after which periodically refreshed live view is forced to refresh.", 0) \
     M(Bool, transform_null_in, false, "If enabled, NULL values will be matched with 'IN' operator as if they are considered equal.", 0) \
     M(Bool, allow_nondeterministic_mutations, false, "Allow non-deterministic functions in ALTER UPDATE/ALTER DELETE statements", 0) \
     M(Seconds, lock_acquire_timeout, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "How long locking request should wait before failing", 0) \
diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp
index 73903e28f84..9b6c62b026f 100644
--- a/src/Parsers/ASTCreateQuery.cpp
+++ b/src/Parsers/ASTCreateQuery.cpp
@@ -248,9 +248,24 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat
         if (uuid != UUIDHelpers::Nil)
             settings.ostr << (settings.hilite ? hilite_keyword : "") << " UUID " << (settings.hilite ? hilite_none : "")
                           << quoteString(toString(uuid));
-        if (live_view_timeout)
-            settings.ostr << (settings.hilite ? hilite_keyword : "") << " WITH TIMEOUT " << (settings.hilite ? hilite_none : "")
-                          << *live_view_timeout;
+
+        if (live_view_timeout || live_view_periodic_refresh)
+        {
+            settings.ostr << (settings.hilite ? hilite_keyword : "") << " WITH";
+
+            if (live_view_timeout)
+                settings.ostr << " TIMEOUT " << (settings.hilite ? hilite_none : "") << *live_view_timeout;
+
+            if (live_view_periodic_refresh)
+            {
+                if (live_view_timeout)
+                    settings.ostr << (settings.hilite ? hilite_keyword : "") << " AND" << (settings.hilite ? hilite_none : "");
+
+                settings.ostr << (settings.hilite ? hilite_keyword : "") << " PERIODIC REFRESH " << (settings.hilite ? hilite_none : "")
+                    << *live_view_periodic_refresh;
+            }
+        }
+
         formatOnCluster(settings);
     }
     else
diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h
index 5d69d86bd61..a75df184842 100644
--- a/src/Parsers/ASTCreateQuery.h
+++ b/src/Parsers/ASTCreateQuery.h
@@ -76,6 +76,8 @@ public:
     ASTDictionary * dictionary = nullptr; /// dictionary definition (layout, primary key, etc.)
 
     std::optional<UInt64> live_view_timeout;    /// For CREATE LIVE VIEW ... WITH TIMEOUT ...
+    std::optional<UInt64> live_view_periodic_refresh;    /// For CREATE LIVE VIEW ... WITH [PERIODIC] REFRESH ...
+
     bool attach_short_syntax{false};
 
     /** Get the text that identifies this element. */
diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp
index 55208ca4133..a0bddabcc1d 100644
--- a/src/Parsers/ParserCreateQuery.cpp
+++ b/src/Parsers/ParserCreateQuery.cpp
@@ -496,10 +496,14 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e
     ASTPtr as_table;
     ASTPtr select;
     ASTPtr live_view_timeout;
+    ASTPtr live_view_periodic_refresh;
 
     String cluster_str;
     bool attach = false;
     bool if_not_exists = false;
+    bool with_and = false;
+    bool with_timeout = false;
+    bool with_periodic_refresh = false;
 
     if (!s_create.ignore(pos, expected))
     {
@@ -521,10 +525,35 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e
     if (!table_name_p.parse(pos, table, expected))
         return false;
 
-    if (ParserKeyword{"WITH TIMEOUT"}.ignore(pos, expected))
+    if (ParserKeyword{"WITH"}.ignore(pos, expected))
     {
-        if (!ParserNumber{}.parse(pos, live_view_timeout, expected))
-            live_view_timeout = std::make_shared<ASTLiteral>(static_cast<UInt64>(DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC));
+        if (ParserKeyword{"TIMEOUT"}.ignore(pos, expected))
+        {
+            if (!ParserNumber{}.parse(pos, live_view_timeout, expected))
+            {
+                live_view_timeout = std::make_shared<ASTLiteral>(static_cast<UInt64>(DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC));
+            }
+
+            /// Optional - AND
+            if (ParserKeyword{"AND"}.ignore(pos, expected))
+                with_and = true;
+
+            with_timeout = true;
+        }
+
+        if (ParserKeyword{"REFRESH"}.ignore(pos, expected) || ParserKeyword{"PERIODIC REFRESH"}.ignore(pos, expected))
+        {
+            if (!ParserNumber{}.parse(pos, live_view_periodic_refresh, expected))
+                live_view_periodic_refresh = std::make_shared<ASTLiteral>(static_cast<UInt64>(DEFAULT_PERIODIC_LIVE_VIEW_REFRESH_SEC));
+
+            with_periodic_refresh = true;
+        }
+
+        else if (with_and)
+            return false;
+
+        if (!with_timeout && !with_periodic_refresh)
+            return false;
     }
 
     if (ParserKeyword{"ON"}.ignore(pos, expected))
@@ -583,6 +612,9 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e
     if (live_view_timeout)
         query->live_view_timeout.emplace(live_view_timeout->as<ASTLiteral &>().value.safeGet<UInt64>());
 
+    if (live_view_periodic_refresh)
+        query->live_view_periodic_refresh.emplace(live_view_periodic_refresh->as<ASTLiteral &>().value.safeGet<UInt64>());
+
     return true;
 }
 
diff --git a/src/Storages/LiveView/LiveViewBlockOutputStream.h b/src/Storages/LiveView/LiveViewBlockOutputStream.h
index 548bcf1b86a..5a1f75a8c2f 100644
--- a/src/Storages/LiveView/LiveViewBlockOutputStream.h
+++ b/src/Storages/LiveView/LiveViewBlockOutputStream.h
@@ -34,6 +34,7 @@ public:
         {
             new_blocks_metadata->hash = key_str;
             new_blocks_metadata->version = storage.getBlocksVersion() + 1;
+            new_blocks_metadata->time = std::chrono::system_clock::now();
 
             for (auto & block : *new_blocks)
             {
@@ -48,6 +49,15 @@ public:
 
             storage.condition.notify_all();
         }
+        else
+        {
+            // only update blocks time
+            new_blocks_metadata->hash = storage.getBlocksHashKey();
+            new_blocks_metadata->version = storage.getBlocksVersion();
+            new_blocks_metadata->time = std::chrono::system_clock::now();
+
+            (*storage.blocks_metadata_ptr) = new_blocks_metadata;
+        }
 
         new_blocks.reset();
         new_blocks_metadata.reset();
diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp
index 7095357a161..48dcab56f8c 100644
--- a/src/Storages/LiveView/StorageLiveView.cpp
+++ b/src/Storages/LiveView/StorageLiveView.cpp
@@ -21,6 +21,7 @@ limitations under the License. */
 #include <DataStreams/MaterializingBlockInputStream.h>
 #include <DataStreams/SquashingBlockInputStream.h>
 #include <DataStreams/copyData.h>
+#include <common/logger_useful.h>
 #include <Common/typeid_cast.h>
 #include <Common/SipHash.h>
 
@@ -254,6 +255,8 @@ StorageLiveView::StorageLiveView(
     live_view_context = std::make_unique<Context>(global_context);
     live_view_context->makeQueryContext();
 
+    log = &Poco::Logger::get("StorageLiveView (" + table_id_.database_name + "." + table_id_.table_name + ")");
+
     StorageInMemoryMetadata storage_metadata;
     storage_metadata.setColumns(columns_);
     setInMemoryMetadata(storage_metadata);
@@ -275,12 +278,21 @@ StorageLiveView::StorageLiveView(
     if (query.live_view_timeout)
     {
         is_temporary = true;
-        temporary_live_view_timeout = std::chrono::seconds{*query.live_view_timeout};
+        temporary_live_view_timeout = Seconds {*query.live_view_timeout};
+    }
+
+    if (query.live_view_periodic_refresh)
+    {
+        is_periodically_refreshed = true;
+        periodic_live_view_refresh = Seconds {*query.live_view_periodic_refresh};
     }
 
     blocks_ptr = std::make_shared<BlocksPtr>();
     blocks_metadata_ptr = std::make_shared<BlocksMetadataPtr>();
     active_ptr = std::make_shared<bool>(true);
+
+    periodic_refresh_task = global_context.getSchedulePool().createTask("LieViewPeriodicRefreshTask", [this]{ periodicRefreshTaskFunc(); });
+    periodic_refresh_task->deactivate();
 }
 
 Block StorageLiveView::getHeader() const
@@ -364,10 +376,20 @@ bool StorageLiveView::getNewBlocks()
             }
             new_blocks_metadata->hash = key.toHexString();
             new_blocks_metadata->version = getBlocksVersion() + 1;
+            new_blocks_metadata->time = std::chrono::system_clock::now();
+
             (*blocks_ptr) = new_blocks;
             (*blocks_metadata_ptr) = new_blocks_metadata;
+
             updated = true;
         }
+        else {
+            new_blocks_metadata->hash = getBlocksHashKey();
+            new_blocks_metadata->version = getBlocksVersion();
+            new_blocks_metadata->time = std::chrono::system_clock::now();
+
+            (*blocks_metadata_ptr) = new_blocks_metadata;
+        }
     }
     return updated;
 }
@@ -387,11 +409,18 @@ void StorageLiveView::startup()
 {
     if (is_temporary)
         TemporaryLiveViewCleaner::instance().addView(std::static_pointer_cast<StorageLiveView>(shared_from_this()));
+
+    if (is_periodically_refreshed)
+        periodic_refresh_task->activate();
 }
 
 void StorageLiveView::shutdown()
 {
     shutdown_called = true;
+
+    if (is_periodically_refreshed)
+        periodic_refresh_task->deactivate();
+
     DatabaseCatalog::instance().removeDependency(select_table_id, getStorageID());
 }
 
@@ -410,15 +439,55 @@ void StorageLiveView::drop()
     condition.notify_all();
 }
 
-void StorageLiveView::refresh()
+void StorageLiveView::scheduleNextPeriodicRefresh()
+{
+    Seconds current_time = std::chrono::duration_cast<Seconds> (std::chrono::system_clock::now().time_since_epoch());
+    Seconds blocks_time = std::chrono::duration_cast<Seconds> (getBlocksTime().time_since_epoch());
+
+    if ( (current_time - periodic_live_view_refresh) >= blocks_time )
+    {
+        refresh(false);
+        blocks_time = std::chrono::duration_cast<Seconds> (getBlocksTime().time_since_epoch());
+    }
+    current_time = std::chrono::duration_cast<Seconds> (std::chrono::system_clock::now().time_since_epoch());
+
+    auto next_refresh_time = blocks_time + periodic_live_view_refresh;
+
+    if (current_time >= next_refresh_time)
+        periodic_refresh_task->scheduleAfter(0);
+    else
+    {
+        auto schedule_time = std::chrono::duration_cast<MilliSeconds> (next_refresh_time - current_time);
+        periodic_refresh_task->scheduleAfter(static_cast<size_t>(schedule_time.count()));
+    }
+}
+
+void StorageLiveView::periodicRefreshTaskFunc()
+{
+    LOG_TRACE(log, "periodic refresh task");
+
+    std::lock_guard lock(mutex);
+
+    if (hasActiveUsers())
+        scheduleNextPeriodicRefresh();
+}
+
+void StorageLiveView::refresh(bool grab_lock)
 {
     // Lock is already acquired exclusively from InterperterAlterQuery.cpp InterpreterAlterQuery::execute() method.
     // So, reacquiring lock is not needed and will result in an exception.
+
+    if (grab_lock)
     {
         std::lock_guard lock(mutex);
         if (getNewBlocks())
             condition.notify_all();
     }
+    else
+    {
+        if (getNewBlocks())
+            condition.notify_all();
+    }
 }
 
 Pipe StorageLiveView::read(
@@ -430,15 +499,21 @@ Pipe StorageLiveView::read(
     const size_t /*max_block_size*/,
     const unsigned /*num_streams*/)
 {
+    std::lock_guard lock(mutex);
+
+    if (!(*blocks_ptr))
+        refresh(false);
+
+    else if (is_periodically_refreshed)
     {
-        std::lock_guard lock(mutex);
-        if (!(*blocks_ptr))
-        {
-            if (getNewBlocks())
-                condition.notify_all();
-        }
-        return Pipe(std::make_shared<BlocksSource>(blocks_ptr, getHeader()));
+        Seconds current_time = std::chrono::duration_cast<Seconds> (std::chrono::system_clock::now().time_since_epoch());
+        Seconds blocks_time = std::chrono::duration_cast<Seconds> (getBlocksTime().time_since_epoch());
+
+        if ( (current_time - periodic_live_view_refresh) >= blocks_time )
+            refresh(false);
     }
+
+    return Pipe(std::make_shared<BlocksSource>(blocks_ptr, getHeader()));
 }
 
 BlockInputStreams StorageLiveView::watch(
@@ -453,6 +528,7 @@ BlockInputStreams StorageLiveView::watch(
 
     bool has_limit = false;
     UInt64 limit = 0;
+    BlockInputStreamPtr reader;
 
     if (query.limit_length)
     {
@@ -461,45 +537,28 @@ BlockInputStreams StorageLiveView::watch(
     }
 
     if (query.is_watch_events)
-    {
-        auto reader = std::make_shared<LiveViewEventsBlockInputStream>(
+        reader = std::make_shared<LiveViewEventsBlockInputStream>(
             std::static_pointer_cast<StorageLiveView>(shared_from_this()),
             blocks_ptr, blocks_metadata_ptr, active_ptr, has_limit, limit,
             context.getSettingsRef().live_view_heartbeat_interval.totalSeconds());
-
-        {
-            std::lock_guard lock(mutex);
-            if (!(*blocks_ptr))
-            {
-                if (getNewBlocks())
-                    condition.notify_all();
-            }
-        }
-
-        processed_stage = QueryProcessingStage::Complete;
-
-        return { reader };
-    }
     else
-    {
-        auto reader = std::make_shared<LiveViewBlockInputStream>(
+        reader = std::make_shared<LiveViewBlockInputStream>(
             std::static_pointer_cast<StorageLiveView>(shared_from_this()),
             blocks_ptr, blocks_metadata_ptr, active_ptr, has_limit, limit,
             context.getSettingsRef().live_view_heartbeat_interval.totalSeconds());
 
-        {
-            std::lock_guard lock(mutex);
-            if (!(*blocks_ptr))
-            {
-                if (getNewBlocks())
-                    condition.notify_all();
-            }
-        }
+    {
+        std::lock_guard lock(mutex);
 
-        processed_stage = QueryProcessingStage::Complete;
+        if (!(*blocks_ptr))
+            refresh(false);
 
-        return { reader };
+        if (is_periodically_refreshed)
+            scheduleNextPeriodicRefresh();
     }
+
+    processed_stage = QueryProcessingStage::Complete;
+    return { reader };
 }
 
 NamesAndTypesList StorageLiveView::getVirtuals() const
diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h
index fe4be6ee08e..4a219431c0d 100644
--- a/src/Storages/LiveView/StorageLiveView.h
+++ b/src/Storages/LiveView/StorageLiveView.h
@@ -13,6 +13,7 @@ limitations under the License. */
 
 #include <ext/shared_ptr_helper.h>
 #include <Storages/IStorage.h>
+#include <Core/BackgroundSchedulePool.h>
 
 #include <mutex>
 #include <condition_variable>
@@ -21,10 +22,16 @@ limitations under the License. */
 namespace DB
 {
 
+using Time = std::chrono::time_point<std::chrono::system_clock>;
+using Seconds = std::chrono::seconds;
+using MilliSeconds = std::chrono::milliseconds;
+
+
 struct BlocksMetadata
 {
     String hash;
     UInt64 version;
+    Time time;
 };
 
 struct MergeableBlocks
@@ -75,8 +82,10 @@ public:
     NamesAndTypesList getVirtuals() const override;
 
     bool isTemporary() const { return is_temporary; }
-    std::chrono::seconds getTimeout() const { return temporary_live_view_timeout; }
+    bool isPeriodicallyRefreshed() const { return is_periodically_refreshed; }
 
+    Seconds getTimeout() const { return temporary_live_view_timeout; }
+    Seconds getPeriodicRefresh() const { return periodic_live_view_refresh; }
 
     /// Check if we have any readers
     /// must be called with mutex locked
@@ -109,6 +118,15 @@ public:
         return 0;
     }
 
+    /// Get blocks time
+    /// must be called with mutex locked
+    Time getBlocksTime()
+    {
+        if (*blocks_metadata_ptr)
+            return (*blocks_metadata_ptr)->time;
+        return {};
+    }
+
     /// Reset blocks
     /// must be called with mutex locked
     void reset()
@@ -124,7 +142,7 @@ public:
     void startup() override;
     void shutdown() override;
 
-    void refresh();
+    void refresh(const bool grab_lock = true);
 
     Pipe read(
         const Names & column_names,
@@ -176,8 +194,13 @@ private:
     Context & global_context;
     std::unique_ptr<Context> live_view_context;
 
+    Poco::Logger * log;
+
     bool is_temporary = false;
-    std::chrono::seconds temporary_live_view_timeout;
+    bool is_periodically_refreshed = false;
+
+    Seconds temporary_live_view_timeout;
+    Seconds periodic_live_view_refresh;
 
     /// Mutex to protect access to sample block and inner_blocks_query
     mutable std::mutex sample_block_lock;
@@ -199,6 +222,13 @@ private:
 
     std::atomic<bool> shutdown_called = false;
 
+    /// Periodic refresh task used when [PERIODIC] REFRESH is specified in create statement
+    BackgroundSchedulePool::TaskHolder periodic_refresh_task;
+    void periodicRefreshTaskFunc();
+
+    /// Must be called with mutex locked
+    void scheduleNextPeriodicRefresh();
+
     StorageLiveView(
         const StorageID & table_id_,
         Context & local_context,

From 8ea4c2e26fbf51e8aa59f6ea3bf4e9b366182d67 Mon Sep 17 00:00:00 2001
From: Anton Popov <pad11rus@gmail.com>
Date: Thu, 24 Sep 2020 14:42:41 +0300
Subject: [PATCH 0049/1238] Fix TTL in cases, when its expression is a function
 and is the same as ORDER BY key

---
 src/DataStreams/TTLBlockInputStream.cpp       | 56 ++++++-------
 .../MergeTree/MergeTreeDataWriter.cpp         | 42 ++++------
 src/Storages/TTLDescription.cpp               | 49 ++----------
 .../01506_ttl_same_with_order_by.reference    |  4 +
 .../01506_ttl_same_with_order_by.sql          | 78 +++++++++++++++++++
 5 files changed, 128 insertions(+), 101 deletions(-)
 create mode 100644 tests/queries/0_stateless/01506_ttl_same_with_order_by.reference
 create mode 100644 tests/queries/0_stateless/01506_ttl_same_with_order_by.sql

diff --git a/src/DataStreams/TTLBlockInputStream.cpp b/src/DataStreams/TTLBlockInputStream.cpp
index 85d9c7fead2..6dba8968f79 100644
--- a/src/DataStreams/TTLBlockInputStream.cpp
+++ b/src/DataStreams/TTLBlockInputStream.cpp
@@ -86,6 +86,7 @@ TTLBlockInputStream::TTLBlockInputStream(
             if (descr.arguments.empty())
                 for (const auto & name : descr.argument_names)
                     descr.arguments.push_back(header.getPositionByName(name));
+
         agg_aggregate_columns.resize(storage_rows_ttl.aggregate_descriptions.size());
 
         const Settings & settings = storage.global_context.getSettingsRef();
@@ -153,19 +154,26 @@ void TTLBlockInputStream::readSuffixImpl()
         LOG_INFO(log, "Removed {} rows with expired TTL from part {}", rows_removed, data_part->name);
 }
 
+static ColumnPtr extractRequieredColumn(const ExpressionActions & expression, const Block & block, const String & result_column)
+{
+    if (block.has(result_column))
+        return block.getByName(result_column).column;
+
+    Block block_copy;
+    for (const auto & column_name : expression.getRequiredColumns())
+        block_copy.insert(block.getByName(column_name));
+
+    expression.execute(block_copy);
+    return block_copy.getByName(result_column).column;
+}
+
 void TTLBlockInputStream::removeRowsWithExpiredTableTTL(Block & block)
 {
     auto rows_ttl = metadata_snapshot->getRowsTTL();
+    auto ttl_column = extractRequieredColumn(*rows_ttl.expression, block, rows_ttl.result_column);
 
-    rows_ttl.expression->execute(block);
-    if (rows_ttl.where_expression)
-        rows_ttl.where_expression->execute(block);
-
-    const IColumn * ttl_column =
-        block.getByName(rows_ttl.result_column).column.get();
-
-    const IColumn * where_result_column = rows_ttl.where_expression ?
-        block.getByName(rows_ttl.where_result_column).column.get() : nullptr;
+    auto where_result_column = rows_ttl.where_expression ?
+        extractRequieredColumn(*rows_ttl.where_expression, block, rows_ttl.where_result_column) : nullptr;
 
     const auto & column_names = header.getNames();
 
@@ -181,7 +189,7 @@ void TTLBlockInputStream::removeRowsWithExpiredTableTTL(Block & block)
 
             for (size_t i = 0; i < block.rows(); ++i)
             {
-                UInt32 cur_ttl = getTimestampByIndex(ttl_column, i);
+                UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i);
                 bool where_filter_passed = !where_result_column || where_result_column->getBool(i);
                 if (!isTTLExpired(cur_ttl) || !where_filter_passed)
                 {
@@ -206,7 +214,7 @@ void TTLBlockInputStream::removeRowsWithExpiredTableTTL(Block & block)
         auto storage_rows_ttl = metadata_snapshot->getRowsTTL();
         for (size_t i = 0; i < block.rows(); ++i)
         {
-            UInt32 cur_ttl = getTimestampByIndex(ttl_column, i);
+            UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i);
             bool where_filter_passed = !where_result_column || where_result_column->getBool(i);
             bool ttl_expired = isTTLExpired(cur_ttl) && where_filter_passed;
 
@@ -221,6 +229,7 @@ void TTLBlockInputStream::removeRowsWithExpiredTableTTL(Block & block)
                     same_as_current = false;
                 }
             }
+
             if (!same_as_current)
             {
                 if (rows_with_current_key)
@@ -311,7 +320,6 @@ void TTLBlockInputStream::removeValuesWithExpiredColumnTTL(Block & block)
         defaults_expression->execute(block_with_defaults);
     }
 
-    std::vector<String> columns_to_remove;
     for (const auto & [name, ttl_entry] : metadata_snapshot->getColumnTTLs())
     {
         /// If we read not all table columns. E.g. while mutation.
@@ -329,11 +337,7 @@ void TTLBlockInputStream::removeValuesWithExpiredColumnTTL(Block & block)
         if (isTTLExpired(old_ttl_info.max))
             continue;
 
-        if (!block.has(ttl_entry.result_column))
-        {
-            columns_to_remove.push_back(ttl_entry.result_column);
-            ttl_entry.expression->execute(block);
-        }
+        auto ttl_column = extractRequieredColumn(*ttl_entry.expression, block, ttl_entry.result_column);
 
         ColumnPtr default_column = nullptr;
         if (block_with_defaults.has(name))
@@ -344,11 +348,9 @@ void TTLBlockInputStream::removeValuesWithExpiredColumnTTL(Block & block)
         MutableColumnPtr result_column = values_column->cloneEmpty();
         result_column->reserve(block.rows());
 
-        const IColumn * ttl_column = block.getByName(ttl_entry.result_column).column.get();
-
         for (size_t i = 0; i < block.rows(); ++i)
         {
-            UInt32 cur_ttl = getTimestampByIndex(ttl_column, i);
+            UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i);
             if (isTTLExpired(cur_ttl))
             {
                 if (default_column)
@@ -365,34 +367,24 @@ void TTLBlockInputStream::removeValuesWithExpiredColumnTTL(Block & block)
         }
         column_with_type.column = std::move(result_column);
     }
-
-    for (const String & column : columns_to_remove)
-        block.erase(column);
 }
 
 void TTLBlockInputStream::updateTTLWithDescriptions(Block & block, const TTLDescriptions & descriptions, TTLInfoMap & ttl_info_map)
 {
-    std::vector<String> columns_to_remove;
     for (const auto & ttl_entry : descriptions)
     {
         auto & new_ttl_info = ttl_info_map[ttl_entry.result_column];
         if (!block.has(ttl_entry.result_column))
-        {
-            columns_to_remove.push_back(ttl_entry.result_column);
             ttl_entry.expression->execute(block);
-        }
 
-        const IColumn * ttl_column = block.getByName(ttl_entry.result_column).column.get();
+        auto ttl_column = extractRequieredColumn(*ttl_entry.expression, block, ttl_entry.result_column);
 
         for (size_t i = 0; i < block.rows(); ++i)
         {
-            UInt32 cur_ttl = getTimestampByIndex(ttl_column, i);
+            UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i);
             new_ttl_info.update(cur_ttl);
         }
     }
-
-    for (const String & column : columns_to_remove)
-        block.erase(column);
 }
 
 void TTLBlockInputStream::updateMovesTTL(Block & block)
diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp
index 739aff31a06..d5a2bfe280e 100644
--- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp
@@ -84,19 +84,14 @@ void updateTTL(
     const TTLDescription & ttl_entry,
     IMergeTreeDataPart::TTLInfos & ttl_infos,
     DB::MergeTreeDataPartTTLInfo & ttl_info,
-    Block & block,
+    const Block & block,
     bool update_part_min_max_ttls)
 {
-    bool remove_column = false;
-    if (!block.has(ttl_entry.result_column))
-    {
-        ttl_entry.expression->execute(block);
-        remove_column = true;
-    }
+    Block block_copy = block;
+    if (!block_copy.has(ttl_entry.result_column))
+        ttl_entry.expression->execute(block_copy);
 
-    const auto & current = block.getByName(ttl_entry.result_column);
-
-    const IColumn * column = current.column.get();
+    const IColumn * column = block_copy.getByName(ttl_entry.result_column).column.get();
     if (const ColumnUInt16 * column_date = typeid_cast<const ColumnUInt16 *>(column))
     {
         const auto & date_lut = DateLUT::instance();
@@ -127,9 +122,6 @@ void updateTTL(
 
     if (update_part_min_max_ttls)
         ttl_infos.updatePartMinMaxTTL(ttl_info.min, ttl_info.max);
-
-    if (remove_column)
-        block.erase(ttl_entry.result_column);
 }
 
 }
@@ -271,6 +263,18 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa
             sync_guard.emplace(disk, full_path);
     }
 
+    if (metadata_snapshot->hasRowsTTL())
+        updateTTL(metadata_snapshot->getRowsTTL(), new_data_part->ttl_infos, new_data_part->ttl_infos.table_ttl, block, true);
+
+    for (const auto & [name, ttl_entry] : metadata_snapshot->getColumnTTLs())
+        updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.columns_ttl[name], block, true);
+
+    const auto & recompression_ttl_entries = metadata_snapshot->getRecompressionTTLs();
+    for (const auto & ttl_entry : recompression_ttl_entries)
+        updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.recompression_ttl[ttl_entry.result_column], block, false);
+
+    new_data_part->ttl_infos.update(move_ttl_infos);
+
     /// If we need to calculate some columns to sort.
     if (metadata_snapshot->hasSortingKey() || metadata_snapshot->hasSecondaryIndices())
         data.getSortingKeyAndSkipIndicesExpression(metadata_snapshot)->execute(block);
@@ -299,18 +303,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa
             ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterBlocksAlreadySorted);
     }
 
-    if (metadata_snapshot->hasRowsTTL())
-        updateTTL(metadata_snapshot->getRowsTTL(), new_data_part->ttl_infos, new_data_part->ttl_infos.table_ttl, block, true);
-
-    for (const auto & [name, ttl_entry] : metadata_snapshot->getColumnTTLs())
-        updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.columns_ttl[name], block, true);
-
-    const auto & recompression_ttl_entries = metadata_snapshot->getRecompressionTTLs();
-    for (const auto & ttl_entry : recompression_ttl_entries)
-        updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.recompression_ttl[ttl_entry.result_column], block, false);
-
-    new_data_part->ttl_infos.update(move_ttl_infos);
-
     /// This effectively chooses minimal compression method:
     ///  either default lz4 or compression method with zero thresholds on absolute and relative part size.
     auto compression_codec = data.global_context.chooseCompressionCodec(0, 0);
diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp
index 7f55badf819..7499f1de292 100644
--- a/src/Storages/TTLDescription.cpp
+++ b/src/Storages/TTLDescription.cpp
@@ -10,6 +10,8 @@
 #include <Storages/ColumnsDescription.h>
 #include <Interpreters/Context.h>
 
+#include <Parsers/queryToString.h>
+
 
 #include <DataTypes/DataTypeDate.h>
 #include <DataTypes/DataTypeDateTime.h>
@@ -196,59 +198,20 @@ TTLDescription TTLDescription::getTTLFromAST(
                         ErrorCodes::BAD_TTL_EXPRESSION);
             }
 
-            for (const auto & [name, value] : ttl_element->group_by_aggregations)
-            {
-                if (primary_key_columns_set.count(name))
-                    throw Exception(
-                        "Can not set custom aggregation for column in primary key in TTL Expression",
-                        ErrorCodes::BAD_TTL_EXPRESSION);
-
+            for (const auto & [name, _] : ttl_element->group_by_aggregations)
                 aggregation_columns_set.insert(name);
-            }
 
             if (aggregation_columns_set.size() != ttl_element->group_by_aggregations.size())
                 throw Exception(
                     "Multiple aggregations set for one column in TTL Expression",
                     ErrorCodes::BAD_TTL_EXPRESSION);
 
-
             result.group_by_keys = Names(pk_columns.begin(), pk_columns.begin() + ttl_element->group_by_key.size());
-
             auto aggregations = ttl_element->group_by_aggregations;
 
-            for (size_t i = 0; i < pk_columns.size(); ++i)
+            for (const auto & column : columns.getOrdinary())
             {
-                ASTPtr value = primary_key.expression_list_ast->children[i]->clone();
-
-                if (i >= ttl_element->group_by_key.size())
-                {
-                    ASTPtr value_max = makeASTFunction("max", value->clone());
-                    aggregations.emplace_back(value->getColumnName(), std::move(value_max));
-                }
-
-                if (value->as<ASTFunction>())
-                {
-                    auto syntax_result = TreeRewriter(context).analyze(value, columns.getAllPhysical(), {}, {}, true);
-                    auto expr_actions = ExpressionAnalyzer(value, syntax_result, context).getActions(false);
-                    for (const auto & column : expr_actions->getRequiredColumns())
-                    {
-                        if (i < ttl_element->group_by_key.size())
-                        {
-                            ASTPtr expr = makeASTFunction("any", std::make_shared<ASTIdentifier>(column));
-                            aggregations.emplace_back(column, std::move(expr));
-                        }
-                        else
-                        {
-                            ASTPtr expr = makeASTFunction("argMax", std::make_shared<ASTIdentifier>(column), value->clone());
-                            aggregations.emplace_back(column, std::move(expr));
-                        }
-                    }
-                }
-            }
-
-            for (const auto & column : columns.getAllPhysical())
-            {
-                if (!primary_key_columns_set.count(column.name) && !aggregation_columns_set.count(column.name))
+                if (!aggregation_columns_set.count(column.name))
                 {
                     ASTPtr expr = makeASTFunction("any", std::make_shared<ASTIdentifier>(column.name));
                     aggregations.emplace_back(column.name, std::move(expr));
@@ -280,8 +243,6 @@ TTLDescription TTLDescription::getTTLFromAST(
     }
 
     checkTTLExpression(result.expression, result.result_column);
-
-
     return result;
 }
 
diff --git a/tests/queries/0_stateless/01506_ttl_same_with_order_by.reference b/tests/queries/0_stateless/01506_ttl_same_with_order_by.reference
new file mode 100644
index 00000000000..f8f36434a82
--- /dev/null
+++ b/tests/queries/0_stateless/01506_ttl_same_with_order_by.reference
@@ -0,0 +1,4 @@
+2020-01-01 00:00:00	3
+2020-01-01 00:00:00	2020-01-01 00:00:00	111
+1
+0
diff --git a/tests/queries/0_stateless/01506_ttl_same_with_order_by.sql b/tests/queries/0_stateless/01506_ttl_same_with_order_by.sql
new file mode 100644
index 00000000000..7a0fb86330b
--- /dev/null
+++ b/tests/queries/0_stateless/01506_ttl_same_with_order_by.sql
@@ -0,0 +1,78 @@
+DROP TABLE IF EXISTS derived_metrics_local;
+
+CREATE TABLE derived_metrics_local
+(
+  timestamp DateTime,
+  bytes UInt64
+)
+ENGINE=SummingMergeTree()
+PARTITION BY toYYYYMMDD(timestamp)
+ORDER BY (toStartOfHour(timestamp), timestamp)
+TTL toStartOfHour(timestamp) + INTERVAL 1 HOUR GROUP BY toStartOfHour(timestamp)
+SET bytes=max(bytes);
+
+INSERT INTO derived_metrics_local values('2020-01-01 00:00:00', 1);
+INSERT INTO derived_metrics_local values('2020-01-01 00:01:00', 3);
+INSERT INTO derived_metrics_local values('2020-01-01 00:02:00', 2);
+
+OPTIMIZE TABLE derived_metrics_local FINAL;
+SELECT * FROM derived_metrics_local;
+
+DROP TABLE derived_metrics_local;
+
+CREATE TABLE derived_metrics_local 
+(
+  timestamp DateTime,
+  timestamp_h DateTime materialized toStartOfHour(timestamp),
+  bytes UInt64
+)
+ENGINE=SummingMergeTree()
+PARTITION BY toYYYYMMDD(timestamp)
+ORDER BY (timestamp_h, timestamp)
+TTL toStartOfHour(timestamp) + INTERVAL 1 HOUR GROUP BY timestamp_h
+SET bytes=max(bytes), timestamp = toStartOfHour(any(timestamp));
+
+INSERT INTO derived_metrics_local values('2020-01-01 00:01:00', 111);
+INSERT INTO derived_metrics_local values('2020-01-01 00:19:22', 22);
+INSERT INTO derived_metrics_local values('2020-01-01 00:59:02', 1);
+
+OPTIMIZE TABLE derived_metrics_local FINAL;
+SELECT timestamp, timestamp_h, bytes FROM derived_metrics_local;
+
+DROP TABLE IF EXISTS derived_metrics_local;
+
+CREATE TABLE derived_metrics_local
+(
+  timestamp DateTime,
+  bytes UInt64 TTL toStartOfHour(timestamp) + INTERVAL 1 HOUR
+)
+ENGINE=MergeTree()
+ORDER BY (toStartOfHour(timestamp), timestamp)
+SETTINGS min_bytes_for_wide_part = 0;
+
+INSERT INTO derived_metrics_local values('2020-01-01 00:01:00', 111) ('2020-01-01 00:19:22', 22) ('2100-01-01 00:19:22', 1);
+
+OPTIMIZE TABLE derived_metrics_local FINAL;
+SELECT sum(bytes) FROM derived_metrics_local;
+
+DROP TABLE IF EXISTS derived_metrics_local;
+
+CREATE TABLE derived_metrics_local
+(
+  timestamp DateTime,
+  bytes UInt64
+)
+ENGINE=MergeTree()
+PARTITION BY toYYYYMMDD(timestamp)
+ORDER BY (toStartOfHour(timestamp), timestamp)
+TTL toStartOfHour(timestamp) + INTERVAL 1 HOUR
+SETTINGS min_bytes_for_wide_part = 0;
+
+INSERT INTO derived_metrics_local values('2020-01-01 00:01:00', 111);
+INSERT INTO derived_metrics_local values('2020-01-01 00:19:22', 22);
+INSERT INTO derived_metrics_local values('2020-01-01 00:59:02', 1);
+
+OPTIMIZE TABLE derived_metrics_local FINAL;
+SELECT count() FROM derived_metrics_local;
+
+DROP TABLE IF EXISTS derived_metrics_local;

From 534e199c43651507216f912f86dbc59510edcc6e Mon Sep 17 00:00:00 2001
From: Daria Mozhaeva <dmozhaeva@yandex-team.ru>
Date: Wed, 30 Sep 2020 11:32:57 +0400
Subject: [PATCH 0050/1238] Edit and translate to Russian.

---
 .../settings.md                               |   8 +-
 docs/en/operations/settings/settings.md       |  98 +++----
 docs/en/sql-reference/statements/system.md    |   6 +-
 .../settings.md                               |   6 +-
 docs/ru/operations/settings/settings.md       | 242 +++++++++++++-----
 5 files changed, 235 insertions(+), 125 deletions(-)

diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md
index c1ac1d0d92d..d89f74f6bdc 100644
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@@ -351,15 +351,15 @@ Keys for syslog:
 ## send\_crash\_reports {#server_configuration_parameters-logger}
 
 Settings for opt-in sending crash reports to the ClickHouse core developers team via [Sentry](https://sentry.io).
-Enabling it, especially in pre-production environments, is greatly appreciated.
+Enabling it, especially in pre-production environments, is highly appreciated.
 
-The server will need an access to public Internet via IPv4 (at the time of writing IPv6 is not supported by Sentry) for this feature to be functioning properly.
+The server will need access to the public Internet via IPv4 (at the time of writing IPv6 is not supported by Sentry) for this feature to be functioning properly.
 
 Keys:
 
 -   `enabled` – Boolean flag to enable the feature, `false` by default. Set to `true` to allow sending crash reports. 
--   `endpoint` – You can override the Sentry endpoint URL for sending crash reports. It can be either separate Sentry account or your self-hosted Sentry instance. Use the [Sentry DSN](https://docs.sentry.io/error-reporting/quickstart/?platform=native#configure-the-sdk) syntax. 
--   `anonymize` - Avoid attaching the server hostname to crash report.
+-   `endpoint` – You can override the Sentry endpoint URL for sending crash reports. It can be either a separate Sentry account or your self-hosted Sentry instance. Use the [Sentry DSN](https://docs.sentry.io/error-reporting/quickstart/?platform=native#configure-the-sdk) syntax. 
+-   `anonymize` - Avoid attaching the server hostname to the crash report.
 -   `http_proxy` - Configure HTTP proxy for sending crash reports.
 -   `debug` - Sets the Sentry client into debug mode.
 -   `tmp_path` - Filesystem path for temporary crash report state.
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 4995c04f712..ee7eb1fd6be 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -2,7 +2,7 @@
 
 ## distributed\_product\_mode {#distributed-product-mode}
 
-Changes the behavior of [distributed subqueries](../../sql-reference/operators/in.md).
+Changes the behaviour of [distributed subqueries](../../sql-reference/operators/in.md).
 
 ClickHouse applies this setting when the query contains the product of distributed tables, i.e. when the query for a distributed table contains a non-GLOBAL subquery for the distributed table.
 
@@ -42,7 +42,7 @@ Consider the following queries:
 
 If `enable_optimize_predicate_expression = 1`, then the execution time of these queries is equal because ClickHouse applies `WHERE` to the subquery when processing it.
 
-If `enable_optimize_predicate_expression = 0`, then the execution time of the second query is much longer, because the `WHERE` clause applies to all the data after the subquery finishes.
+If `enable_optimize_predicate_expression = 0`, then the execution time of the second query is much longer because the `WHERE` clause applies to all the data after the subquery finishes.
 
 ## fallback\_to\_stale\_replicas\_for\_distributed\_queries {#settings-fallback_to_stale_replicas_for_distributed_queries}
 
@@ -215,7 +215,7 @@ Ok.
 
 ## input\_format\_values\_deduce\_templates\_of\_expressions {#settings-input_format_values_deduce_templates_of_expressions}
 
-Enables or disables template deduction for SQL expressions in [Values](../../interfaces/formats.md#data-format-values) format. It allows parsing and interpreting expressions in `Values` much faster if expressions in consecutive rows have the same structure. ClickHouse tries to deduce template of an expression, parse the following rows using this template and evaluate the expression on a batch of successfully parsed rows.
+Enables or disables template deduction for SQL expressions in [Values](../../interfaces/formats.md#data-format-values) format. It allows parsing and interpreting expressions in `Values` much faster if expressions in consecutive rows have the same structure. ClickHouse tries to deduce the template of an expression, parse the following rows using this template and evaluate the expression on a batch of successfully parsed rows.
 
 Possible values:
 
@@ -236,7 +236,7 @@ INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), (
 
 ## input\_format\_values\_accurate\_types\_of\_literals {#settings-input-format-values-accurate-types-of-literals}
 
-This setting is used only when `input_format_values_deduce_templates_of_expressions = 1`. It can happen, that expressions for some column have the same structure, but contain numeric literals of different types, e.g.
+This setting is used only when `input_format_values_deduce_templates_of_expressions = 1`. Expressions for some column may have the same structure, but contain numeric literals of different types, e.g.
 
 ``` sql
 (..., abs(0), ...),             -- UInt64 literal
@@ -278,7 +278,7 @@ Disabled by default.
 
 ## input\_format\_null\_as\_default {#settings-input-format-null-as-default}
 
-Enables or disables using default values if input data contain `NULL`, but data type of the corresponding column in not `Nullable(T)` (for text input formats).
+Enables or disables using default values if input data contain `NULL`, but the data type of the corresponding column in not `Nullable(T)` (for text input formats).
 
 ## input\_format\_skip\_unknown\_fields {#settings-input-format-skip-unknown-fields}
 
@@ -395,7 +395,7 @@ See also:
 
 ## join\_use\_nulls {#join_use_nulls}
 
-Sets the type of [JOIN](../../sql-reference/statements/select/join.md) behavior. When merging tables, empty cells may appear. ClickHouse fills them differently based on this setting.
+Sets the type of [JOIN](../../sql-reference/statements/select/join.md) behaviour. When merging tables, empty cells may appear. ClickHouse fills them differently based on this setting.
 
 Possible values:
 
@@ -424,8 +424,8 @@ Limits sizes of right-hand join data blocks in partial merge join algorithm for
 ClickHouse server:
 
 1.  Splits right-hand join data into blocks with up to the specified number of rows.
-2.  Indexes each block with their minimum and maximum values
-3.  Unloads prepared blocks to disk if possible.
+2.  Indexes each block with its minimum and maximum values.
+3.  Unloads prepared blocks to disk if it is possible.
 
 Possible values:
 
@@ -447,25 +447,25 @@ Default value: 64.
 
 ## any\_join\_distinct\_right\_table\_keys {#any_join_distinct_right_table_keys}
 
-Enables legacy ClickHouse server behavior in `ANY INNER|LEFT JOIN` operations.
+Enables legacy ClickHouse server behaviour in `ANY INNER|LEFT JOIN` operations.
 
 !!! note "Warning"
-    Use this setting only for the purpose of backward compatibility if your use cases depend on legacy `JOIN` behavior.
+    Use this setting only for backward compatibility if your use cases depend on legacy `JOIN` behaviour.
 
-When the legacy behavior enabled:
+When the legacy behaviour enabled:
 
 -   Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are not equal because ClickHouse uses the logic with many-to-one left-to-right table keys mapping.
 -   Results of `ANY INNER JOIN` operations contain all rows from the left table like the `SEMI LEFT JOIN` operations do.
 
-When the legacy behavior disabled:
+When the legacy behaviour disabled:
 
 -   Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are equal because ClickHouse uses the logic which provides one-to-many keys mapping in `ANY RIGHT JOIN` operations.
--   Results of `ANY INNER JOIN` operations contain one row per key from both left and right tables.
+-   Results of `ANY INNER JOIN` operations contain one row per key from both the left and right tables.
 
 Possible values:
 
--   0 — Legacy behavior is disabled.
--   1 — Legacy behavior is enabled.
+-   0 — Legacy behaviour is disabled.
+-   1 — Legacy behaviour is enabled.
 
 Default value: 0.
 
@@ -634,7 +634,7 @@ Possible values:
 
 Default value: `QUERY_START`.
 
-Can be used to limit which entiries will goes to `query_log`, say you are interesting only in errors, then you can use `EXCEPTION_WHILE_PROCESSING`:
+Can be used to limit which entities will go to `query_log`, say you are interested only in errors, then you can use `EXCEPTION_WHILE_PROCESSING`:
 
 ``` text
 log_queries_min_type='EXCEPTION_WHILE_PROCESSING'
@@ -662,11 +662,11 @@ The setting also doesn’t have a purpose when using INSERT SELECT, since data i
 
 Default value: 1,048,576.
 
-The default is slightly more than `max_block_size`. The reason for this is because certain table engines (`*MergeTree`) form a data part on the disk for each inserted block, which is a fairly large entity. Similarly, `*MergeTree` tables sort data during insertion and a large enough block size allow sorting more data in RAM.
+The default is slightly more than `max_block_size`. The reason for this is because certain table engines (`*MergeTree`) form a data part on the disk for each inserted block, which is a fairly large entity. Similarly, `*MergeTree` tables sort data during insertion, and a large enough block size allow sorting more data in RAM.
 
 ## min\_insert\_block\_size\_rows {#min-insert-block-size-rows}
 
-Sets minimum number of rows in block which can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones.
+Sets the minimum number of rows in the block which can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones.
 
 Possible values:
 
@@ -677,7 +677,7 @@ Default value: 1048576.
 
 ## min\_insert\_block\_size\_bytes {#min-insert-block-size-bytes}
 
-Sets minimum number of bytes in block which can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones.
+Sets the minimum number of bytes in the block which can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones.
 
 Possible values:
 
@@ -754,7 +754,7 @@ Default value: 256 KiB.
 
 ## max\_parser\_depth {#max_parser_depth}
 
-Limits maximum recursion depth in the recursive descent parser. Allows to control stack size.
+Limits maximum recursion depth in the recursive descent parser. Allows controlling the stack size.
 
 Possible values:
 
@@ -865,12 +865,12 @@ Yandex.Metrica uses this parameter set to 1 for implementing suggestions for seg
 
 ## replace\_running\_query\_max\_wait\_ms {#replace-running-query-max-wait-ms}
 
-The wait time for running query with the same `query_id` to finish, when the [replace_running_query](#replace-running-query) setting is active.
+The wait time for running the query with the same `query_id` to finish, when the [replace_running_query](#replace-running-query) setting is active.
 
 Possible values:
 
 - Positive integer.
-- 0 — Throwing an exception that does not allow to run a new query if the server already executes a query with the  same `query_id`.
+- 0 — Throwing an exception that does not allow to run a new query if the server already executes a query with the same `query_id`.
 
 Default value: 5000.
 
@@ -946,7 +946,7 @@ The `first_or_random` algorithm solves the problem of the `in_order` algorithm.
 load_balancing = round_robin
 ```
 
-This algorithm uses round robin policy across replicas with the same number of errors (only the queries with `round_robin` policy is accounted).
+This algorithm uses a round-robin policy across replicas with the same number of errors (only the queries with `round_robin` policy is accounted).
 
 ## prefer\_localhost\_replica {#settings-prefer-localhost-replica}
 
@@ -983,7 +983,7 @@ Replica lag is not controlled.
 Enable compilation of queries. By default, 0 (disabled).
 
 The compilation is only used for part of the query-processing pipeline: for the first stage of aggregation (GROUP BY).
-If this portion of the pipeline was compiled, the query may run faster due to deployment of short cycles and inlining aggregate function calls. The maximum performance improvement (up to four times faster in rare cases) is seen for queries with multiple simple aggregate functions. Typically, the performance gain is insignificant. In very rare cases, it may slow down query execution.
+If this portion of the pipeline was compiled, the query may run faster due to the deployment of short cycles and inlining aggregate function calls. The maximum performance improvement (up to four times faster in rare cases) is seen for queries with multiple simple aggregate functions. Typically, the performance gain is insignificant. In very rare cases, it may slow down query execution.
 
 ## min\_count\_to\_compile {#min-count-to-compile}
 
@@ -1099,7 +1099,7 @@ When `output_format_json_quote_denormals = 1`, the query returns:
 
 ## format\_csv\_delimiter {#settings-format_csv_delimiter}
 
-The character interpreted as a delimiter in the CSV data. By default, the delimiter is `,`.
+The character is interpreted as a delimiter in the CSV data. By default, the delimiter is `,`.
 
 ## input\_format\_csv\_unquoted\_null\_literal\_as\_null {#settings-input_format_csv_unquoted_null_literal_as_null}
 
@@ -1142,7 +1142,7 @@ See also:
 
 ## insert\_quorum\_timeout {#settings-insert_quorum_timeout}
 
-Write to quorum timeout in seconds. If the timeout has passed and no write has taken place yet, ClickHouse will generate an exception and the client must repeat the query to write the same block to the same or any other replica.
+Write to a quorum timeout in seconds. If the timeout has passed and no write has taken place yet, ClickHouse will generate an exception and the client must repeat the query to write the same block to the same or any other replica.
 
 Default value: 60 seconds.
 
@@ -1198,8 +1198,8 @@ Default value: 0.
 Usage
 
 By default, deduplication is not performed for materialized views but is done upstream, in the source table.
-If an INSERTed block is skipped due to deduplication in the source table, there will be no insertion into attached materialized views. This behaviour exists to enable insertion of highly aggregated data into materialized views, for cases where inserted blocks are the same after materialized view aggregation but derived from different INSERTs into the source table.
-At the same time, this behaviour “breaks” `INSERT` idempotency. If an `INSERT` into the main table was successful and `INSERT` into a materialized view failed (e.g. because of communication failure with Zookeeper) a client will get an error and can retry the operation. However, the materialized view won’t receive the second insert because it will be discarded by deduplication in the main (source) table. The setting `deduplicate_blocks_in_dependent_materialized_views` allows for changing this behaviour. On retry, a materialized view will receive the repeat insert and will perform deduplication check by itself,
+If an INSERTed block is skipped due to deduplication in the source table, there will be no insertion into attached materialized views. This behaviour exists to enable the insertion of highly aggregated data into materialized views, for cases where inserted blocks are the same after materialized view aggregation but derived from different INSERTs into the source table.
+At the same time, this behaviour “breaks” `INSERT` idempotency. If an `INSERT` into the main table was successful and `INSERT` into a materialized view failed (e.g. because of communication failure with Zookeeper) a client will get an error and can retry the operation. However, the materialized view won’t receive the second insert because it will be discarded by deduplication in the main (source) table. The setting `deduplicate_blocks_in_dependent_materialized_views` allows for changing this behaviour. On retry, a materialized view will receive the repeat insert and will perform a deduplication check by itself,
 ignoring check result for the source table, and will insert rows lost because of the first failure.
 
 ## max\_network\_bytes {#settings-max-network-bytes}
@@ -1355,7 +1355,7 @@ Default value: 0.
 -   Type: seconds
 -   Default value: 60 seconds
 
-Controls how fast errors in distributed tables are zeroed. If a replica is unavailable for some time, accumulates 5 errors, and distributed\_replica\_error\_half\_life is set to 1 second, then the replica is considered normal 3 seconds after last error.
+Controls how fast errors in distributed tables are zeroed. If a replica is unavailable for some time, accumulates 5 errors, and distributed\_replica\_error\_half\_life is set to 1 second, then the replica is considered normal 3 seconds after the last error.
 
 See also:
 
@@ -1369,7 +1369,7 @@ See also:
 -   Type: unsigned int
 -   Default value: 1000
 
-Error count of each replica is capped at this value, preventing a single replica from accumulating too many errors.
+The error count of each replica is capped at this value, preventing a single replica from accumulating too many errors.
 
 See also:
 
@@ -1383,7 +1383,7 @@ See also:
 -   Type: unsigned int
 -   Default value: 0
 
-Number of errors that will be ignored while choosing replicas (according to `load_balancing` algorithm).
+The number of errors that will be ignored while choosing replicas (according to `load_balancing` algorithm).
 
 See also:
 
@@ -1414,7 +1414,7 @@ Default value: 30000 milliseconds (30 seconds).
 
 ## distributed\_directory\_monitor\_batch\_inserts {#distributed_directory_monitor_batch_inserts}
 
-Enables/disables sending of inserted data in batches.
+Enables/disables inserted data sending in batches.
 
 When batch sending is enabled, the [Distributed](../../engines/table-engines/special/distributed.md) table engine tries to send multiple files of inserted data in one operation instead of sending them separately. Batch sending improves cluster performance by better-utilizing server and network resources.
 
@@ -1507,7 +1507,7 @@ Default value: 0.
 -   Type: bool
 -   Default value: True
 
-Enable order-preserving parallel parsing of data formats. Supported only for TSV, TKSV, CSV and JSONEachRow formats.
+Enable order-preserving parallel parsing of data formats. Supported only for TSV, TKSV, CSV, and JSONEachRow formats.
 
 ## min\_chunk\_bytes\_for\_parallel\_parsing {#min-chunk-bytes-for-parallel-parsing}
 
@@ -1559,7 +1559,7 @@ Default value: 0.
 
 ## background\_pool\_size {#background_pool_size}
 
-Sets the number of threads performing background operations in table engines (for example, merges in [MergeTree engine](../../engines/table-engines/mergetree-family/index.md) tables). This setting is applied from `default` profile at ClickHouse server start and can’t be changed in a user session. By adjusting this setting, you manage CPU and disk load. Smaller pool size utilizes less CPU and disk resources, but background processes advance slower which might eventually impact query performance.
+Sets the number of threads performing background operations in table engines (for example, merges in [MergeTree engine](../../engines/table-engines/mergetree-family/index.md) tables). This setting is applied from the `default` profile at the ClickHouse server start and can’t be changed in a user session. By adjusting this setting, you manage CPU and disk load. Smaller pool size utilizes less CPU and disk resources, but background processes advance slower which might eventually impact query performance.
 
 Before changing it, please also take a look at related [MergeTree settings](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-merge_tree), such as `number_of_free_entries_in_pool_to_lower_max_size_of_merge` and `number_of_free_entries_in_pool_to_execute_mutation`.
 
@@ -1578,8 +1578,8 @@ If we execute `INSERT INTO distributed_table_a SELECT ... FROM distributed_table
 Possible values:
 
 -   0 — Disabled.
--   1 — `SELECT` will be executed on each shard from underlying table of the distributed engine.
--   2 — `SELECT` and `INSERT` will be executed on each shard from/to underlying table of the distributed engine.
+-   1 — `SELECT` will be executed on each shard from the underlying table of the distributed engine.
+-   2 — `SELECT` and `INSERT` will be executed on each shard from/to the underlying table of the distributed engine.
 
 Default value: 0.
 
@@ -1602,7 +1602,7 @@ Default value: `0`.
 -   [Managing Distributed Tables](../../sql-reference/statements/system.md#query-language-system-distributed)
 ## background\_buffer\_flush\_schedule\_pool\_size {#background_buffer_flush_schedule_pool_size}
 
-Sets the number of threads performing background flush in [Buffer](../../engines/table-engines/special/buffer.md)-engine tables. This setting is applied at ClickHouse server start and can’t be changed in a user session.
+Sets the number of threads performing background flush in [Buffer](../../engines/table-engines/special/buffer.md)-engine tables. This setting is applied at the ClickHouse server start and can’t be changed in a user session.
 
 Possible values:
 
@@ -1612,7 +1612,7 @@ Default value: 16.
 
 ## background\_move\_pool\_size {#background_move_pool_size}
 
-Sets the number of threads performing background moves of data parts for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes)-engine tables. This setting is applied at ClickHouse server start and can’t be changed in a user session.
+Sets the number of threads performing background moves of data parts for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes)-engine tables. This setting is applied at the ClickHouse server start and can’t be changed in a user session.
 
 Possible values:
 
@@ -1634,7 +1634,7 @@ Default value: 16.
 
 Prohibits data parts merging in [Replicated\*MergeTree](../../engines/table-engines/mergetree-family/replication.md)-engine tables.
 
-When merging is prohibited, the replica never merges parts and always downloads merged parts from other replicas. If there is no required data yet, the replica waits for it. CPU and disk load on the replica server decreases, but the network load on cluster increases. This setting can be useful on servers with relatively weak CPUs or slow disks, such as servers for backups storage.
+When merging is prohibited, the replica never merges parts and always downloads merged parts from other replicas. If there is no required data yet, the replica waits for it. CPU and disk load on the replica server decreases, but the network load on the cluster increases. This setting can be useful on servers with relatively weak CPUs or slow disks, such as servers for backups storage.
 
 Possible values:
 
@@ -1649,7 +1649,7 @@ Default value: 0.
 
 ## background\_distributed\_schedule\_pool\_size {#background_distributed_schedule_pool_size}
 
-Sets the number of threads performing background tasks for [distributed](../../engines/table-engines/special/distributed.md) sends. This setting is applied at ClickHouse server start and can’t be changed in a user session.
+Sets the number of threads performing background tasks for [distributed](../../engines/table-engines/special/distributed.md) sends. This setting is applied at the ClickHouse server start and can’t be changed in a user session.
 
 Possible values:
 
@@ -1740,7 +1740,7 @@ Default value: 8192.
 
 Turns on or turns off using of single dictionary for the data part.
 
-By default, ClickHouse server monitors the size of dictionaries and if a dictionary overflows then the server starts to write the next one. To prohibit creating several dictionaries set `low_cardinality_use_single_dictionary_for_part = 1`.
+By default, the ClickHouse server monitors the size of dictionaries and if a dictionary overflows then the server starts to write the next one. To prohibit creating several dictionaries set `low_cardinality_use_single_dictionary_for_part = 1`.
 
 Possible values:
 
@@ -1785,7 +1785,7 @@ Default value: 0.
 
 ## min\_insert\_block\_size\_rows\_for\_materialized\_views {#min-insert-block-size-rows-for-materialized-views}
 
-Sets minimum number of rows in block which can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones. This setting is applied only for blocks inserted into [materialized view](../../sql-reference/statements/create/view.md). By adjusting this setting, you control blocks squashing while pushing to materialized view and avoid excessive memory usage.
+Sets the minimum number of rows in the block which can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones. This setting is applied only for blocks inserted into [materialized view](../../sql-reference/statements/create/view.md). By adjusting this setting, you control blocks squashing while pushing to materialized view and avoid excessive memory usage.
 
 Possible values:
 
@@ -1800,7 +1800,7 @@ Default value: 1048576.
 
 ## min\_insert\_block\_size\_bytes\_for\_materialized\_views {#min-insert-block-size-bytes-for-materialized-views}
 
-Sets minimum number of bytes in block which can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones. This setting is applied only for blocks inserted into [materialized view](../../sql-reference/statements/create/view.md). By adjusting this setting, you control blocks squashing while pushing to materialized view and avoid excessive memory usage.
+Sets the minimum number of bytes in the block which can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones. This setting is applied only for blocks inserted into [materialized view](../../sql-reference/statements/create/view.md). By adjusting this setting, you control blocks squashing while pushing to materialized view and avoid excessive memory usage.
 
 Possible values:
 
@@ -1815,7 +1815,7 @@ Default value: 268435456.
 
 ## output\_format\_pretty\_grid\_charset {#output-format-pretty-grid-charset}
 
-Allows to change a charset which is used for printing grids borders. Available charsets are following: UTF-8, ASCII.
+Allows changing a charset which is used for printing grids borders. Available charsets are UTF-8, ASCII.
 
 **Example**
 
@@ -1872,12 +1872,12 @@ When `ttl_only_drop_parts` is disabled (by default), the ClickHouse server only
 
 When `ttl_only_drop_parts` is enabled, the ClickHouse server drops a whole part when all rows in it are expired. 
 
-Dropping whole parts instead of partial cleaning TTL-d rows allows to have shorter `merge_with_ttl_timeout` times and lower impact on system performance.
+Dropping whole parts instead of partial cleaning TTL-d rows allows having shorter `merge_with_ttl_timeout` times and lower impact on system performance.
 
 Possible values:
 
--   0 — Complete dropping of data parts is disabled.
--   1 — Complete dropping of data parts is enabled.
+-   0 — The complete dropping of data parts is disabled.
+-   1 — The complete dropping of data parts is enabled.
 
 Default value: `0`.
 
@@ -1888,9 +1888,9 @@ Default value: `0`.
 
 ## lock_acquire_timeout {#lock_acquire_timeout}
 
-Defines how many seconds locking request waits before failing. 
+Defines how many seconds a locking request waits before failing. 
 
-Locking timeout is used to protect from deadlocks while executing read/write operations with tables. When timeout expires and locking request fails, the ClickHouse server throws an exeption "Locking attempt timed out! Possible deadlock avoided. Client should retry." with error code `DEADLOCK_AVOIDED`.
+Locking timeout is used to protect from deadlocks while executing read/write operations with tables. When the timeout expires and the locking request fails, the ClickHouse server throws an exception "Locking attempt timed out! Possible deadlock avoided. Client should retry." with error code `DEADLOCK_AVOIDED`.
 
 Possible values:
 
diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md
index f6ff264e827..a9f9b718de6 100644
--- a/docs/en/sql-reference/statements/system.md
+++ b/docs/en/sql-reference/statements/system.md
@@ -81,12 +81,12 @@ SYSTEM DROP REPLICA 'replica_name';
 SYSTEM DROP REPLICA 'replica_name' FROM ZKPATH '/path/to/table/in/zk';
 ```
 
-Queries will remove the replica path in ZooKeeper. It’s useful when replica is dead and its metadata cannot be removed from ZooKeeper by `DROP TABLE` because there is no such table anymore. It will only drop the inactive/stale replica, and it can’t drop local replica, please use `DROP TABLE` for that. `DROP REPLICA` does not drop any tables and does not remove any data or metadata from disk.
+Queries will remove the replica path in ZooKeeper. It is useful when the replica is dead and its metadata cannot be removed from ZooKeeper by `DROP TABLE` because there is no such table anymore. It will only drop the inactive/stale replica, and it cannot drop local replica, please use `DROP TABLE` for that. `DROP REPLICA` does not drop any tables and does not remove any data or metadata from disk.
 
 The first one removes metadata of `'replica_name'` replica of `database.table` table.
 The second one does the same for all replicated tables in the database.
-The third one does the same for all replicated tables on local server.
-The forth one is useful to remove metadata of dead replica when all other replicas of a table were dropped. It requires the table path to be specified explicitly. It must be the same path as was passed to the first argument of `ReplicatedMergeTree` engine on table creation.
+The third one does the same for all replicated tables on the local server.
+The fourth one is useful to remove metadata of dead replica when all other replicas of a table were dropped. It requires the table path to be specified explicitly. It must be the same path as was passed to the first argument of `ReplicatedMergeTree` engine on table creation.
 
 ## DROP UNCOMPRESSED CACHE {#query_language-system-drop-uncompressed-cache}
 
diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md
index 795a9f5893a..0abb568ffc7 100644
--- a/docs/ru/operations/server-configuration-parameters/settings.md
+++ b/docs/ru/operations/server-configuration-parameters/settings.md
@@ -492,11 +492,11 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
 
 ## max\_thread\_pool\_size {#max-thread-pool-size}
 
-Максимальное кол-во потоков в глобальном пуле потоков.
+Максимальное количество потоков в глобальном пуле потоков.
 
-Default value: 10000.
+Значение по умолчанию: 10000.
 
-**Example**
+**Пример**
 
 ``` xml
 <max_thread_pool_size>12000</max_thread_pool_size>
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index 9a487b6c166..15c4139a3f3 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -281,6 +281,14 @@ INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), (
 
 Значение по умолчанию: 1.
 
+## input\_format\_tsv\_empty\_as\_default {#settings-input-format-tsv-empty-as-default}
+
+Если эта настройка включена, замените пустые поля ввода в TSV значениями по умолчанию. Для сложных выражений по умолчанию также должна быть включена настройка `input_format_defaults_for_omitted_fields`.
+
+По умолчанию отключена.
+
+Disabled by default.
+
 ## input\_format\_null\_as\_default {#settings-input-format-null-as-default}
 
 Включает или отключает использование значений по умолчанию в случаях, когда во входных данных содержится `NULL`, но тип соответствующего столбца не `Nullable(T)` (для текстовых форматов).
@@ -369,7 +377,7 @@ INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), (
 
 Устанавливает строгость по умолчанию для [JOIN](../../sql-reference/statements/select/join.md#select-join).
 
-Возможные значения
+Возможные значения:
 
 -   `ALL` — если в правой таблице несколько совпадающих строк, данные умножаются на количество этих строк. Это нормальное поведение `JOIN` как в стандартном SQL.
 -   `ANY` — если в правой таблице несколько соответствующих строк, то соединяется только первая найденная. Если в «правой» таблице есть не более одной подходящей строки, то результаты `ANY` и `ALL` совпадают.
@@ -520,6 +528,31 @@ ClickHouse использует этот параметр при чтении д
 
 Значение по умолчанию: 0.
 
+## network_compression_method {#network_compression_method}
+
+Устанавливает метод сжатия данных, который используется для обмена данными между серверами и между сервером и [clickhouse-client](../../interfaces/cli.md).
+
+Возможные значения:
+
+-   `LZ4` — устанавливает метод сжатия LZ4.
+-   `ZSTD` — устанавливает метод сжатия ZSTD.
+
+Значение по умолчанию: `LZ4`.
+
+**См. также**
+
+-   [network_zstd_compression_level](#network_zstd_compression_level)
+
+## network_zstd_compression_level {#network_zstd_compression_level}
+
+Регулирует уровень сжатия ZSTD. Используется только тогда, когда [network_compression_method](#network_compression_method) установлен на `ZSTD`.
+
+Возможные значения:
+
+-   Положительное целое число от 1 до 15.
+
+Значение по умолчанию: `1`.
+
 ## log\_queries {#settings-log-queries}
 
 Установка логирования запроса.
@@ -534,42 +567,6 @@ log_queries=1
 
 ## log\_queries\_min\_type {#settings-log-queries-min-type}
 
-`query_log` минимальный уровень логирования.
-
-Возможные значения:
-- `QUERY_START` (`=1`)
-- `QUERY_FINISH` (`=2`)
-- `EXCEPTION_BEFORE_START` (`=3`)
-- `EXCEPTION_WHILE_PROCESSING` (`=4`)
-
-Значение по умолчанию: `QUERY_START`.
-
-Можно использовать для ограничения того, какие объекты будут записаны в `query_log`, например, если вас интересуют ошибки, тогда вы можете использовать `EXCEPTION_WHILE_PROCESSING`:
-
-``` text
-log_queries_min_type='EXCEPTION_WHILE_PROCESSING'
-```
-
-## log\_queries\_min\_type {#settings-log-queries-min-type}
-
-`query_log` минимальный уровень логирования.
-
-Возможные значения:
-- `QUERY_START` (`=1`)
-- `QUERY_FINISH` (`=2`)
-- `EXCEPTION_BEFORE_START` (`=3`)
-- `EXCEPTION_WHILE_PROCESSING` (`=4`)
-
-Значение по умолчанию: `QUERY_START`.
-
-Можно использовать для ограничения того, какие объекты будут записаны в `query_log`, например, если вас интересуют ошибки, тогда вы можете использовать `EXCEPTION_WHILE_PROCESSING`:
-
-``` text
-log_queries_min_type='EXCEPTION_WHILE_PROCESSING'
-```
-
-## log\_queries\_min\_type {#settings-log-queries-min-type}
-
 Задаёт минимальный уровень логирования в `query_log`.
 
 Возможные значения:
@@ -839,6 +836,11 @@ ClickHouse поддерживает следующие алгоритмы выб
 -   [Nearest hostname](#load_balancing-nearest_hostname)
 -   [In order](#load_balancing-in_order)
 -   [First or random](#load_balancing-first_or_random)
+-   [Round robin](#load_balancing-round_robin)
+
+См. также:
+
+-   [distributed\_replica\_max\_ignored\_errors](#settings-distributed_replica_max_ignored_errors)
 
 ### Random (by Default) {#load_balancing-random}
 
@@ -882,6 +884,14 @@ load_balancing = first_or_random
 
 Алгоритм `first or random` решает проблему алгоритма `in order`. При использовании `in order`, если одна реплика перестаёт отвечать, то следующая за ней принимает двойную нагрузку, в то время как все остальные обрабатываю свой обычный трафик. Алгоритм `first or random` равномерно распределяет нагрузку между репликами.
 
+### Round Robin {#load_balancing-round_robin}
+
+``` sql
+load_balancing = round_robin
+```
+
+Этот алгоритм использует циклический перебор реплик с одинаковым количеством ошибок (учитываются только запросы с алгоритмом `round_robin`).
+
 ## prefer\_localhost\_replica {#settings-prefer-localhost-replica}
 
 Включает или выключает предпочтительное использование localhost реплики при обработке распределенных запросов.
@@ -1292,6 +1302,48 @@ ClickHouse генерирует исключение
 
 Значение по умолчанию: 0.
 
+## distributed\_replica\_error\_half\_life {#settings-distributed_replica_error_half_life}
+
+-   Тип: секунды
+-   Значение по умолчанию: 60 секунд
+
+Управляет скоростью обнуления ошибок в распределенных таблицах. Если реплика недоступна в течение некоторого времени, накапливает 5 ошибок, а distributed\_replica\_error\_half\_life установлена на 1 секунду, то реплика считается нормальной через 3 секунды после последней ошибки.
+
+См. также:
+
+-   [load\_balancing](#load_balancing-round_robin)
+-   [Table engine Distributed](../../engines/table-engines/special/distributed.md)
+-   [distributed\_replica\_error\_cap](#settings-distributed_replica_error_cap)
+-   [distributed\_replica\_max\_ignored\_errors](#settings-distributed_replica_max_ignored_errors)
+
+## distributed\_replica\_error\_cap {#settings-distributed_replica_error_cap}
+
+-   Тип: unsigned int
+-   Значение по умолчанию: 1000
+
+Счетчик ошибок каждой реплики ограничен этим значением, чтобы одна реплика не накапливала слишком много ошибок.
+
+См. также:
+
+-   [load\_balancing](#load_balancing-round_robin)
+-   [Table engine Distributed](../../engines/table-engines/special/distributed.md)
+-   [distributed\_replica\_error\_half\_life](#settings-distributed_replica_error_half_life)
+-   [distributed\_replica\_max\_ignored\_errors](#settings-distributed_replica_max_ignored_errors)
+
+## distributed\_replica\_max\_ignored\_errors {#settings-distributed_replica_max_ignored_errors}
+
+-   Тип: unsigned int
+-   Значение по умолчанию: 0
+
+Количество ошибок, которые будут проигнорированы при выборе реплик (согласно алгоритму `load_balancing`).
+
+См. также:
+
+-   [load\_balancing](#load_balancing-round_robin)
+-   [Table engine Distributed](../../engines/table-engines/special/distributed.md)
+-   [distributed\_replica\_error\_cap](#settings-distributed_replica_error_cap)
+-   [distributed\_replica\_error\_half\_life](#settings-distributed_replica_error_half_life)
+
 ## distributed\_directory\_monitor\_sleep\_time\_ms {#distributed_directory_monitor_sleep_time_ms}
 
 Основной интервал отправки данных движком таблиц [Distributed](../../engines/table-engines/special/distributed.md). Фактический интервал растёт экспоненциально при возникновении ошибок.
@@ -1342,65 +1394,103 @@ ClickHouse генерирует исключение
 
 ## query\_profiler\_real\_time\_period\_ns {#query_profiler_real_time_period_ns}
 
-Sets the period for a real clock timer of the [query profiler](../../operations/optimizing-performance/sampling-query-profiler.md). Real clock timer counts wall-clock time.
+Устанавливает период для таймера реального времени [профилировщика запросов](../../operations/optimizing-performance/sampling-query-profiler.md). Таймер реального времени считает wall-clock time.
 
-Possible values:
+Возможные значения:
 
--   Positive integer number, in nanoseconds.
+-   Положительное целое число в наносекундах.
 
-        Recommended values:
+        Рекомендуемые значения:
 
-            - 10000000 (100 times a second) nanoseconds and less for single queries.
-            - 1000000000 (once a second) for cluster-wide profiling.
+            - 10000000 (100 раз в секунду) наносекунд и меньшее значение для одиночных запросов.
+            - 1000000000 (раз в секунду) для профилирования в масштабе кластера.
 
--   0 for turning off the timer.
+-   0 для выключения таймера.
 
-Type: [UInt64](../../sql-reference/data-types/int-uint.md).
+Тип: [UInt64](../../sql-reference/data-types/int-uint.md).
 
-Default value: 1000000000 nanoseconds (once a second).
+Значение по умолчанию: 1000000000 наносекунд (раз в секунду).
 
-See also:
+См. также:
 
--   System table [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log)
+-   Системная таблица [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log)
 
 ## query\_profiler\_cpu\_time\_period\_ns {#query_profiler_cpu_time_period_ns}
 
-Sets the period for a CPU clock timer of the [query profiler](../../operations/optimizing-performance/sampling-query-profiler.md). This timer counts only CPU time.
+Устанавливает период для таймера CPU [query profiler](../../operations/optimizing-performance/sampling-query-profiler.md). Этот таймер считает только время CPU.
 
-Possible values:
+Возможные значения:
 
--   Positive integer number of nanoseconds.
+-   Положительное целое число в наносекундах.
 
-        Recommended values:
+        Рекомендуемые значения:
 
-            - 10000000 (100 times a second) nanosecods and more for for single queries.
-            - 1000000000 (once a second) for cluster-wide profiling.
+            - 10000000 (100 раз в секунду) наносекунд и большее значение для одиночных запросов.
+            - 1000000000 (раз в секунду) для профилирования в масштабе кластера.
 
--   0 for turning off the timer.
+-   0 для выключения таймера.
 
-Type: [UInt64](../../sql-reference/data-types/int-uint.md).
+Тип: [UInt64](../../sql-reference/data-types/int-uint.md).
 
-Default value: 1000000000 nanoseconds.
+Значение по умолчанию: 1000000000 наносекунд.
 
-See also:
+См. также:
 
--   System table [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log)
+-   Системная таблица [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log)
 
 ## allow_introspection_functions {#settings-allow_introspection_functions}
 
-Enables of disables [introspections functions](../../sql-reference/functions/introspection.md) for query profiling.
+Включает или отключает [функции самоанализа](../../sql-reference/functions/introspection.md) для профилирования запросов.
 
-Possible values:
+Возможные значения:
 
--   1 — Introspection functions enabled.
--   0 — Introspection functions disabled.
+-   1 — включены функции самоанализа.
+-   0 — функции самоанализа отключены.
 
-Default value: 0.
+Значение по умолчанию: 0.
 
-**See Also**
+**См. также**
 
 -   [Sampling Query Profiler](../optimizing-performance/sampling-query-profiler.md)
--   System table [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log)
+-   Системная таблица [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log)
+
+## input\_format\_parallel\_parsing {#input-format-parallel-parsing}
+
+-   Тип: bool
+-   Значение по умолчанию: True
+
+Обеспечивает параллельный анализ форматов данных с сохранением порядка. Поддерживается только для форматов TSV, TKSV, CSV и JSONEachRow.
+
+## min\_chunk\_bytes\_for\_parallel\_parsing {#min-chunk-bytes-for-parallel-parsing}
+
+-   Тип: unsigned int
+-   Значение по умолчанию: 1 MiB
+
+Минимальный размер блока в байтах, который каждый поток будет анализировать параллельно.
+
+## output\_format\_avro\_codec {#settings-output_format_avro_codec}
+
+Устанавливает кодек сжатия, используемый для вывода файла Avro.
+
+Тип: строка
+
+Возможные значения:
+
+-   `null` — без сжатия
+-   `deflate` — сжать с помощью Deflate (zlib)
+-   `snappy` — сжать с помощью [Snappy](https://google.github.io/snappy/)
+
+Значение по умолчанию: `snappy` (если доступно) или `deflate`.
+
+## output\_format\_avro\_sync\_interval {#settings-output_format_avro_sync_interval}
+
+Устанавливает минимальный размер данных (в байтах) между маркерами синхронизации для выходного файла Avro.
+
+Тип: unsigned int
+
+озможные значения: 32 (32 байта) - 1073741824 (1 GiB)
+
+Значение по умолчанию: 32768 (32 KiB)
 
 ## background\_pool\_size {#background_pool_size}
 
@@ -1624,6 +1714,26 @@ SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1;
 
 -   [min_insert_block_size_bytes](#min-insert-block-size-bytes)
 
+## output\_format\_pretty\_grid\_charset {#output-format-pretty-grid-charset}
+
+Позволяет изменить кодировку, которая используется для печати грид-границ. Доступны следующие кодировки: UTF-8, ASCII.
+
+**Пример**
+
+``` text
+SET output_format_pretty_grid_charset = 'UTF-8';
+SELECT * FROM a;
+┌─a─┐
+│ 1 │
+└───┘
+
+SET output_format_pretty_grid_charset = 'ASCII';
+SELECT * FROM a;
++-a-+
+| 1 |
++---+
+```
+
 ## optimize_read_in_order {#optimize_read_in_order}
 
 Включает или отключает оптимизацию в запросах [SELECT](../../sql-reference/statements/select/index.md) с секцией [ORDER BY](../../sql-reference/statements/select/order-by.md#optimize_read_in_order) при работе с таблицами семейства [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md).

From 0d79474acc3cfb6f2c8dfbed26aa5d5f0346fc4f Mon Sep 17 00:00:00 2001
From: Anton Popov <pad11rus@gmail.com>
Date: Wed, 30 Sep 2020 19:10:15 +0300
Subject: [PATCH 0051/1238] Fix TTL with GROUP BY and fix test according to new
 logic

---
 src/DataStreams/TTLBlockInputStream.cpp       | 184 ++++++++++--------
 src/DataStreams/TTLBlockInputStream.h         |   7 +-
 src/Storages/TTLDescription.cpp               |  17 +-
 .../01280_ttl_where_group_by.reference        |  16 +-
 .../0_stateless/01280_ttl_where_group_by.sh   |   8 +-
 .../01280_ttl_where_group_by_negative.sql     |   3 -
 6 files changed, 134 insertions(+), 101 deletions(-)

diff --git a/src/DataStreams/TTLBlockInputStream.cpp b/src/DataStreams/TTLBlockInputStream.cpp
index 6dba8968f79..5c49b9f11c2 100644
--- a/src/DataStreams/TTLBlockInputStream.cpp
+++ b/src/DataStreams/TTLBlockInputStream.cpp
@@ -88,7 +88,6 @@ TTLBlockInputStream::TTLBlockInputStream(
                     descr.arguments.push_back(header.getPositionByName(name));
 
         agg_aggregate_columns.resize(storage_rows_ttl.aggregate_descriptions.size());
-
         const Settings & settings = storage.global_context.getSettingsRef();
 
         Aggregator::Params params(header, keys, aggregates,
@@ -108,14 +107,15 @@ Block TTLBlockInputStream::readImpl()
 {
     /// Skip all data if table ttl is expired for part
     auto storage_rows_ttl = metadata_snapshot->getRowsTTL();
-    if (metadata_snapshot->hasRowsTTL() && !storage_rows_ttl.where_expression && storage_rows_ttl.mode != TTLMode::GROUP_BY
+    if (metadata_snapshot->hasRowsTTL()
+        && !storage_rows_ttl.where_expression
+        && storage_rows_ttl.mode != TTLMode::GROUP_BY
         && isTTLExpired(old_ttl_infos.table_ttl.max))
     {
         rows_removed = data_part->rows_count;
         return {};
     }
 
-
     Block block = children.at(0)->read();
     if (!block)
     {
@@ -130,10 +130,9 @@ Block TTLBlockInputStream::readImpl()
     }
 
     if (metadata_snapshot->hasRowsTTL() && (force || isTTLExpired(old_ttl_infos.table_ttl.min)))
-        removeRowsWithExpiredTableTTL(block);
+        executeRowsTTL(block);
 
     removeValuesWithExpiredColumnTTL(block);
-
     updateMovesTTL(block);
     updateRecompressionTTL(block);
 
@@ -167,107 +166,117 @@ static ColumnPtr extractRequieredColumn(const ExpressionActions & expression, co
     return block_copy.getByName(result_column).column;
 }
 
-void TTLBlockInputStream::removeRowsWithExpiredTableTTL(Block & block)
+void TTLBlockInputStream::executeRowsTTL(Block & block)
 {
     auto rows_ttl = metadata_snapshot->getRowsTTL();
     auto ttl_column = extractRequieredColumn(*rows_ttl.expression, block, rows_ttl.result_column);
 
     auto where_result_column = rows_ttl.where_expression ?
-        extractRequieredColumn(*rows_ttl.where_expression, block, rows_ttl.where_result_column) : nullptr;
+        extractRequieredColumn(*rows_ttl.where_expression, block, rows_ttl.where_result_column): nullptr;
 
+    if (aggregator)
+        aggregateRowsWithExpiredTTL(block, ttl_column, where_result_column);
+    else
+        removeRowsWithExpiredTTL(block, ttl_column, where_result_column);
+}
+
+void TTLBlockInputStream::removeRowsWithExpiredTTL(Block & block, ColumnPtr ttl_column, ColumnPtr where_column)
+{
+    MutableColumns result_columns;
     const auto & column_names = header.getNames();
 
-    if (!aggregator)
+    result_columns.reserve(column_names.size());
+    for (auto it = column_names.begin(); it != column_names.end(); ++it)
     {
-        MutableColumns result_columns;
-        result_columns.reserve(column_names.size());
-        for (auto it = column_names.begin(); it != column_names.end(); ++it)
-        {
-            const IColumn * values_column = block.getByName(*it).column.get();
-            MutableColumnPtr result_column = values_column->cloneEmpty();
-            result_column->reserve(block.rows());
+        const IColumn * values_column = block.getByName(*it).column.get();
+        MutableColumnPtr result_column = values_column->cloneEmpty();
+        result_column->reserve(block.rows());
 
-            for (size_t i = 0; i < block.rows(); ++i)
-            {
-                UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i);
-                bool where_filter_passed = !where_result_column || where_result_column->getBool(i);
-                if (!isTTLExpired(cur_ttl) || !where_filter_passed)
-                {
-                    new_ttl_infos.table_ttl.update(cur_ttl);
-                    result_column->insertFrom(*values_column, i);
-                }
-                else if (it == column_names.begin())
-                    ++rows_removed;
-            }
-            result_columns.emplace_back(std::move(result_column));
-        }
-        block = header.cloneWithColumns(std::move(result_columns));
-    }
-    else
-    {
-        MutableColumns result_columns = header.cloneEmptyColumns();
-        MutableColumns aggregate_columns = header.cloneEmptyColumns();
-
-        size_t rows_aggregated = 0;
-        size_t current_key_start = 0;
-        size_t rows_with_current_key = 0;
-        auto storage_rows_ttl = metadata_snapshot->getRowsTTL();
         for (size_t i = 0; i < block.rows(); ++i)
         {
             UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i);
-            bool where_filter_passed = !where_result_column || where_result_column->getBool(i);
-            bool ttl_expired = isTTLExpired(cur_ttl) && where_filter_passed;
-
-            bool same_as_current = true;
-            for (size_t j = 0; j < storage_rows_ttl.group_by_keys.size(); ++j)
-            {
-                const String & key_column = storage_rows_ttl.group_by_keys[j];
-                const IColumn * values_column = block.getByName(key_column).column.get();
-                if (!same_as_current || (*values_column)[i] != current_key_value[j])
-                {
-                    values_column->get(i, current_key_value[j]);
-                    same_as_current = false;
-                }
-            }
-
-            if (!same_as_current)
-            {
-                if (rows_with_current_key)
-                    calculateAggregates(aggregate_columns, current_key_start, rows_with_current_key);
-                finalizeAggregates(result_columns);
-
-                current_key_start = rows_aggregated;
-                rows_with_current_key = 0;
-            }
-
-            if (ttl_expired)
-            {
-                ++rows_with_current_key;
-                ++rows_aggregated;
-                for (const auto & name : column_names)
-                {
-                    const IColumn * values_column = block.getByName(name).column.get();
-                    auto & column = aggregate_columns[header.getPositionByName(name)];
-                    column->insertFrom(*values_column, i);
-                }
-            }
-            else
+            bool where_filter_passed = !where_column || where_column->getBool(i);
+            if (!isTTLExpired(cur_ttl) || !where_filter_passed)
             {
                 new_ttl_infos.table_ttl.update(cur_ttl);
-                for (const auto & name : column_names)
-                {
-                    const IColumn * values_column = block.getByName(name).column.get();
-                    auto & column = result_columns[header.getPositionByName(name)];
-                    column->insertFrom(*values_column, i);
-                }
+                result_column->insertFrom(*values_column, i);
+            }
+            else if (it == column_names.begin())
+                ++rows_removed;
+        }
+
+        result_columns.emplace_back(std::move(result_column));
+    }
+
+    block = header.cloneWithColumns(std::move(result_columns));
+}
+
+void TTLBlockInputStream::aggregateRowsWithExpiredTTL(Block & block, ColumnPtr ttl_column, ColumnPtr where_column)
+{
+    const auto & column_names = header.getNames();
+    MutableColumns result_columns = header.cloneEmptyColumns();
+    MutableColumns aggregate_columns = header.cloneEmptyColumns();
+
+    size_t rows_aggregated = 0;
+    size_t current_key_start = 0;
+    size_t rows_with_current_key = 0;
+    auto storage_rows_ttl = metadata_snapshot->getRowsTTL();
+
+    for (size_t i = 0; i < block.rows(); ++i)
+    {
+        UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i);
+        bool where_filter_passed = !where_column || where_column->getBool(i);
+        bool ttl_expired = isTTLExpired(cur_ttl) && where_filter_passed;
+
+        bool same_as_current = true;
+        for (size_t j = 0; j < storage_rows_ttl.group_by_keys.size(); ++j)
+        {
+            const String & key_column = storage_rows_ttl.group_by_keys[j];
+            const IColumn * values_column = block.getByName(key_column).column.get();
+            if (!same_as_current || (*values_column)[i] != current_key_value[j])
+            {
+                values_column->get(i, current_key_value[j]);
+                same_as_current = false;
             }
         }
 
-        if (rows_with_current_key)
-            calculateAggregates(aggregate_columns, current_key_start, rows_with_current_key);
+        if (!same_as_current)
+        {
+            if (rows_with_current_key)
+                calculateAggregates(aggregate_columns, current_key_start, rows_with_current_key);
+            finalizeAggregates(result_columns);
 
-        block = header.cloneWithColumns(std::move(result_columns));
+            current_key_start = rows_aggregated;
+            rows_with_current_key = 0;
+        }
+
+        if (ttl_expired)
+        {
+            ++rows_with_current_key;
+            ++rows_aggregated;
+            for (const auto & name : column_names)
+            {
+                const IColumn * values_column = block.getByName(name).column.get();
+                auto & column = aggregate_columns[header.getPositionByName(name)];
+                column->insertFrom(*values_column, i);
+            }
+        }
+        else
+        {
+            new_ttl_infos.table_ttl.update(cur_ttl);
+            for (const auto & name : column_names)
+            {
+                const IColumn * values_column = block.getByName(name).column.get();
+                auto & column = result_columns[header.getPositionByName(name)];
+                column->insertFrom(*values_column, i);
+            }
+        }
     }
+
+    if (rows_with_current_key)
+        calculateAggregates(aggregate_columns, current_key_start, rows_with_current_key);
+
+    block = header.cloneWithColumns(std::move(result_columns));
 }
 
 void TTLBlockInputStream::calculateAggregates(const MutableColumns & aggregate_columns, size_t start_pos, size_t length)
@@ -294,12 +303,14 @@ void TTLBlockInputStream::finalizeAggregates(MutableColumns & result_columns)
         {
             for (const auto & it : storage_rows_ttl.set_parts)
                 it.expression->execute(agg_block);
+
             for (const auto & name : storage_rows_ttl.group_by_keys)
             {
                 const IColumn * values_column = agg_block.getByName(name).column.get();
                 auto & result_column = result_columns[header.getPositionByName(name)];
                 result_column->insertRangeFrom(*values_column, 0, agg_block.rows());
             }
+
             for (const auto & it : storage_rows_ttl.set_parts)
             {
                 const IColumn * values_column = agg_block.getByName(it.expression_result_column_name).column.get();
@@ -308,6 +319,7 @@ void TTLBlockInputStream::finalizeAggregates(MutableColumns & result_columns)
             }
         }
     }
+
     agg_result.invalidate();
 }
 
diff --git a/src/DataStreams/TTLBlockInputStream.h b/src/DataStreams/TTLBlockInputStream.h
index 1d3b69f61c5..bbe1f8782a4 100644
--- a/src/DataStreams/TTLBlockInputStream.h
+++ b/src/DataStreams/TTLBlockInputStream.h
@@ -67,8 +67,13 @@ private:
     /// Removes values with expired ttl and computes new_ttl_infos and empty_columns for part
     void removeValuesWithExpiredColumnTTL(Block & block);
 
+    void executeRowsTTL(Block & block);
+
     /// Removes rows with expired table ttl and computes new ttl_infos for part
-    void removeRowsWithExpiredTableTTL(Block & block);
+    void removeRowsWithExpiredTTL(Block & block, ColumnPtr ttl_column, ColumnPtr where_column);
+
+    /// Aggregates rows with expired table ttl and computes new ttl_infos for part
+    void aggregateRowsWithExpiredTTL(Block & block, ColumnPtr ttl_column, ColumnPtr where_column);
 
     // Calculate aggregates of aggregate_columns into agg_result
     void calculateAggregates(const MutableColumns & aggregate_columns, size_t start_pos, size_t length);
diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp
index 7499f1de292..e412653a972 100644
--- a/src/Storages/TTLDescription.cpp
+++ b/src/Storages/TTLDescription.cpp
@@ -184,11 +184,8 @@ TTLDescription TTLDescription::getTTLFromAST(
             if (ttl_element->group_by_key.size() > pk_columns.size())
                 throw Exception("TTL Expression GROUP BY key should be a prefix of primary key", ErrorCodes::BAD_TTL_EXPRESSION);
 
-            NameSet primary_key_columns_set(pk_columns.begin(), pk_columns.end());
             NameSet aggregation_columns_set;
-
-            for (const auto & column : primary_key.expression->getRequiredColumns())
-                primary_key_columns_set.insert(column);
+            NameSet used_primary_key_columns_set;
 
             for (size_t i = 0; i < ttl_element->group_by_key.size(); ++i)
             {
@@ -196,6 +193,8 @@ TTLDescription TTLDescription::getTTLFromAST(
                     throw Exception(
                         "TTL Expression GROUP BY key should be a prefix of primary key",
                         ErrorCodes::BAD_TTL_EXPRESSION);
+
+                used_primary_key_columns_set.insert(pk_columns[i]);
             }
 
             for (const auto & [name, _] : ttl_element->group_by_aggregations)
@@ -209,9 +208,17 @@ TTLDescription TTLDescription::getTTLFromAST(
             result.group_by_keys = Names(pk_columns.begin(), pk_columns.begin() + ttl_element->group_by_key.size());
             auto aggregations = ttl_element->group_by_aggregations;
 
+            const auto & primary_key_expressions = primary_key.expression_list_ast->children;
+            for (size_t i = ttl_element->group_by_key.size(); i < primary_key_expressions.size(); ++i)
+            {
+                ASTPtr expr = makeASTFunction("any", primary_key_expressions[i]->clone());
+                aggregations.emplace_back(pk_columns[i], std::move(expr));
+                aggregation_columns_set.insert(pk_columns[i]);
+            }
+
             for (const auto & column : columns.getOrdinary())
             {
-                if (!aggregation_columns_set.count(column.name))
+                if (!aggregation_columns_set.count(column.name) && !used_primary_key_columns_set.count(column.name))
                 {
                     ASTPtr expr = makeASTFunction("any", std::make_shared<ASTIdentifier>(column.name));
                     aggregations.emplace_back(column.name, std::move(expr));
diff --git a/tests/queries/0_stateless/01280_ttl_where_group_by.reference b/tests/queries/0_stateless/01280_ttl_where_group_by.reference
index ad20d38f2e6..7fe00709dee 100644
--- a/tests/queries/0_stateless/01280_ttl_where_group_by.reference
+++ b/tests/queries/0_stateless/01280_ttl_where_group_by.reference
@@ -1,20 +1,26 @@
+ttl_01280_1
 1	1	0	4
 1	2	3	7
 1	3	0	5
 2	1	0	1
 2	1	20	1
+ttl_01280_2
 1	1	[0,2,3]	4
 1	1	[5,4,1]	13
 1	3	[1,0,1,0]	17
 2	1	[3,1,0,3]	8
 3	1	[2,4,5]	8
+ttl_01280_3
 1	1	0	4
-1	3	10	6
+1	1	10	6
 2	1	0	3
-3	5	8	2
+3	1	8	2
+ttl_01280_4
 1	1	0	4
-3	3	13	9
+10	2	13	9
+ttl_01280_5
 1	2	7	5
 2	3	6	5
-1	2	3	5
-2	3	3	5
+ttl_01280_6
+1	5	3	5
+2	10	3	5
diff --git a/tests/queries/0_stateless/01280_ttl_where_group_by.sh b/tests/queries/0_stateless/01280_ttl_where_group_by.sh
index 9b05606f928..531f2951d36 100755
--- a/tests/queries/0_stateless/01280_ttl_where_group_by.sh
+++ b/tests/queries/0_stateless/01280_ttl_where_group_by.sh
@@ -13,6 +13,7 @@ function optimize()
     done
 }
 
+echo "ttl_01280_1"
 $CLICKHOUSE_CLIENT -n --query "
 create table ttl_01280_1 (a Int, b Int, x Int, y Int, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second delete where x % 10 == 0 and y > 5;
 insert into ttl_01280_1 values (1, 1, 0, 4, now() + 10);
@@ -29,6 +30,7 @@ $CLICKHOUSE_CLIENT --query "select a, b, x, y from ttl_01280_1 ORDER BY a, b, x,
 
 $CLICKHOUSE_CLIENT --query "drop table if exists ttl_01280_2"
 
+echo "ttl_01280_2"
 $CLICKHOUSE_CLIENT -n --query "
 create table ttl_01280_2 (a Int, b Int, x Array(Int32), y Double, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by a, b set x = minForEach(x), y = sum(y), d = max(d);
 insert into ttl_01280_2 values (1, 1, array(0, 2, 3), 4, now() + 10);
@@ -47,6 +49,7 @@ $CLICKHOUSE_CLIENT --query "select a, b, x, y from ttl_01280_2 ORDER BY a, b, x,
 
 $CLICKHOUSE_CLIENT --query "drop table if exists ttl_01280_3"
 
+echo "ttl_01280_3"
 $CLICKHOUSE_CLIENT -n --query "
 create table ttl_01280_3 (a Int, b Int, x Int64, y Int, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by a set x = argMax(x, d), y = argMax(y, d), d = max(d);
 insert into ttl_01280_3 values (1, 1, 0, 4, now() + 10);
@@ -65,6 +68,7 @@ $CLICKHOUSE_CLIENT --query "select a, b, x, y from ttl_01280_3 ORDER BY a, b, x,
 
 $CLICKHOUSE_CLIENT --query "drop table if exists ttl_01280_4"
 
+echo "ttl_01280_4"
 $CLICKHOUSE_CLIENT -n --query "
 create table ttl_01280_4 (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (toDate(d), -(a + b)) ttl d + interval 1 second group by toDate(d) set x = sum(x), y = max(y);
 insert into ttl_01280_4 values (1, 1, 0, 4, now() + 10);
@@ -79,7 +83,8 @@ $CLICKHOUSE_CLIENT --query "select a, b, x, y from ttl_01280_4 ORDER BY a, b, x,
 
 $CLICKHOUSE_CLIENT --query "drop table if exists ttl_01280_5"
 
-$CLICKHOUSE_CLIENT -n --query "create table ttl_01280_5 (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (toDate(d), a, -b) ttl d + interval 1 second group by toDate(d), a set x = sum(x);
+echo "ttl_01280_5"
+$CLICKHOUSE_CLIENT -n --query "create table ttl_01280_5 (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (toDate(d), a, -b) ttl d + interval 1 second group by toDate(d), a set x = sum(x), b = argMax(b, -b);
 insert into ttl_01280_5 values (1, 2, 3, 5, now());
 insert into ttl_01280_5 values (2, 10, 1, 5, now());
 insert into ttl_01280_5 values (2, 3, 5, 5, now());
@@ -91,6 +96,7 @@ $CLICKHOUSE_CLIENT --query "select a, b, x, y from ttl_01280_5 ORDER BY a, b, x,
 
 $CLICKHOUSE_CLIENT --query "drop table if exists ttl_01280_6"
 
+echo "ttl_01280_6"
 $CLICKHOUSE_CLIENT -n --query "
 create table ttl_01280_6 (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (toDate(d), a, -b) ttl d + interval 1 second group by toDate(d), a;
 insert into ttl_01280_6 values (1, 2, 3, 5, now());
diff --git a/tests/queries/0_stateless/01280_ttl_where_group_by_negative.sql b/tests/queries/0_stateless/01280_ttl_where_group_by_negative.sql
index f2c26a3d495..b273e065bcc 100644
--- a/tests/queries/0_stateless/01280_ttl_where_group_by_negative.sql
+++ b/tests/queries/0_stateless/01280_ttl_where_group_by_negative.sql
@@ -1,7 +1,4 @@
 create table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by x set y = max(y); -- { serverError 450}
 create table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by b set y = max(y); -- { serverError 450}
 create table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by a, b, x set y = max(y); -- { serverError 450}
-create table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by a set b = min(b), y = max(y); -- { serverError 450}
 create table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by a, b set y = max(y), y = max(y); -- { serverError 450}
-create table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (toDate(d), a) ttl d + interval 1 second group by toDate(d), a set d = min(d), b = max(b); -- { serverError 450}
-create table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (d, -(a + b)) ttl d + interval 1 second group by d, -(a + b) set a = sum(a), b = min(b); -- { serverError 450}

From 478eb0b8a5df5f602651268cc396178b6adcf17e Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Thu, 22 Oct 2020 18:08:00 +0300
Subject: [PATCH 0052/1238] fix

---
 src/Databases/DatabaseReplicated.cpp        | 206 ++++++++++++--------
 src/Databases/IDatabase.h                   |   3 +-
 src/Databases/ya.make                       |   1 +
 src/Interpreters/InterpreterAlterQuery.cpp  |   3 +-
 src/Interpreters/InterpreterCreateQuery.cpp |  10 +-
 src/Interpreters/InterpreterDropQuery.cpp   |  19 +-
 src/Interpreters/InterpreterRenameQuery.cpp |  10 +-
 7 files changed, 149 insertions(+), 103 deletions(-)

diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index 42662d836d4..328f5476064 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -1,3 +1,4 @@
+#include <DataTypes/DataTypeString.h>
 #include <Databases/DatabaseReplicated.h>
 #include <IO/ReadBufferFromFile.h>
 #include <IO/ReadBufferFromString.h>
@@ -7,20 +8,15 @@
 #include <Interpreters/Context.h>
 #include <Interpreters/executeQuery.h>
 #include <Parsers/queryToString.h>
-#include <Common/Stopwatch.h>
 #include <Common/Exception.h>
+#include <Common/Stopwatch.h>
 #include <Common/ZooKeeper/KeeperException.h>
 #include <Common/ZooKeeper/Types.h>
 #include <Common/ZooKeeper/ZooKeeper.h>
-#include <DataTypes/DataTypeString.h>
-
-#include <common/sleep.h>
 
 
 namespace DB
 {
-
-
 namespace ErrorCodes
 {
     extern const int NO_ZOOKEEPER;
@@ -60,29 +56,34 @@ DatabaseReplicated::DatabaseReplicated(
     , zookeeper_path(zookeeper_path_)
     , replica_name(replica_name_)
 {
-    if (zookeeper_path.empty() || replica_name.empty()) {
+    if (zookeeper_path.empty() || replica_name.empty())
+    {
         throw Exception("ZooKeeper path and replica name must be non-empty", ErrorCodes::BAD_ARGUMENTS);
     }
 
     if (zookeeper_path.back() == '/')
         zookeeper_path.resize(zookeeper_path.size() - 1);
-    // If zookeeper chroot prefix is used, path should start with '/', because chroot concatenates without it.
+    /// If zookeeper chroot prefix is used, path should start with '/', because chroot concatenates without it.
     if (zookeeper_path.front() != '/')
         zookeeper_path = "/" + zookeeper_path;
 
-    if (context_.hasZooKeeper()) {
+    if (context_.hasZooKeeper())
+    {
         current_zookeeper = context_.getZooKeeper();
     }
     if (!current_zookeeper)
     {
-            throw Exception("Can't create replicated database without ZooKeeper", ErrorCodes::NO_ZOOKEEPER);
+        throw Exception("Can't create replicated database without ZooKeeper", ErrorCodes::NO_ZOOKEEPER);
     }
 
-    // New database
-    if (!current_zookeeper->exists(zookeeper_path)) {
+    /// New database
+    if (!current_zookeeper->exists(zookeeper_path))
+    {
         createDatabaseZKNodes();
-    // Old replica recovery
-    } else if (current_zookeeper->exists(zookeeper_path + "/replicas/" + replica_name)) {
+        /// Old replica recovery
+    }
+    else if (current_zookeeper->exists(zookeeper_path + "/replicas/" + replica_name))
+    {
         String remote_last_entry = current_zookeeper->get(zookeeper_path + "/replicas/" + replica_name, {}, nullptr);
 
         String local_last_entry;
@@ -93,16 +94,22 @@ DatabaseReplicated::DatabaseReplicated(
         }
         catch (const Exception &)
         {
-                // Metadata is corrupted.
-                // Replica erases the previous zk last executed log entry
-                // and behaves like a new clean replica.
-                writeLastExecutedToDiskAndZK();
+            /// Metadata is corrupted.
+            /// Replica erases the previous zk last executed log entry
+            /// and behaves like a new clean replica.
+            writeLastExecutedToDiskAndZK();
         }
 
-        if (!local_last_entry.empty() && local_last_entry == remote_last_entry) {
+        if (!local_last_entry.empty() && local_last_entry == remote_last_entry)
+        {
             last_executed_log_entry = local_last_entry;
-        } else {
-            throw Exception("Replica name might be in use by a different node. Please check replica_name parameter. Remove .last_entry file from metadata to create a new replica.", ErrorCodes::LOGICAL_ERROR);
+        }
+        else
+        {
+            throw Exception(
+                "Replica name might be in use by a different node. Please check replica_name parameter. Remove .last_entry file from "
+                "metadata to create a new replica.",
+                ErrorCodes::LOGICAL_ERROR);
         }
     }
 
@@ -110,12 +117,15 @@ DatabaseReplicated::DatabaseReplicated(
     feedback_timeout = context_.getConfigRef().getInt("database_replicated_feedback_timeout", 0);
     LOG_DEBUG(log, "Snapshot period is set to {} log entries per one snapshot", snapshot_period);
 
-    background_log_executor = context_.getReplicatedSchedulePool().createTask(database_name + "(DatabaseReplicated::background_executor)", [this]{ runBackgroundLogExecutor();} );
+    background_log_executor = context_.getReplicatedSchedulePool().createTask(
+        database_name + "(DatabaseReplicated::background_executor)", [this] { runBackgroundLogExecutor(); }
+    );
 
     background_log_executor->scheduleAfter(500);
 }
 
-void DatabaseReplicated::createDatabaseZKNodes() {
+void DatabaseReplicated::createDatabaseZKNodes()
+{
     current_zookeeper = getZooKeeper();
 
     current_zookeeper->createAncestors(zookeeper_path);
@@ -126,31 +136,34 @@ void DatabaseReplicated::createDatabaseZKNodes() {
     current_zookeeper->createIfNotExists(zookeeper_path + "/replicas", String());
 }
 
-void DatabaseReplicated::RemoveOutdatedSnapshotsAndLog() {
-    // This method removes all snapshots and logged queries
-    // that no longer will be in use by current replicas or
-    // new coming ones.
-    // Each registered replica has its state in ZooKeeper.
-    // Therefore, snapshots and logged queries that are less
-    // than a least advanced replica are removed.
-    // It does not interfere with a new coming replica
-    // metadata loading from snapshot
-    // because the replica will use the latest snapshot available
-    // and this snapshot will set the last executed log query
-    // to a greater one than the least advanced current replica.
+void DatabaseReplicated::RemoveOutdatedSnapshotsAndLog()
+{
+    /// This method removes all snapshots and logged queries
+    /// that no longer will be in use by current replicas or
+    /// new coming ones.
+    /// Each registered replica has its state in ZooKeeper.
+    /// Therefore, snapshots and logged queries that are less
+    /// than a least advanced replica are removed.
+    /// It does not interfere with a new coming replica
+    /// metadata loading from snapshot
+    /// because the replica will use the latest snapshot available
+    /// and this snapshot will set the last executed log query
+    /// to a greater one than the least advanced current replica.
     current_zookeeper = getZooKeeper();
     Strings replica_states = current_zookeeper->getChildren(zookeeper_path + "/replicas");
     auto least_advanced = std::min_element(replica_states.begin(), replica_states.end());
     Strings snapshots = current_zookeeper->getChildren(zookeeper_path + "/snapshots");
-    
-    if (snapshots.size() < 2) {
+
+    if (snapshots.size() < 2)
+    {
         return;
     }
 
     std::sort(snapshots.begin(), snapshots.end());
     auto still_useful = std::lower_bound(snapshots.begin(), snapshots.end(), *least_advanced);
     snapshots.erase(still_useful, snapshots.end());
-    for (const String & snapshot : snapshots) {
+    for (const String & snapshot : snapshots)
+    {
         current_zookeeper->tryRemoveRecursive(zookeeper_path + "/snapshots/" + snapshot);
     }
 
@@ -158,14 +171,17 @@ void DatabaseReplicated::RemoveOutdatedSnapshotsAndLog() {
     std::sort(log_entry_names.begin(), log_entry_names.end());
     auto still_useful_log = std::upper_bound(log_entry_names.begin(), log_entry_names.end(), *still_useful);
     log_entry_names.erase(still_useful_log, log_entry_names.end());
-    for (const String & log_entry_name : log_entry_names) {
+    for (const String & log_entry_name : log_entry_names)
+    {
         String log_entry_path = zookeeper_path + "/log/" + log_entry_name;
         current_zookeeper->tryRemove(log_entry_path);
     }
 }
 
-void DatabaseReplicated::runBackgroundLogExecutor() {
-    if (last_executed_log_entry == "") {
+void DatabaseReplicated::runBackgroundLogExecutor()
+{
+    if (last_executed_log_entry == "")
+    {
         loadMetadataFromSnapshot();
     }
 
@@ -177,7 +193,8 @@ void DatabaseReplicated::runBackgroundLogExecutor() {
 
     log_entry_names.erase(log_entry_names.begin(), newest_entry_it);
 
-    for (const String & log_entry_name : log_entry_names) {
+    for (const String & log_entry_name : log_entry_names)
+    {
         executeLogName(log_entry_name);
         last_executed_log_entry = log_entry_name;
         writeLastExecutedToDiskAndZK();
@@ -185,8 +202,9 @@ void DatabaseReplicated::runBackgroundLogExecutor() {
         int log_n = parse<int>(log_entry_name.substr(4));
         int last_log_n = parse<int>(log_entry_names.back().substr(4));
 
-        // The third condition gurantees at most one snapshot creation per batch
-        if (log_n > 0 && snapshot_period > 0 && (last_log_n - log_n) / snapshot_period == 0 && log_n % snapshot_period == 0) {
+        /// The third condition gurantees at most one snapshot creation per batch
+        if (log_n > 0 && snapshot_period > 0 && (last_log_n - log_n) / snapshot_period == 0 && log_n % snapshot_period == 0)
+        {
             createSnapshot();
         }
     }
@@ -194,9 +212,11 @@ void DatabaseReplicated::runBackgroundLogExecutor() {
     background_log_executor->scheduleAfter(500);
 }
 
-void DatabaseReplicated::writeLastExecutedToDiskAndZK() {
+void DatabaseReplicated::writeLastExecutedToDiskAndZK()
+{
     current_zookeeper = getZooKeeper();
-    current_zookeeper->createOrUpdate(zookeeper_path + "/replicas/" + replica_name, last_executed_log_entry, zkutil::CreateMode::Persistent);
+    current_zookeeper->createOrUpdate(
+        zookeeper_path + "/replicas/" + replica_name, last_executed_log_entry, zkutil::CreateMode::Persistent);
 
     String metadata_file = getMetadataPath() + ".last_entry";
     WriteBufferFromFile out(metadata_file, last_executed_log_entry.size(), O_WRONLY | O_CREAT);
@@ -207,42 +227,47 @@ void DatabaseReplicated::writeLastExecutedToDiskAndZK() {
     out.close();
 }
 
-void DatabaseReplicated::executeLogName(const String & log_entry_name) {
-        String path = zookeeper_path + "/log/" + log_entry_name;
-        current_zookeeper = getZooKeeper();
-        String query_to_execute = current_zookeeper->get(path, {}, nullptr);
+void DatabaseReplicated::executeLogName(const String & log_entry_name)
+{
+    String path = zookeeper_path + "/log/" + log_entry_name;
+    current_zookeeper = getZooKeeper();
+    String query_to_execute = current_zookeeper->get(path, {}, nullptr);
 
-        try
-        {
-            current_context = std::make_unique<Context>(global_context);
-            current_context->getClientInfo().query_kind = ClientInfo::QueryKind::REPLICATED_LOG_QUERY;
-            current_context->setCurrentDatabase(database_name);
-            current_context->setCurrentQueryId(""); // generate random query_id
-            executeQuery(query_to_execute, *current_context);
-        }
-        catch (const Exception & e)
-        {
-            tryLogCurrentException(log, "Query from zookeeper " + query_to_execute + " wasn't finished successfully");
-            current_zookeeper->create(zookeeper_path + "/replicas/" + replica_name + "/errors/" + log_entry_name, e.what(), zkutil::CreateMode::Persistent);
-        }
+    try
+    {
+        current_context = std::make_unique<Context>(global_context);
+        current_context->getClientInfo().query_kind = ClientInfo::QueryKind::REPLICATED_LOG_QUERY;
+        current_context->setCurrentDatabase(database_name);
+        current_context->setCurrentQueryId(""); // generate random query_id
+        executeQuery(query_to_execute, *current_context);
+    }
+    catch (const Exception & e)
+    {
+        tryLogCurrentException(log, "Query from zookeeper " + query_to_execute + " wasn't finished successfully");
+        current_zookeeper->create(
+            zookeeper_path + "/replicas/" + replica_name + "/errors/" + log_entry_name, e.what(), zkutil::CreateMode::Persistent);
+    }
 
-        LOG_DEBUG(log, "Executed query: {}", query_to_execute);
+    LOG_DEBUG(log, "Executed query: {}", query_to_execute);
 }
 
-void DatabaseReplicated::propose(const ASTPtr & query) {
+void DatabaseReplicated::propose(const ASTPtr & query)
+{
     current_zookeeper = getZooKeeper();
 
     LOG_DEBUG(log, "Proposing query: {}", queryToString(query));
 
     {
         std::lock_guard lock(log_name_mutex);
-        log_name_to_exec_with_result = current_zookeeper->create(zookeeper_path + "/log/log-", queryToString(query), zkutil::CreateMode::PersistentSequential);
+        log_name_to_exec_with_result
+            = current_zookeeper->create(zookeeper_path + "/log/log-", queryToString(query), zkutil::CreateMode::PersistentSequential);
     }
 
     background_log_executor->schedule();
 }
 
-BlockIO DatabaseReplicated::getFeedback() {
+BlockIO DatabaseReplicated::getFeedback()
+{
     BlockIO res;
     if (feedback_timeout == 0)
         return res;
@@ -260,39 +285,48 @@ BlockIO DatabaseReplicated::getFeedback() {
     Strings replica_states = current_zookeeper->getChildren(zookeeper_path + "/replicas");
     auto replica_iter = replica_states.begin();
 
-    while (!replica_states.empty() && watch.elapsedSeconds() < feedback_timeout) {
+    while (!replica_states.empty() && watch.elapsedSeconds() < feedback_timeout)
+    {
         String last_executed = current_zookeeper->get(zookeeper_path + "/replicas/" + *replica_iter);
-        if (last_executed > log_name_to_exec_with_result) {
+        if (last_executed > log_name_to_exec_with_result)
+        {
             replica_name_column->insert(*replica_iter);
             String err_path = zookeeper_path + "/replicas/" + *replica_iter + "/errors/" + log_name_to_exec_with_result;
-            if (!current_zookeeper->exists(err_path)) {
+            if (!current_zookeeper->exists(err_path))
+            {
                 feedback_column->insert("OK");
-            } else {
+            }
+            else
+            {
                 String feedback = current_zookeeper->get(err_path, {}, nullptr);
                 feedback_column->insert(feedback);
             }
-        replica_states.erase(replica_iter);
-        replica_iter = replica_states.begin();
+            replica_states.erase(replica_iter);
+            replica_iter = replica_states.begin();
         }
     }
 
     Block block = Block({
         {std::move(replica_name_column), block_structure[0].type, block_structure[0].name},
-        {std::move(feedback_column), block_structure[1].type, block_structure[1].name}});
+        {std::move(feedback_column), block_structure[1].type, block_structure[1].name}
+    });
 
     res.in = std::make_shared<OneBlockInputStream>(block);
     return res;
 }
 
-void DatabaseReplicated::createSnapshot() {
+void DatabaseReplicated::createSnapshot()
+{
     current_zookeeper = getZooKeeper();
     String snapshot_path = zookeeper_path + "/snapshots/" + last_executed_log_entry;
 
-    if (Coordination::Error::ZNODEEXISTS == current_zookeeper->tryCreate(snapshot_path, String(), zkutil::CreateMode::Persistent)) {
+    if (Coordination::Error::ZNODEEXISTS == current_zookeeper->tryCreate(snapshot_path, String(), zkutil::CreateMode::Persistent))
+    {
         return;
     }
-    
-    for (auto iterator = getTablesIterator(global_context, {}); iterator->isValid(); iterator->next()) {
+
+    for (auto iterator = getTablesIterator(global_context, {}); iterator->isValid(); iterator->next())
+    {
         String table_name = iterator->name();
         auto query = getCreateQueryFromMetadata(getObjectMetadataPath(table_name), true);
         String statement = queryToString(query);
@@ -303,9 +337,10 @@ void DatabaseReplicated::createSnapshot() {
     RemoveOutdatedSnapshotsAndLog();
 }
 
-void DatabaseReplicated::loadMetadataFromSnapshot() {
-    // Executes the latest snapshot.
-    // Used by new replicas only.
+void DatabaseReplicated::loadMetadataFromSnapshot()
+{
+    /// Executes the latest snapshot.
+    /// Used by new replicas only.
     current_zookeeper = getZooKeeper();
 
     Strings snapshots;
@@ -313,12 +348,14 @@ void DatabaseReplicated::loadMetadataFromSnapshot() {
         return;
 
     auto latest_snapshot = std::max_element(snapshots.begin(), snapshots.end());
-    while (snapshots.size() > 0 && !current_zookeeper->exists(zookeeper_path + "/snapshots/" + *latest_snapshot + "/.completed")) {
+    while (snapshots.size() > 0 && !current_zookeeper->exists(zookeeper_path + "/snapshots/" + *latest_snapshot + "/.completed"))
+    {
         snapshots.erase(latest_snapshot);
         latest_snapshot = std::max_element(snapshots.begin(), snapshots.end());
     }
 
-    if (snapshots.size() < 1) {
+    if (snapshots.size() < 1)
+    {
         return;
     }
 
@@ -328,7 +365,8 @@ void DatabaseReplicated::loadMetadataFromSnapshot() {
 
     LOG_DEBUG(log, "Executing {} snapshot", *latest_snapshot);
 
-    for (auto t = metadatas.begin(); t != metadatas.end(); ++t) {
+    for (auto t = metadatas.begin(); t != metadatas.end(); ++t)
+    {
         String path = zookeeper_path + "/snapshots/" + *latest_snapshot + "/" + *t;
 
         String query_to_execute = current_zookeeper->get(path, {}, nullptr);
diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h
index 2fd0c62b72e..9bec6394be7 100644
--- a/src/Databases/IDatabase.h
+++ b/src/Databases/IDatabase.h
@@ -181,7 +181,8 @@ public:
     virtual bool empty() const = 0;
 
     /// Submit query to log. Currently used by DatabaseReplicated engine only.
-    virtual void propose(const ASTPtr & /*query*/) {
+    virtual void propose(const ASTPtr & /*query*/)
+    {
         throw Exception(getEngineName() + ": propose() is not supported", ErrorCodes::NOT_IMPLEMENTED);
     }
 
diff --git a/src/Databases/ya.make b/src/Databases/ya.make
index b4173057e03..4ce56859d66 100644
--- a/src/Databases/ya.make
+++ b/src/Databases/ya.make
@@ -15,6 +15,7 @@ SRCS(
     DatabaseMemory.cpp
     DatabaseOnDisk.cpp
     DatabaseOrdinary.cpp
+    DatabaseReplicated.cpp
     DatabasesCommon.cpp
     DatabaseWithDictionaries.cpp
     MySQL/ConnectionMySQLSettings.cpp
diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp
index 0b53e84564f..e229cb120e5 100644
--- a/src/Interpreters/InterpreterAlterQuery.cpp
+++ b/src/Interpreters/InterpreterAlterQuery.cpp
@@ -51,7 +51,8 @@ BlockIO InterpreterAlterQuery::execute()
     auto metadata_snapshot = table->getInMemoryMetadataPtr();
 
     DatabasePtr database = DatabaseCatalog::instance().getDatabase(table_id.database_name);
-    if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY && !table->supportsReplication()) {
+    if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY && !table->supportsReplication())
+    {
         database->propose(query_ptr);
         auto * database_replicated = typeid_cast<DatabaseReplicated *>(database.get());
         return database_replicated->getFeedback();
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index 7c809e65639..5210230859c 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -75,6 +75,7 @@ namespace ErrorCodes
     extern const int DICTIONARY_ALREADY_EXISTS;
     extern const int ILLEGAL_SYNTAX_FOR_DATA_TYPE;
     extern const int ILLEGAL_COLUMN;
+    extern const int LOGICAL_ERROR;
 }
 
 namespace fs = std::filesystem;
@@ -713,14 +714,16 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
     /// contain the right database name for every replica
     /// therefore for such queries the AST database
     /// field is modified right before an actual execution
-    if (context.getClientInfo().query_kind == ClientInfo::QueryKind::REPLICATED_LOG_QUERY) {
+    if (context.getClientInfo().query_kind == ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
+    {
         create.database = current_database;
     }
 
     /// Actually creates table
     bool created = doCreateTable(create, properties);
 
-    if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY) {
+    if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
+    {
         auto * database_replicated = typeid_cast<DatabaseReplicated *>(database.get());
         return database_replicated->getFeedback();
     }
@@ -786,7 +789,8 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
         return true;
     }
 
-    if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY) {
+    if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
+    {
         database->propose(query_ptr);
         return true;
     }
diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp
index 455b40c30e3..393f4ef3dc9 100644
--- a/src/Interpreters/InterpreterDropQuery.cpp
+++ b/src/Interpreters/InterpreterDropQuery.cpp
@@ -101,11 +101,10 @@ BlockIO InterpreterDropQuery::executeToTable(
             if (database->getEngineName() != "Atomic" && database->getEngineName() != "Replicated")
                 table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
             /// Drop table from memory, don't touch data and metadata
-            if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY) {
+            if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
                 database->propose(query_ptr);
-            } else {
+            else
                 database->detachTable(table_id.table_name);
-            }
         }
         else if (query.kind == ASTDropQuery::Kind::Truncate)
         {
@@ -115,11 +114,10 @@ BlockIO InterpreterDropQuery::executeToTable(
             auto table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
             auto metadata_snapshot = table->getInMemoryMetadataPtr();
             /// Drop table data, don't touch metadata
-            if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY) {
+            if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
                 database->propose(query_ptr);
-            } else {
+            else
                 table->truncate(query_ptr, metadata_snapshot, context, table_lock);
-            }
         }
         else if (query.kind == ASTDropQuery::Kind::Drop)
         {
@@ -132,12 +130,11 @@ BlockIO InterpreterDropQuery::executeToTable(
             if (database->getEngineName() != "Atomic" && database->getEngineName() != "Replicated")
                 table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
 
-            // Prevents recursive drop from drop database query. The original query must specify a table.
-            if (!query_ptr->as<ASTDropQuery &>().table.empty() && database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY) {
+            /// Prevents recursive drop from drop database query. The original query must specify a table.
+            if (!query_ptr->as<ASTDropQuery &>().table.empty() && database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
                 database->propose(query_ptr);
-            } else {
+            else
                 database->dropTable(context, table_id.table_name, query.no_delay);
-            }
         }
     }
 
@@ -154,7 +151,7 @@ BlockIO InterpreterDropQuery::executeToTable(
         }
     }
 
-    if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
+    if (database && database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
     {
         auto * database_replicated = typeid_cast<DatabaseReplicated *>(database.get());
         return database_replicated->getFeedback();
diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp
index 3d8855b6458..65ed33bd9db 100644
--- a/src/Interpreters/InterpreterRenameQuery.cpp
+++ b/src/Interpreters/InterpreterRenameQuery.cpp
@@ -75,9 +75,12 @@ BlockIO InterpreterRenameQuery::executeToTables(const ASTRenameQuery & rename, c
             database_catalog.assertTableDoesntExist(StorageID(elem.to_database_name, elem.to_table_name), context);
 
         DatabasePtr database = database_catalog.getDatabase(elem.from_database_name);
-        if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY) {
+        if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
+        {
             database->propose(query_ptr);
-        } else {
+        }
+        else
+        {
             database->renameTable(
                 context,
                 elem.from_table_name,
@@ -88,7 +91,8 @@ BlockIO InterpreterRenameQuery::executeToTables(const ASTRenameQuery & rename, c
         }
 
         // TODO it can't work
-        if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY) {
+        if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
+        {
             auto * database_replicated = typeid_cast<DatabaseReplicated *>(database.get());
             return database_replicated->getFeedback();
         }

From cd14f095abe7f355353054172533d1f097d6105e Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Mon, 26 Oct 2020 18:12:16 +0300
Subject: [PATCH 0053/1238] fix tests

---
 src/Databases/DatabaseReplicated.cpp          |   9 +-
 src/Databases/DatabaseReplicated.h            |   2 +-
 src/Interpreters/InterpreterCreateQuery.cpp   |   2 +-
 tests/integration/helpers/test_tools.py       |  10 +-
 .../test_replicated_database/__init__.py      |   0
 .../test_replicated_database/test.py          | 143 ++++++++++--------
 6 files changed, 95 insertions(+), 71 deletions(-)
 create mode 100644 tests/integration/test_replicated_database/__init__.py

diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index 328f5476064..7fb7be61d35 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -136,7 +136,7 @@ void DatabaseReplicated::createDatabaseZKNodes()
     current_zookeeper->createIfNotExists(zookeeper_path + "/replicas", String());
 }
 
-void DatabaseReplicated::RemoveOutdatedSnapshotsAndLog()
+void DatabaseReplicated::removeOutdatedSnapshotsAndLog()
 {
     /// This method removes all snapshots and logged queries
     /// that no longer will be in use by current replicas or
@@ -180,7 +180,7 @@ void DatabaseReplicated::RemoveOutdatedSnapshotsAndLog()
 
 void DatabaseReplicated::runBackgroundLogExecutor()
 {
-    if (last_executed_log_entry == "")
+    if (last_executed_log_entry.empty())
     {
         loadMetadataFromSnapshot();
     }
@@ -274,7 +274,8 @@ BlockIO DatabaseReplicated::getFeedback()
 
     Stopwatch watch;
 
-    NamesAndTypes block_structure = {
+    NamesAndTypes block_structure =
+    {
         {"replica_name", std::make_shared<DataTypeString>()},
         {"execution_feedback", std::make_shared<DataTypeString>()},
     };
@@ -334,7 +335,7 @@ void DatabaseReplicated::createSnapshot()
     }
     current_zookeeper->create(snapshot_path + "/.completed", String(), zkutil::CreateMode::Persistent);
 
-    RemoveOutdatedSnapshotsAndLog();
+    removeOutdatedSnapshotsAndLog();
 }
 
 void DatabaseReplicated::loadMetadataFromSnapshot()
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index 4b647915079..62997e953ac 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -57,7 +57,7 @@ private:
 
     void loadMetadataFromSnapshot();
     void createSnapshot();
-    void RemoveOutdatedSnapshotsAndLog();
+    void removeOutdatedSnapshotsAndLog();
 
     std::unique_ptr<Context> current_context; // to run executeQuery
 
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index 5210230859c..0f7d441c0d6 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -141,7 +141,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create)
         throw Exception("Unknown database engine: " + ostr.str(), ErrorCodes::UNKNOWN_DATABASE_ENGINE);
     }
 
-    if (create.storage->engine->name == "Atomic")
+    if (create.storage->engine->name == "Atomic" || create.storage->engine->name == "Replicated")
     {
         if (create.attach && create.uuid == UUIDHelpers::Nil)
             throw Exception("UUID must be specified for ATTACH", ErrorCodes::INCORRECT_QUERY);
diff --git a/tests/integration/helpers/test_tools.py b/tests/integration/helpers/test_tools.py
index 75ae8f67f7a..639b47a7179 100644
--- a/tests/integration/helpers/test_tools.py
+++ b/tests/integration/helpers/test_tools.py
@@ -44,20 +44,20 @@ class TSV:
 
 
 def assert_eq_with_retry(instance, query, expectation, retry_count=20, sleep_time=0.5, stdin=None, timeout=None,
-                         settings=None, user=None, ignore_error=False):
+                         settings=None, user=None, ignore_error=False, get_result=lambda x: x):
     expectation_tsv = TSV(expectation)
     for i in range(retry_count):
         try:
-            if TSV(instance.query(query, user=user, stdin=stdin, timeout=timeout, settings=settings,
-                                  ignore_error=ignore_error)) == expectation_tsv:
+            if TSV(get_result(instance.query(query, user=user, stdin=stdin, timeout=timeout, settings=settings,
+                                  ignore_error=ignore_error))) == expectation_tsv:
                 break
             time.sleep(sleep_time)
         except Exception as ex:
             print(("assert_eq_with_retry retry {} exception {}".format(i + 1, ex)))
             time.sleep(sleep_time)
     else:
-        val = TSV(instance.query(query, user=user, stdin=stdin, timeout=timeout, settings=settings,
-                                 ignore_error=ignore_error))
+        val = TSV(get_result(instance.query(query, user=user, stdin=stdin, timeout=timeout, settings=settings,
+                                 ignore_error=ignore_error)))
         if expectation_tsv != val:
             raise AssertionError("'{}' != '{}'\n{}".format(expectation_tsv, val, '\n'.join(
                 expectation_tsv.diff(val, n1="expectation", n2="query"))))
diff --git a/tests/integration/test_replicated_database/__init__.py b/tests/integration/test_replicated_database/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py
index 346114cb8c4..372ac7a7c3e 100644
--- a/tests/integration/test_replicated_database/test.py
+++ b/tests/integration/test_replicated_database/test.py
@@ -1,20 +1,24 @@
 import time
-import logging
-
+import re
 import pytest
 
 from helpers.cluster import ClickHouseCluster
-
-logging.getLogger().setLevel(logging.INFO)
-logging.getLogger().addHandler(logging.StreamHandler())
+from helpers.test_tools import assert_eq_with_retry
 
 cluster = ClickHouseCluster(__file__)
 
-main_node = cluster.add_instance('main_node', main_configs=['configs/disable_snapshots.xml'], with_zookeeper=True, stay_alive=True)
-dummy_node = cluster.add_instance('dummy_node', main_configs=['configs/disable_snapshots.xml'], with_zookeeper=True)
-competing_node = cluster.add_instance('competing_node', main_configs=['configs/disable_snapshots.xml'], with_zookeeper=True)
-snapshotting_node = cluster.add_instance('snapshotting_node', main_configs=['configs/snapshot_each_query.xml'], with_zookeeper=True)
-snapshot_recovering_node = cluster.add_instance('snapshot_recovering_node', main_configs=['configs/disable_snapshots.xml'], with_zookeeper=True)
+main_node = cluster.add_instance('main_node', main_configs=['configs/disable_snapshots.xml'], with_zookeeper=True, stay_alive=True, macros={"shard": 1, "replica": 1})
+dummy_node = cluster.add_instance('dummy_node', main_configs=['configs/disable_snapshots.xml'], with_zookeeper=True, macros={"shard": 1, "replica": 2})
+competing_node = cluster.add_instance('competing_node', main_configs=['configs/disable_snapshots.xml'], with_zookeeper=True, macros={"shard": 1, "replica": 3})
+snapshotting_node = cluster.add_instance('snapshotting_node', main_configs=['configs/snapshot_each_query.xml'], with_zookeeper=True, macros={"shard": 2, "replica": 1})
+snapshot_recovering_node = cluster.add_instance('snapshot_recovering_node', main_configs=['configs/disable_snapshots.xml'], with_zookeeper=True, macros={"shard": 2, "replica": 2})
+
+uuid_regex = re.compile("[0-9a-f]{8}\-[0-9a-f]{4}\-[0-9a-f]{4}\-[0-9a-f]{4}\-[0-9a-f]{12}")
+def assert_create_query(nodes, table_name, expected):
+    replace_uuid = lambda x: re.sub(uuid_regex, "uuid", x)
+    query = "show create table testdb.{}".format(table_name)
+    for node in nodes:
+        assert_eq_with_retry(node, query, expected, get_result=replace_uuid)
 
 @pytest.fixture(scope="module")
 def started_cluster():
@@ -27,17 +31,25 @@ def started_cluster():
     finally:
         cluster.shutdown()
 
+#TODO better tests
 
 def test_create_replicated_table(started_cluster):
-    DURATION_SECONDS = 1
-    main_node.query("CREATE TABLE testdb.replicated_table (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree(d, k, 8192);")
+    #FIXME should fail (replicated with old syntax)
+    #main_node.query("CREATE TABLE testdb.replicated_table (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree(d, k, 8192);")
+    main_node.query("CREATE TABLE testdb.replicated_table (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree ORDER BY k PARTITION BY toYYYYMM(d);")
 
-    time.sleep(DURATION_SECONDS)
-    assert main_node.query("desc table testdb.replicated_table") == dummy_node.query("desc table testdb.replicated_table")
+    expected = "CREATE TABLE testdb.replicated_table\\n(\\n    `d` Date,\\n    `k` UInt64,\\n    `i32` Int32\\n)\\n" \
+               "ENGINE = ReplicatedMergeTree(\\'/clickhouse/tables/uuid/{shard}\\', \\'{replica}\\')\\n" \
+               "PARTITION BY toYYYYMM(d)\\nORDER BY k\\nSETTINGS index_granularity = 8192"
+    assert_create_query([main_node, dummy_node], "replicated_table", expected)
+    # assert without replacing uuid
+    assert main_node.query("show create testdb.replicated_table") == dummy_node.query("show create testdb.replicated_table")
 
 def test_simple_alter_table(started_cluster):
-    DURATION_SECONDS = 1
-    main_node.query("CREATE TABLE testdb.alter_test (CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192);")
+    #TODO add test with ReplicatedMergeTree
+    main_node.query("CREATE TABLE testdb.alter_test "
+                    "(CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) "
+                    "ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192);")
     main_node.query("ALTER TABLE testdb.alter_test ADD COLUMN Added0 UInt32;")
     main_node.query("ALTER TABLE testdb.alter_test ADD COLUMN Added2 UInt32;")
     main_node.query("ALTER TABLE testdb.alter_test ADD COLUMN Added1 UInt32 AFTER Added0;")
@@ -45,48 +57,37 @@ def test_simple_alter_table(started_cluster):
     main_node.query("ALTER TABLE testdb.alter_test ADD COLUMN AddedNested1.C Array(String) AFTER AddedNested1.B;")
     main_node.query("ALTER TABLE testdb.alter_test ADD COLUMN AddedNested2 Nested(A UInt32, B UInt64) AFTER AddedNested1;")
 
-    time.sleep(DURATION_SECONDS)
+    expected = "CREATE TABLE testdb.alter_test\\n(\\n    `CounterID` UInt32,\\n    `StartDate` Date,\\n    `UserID` UInt32,\\n" \
+               "    `VisitID` UInt32,\\n    `NestedColumn.A` Array(UInt8),\\n    `NestedColumn.S` Array(String),\\n" \
+               "    `ToDrop` UInt32,\\n    `Added0` UInt32,\\n    `Added1` UInt32,\\n    `Added2` UInt32,\\n" \
+               "    `AddedNested1.A` Array(UInt32),\\n    `AddedNested1.B` Array(UInt64),\\n    `AddedNested1.C` Array(String),\\n" \
+               "    `AddedNested2.A` Array(UInt32),\\n    `AddedNested2.B` Array(UInt64)\\n)\\n" \
+               "ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192)"
 
-    schema = main_node.query("show create table testdb.alter_test")
-    fields = [
-        "`CounterID`",
-        "`StartDate`",
-        "`UserID`",
-        "`VisitID`",
-        "`NestedColumn.A`",
-        "`NestedColumn.S`",
-        "`ToDrop`",
-        "`Added0`",
-        "`Added1`",
-        "`Added2`",
-        "`AddedNested1.A`",
-        "`AddedNested1.B`",
-        "`AddedNested1.C`",
-        "`AddedNested2.A`",
-        "`AddedNested2.B`"]
-
-    for field in fields:
-        assert field in schema
-
-    assert main_node.query("desc table testdb.alter_test") == dummy_node.query("desc table testdb.alter_test")
+    assert_create_query([main_node, dummy_node], "alter_test", expected)
 
 def test_create_replica_after_delay(started_cluster):
     competing_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'replica3');")
 
-    main_node.query("ALTER TABLE testdb.alter_test ADD COLUMN Added3 UInt32 ;")
-    main_node.query("ALTER TABLE testdb.alter_test ADD COLUMN Added4 UInt32 ;")
-    main_node.query("ALTER TABLE testdb.alter_test ADD COLUMN Added5 UInt32 ;")
+    main_node.query("ALTER TABLE testdb.alter_test ADD COLUMN Added3 UInt32;")
+    main_node.query("ALTER TABLE testdb.alter_test DROP COLUMN AddedNested1;")
+    main_node.query("ALTER TABLE testdb.alter_test RENAME COLUMN Added1 TO AddedNested1;")
 
-    time.sleep(6)
+    expected = "CREATE TABLE testdb.alter_test\\n(\\n    `CounterID` UInt32,\\n    `StartDate` Date,\\n    `UserID` UInt32,\\n" \
+               "    `VisitID` UInt32,\\n    `NestedColumn.A` Array(UInt8),\\n    `NestedColumn.S` Array(String),\\n" \
+               "    `ToDrop` UInt32,\\n    `Added0` UInt32,\\n    `AddedNested1` UInt32,\\n    `Added2` UInt32,\\n" \
+               "    `AddedNested2.A` Array(UInt32),\\n    `AddedNested2.B` Array(UInt64),\\n    `Added3` UInt32\\n)\\n" \
+               "ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192)"
 
-    assert competing_node.query("desc table testdb.alter_test") == main_node.query("desc table testdb.alter_test")
+    assert_create_query([main_node, dummy_node, competing_node], "alter_test", expected)
 
 def test_alters_from_different_replicas(started_cluster):
-    DURATION_SECONDS = 1
+    main_node.query("CREATE TABLE testdb.concurrent_test "
+                    "(CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) "
+                    "ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192);")
 
-    main_node.query("CREATE TABLE testdb.concurrent_test (CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192);")
-
-    time.sleep(DURATION_SECONDS)
+    time.sleep(1)   #FIXME
+    dummy_node.kill_clickhouse(stop_start_wait_sec=0)
 
     competing_node.query("ALTER TABLE testdb.concurrent_test ADD COLUMN Added0 UInt32;")
     main_node.query("ALTER TABLE testdb.concurrent_test ADD COLUMN Added2 UInt32;")
@@ -95,31 +96,53 @@ def test_alters_from_different_replicas(started_cluster):
     competing_node.query("ALTER TABLE testdb.concurrent_test ADD COLUMN AddedNested1.C Array(String) AFTER AddedNested1.B;")
     main_node.query("ALTER TABLE testdb.concurrent_test ADD COLUMN AddedNested2 Nested(A UInt32, B UInt64) AFTER AddedNested1;")
 
-    time.sleep(DURATION_SECONDS)
+    expected = "CREATE TABLE testdb.concurrent_test\\n(\\n    `CounterID` UInt32,\\n    `StartDate` Date,\\n    `UserID` UInt32,\\n" \
+               "    `VisitID` UInt32,\\n    `NestedColumn.A` Array(UInt8),\\n    `NestedColumn.S` Array(String),\\n    `ToDrop` UInt32,\\n" \
+               "    `Added0` UInt32,\\n    `Added1` UInt32,\\n    `Added2` UInt32,\\n    `AddedNested1.A` Array(UInt32),\\n" \
+               "    `AddedNested1.B` Array(UInt64),\\n    `AddedNested1.C` Array(String),\\n    `AddedNested2.A` Array(UInt32),\\n" \
+               "    `AddedNested2.B` Array(UInt64)\\n)\\n" \
+               "ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192)"
 
-    assert competing_node.query("desc table testdb.concurrent_test") == main_node.query("desc table testdb.concurrent_test")
+    assert_create_query([main_node, competing_node], "concurrent_test", expected)
 
 def test_drop_and_create_table(started_cluster):
     main_node.query("DROP TABLE testdb.concurrent_test")
-    main_node.query("CREATE TABLE testdb.concurrent_test (CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192);")
-    time.sleep(5)
-    assert competing_node.query("desc table testdb.concurrent_test") == main_node.query("desc table testdb.concurrent_test")
+    main_node.query("CREATE TABLE testdb.concurrent_test "
+                    "(CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) "
+                    "ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192);")
+
+    expected = "CREATE TABLE testdb.concurrent_test\\n(\\n    `CounterID` UInt32,\\n    `StartDate` Date,\\n    `UserID` UInt32,\\n" \
+               "    `VisitID` UInt32,\\n    `NestedColumn.A` Array(UInt8),\\n    `NestedColumn.S` Array(String),\\n    `ToDrop` UInt32\\n)\\n" \
+               "ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192)"
+
+    assert_create_query([main_node, competing_node], "concurrent_test", expected)
 
 def test_replica_restart(started_cluster):
     main_node.restart_clickhouse()
-    time.sleep(5)
-    assert competing_node.query("desc table testdb.concurrent_test") == main_node.query("desc table testdb.concurrent_test")
+
+    expected = "CREATE TABLE testdb.concurrent_test\\n(\\n    `CounterID` UInt32,\\n    `StartDate` Date,\\n    `UserID` UInt32,\\n" \
+               "    `VisitID` UInt32,\\n    `NestedColumn.A` Array(UInt8),\\n    `NestedColumn.S` Array(String),\\n    `ToDrop` UInt32\\n)\\n" \
+               "ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192)"
+
+    assert_create_query([main_node, competing_node], "concurrent_test", expected)
 
 def test_snapshot_and_snapshot_recover(started_cluster):
+    #FIXME bad test
     snapshotting_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'replica4');")
     time.sleep(5)
     snapshot_recovering_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'replica5');")
     time.sleep(5)
     assert snapshotting_node.query("desc table testdb.alter_test") == snapshot_recovering_node.query("desc table testdb.alter_test")
 
-#def test_drop_and_create_replica(started_cluster):
-#    main_node.query("DROP DATABASE testdb")
-#    main_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'replica1');")
-#    time.sleep(6)
-#    assert competing_node.query("desc table testdb.concurrent_test") == main_node.query("desc table testdb.concurrent_test")
+def test_drop_and_create_replica(started_cluster):
+    main_node.query("DROP DATABASE testdb")
+    main_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'replica1');")
+
+    expected = "CREATE TABLE testdb.concurrent_test\\n(\\n    `CounterID` UInt32,\\n    `StartDate` Date,\\n    `UserID` UInt32,\\n" \
+               "    `VisitID` UInt32,\\n    `NestedColumn.A` Array(UInt8),\\n    `NestedColumn.S` Array(String),\\n    `ToDrop` UInt32\\n)\\n" \
+               "ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192)"
+
+    assert_create_query([main_node, competing_node], "concurrent_test", expected)
+
+#TODO tests with Distributed
 

From d8ae9fcdb4aea22a83d6fc917ec9d070d2780470 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Tue, 27 Oct 2020 12:19:45 +0300
Subject: [PATCH 0054/1238] fixes, add shard name

---
 src/Common/ZooKeeper/ZooKeeper.cpp   | 17 --------------
 src/Common/ZooKeeper/ZooKeeper.h     |  5 -----
 src/Databases/DatabaseFactory.cpp    | 12 +++++-----
 src/Databases/DatabaseReplicated.cpp | 33 +++++++++++++++++++++-------
 src/Databases/DatabaseReplicated.h   | 14 +++++++-----
 src/Databases/IDatabase.h            | 20 ++++++++---------
 src/Interpreters/DDLWorker.cpp       |  1 +
 7 files changed, 52 insertions(+), 50 deletions(-)

diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp
index f4174faf057..bee875d1c74 100644
--- a/src/Common/ZooKeeper/ZooKeeper.cpp
+++ b/src/Common/ZooKeeper/ZooKeeper.cpp
@@ -588,23 +588,6 @@ void ZooKeeper::removeChildren(const std::string & path)
 }
 
 
-void ZooKeeper::tryRemoveChildren(const std::string & path)
-{
-    Strings children;
-    if (tryGetChildren(path, children) != Coordination::Error::ZOK)
-        return;
-    while (!children.empty())
-    {
-        Coordination::Requests ops;
-        for (size_t i = 0; i < MULTI_BATCH_SIZE && !children.empty(); ++i)
-        {
-            ops.emplace_back(makeRemoveRequest(path + "/" + children.back(), -1));
-            children.pop_back();
-        }
-        multi(ops);
-    }
-}
-
 void ZooKeeper::removeChildrenRecursive(const std::string & path)
 {
     Strings children = getChildren(path);
diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h
index bbe3787197a..1ad744102c6 100644
--- a/src/Common/ZooKeeper/ZooKeeper.h
+++ b/src/Common/ZooKeeper/ZooKeeper.h
@@ -189,11 +189,6 @@ public:
     /// Remove all children nodes (non recursive).
     void removeChildren(const std::string & path);
 
-    /// Remove all children nodes (non recursive).
-    /// If there're no children for the given path,
-    /// this method does not throw an exception.
-    void tryRemoveChildren(const std::string & path);
-
     using WaitCondition = std::function<bool()>;
 
     /// Wait for the node to disappear or return immediately if it doesn't exist.
diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp
index 5afa0b216ac..7758fe0bddc 100644
--- a/src/Databases/DatabaseFactory.cpp
+++ b/src/Databases/DatabaseFactory.cpp
@@ -169,15 +169,17 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
     {
         const ASTFunction * engine = engine_define->engine;
 
-        if (!engine->arguments || engine->arguments->children.size() != 2)
-            throw Exception("Replicated database requires zoo_path and replica_name arguments", ErrorCodes::BAD_ARGUMENTS);
+        if (!engine->arguments || engine->arguments->children.size() != 3)
+            throw Exception("Replicated database requires 3 arguments: zookeeper path, shard name and replica name", ErrorCodes::BAD_ARGUMENTS);
 
         const auto & arguments = engine->arguments->children;
 
-        const auto & zoo_path = safeGetLiteralValue<String>(arguments[0], "Replicated");
-        const auto & replica_name  = safeGetLiteralValue<String>(arguments[1], "Replicated");
+        //TODO allow macros in arguments
+        const auto & zookeeper_path = safeGetLiteralValue<String>(arguments[0], "Replicated");
+        const auto & shard_name  = safeGetLiteralValue<String>(arguments[1], "Replicated");
+        const auto & replica_name  = safeGetLiteralValue<String>(arguments[2], "Replicated");
 
-        return std::make_shared<DatabaseReplicated>(database_name, metadata_path, uuid, zoo_path, replica_name, context);
+        return std::make_shared<DatabaseReplicated>(database_name, metadata_path, uuid, zookeeper_path, shard_name, replica_name, context);
     }
 
     throw Exception("Unknown database engine: " + engine_name, ErrorCodes::UNKNOWN_DATABASE_ENGINE);
diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index 7fb7be61d35..145b3abba00 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -24,6 +24,7 @@ namespace ErrorCodes
     extern const int BAD_ARGUMENTS;
 }
 
+//FIXME never used
 void DatabaseReplicated::setZooKeeper(zkutil::ZooKeeperPtr zookeeper)
 {
     std::lock_guard lock(current_zookeeper_mutex);
@@ -50,16 +51,16 @@ DatabaseReplicated::DatabaseReplicated(
     const String & metadata_path_,
     UUID uuid,
     const String & zookeeper_path_,
+    const String & shard_name_,
     const String & replica_name_,
     Context & context_)
     : DatabaseAtomic(name_, metadata_path_, uuid, "DatabaseReplicated (" + name_ + ")", context_)
     , zookeeper_path(zookeeper_path_)
+    , shard_name(shard_name_)
     , replica_name(replica_name_)
 {
-    if (zookeeper_path.empty() || replica_name.empty())
-    {
-        throw Exception("ZooKeeper path and replica name must be non-empty", ErrorCodes::BAD_ARGUMENTS);
-    }
+    if (zookeeper_path.empty() || shard_name.empty() || replica_name.empty())
+        throw Exception("ZooKeeper path and shard and replica names must be non-empty", ErrorCodes::BAD_ARGUMENTS);
 
     if (zookeeper_path.back() == '/')
         zookeeper_path.resize(zookeeper_path.size() - 1);
@@ -79,10 +80,12 @@ DatabaseReplicated::DatabaseReplicated(
     /// New database
     if (!current_zookeeper->exists(zookeeper_path))
     {
-        createDatabaseZKNodes();
-        /// Old replica recovery
+        createDatabaseZooKeeperNodes();
     }
-    else if (current_zookeeper->exists(zookeeper_path + "/replicas/" + replica_name))
+
+    /// Attach existing replica
+    //TODO better protection from wrong replica names
+    if (current_zookeeper->exists(zookeeper_path + "/replicas/" + replica_name))
     {
         String remote_last_entry = current_zookeeper->get(zookeeper_path + "/replicas/" + replica_name, {}, nullptr);
 
@@ -106,17 +109,23 @@ DatabaseReplicated::DatabaseReplicated(
         }
         else
         {
+            //FIXME
             throw Exception(
                 "Replica name might be in use by a different node. Please check replica_name parameter. Remove .last_entry file from "
                 "metadata to create a new replica.",
                 ErrorCodes::LOGICAL_ERROR);
         }
     }
+    else
+    {
+        createReplicaZooKeeperNodes();
+    }
 
     snapshot_period = context_.getConfigRef().getInt("database_replicated_snapshot_period", 10);
     feedback_timeout = context_.getConfigRef().getInt("database_replicated_feedback_timeout", 0);
     LOG_DEBUG(log, "Snapshot period is set to {} log entries per one snapshot", snapshot_period);
 
+    //TODO do we need separate pool?
     background_log_executor = context_.getReplicatedSchedulePool().createTask(
         database_name + "(DatabaseReplicated::background_executor)", [this] { runBackgroundLogExecutor(); }
     );
@@ -124,7 +133,7 @@ DatabaseReplicated::DatabaseReplicated(
     background_log_executor->scheduleAfter(500);
 }
 
-void DatabaseReplicated::createDatabaseZKNodes()
+void DatabaseReplicated::createDatabaseZooKeeperNodes()
 {
     current_zookeeper = getZooKeeper();
 
@@ -136,6 +145,11 @@ void DatabaseReplicated::createDatabaseZKNodes()
     current_zookeeper->createIfNotExists(zookeeper_path + "/replicas", String());
 }
 
+void DatabaseReplicated::createReplicaZooKeeperNodes()
+{
+    current_zookeeper->create(zookeeper_path + "/replicas/" + replica_name, "", zkutil::CreateMode::Persistent);
+}
+
 void DatabaseReplicated::removeOutdatedSnapshotsAndLog()
 {
     /// This method removes all snapshots and logged queries
@@ -151,6 +165,9 @@ void DatabaseReplicated::removeOutdatedSnapshotsAndLog()
     /// to a greater one than the least advanced current replica.
     current_zookeeper = getZooKeeper();
     Strings replica_states = current_zookeeper->getChildren(zookeeper_path + "/replicas");
+    //TODO do not use log pointers to determine which entries to remove if there are staled pointers.
+    // We can just remove all entries older than previous snapshot version.
+    // Possible invariant: store all entries since last snapshot, replica becomes lost when it cannot get log entry.
     auto least_advanced = std::min_element(replica_states.begin(), replica_states.end());
     Strings snapshots = current_zookeeper->getChildren(zookeeper_path + "/snapshots");
 
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index 62997e953ac..375118e7356 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -35,7 +35,9 @@ namespace DB
 class DatabaseReplicated : public DatabaseAtomic
 {
 public:
-    DatabaseReplicated(const String & name_, const String & metadata_path_, UUID uuid, const String & zookeeper_path_, const String & replica_name_, Context & context);
+    DatabaseReplicated(const String & name_, const String & metadata_path_, UUID uuid,
+                       const String & zookeeper_path_, const String & shard_name_, const String & replica_name_,
+                       Context & context);
 
     void drop(const Context & /*context*/) override;
 
@@ -45,11 +47,9 @@ public:
 
     BlockIO getFeedback();
 
-    String zookeeper_path;
-    String replica_name;
-
 private:
-    void createDatabaseZKNodes();
+    void createDatabaseZooKeeperNodes();
+    void createReplicaZooKeeperNodes();
 
     void runBackgroundLogExecutor();
     void executeLogName(const String &);
@@ -59,6 +59,10 @@ private:
     void createSnapshot();
     void removeOutdatedSnapshotsAndLog();
 
+    String zookeeper_path;
+    String shard_name;
+    String replica_name;
+
     std::unique_ptr<Context> current_context; // to run executeQuery
 
     std::mutex log_name_mutex;
diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h
index eeb69a97092..393e8f2d10c 100644
--- a/src/Databases/IDatabase.h
+++ b/src/Databases/IDatabase.h
@@ -197,7 +197,7 @@ public:
         const StoragePtr & /*table*/,
         const ASTPtr & /*query*/)
     {
-        throw Exception("There is no CREATE TABLE query for Database " + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
+        throw Exception("There is no CREATE TABLE query for Database" + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
     }
 
     /// Add the dictionary to the database. Record its presence in the metadata.
@@ -206,7 +206,7 @@ public:
         const String & /*dictionary_name*/,
         const ASTPtr & /*query*/)
     {
-        throw Exception("There is no CREATE DICTIONARY query for Database " + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
+        throw Exception("There is no CREATE DICTIONARY query for Database" + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
     }
 
     /// Delete the table from the database, drop table and delete the metadata.
@@ -215,7 +215,7 @@ public:
         const String & /*name*/,
         [[maybe_unused]] bool no_delay = false)
     {
-        throw Exception("There is no DROP TABLE query for Database " + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
+        throw Exception("There is no DROP TABLE query for Database" + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
     }
 
     /// Delete the dictionary from the database. Delete the metadata.
@@ -223,32 +223,32 @@ public:
         const Context & /*context*/,
         const String & /*dictionary_name*/)
     {
-        throw Exception("There is no DROP DICTIONARY query for Database " + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
+        throw Exception("There is no DROP DICTIONARY query for Database" + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
     }
 
     /// Add a table to the database, but do not add it to the metadata. The database may not support this method.
     virtual void attachTable(const String & /*name*/, const StoragePtr & /*table*/, [[maybe_unused]] const String & relative_table_path = {})
     {
-        throw Exception("There is no ATTACH TABLE query for Database " + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
+        throw Exception("There is no ATTACH TABLE query for Database" + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
     }
 
     /// Add dictionary to the database, but do not add it to the metadata. The database may not support this method.
     /// If dictionaries_lazy_load is false it also starts loading the dictionary asynchronously.
     virtual void attachDictionary(const String & /* dictionary_name */, const DictionaryAttachInfo & /* attach_info */)
     {
-        throw Exception("There is no ATTACH DICTIONARY query for Database " + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
+        throw Exception("There is no ATTACH DICTIONARY query for Database" + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
     }
 
     /// Forget about the table without deleting it, and return it. The database may not support this method.
     virtual StoragePtr detachTable(const String & /*name*/)
     {
-        throw Exception("There is no DETACH TABLE query for Database " + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
+        throw Exception("There is no DETACH TABLE query for Database" + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
     }
 
     /// Forget about the dictionary without deleting it. The database may not support this method.
     virtual void detachDictionary(const String & /*name*/)
     {
-        throw Exception("There is no DETACH DICTIONARY query for Database " + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
+        throw Exception("There is no DETACH DICTIONARY query for Database" + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
     }
 
     /// Rename the table and possibly move the table to another database.
@@ -352,14 +352,14 @@ protected:
     virtual ASTPtr getCreateTableQueryImpl(const String & /*name*/, const Context & /*context*/, bool throw_on_error) const
     {
         if (throw_on_error)
-            throw Exception("There is no SHOW CREATE TABLE query for Database " + getEngineName(), ErrorCodes::CANNOT_GET_CREATE_TABLE_QUERY);
+            throw Exception("There is no SHOW CREATE TABLE query for Database" + getEngineName(), ErrorCodes::CANNOT_GET_CREATE_TABLE_QUERY);
         return nullptr;
     }
 
     virtual ASTPtr getCreateDictionaryQueryImpl(const String & /*name*/, bool throw_on_error) const
     {
         if (throw_on_error)
-            throw Exception("There is no SHOW CREATE DICTIONARY query for Database " + getEngineName(), ErrorCodes::CANNOT_GET_CREATE_DICTIONARY_QUERY);
+            throw Exception("There is no SHOW CREATE DICTIONARY query for Database" + getEngineName(), ErrorCodes::CANNOT_GET_CREATE_DICTIONARY_QUERY);
         return nullptr;
     }
 
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 32d0e25bde5..4e2dcc98767 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -759,6 +759,7 @@ void DDLWorker::processTask(DDLTask & task)
     else if (code == Coordination::Error::ZNONODE)
     {
         /// There is no parent
+        //TODO why not to create parent before active_node?
         createStatusDirs(task.entry_path, zookeeper);
         if (Coordination::Error::ZOK != zookeeper->tryCreate(active_node_path, "", zkutil::CreateMode::Ephemeral, dummy))
             throw Coordination::Exception(code, active_node_path);

From cbcdee0cf9f735e9c8545f32fe73579d01bbb9a5 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Tue, 3 Nov 2020 16:47:26 +0300
Subject: [PATCH 0055/1238] split DDLWorker.cpp

---
 src/Interpreters/DDLTask.cpp                  |  81 +++
 src/Interpreters/DDLTask.h                    |  88 ++++
 src/Interpreters/DDLWorker.cpp                | 479 +-----------------
 src/Interpreters/DDLWorker.h                  |  22 +-
 src/Interpreters/InterpreterAlterQuery.cpp    |   2 +-
 src/Interpreters/InterpreterCreateQuery.cpp   |   3 +-
 .../InterpreterCreateQuotaQuery.cpp           |   2 +-
 .../InterpreterCreateRoleQuery.cpp            |   2 +-
 .../InterpreterCreateRowPolicyQuery.cpp       |   2 +-
 .../InterpreterCreateSettingsProfileQuery.cpp |   2 +-
 .../InterpreterCreateUserQuery.cpp            |   2 +-
 .../InterpreterDropAccessEntityQuery.cpp      |   2 +-
 src/Interpreters/InterpreterDropQuery.cpp     |   2 +-
 src/Interpreters/InterpreterGrantQuery.cpp    |   2 +-
 .../InterpreterKillQueryQuery.cpp             |   2 +-
 src/Interpreters/InterpreterOptimizeQuery.cpp |   2 +-
 src/Interpreters/InterpreterRenameQuery.cpp   |   2 +-
 src/Interpreters/InterpreterSystemQuery.cpp   |   2 +-
 src/Interpreters/executeDDLQueryOnCluster.cpp | 317 ++++++++++++
 src/Interpreters/executeDDLQueryOnCluster.h   |  63 +++
 src/Interpreters/ya.make                      |   2 +
 21 files changed, 576 insertions(+), 505 deletions(-)
 create mode 100644 src/Interpreters/DDLTask.cpp
 create mode 100644 src/Interpreters/DDLTask.h
 create mode 100644 src/Interpreters/executeDDLQueryOnCluster.cpp
 create mode 100644 src/Interpreters/executeDDLQueryOnCluster.h

diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp
new file mode 100644
index 00000000000..dfb8f5ff746
--- /dev/null
+++ b/src/Interpreters/DDLTask.cpp
@@ -0,0 +1,81 @@
+#include <Interpreters/DDLTask.h>
+#include <Common/DNSResolver.h>
+#include <Common/isLocalAddress.h>
+#include <IO/WriteHelpers.h>
+#include <IO/ReadHelpers.h>
+#include <IO/Operators.h>
+#include <IO/ReadBufferFromString.h>
+#include <Poco/Net/NetException.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int UNKNOWN_FORMAT_VERSION;
+}
+
+HostID HostID::fromString(const String & host_port_str)
+{
+    HostID res;
+    std::tie(res.host_name, res.port) = Cluster::Address::fromString(host_port_str);
+    return res;
+}
+
+bool HostID::isLocalAddress(UInt16 clickhouse_port) const
+{
+    try
+    {
+        return DB::isLocalAddress(DNSResolver::instance().resolveAddress(host_name, port), clickhouse_port);
+    }
+    catch (const Poco::Net::NetException &)
+    {
+        /// Avoid "Host not found" exceptions
+        return false;
+    }
+}
+
+
+String DDLLogEntry::toString() const
+{
+    WriteBufferFromOwnString wb;
+
+    Strings host_id_strings(hosts.size());
+    std::transform(hosts.begin(), hosts.end(), host_id_strings.begin(), HostID::applyToString);
+
+    auto version = CURRENT_VERSION;
+    wb << "version: " << version << "\n";
+    wb << "query: " << escape << query << "\n";
+    wb << "hosts: " << host_id_strings << "\n";
+    wb << "initiator: " << initiator << "\n";
+
+    return wb.str();
+}
+
+void DDLLogEntry::parse(const String & data)
+{
+    ReadBufferFromString rb(data);
+
+    int version;
+    rb >> "version: " >> version >> "\n";
+
+    if (version != CURRENT_VERSION)
+        throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unknown DDLLogEntry format version: {}", version);
+
+    Strings host_id_strings;
+    rb >> "query: " >> escape >> query >> "\n";
+    rb >> "hosts: " >> host_id_strings >> "\n";
+
+    if (!rb.eof())
+        rb >> "initiator: " >> initiator >> "\n";
+    else
+        initiator.clear();
+
+    assertEOF(rb);
+
+    hosts.resize(host_id_strings.size());
+    std::transform(host_id_strings.begin(), host_id_strings.end(), hosts.begin(), HostID::fromString);
+}
+
+
+}
diff --git a/src/Interpreters/DDLTask.h b/src/Interpreters/DDLTask.h
new file mode 100644
index 00000000000..51f09efd0bd
--- /dev/null
+++ b/src/Interpreters/DDLTask.h
@@ -0,0 +1,88 @@
+#pragma once
+#include <Core/Types.h>
+#include <Interpreters/Cluster.h>
+
+
+namespace DB
+{
+
+class ASTQueryWithOnCluster;
+
+struct HostID
+{
+    String host_name;
+    UInt16 port;
+
+    HostID() = default;
+
+    explicit HostID(const Cluster::Address & address)
+        : host_name(address.host_name), port(address.port) {}
+
+    static HostID fromString(const String & host_port_str);
+
+    String toString() const
+    {
+        return Cluster::Address::toString(host_name, port);
+    }
+
+    String readableString() const
+    {
+        return host_name + ":" + DB::toString(port);
+    }
+
+    bool isLocalAddress(UInt16 clickhouse_port) const;
+
+    static String applyToString(const HostID & host_id)
+    {
+        return host_id.toString();
+    }
+};
+
+
+struct DDLLogEntry
+{
+    String query;
+    std::vector<HostID> hosts;
+    String initiator; // optional
+
+    static constexpr int CURRENT_VERSION = 1;
+
+    String toString() const;
+
+    void parse(const String & data);
+};
+
+
+struct DDLTask
+{
+    /// Stages of task lifetime correspond ordering of these data fields:
+
+    /// Stage 1: parse entry
+    String entry_name;
+    String entry_path;
+    DDLLogEntry entry;
+
+    /// Stage 2: resolve host_id and check that
+    HostID host_id;
+    String host_id_str;
+
+    /// Stage 3.1: parse query
+    ASTPtr query;
+    ASTQueryWithOnCluster * query_on_cluster = nullptr;
+
+    /// Stage 3.2: check cluster and find the host in cluster
+    String cluster_name;
+    ClusterPtr cluster;
+    Cluster::Address address_in_cluster;
+    size_t host_shard_num;
+    size_t host_replica_num;
+
+    /// Stage 3.3: execute query
+    ExecutionStatus execution_status;
+    bool was_executed = false;
+
+    /// Stage 4: commit results to ZooKeeper
+};
+
+
+}
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 4e2dcc98767..2c454db4787 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -1,4 +1,5 @@
 #include <Interpreters/DDLWorker.h>
+#include <Interpreters/DDLTask.h>
 #include <Parsers/ASTAlterQuery.h>
 #include <Parsers/ASTDropQuery.h>
 #include <Parsers/ASTOptimizeQuery.h>
@@ -9,37 +10,21 @@
 #include <Parsers/queryToString.h>
 #include <IO/WriteHelpers.h>
 #include <IO/ReadHelpers.h>
-#include <IO/Operators.h>
 #include <IO/ReadBufferFromString.h>
 #include <Storages/IStorage.h>
-#include <Storages/StorageDistributed.h>
-#include <DataStreams/IBlockInputStream.h>
 #include <Interpreters/executeQuery.h>
 #include <Interpreters/Cluster.h>
-#include <Interpreters/AddDefaultDatabaseVisitor.h>
 #include <Interpreters/Context.h>
-#include <Access/AccessRightsElement.h>
-#include <Access/ContextAccess.h>
-#include <Common/DNSResolver.h>
-#include <Common/Macros.h>
 #include <Common/setThreadName.h>
-#include <Common/Stopwatch.h>
 #include <Common/randomSeed.h>
 #include <Common/ZooKeeper/ZooKeeper.h>
 #include <Common/ZooKeeper/KeeperException.h>
 #include <Common/isLocalAddress.h>
-#include <Common/quoteString.h>
-#include <DataTypes/DataTypesNumber.h>
-#include <DataTypes/DataTypeString.h>
-#include <DataTypes/DataTypeArray.h>
-#include <Columns/ColumnsNumber.h>
-#include <Columns/ColumnString.h>
-#include <Columns/ColumnArray.h>
 #include <Storages/StorageReplicatedMergeTree.h>
 #include <Poco/Timestamp.h>
-#include <Poco/Net/NetException.h>
 #include <common/sleep.h>
 #include <common/getFQDNOrHostName.h>
+#include <common/logger_useful.h>
 #include <random>
 #include <pcg_random.hpp>
 
@@ -51,7 +36,6 @@ namespace ErrorCodes
 {
     extern const int NOT_IMPLEMENTED;
     extern const int LOGICAL_ERROR;
-    extern const int UNKNOWN_FORMAT_VERSION;
     extern const int INCONSISTENT_CLUSTER_DEFINITION;
     extern const int TIMEOUT_EXCEEDED;
     extern const int UNKNOWN_TYPE_OF_QUERY;
@@ -60,141 +44,6 @@ namespace ErrorCodes
 }
 
 
-namespace
-{
-
-struct HostID
-{
-    String host_name;
-    UInt16 port;
-
-    HostID() = default;
-
-    explicit HostID(const Cluster::Address & address)
-    : host_name(address.host_name), port(address.port) {}
-
-    static HostID fromString(const String & host_port_str)
-    {
-        HostID res;
-        std::tie(res.host_name, res.port) = Cluster::Address::fromString(host_port_str);
-        return res;
-    }
-
-    String toString() const
-    {
-        return Cluster::Address::toString(host_name, port);
-    }
-
-    String readableString() const
-    {
-        return host_name + ":" + DB::toString(port);
-    }
-
-    bool isLocalAddress(UInt16 clickhouse_port) const
-    {
-        try
-        {
-            return DB::isLocalAddress(DNSResolver::instance().resolveAddress(host_name, port), clickhouse_port);
-        }
-        catch (const Poco::Net::NetException &)
-        {
-            /// Avoid "Host not found" exceptions
-            return false;
-        }
-    }
-
-    static String applyToString(const HostID & host_id)
-    {
-        return host_id.toString();
-    }
-};
-
-}
-
-
-struct DDLLogEntry
-{
-    String query;
-    std::vector<HostID> hosts;
-    String initiator; // optional
-
-    static constexpr int CURRENT_VERSION = 1;
-
-    String toString()
-    {
-        WriteBufferFromOwnString wb;
-
-        Strings host_id_strings(hosts.size());
-        std::transform(hosts.begin(), hosts.end(), host_id_strings.begin(), HostID::applyToString);
-
-        auto version = CURRENT_VERSION;
-        wb << "version: " << version << "\n";
-        wb << "query: " << escape << query << "\n";
-        wb << "hosts: " << host_id_strings << "\n";
-        wb << "initiator: " << initiator << "\n";
-
-        return wb.str();
-    }
-
-    void parse(const String & data)
-    {
-        ReadBufferFromString rb(data);
-
-        int version;
-        rb >> "version: " >> version >> "\n";
-
-        if (version != CURRENT_VERSION)
-            throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unknown DDLLogEntry format version: {}", version);
-
-        Strings host_id_strings;
-        rb >> "query: " >> escape >> query >> "\n";
-        rb >> "hosts: " >> host_id_strings >> "\n";
-
-        if (!rb.eof())
-            rb >> "initiator: " >> initiator >> "\n";
-        else
-            initiator.clear();
-
-        assertEOF(rb);
-
-        hosts.resize(host_id_strings.size());
-        std::transform(host_id_strings.begin(), host_id_strings.end(), hosts.begin(), HostID::fromString);
-    }
-};
-
-
-struct DDLTask
-{
-    /// Stages of task lifetime correspond ordering of these data fields:
-
-    /// Stage 1: parse entry
-    String entry_name;
-    String entry_path;
-    DDLLogEntry entry;
-
-    /// Stage 2: resolve host_id and check that
-    HostID host_id;
-    String host_id_str;
-
-    /// Stage 3.1: parse query
-    ASTPtr query;
-    ASTQueryWithOnCluster * query_on_cluster = nullptr;
-
-    /// Stage 3.2: check cluster and find the host in cluster
-    String cluster_name;
-    ClusterPtr cluster;
-    Cluster::Address address_in_cluster;
-    size_t host_shard_num;
-    size_t host_replica_num;
-
-    /// Stage 3.3: execute query
-    ExecutionStatus execution_status;
-    bool was_executed = false;
-
-    /// Stage 4: commit results to ZooKeeper
-};
-
-
 namespace
 {
 
@@ -293,21 +142,6 @@ std::unique_ptr<ZooKeeperLock> createSimpleZooKeeperLock(
 }
 
 
-static bool isSupportedAlterType(int type)
-{
-    static const std::unordered_set<int> unsupported_alter_types{
-        ASTAlterCommand::ATTACH_PARTITION,
-        ASTAlterCommand::REPLACE_PARTITION,
-        ASTAlterCommand::FETCH_PARTITION,
-        ASTAlterCommand::FREEZE_PARTITION,
-        ASTAlterCommand::FREEZE_ALL,
-        ASTAlterCommand::NO_TYPE,
-    };
-
-    return unsupported_alter_types.count(type) == 0;
-}
-
-
 DDLWorker::DDLWorker(int pool_size_, const std::string & zk_root_dir, Context & context_, const Poco::Util::AbstractConfiguration * config, const String & prefix)
     : context(context_)
     , log(&Poco::Logger::get("DDLWorker"))
@@ -1187,313 +1021,4 @@ void DDLWorker::runCleanupThread()
 }
 
 
-class DDLQueryStatusInputStream : public IBlockInputStream
-{
-public:
-
-    DDLQueryStatusInputStream(const String & zk_node_path, const DDLLogEntry & entry, const Context & context_)
-        : node_path(zk_node_path), context(context_), watch(CLOCK_MONOTONIC_COARSE), log(&Poco::Logger::get("DDLQueryStatusInputStream"))
-    {
-        sample = Block{
-            {std::make_shared<DataTypeString>(),    "host"},
-            {std::make_shared<DataTypeUInt16>(),    "port"},
-            {std::make_shared<DataTypeInt64>(),     "status"},
-            {std::make_shared<DataTypeString>(),    "error"},
-            {std::make_shared<DataTypeUInt64>(),    "num_hosts_remaining"},
-            {std::make_shared<DataTypeUInt64>(),    "num_hosts_active"},
-        };
-
-        for (const HostID & host: entry.hosts)
-            waiting_hosts.emplace(host.toString());
-
-        addTotalRowsApprox(entry.hosts.size());
-
-        timeout_seconds = context.getSettingsRef().distributed_ddl_task_timeout;
-    }
-
-    String getName() const override
-    {
-        return "DDLQueryStatusInputStream";
-    }
-
-    Block getHeader() const override { return sample; }
-
-    Block readImpl() override
-    {
-        Block res;
-        if (num_hosts_finished >= waiting_hosts.size())
-        {
-            if (first_exception)
-                throw Exception(*first_exception);
-
-            return res;
-        }
-
-        auto zookeeper = context.getZooKeeper();
-        size_t try_number = 0;
-
-        while (res.rows() == 0)
-        {
-            if (isCancelled())
-            {
-                if (first_exception)
-                    throw Exception(*first_exception);
-
-                return res;
-            }
-
-            if (timeout_seconds >= 0 && watch.elapsedSeconds() > timeout_seconds)
-            {
-                size_t num_unfinished_hosts = waiting_hosts.size() - num_hosts_finished;
-                size_t num_active_hosts = current_active_hosts.size();
-
-
-                throw Exception(ErrorCodes::TIMEOUT_EXCEEDED,
-                    "Watching task {} is executing longer than distributed_ddl_task_timeout (={}) seconds. "
-                    "There are {} unfinished hosts ({} of them are currently active), they are going to execute the query in background",
-                    node_path, timeout_seconds, num_unfinished_hosts, num_active_hosts);
-            }
-
-            if (num_hosts_finished != 0 || try_number != 0)
-            {
-                sleepForMilliseconds(std::min<size_t>(1000, 50 * (try_number + 1)));
-            }
-
-            /// TODO: add shared lock
-            if (!zookeeper->exists(node_path))
-            {
-                throw Exception(ErrorCodes::UNFINISHED,
-                    "Cannot provide query execution status. The query's node {} has been deleted by the cleaner since it was finished (or its lifetime is expired)",
-                    node_path);
-            }
-
-            Strings new_hosts = getNewAndUpdate(getChildrenAllowNoNode(zookeeper, node_path + "/finished"));
-            ++try_number;
-            if (new_hosts.empty())
-                continue;
-
-            current_active_hosts = getChildrenAllowNoNode(zookeeper, node_path + "/active");
-
-            MutableColumns columns = sample.cloneEmptyColumns();
-            for (const String & host_id : new_hosts)
-            {
-                ExecutionStatus status(-1, "Cannot obtain error message");
-                {
-                    String status_data;
-                    if (zookeeper->tryGet(node_path + "/finished/" + host_id, status_data))
-                        status.tryDeserializeText(status_data);
-                }
-
-                auto [host, port] = Cluster::Address::fromString(host_id);
-
-                if (status.code != 0 && first_exception == nullptr)
-                    first_exception = std::make_unique<Exception>(status.code, "There was an error on [{}:{}]: {}", host, port, status.message);
-
-                ++num_hosts_finished;
-
-                columns[0]->insert(host);
-                columns[1]->insert(port);
-                columns[2]->insert(status.code);
-                columns[3]->insert(status.message);
-                columns[4]->insert(waiting_hosts.size() - num_hosts_finished);
-                columns[5]->insert(current_active_hosts.size());
-            }
-            res = sample.cloneWithColumns(std::move(columns));
-        }
-
-        return res;
-    }
-
-    Block getSampleBlock() const
-    {
-        return sample.cloneEmpty();
-    }
-
-    ~DDLQueryStatusInputStream() override = default;
-
-private:
-
-    static Strings getChildrenAllowNoNode(const std::shared_ptr<zkutil::ZooKeeper> & zookeeper, const String & node_path)
-    {
-        Strings res;
-        Coordination::Error code = zookeeper->tryGetChildren(node_path, res);
-        if (code != Coordination::Error::ZOK && code != Coordination::Error::ZNONODE)
-            throw Coordination::Exception(code, node_path);
-        return res;
-    }
-
-    Strings getNewAndUpdate(const Strings & current_list_of_finished_hosts)
-    {
-        Strings diff;
-        for (const String & host : current_list_of_finished_hosts)
-        {
-            if (!waiting_hosts.count(host))
-            {
-                if (!ignoring_hosts.count(host))
-                {
-                    ignoring_hosts.emplace(host);
-                    LOG_INFO(log, "Unexpected host {} appeared  in task {}", host, node_path);
-                }
-                continue;
-            }
-
-            if (!finished_hosts.count(host))
-            {
-                diff.emplace_back(host);
-                finished_hosts.emplace(host);
-            }
-        }
-
-        return diff;
-    }
-
-    String node_path;
-    const Context & context;
-    Stopwatch watch;
-    Poco::Logger * log;
-
-    Block sample;
-
-    NameSet waiting_hosts;  /// hosts from task host list
-    NameSet finished_hosts; /// finished hosts from host list
-    NameSet ignoring_hosts; /// appeared hosts that are not in hosts list
-    Strings current_active_hosts; /// Hosts that were in active state at the last check
-    size_t num_hosts_finished = 0;
-
-    /// Save the first detected error and throw it at the end of execution
-    std::unique_ptr<Exception> first_exception;
-
-    Int64 timeout_seconds = 120;
-};
-
-
-BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, const Context & context, AccessRightsElements && query_requires_access, bool query_requires_grant_option)
-{
-    /// Remove FORMAT <fmt> and INTO OUTFILE <file> if exists
-    ASTPtr query_ptr = query_ptr_->clone();
-    ASTQueryWithOutput::resetOutputASTIfExist(*query_ptr);
-
-    // XXX: serious design flaw since `ASTQueryWithOnCluster` is not inherited from `IAST`!
-    auto * query = dynamic_cast<ASTQueryWithOnCluster *>(query_ptr.get());
-    if (!query)
-    {
-        throw Exception("Distributed execution is not supported for such DDL queries", ErrorCodes::NOT_IMPLEMENTED);
-    }
-
-    if (!context.getSettingsRef().allow_distributed_ddl)
-        throw Exception("Distributed DDL queries are prohibited for the user", ErrorCodes::QUERY_IS_PROHIBITED);
-
-    if (const auto * query_alter = query_ptr->as<ASTAlterQuery>())
-    {
-        for (const auto & command : query_alter->command_list->commands)
-        {
-            if (!isSupportedAlterType(command->type))
-                throw Exception("Unsupported type of ALTER query", ErrorCodes::NOT_IMPLEMENTED);
-        }
-    }
-
-    query->cluster = context.getMacros()->expand(query->cluster);
-    ClusterPtr cluster = context.getCluster(query->cluster);
-    DDLWorker & ddl_worker = context.getDDLWorker();
-
-    /// Enumerate hosts which will be used to send query.
-    Cluster::AddressesWithFailover shards = cluster->getShardsAddresses();
-    std::vector<HostID> hosts;
-    for (const auto & shard : shards)
-    {
-        for (const auto & addr : shard)
-            hosts.emplace_back(addr);
-    }
-
-    if (hosts.empty())
-        throw Exception("No hosts defined to execute distributed DDL query", ErrorCodes::LOGICAL_ERROR);
-
-    /// The current database in a distributed query need to be replaced with either
-    /// the local current database or a shard's default database.
-    bool need_replace_current_database
-        = (std::find_if(
-               query_requires_access.begin(),
-               query_requires_access.end(),
-               [](const AccessRightsElement & elem) { return elem.isEmptyDatabase(); })
-           != query_requires_access.end());
-
-    bool use_local_default_database = false;
-    const String & current_database = context.getCurrentDatabase();
-
-    if (need_replace_current_database)
-    {
-        Strings shard_default_databases;
-        for (const auto & shard : shards)
-        {
-            for (const auto & addr : shard)
-            {
-                if (!addr.default_database.empty())
-                    shard_default_databases.push_back(addr.default_database);
-                else
-                    use_local_default_database = true;
-            }
-        }
-        std::sort(shard_default_databases.begin(), shard_default_databases.end());
-        shard_default_databases.erase(std::unique(shard_default_databases.begin(), shard_default_databases.end()), shard_default_databases.end());
-        assert(use_local_default_database || !shard_default_databases.empty());
-
-        if (use_local_default_database && !shard_default_databases.empty())
-            throw Exception("Mixed local default DB and shard default DB in DDL query", ErrorCodes::NOT_IMPLEMENTED);
-
-        if (use_local_default_database)
-        {
-            query_requires_access.replaceEmptyDatabase(current_database);
-        }
-        else
-        {
-            for (size_t i = 0; i != query_requires_access.size();)
-            {
-                auto & element = query_requires_access[i];
-                if (element.isEmptyDatabase())
-                {
-                    query_requires_access.insert(query_requires_access.begin() + i + 1, shard_default_databases.size() - 1, element);
-                    for (size_t j = 0; j != shard_default_databases.size(); ++j)
-                        query_requires_access[i + j].replaceEmptyDatabase(shard_default_databases[j]);
-                    i += shard_default_databases.size();
-                }
-                else
-                    ++i;
-            }
-        }
-    }
-
-    AddDefaultDatabaseVisitor visitor(current_database, !use_local_default_database);
-    visitor.visitDDL(query_ptr);
-
-    /// Check access rights, assume that all servers have the same users config
-    if (query_requires_grant_option)
-        context.getAccess()->checkGrantOption(query_requires_access);
-    else
-        context.checkAccess(query_requires_access);
-
-    DDLLogEntry entry;
-    entry.hosts = std::move(hosts);
-    entry.query = queryToString(query_ptr);
-    entry.initiator = ddl_worker.getCommonHostID();
-    String node_path = ddl_worker.enqueueQuery(entry);
-
-    BlockIO io;
-    if (context.getSettingsRef().distributed_ddl_task_timeout == 0)
-        return io;
-
-    auto stream = std::make_shared<DDLQueryStatusInputStream>(node_path, entry, context);
-    io.in = std::move(stream);
-    return io;
-}
-
-BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, const Context & context, const AccessRightsElements & query_requires_access, bool query_requires_grant_option)
-{
-    return executeDDLQueryOnCluster(query_ptr, context, AccessRightsElements{query_requires_access}, query_requires_grant_option);
-}
-
-BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, const Context & context)
-{
-    return executeDDLQueryOnCluster(query_ptr_, context, {});
-}
-
 }
diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h
index 39cdcab709e..caa2242caf8 100644
--- a/src/Interpreters/DDLWorker.h
+++ b/src/Interpreters/DDLWorker.h
@@ -1,11 +1,9 @@
 #pragma once
 
-#include <Interpreters/Cluster.h>
-#include <DataStreams/BlockIO.h>
 #include <Common/CurrentThread.h>
 #include <Common/ThreadPool.h>
-#include <common/logger_useful.h>
-#include <Storages/IStorage.h>
+#include <Storages/IStorage_fwd.h>
+#include <Parsers/IAST_fwd.h>
 
 #include <atomic>
 #include <chrono>
@@ -18,23 +16,22 @@ namespace zkutil
     class ZooKeeper;
 }
 
+namespace Poco
+{
+    class Logger;
+    namespace Util { class AbstractConfiguration; }
+}
+
 namespace DB
 {
 
 class Context;
 class ASTAlterQuery;
-class AccessRightsElements;
 struct DDLLogEntry;
 struct DDLTask;
 using DDLTaskPtr = std::unique_ptr<DDLTask>;
 
 
-/// Pushes distributed DDL query to the queue
-BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, const Context & context);
-BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, const Context & context, const AccessRightsElements & query_requires_access, bool query_requires_grant_option = false);
-BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, const Context & context, AccessRightsElements && query_requires_access, bool query_requires_grant_option = false);
-
-
 class DDLWorker
 {
 public:
@@ -137,9 +134,6 @@ private:
     size_t max_tasks_in_queue = 1000;
 
     ThreadGroupStatusPtr thread_group;
-
-    friend class DDLQueryStatusInputStream;
-    friend struct DDLTask;
 };
 
 
diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp
index e229cb120e5..013e30a3ed5 100644
--- a/src/Interpreters/InterpreterAlterQuery.cpp
+++ b/src/Interpreters/InterpreterAlterQuery.cpp
@@ -1,5 +1,5 @@
 #include <Interpreters/InterpreterAlterQuery.h>
-#include <Interpreters/DDLWorker.h>
+#include <Interpreters/executeDDLQueryOnCluster.h>
 #include <Interpreters/MutationsInterpreter.h>
 #include <Interpreters/AddDefaultDatabaseVisitor.h>
 #include <Interpreters/Context.h>
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index 0f7d441c0d6..04c5efce3e2 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -28,7 +28,8 @@
 #include <Storages/StorageInMemoryMetadata.h>
 
 #include <Interpreters/Context.h>
-#include <Interpreters/DDLWorker.h>
+#include <Interpreters/executeDDLQueryOnCluster.h>
+#include <Interpreters/Cluster.h>
 #include <Interpreters/ExpressionAnalyzer.h>
 #include <Interpreters/InterpreterCreateQuery.h>
 #include <Interpreters/InterpreterSelectWithUnionQuery.h>
diff --git a/src/Interpreters/InterpreterCreateQuotaQuery.cpp b/src/Interpreters/InterpreterCreateQuotaQuery.cpp
index f45c2c9709d..ff30a2fff47 100644
--- a/src/Interpreters/InterpreterCreateQuotaQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuotaQuery.cpp
@@ -2,7 +2,7 @@
 #include <Parsers/ASTCreateQuotaQuery.h>
 #include <Parsers/ASTRolesOrUsersSet.h>
 #include <Interpreters/Context.h>
-#include <Interpreters/DDLWorker.h>
+#include <Interpreters/executeDDLQueryOnCluster.h>
 #include <Access/AccessControlManager.h>
 #include <Access/AccessFlags.h>
 #include <ext/range.h>
diff --git a/src/Interpreters/InterpreterCreateRoleQuery.cpp b/src/Interpreters/InterpreterCreateRoleQuery.cpp
index 2fa04eebae1..72ad3234b95 100644
--- a/src/Interpreters/InterpreterCreateRoleQuery.cpp
+++ b/src/Interpreters/InterpreterCreateRoleQuery.cpp
@@ -1,7 +1,7 @@
 #include <Interpreters/InterpreterCreateRoleQuery.h>
 #include <Parsers/ASTCreateRoleQuery.h>
 #include <Interpreters/Context.h>
-#include <Interpreters/DDLWorker.h>
+#include <Interpreters/executeDDLQueryOnCluster.h>
 #include <Access/AccessControlManager.h>
 #include <Access/Role.h>
 
diff --git a/src/Interpreters/InterpreterCreateRowPolicyQuery.cpp b/src/Interpreters/InterpreterCreateRowPolicyQuery.cpp
index 9dacc9d1bf4..8f1c5b061e0 100644
--- a/src/Interpreters/InterpreterCreateRowPolicyQuery.cpp
+++ b/src/Interpreters/InterpreterCreateRowPolicyQuery.cpp
@@ -4,7 +4,7 @@
 #include <Parsers/ASTRolesOrUsersSet.h>
 #include <Parsers/formatAST.h>
 #include <Interpreters/Context.h>
-#include <Interpreters/DDLWorker.h>
+#include <Interpreters/executeDDLQueryOnCluster.h>
 #include <Access/AccessControlManager.h>
 #include <Access/AccessFlags.h>
 #include <boost/range/algorithm/sort.hpp>
diff --git a/src/Interpreters/InterpreterCreateSettingsProfileQuery.cpp b/src/Interpreters/InterpreterCreateSettingsProfileQuery.cpp
index 2d5f4d499b7..b65225db16c 100644
--- a/src/Interpreters/InterpreterCreateSettingsProfileQuery.cpp
+++ b/src/Interpreters/InterpreterCreateSettingsProfileQuery.cpp
@@ -2,7 +2,7 @@
 #include <Parsers/ASTCreateSettingsProfileQuery.h>
 #include <Parsers/ASTRolesOrUsersSet.h>
 #include <Interpreters/Context.h>
-#include <Interpreters/DDLWorker.h>
+#include <Interpreters/executeDDLQueryOnCluster.h>
 #include <Access/AccessControlManager.h>
 #include <Access/SettingsProfile.h>
 #include <Access/AccessFlags.h>
diff --git a/src/Interpreters/InterpreterCreateUserQuery.cpp b/src/Interpreters/InterpreterCreateUserQuery.cpp
index 111f698beb9..c9b087de5b4 100644
--- a/src/Interpreters/InterpreterCreateUserQuery.cpp
+++ b/src/Interpreters/InterpreterCreateUserQuery.cpp
@@ -1,7 +1,7 @@
 #include <Interpreters/InterpreterCreateUserQuery.h>
 #include <Interpreters/Context.h>
 #include <Interpreters/InterpreterSetRoleQuery.h>
-#include <Interpreters/DDLWorker.h>
+#include <Interpreters/executeDDLQueryOnCluster.h>
 #include <Parsers/ASTCreateUserQuery.h>
 #include <Parsers/ASTUserNameWithHost.h>
 #include <Parsers/ASTRolesOrUsersSet.h>
diff --git a/src/Interpreters/InterpreterDropAccessEntityQuery.cpp b/src/Interpreters/InterpreterDropAccessEntityQuery.cpp
index d79d239ee12..e86f8361100 100644
--- a/src/Interpreters/InterpreterDropAccessEntityQuery.cpp
+++ b/src/Interpreters/InterpreterDropAccessEntityQuery.cpp
@@ -2,7 +2,7 @@
 #include <Parsers/ASTDropAccessEntityQuery.h>
 #include <Parsers/ASTRowPolicyName.h>
 #include <Interpreters/Context.h>
-#include <Interpreters/DDLWorker.h>
+#include <Interpreters/executeDDLQueryOnCluster.h>
 #include <Access/AccessControlManager.h>
 #include <Access/AccessFlags.h>
 #include <Access/User.h>
diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp
index 48eb20485be..0f03525f237 100644
--- a/src/Interpreters/InterpreterDropQuery.cpp
+++ b/src/Interpreters/InterpreterDropQuery.cpp
@@ -2,7 +2,7 @@
 
 #include <Databases/IDatabase.h>
 #include <Interpreters/Context.h>
-#include <Interpreters/DDLWorker.h>
+#include <Interpreters/executeDDLQueryOnCluster.h>
 #include <Interpreters/InterpreterDropQuery.h>
 #include <Interpreters/ExternalDictionariesLoader.h>
 #include <Access/AccessRightsElement.h>
diff --git a/src/Interpreters/InterpreterGrantQuery.cpp b/src/Interpreters/InterpreterGrantQuery.cpp
index 6f45687a4e1..dafe4d2e18c 100644
--- a/src/Interpreters/InterpreterGrantQuery.cpp
+++ b/src/Interpreters/InterpreterGrantQuery.cpp
@@ -2,7 +2,7 @@
 #include <Parsers/ASTGrantQuery.h>
 #include <Parsers/ASTRolesOrUsersSet.h>
 #include <Interpreters/Context.h>
-#include <Interpreters/DDLWorker.h>
+#include <Interpreters/executeDDLQueryOnCluster.h>
 #include <Access/AccessControlManager.h>
 #include <Access/ContextAccess.h>
 #include <Access/RolesOrUsersSet.h>
diff --git a/src/Interpreters/InterpreterKillQueryQuery.cpp b/src/Interpreters/InterpreterKillQueryQuery.cpp
index 0f7da8f1f58..c50659c6c45 100644
--- a/src/Interpreters/InterpreterKillQueryQuery.cpp
+++ b/src/Interpreters/InterpreterKillQueryQuery.cpp
@@ -2,7 +2,7 @@
 #include <Parsers/ASTKillQueryQuery.h>
 #include <Parsers/queryToString.h>
 #include <Interpreters/Context.h>
-#include <Interpreters/DDLWorker.h>
+#include <Interpreters/executeDDLQueryOnCluster.h>
 #include <Interpreters/ProcessList.h>
 #include <Interpreters/executeQuery.h>
 #include <Interpreters/CancellationCode.h>
diff --git a/src/Interpreters/InterpreterOptimizeQuery.cpp b/src/Interpreters/InterpreterOptimizeQuery.cpp
index 680dd9b803b..431d5074cde 100644
--- a/src/Interpreters/InterpreterOptimizeQuery.cpp
+++ b/src/Interpreters/InterpreterOptimizeQuery.cpp
@@ -1,7 +1,7 @@
 #include <Storages/IStorage.h>
 #include <Parsers/ASTOptimizeQuery.h>
 #include <Interpreters/Context.h>
-#include <Interpreters/DDLWorker.h>
+#include <Interpreters/executeDDLQueryOnCluster.h>
 #include <Interpreters/InterpreterOptimizeQuery.h>
 #include <Access/AccessRightsElement.h>
 #include <Common/typeid_cast.h>
diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp
index 65ed33bd9db..3a375e2ba60 100644
--- a/src/Interpreters/InterpreterRenameQuery.cpp
+++ b/src/Interpreters/InterpreterRenameQuery.cpp
@@ -3,7 +3,7 @@
 #include <Interpreters/Context.h>
 #include <Interpreters/InterpreterRenameQuery.h>
 #include <Storages/IStorage.h>
-#include <Interpreters/DDLWorker.h>
+#include <Interpreters/executeDDLQueryOnCluster.h>
 #include <Access/AccessRightsElement.h>
 #include <Common/typeid_cast.h>
 #include <Databases/DatabaseReplicated.h>
diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp
index f0a8ce9064d..1b8c3ae79f2 100644
--- a/src/Interpreters/InterpreterSystemQuery.cpp
+++ b/src/Interpreters/InterpreterSystemQuery.cpp
@@ -14,7 +14,7 @@
 #include <Interpreters/InterpreterCreateQuery.h>
 #include <Interpreters/InterpreterRenameQuery.h>
 #include <Interpreters/QueryLog.h>
-#include <Interpreters/DDLWorker.h>
+#include <Interpreters/executeDDLQueryOnCluster.h>
 #include <Interpreters/PartLog.h>
 #include <Interpreters/QueryThreadLog.h>
 #include <Interpreters/TraceLog.h>
diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp
new file mode 100644
index 00000000000..6da1704ce55
--- /dev/null
+++ b/src/Interpreters/executeDDLQueryOnCluster.cpp
@@ -0,0 +1,317 @@
+#include <Interpreters/executeDDLQueryOnCluster.h>
+#include <Interpreters/DDLWorker.h>
+#include <Interpreters/DDLTask.h>
+#include <Interpreters/AddDefaultDatabaseVisitor.h>
+#include <Parsers/ASTQueryWithOutput.h>
+#include <Parsers/ASTQueryWithOnCluster.h>
+#include <Parsers/ASTAlterQuery.h>
+#include <Parsers/queryToString.h>
+#include <Access/AccessRightsElement.h>
+#include <Access/ContextAccess.h>
+#include <Common/Macros.h>
+#include <Common/ZooKeeper/ZooKeeper.h>
+#include <DataTypes/DataTypesNumber.h>
+#include <DataTypes/DataTypeString.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int NOT_IMPLEMENTED;
+    extern const int TIMEOUT_EXCEEDED;
+    extern const int UNFINISHED;
+    extern const int QUERY_IS_PROHIBITED;
+}
+
+static bool isSupportedAlterType(int type)
+{
+    static const std::unordered_set<int> unsupported_alter_types{
+        ASTAlterCommand::ATTACH_PARTITION,
+        ASTAlterCommand::REPLACE_PARTITION,
+        ASTAlterCommand::FETCH_PARTITION,
+        ASTAlterCommand::FREEZE_PARTITION,
+        ASTAlterCommand::FREEZE_ALL,
+        ASTAlterCommand::NO_TYPE,
+    };
+
+    return unsupported_alter_types.count(type) == 0;
+}
+
+
+BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, const Context & context)
+{
+    return executeDDLQueryOnCluster(query_ptr_, context, {});
+}
+
+BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, const Context & context, const AccessRightsElements & query_requires_access, bool query_requires_grant_option)
+{
+    return executeDDLQueryOnCluster(query_ptr, context, AccessRightsElements{query_requires_access}, query_requires_grant_option);
+}
+
+BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, const Context & context, AccessRightsElements && query_requires_access, bool query_requires_grant_option)
+{
+    /// Remove FORMAT <fmt> and INTO OUTFILE <file> if exists
+    ASTPtr query_ptr = query_ptr_->clone();
+    ASTQueryWithOutput::resetOutputASTIfExist(*query_ptr);
+
+    // XXX: serious design flaw since `ASTQueryWithOnCluster` is not inherited from `IAST`!
+    auto * query = dynamic_cast<ASTQueryWithOnCluster *>(query_ptr.get());
+    if (!query)
+    {
+        throw Exception("Distributed execution is not supported for such DDL queries", ErrorCodes::NOT_IMPLEMENTED);
+    }
+
+    if (!context.getSettingsRef().allow_distributed_ddl)
+        throw Exception("Distributed DDL queries are prohibited for the user", ErrorCodes::QUERY_IS_PROHIBITED);
+
+    if (const auto * query_alter = query_ptr->as<ASTAlterQuery>())
+    {
+        for (const auto & command : query_alter->command_list->commands)
+        {
+            if (!isSupportedAlterType(command->type))
+                throw Exception("Unsupported type of ALTER query", ErrorCodes::NOT_IMPLEMENTED);
+        }
+    }
+
+    query->cluster = context.getMacros()->expand(query->cluster);
+    ClusterPtr cluster = context.getCluster(query->cluster);
+    DDLWorker & ddl_worker = context.getDDLWorker();
+
+    /// Enumerate hosts which will be used to send query.
+    Cluster::AddressesWithFailover shards = cluster->getShardsAddresses();
+    std::vector<HostID> hosts;
+    for (const auto & shard : shards)
+    {
+        for (const auto & addr : shard)
+            hosts.emplace_back(addr);
+    }
+
+    if (hosts.empty())
+        throw Exception("No hosts defined to execute distributed DDL query", ErrorCodes::LOGICAL_ERROR);
+
+    /// The current database in a distributed query need to be replaced with either
+    /// the local current database or a shard's default database.
+    bool need_replace_current_database
+        = (std::find_if(
+            query_requires_access.begin(),
+            query_requires_access.end(),
+            [](const AccessRightsElement & elem) { return elem.isEmptyDatabase(); })
+           != query_requires_access.end());
+
+    bool use_local_default_database = false;
+    const String & current_database = context.getCurrentDatabase();
+
+    if (need_replace_current_database)
+    {
+        Strings shard_default_databases;
+        for (const auto & shard : shards)
+        {
+            for (const auto & addr : shard)
+            {
+                if (!addr.default_database.empty())
+                    shard_default_databases.push_back(addr.default_database);
+                else
+                    use_local_default_database = true;
+            }
+        }
+        std::sort(shard_default_databases.begin(), shard_default_databases.end());
+        shard_default_databases.erase(std::unique(shard_default_databases.begin(), shard_default_databases.end()), shard_default_databases.end());
+        assert(use_local_default_database || !shard_default_databases.empty());
+
+        if (use_local_default_database && !shard_default_databases.empty())
+            throw Exception("Mixed local default DB and shard default DB in DDL query", ErrorCodes::NOT_IMPLEMENTED);
+
+        if (use_local_default_database)
+        {
+            query_requires_access.replaceEmptyDatabase(current_database);
+        }
+        else
+        {
+            for (size_t i = 0; i != query_requires_access.size();)
+            {
+                auto & element = query_requires_access[i];
+                if (element.isEmptyDatabase())
+                {
+                    query_requires_access.insert(query_requires_access.begin() + i + 1, shard_default_databases.size() - 1, element);
+                    for (size_t j = 0; j != shard_default_databases.size(); ++j)
+                        query_requires_access[i + j].replaceEmptyDatabase(shard_default_databases[j]);
+                    i += shard_default_databases.size();
+                }
+                else
+                    ++i;
+            }
+        }
+    }
+
+    AddDefaultDatabaseVisitor visitor(current_database, !use_local_default_database);
+    visitor.visitDDL(query_ptr);
+
+    /// Check access rights, assume that all servers have the same users config
+    if (query_requires_grant_option)
+        context.getAccess()->checkGrantOption(query_requires_access);
+    else
+        context.checkAccess(query_requires_access);
+
+    DDLLogEntry entry;
+    entry.hosts = std::move(hosts);
+    entry.query = queryToString(query_ptr);
+    entry.initiator = ddl_worker.getCommonHostID();
+    String node_path = ddl_worker.enqueueQuery(entry);
+
+    BlockIO io;
+    if (context.getSettingsRef().distributed_ddl_task_timeout == 0)
+        return io;
+
+    auto stream = std::make_shared<DDLQueryStatusInputStream>(node_path, entry, context);
+    io.in = std::move(stream);
+    return io;
+}
+
+
+DDLQueryStatusInputStream::DDLQueryStatusInputStream(const String & zk_node_path, const DDLLogEntry & entry, const Context & context_)
+    : node_path(zk_node_path)
+    , context(context_)
+    , watch(CLOCK_MONOTONIC_COARSE)
+    , log(&Poco::Logger::get("DDLQueryStatusInputStream"))
+{
+    sample = Block{
+        {std::make_shared<DataTypeString>(),    "host"},
+        {std::make_shared<DataTypeUInt16>(),    "port"},
+        {std::make_shared<DataTypeInt64>(),     "status"},
+        {std::make_shared<DataTypeString>(),    "error"},
+        {std::make_shared<DataTypeUInt64>(),    "num_hosts_remaining"},
+        {std::make_shared<DataTypeUInt64>(),    "num_hosts_active"},
+    };
+
+    for (const HostID & host: entry.hosts)
+        waiting_hosts.emplace(host.toString());
+
+    addTotalRowsApprox(entry.hosts.size());
+
+    timeout_seconds = context.getSettingsRef().distributed_ddl_task_timeout;
+}
+
+Block DDLQueryStatusInputStream::readImpl()
+{
+    Block res;
+    if (num_hosts_finished >= waiting_hosts.size())
+    {
+        if (first_exception)
+            throw Exception(*first_exception);
+
+        return res;
+    }
+
+    auto zookeeper = context.getZooKeeper();
+    size_t try_number = 0;
+
+    while (res.rows() == 0)
+    {
+        if (isCancelled())
+        {
+            if (first_exception)
+                throw Exception(*first_exception);
+
+            return res;
+        }
+
+        if (timeout_seconds >= 0 && watch.elapsedSeconds() > timeout_seconds)
+        {
+            size_t num_unfinished_hosts = waiting_hosts.size() - num_hosts_finished;
+            size_t num_active_hosts = current_active_hosts.size();
+
+
+            throw Exception(ErrorCodes::TIMEOUT_EXCEEDED,
+                            "Watching task {} is executing longer than distributed_ddl_task_timeout (={}) seconds. "
+                            "There are {} unfinished hosts ({} of them are currently active), they are going to execute the query in background",
+                            node_path, timeout_seconds, num_unfinished_hosts, num_active_hosts);
+        }
+
+        if (num_hosts_finished != 0 || try_number != 0)
+        {
+            sleepForMilliseconds(std::min<size_t>(1000, 50 * (try_number + 1)));
+        }
+
+        /// TODO: add shared lock
+        if (!zookeeper->exists(node_path))
+        {
+            throw Exception(ErrorCodes::UNFINISHED,
+                            "Cannot provide query execution status. The query's node {} has been deleted by the cleaner since it was finished (or its lifetime is expired)",
+                            node_path);
+        }
+
+        Strings new_hosts = getNewAndUpdate(getChildrenAllowNoNode(zookeeper, node_path + "/finished"));
+        ++try_number;
+        if (new_hosts.empty())
+            continue;
+
+        current_active_hosts = getChildrenAllowNoNode(zookeeper, node_path + "/active");
+
+        MutableColumns columns = sample.cloneEmptyColumns();
+        for (const String & host_id : new_hosts)
+        {
+            ExecutionStatus status(-1, "Cannot obtain error message");
+            {
+                String status_data;
+                if (zookeeper->tryGet(node_path + "/finished/" + host_id, status_data))
+                    status.tryDeserializeText(status_data);
+            }
+
+            auto [host, port] = Cluster::Address::fromString(host_id);
+
+            if (status.code != 0 && first_exception == nullptr)
+                first_exception = std::make_unique<Exception>(status.code, "There was an error on [{}:{}]: {}", host, port, status.message);
+
+            ++num_hosts_finished;
+
+            columns[0]->insert(host);
+            columns[1]->insert(port);
+            columns[2]->insert(status.code);
+            columns[3]->insert(status.message);
+            columns[4]->insert(waiting_hosts.size() - num_hosts_finished);
+            columns[5]->insert(current_active_hosts.size());
+        }
+        res = sample.cloneWithColumns(std::move(columns));
+    }
+
+    return res;
+}
+
+Strings DDLQueryStatusInputStream::getChildrenAllowNoNode(const std::shared_ptr<zkutil::ZooKeeper> & zookeeper, const String & node_path)
+{
+    Strings res;
+    Coordination::Error code = zookeeper->tryGetChildren(node_path, res);
+    if (code != Coordination::Error::ZOK && code != Coordination::Error::ZNONODE)
+        throw Coordination::Exception(code, node_path);
+    return res;
+}
+
+Strings DDLQueryStatusInputStream::getNewAndUpdate(const Strings & current_list_of_finished_hosts)
+{
+    Strings diff;
+    for (const String & host : current_list_of_finished_hosts)
+    {
+        if (!waiting_hosts.count(host))
+        {
+            if (!ignoring_hosts.count(host))
+            {
+                ignoring_hosts.emplace(host);
+                LOG_INFO(log, "Unexpected host {} appeared  in task {}", host, node_path);
+            }
+            continue;
+        }
+
+        if (!finished_hosts.count(host))
+        {
+            diff.emplace_back(host);
+            finished_hosts.emplace(host);
+        }
+    }
+
+    return diff;
+}
+
+
+}
diff --git a/src/Interpreters/executeDDLQueryOnCluster.h b/src/Interpreters/executeDDLQueryOnCluster.h
new file mode 100644
index 00000000000..83880cc94c1
--- /dev/null
+++ b/src/Interpreters/executeDDLQueryOnCluster.h
@@ -0,0 +1,63 @@
+#pragma once
+#include <DataStreams/BlockIO.h>
+#include <Parsers/IAST_fwd.h>
+
+namespace zkutil
+{
+    class ZooKeeper;
+}
+
+namespace DB
+{
+
+class Context;
+class AccessRightsElements;
+struct DDLLogEntry;
+
+
+/// Pushes distributed DDL query to the queue.
+/// Returns DDLQueryStatusInputStream, which reads results of query execution on each host in the cluster.
+BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, const Context & context);
+BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, const Context & context, const AccessRightsElements & query_requires_access, bool query_requires_grant_option = false);
+BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, const Context & context, AccessRightsElements && query_requires_access, bool query_requires_grant_option = false);
+
+
+class DDLQueryStatusInputStream : public IBlockInputStream
+{
+public:
+    DDLQueryStatusInputStream(const String & zk_node_path, const DDLLogEntry & entry, const Context & context_);
+
+    String getName() const override { return "DDLQueryStatusInputStream"; }
+
+    Block getHeader() const override { return sample; }
+
+    Block getSampleBlock() const { return sample.cloneEmpty(); }
+
+    Block readImpl() override;
+
+private:
+
+    static Strings getChildrenAllowNoNode(const std::shared_ptr<zkutil::ZooKeeper> & zookeeper, const String & node_path);
+
+    Strings getNewAndUpdate(const Strings & current_list_of_finished_hosts);
+
+    String node_path;
+    const Context & context;
+    Stopwatch watch;
+    Poco::Logger * log;
+
+    Block sample;
+
+    NameSet waiting_hosts;  /// hosts from task host list
+    NameSet finished_hosts; /// finished hosts from host list
+    NameSet ignoring_hosts; /// appeared hosts that are not in hosts list
+    Strings current_active_hosts; /// Hosts that were in active state at the last check
+    size_t num_hosts_finished = 0;
+
+    /// Save the first detected error and throw it at the end of execution
+    std::unique_ptr<Exception> first_exception;
+
+    Int64 timeout_seconds = 120;
+};
+
+}
diff --git a/src/Interpreters/ya.make b/src/Interpreters/ya.make
index 4c0b64934c7..11a09c40d6a 100644
--- a/src/Interpreters/ya.make
+++ b/src/Interpreters/ya.make
@@ -45,11 +45,13 @@ SRCS(
     CrossToInnerJoinVisitor.cpp
     DatabaseAndTableWithAlias.cpp
     DatabaseCatalog.cpp
+    DDLTask.cpp
     DDLWorker.cpp
     DictionaryReader.cpp
     DNSCacheUpdater.cpp
     EmbeddedDictionaries.cpp
     evaluateConstantExpression.cpp
+    executeDDLQueryOnCluster.cpp
     executeQuery.cpp
     ExecuteScalarSubqueriesVisitor.cpp
     ExpressionActions.cpp

From 2a6c0b91802de8279a0928e853a3840d94a1413a Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Thu, 5 Nov 2020 12:52:23 +0300
Subject: [PATCH 0056/1238] try reuse DDLWorker in DatabaseReplicated

---
 src/Databases/DatabaseReplicated.cpp          | 206 +++++++++++-------
 src/Databases/DatabaseReplicated.h            |  16 +-
 src/Databases/IDatabase.h                     |   6 -
 src/Interpreters/DDLWorker.cpp                |  36 ++-
 src/Interpreters/DDLWorker.h                  |  10 +-
 src/Interpreters/InterpreterAlterQuery.cpp    |   8 +-
 src/Interpreters/InterpreterCreateQuery.cpp   |  29 ++-
 src/Interpreters/InterpreterDropQuery.cpp     |  16 +-
 src/Interpreters/InterpreterRenameQuery.cpp   |  11 +-
 src/Interpreters/executeDDLQueryOnCluster.cpp |  18 +-
 src/Interpreters/executeDDLQueryOnCluster.h   |   5 +-
 .../test_replicated_database/test.py          |  12 +-
 12 files changed, 224 insertions(+), 149 deletions(-)

diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index 145b3abba00..1213b5bc075 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -13,7 +13,10 @@
 #include <Common/ZooKeeper/KeeperException.h>
 #include <Common/ZooKeeper/Types.h>
 #include <Common/ZooKeeper/ZooKeeper.h>
-
+#include <Interpreters/DDLWorker.h>
+#include <Interpreters/DDLTask.h>
+#include <Interpreters/executeDDLQueryOnCluster.h>
+#include <Parsers/ASTAlterQuery.h>
 
 namespace DB
 {
@@ -45,6 +48,7 @@ zkutil::ZooKeeperPtr DatabaseReplicated::getZooKeeper() const
     return res;
 }
 
+DatabaseReplicated::~DatabaseReplicated() = default;
 
 DatabaseReplicated::DatabaseReplicated(
     const String & name_,
@@ -125,12 +129,15 @@ DatabaseReplicated::DatabaseReplicated(
     feedback_timeout = context_.getConfigRef().getInt("database_replicated_feedback_timeout", 0);
     LOG_DEBUG(log, "Snapshot period is set to {} log entries per one snapshot", snapshot_period);
 
-    //TODO do we need separate pool?
-    background_log_executor = context_.getReplicatedSchedulePool().createTask(
-        database_name + "(DatabaseReplicated::background_executor)", [this] { runBackgroundLogExecutor(); }
-    );
+    //FIXME use database UUID
+    ddl_worker = std::make_unique<DDLWorker>(1, zookeeper_path + "/log", context_, nullptr, String{}, true, database_name, replica_name, shard_name);
 
-    background_log_executor->scheduleAfter(500);
+    //TODO do we need separate pool?
+    //background_log_executor = context_.getReplicatedSchedulePool().createTask(
+    //    database_name + "(DatabaseReplicated::background_executor)", [this] { runBackgroundLogExecutor(); }
+    //);
+
+    //background_log_executor->scheduleAfter(500);
 }
 
 void DatabaseReplicated::createDatabaseZooKeeperNodes()
@@ -226,7 +233,7 @@ void DatabaseReplicated::runBackgroundLogExecutor()
         }
     }
 
-    background_log_executor->scheduleAfter(500);
+    //background_log_executor->scheduleAfter(500);
 }
 
 void DatabaseReplicated::writeLastExecutedToDiskAndZK()
@@ -244,95 +251,128 @@ void DatabaseReplicated::writeLastExecutedToDiskAndZK()
     out.close();
 }
 
-void DatabaseReplicated::executeLogName(const String & log_entry_name)
+void DatabaseReplicated::executeLogName(const String & /*log_entry_name*/)
 {
-    String path = zookeeper_path + "/log/" + log_entry_name;
-    current_zookeeper = getZooKeeper();
-    String query_to_execute = current_zookeeper->get(path, {}, nullptr);
-
-    try
-    {
-        current_context = std::make_unique<Context>(global_context);
-        current_context->getClientInfo().query_kind = ClientInfo::QueryKind::REPLICATED_LOG_QUERY;
-        current_context->setCurrentDatabase(database_name);
-        current_context->setCurrentQueryId(""); // generate random query_id
-        executeQuery(query_to_execute, *current_context);
-    }
-    catch (const Exception & e)
-    {
-        tryLogCurrentException(log, "Query from zookeeper " + query_to_execute + " wasn't finished successfully");
-        current_zookeeper->create(
-            zookeeper_path + "/replicas/" + replica_name + "/errors/" + log_entry_name, e.what(), zkutil::CreateMode::Persistent);
-    }
-
-    LOG_DEBUG(log, "Executed query: {}", query_to_execute);
+//    String path = zookeeper_path + "/log/" + log_entry_name;
+//    current_zookeeper = getZooKeeper();
+//    String query_to_execute = current_zookeeper->get(path, {}, nullptr);
+//
+//    try
+//    {
+//        current_context = std::make_unique<Context>(global_context);
+//        current_context->getClientInfo().query_kind = ClientInfo::QueryKind::REPLICATED_LOG_QUERY;
+//        current_context->setCurrentDatabase(database_name);
+//        current_context->setCurrentQueryId(""); // generate random query_id
+//        executeQuery(query_to_execute, *current_context);
+//    }
+//    catch (const Exception & e)
+//    {
+//        tryLogCurrentException(log, "Query from zookeeper " + query_to_execute + " wasn't finished successfully");
+//        current_zookeeper->create(
+//            zookeeper_path + "/replicas/" + replica_name + "/errors/" + log_entry_name, e.what(), zkutil::CreateMode::Persistent);
+//    }
+//
+//    LOG_DEBUG(log, "Executed query: {}", query_to_execute);
 }
 
-void DatabaseReplicated::propose(const ASTPtr & query)
+BlockIO DatabaseReplicated::propose(const ASTPtr & query)
 {
-    current_zookeeper = getZooKeeper();
+    //current_zookeeper = getZooKeeper();
 
-    LOG_DEBUG(log, "Proposing query: {}", queryToString(query));
 
+    if (const auto * query_alter = query->as<ASTAlterQuery>())
     {
-        std::lock_guard lock(log_name_mutex);
-        log_name_to_exec_with_result
-            = current_zookeeper->create(zookeeper_path + "/log/log-", queryToString(query), zkutil::CreateMode::PersistentSequential);
-    }
-
-    background_log_executor->schedule();
-}
-
-BlockIO DatabaseReplicated::getFeedback()
-{
-    BlockIO res;
-    if (feedback_timeout == 0)
-        return res;
-
-    Stopwatch watch;
-
-    NamesAndTypes block_structure =
-    {
-        {"replica_name", std::make_shared<DataTypeString>()},
-        {"execution_feedback", std::make_shared<DataTypeString>()},
-    };
-    auto replica_name_column = block_structure[0].type->createColumn();
-    auto feedback_column = block_structure[1].type->createColumn();
-
-    current_zookeeper = getZooKeeper();
-    Strings replica_states = current_zookeeper->getChildren(zookeeper_path + "/replicas");
-    auto replica_iter = replica_states.begin();
-
-    while (!replica_states.empty() && watch.elapsedSeconds() < feedback_timeout)
-    {
-        String last_executed = current_zookeeper->get(zookeeper_path + "/replicas/" + *replica_iter);
-        if (last_executed > log_name_to_exec_with_result)
+        for (const auto & command : query_alter->command_list->commands)
         {
-            replica_name_column->insert(*replica_iter);
-            String err_path = zookeeper_path + "/replicas/" + *replica_iter + "/errors/" + log_name_to_exec_with_result;
-            if (!current_zookeeper->exists(err_path))
-            {
-                feedback_column->insert("OK");
-            }
-            else
-            {
-                String feedback = current_zookeeper->get(err_path, {}, nullptr);
-                feedback_column->insert(feedback);
-            }
-            replica_states.erase(replica_iter);
-            replica_iter = replica_states.begin();
+            //FIXME allow all types of queries (maybe we should execute ATTACH an similar queries on leader)
+            if (!isSupportedAlterType(command->type))
+                throw Exception("Unsupported type of ALTER query", ErrorCodes::NOT_IMPLEMENTED);
         }
     }
 
-    Block block = Block({
-        {std::move(replica_name_column), block_structure[0].type, block_structure[0].name},
-        {std::move(feedback_column), block_structure[1].type, block_structure[1].name}
-    });
+    LOG_DEBUG(log, "Proposing query: {}", queryToString(query));
 
-    res.in = std::make_shared<OneBlockInputStream>(block);
-    return res;
+    DDLLogEntry entry;
+    entry.hosts = {};
+    entry.query = queryToString(query);
+    entry.initiator = ddl_worker->getCommonHostID();
+    String node_path = ddl_worker->enqueueQuery(entry);
+
+    BlockIO io;
+    //FIXME use query context
+    if (global_context.getSettingsRef().distributed_ddl_task_timeout == 0)
+        return io;
+
+    //FIXME need list of all replicas
+    Strings hosts_to_wait;
+    //TODO maybe it's better to use (shard_name + sep + replica_name) as host ID to allow use {replica} macro (may may have the same values across shards)
+    hosts_to_wait.emplace_back(replica_name);
+    auto stream = std::make_shared<DDLQueryStatusInputStream>(node_path, entry, global_context);
+    io.in = std::move(stream);
+    return io;
+
+    //executeDDLQueryOnCluster(query, global_context);
+
+
+    //{
+    //    std::lock_guard lock(log_name_mutex);
+    //    log_name_to_exec_with_result
+    //        = current_zookeeper->create(zookeeper_path + "/log/log-", queryToString(query), zkutil::CreateMode::PersistentSequential);
+    //}
+
+    //background_log_executor->schedule();
 }
 
+//BlockIO DatabaseReplicated::getFeedback()
+//{
+//    BlockIO res;
+//    if (feedback_timeout == 0)
+//        return res;
+//
+//    Stopwatch watch;
+//
+//    NamesAndTypes block_structure =
+//    {
+//        {"replica_name", std::make_shared<DataTypeString>()},
+//        {"execution_feedback", std::make_shared<DataTypeString>()},
+//    };
+//    auto replica_name_column = block_structure[0].type->createColumn();
+//    auto feedback_column = block_structure[1].type->createColumn();
+//
+//    current_zookeeper = getZooKeeper();
+//    Strings replica_states = current_zookeeper->getChildren(zookeeper_path + "/replicas");
+//    auto replica_iter = replica_states.begin();
+//
+//    while (!replica_states.empty() && watch.elapsedSeconds() < feedback_timeout)
+//    {
+//        String last_executed = current_zookeeper->get(zookeeper_path + "/replicas/" + *replica_iter);
+//        if (last_executed > log_name_to_exec_with_result)
+//        {
+//            replica_name_column->insert(*replica_iter);
+//            String err_path = zookeeper_path + "/replicas/" + *replica_iter + "/errors/" + log_name_to_exec_with_result;
+//            if (!current_zookeeper->exists(err_path))
+//            {
+//                feedback_column->insert("OK");
+//            }
+//            else
+//            {
+//                String feedback = current_zookeeper->get(err_path, {}, nullptr);
+//                feedback_column->insert(feedback);
+//            }
+//            replica_states.erase(replica_iter);
+//            replica_iter = replica_states.begin();
+//        }
+//    }
+//
+//    Block block = Block({
+//        {std::move(replica_name_column), block_structure[0].type, block_structure[0].name},
+//        {std::move(feedback_column), block_structure[1].type, block_structure[1].name}
+//    });
+//
+//    res.in = std::make_shared<OneBlockInputStream>(block);
+//    return res;
+//}
+
 void DatabaseReplicated::createSnapshot()
 {
     current_zookeeper = getZooKeeper();
@@ -389,7 +429,7 @@ void DatabaseReplicated::loadMetadataFromSnapshot()
 
         String query_to_execute = current_zookeeper->get(path, {}, nullptr);
 
-        current_context = std::make_unique<Context>(global_context);
+        auto current_context = std::make_unique<Context>(global_context);
         current_context->getClientInfo().query_kind = ClientInfo::QueryKind::REPLICATED_LOG_QUERY;
         current_context->setCurrentDatabase(database_name);
         current_context->setCurrentQueryId(""); // generate random query_id
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index 375118e7356..537eaad893f 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -6,10 +6,14 @@
 #include <DataStreams/BlockIO.h>
 #include <DataStreams/OneBlockInputStream.h>
 #include <Interpreters/Context.h>
+#include <DataStreams/BlockIO.h>
 
 
 namespace DB
 {
+
+class DDLWorker;
+
 /** DatabaseReplicated engine
   * supports replication of metadata
   * via DDL log being written to ZooKeeper
@@ -39,13 +43,15 @@ public:
                        const String & zookeeper_path_, const String & shard_name_, const String & replica_name_,
                        Context & context);
 
+    ~DatabaseReplicated() override;
+
     void drop(const Context & /*context*/) override;
 
     String getEngineName() const override { return "Replicated"; }
 
-    void propose(const ASTPtr & query) override;
+    BlockIO propose(const ASTPtr & query);
 
-    BlockIO getFeedback();
+    //BlockIO getFeedback();
 
 private:
     void createDatabaseZooKeeperNodes();
@@ -63,7 +69,7 @@ private:
     String shard_name;
     String replica_name;
 
-    std::unique_ptr<Context> current_context; // to run executeQuery
+    //std::unique_ptr<Context> current_context; // to run executeQuery
 
     std::mutex log_name_mutex;
     String log_name_to_exec_with_result;
@@ -73,7 +79,7 @@ private:
 
     String last_executed_log_entry = "";
 
-    BackgroundSchedulePool::TaskHolder background_log_executor;
+    //BackgroundSchedulePool::TaskHolder background_log_executor;
 
     zkutil::ZooKeeperPtr current_zookeeper;        /// Use only the methods below.
     mutable std::mutex current_zookeeper_mutex;    /// To recreate the session in the background thread.
@@ -82,6 +88,8 @@ private:
     zkutil::ZooKeeperPtr getZooKeeper() const;
     void setZooKeeper(zkutil::ZooKeeperPtr zookeeper);
 
+    std::unique_ptr<DDLWorker> ddl_worker;
+
 };
 
 }
diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h
index 393e8f2d10c..9b744259406 100644
--- a/src/Databases/IDatabase.h
+++ b/src/Databases/IDatabase.h
@@ -184,12 +184,6 @@ public:
     /// Is the database empty.
     virtual bool empty() const = 0;
 
-    /// Submit query to log. Currently used by DatabaseReplicated engine only.
-    virtual void propose(const ASTPtr & /*query*/)
-    {
-        throw Exception(getEngineName() + ": propose() is not supported", ErrorCodes::NOT_IMPLEMENTED);
-    }
-
     /// Add the table to the database. Record its presence in the metadata.
     virtual void createTable(
         const Context & /*context*/,
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 2c454db4787..b607bd084ea 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -142,12 +142,17 @@ std::unique_ptr<ZooKeeperLock> createSimpleZooKeeperLock(
 }
 
 
-DDLWorker::DDLWorker(int pool_size_, const std::string & zk_root_dir, Context & context_, const Poco::Util::AbstractConfiguration * config, const String & prefix)
+DDLWorker::DDLWorker(int pool_size_, const std::string & zk_root_dir, Context & context_, const Poco::Util::AbstractConfiguration * config, const String & prefix,
+                     bool is_replicated_db_, const std::optional<String> & db_name_, const std::optional<String> & db_replica_name_, const std::optional<String> & db_shard_name_)
     : context(context_)
     , log(&Poco::Logger::get("DDLWorker"))
     , pool_size(pool_size_)
     , worker_pool(pool_size_)
 {
+    is_replicated_db = is_replicated_db_;
+    db_name = db_name_;
+    db_replica_name = db_replica_name_;
+    db_shard_name = db_shard_name_;
     last_tasks.reserve(pool_size);
 
     queue_dir = zk_root_dir;
@@ -267,6 +272,15 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r
         return {};
     }
 
+    if (is_replicated_db)
+    {
+        //
+        task->host_id.host_name = host_fqdn;
+        task->host_id.port = context.getTCPPort();
+        task->host_id_str = *db_replica_name;
+        return task;
+    }
+
     bool host_in_hostlist = false;
     for (const HostID & host : task->entry.hosts)
     {
@@ -390,6 +404,9 @@ void DDLWorker::parseQueryAndResolveHost(DDLTask & task)
     if (!task.query || !(task.query_on_cluster = dynamic_cast<ASTQueryWithOnCluster *>(task.query.get())))
         throw Exception("Received unknown DDL query", ErrorCodes::UNKNOWN_TYPE_OF_QUERY);
 
+    if (is_replicated_db)
+        return;
+
     task.cluster_name = task.query_on_cluster->cluster;
     task.cluster = context.tryGetCluster(task.cluster_name);
     if (!task.cluster)
@@ -507,7 +524,14 @@ bool DDLWorker::tryExecuteQuery(const String & query, const DDLTask & task, Exec
     try
     {
         auto current_context = std::make_unique<Context>(context);
-        current_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY;
+        if (is_replicated_db)
+        {
+            current_context->getClientInfo().query_kind
+                = ClientInfo::QueryKind::REPLICATED_LOG_QUERY; //FIXME why do we need separate query kind?
+            current_context->setCurrentDatabase(*db_name);
+        }
+        else
+            current_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY;
         current_context->setCurrentQueryId(""); // generate random query_id
         executeQuery(istr, ostr, false, *current_context, {});
     }
@@ -696,7 +720,11 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
         return res;
     };
 
-    String shard_node_name = get_shard_name(task.cluster->getShardsAddresses().at(task.host_shard_num));
+    String shard_node_name;
+    if (is_replicated_db)
+        shard_node_name = *db_shard_name;
+    else
+        shard_node_name = get_shard_name(task.cluster->getShardsAddresses().at(task.host_shard_num));
     String shard_path = node_path + "/shards/" + shard_node_name;
     String is_executed_path = shard_path + "/executed";
     String tries_to_execute_path = shard_path + "/tries_to_execute";
@@ -892,7 +920,7 @@ void DDLWorker::createStatusDirs(const std::string & node_path, const ZooKeeperP
 
 String DDLWorker::enqueueQuery(DDLLogEntry & entry)
 {
-    if (entry.hosts.empty())
+    if (entry.hosts.empty() && !is_replicated_db)
         throw Exception("Empty host list in a distributed DDL task", ErrorCodes::LOGICAL_ERROR);
 
     auto zookeeper = getAndSetZooKeeper();
diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h
index caa2242caf8..1c28100f933 100644
--- a/src/Interpreters/DDLWorker.h
+++ b/src/Interpreters/DDLWorker.h
@@ -4,6 +4,7 @@
 #include <Common/ThreadPool.h>
 #include <Storages/IStorage_fwd.h>
 #include <Parsers/IAST_fwd.h>
+#include <Interpreters/Context.h>
 
 #include <atomic>
 #include <chrono>
@@ -35,7 +36,8 @@ using DDLTaskPtr = std::unique_ptr<DDLTask>;
 class DDLWorker
 {
 public:
-    DDLWorker(int pool_size_, const std::string & zk_root_dir, Context & context_, const Poco::Util::AbstractConfiguration * config, const String & prefix);
+    DDLWorker(int pool_size_, const std::string & zk_root_dir, Context & context_, const Poco::Util::AbstractConfiguration * config, const String & prefix,
+              bool is_replicated_db_ = false, const std::optional<String> & db_name_ = std::nullopt, const std::optional<String> & db_replica_name_ = std::nullopt, const std::optional<String> & db_shard_name_ = std::nullopt);
     ~DDLWorker();
 
     /// Pushes query into DDL queue, returns path to created node
@@ -101,8 +103,12 @@ private:
     void attachToThreadGroup();
 
 private:
+    bool is_replicated_db;
+    std::optional<String> db_name;
+    std::optional<String> db_replica_name;
+    std::optional<String> db_shard_name;
     std::atomic<bool> is_circular_replicated = false;
-    Context & context;
+    Context context;
     Poco::Logger * log;
 
     std::string host_fqdn;      /// current host domain name
diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp
index 013e30a3ed5..38d00c089ab 100644
--- a/src/Interpreters/InterpreterAlterQuery.cpp
+++ b/src/Interpreters/InterpreterAlterQuery.cpp
@@ -51,12 +51,8 @@ BlockIO InterpreterAlterQuery::execute()
     auto metadata_snapshot = table->getInMemoryMetadataPtr();
 
     DatabasePtr database = DatabaseCatalog::instance().getDatabase(table_id.database_name);
-    if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY && !table->supportsReplication())
-    {
-        database->propose(query_ptr);
-        auto * database_replicated = typeid_cast<DatabaseReplicated *>(database.get());
-        return database_replicated->getFeedback();
-    }
+    if (typeid_cast<DatabaseReplicated *>(database.get()) && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY && !table->supportsReplication())
+        return typeid_cast<DatabaseReplicated *>(database.get())->propose(query_ptr);
 
     /// Add default database to table identifiers that we can encounter in e.g. default expressions,
     /// mutation expression, etc.
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index 04c5efce3e2..b36fe32b26d 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -77,6 +77,7 @@ namespace ErrorCodes
     extern const int ILLEGAL_SYNTAX_FOR_DATA_TYPE;
     extern const int ILLEGAL_COLUMN;
     extern const int LOGICAL_ERROR;
+    extern const int UNKNOWN_DATABASE;
 }
 
 namespace fs = std::filesystem;
@@ -720,15 +721,22 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
         create.database = current_database;
     }
 
+    //TODO make code better if possible
+    bool need_add_to_database = !create.temporary;
+    if(need_add_to_database && database->getEngineName() == "Replicated")
+    {
+        auto guard = DatabaseCatalog::instance().getDDLGuard(create.database, create.table);
+        database = DatabaseCatalog::instance().getDatabase(create.database);
+        if (typeid_cast<DatabaseReplicated *>(database.get()) && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
+        {
+            assertOrSetUUID(create, database);
+            return typeid_cast<DatabaseReplicated *>(database.get())->propose(query_ptr);
+        }
+    }
+
     /// Actually creates table
     bool created = doCreateTable(create, properties);
 
-    if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
-    {
-        auto * database_replicated = typeid_cast<DatabaseReplicated *>(database.get());
-        return database_replicated->getFeedback();
-    }
-
     if (!created)   /// Table already exists
         return {};
 
@@ -753,6 +761,9 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
         guard = DatabaseCatalog::instance().getDDLGuard(create.database, table_name);
 
         database = DatabaseCatalog::instance().getDatabase(create.database);
+        //TODO do we need it?
+        if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
+            throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database was renamed");
         assertOrSetUUID(create, database);
 
         /// Table can be created before or it can be created concurrently in another thread, while we were waiting in DDLGuard.
@@ -790,12 +801,6 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
         return true;
     }
 
-    if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
-    {
-        database->propose(query_ptr);
-        return true;
-    }
-
     StoragePtr res;
     /// NOTE: CREATE query may be rewritten by Storage creator or table function
     if (create.as_table_function)
diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp
index 0f03525f237..c93f8098713 100644
--- a/src/Interpreters/InterpreterDropQuery.cpp
+++ b/src/Interpreters/InterpreterDropQuery.cpp
@@ -101,8 +101,8 @@ BlockIO InterpreterDropQuery::executeToTable(const ASTDropQuery & query)
             if (database->getEngineName() != "Atomic" && database->getEngineName() != "Replicated")
                 table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
             /// Drop table from memory, don't touch data and metadata
-            if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
-                database->propose(query_ptr);
+            if (typeid_cast<DatabaseReplicated *>(database.get()) && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
+                return typeid_cast<DatabaseReplicated *>(database.get())->propose(query_ptr);
             else
                 database->detachTable(table_id.table_name);
         }
@@ -115,7 +115,7 @@ BlockIO InterpreterDropQuery::executeToTable(const ASTDropQuery & query)
             auto metadata_snapshot = table->getInMemoryMetadataPtr();
             /// Drop table data, don't touch metadata
             if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
-                database->propose(query_ptr);
+                return typeid_cast<DatabaseReplicated *>(database.get())->propose(query_ptr);
             else
                 table->truncate(query_ptr, metadata_snapshot, context, table_lock);
         }
@@ -131,8 +131,8 @@ BlockIO InterpreterDropQuery::executeToTable(const ASTDropQuery & query)
                 table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
 
             /// Prevents recursive drop from drop database query. The original query must specify a table.
-            if (!query_ptr->as<ASTDropQuery &>().table.empty() && database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
-                database->propose(query_ptr);
+            if (typeid_cast<DatabaseReplicated *>(database.get()) && !query_ptr->as<ASTDropQuery &>().table.empty() && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
+                return typeid_cast<DatabaseReplicated *>(database.get())->propose(query_ptr);
             else
                 database->dropTable(context, table_id.table_name, query.no_delay);
         }
@@ -151,12 +151,6 @@ BlockIO InterpreterDropQuery::executeToTable(const ASTDropQuery & query)
         }
     }
 
-    if (database && database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
-    {
-        auto * database_replicated = typeid_cast<DatabaseReplicated *>(database.get());
-        return database_replicated->getFeedback();
-    }
-
     return {};
 }
 
diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp
index 3a375e2ba60..4eee34a683e 100644
--- a/src/Interpreters/InterpreterRenameQuery.cpp
+++ b/src/Interpreters/InterpreterRenameQuery.cpp
@@ -75,9 +75,9 @@ BlockIO InterpreterRenameQuery::executeToTables(const ASTRenameQuery & rename, c
             database_catalog.assertTableDoesntExist(StorageID(elem.to_database_name, elem.to_table_name), context);
 
         DatabasePtr database = database_catalog.getDatabase(elem.from_database_name);
-        if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
+        if (typeid_cast<DatabaseReplicated *>(database.get()) && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
         {
-            database->propose(query_ptr);
+            return typeid_cast<DatabaseReplicated *>(database.get())->propose(query_ptr);
         }
         else
         {
@@ -89,13 +89,6 @@ BlockIO InterpreterRenameQuery::executeToTables(const ASTRenameQuery & rename, c
                 rename.exchange,
                 rename.dictionary);
         }
-
-        // TODO it can't work
-        if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
-        {
-            auto * database_replicated = typeid_cast<DatabaseReplicated *>(database.get());
-            return database_replicated->getFeedback();
-        }
     }
 
     return {};
diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp
index 6da1704ce55..03065245766 100644
--- a/src/Interpreters/executeDDLQueryOnCluster.cpp
+++ b/src/Interpreters/executeDDLQueryOnCluster.cpp
@@ -25,7 +25,7 @@ namespace ErrorCodes
     extern const int QUERY_IS_PROHIBITED;
 }
 
-static bool isSupportedAlterType(int type)
+bool isSupportedAlterType(int type)
 {
     static const std::unordered_set<int> unsupported_alter_types{
         ASTAlterCommand::ATTACH_PARTITION,
@@ -170,7 +170,8 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, const Context & cont
 }
 
 
-DDLQueryStatusInputStream::DDLQueryStatusInputStream(const String & zk_node_path, const DDLLogEntry & entry, const Context & context_)
+DDLQueryStatusInputStream::DDLQueryStatusInputStream(const String & zk_node_path, const DDLLogEntry & entry, const Context & context_,
+                                                     const std::optional<Strings> & hosts_to_wait)
     : node_path(zk_node_path)
     , context(context_)
     , watch(CLOCK_MONOTONIC_COARSE)
@@ -185,10 +186,17 @@ DDLQueryStatusInputStream::DDLQueryStatusInputStream(const String & zk_node_path
         {std::make_shared<DataTypeUInt64>(),    "num_hosts_active"},
     };
 
-    for (const HostID & host: entry.hosts)
-        waiting_hosts.emplace(host.toString());
+    if (hosts_to_wait)
+    {
+        waiting_hosts = NameSet(hosts_to_wait->begin(), hosts_to_wait->end());
+    }
+    else
+    {
+        for (const HostID & host : entry.hosts)
+            waiting_hosts.emplace(host.toString());
+    }
 
-    addTotalRowsApprox(entry.hosts.size());
+    addTotalRowsApprox(waiting_hosts.size());
 
     timeout_seconds = context.getSettingsRef().distributed_ddl_task_timeout;
 }
diff --git a/src/Interpreters/executeDDLQueryOnCluster.h b/src/Interpreters/executeDDLQueryOnCluster.h
index 83880cc94c1..0f7a411ed92 100644
--- a/src/Interpreters/executeDDLQueryOnCluster.h
+++ b/src/Interpreters/executeDDLQueryOnCluster.h
@@ -15,6 +15,9 @@ class AccessRightsElements;
 struct DDLLogEntry;
 
 
+/// Returns true if provided ALTER type can be executed ON CLUSTER
+bool isSupportedAlterType(int type);
+
 /// Pushes distributed DDL query to the queue.
 /// Returns DDLQueryStatusInputStream, which reads results of query execution on each host in the cluster.
 BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, const Context & context);
@@ -25,7 +28,7 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, const Context & conte
 class DDLQueryStatusInputStream : public IBlockInputStream
 {
 public:
-    DDLQueryStatusInputStream(const String & zk_node_path, const DDLLogEntry & entry, const Context & context_);
+    DDLQueryStatusInputStream(const String & zk_node_path, const DDLLogEntry & entry, const Context & context_, const std::optional<Strings> & hosts_to_wait = {});
 
     String getName() const override { return "DDLQueryStatusInputStream"; }
 
diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py
index 372ac7a7c3e..06d8aa9467a 100644
--- a/tests/integration/test_replicated_database/test.py
+++ b/tests/integration/test_replicated_database/test.py
@@ -24,8 +24,8 @@ def assert_create_query(nodes, table_name, expected):
 def started_cluster():
     try:
         cluster.start()
-        main_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'replica1');")
-        dummy_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'replica2');")
+        main_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica1');")
+        dummy_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica2');")
         yield cluster
 
     finally:
@@ -67,7 +67,7 @@ def test_simple_alter_table(started_cluster):
     assert_create_query([main_node, dummy_node], "alter_test", expected)
 
 def test_create_replica_after_delay(started_cluster):
-    competing_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'replica3');")
+    competing_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica3');")
 
     main_node.query("ALTER TABLE testdb.alter_test ADD COLUMN Added3 UInt32;")
     main_node.query("ALTER TABLE testdb.alter_test DROP COLUMN AddedNested1;")
@@ -128,15 +128,15 @@ def test_replica_restart(started_cluster):
 
 def test_snapshot_and_snapshot_recover(started_cluster):
     #FIXME bad test
-    snapshotting_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'replica4');")
+    snapshotting_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica4');")
     time.sleep(5)
-    snapshot_recovering_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'replica5');")
+    snapshot_recovering_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica5');")
     time.sleep(5)
     assert snapshotting_node.query("desc table testdb.alter_test") == snapshot_recovering_node.query("desc table testdb.alter_test")
 
 def test_drop_and_create_replica(started_cluster):
     main_node.query("DROP DATABASE testdb")
-    main_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'replica1');")
+    main_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica1');")
 
     expected = "CREATE TABLE testdb.concurrent_test\\n(\\n    `CounterID` UInt32,\\n    `StartDate` Date,\\n    `UserID` UInt32,\\n" \
                "    `VisitID` UInt32,\\n    `NestedColumn.A` Array(UInt8),\\n    `NestedColumn.S` Array(String),\\n    `ToDrop` UInt32\\n)\\n" \

From b0262b3d06130854ae96a10b1d2854ad9c7b92bb Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Fri, 13 Nov 2020 21:35:45 +0300
Subject: [PATCH 0057/1238] better replica creation

---
 src/Databases/DatabaseReplicated.cpp | 280 +++++++++++----------------
 src/Databases/DatabaseReplicated.h   |  20 +-
 src/Interpreters/DDLWorker.cpp       |  41 ++--
 src/Interpreters/DDLWorker.h         |  29 ++-
 4 files changed, 159 insertions(+), 211 deletions(-)

diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index 1213b5bc075..c4bffd8fd5d 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -16,6 +16,8 @@
 #include <Interpreters/DDLWorker.h>
 #include <Interpreters/DDLTask.h>
 #include <Interpreters/executeDDLQueryOnCluster.h>
+#include <Interpreters/Cluster.h>
+#include <common/getFQDNOrHostName.h>
 #include <Parsers/ASTAlterQuery.h>
 
 namespace DB
@@ -25,29 +27,22 @@ namespace ErrorCodes
     extern const int NO_ZOOKEEPER;
     extern const int LOGICAL_ERROR;
     extern const int BAD_ARGUMENTS;
+    extern const int REPLICA_IS_ALREADY_EXIST;
 }
 
-//FIXME never used
-void DatabaseReplicated::setZooKeeper(zkutil::ZooKeeperPtr zookeeper)
-{
-    std::lock_guard lock(current_zookeeper_mutex);
-    current_zookeeper = zookeeper;
-}
-
-zkutil::ZooKeeperPtr DatabaseReplicated::tryGetZooKeeper() const
-{
-    std::lock_guard lock(current_zookeeper_mutex);
-    return current_zookeeper;
-}
+constexpr const char * first_entry_name = "query-0000000000";
 
 zkutil::ZooKeeperPtr DatabaseReplicated::getZooKeeper() const
 {
-    auto res = tryGetZooKeeper();
-    if (!res)
-        throw Exception("Cannot get ZooKeeper", ErrorCodes::NO_ZOOKEEPER);
-    return res;
+    return global_context.getZooKeeper();
 }
 
+static inline String getHostID(const Context & global_context)
+{
+    return Cluster::Address::toString(getFQDNOrHostName(), global_context.getTCPPort());
+}
+
+
 DatabaseReplicated::~DatabaseReplicated() = default;
 
 DatabaseReplicated::DatabaseReplicated(
@@ -64,99 +59,119 @@ DatabaseReplicated::DatabaseReplicated(
     , replica_name(replica_name_)
 {
     if (zookeeper_path.empty() || shard_name.empty() || replica_name.empty())
-        throw Exception("ZooKeeper path and shard and replica names must be non-empty", ErrorCodes::BAD_ARGUMENTS);
+        throw Exception("ZooKeeper path, shard and replica names must be non-empty", ErrorCodes::BAD_ARGUMENTS);
+    if (shard_name.find('/') != std::string::npos || replica_name.find('/') != std::string::npos)
+        throw Exception("Shard and replica names should not contain '/'", ErrorCodes::BAD_ARGUMENTS);
 
     if (zookeeper_path.back() == '/')
         zookeeper_path.resize(zookeeper_path.size() - 1);
+
     /// If zookeeper chroot prefix is used, path should start with '/', because chroot concatenates without it.
     if (zookeeper_path.front() != '/')
         zookeeper_path = "/" + zookeeper_path;
 
-    if (context_.hasZooKeeper())
-    {
-        current_zookeeper = context_.getZooKeeper();
-    }
-    if (!current_zookeeper)
+    if (!context_.hasZooKeeper())
     {
         throw Exception("Can't create replicated database without ZooKeeper", ErrorCodes::NO_ZOOKEEPER);
     }
+    //FIXME it will fail on startup if zk is not available
+
+    auto current_zookeeper = global_context.getZooKeeper();
 
-    /// New database
     if (!current_zookeeper->exists(zookeeper_path))
     {
-        createDatabaseZooKeeperNodes();
+        /// Create new database, multiple nodes can execute it concurrently
+        createDatabaseNodesInZooKeeper(current_zookeeper);
     }
 
-    /// Attach existing replica
-    //TODO better protection from wrong replica names
-    if (current_zookeeper->exists(zookeeper_path + "/replicas/" + replica_name))
+    replica_path = zookeeper_path + "/replicas/" + shard_name + "|" + replica_name;
+
+    String replica_host_id;
+    if (current_zookeeper->tryGet(replica_path, replica_host_id))
     {
-        String remote_last_entry = current_zookeeper->get(zookeeper_path + "/replicas/" + replica_name, {}, nullptr);
+        String host_id = getHostID(global_context);
+        if (replica_host_id != host_id)
+            throw Exception(ErrorCodes::REPLICA_IS_ALREADY_EXIST,
+                            "Replica {} of shard {} of replicated database at {} already exists. Replica host ID: '{}', current host ID: '{}'",
+                            replica_name, shard_name, zookeeper_path, replica_host_id, host_id);
 
-        String local_last_entry;
-        try
-        {
-            ReadBufferFromFile in(getMetadataPath() + ".last_entry", 16);
-            readStringUntilEOF(local_last_entry, in);
-        }
-        catch (const Exception &)
-        {
-            /// Metadata is corrupted.
-            /// Replica erases the previous zk last executed log entry
-            /// and behaves like a new clean replica.
-            writeLastExecutedToDiskAndZK();
-        }
-
-        if (!local_last_entry.empty() && local_last_entry == remote_last_entry)
-        {
-            last_executed_log_entry = local_last_entry;
-        }
-        else
-        {
-            //FIXME
-            throw Exception(
-                "Replica name might be in use by a different node. Please check replica_name parameter. Remove .last_entry file from "
-                "metadata to create a new replica.",
-                ErrorCodes::LOGICAL_ERROR);
-        }
+        log_entry_to_execute = current_zookeeper->get(replica_path + "/log_ptr");
     }
     else
     {
-        createReplicaZooKeeperNodes();
+        /// Throws if replica with the same name was created concurrently
+        createReplicaNodesInZooKeeper(current_zookeeper);
     }
 
+    assert(log_entry_to_execute.starts_with("query-"));
+
+
     snapshot_period = context_.getConfigRef().getInt("database_replicated_snapshot_period", 10);
-    feedback_timeout = context_.getConfigRef().getInt("database_replicated_feedback_timeout", 0);
     LOG_DEBUG(log, "Snapshot period is set to {} log entries per one snapshot", snapshot_period);
-
-    //FIXME use database UUID
-    ddl_worker = std::make_unique<DDLWorker>(1, zookeeper_path + "/log", context_, nullptr, String{}, true, database_name, replica_name, shard_name);
-
-    //TODO do we need separate pool?
-    //background_log_executor = context_.getReplicatedSchedulePool().createTask(
-    //    database_name + "(DatabaseReplicated::background_executor)", [this] { runBackgroundLogExecutor(); }
-    //);
-
-    //background_log_executor->scheduleAfter(500);
 }
 
-void DatabaseReplicated::createDatabaseZooKeeperNodes()
+bool DatabaseReplicated::createDatabaseNodesInZooKeeper(const zkutil::ZooKeeperPtr & current_zookeeper)
 {
-    current_zookeeper = getZooKeeper();
-
     current_zookeeper->createAncestors(zookeeper_path);
 
-    current_zookeeper->createIfNotExists(zookeeper_path, String());
-    current_zookeeper->createIfNotExists(zookeeper_path + "/log", String());
-    current_zookeeper->createIfNotExists(zookeeper_path + "/snapshots", String());
-    current_zookeeper->createIfNotExists(zookeeper_path + "/replicas", String());
+    Coordination::Requests ops;
+    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path, "", zkutil::CreateMode::Persistent));
+    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/log", "", zkutil::CreateMode::Persistent));
+    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/snapshots", "", zkutil::CreateMode::Persistent));
+    /// Create empty snapshot (with no tables)
+    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/snapshots/" + first_entry_name, "", zkutil::CreateMode::Persistent));
+    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/replicas", "", zkutil::CreateMode::Persistent));
+
+    Coordination::Responses responses;
+    auto res = current_zookeeper->tryMulti(ops, responses);
+    if (res == Coordination::Error::ZOK)
+        return true;
+    if (res == Coordination::Error::ZNODEEXISTS)
+        return false;
+
+    zkutil::KeeperMultiException::check(res, ops, responses);
+    assert(false);
 }
 
-void DatabaseReplicated::createReplicaZooKeeperNodes()
+void DatabaseReplicated::createReplicaNodesInZooKeeper(const zkutil::ZooKeeperPtr & current_zookeeper)
 {
-    current_zookeeper->create(zookeeper_path + "/replicas/" + replica_name, "", zkutil::CreateMode::Persistent);
+    current_zookeeper->createAncestors(replica_path);
+
+    Strings snapshots = current_zookeeper->getChildren(zookeeper_path + "/snapshots");
+    std::sort(snapshots.begin(), snapshots.end());
+    if (snapshots.empty())
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "No snapshots found");
+
+    /// When creating new replica, use latest snapshot version as initial value of log_pointer
+    log_entry_to_execute = snapshots.back();
+
+    /// Write host name to replica_path, it will protect from multiple replicas with the same name
+    auto host_id = getHostID(global_context);
+
+    Coordination::Requests ops;
+    ops.emplace_back(zkutil::makeCreateRequest(replica_path, host_id, zkutil::CreateMode::Persistent));
+    ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/log_ptr", log_entry_to_execute , zkutil::CreateMode::Persistent));
+    current_zookeeper->multi(ops);
 }
 
+void DatabaseReplicated::loadStoredObjects(Context & context, bool has_force_restore_data_flag, bool force_attach)
+{
+    DatabaseAtomic::loadStoredObjects(context, has_force_restore_data_flag, force_attach);
+
+    DatabaseReplicatedExtensions ext;
+    ext.database_uuid = getUUID();
+    ext.database_name = getDatabaseName();
+    ext.shard_name = shard_name;
+    ext.replica_name = replica_name;
+    ext.first_not_executed = log_entry_to_execute;
+
+    /// Pool size must be 1 (to avoid reordering of log entries)
+    constexpr size_t pool_size = 1;
+    ddl_worker = std::make_unique<DDLWorker>(pool_size, zookeeper_path + "/log", global_context, nullptr, "",
+                                             std::make_optional<DatabaseReplicatedExtensions>(std::move(ext)));
+}
+
+
 void DatabaseReplicated::removeOutdatedSnapshotsAndLog()
 {
     /// This method removes all snapshots and logged queries
@@ -170,7 +185,7 @@ void DatabaseReplicated::removeOutdatedSnapshotsAndLog()
     /// because the replica will use the latest snapshot available
     /// and this snapshot will set the last executed log query
     /// to a greater one than the least advanced current replica.
-    current_zookeeper = getZooKeeper();
+    auto current_zookeeper = getZooKeeper();
     Strings replica_states = current_zookeeper->getChildren(zookeeper_path + "/replicas");
     //TODO do not use log pointers to determine which entries to remove if there are staled pointers.
     // We can just remove all entries older than previous snapshot version.
@@ -209,7 +224,7 @@ void DatabaseReplicated::runBackgroundLogExecutor()
         loadMetadataFromSnapshot();
     }
 
-    current_zookeeper = getZooKeeper();
+    auto current_zookeeper = getZooKeeper();
     Strings log_entry_names = current_zookeeper->getChildren(zookeeper_path + "/log");
 
     std::sort(log_entry_names.begin(), log_entry_names.end());
@@ -219,7 +234,7 @@ void DatabaseReplicated::runBackgroundLogExecutor()
 
     for (const String & log_entry_name : log_entry_names)
     {
-        executeLogName(log_entry_name);
+        //executeLogName(log_entry_name);
         last_executed_log_entry = log_entry_name;
         writeLastExecutedToDiskAndZK();
 
@@ -238,7 +253,7 @@ void DatabaseReplicated::runBackgroundLogExecutor()
 
 void DatabaseReplicated::writeLastExecutedToDiskAndZK()
 {
-    current_zookeeper = getZooKeeper();
+    auto current_zookeeper = getZooKeeper();
     current_zookeeper->createOrUpdate(
         zookeeper_path + "/replicas/" + replica_name, last_executed_log_entry, zkutil::CreateMode::Persistent);
 
@@ -251,35 +266,9 @@ void DatabaseReplicated::writeLastExecutedToDiskAndZK()
     out.close();
 }
 
-void DatabaseReplicated::executeLogName(const String & /*log_entry_name*/)
-{
-//    String path = zookeeper_path + "/log/" + log_entry_name;
-//    current_zookeeper = getZooKeeper();
-//    String query_to_execute = current_zookeeper->get(path, {}, nullptr);
-//
-//    try
-//    {
-//        current_context = std::make_unique<Context>(global_context);
-//        current_context->getClientInfo().query_kind = ClientInfo::QueryKind::REPLICATED_LOG_QUERY;
-//        current_context->setCurrentDatabase(database_name);
-//        current_context->setCurrentQueryId(""); // generate random query_id
-//        executeQuery(query_to_execute, *current_context);
-//    }
-//    catch (const Exception & e)
-//    {
-//        tryLogCurrentException(log, "Query from zookeeper " + query_to_execute + " wasn't finished successfully");
-//        current_zookeeper->create(
-//            zookeeper_path + "/replicas/" + replica_name + "/errors/" + log_entry_name, e.what(), zkutil::CreateMode::Persistent);
-//    }
-//
-//    LOG_DEBUG(log, "Executed query: {}", query_to_execute);
-}
 
 BlockIO DatabaseReplicated::propose(const ASTPtr & query)
 {
-    //current_zookeeper = getZooKeeper();
-
-
     if (const auto * query_alter = query->as<ASTAlterQuery>())
     {
         for (const auto & command : query_alter->command_list->commands)
@@ -303,79 +292,18 @@ BlockIO DatabaseReplicated::propose(const ASTPtr & query)
     if (global_context.getSettingsRef().distributed_ddl_task_timeout == 0)
         return io;
 
-    //FIXME need list of all replicas
+    //FIXME need list of all replicas, we can obtain it from zk
     Strings hosts_to_wait;
-    //TODO maybe it's better to use (shard_name + sep + replica_name) as host ID to allow use {replica} macro (may may have the same values across shards)
-    hosts_to_wait.emplace_back(replica_name);
+    hosts_to_wait.emplace_back(shard_name + '/' +replica_name);
     auto stream = std::make_shared<DDLQueryStatusInputStream>(node_path, entry, global_context);
     io.in = std::move(stream);
     return io;
-
-    //executeDDLQueryOnCluster(query, global_context);
-
-
-    //{
-    //    std::lock_guard lock(log_name_mutex);
-    //    log_name_to_exec_with_result
-    //        = current_zookeeper->create(zookeeper_path + "/log/log-", queryToString(query), zkutil::CreateMode::PersistentSequential);
-    //}
-
-    //background_log_executor->schedule();
 }
 
-//BlockIO DatabaseReplicated::getFeedback()
-//{
-//    BlockIO res;
-//    if (feedback_timeout == 0)
-//        return res;
-//
-//    Stopwatch watch;
-//
-//    NamesAndTypes block_structure =
-//    {
-//        {"replica_name", std::make_shared<DataTypeString>()},
-//        {"execution_feedback", std::make_shared<DataTypeString>()},
-//    };
-//    auto replica_name_column = block_structure[0].type->createColumn();
-//    auto feedback_column = block_structure[1].type->createColumn();
-//
-//    current_zookeeper = getZooKeeper();
-//    Strings replica_states = current_zookeeper->getChildren(zookeeper_path + "/replicas");
-//    auto replica_iter = replica_states.begin();
-//
-//    while (!replica_states.empty() && watch.elapsedSeconds() < feedback_timeout)
-//    {
-//        String last_executed = current_zookeeper->get(zookeeper_path + "/replicas/" + *replica_iter);
-//        if (last_executed > log_name_to_exec_with_result)
-//        {
-//            replica_name_column->insert(*replica_iter);
-//            String err_path = zookeeper_path + "/replicas/" + *replica_iter + "/errors/" + log_name_to_exec_with_result;
-//            if (!current_zookeeper->exists(err_path))
-//            {
-//                feedback_column->insert("OK");
-//            }
-//            else
-//            {
-//                String feedback = current_zookeeper->get(err_path, {}, nullptr);
-//                feedback_column->insert(feedback);
-//            }
-//            replica_states.erase(replica_iter);
-//            replica_iter = replica_states.begin();
-//        }
-//    }
-//
-//    Block block = Block({
-//        {std::move(replica_name_column), block_structure[0].type, block_structure[0].name},
-//        {std::move(feedback_column), block_structure[1].type, block_structure[1].name}
-//    });
-//
-//    res.in = std::make_shared<OneBlockInputStream>(block);
-//    return res;
-//}
 
 void DatabaseReplicated::createSnapshot()
 {
-    current_zookeeper = getZooKeeper();
+    auto current_zookeeper = getZooKeeper();
     String snapshot_path = zookeeper_path + "/snapshots/" + last_executed_log_entry;
 
     if (Coordination::Error::ZNODEEXISTS == current_zookeeper->tryCreate(snapshot_path, String(), zkutil::CreateMode::Persistent))
@@ -399,7 +327,7 @@ void DatabaseReplicated::loadMetadataFromSnapshot()
 {
     /// Executes the latest snapshot.
     /// Used by new replicas only.
-    current_zookeeper = getZooKeeper();
+    auto current_zookeeper = getZooKeeper();
 
     Strings snapshots;
     if (current_zookeeper->tryGetChildren(zookeeper_path + "/snapshots", snapshots) != Coordination::Error::ZOK)
@@ -443,9 +371,19 @@ void DatabaseReplicated::loadMetadataFromSnapshot()
 
 void DatabaseReplicated::drop(const Context & context_)
 {
-    current_zookeeper = getZooKeeper();
+    auto current_zookeeper = getZooKeeper();
     current_zookeeper->tryRemove(zookeeper_path + "/replicas/" + replica_name);
     DatabaseAtomic::drop(context_);
 }
 
+void DatabaseReplicated::shutdown()
+{
+    if (ddl_worker)
+    {
+        ddl_worker->shutdown();
+        ddl_worker = nullptr;
+    }
+    DatabaseAtomic::shutdown();
+}
+
 }
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index 537eaad893f..219779d602d 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -51,14 +51,15 @@ public:
 
     BlockIO propose(const ASTPtr & query);
 
-    //BlockIO getFeedback();
+    void shutdown() override;
+
+    void loadStoredObjects(Context & context, bool has_force_restore_data_flag, bool force_attach = false) override;
 
 private:
-    void createDatabaseZooKeeperNodes();
-    void createReplicaZooKeeperNodes();
+    bool createDatabaseNodesInZooKeeper(const zkutil::ZooKeeperPtr & current_zookeeper);
+    void createReplicaNodesInZooKeeper(const zkutil::ZooKeeperPtr & current_zookeeper);
 
     void runBackgroundLogExecutor();
-    void executeLogName(const String &);
     void writeLastExecutedToDiskAndZK();
 
     void loadMetadataFromSnapshot();
@@ -68,25 +69,18 @@ private:
     String zookeeper_path;
     String shard_name;
     String replica_name;
+    String replica_path;
 
-    //std::unique_ptr<Context> current_context; // to run executeQuery
+    String log_entry_to_execute;
 
     std::mutex log_name_mutex;
     String log_name_to_exec_with_result;
 
     int snapshot_period;
-    int feedback_timeout;
 
     String last_executed_log_entry = "";
 
-    //BackgroundSchedulePool::TaskHolder background_log_executor;
-
-    zkutil::ZooKeeperPtr current_zookeeper;        /// Use only the methods below.
-    mutable std::mutex current_zookeeper_mutex;    /// To recreate the session in the background thread.
-
-    zkutil::ZooKeeperPtr tryGetZooKeeper() const;
     zkutil::ZooKeeperPtr getZooKeeper() const;
-    void setZooKeeper(zkutil::ZooKeeperPtr zookeeper);
 
     std::unique_ptr<DDLWorker> ddl_worker;
 
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 83e7029ec31..7d947a264a6 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -142,17 +142,15 @@ std::unique_ptr<ZooKeeperLock> createSimpleZooKeeperLock(
 }
 
 
-DDLWorker::DDLWorker(int pool_size_, const std::string & zk_root_dir, Context & context_, const Poco::Util::AbstractConfiguration * config, const String & prefix,
-                     bool is_replicated_db_, const std::optional<String> & db_name_, const std::optional<String> & db_replica_name_, const std::optional<String> & db_shard_name_)
+DDLWorker::DDLWorker(int pool_size_, const std::string & zk_root_dir, const Context & context_, const Poco::Util::AbstractConfiguration * config, const String & prefix,
+                     std::optional<DatabaseReplicatedExtensions> database_replicated_ext_)
     : context(context_)
-    , log(&Poco::Logger::get("DDLWorker"))
+    , log(&Poco::Logger::get(database_replicated_ext_ ? fmt::format("DDLWorker ({})", database_replicated_ext_->database_name) : "DDLWorker"))
+    , database_replicated_ext(std::move(database_replicated_ext_))
     , pool_size(pool_size_)
     , worker_pool(pool_size_)
 {
-    is_replicated_db = is_replicated_db_;
-    db_name = db_name_;
-    db_replica_name = db_replica_name_;
-    db_shard_name = db_shard_name_;
+    assert(!database_replicated_ext || pool_size == 1);
     last_tasks.reserve(pool_size);
 
     queue_dir = zk_root_dir;
@@ -181,25 +179,29 @@ DDLWorker::DDLWorker(int pool_size_, const std::string & zk_root_dir, Context &
     cleanup_thread = ThreadFromGlobalPool(&DDLWorker::runCleanupThread, this);
 }
 
-
-DDLWorker::~DDLWorker()
+void DDLWorker::shutdown()
 {
     stop_flag = true;
     queue_updated_event->set();
     cleanup_event->set();
+}
+
+DDLWorker::~DDLWorker()
+{
+    shutdown();
     worker_pool.wait();
     main_thread.join();
     cleanup_thread.join();
 }
 
 
-DDLWorker::ZooKeeperPtr DDLWorker::tryGetZooKeeper() const
+ZooKeeperPtr DDLWorker::tryGetZooKeeper() const
 {
     std::lock_guard lock(zookeeper_mutex);
     return current_zookeeper;
 }
 
-DDLWorker::ZooKeeperPtr DDLWorker::getAndSetZooKeeper()
+ZooKeeperPtr DDLWorker::getAndSetZooKeeper()
 {
     std::lock_guard lock(zookeeper_mutex);
 
@@ -272,12 +274,11 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r
         return {};
     }
 
-    if (is_replicated_db)
+    if (database_replicated_ext)
     {
-        //
         task->host_id.host_name = host_fqdn;
         task->host_id.port = context.getTCPPort();
-        task->host_id_str = *db_replica_name;
+        task->host_id_str = database_replicated_ext->shard_name + '|' + database_replicated_ext->replica_name;
         return task;
     }
 
@@ -404,7 +405,7 @@ void DDLWorker::parseQueryAndResolveHost(DDLTask & task)
     if (!task.query || !(task.query_on_cluster = dynamic_cast<ASTQueryWithOnCluster *>(task.query.get())))
         throw Exception("Received unknown DDL query", ErrorCodes::UNKNOWN_TYPE_OF_QUERY);
 
-    if (is_replicated_db)
+    if (database_replicated_ext)
         return;
 
     task.cluster_name = task.query_on_cluster->cluster;
@@ -524,11 +525,11 @@ bool DDLWorker::tryExecuteQuery(const String & query, const DDLTask & task, Exec
     try
     {
         auto current_context = std::make_unique<Context>(context);
-        if (is_replicated_db)
+        if (database_replicated_ext)
         {
             current_context->getClientInfo().query_kind
                 = ClientInfo::QueryKind::REPLICATED_LOG_QUERY; //FIXME why do we need separate query kind?
-            current_context->setCurrentDatabase(*db_name);
+            current_context->setCurrentDatabase(database_replicated_ext->database_name);
         }
         else
             current_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY;
@@ -721,8 +722,8 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
     };
 
     String shard_node_name;
-    if (is_replicated_db)
-        shard_node_name = *db_shard_name;
+    if (database_replicated_ext)
+        shard_node_name = database_replicated_ext->shard_name;
     else
         shard_node_name = get_shard_name(task.cluster->getShardsAddresses().at(task.host_shard_num));
     String shard_path = node_path + "/shards/" + shard_node_name;
@@ -920,7 +921,7 @@ void DDLWorker::createStatusDirs(const std::string & node_path, const ZooKeeperP
 
 String DDLWorker::enqueueQuery(DDLLogEntry & entry)
 {
-    if (entry.hosts.empty() && !is_replicated_db)
+    if (entry.hosts.empty() && !database_replicated_ext)
         throw Exception("Empty host list in a distributed DDL task", ErrorCodes::LOGICAL_ERROR);
 
     auto zookeeper = getAndSetZooKeeper();
diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h
index 1c28100f933..f38d41df503 100644
--- a/src/Interpreters/DDLWorker.h
+++ b/src/Interpreters/DDLWorker.h
@@ -31,13 +31,30 @@ class ASTAlterQuery;
 struct DDLLogEntry;
 struct DDLTask;
 using DDLTaskPtr = std::unique_ptr<DDLTask>;
+using ZooKeeperPtr = std::shared_ptr<zkutil::ZooKeeper>;
+
+
+struct DatabaseReplicatedExtensions
+{
+    UUID database_uuid;
+    String database_name;
+    String shard_name;
+    String replica_name;
+    String first_not_executed;
+    using NewEntryCallback = std::function<void(const String & entry_name, const ZooKeeperPtr)>;
+    using EntryExecutedCallback = std::function<void(const String & entry_name, const ZooKeeperPtr)>;
+    using EntryErrorCallback = std::function<void(const String & entry_name, const ZooKeeperPtr, const std::exception_ptr &)>;
+    NewEntryCallback before_execution_callback;
+    EntryExecutedCallback executed_callback;
+    EntryErrorCallback error_callback;
+};
 
 
 class DDLWorker
 {
 public:
-    DDLWorker(int pool_size_, const std::string & zk_root_dir, Context & context_, const Poco::Util::AbstractConfiguration * config, const String & prefix,
-              bool is_replicated_db_ = false, const std::optional<String> & db_name_ = std::nullopt, const std::optional<String> & db_replica_name_ = std::nullopt, const std::optional<String> & db_shard_name_ = std::nullopt);
+    DDLWorker(int pool_size_, const std::string & zk_root_dir, const Context & context_, const Poco::Util::AbstractConfiguration * config, const String & prefix,
+              std::optional<DatabaseReplicatedExtensions> database_replicated_ext_ = std::nullopt);
     ~DDLWorker();
 
     /// Pushes query into DDL queue, returns path to created node
@@ -50,8 +67,9 @@ public:
         return host_fqdn_id;
     }
 
+    void shutdown();
+
 private:
-    using ZooKeeperPtr = std::shared_ptr<zkutil::ZooKeeper>;
 
     /// Returns cached ZooKeeper session (possibly expired).
     ZooKeeperPtr tryGetZooKeeper() const;
@@ -103,13 +121,10 @@ private:
     void attachToThreadGroup();
 
 private:
-    bool is_replicated_db;
-    std::optional<String> db_name;
-    std::optional<String> db_replica_name;
-    std::optional<String> db_shard_name;
     std::atomic<bool> is_circular_replicated = false;
     Context context;
     Poco::Logger * log;
+    std::optional<DatabaseReplicatedExtensions> database_replicated_ext;
 
     std::string host_fqdn;      /// current host domain name
     std::string host_fqdn_id;   /// host_name:port

From 2283906a1118d0836fc6cb813557e8a3d8f21383 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Thu, 19 Nov 2020 13:34:45 +0300
Subject: [PATCH 0058/1238] try support replica recovery

---
 src/Common/ErrorCodes.cpp            |   1 +
 src/Databases/DatabaseReplicated.cpp | 259 +++++++++++++++++----------
 src/Databases/DatabaseReplicated.h   |  22 ++-
 src/Interpreters/DDLWorker.cpp       |  65 ++++++-
 src/Interpreters/DDLWorker.h         |  18 +-
 5 files changed, 253 insertions(+), 112 deletions(-)

diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp
index 405b8c60af8..1981dea5cb9 100644
--- a/src/Common/ErrorCodes.cpp
+++ b/src/Common/ErrorCodes.cpp
@@ -522,6 +522,7 @@
     M(553, ROCKSDB_ERROR) \
     M(553, LZMA_STREAM_ENCODER_FAILED) \
     M(554, LZMA_STREAM_DECODER_FAILED) \
+    M(554, DATABASE_REPLICATION_FAILED) \
     M(999, KEEPER_EXCEPTION) \
     M(1000, POCO_EXCEPTION) \
     M(1001, STD_EXCEPTION) \
diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index c4bffd8fd5d..7b6d98f992a 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -28,9 +28,10 @@ namespace ErrorCodes
     extern const int LOGICAL_ERROR;
     extern const int BAD_ARGUMENTS;
     extern const int REPLICA_IS_ALREADY_EXIST;
+    extern const int DATABASE_REPLICATION_FAILED;
 }
 
-constexpr const char * first_entry_name = "query-0000000000";
+static constexpr size_t METADATA_FILE_BUFFER_SIZE = 32768;
 
 zkutil::ZooKeeperPtr DatabaseReplicated::getZooKeeper() const
 {
@@ -42,6 +43,15 @@ static inline String getHostID(const Context & global_context)
     return Cluster::Address::toString(getFQDNOrHostName(), global_context.getTCPPort());
 }
 
+Strings DatabaseReplicated::getSnapshots(const ZooKeeperPtr & zookeeper) const
+{
+    Strings snapshots = zookeeper->getChildren(zookeeper_path + "/snapshots");
+    std::sort(snapshots.begin(), snapshots.end());
+    if (snapshots.empty())
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "No snapshots found");
+    return snapshots;
+}
+
 
 DatabaseReplicated::~DatabaseReplicated() = default;
 
@@ -84,7 +94,7 @@ DatabaseReplicated::DatabaseReplicated(
         createDatabaseNodesInZooKeeper(current_zookeeper);
     }
 
-    replica_path = zookeeper_path + "/replicas/" + shard_name + "|" + replica_name;
+    replica_path = zookeeper_path + "/replicas/" + shard_name + "/" + replica_name;
 
     String replica_host_id;
     if (current_zookeeper->tryGet(replica_path, replica_host_id))
@@ -95,7 +105,7 @@ DatabaseReplicated::DatabaseReplicated(
                             "Replica {} of shard {} of replicated database at {} already exists. Replica host ID: '{}', current host ID: '{}'",
                             replica_name, shard_name, zookeeper_path, replica_host_id, host_id);
 
-        log_entry_to_execute = current_zookeeper->get(replica_path + "/log_ptr");
+        log_entry_to_execute = parse<UInt32>(current_zookeeper->get(replica_path + "/log_ptr"));
     }
     else
     {
@@ -103,10 +113,7 @@ DatabaseReplicated::DatabaseReplicated(
         createReplicaNodesInZooKeeper(current_zookeeper);
     }
 
-    assert(log_entry_to_execute.starts_with("query-"));
-
-
-    snapshot_period = context_.getConfigRef().getInt("database_replicated_snapshot_period", 10);
+    snapshot_period = 1; //context_.getConfigRef().getInt("database_replicated_snapshot_period", 10);
     LOG_DEBUG(log, "Snapshot period is set to {} log entries per one snapshot", snapshot_period);
 }
 
@@ -117,10 +124,12 @@ bool DatabaseReplicated::createDatabaseNodesInZooKeeper(const zkutil::ZooKeeperP
     Coordination::Requests ops;
     ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path, "", zkutil::CreateMode::Persistent));
     ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/log", "", zkutil::CreateMode::Persistent));
+    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/replicas", "", zkutil::CreateMode::Persistent));
     ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/snapshots", "", zkutil::CreateMode::Persistent));
     /// Create empty snapshot (with no tables)
-    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/snapshots/" + first_entry_name, "", zkutil::CreateMode::Persistent));
-    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/replicas", "", zkutil::CreateMode::Persistent));
+    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/snapshots/0", "", zkutil::CreateMode::Persistent));
+    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/metadata", "", zkutil::CreateMode::Persistent));
+    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/metadata/0", "", zkutil::CreateMode::Persistent));
 
     Coordination::Responses responses;
     auto res = current_zookeeper->tryMulti(ops, responses);
@@ -137,20 +146,24 @@ void DatabaseReplicated::createReplicaNodesInZooKeeper(const zkutil::ZooKeeperPt
 {
     current_zookeeper->createAncestors(replica_path);
 
-    Strings snapshots = current_zookeeper->getChildren(zookeeper_path + "/snapshots");
-    std::sort(snapshots.begin(), snapshots.end());
-    if (snapshots.empty())
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "No snapshots found");
-
     /// When creating new replica, use latest snapshot version as initial value of log_pointer
-    log_entry_to_execute = snapshots.back();
+    log_entry_to_execute = parse<UInt32>(getSnapshots(current_zookeeper).back());
 
     /// Write host name to replica_path, it will protect from multiple replicas with the same name
     auto host_id = getHostID(global_context);
 
+    /// On replica creation add empty entry to log. Can be used to trigger some actions on other replicas (e.g. update cluster info).
+    DDLLogEntry entry;
+    entry.hosts = {};
+    entry.query = {};
+    entry.initiator = {};
+
+    recoverLostReplica(current_zookeeper, log_entry_to_execute, true);
+
     Coordination::Requests ops;
     ops.emplace_back(zkutil::makeCreateRequest(replica_path, host_id, zkutil::CreateMode::Persistent));
-    ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/log_ptr", log_entry_to_execute , zkutil::CreateMode::Persistent));
+    ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/log_ptr", toString(log_entry_to_execute), zkutil::CreateMode::Persistent));
+    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/log/query-", entry.toString(), zkutil::CreateMode::PersistentSequential));
     current_zookeeper->multi(ops);
 }
 
@@ -160,10 +173,13 @@ void DatabaseReplicated::loadStoredObjects(Context & context, bool has_force_res
 
     DatabaseReplicatedExtensions ext;
     ext.database_uuid = getUUID();
+    ext.zookeeper_path = zookeeper_path;
     ext.database_name = getDatabaseName();
     ext.shard_name = shard_name;
     ext.replica_name = replica_name;
     ext.first_not_executed = log_entry_to_execute;
+    ext.lost_callback     = [this] (const String & entry_name, const ZooKeeperPtr & zookeeper) { onUnexpectedLogEntry(entry_name, zookeeper); };
+    ext.executed_callback = [this] (const String & entry_name, const ZooKeeperPtr & zookeeper) { onExecutedLogEntry(entry_name, zookeeper); };
 
     /// Pool size must be 1 (to avoid reordering of log entries)
     constexpr size_t pool_size = 1;
@@ -171,6 +187,41 @@ void DatabaseReplicated::loadStoredObjects(Context & context, bool has_force_res
                                              std::make_optional<DatabaseReplicatedExtensions>(std::move(ext)));
 }
 
+void DatabaseReplicated::onUnexpectedLogEntry(const String & entry_name, const ZooKeeperPtr & zookeeper)
+{
+    /// We cannot execute next entry of replication log. Possible reasons:
+    /// 1. Replica is staled, some entries were removed by log cleanup process.
+    ///    In this case we should recover replica from the last snapshot.
+    /// 2. Replication log is broken due to manual operations with ZooKeeper or logical error.
+    ///    In this case we just stop replication without any attempts to recover it automatically,
+    ///    because such attempts may lead to unexpected data removal.
+
+    constexpr const char * name = "query-";
+    if (!startsWith(entry_name, name))
+        throw Exception(ErrorCodes::DATABASE_REPLICATION_FAILED, "Unexpected entry in replication log: {}", entry_name);
+
+    UInt32 entry_number;
+    if (!tryParse(entry_number, entry_name.substr(strlen(name))))
+        throw Exception(ErrorCodes::DATABASE_REPLICATION_FAILED, "Cannot parse number of replication log entry {}", entry_name);
+
+    if (entry_number < log_entry_to_execute)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Entry {} already executed, current pointer is {}", entry_number, log_entry_to_execute);
+
+    /// Entry name is valid. Let's get min snapshot version to check if replica is staled.
+    Strings snapshots = getSnapshots(zookeeper);
+    UInt32 min_snapshot = parse<UInt32>(snapshots.front());
+
+    if (log_entry_to_execute < min_snapshot)
+    {
+        recoverLostReplica(zookeeper, parse<UInt32>(snapshots.back()));
+        return;
+    }
+
+    throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot recover replica, probably it's a bug. "
+                                               "Got log entry '{}' when expected entry number {}, "
+                                               "available snapshots: ",
+                                                entry_name, log_entry_to_execute, boost::algorithm::join(snapshots, ", "));
+}
 
 void DatabaseReplicated::removeOutdatedSnapshotsAndLog()
 {
@@ -217,40 +268,51 @@ void DatabaseReplicated::removeOutdatedSnapshotsAndLog()
     }
 }
 
-void DatabaseReplicated::runBackgroundLogExecutor()
+void DatabaseReplicated::onExecutedLogEntry(const String & entry_name, const ZooKeeperPtr & zookeeper)
 {
-    if (last_executed_log_entry.empty())
+    assert(entry_name == DatabaseReplicatedExtensions::getLogEntryName(log_entry_to_execute));
+    ++log_entry_to_execute;
+
+    if (snapshot_period > 0 && log_entry_to_execute % snapshot_period == 0)
     {
-        loadMetadataFromSnapshot();
+        createSnapshot(zookeeper);
     }
-
-    auto current_zookeeper = getZooKeeper();
-    Strings log_entry_names = current_zookeeper->getChildren(zookeeper_path + "/log");
-
-    std::sort(log_entry_names.begin(), log_entry_names.end());
-    auto newest_entry_it = std::upper_bound(log_entry_names.begin(), log_entry_names.end(), last_executed_log_entry);
-
-    log_entry_names.erase(log_entry_names.begin(), newest_entry_it);
-
-    for (const String & log_entry_name : log_entry_names)
-    {
-        //executeLogName(log_entry_name);
-        last_executed_log_entry = log_entry_name;
-        writeLastExecutedToDiskAndZK();
-
-        int log_n = parse<int>(log_entry_name.substr(4));
-        int last_log_n = parse<int>(log_entry_names.back().substr(4));
-
-        /// The third condition gurantees at most one snapshot creation per batch
-        if (log_n > 0 && snapshot_period > 0 && (last_log_n - log_n) / snapshot_period == 0 && log_n % snapshot_period == 0)
-        {
-            createSnapshot();
-        }
-    }
-
-    //background_log_executor->scheduleAfter(500);
 }
 
+//void DatabaseReplicated::runBackgroundLogExecutor()
+//{
+//    if (last_executed_log_entry.empty())
+//    {
+//        loadMetadataFromSnapshot();
+//    }
+//
+//    auto current_zookeeper = getZooKeeper();
+//    Strings log_entry_names = current_zookeeper->getChildren(zookeeper_path + "/log");
+//
+//    std::sort(log_entry_names.begin(), log_entry_names.end());
+//    auto newest_entry_it = std::upper_bound(log_entry_names.begin(), log_entry_names.end(), last_executed_log_entry);
+//
+//    log_entry_names.erase(log_entry_names.begin(), newest_entry_it);
+//
+//    for (const String & log_entry_name : log_entry_names)
+//    {
+//        //executeLogName(log_entry_name);
+//        last_executed_log_entry = log_entry_name;
+//        writeLastExecutedToDiskAndZK();
+//
+//        int log_n = parse<int>(log_entry_name.substr(4));
+//        int last_log_n = parse<int>(log_entry_names.back().substr(4));
+//
+//        /// The third condition gurantees at most one snapshot creation per batch
+//        if (log_n > 0 && snapshot_period > 0 && (last_log_n - log_n) / snapshot_period == 0 && log_n % snapshot_period == 0)
+//        {
+//            createSnapshot();
+//        }
+//    }
+//
+//    //background_log_executor->scheduleAfter(500);
+//}
+
 void DatabaseReplicated::writeLastExecutedToDiskAndZK()
 {
     auto current_zookeeper = getZooKeeper();
@@ -294,79 +356,88 @@ BlockIO DatabaseReplicated::propose(const ASTPtr & query)
 
     //FIXME need list of all replicas, we can obtain it from zk
     Strings hosts_to_wait;
-    hosts_to_wait.emplace_back(shard_name + '/' +replica_name);
+    hosts_to_wait.emplace_back(shard_name + '|' +replica_name);
     auto stream = std::make_shared<DDLQueryStatusInputStream>(node_path, entry, global_context);
     io.in = std::move(stream);
     return io;
 }
 
 
-void DatabaseReplicated::createSnapshot()
+void DatabaseReplicated::createSnapshot(const ZooKeeperPtr & zookeeper)
 {
-    auto current_zookeeper = getZooKeeper();
-    String snapshot_path = zookeeper_path + "/snapshots/" + last_executed_log_entry;
+    String snapshot_path = zookeeper_path + "/snapshot/" + toString(log_entry_to_execute);
 
-    if (Coordination::Error::ZNODEEXISTS == current_zookeeper->tryCreate(snapshot_path, String(), zkutil::CreateMode::Persistent))
-    {
+    if (zookeeper->exists(snapshot_path))
         return;
-    }
 
-    for (auto iterator = getTablesIterator(global_context, {}); iterator->isValid(); iterator->next())
+    std::vector<std::pair<String, String>> create_queries;
     {
-        String table_name = iterator->name();
-        auto query = getCreateQueryFromMetadata(getObjectMetadataPath(table_name), true);
-        String statement = queryToString(query);
-        current_zookeeper->create(snapshot_path + "/" + table_name, statement, zkutil::CreateMode::Persistent);
+        std::lock_guard lock{mutex};
+        create_queries.reserve(tables.size());
+        for (const auto & table : tables)
+        {
+            const String & name = table.first;
+            ReadBufferFromFile in(getObjectMetadataPath(name), METADATA_FILE_BUFFER_SIZE);
+            String attach_query;
+            readStringUntilEOF(attach_query, in);
+            create_queries.emplace_back(escapeForFileName(name), std::move(attach_query));
+        }
     }
-    current_zookeeper->create(snapshot_path + "/.completed", String(), zkutil::CreateMode::Persistent);
 
-    removeOutdatedSnapshotsAndLog();
+    if (zookeeper->exists(snapshot_path))
+        return;
+
+    String queries_path = zookeeper_path + "/metadata/" + toString(log_entry_to_execute);
+    zookeeper->tryCreate(queries_path, "", zkutil::CreateMode::Persistent);
+    queries_path += '/';
+
+    //FIXME use tryMulti with MULTI_BATCH_SIZE
+
+    for (const auto & table : create_queries)
+        zookeeper->tryCreate(queries_path + table.first, table.second, zkutil::CreateMode::Persistent);
+
+    if (create_queries.size() != zookeeper->getChildren(zookeeper_path + "/metadata/" + toString(log_entry_to_execute)).size())
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Created invalid snapshot");
+
+    zookeeper->tryCreate(snapshot_path, String(), zkutil::CreateMode::Persistent);
 }
 
-void DatabaseReplicated::loadMetadataFromSnapshot()
+void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 from_snapshot, bool create)
 {
-    /// Executes the latest snapshot.
-    /// Used by new replicas only.
-    auto current_zookeeper = getZooKeeper();
+    LOG_WARNING(log, "Will recover replica from snapshot", from_snapshot);
 
-    Strings snapshots;
-    if (current_zookeeper->tryGetChildren(zookeeper_path + "/snapshots", snapshots) != Coordination::Error::ZOK)
-        return;
+    //FIXME drop old tables
 
-    auto latest_snapshot = std::max_element(snapshots.begin(), snapshots.end());
-    while (snapshots.size() > 0 && !current_zookeeper->exists(zookeeper_path + "/snapshots/" + *latest_snapshot + "/.completed"))
+    String snapshot_metadata_path = zookeeper_path + "/metadata/" + toString(from_snapshot);
+    Strings tables_in_snapshot = current_zookeeper->getChildren(snapshot_metadata_path);
+    current_zookeeper->get(zookeeper_path + "/snapshots/" + toString(from_snapshot));   /// Assert node exists
+    snapshot_metadata_path += '/';
+
+    for (const auto & table_name : tables_in_snapshot)
     {
-        snapshots.erase(latest_snapshot);
-        latest_snapshot = std::max_element(snapshots.begin(), snapshots.end());
+        String query_to_execute = current_zookeeper->get(snapshot_metadata_path + table_name);
+
+
+        if (!startsWith(query_to_execute, "ATTACH "))
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected query: {}", query_to_execute);
+        query_to_execute = "CREATE " + query_to_execute.substr(strlen("ATTACH "));
+
+        Context current_context = global_context;
+        current_context.getClientInfo().query_kind = ClientInfo::QueryKind::REPLICATED_LOG_QUERY;
+        current_context.setCurrentDatabase(database_name);
+        current_context.setCurrentQueryId(""); // generate random query_id
+
+        executeQuery(query_to_execute, current_context);
     }
 
-    if (snapshots.size() < 1)
-    {
-        return;
-    }
-
-    Strings metadatas;
-    if (current_zookeeper->tryGetChildren(zookeeper_path + "/snapshots/" + *latest_snapshot, metadatas) != Coordination::Error::ZOK)
+    if (create)
         return;
 
-    LOG_DEBUG(log, "Executing {} snapshot", *latest_snapshot);
+    current_zookeeper->set(replica_path + "/log-ptr", toString(from_snapshot));
+    last_executed_log_entry = from_snapshot;
+    ddl_worker->setLogPointer(from_snapshot); //FIXME
 
-    for (auto t = metadatas.begin(); t != metadatas.end(); ++t)
-    {
-        String path = zookeeper_path + "/snapshots/" + *latest_snapshot + "/" + *t;
-
-        String query_to_execute = current_zookeeper->get(path, {}, nullptr);
-
-        auto current_context = std::make_unique<Context>(global_context);
-        current_context->getClientInfo().query_kind = ClientInfo::QueryKind::REPLICATED_LOG_QUERY;
-        current_context->setCurrentDatabase(database_name);
-        current_context->setCurrentQueryId(""); // generate random query_id
-
-        executeQuery(query_to_execute, *current_context);
-    }
-
-    last_executed_log_entry = *latest_snapshot;
-    writeLastExecutedToDiskAndZK();
+    //writeLastExecutedToDiskAndZK();
 }
 
 void DatabaseReplicated::drop(const Context & context_)
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index 219779d602d..3f5bd4608f1 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -13,6 +13,7 @@ namespace DB
 {
 
 class DDLWorker;
+using ZooKeeperPtr = std::shared_ptr<zkutil::ZooKeeper>;
 
 /** DatabaseReplicated engine
   * supports replication of metadata
@@ -56,22 +57,29 @@ public:
     void loadStoredObjects(Context & context, bool has_force_restore_data_flag, bool force_attach = false) override;
 
 private:
-    bool createDatabaseNodesInZooKeeper(const zkutil::ZooKeeperPtr & current_zookeeper);
-    void createReplicaNodesInZooKeeper(const zkutil::ZooKeeperPtr & current_zookeeper);
+    bool createDatabaseNodesInZooKeeper(const ZooKeeperPtr & current_zookeeper);
+    void createReplicaNodesInZooKeeper(const ZooKeeperPtr & current_zookeeper);
 
-    void runBackgroundLogExecutor();
+    //void runBackgroundLogExecutor();
     void writeLastExecutedToDiskAndZK();
 
-    void loadMetadataFromSnapshot();
-    void createSnapshot();
+    //void loadMetadataFromSnapshot();
+    void createSnapshot(const ZooKeeperPtr & zookeeper);
     void removeOutdatedSnapshotsAndLog();
 
+    Strings getSnapshots(const ZooKeeperPtr & zookeeper) const;
+
+    void onUnexpectedLogEntry(const String & entry_name, const ZooKeeperPtr & zookeeper);
+    void recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 from_snapshot, bool create = false);
+
+    void onExecutedLogEntry(const String & entry_name, const ZooKeeperPtr & zookeeper);
+
     String zookeeper_path;
     String shard_name;
     String replica_name;
     String replica_path;
 
-    String log_entry_to_execute;
+    UInt32 log_entry_to_execute;
 
     std::mutex log_name_mutex;
     String log_name_to_exec_with_result;
@@ -84,6 +92,8 @@ private:
 
     std::unique_ptr<DDLWorker> ddl_worker;
 
+
+
 };
 
 }
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 7d947a264a6..51f0e1b45a9 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -142,6 +142,22 @@ std::unique_ptr<ZooKeeperLock> createSimpleZooKeeperLock(
 }
 
 
+String DatabaseReplicatedExtensions::getLogEntryName(UInt32 log_entry_number)
+{
+    constexpr size_t seq_node_digits = 10;
+    String number = toString(log_entry_number);
+    String name = "query-" + String(seq_node_digits - number.size(), '0') + number;
+    return name;
+}
+
+UInt32 DatabaseReplicatedExtensions::getLogEntryNumber(const String & log_entry_name)
+{
+    constexpr const char * name = "query-";
+    assert(startsWith(log_entry_name, name));
+    return parse<UInt32>(log_entry_name.substr(strlen(name)));
+}
+
+
 DDLWorker::DDLWorker(int pool_size_, const std::string & zk_root_dir, const Context & context_, const Poco::Util::AbstractConfiguration * config, const String & prefix,
                      std::optional<DatabaseReplicatedExtensions> database_replicated_ext_)
     : context(context_)
@@ -236,8 +252,21 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r
     String node_data;
     String entry_path = queue_dir + "/" + entry_name;
 
+    if (database_replicated_ext)
+    {
+        auto expected_log_entry = DatabaseReplicatedExtensions::getLogEntryName(database_replicated_ext->first_not_executed);
+        if (entry_name != expected_log_entry)
+        {
+            database_replicated_ext->lost_callback(entry_name, zookeeper);
+            out_reason = "DatabaseReplicated: expected " + expected_log_entry + " got " + entry_name;
+            return {};
+        }
+    }
+
     if (!zookeeper->tryGet(entry_path, node_data))
     {
+        if (database_replicated_ext)
+            database_replicated_ext->lost_callback(entry_name, zookeeper);
         /// It is Ok that node could be deleted just now. It means that there are no current host in node's host list.
         out_reason = "The task was deleted";
         return {};
@@ -339,7 +368,7 @@ void DDLWorker::scheduleTasks()
         ? queue_nodes.begin()
         : std::upper_bound(queue_nodes.begin(), queue_nodes.end(), last_tasks.back());
 
-    for (auto it = begin_node; it != queue_nodes.end(); ++it)
+    for (auto it = begin_node; it != queue_nodes.end() && !stop_flag; ++it)
     {
         String entry_name = *it;
 
@@ -362,11 +391,17 @@ void DDLWorker::scheduleTasks()
 
         if (!already_processed)
         {
-            worker_pool.scheduleOrThrowOnError([this, task_ptr = task.release()]()
+            if (database_replicated_ext)
             {
-                setThreadName("DDLWorkerExec");
-                enqueueTask(DDLTaskPtr(task_ptr));
-            });
+                enqueueTask(DDLTaskPtr(task.release()));
+            }
+            else
+            {
+                worker_pool.scheduleOrThrowOnError([this, task_ptr = task.release()]() {
+                    setThreadName("DDLWorkerExec");
+                    enqueueTask(DDLTaskPtr(task_ptr));
+                });
+            }
         }
         else
         {
@@ -374,9 +409,6 @@ void DDLWorker::scheduleTasks()
         }
 
         saveTask(entry_name);
-
-        if (stop_flag)
-            break;
     }
 }
 
@@ -599,6 +631,7 @@ void DDLWorker::enqueueTask(DDLTaskPtr task_ptr)
         }
     }
 }
+
 void DDLWorker::processTask(DDLTask & task)
 {
     auto zookeeper = tryGetZooKeeper();
@@ -626,7 +659,9 @@ void DDLWorker::processTask(DDLTask & task)
     else
         throw Coordination::Exception(code, active_node_path);
 
-    if (!task.was_executed)
+    //FIXME
+    bool is_dummy_query = database_replicated_ext && task.entry.query.empty();
+    if (!task.was_executed && !is_dummy_query)
     {
         try
         {
@@ -675,7 +710,19 @@ void DDLWorker::processTask(DDLTask & task)
     Coordination::Requests ops;
     ops.emplace_back(zkutil::makeRemoveRequest(active_node_path, -1));
     ops.emplace_back(zkutil::makeCreateRequest(finished_node_path, task.execution_status.serializeText(), zkutil::CreateMode::Persistent));
+    if (database_replicated_ext)
+    {
+        assert(DatabaseReplicatedExtensions::getLogEntryName(database_replicated_ext->first_not_executed) == task.entry_name);
+        ops.emplace_back(zkutil::makeSetRequest(database_replicated_ext->getReplicaPath() + "/log_ptr", toString(database_replicated_ext->first_not_executed), -1));
+    }
+
     zookeeper->multi(ops);
+
+    if (database_replicated_ext)
+    {
+        database_replicated_ext->executed_callback(task.entry_name, zookeeper);
+        ++(database_replicated_ext->first_not_executed);
+    }
 }
 
 
diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h
index f38d41df503..08bf641264e 100644
--- a/src/Interpreters/DDLWorker.h
+++ b/src/Interpreters/DDLWorker.h
@@ -37,16 +37,25 @@ using ZooKeeperPtr = std::shared_ptr<zkutil::ZooKeeper>;
 struct DatabaseReplicatedExtensions
 {
     UUID database_uuid;
+    String zookeeper_path;
     String database_name;
     String shard_name;
     String replica_name;
-    String first_not_executed;
-    using NewEntryCallback = std::function<void(const String & entry_name, const ZooKeeperPtr)>;
+    UInt32 first_not_executed;
+    using EntryLostCallback = std::function<void(const String & entry_name, const ZooKeeperPtr)>;
     using EntryExecutedCallback = std::function<void(const String & entry_name, const ZooKeeperPtr)>;
     using EntryErrorCallback = std::function<void(const String & entry_name, const ZooKeeperPtr, const std::exception_ptr &)>;
-    NewEntryCallback before_execution_callback;
+    EntryLostCallback lost_callback;
     EntryExecutedCallback executed_callback;
     EntryErrorCallback error_callback;
+
+    String getReplicaPath() const
+    {
+        return zookeeper_path + "/replicas/" + shard_name + "/" + replica_name;
+    }
+
+    static String getLogEntryName(UInt32 log_entry_number);
+    static UInt32 getLogEntryNumber(const String & log_entry_name);
 };
 
 
@@ -69,6 +78,9 @@ public:
 
     void shutdown();
 
+    //FIXME get rid of this method
+    void setLogPointer(UInt32 log_pointer) { database_replicated_ext->first_not_executed = log_pointer; }
+
 private:
 
     /// Returns cached ZooKeeper session (possibly expired).

From 7ab4445e993333f15cea8d69e0de9a909c7d6495 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Fri, 20 Nov 2020 19:06:27 +0300
Subject: [PATCH 0059/1238] try another approach

---
 src/Databases/DatabaseAtomic.cpp            |  18 ++-
 src/Databases/DatabaseAtomic.h              |   4 +-
 src/Databases/DatabaseOnDisk.cpp            |   5 +-
 src/Databases/DatabaseOnDisk.h              |   2 +-
 src/Databases/DatabaseReplicated.cpp        | 124 +++-----------------
 src/Databases/DatabaseReplicated.h          |   2 -
 src/Interpreters/Context.cpp                |  13 ++
 src/Interpreters/Context.h                  |  11 ++
 src/Interpreters/DDLTask.h                  |  22 ++++
 src/Interpreters/DDLWorker.cpp              |  96 ++++++++++++---
 src/Interpreters/DDLWorker.h                |   5 +
 src/Interpreters/SystemLog.h                |   9 +-
 src/Storages/StorageReplicatedMergeTree.cpp |   7 ++
 13 files changed, 186 insertions(+), 132 deletions(-)

diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp
index 15a55da89b2..78400368924 100644
--- a/src/Databases/DatabaseAtomic.cpp
+++ b/src/Databases/DatabaseAtomic.cpp
@@ -11,6 +11,9 @@
 #include <Interpreters/ExternalDictionariesLoader.h>
 #include <filesystem>
 
+//FIXME it shouldn't be here
+#include <Interpreters/DDLTask.h>
+#include <Common/ZooKeeper/ZooKeeper.h>
 
 namespace DB
 {
@@ -263,7 +266,8 @@ void DatabaseAtomic::renameTable(const Context & context, const String & table_n
 }
 
 void DatabaseAtomic::commitCreateTable(const ASTCreateQuery & query, const StoragePtr & table,
-                                       const String & table_metadata_tmp_path, const String & table_metadata_path)
+                                       const String & table_metadata_tmp_path, const String & table_metadata_path,
+                                       const Context & query_context)
 {
     DetachedTables not_in_use;
     auto table_data_path = getTableDataPath(query);
@@ -280,6 +284,18 @@ void DatabaseAtomic::commitCreateTable(const ASTCreateQuery & query, const Stora
         /// We will get en exception if some table with the same UUID exists (even if it's detached table or table from another database)
         DatabaseCatalog::instance().addUUIDMapping(query.uuid);
         locked_uuid = true;
+
+        if (auto txn = query_context.getMetadataTransaction())
+        {
+            String metadata_zk_path = txn->zookeeper_path + "/metadata/" + escapeForFileName(query.table);
+            String statement = getObjectDefinitionFromCreateQuery(query.clone());
+            /// zk::multi(...) will throw if `metadata_zk_path` exists
+            txn->ops.emplace_back(zkutil::makeCreateRequest(metadata_zk_path, statement, zkutil::CreateMode::Persistent));
+            txn->current_zookeeper->multi(txn->ops);     /// Commit point (a sort of) for Replicated database
+            /// NOTE: replica will be lost if server crashes before the following renameNoReplace(...)
+            /// TODO better detection and recovery
+        }
+
         /// It throws if `table_metadata_path` already exists (it's possible if table was detached)
         renameNoReplace(table_metadata_tmp_path, table_metadata_path);  /// Commit point (a sort of)
         attachTableUnlocked(query.table, table, lock);   /// Should never throw
diff --git a/src/Databases/DatabaseAtomic.h b/src/Databases/DatabaseAtomic.h
index 97e6e1173d1..61ce2721701 100644
--- a/src/Databases/DatabaseAtomic.h
+++ b/src/Databases/DatabaseAtomic.h
@@ -60,10 +60,10 @@ public:
 
     void waitDetachedTableNotInUse(const UUID & uuid);
 
-private:
+protected:
     void commitAlterTable(const StorageID & table_id, const String & table_metadata_tmp_path, const String & table_metadata_path) override;
     void commitCreateTable(const ASTCreateQuery & query, const StoragePtr & table,
-                           const String & table_metadata_tmp_path, const String & table_metadata_path) override;
+                           const String & table_metadata_tmp_path, const String & table_metadata_path, const Context & query_context) override;
 
     void assertDetachedTableNotInUse(const UUID & uuid);
     typedef std::unordered_map<UUID, StoragePtr> DetachedTables;
diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp
index 8fa136f4969..8f24f53fc3f 100644
--- a/src/Databases/DatabaseOnDisk.cpp
+++ b/src/Databases/DatabaseOnDisk.cpp
@@ -193,11 +193,12 @@ void DatabaseOnDisk::createTable(
         out.close();
     }
 
-    commitCreateTable(create, table, table_metadata_tmp_path, table_metadata_path);
+    commitCreateTable(create, table, table_metadata_tmp_path, table_metadata_path, context);
 }
 
 void DatabaseOnDisk::commitCreateTable(const ASTCreateQuery & query, const StoragePtr & table,
-                                       const String & table_metadata_tmp_path, const String & table_metadata_path)
+                                       const String & table_metadata_tmp_path, const String & table_metadata_path,
+                                       const Context & /*query_context*/)
 {
     try
     {
diff --git a/src/Databases/DatabaseOnDisk.h b/src/Databases/DatabaseOnDisk.h
index 23c1584ff9c..a5510ef4810 100644
--- a/src/Databases/DatabaseOnDisk.h
+++ b/src/Databases/DatabaseOnDisk.h
@@ -83,7 +83,7 @@ protected:
     ASTPtr getCreateQueryFromMetadata(const String & metadata_path, bool throw_on_error) const;
 
     virtual void commitCreateTable(const ASTCreateQuery & query, const StoragePtr & table,
-                                   const String & table_metadata_tmp_path, const String & table_metadata_path);
+                                   const String & table_metadata_tmp_path, const String & table_metadata_path, const Context & query_context);
 
     const String metadata_path;
     const String data_path;
diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index 7b6d98f992a..608d03c339b 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -29,10 +29,9 @@ namespace ErrorCodes
     extern const int BAD_ARGUMENTS;
     extern const int REPLICA_IS_ALREADY_EXIST;
     extern const int DATABASE_REPLICATION_FAILED;
+    extern const int UNKNOWN_DATABASE;
 }
 
-static constexpr size_t METADATA_FILE_BUFFER_SIZE = 32768;
-
 zkutil::ZooKeeperPtr DatabaseReplicated::getZooKeeper() const
 {
     return global_context.getZooKeeper();
@@ -43,15 +42,6 @@ static inline String getHostID(const Context & global_context)
     return Cluster::Address::toString(getFQDNOrHostName(), global_context.getTCPPort());
 }
 
-Strings DatabaseReplicated::getSnapshots(const ZooKeeperPtr & zookeeper) const
-{
-    Strings snapshots = zookeeper->getChildren(zookeeper_path + "/snapshots");
-    std::sort(snapshots.begin(), snapshots.end());
-    if (snapshots.empty())
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "No snapshots found");
-    return snapshots;
-}
-
 
 DatabaseReplicated::~DatabaseReplicated() = default;
 
@@ -125,11 +115,9 @@ bool DatabaseReplicated::createDatabaseNodesInZooKeeper(const zkutil::ZooKeeperP
     ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path, "", zkutil::CreateMode::Persistent));
     ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/log", "", zkutil::CreateMode::Persistent));
     ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/replicas", "", zkutil::CreateMode::Persistent));
-    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/snapshots", "", zkutil::CreateMode::Persistent));
-    /// Create empty snapshot (with no tables)
-    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/snapshots/0", "", zkutil::CreateMode::Persistent));
+    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/counter", "", zkutil::CreateMode::Persistent));
     ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/metadata", "", zkutil::CreateMode::Persistent));
-    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/metadata/0", "", zkutil::CreateMode::Persistent));
+    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/min_log_ptr", "0", zkutil::CreateMode::Persistent));
 
     Coordination::Responses responses;
     auto res = current_zookeeper->tryMulti(ops, responses);
@@ -147,7 +135,7 @@ void DatabaseReplicated::createReplicaNodesInZooKeeper(const zkutil::ZooKeeperPt
     current_zookeeper->createAncestors(replica_path);
 
     /// When creating new replica, use latest snapshot version as initial value of log_pointer
-    log_entry_to_execute = parse<UInt32>(getSnapshots(current_zookeeper).back());
+    log_entry_to_execute = 0;   //FIXME
 
     /// Write host name to replica_path, it will protect from multiple replicas with the same name
     auto host_id = getHostID(global_context);
@@ -160,10 +148,16 @@ void DatabaseReplicated::createReplicaNodesInZooKeeper(const zkutil::ZooKeeperPt
 
     recoverLostReplica(current_zookeeper, log_entry_to_execute, true);
 
+    String query_path_prefix = zookeeper_path + "/log/query-";
+    String counter_prefix = zookeeper_path + "/counter/cnt-";
+    String counter_path = current_zookeeper->create(counter_prefix, "", zkutil::CreateMode::EphemeralSequential);
+    String query_path = query_path_prefix + counter_path.substr(counter_prefix.size());
+
     Coordination::Requests ops;
     ops.emplace_back(zkutil::makeCreateRequest(replica_path, host_id, zkutil::CreateMode::Persistent));
     ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/log_ptr", toString(log_entry_to_execute), zkutil::CreateMode::Persistent));
-    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/log/query-", entry.toString(), zkutil::CreateMode::PersistentSequential));
+    ops.emplace_back(zkutil::makeCreateRequest(query_path, entry.toString(), zkutil::CreateMode::PersistentSequential));
+    ops.emplace_back(zkutil::makeRemoveRequest(counter_path, -1));
     current_zookeeper->multi(ops);
 }
 
@@ -207,20 +201,17 @@ void DatabaseReplicated::onUnexpectedLogEntry(const String & entry_name, const Z
     if (entry_number < log_entry_to_execute)
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Entry {} already executed, current pointer is {}", entry_number, log_entry_to_execute);
 
-    /// Entry name is valid. Let's get min snapshot version to check if replica is staled.
-    Strings snapshots = getSnapshots(zookeeper);
-    UInt32 min_snapshot = parse<UInt32>(snapshots.front());
+    /// Entry name is valid. Let's get min log pointer to check if replica is staled.
+    UInt32 min_snapshot = parse<UInt32>(zookeeper->get(zookeeper_path + "/min_log_ptr"));
 
     if (log_entry_to_execute < min_snapshot)
     {
-        recoverLostReplica(zookeeper, parse<UInt32>(snapshots.back()));
+        recoverLostReplica(zookeeper, 0);   //FIXME log_pointer
         return;
     }
 
     throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot recover replica, probably it's a bug. "
-                                               "Got log entry '{}' when expected entry number {}, "
-                                               "available snapshots: ",
-                                                entry_name, log_entry_to_execute, boost::algorithm::join(snapshots, ", "));
+                                               "Got log entry '{}' when expected entry number {}");
 }
 
 void DatabaseReplicated::removeOutdatedSnapshotsAndLog()
@@ -268,51 +259,11 @@ void DatabaseReplicated::removeOutdatedSnapshotsAndLog()
     }
 }
 
-void DatabaseReplicated::onExecutedLogEntry(const String & entry_name, const ZooKeeperPtr & zookeeper)
+void DatabaseReplicated::onExecutedLogEntry(const String & /*entry_name*/, const ZooKeeperPtr & /*zookeeper*/)
 {
-    assert(entry_name == DatabaseReplicatedExtensions::getLogEntryName(log_entry_to_execute));
-    ++log_entry_to_execute;
 
-    if (snapshot_period > 0 && log_entry_to_execute % snapshot_period == 0)
-    {
-        createSnapshot(zookeeper);
-    }
 }
 
-//void DatabaseReplicated::runBackgroundLogExecutor()
-//{
-//    if (last_executed_log_entry.empty())
-//    {
-//        loadMetadataFromSnapshot();
-//    }
-//
-//    auto current_zookeeper = getZooKeeper();
-//    Strings log_entry_names = current_zookeeper->getChildren(zookeeper_path + "/log");
-//
-//    std::sort(log_entry_names.begin(), log_entry_names.end());
-//    auto newest_entry_it = std::upper_bound(log_entry_names.begin(), log_entry_names.end(), last_executed_log_entry);
-//
-//    log_entry_names.erase(log_entry_names.begin(), newest_entry_it);
-//
-//    for (const String & log_entry_name : log_entry_names)
-//    {
-//        //executeLogName(log_entry_name);
-//        last_executed_log_entry = log_entry_name;
-//        writeLastExecutedToDiskAndZK();
-//
-//        int log_n = parse<int>(log_entry_name.substr(4));
-//        int last_log_n = parse<int>(log_entry_names.back().substr(4));
-//
-//        /// The third condition gurantees at most one snapshot creation per batch
-//        if (log_n > 0 && snapshot_period > 0 && (last_log_n - log_n) / snapshot_period == 0 && log_n % snapshot_period == 0)
-//        {
-//            createSnapshot();
-//        }
-//    }
-//
-//    //background_log_executor->scheduleAfter(500);
-//}
-
 void DatabaseReplicated::writeLastExecutedToDiskAndZK()
 {
     auto current_zookeeper = getZooKeeper();
@@ -363,58 +314,19 @@ BlockIO DatabaseReplicated::propose(const ASTPtr & query)
 }
 
 
-void DatabaseReplicated::createSnapshot(const ZooKeeperPtr & zookeeper)
-{
-    String snapshot_path = zookeeper_path + "/snapshot/" + toString(log_entry_to_execute);
-
-    if (zookeeper->exists(snapshot_path))
-        return;
-
-    std::vector<std::pair<String, String>> create_queries;
-    {
-        std::lock_guard lock{mutex};
-        create_queries.reserve(tables.size());
-        for (const auto & table : tables)
-        {
-            const String & name = table.first;
-            ReadBufferFromFile in(getObjectMetadataPath(name), METADATA_FILE_BUFFER_SIZE);
-            String attach_query;
-            readStringUntilEOF(attach_query, in);
-            create_queries.emplace_back(escapeForFileName(name), std::move(attach_query));
-        }
-    }
-
-    if (zookeeper->exists(snapshot_path))
-        return;
-
-    String queries_path = zookeeper_path + "/metadata/" + toString(log_entry_to_execute);
-    zookeeper->tryCreate(queries_path, "", zkutil::CreateMode::Persistent);
-    queries_path += '/';
-
-    //FIXME use tryMulti with MULTI_BATCH_SIZE
-
-    for (const auto & table : create_queries)
-        zookeeper->tryCreate(queries_path + table.first, table.second, zkutil::CreateMode::Persistent);
-
-    if (create_queries.size() != zookeeper->getChildren(zookeeper_path + "/metadata/" + toString(log_entry_to_execute)).size())
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Created invalid snapshot");
-
-    zookeeper->tryCreate(snapshot_path, String(), zkutil::CreateMode::Persistent);
-}
-
 void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 from_snapshot, bool create)
 {
     LOG_WARNING(log, "Will recover replica from snapshot", from_snapshot);
 
     //FIXME drop old tables
 
-    String snapshot_metadata_path = zookeeper_path + "/metadata/" + toString(from_snapshot);
+    String snapshot_metadata_path = zookeeper_path + "/metadata";
     Strings tables_in_snapshot = current_zookeeper->getChildren(snapshot_metadata_path);
-    current_zookeeper->get(zookeeper_path + "/snapshots/" + toString(from_snapshot));   /// Assert node exists
     snapshot_metadata_path += '/';
 
     for (const auto & table_name : tables_in_snapshot)
     {
+        //FIXME It's not atomic. We need multiget here (available since ZooKeeper 3.6.0).
         String query_to_execute = current_zookeeper->get(snapshot_metadata_path + table_name);
 
 
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index 3f5bd4608f1..663df59ac63 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -64,10 +64,8 @@ private:
     void writeLastExecutedToDiskAndZK();
 
     //void loadMetadataFromSnapshot();
-    void createSnapshot(const ZooKeeperPtr & zookeeper);
     void removeOutdatedSnapshotsAndLog();
 
-    Strings getSnapshots(const ZooKeeperPtr & zookeeper) const;
 
     void onUnexpectedLogEntry(const String & entry_name, const ZooKeeperPtr & zookeeper);
     void recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 from_snapshot, bool create = false);
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 1b9391b8725..a7309e9ae47 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -2415,4 +2415,17 @@ StorageID Context::resolveStorageIDImpl(StorageID storage_id, StorageNamespace w
     return StorageID::createEmpty();
 }
 
+void Context::initMetadataTransaction(MetadataTransactionPtr txn)
+{
+    assert(!metadata_transaction);
+    assert(query_context == this);
+    metadata_transaction = std::move(txn);
+}
+
+MetadataTransactionPtr Context::getMetadataTransaction() const
+{
+    assert(query_context == this);
+    return metadata_transaction;
+}
+
 }
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index c55d8e6d604..ed11fab7599 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -114,6 +114,8 @@ using VolumePtr = std::shared_ptr<IVolume>;
 struct NamedSession;
 struct BackgroundTaskSchedulingSettings;
 
+struct MetadataTransaction;
+using MetadataTransactionPtr = std::shared_ptr<MetadataTransaction>;
 
 #if USE_EMBEDDED_COMPILER
 class CompiledExpressionCache;
@@ -212,6 +214,12 @@ private:
                                    /// to be customized in HTTP and TCP servers by overloading the customizeContext(DB::Context&)
                                    /// methods.
 
+    MetadataTransactionPtr metadata_transaction;    /// Distributed DDL context. I'm not sure if it's a suitable place for this,
+                                                    /// but it's the easiest way to pass this through the whole stack from executeQuery(...)
+                                                    /// to DatabaseOnDisk::commitCreateTable(...) or IStorage::alter(...) without changing
+                                                    /// thousands of signatures.
+                                                    /// And I hope it will be replaced with more common Transaction sometime.
+
     /// Use copy constructor or createGlobal() instead
     Context();
 
@@ -634,6 +642,9 @@ public:
     IHostContextPtr & getHostContext();
     const IHostContextPtr & getHostContext() const;
 
+    void initMetadataTransaction(MetadataTransactionPtr txn);
+    MetadataTransactionPtr getMetadataTransaction() const;
+
     struct MySQLWireContext
     {
         uint8_t sequence_id = 0;
diff --git a/src/Interpreters/DDLTask.h b/src/Interpreters/DDLTask.h
index 51f09efd0bd..ba58fe3f42e 100644
--- a/src/Interpreters/DDLTask.h
+++ b/src/Interpreters/DDLTask.h
@@ -1,12 +1,14 @@
 #pragma once
 #include <Core/Types.h>
 #include <Interpreters/Cluster.h>
+#include <Common/ZooKeeper/Types.h>
 
 
 namespace DB
 {
 
 class ASTQueryWithOnCluster;
+using ZooKeeperPtr = std::shared_ptr<zkutil::ZooKeeper>;
 
 struct HostID
 {
@@ -62,6 +64,8 @@ struct DDLTask
     String entry_path;
     DDLLogEntry entry;
 
+    bool we_are_initiator = false;
+
     /// Stage 2: resolve host_id and check that
     HostID host_id;
     String host_id_str;
@@ -82,7 +86,25 @@ struct DDLTask
     bool was_executed = false;
 
     /// Stage 4: commit results to ZooKeeper
+
+    String active_path;
+    String finished_path;
+    String shard_path;
 };
 
 
+struct MetadataTransaction
+{
+    ZooKeeperPtr current_zookeeper;
+    String zookeeper_path;
+    Coordination::Requests ops;
+
+
+
+    void addOps(Coordination::Requests & other_ops)
+    {
+        std::move(ops.begin(), ops.end(), std::back_inserter(other_ops));
+    }
+};
+
 }
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 51f0e1b45a9..5e4d79c32ab 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -252,13 +252,35 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r
     String node_data;
     String entry_path = queue_dir + "/" + entry_name;
 
+    auto task = std::make_unique<DDLTask>();
+    task->entry_name = entry_name;
+    task->entry_path = entry_path;
+
     if (database_replicated_ext)
     {
-        auto expected_log_entry = DatabaseReplicatedExtensions::getLogEntryName(database_replicated_ext->first_not_executed);
-        if (entry_name != expected_log_entry)
+        //auto expected_log_entry = DatabaseReplicatedExtensions::getLogEntryName(database_replicated_ext->first_not_executed);
+        //if (entry_name != expected_log_entry)
+        //{
+        //    database_replicated_ext->lost_callback(entry_name, zookeeper);
+        //    out_reason = "DatabaseReplicated: expected " + expected_log_entry + " got " + entry_name;
+        //    return {};
+        //}
+
+        String initiator_name;
+        zkutil::EventPtr wait_committed_or_failed;
+
+        if (zookeeper->tryGet(entry_path + "/try", initiator_name, nullptr, wait_committed_or_failed))
         {
-            database_replicated_ext->lost_callback(entry_name, zookeeper);
-            out_reason = "DatabaseReplicated: expected " + expected_log_entry + " got " + entry_name;
+            task->we_are_initiator = initiator_name == database_replicated_ext->getFullReplicaName();
+            /// Query is not committed yet. We cannot just skip it and execute next one, because reordering may break replication.
+            //FIXME add some timeouts
+            if (!task->we_are_initiator)
+                wait_committed_or_failed->wait();
+        }
+
+        if (!task->we_are_initiator && !zookeeper->exists(entry_path + "/committed"))
+        {
+            out_reason = "Entry " + entry_name + " hasn't been committed";
             return {};
         }
     }
@@ -272,10 +294,6 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r
         return {};
     }
 
-    auto task = std::make_unique<DDLTask>();
-    task->entry_name = entry_name;
-    task->entry_path = entry_path;
-
     try
     {
         task->entry.parse(node_data);
@@ -557,15 +575,34 @@ bool DDLWorker::tryExecuteQuery(const String & query, const DDLTask & task, Exec
     try
     {
         auto current_context = std::make_unique<Context>(context);
+        current_context->makeQueryContext();
+        current_context->setCurrentQueryId(""); // generate random query_id
+
         if (database_replicated_ext)
         {
             current_context->getClientInfo().query_kind
                 = ClientInfo::QueryKind::REPLICATED_LOG_QUERY; //FIXME why do we need separate query kind?
             current_context->setCurrentDatabase(database_replicated_ext->database_name);
+
+            if (task.we_are_initiator)
+            {
+                auto txn = std::make_shared<MetadataTransaction>();
+                current_context->initMetadataTransaction(txn);
+                txn->current_zookeeper = current_zookeeper;
+                txn->zookeeper_path = database_replicated_ext->zookeeper_path;
+                txn->ops.emplace_back(zkutil::makeRemoveRequest(task.entry_path + "/try", -1));
+                txn->ops.emplace_back(zkutil::makeCreateRequest(task.entry_path + "/committed",
+                                                                database_replicated_ext->getFullReplicaName(), zkutil::CreateMode::Persistent));
+                txn->ops.emplace_back(zkutil::makeRemoveRequest(task.active_path, -1));
+                if (!task.shard_path.empty())
+                    txn->ops.emplace_back(zkutil::makeCreateRequest(task.shard_path, task.host_id_str, zkutil::CreateMode::Persistent));
+                txn->ops.emplace_back(zkutil::makeCreateRequest(task.finished_path, task.execution_status.serializeText(), zkutil::CreateMode::Persistent));
+                //txn->ops.emplace_back(zkutil::makeSetRequest(database_replicated_ext->getReplicaPath() + "/log_ptr", toString(database_replicated_ext->first_not_executed), -1));
+            }
         }
         else
             current_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY;
-        current_context->setCurrentQueryId(""); // generate random query_id
+
         executeQuery(istr, ostr, false, *current_context, {});
     }
     catch (...)
@@ -639,8 +676,9 @@ void DDLWorker::processTask(DDLTask & task)
     LOG_DEBUG(log, "Processing task {} ({})", task.entry_name, task.entry.query);
 
     String dummy;
-    String active_node_path = task.entry_path + "/active/" + task.host_id_str;
-    String finished_node_path = task.entry_path + "/finished/" + task.host_id_str;
+    //FIXME duplicate
+    String active_node_path = task.active_path = task.entry_path + "/active/" + task.host_id_str;
+    String finished_node_path = task.finished_path = task.entry_path + "/finished/" + task.host_id_str;
 
     auto code = zookeeper->tryCreate(active_node_path, "", zkutil::CreateMode::Ephemeral, dummy);
 
@@ -712,11 +750,15 @@ void DDLWorker::processTask(DDLTask & task)
     ops.emplace_back(zkutil::makeCreateRequest(finished_node_path, task.execution_status.serializeText(), zkutil::CreateMode::Persistent));
     if (database_replicated_ext)
     {
-        assert(DatabaseReplicatedExtensions::getLogEntryName(database_replicated_ext->first_not_executed) == task.entry_name);
-        ops.emplace_back(zkutil::makeSetRequest(database_replicated_ext->getReplicaPath() + "/log_ptr", toString(database_replicated_ext->first_not_executed), -1));
+        //assert(DatabaseReplicatedExtensions::getLogEntryName(database_replicated_ext->first_not_executed) == task.entry_name);
+        //ops.emplace_back(zkutil::makeSetRequest(database_replicated_ext->getReplicaPath() + "/log_ptr", toString(database_replicated_ext->first_not_executed), -1));
     }
 
-    zookeeper->multi(ops);
+    //FIXME replace with multi(...) or use MetadataTransaction
+    Coordination::Responses responses;
+    auto res = zookeeper->tryMulti(ops, responses);
+    if (res != Coordination::Error::ZNODEEXISTS && res != Coordination::Error::ZNONODE)
+        zkutil::KeeperMultiException::check(res, ops, responses);
 
     if (database_replicated_ext)
     {
@@ -774,6 +816,7 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
     else
         shard_node_name = get_shard_name(task.cluster->getShardsAddresses().at(task.host_shard_num));
     String shard_path = node_path + "/shards/" + shard_node_name;
+    task.shard_path = shard_path; //FIXME duplicate
     String is_executed_path = shard_path + "/executed";
     String tries_to_execute_path = shard_path + "/tries_to_execute";
     zookeeper->createAncestors(shard_path + "/");
@@ -826,7 +869,8 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
             /// and on the next iteration new leader will take lock
             if (tryExecuteQuery(rewritten_query, task, task.execution_status))
             {
-                zookeeper->create(is_executed_path, task.host_id_str, zkutil::CreateMode::Persistent);
+                //FIXME replace with create(...) or remove and use MetadataTransaction
+                zookeeper->createIfNotExists(is_executed_path, task.host_id_str);
                 executed_by_leader = true;
                 break;
             }
@@ -976,7 +1020,27 @@ String DDLWorker::enqueueQuery(DDLLogEntry & entry)
     String query_path_prefix = queue_dir + "/query-";
     zookeeper->createAncestors(query_path_prefix);
 
-    String node_path = zookeeper->create(query_path_prefix, entry.toString(), zkutil::CreateMode::PersistentSequential);
+    String node_path;
+    if (database_replicated_ext)
+    {
+        /// We cannot create sequential node and it's ephemeral child in a single transaction, so allocate sequential number another way
+        String counter_prefix = database_replicated_ext->zookeeper_path + "/counter/cnt-";
+        String counter_path = zookeeper->create(counter_prefix, "", zkutil::CreateMode::EphemeralSequential);
+        node_path = query_path_prefix + counter_path.substr(counter_prefix.size());
+
+        Coordination::Requests ops;
+        /// Query is not committed yet, but we have to write it into log to avoid reordering
+        ops.emplace_back(zkutil::makeCreateRequest(node_path, entry.toString(), zkutil::CreateMode::Persistent));
+        /// '/try' will be replaced with '/committed' or will be removed due to expired session or other error
+        ops.emplace_back(zkutil::makeCreateRequest(node_path + "/try", database_replicated_ext->getFullReplicaName(), zkutil::CreateMode::Ephemeral));
+        /// We don't need it anymore
+        ops.emplace_back(zkutil::makeRemoveRequest(counter_path, -1));
+        zookeeper->multi(ops);
+    }
+    else
+    {
+        node_path = zookeeper->create(query_path_prefix, entry.toString(), zkutil::CreateMode::PersistentSequential);
+    }
 
     /// Optional step
     try
diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h
index 08bf641264e..86677bfbb19 100644
--- a/src/Interpreters/DDLWorker.h
+++ b/src/Interpreters/DDLWorker.h
@@ -54,6 +54,11 @@ struct DatabaseReplicatedExtensions
         return zookeeper_path + "/replicas/" + shard_name + "/" + replica_name;
     }
 
+    String getFullReplicaName() const
+    {
+        return shard_name + '|' + replica_name;
+    }
+
     static String getLogEntryName(UInt32 log_entry_number);
     static UInt32 getLogEntryNumber(const String & log_entry_name);
 };
diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h
index 6c56565a152..20980a186cb 100644
--- a/src/Interpreters/SystemLog.h
+++ b/src/Interpreters/SystemLog.h
@@ -505,7 +505,9 @@ void SystemLog<LogElement>::prepareTable()
 
             LOG_DEBUG(log, "Existing table {} for system log has obsolete or different structure. Renaming it to {}", description, backQuoteIfNeed(to.table));
 
-            InterpreterRenameQuery(rename, context).execute();
+            Context query_context = context;
+            query_context.makeQueryContext();
+            InterpreterRenameQuery(rename, query_context).execute();
 
             /// The required table will be created.
             table = nullptr;
@@ -521,7 +523,10 @@ void SystemLog<LogElement>::prepareTable()
 
         auto create = getCreateTableQuery();
 
-        InterpreterCreateQuery interpreter(create, context);
+
+        Context query_context = context;
+        query_context.makeQueryContext();
+        InterpreterCreateQuery interpreter(create, query_context);
         interpreter.setInternal(true);
         interpreter.execute();
 
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index b93500000b5..5c176de1395 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -46,6 +46,7 @@
 #include <Interpreters/InterpreterAlterQuery.h>
 #include <Interpreters/PartLog.h>
 #include <Interpreters/Context.h>
+#include <Interpreters/DDLTask.h>
 
 #include <DataStreams/RemoteBlockInputStream.h>
 #include <DataStreams/copyData.h>
@@ -4104,6 +4105,12 @@ void StorageReplicatedMergeTree::alter(
                 zkutil::makeCreateRequest(mutations_path + "/", mutation_entry.toString(), zkutil::CreateMode::PersistentSequential));
         }
 
+        if (auto txn = query_context.getMetadataTransaction())
+        {
+            txn->addOps(ops);
+            //TODO maybe also change here table metadata in replicated database?
+        }
+
         Coordination::Responses results;
         Coordination::Error rc = zookeeper->tryMulti(ops, results);
 

From b3e2ebbaa5900f50eba8515f8cff682c3eaff2a5 Mon Sep 17 00:00:00 2001
From: Vladimir Chebotarev <vladimir.chebotarev@gmail.com>
Date: Mon, 23 Nov 2020 08:19:38 +0300
Subject: [PATCH 0060/1238] Used global region for accessing S3 if can't
 determine exactly.

---
 src/IO/S3/PocoHTTPClient.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp
index 4a5b79e31ea..b8b78a38985 100644
--- a/src/IO/S3/PocoHTTPClient.cpp
+++ b/src/IO/S3/PocoHTTPClient.cpp
@@ -71,6 +71,10 @@ void PocoHTTPClientConfiguration::updateSchemeAndRegion()
             boost::algorithm::to_lower(matched_region);
             region = matched_region;
         }
+        else
+        {
+            region = Aws::Region::AWS_GLOBAL;
+        }
     }
 }
 

From dad21ee684c5869d1c83b572cdec5c6f3bcb9130 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Tue, 24 Nov 2020 13:24:39 +0300
Subject: [PATCH 0061/1238] maintain metadata in zk

---
 src/Common/ZooKeeper/ZooKeeper.cpp            |  8 +++
 src/Databases/DatabaseAtomic.cpp              | 56 ++++++++++++++++-
 src/Databases/DatabaseAtomic.h                |  2 +-
 src/Databases/DatabaseOrdinary.cpp            |  4 +-
 src/Databases/DatabaseOrdinary.h              |  2 +-
 src/Databases/DatabaseReplicated.cpp          |  4 +-
 src/Interpreters/DDLWorker.cpp                | 24 +++-----
 src/Interpreters/InterpreterAlterQuery.cpp    |  4 +-
 src/Storages/StorageReplicatedMergeTree.cpp   | 30 ++++++++--
 .../test_replicated_database/test.py          | 60 +++++++++++--------
 10 files changed, 140 insertions(+), 54 deletions(-)

diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp
index bee875d1c74..09703e523bb 100644
--- a/src/Common/ZooKeeper/ZooKeeper.cpp
+++ b/src/Common/ZooKeeper/ZooKeeper.cpp
@@ -537,6 +537,14 @@ Coordination::Error ZooKeeper::trySet(const std::string & path, const std::strin
 
 Coordination::Error ZooKeeper::multiImpl(const Coordination::Requests & requests, Coordination::Responses & responses)
 {
+    String desc;
+    for (const auto & r : requests)
+    {
+        auto & r_ref = *r;
+        desc += String(typeid(r_ref).name()) + "\t" + r->getPath() + "\n";
+    }
+    LOG_TRACE(&Poco::Logger::get("ZKTX"), "zk multi {}", desc);
+
     if (requests.empty())
         return Coordination::Error::ZOK;
 
diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp
index 78400368924..ca39cefc5c8 100644
--- a/src/Databases/DatabaseAtomic.cpp
+++ b/src/Databases/DatabaseAtomic.cpp
@@ -4,6 +4,7 @@
 #include <Poco/Path.h>
 #include <IO/ReadHelpers.h>
 #include <IO/WriteHelpers.h>
+#include <IO/ReadBufferFromFile.h>
 #include <Parsers/formatAST.h>
 #include <Common/renameat2.h>
 #include <Storages/StorageMaterializedView.h>
@@ -108,7 +109,7 @@ StoragePtr DatabaseAtomic::detachTable(const String & name)
     return table;
 }
 
-void DatabaseAtomic::dropTable(const Context &, const String & table_name, bool no_delay)
+void DatabaseAtomic::dropTable(const Context & context, const String & table_name, bool no_delay)
 {
     String table_metadata_path = getObjectMetadataPath(table_name);
     String table_metadata_path_drop;
@@ -117,6 +118,16 @@ void DatabaseAtomic::dropTable(const Context &, const String & table_name, bool
         std::unique_lock lock(mutex);
         table = getTableUnlocked(table_name, lock);
         table_metadata_path_drop = DatabaseCatalog::instance().getPathForDroppedMetadata(table->getStorageID());
+
+        if (auto txn = context.getMetadataTransaction())
+        {
+            String metadata_zk_path = txn->zookeeper_path + "/metadata/" + escapeForFileName(table_name);
+            txn->ops.emplace_back(zkutil::makeRemoveRequest(metadata_zk_path, -1));
+            txn->current_zookeeper->multi(txn->ops);     /// Commit point (a sort of) for Replicated database
+            /// NOTE: replica will be lost if server crashes before the following rename
+            /// TODO better detection and recovery
+        }
+
         Poco::File(table_metadata_path).renameTo(table_metadata_path_drop);    /// Mark table as dropped
         DatabaseWithDictionaries::detachTableUnlocked(table_name, lock);       /// Should never throw
         table_name_to_path.erase(table_name);
@@ -146,6 +157,8 @@ void DatabaseAtomic::renameTable(const Context & context, const String & table_n
 
     if (exchange && dictionary)
         throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot exchange dictionaries");
+    if (exchange && !supportsRenameat2())
+        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "RENAME EXCHANGE is not supported");
 
     auto & other_db = dynamic_cast<DatabaseAtomic &>(to_database);
     bool inside_database = this == &other_db;
@@ -231,6 +244,33 @@ void DatabaseAtomic::renameTable(const Context & context, const String & table_n
     }
 
     /// Table renaming actually begins here
+    if (auto txn = context.getMetadataTransaction())
+    {
+        String statement;
+        String statement_to;
+        {
+            ReadBufferFromFile in(old_metadata_path, 4096);
+            readStringUntilEOF(statement, in);
+            if (exchange)
+            {
+                ReadBufferFromFile in_to(new_metadata_path, 4096);
+                readStringUntilEOF(statement_to, in_to);
+            }
+        }
+        String metadata_zk_path = txn->zookeeper_path + "/metadata/" + escapeForFileName(table_name);
+        String metadata_zk_path_to = txn->zookeeper_path + "/metadata/" + escapeForFileName(to_table_name);
+        txn->ops.emplace_back(zkutil::makeRemoveRequest(metadata_zk_path, -1));
+        if (exchange)
+        {
+            txn->ops.emplace_back(zkutil::makeRemoveRequest(metadata_zk_path_to, -1));
+            txn->ops.emplace_back(zkutil::makeCreateRequest(metadata_zk_path, statement_to, zkutil::CreateMode::Persistent));
+        }
+        txn->ops.emplace_back(zkutil::makeCreateRequest(metadata_zk_path_to, statement, zkutil::CreateMode::Persistent));
+        txn->current_zookeeper->multi(txn->ops);     /// Commit point (a sort of) for Replicated database
+        /// NOTE: replica will be lost if server crashes before the following rename
+        /// TODO better detection and recovery
+    }
+
     if (exchange)
         renameExchange(old_metadata_path, new_metadata_path);
     else
@@ -312,7 +352,7 @@ void DatabaseAtomic::commitCreateTable(const ASTCreateQuery & query, const Stora
         tryCreateSymlink(query.table, table_data_path);
 }
 
-void DatabaseAtomic::commitAlterTable(const StorageID & table_id, const String & table_metadata_tmp_path, const String & table_metadata_path)
+void DatabaseAtomic::commitAlterTable(const StorageID & table_id, const String & table_metadata_tmp_path, const String & table_metadata_path, const String & statement, const Context & query_context)
 {
     bool check_file_exists = true;
     SCOPE_EXIT({ std::error_code code; if (check_file_exists) std::filesystem::remove(table_metadata_tmp_path, code); });
@@ -323,6 +363,18 @@ void DatabaseAtomic::commitAlterTable(const StorageID & table_id, const String &
     if (table_id.uuid != actual_table_id.uuid)
         throw Exception("Cannot alter table because it was renamed", ErrorCodes::CANNOT_ASSIGN_ALTER);
 
+    if (&query_context != &query_context.getGlobalContext())    // FIXME
+    {
+        if (auto txn = query_context.getMetadataTransaction())
+        {
+            String metadata_zk_path = txn->zookeeper_path + "/metadata/" + escapeForFileName(table_id.table_name);
+            txn->ops.emplace_back(zkutil::makeSetRequest(metadata_zk_path, statement, -1));
+            txn->current_zookeeper->multi(txn->ops); /// Commit point (a sort of) for Replicated database
+            /// NOTE: replica will be lost if server crashes before the following rename
+            /// TODO better detection and recovery
+        }
+    }
+
     check_file_exists = renameExchangeIfSupported(table_metadata_tmp_path, table_metadata_path);
     if (!check_file_exists)
         std::filesystem::rename(table_metadata_tmp_path, table_metadata_path);
diff --git a/src/Databases/DatabaseAtomic.h b/src/Databases/DatabaseAtomic.h
index 61ce2721701..9cc6a429656 100644
--- a/src/Databases/DatabaseAtomic.h
+++ b/src/Databases/DatabaseAtomic.h
@@ -61,7 +61,7 @@ public:
     void waitDetachedTableNotInUse(const UUID & uuid);
 
 protected:
-    void commitAlterTable(const StorageID & table_id, const String & table_metadata_tmp_path, const String & table_metadata_path) override;
+    void commitAlterTable(const StorageID & table_id, const String & table_metadata_tmp_path, const String & table_metadata_path, const String & statement, const Context & query_context) override;
     void commitCreateTable(const ASTCreateQuery & query, const StoragePtr & table,
                            const String & table_metadata_tmp_path, const String & table_metadata_path, const Context & query_context) override;
 
diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp
index b363058c0c6..3df0d8fe907 100644
--- a/src/Databases/DatabaseOrdinary.cpp
+++ b/src/Databases/DatabaseOrdinary.cpp
@@ -312,10 +312,10 @@ void DatabaseOrdinary::alterTable(const Context & context, const StorageID & tab
         out.close();
     }
 
-    commitAlterTable(table_id, table_metadata_tmp_path, table_metadata_path);
+    commitAlterTable(table_id, table_metadata_tmp_path, table_metadata_path, statement, context);
 }
 
-void DatabaseOrdinary::commitAlterTable(const StorageID &, const String & table_metadata_tmp_path, const String & table_metadata_path)
+void DatabaseOrdinary::commitAlterTable(const StorageID &, const String & table_metadata_tmp_path, const String & table_metadata_path, const String & /*statement*/, const Context & /*query_context*/)
 {
     try
     {
diff --git a/src/Databases/DatabaseOrdinary.h b/src/Databases/DatabaseOrdinary.h
index b5ea286ef15..6a21e19d5e2 100644
--- a/src/Databases/DatabaseOrdinary.h
+++ b/src/Databases/DatabaseOrdinary.h
@@ -30,7 +30,7 @@ public:
         const StorageInMemoryMetadata & metadata) override;
 
 protected:
-    virtual void commitAlterTable(const StorageID & table_id, const String & table_metadata_tmp_path, const String & table_metadata_path);
+    virtual void commitAlterTable(const StorageID & table_id, const String & table_metadata_tmp_path, const String & table_metadata_path, const String & statement, const Context & query_context);
 
     void startupTables(ThreadPool & thread_pool);
 };
diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index 608d03c339b..25fb95ba0de 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -146,8 +146,6 @@ void DatabaseReplicated::createReplicaNodesInZooKeeper(const zkutil::ZooKeeperPt
     entry.query = {};
     entry.initiator = {};
 
-    recoverLostReplica(current_zookeeper, log_entry_to_execute, true);
-
     String query_path_prefix = zookeeper_path + "/log/query-";
     String counter_prefix = zookeeper_path + "/counter/cnt-";
     String counter_path = current_zookeeper->create(counter_prefix, "", zkutil::CreateMode::EphemeralSequential);
@@ -165,6 +163,8 @@ void DatabaseReplicated::loadStoredObjects(Context & context, bool has_force_res
 {
     DatabaseAtomic::loadStoredObjects(context, has_force_restore_data_flag, force_attach);
 
+    recoverLostReplica(global_context.getZooKeeper(), 0, true); //FIXME
+
     DatabaseReplicatedExtensions ext;
     ext.database_uuid = getUUID();
     ext.zookeeper_path = zookeeper_path;
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 5e4d79c32ab..099b968d895 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -258,16 +258,8 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r
 
     if (database_replicated_ext)
     {
-        //auto expected_log_entry = DatabaseReplicatedExtensions::getLogEntryName(database_replicated_ext->first_not_executed);
-        //if (entry_name != expected_log_entry)
-        //{
-        //    database_replicated_ext->lost_callback(entry_name, zookeeper);
-        //    out_reason = "DatabaseReplicated: expected " + expected_log_entry + " got " + entry_name;
-        //    return {};
-        //}
-
         String initiator_name;
-        zkutil::EventPtr wait_committed_or_failed;
+        zkutil::EventPtr wait_committed_or_failed = std::make_shared<Poco::Event>();
 
         if (zookeeper->tryGet(entry_path + "/try", initiator_name, nullptr, wait_committed_or_failed))
         {
@@ -275,7 +267,10 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r
             /// Query is not committed yet. We cannot just skip it and execute next one, because reordering may break replication.
             //FIXME add some timeouts
             if (!task->we_are_initiator)
+            {
+                LOG_TRACE(log, "Waiting for initiator {} to commit or rollback entry {}", initiator_name, entry_path);
                 wait_committed_or_failed->wait();
+            }
         }
 
         if (!task->we_are_initiator && !zookeeper->exists(entry_path + "/committed"))
@@ -378,7 +373,10 @@ void DDLWorker::scheduleTasks()
     Strings queue_nodes = zookeeper->getChildren(queue_dir, nullptr, queue_updated_event);
     filterAndSortQueueNodes(queue_nodes);
     if (queue_nodes.empty())
+    {
+        LOG_TRACE(log, "No tasks to schedule");
         return;
+    }
 
     bool server_startup = last_tasks.empty();
 
@@ -389,6 +387,7 @@ void DDLWorker::scheduleTasks()
     for (auto it = begin_node; it != queue_nodes.end() && !stop_flag; ++it)
     {
         String entry_name = *it;
+        LOG_TRACE(log, "Checking task {}", entry_name);
 
         String reason;
         auto task = initAndCheckTask(entry_name, reason, zookeeper);
@@ -748,11 +747,6 @@ void DDLWorker::processTask(DDLTask & task)
     Coordination::Requests ops;
     ops.emplace_back(zkutil::makeRemoveRequest(active_node_path, -1));
     ops.emplace_back(zkutil::makeCreateRequest(finished_node_path, task.execution_status.serializeText(), zkutil::CreateMode::Persistent));
-    if (database_replicated_ext)
-    {
-        //assert(DatabaseReplicatedExtensions::getLogEntryName(database_replicated_ext->first_not_executed) == task.entry_name);
-        //ops.emplace_back(zkutil::makeSetRequest(database_replicated_ext->getReplicaPath() + "/log_ptr", toString(database_replicated_ext->first_not_executed), -1));
-    }
 
     //FIXME replace with multi(...) or use MetadataTransaction
     Coordination::Responses responses;
@@ -816,8 +810,8 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
     else
         shard_node_name = get_shard_name(task.cluster->getShardsAddresses().at(task.host_shard_num));
     String shard_path = node_path + "/shards/" + shard_node_name;
-    task.shard_path = shard_path; //FIXME duplicate
     String is_executed_path = shard_path + "/executed";
+    task.shard_path = is_executed_path; //FIXME duplicate
     String tries_to_execute_path = shard_path + "/tries_to_execute";
     zookeeper->createAncestors(shard_path + "/");
 
diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp
index c094bb8377c..5f6058b48c0 100644
--- a/src/Interpreters/InterpreterAlterQuery.cpp
+++ b/src/Interpreters/InterpreterAlterQuery.cpp
@@ -51,9 +51,11 @@ BlockIO InterpreterAlterQuery::execute()
     auto metadata_snapshot = table->getInMemoryMetadataPtr();
 
     DatabasePtr database = DatabaseCatalog::instance().getDatabase(table_id.database_name);
-    if (typeid_cast<DatabaseReplicated *>(database.get()) && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY && !table->supportsReplication())
+    if (typeid_cast<DatabaseReplicated *>(database.get()) && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
         return typeid_cast<DatabaseReplicated *>(database.get())->propose(query_ptr);
 
+    //FIXME commit MetadataTransaction for all ALTER kinds. Now its' implemented only for metadata alter.
+
     /// Add default database to table identifiers that we can encounter in e.g. default expressions,
     /// mutation expression, etc.
     AddDefaultDatabaseVisitor visitor(table_id.getDatabaseName());
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index 5c176de1395..9db2821502d 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -30,6 +30,7 @@
 #include <Disks/StoragePolicy.h>
 
 #include <Databases/IDatabase.h>
+#include <Databases/DatabaseOnDisk.h>
 
 #include <Parsers/formatAST.h>
 #include <Parsers/ASTDropQuery.h>
@@ -4047,6 +4048,8 @@ void StorageReplicatedMergeTree::alter(
             future_metadata_in_zk.constraints = new_constraints_str;
 
         Coordination::Requests ops;
+        size_t alter_path_idx = std::numeric_limits<size_t>::max();
+        size_t mutation_path_idx = std::numeric_limits<size_t>::max();
 
         String new_metadata_str = future_metadata_in_zk.toString();
         ops.emplace_back(zkutil::makeSetRequest(zookeeper_path + "/metadata", new_metadata_str, metadata_version));
@@ -4078,6 +4081,7 @@ void StorageReplicatedMergeTree::alter(
             *current_metadata, query_context.getSettingsRef().materialize_ttl_after_modify, query_context);
         alter_entry->have_mutation = !maybe_mutation_commands.empty();
 
+        alter_path_idx = ops.size();
         ops.emplace_back(zkutil::makeCreateRequest(
             zookeeper_path + "/log/log-", alter_entry->toString(), zkutil::CreateMode::PersistentSequential));
 
@@ -4101,6 +4105,7 @@ void StorageReplicatedMergeTree::alter(
             mutation_entry.create_time = time(nullptr);
 
             ops.emplace_back(zkutil::makeSetRequest(mutations_path, String(), mutations_stat.version));
+            mutation_path_idx = ops.size();
             ops.emplace_back(
                 zkutil::makeCreateRequest(mutations_path + "/", mutation_entry.toString(), zkutil::CreateMode::PersistentSequential));
         }
@@ -4108,7 +4113,24 @@ void StorageReplicatedMergeTree::alter(
         if (auto txn = query_context.getMetadataTransaction())
         {
             txn->addOps(ops);
-            //TODO maybe also change here table metadata in replicated database?
+            /// NOTE: IDatabase::alterTable(...) is called when executing ALTER_METADATA queue entry without query context,
+            /// so we have to update metadata of DatabaseReplicated here.
+            /// It also may cause "Table columns structure in ZooKeeper is different" error on server startup
+            /// even for Ordinary and Atomic databases.
+            String metadata_zk_path = txn->zookeeper_path + "/metadata/" + escapeForFileName(table_id.table_name);
+            auto ast = DatabaseCatalog::instance().getDatabase(table_id.database_name)->getCreateTableQuery(table_id.table_name, query_context);
+            auto & ast_create_query = ast->as<ASTCreateQuery &>();
+
+            //FIXME copy-paste
+            ASTPtr new_columns = InterpreterCreateQuery::formatColumns(future_metadata.columns);
+            ASTPtr new_indices = InterpreterCreateQuery::formatIndices(future_metadata.secondary_indices);
+            ASTPtr new_constraints = InterpreterCreateQuery::formatConstraints(future_metadata.constraints);
+
+            ast_create_query.columns_list->replace(ast_create_query.columns_list->columns, new_columns);
+            ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->indices, new_indices);
+            ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->constraints, new_constraints);
+
+            ops.emplace_back(zkutil::makeSetRequest(metadata_zk_path, getObjectDefinitionFromCreateQuery(ast), -1));
         }
 
         Coordination::Responses results;
@@ -4124,17 +4146,17 @@ void StorageReplicatedMergeTree::alter(
             if (alter_entry->have_mutation)
             {
                 /// ALTER_METADATA record in replication /log
-                String alter_path = dynamic_cast<const Coordination::CreateResponse &>(*results[2]).path_created;
+                String alter_path = dynamic_cast<const Coordination::CreateResponse &>(*results[alter_path_idx]).path_created;
                 alter_entry->znode_name = alter_path.substr(alter_path.find_last_of('/') + 1);
 
                 /// ReplicatedMergeTreeMutationEntry record in /mutations
-                String mutation_path = dynamic_cast<const Coordination::CreateResponse &>(*results.back()).path_created;
+                String mutation_path = dynamic_cast<const Coordination::CreateResponse &>(*results[mutation_path_idx]).path_created;
                 mutation_znode = mutation_path.substr(mutation_path.find_last_of('/') + 1);
             }
             else
             {
                 /// ALTER_METADATA record in replication /log
-                String alter_path = dynamic_cast<const Coordination::CreateResponse &>(*results.back()).path_created;
+                String alter_path = dynamic_cast<const Coordination::CreateResponse &>(*results[alter_path_idx]).path_created;
                 alter_entry->znode_name = alter_path.substr(alter_path.find_last_of('/') + 1);
             }
             break;
diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py
index 06d8aa9467a..11bfbad393b 100644
--- a/tests/integration/test_replicated_database/test.py
+++ b/tests/integration/test_replicated_database/test.py
@@ -16,7 +16,7 @@ snapshot_recovering_node = cluster.add_instance('snapshot_recovering_node', main
 uuid_regex = re.compile("[0-9a-f]{8}\-[0-9a-f]{4}\-[0-9a-f]{4}\-[0-9a-f]{4}\-[0-9a-f]{12}")
 def assert_create_query(nodes, table_name, expected):
     replace_uuid = lambda x: re.sub(uuid_regex, "uuid", x)
-    query = "show create table testdb.{}".format(table_name)
+    query = "show create table {}".format(table_name)
     for node in nodes:
         assert_eq_with_retry(node, query, expected, get_result=replace_uuid)
 
@@ -41,45 +41,53 @@ def test_create_replicated_table(started_cluster):
     expected = "CREATE TABLE testdb.replicated_table\\n(\\n    `d` Date,\\n    `k` UInt64,\\n    `i32` Int32\\n)\\n" \
                "ENGINE = ReplicatedMergeTree(\\'/clickhouse/tables/uuid/{shard}\\', \\'{replica}\\')\\n" \
                "PARTITION BY toYYYYMM(d)\\nORDER BY k\\nSETTINGS index_granularity = 8192"
-    assert_create_query([main_node, dummy_node], "replicated_table", expected)
+    assert_create_query([main_node, dummy_node], "testdb.replicated_table", expected)
     # assert without replacing uuid
     assert main_node.query("show create testdb.replicated_table") == dummy_node.query("show create testdb.replicated_table")
 
-def test_simple_alter_table(started_cluster):
-    #TODO add test with ReplicatedMergeTree
-    main_node.query("CREATE TABLE testdb.alter_test "
+@pytest.mark.parametrize("engine", ['MergeTree', 'ReplicatedMergeTree'])
+def test_simple_alter_table(started_cluster, engine):
+    name  = "testdb.alter_test_{}".format(engine)
+    main_node.query("CREATE TABLE {} "
                     "(CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) "
-                    "ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192);")
-    main_node.query("ALTER TABLE testdb.alter_test ADD COLUMN Added0 UInt32;")
-    main_node.query("ALTER TABLE testdb.alter_test ADD COLUMN Added2 UInt32;")
-    main_node.query("ALTER TABLE testdb.alter_test ADD COLUMN Added1 UInt32 AFTER Added0;")
-    main_node.query("ALTER TABLE testdb.alter_test ADD COLUMN AddedNested1 Nested(A UInt32, B UInt64) AFTER Added2;")
-    main_node.query("ALTER TABLE testdb.alter_test ADD COLUMN AddedNested1.C Array(String) AFTER AddedNested1.B;")
-    main_node.query("ALTER TABLE testdb.alter_test ADD COLUMN AddedNested2 Nested(A UInt32, B UInt64) AFTER AddedNested1;")
+                    "ENGINE = {} PARTITION BY StartDate ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID);".format(name, engine))
+    main_node.query("ALTER TABLE {} ADD COLUMN Added0 UInt32;".format(name))
+    main_node.query("ALTER TABLE {} ADD COLUMN Added2 UInt32;".format(name))
+    main_node.query("ALTER TABLE {} ADD COLUMN Added1 UInt32 AFTER Added0;".format(name))
+    main_node.query("ALTER TABLE {} ADD COLUMN AddedNested1 Nested(A UInt32, B UInt64) AFTER Added2;".format(name))
+    main_node.query("ALTER TABLE {} ADD COLUMN AddedNested1.C Array(String) AFTER AddedNested1.B;".format(name))
+    main_node.query("ALTER TABLE {} ADD COLUMN AddedNested2 Nested(A UInt32, B UInt64) AFTER AddedNested1;".format(name))
 
-    expected = "CREATE TABLE testdb.alter_test\\n(\\n    `CounterID` UInt32,\\n    `StartDate` Date,\\n    `UserID` UInt32,\\n" \
+    full_engine = engine if not "Replicated" in engine else engine + "(\\'/clickhouse/tables/uuid/{shard}\\', \\'{replica}\\')"
+    expected = "CREATE TABLE {}\\n(\\n    `CounterID` UInt32,\\n    `StartDate` Date,\\n    `UserID` UInt32,\\n" \
                "    `VisitID` UInt32,\\n    `NestedColumn.A` Array(UInt8),\\n    `NestedColumn.S` Array(String),\\n" \
                "    `ToDrop` UInt32,\\n    `Added0` UInt32,\\n    `Added1` UInt32,\\n    `Added2` UInt32,\\n" \
                "    `AddedNested1.A` Array(UInt32),\\n    `AddedNested1.B` Array(UInt64),\\n    `AddedNested1.C` Array(String),\\n" \
                "    `AddedNested2.A` Array(UInt32),\\n    `AddedNested2.B` Array(UInt64)\\n)\\n" \
-               "ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192)"
+               "ENGINE = {}\\nPARTITION BY StartDate\\nORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)\\n" \
+               "SETTINGS index_granularity = 8192".format(name, full_engine)
 
-    assert_create_query([main_node, dummy_node], "alter_test", expected)
+    assert_create_query([main_node, dummy_node], name, expected)
 
-def test_create_replica_after_delay(started_cluster):
+
+@pytest.mark.parametrize("engine", ['MergeTree', 'ReplicatedMergeTree'])
+def test_create_replica_after_delay(started_cluster, engine):
     competing_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica3');")
 
-    main_node.query("ALTER TABLE testdb.alter_test ADD COLUMN Added3 UInt32;")
-    main_node.query("ALTER TABLE testdb.alter_test DROP COLUMN AddedNested1;")
-    main_node.query("ALTER TABLE testdb.alter_test RENAME COLUMN Added1 TO AddedNested1;")
+    name  = "testdb.alter_test_{}".format(engine)
+    main_node.query("ALTER TABLE {} ADD COLUMN Added3 UInt32;".format(name))
+    main_node.query("ALTER TABLE {} DROP COLUMN AddedNested1;".format(name))
+    main_node.query("ALTER TABLE {} RENAME COLUMN Added1 TO AddedNested1;".format(name))
 
-    expected = "CREATE TABLE testdb.alter_test\\n(\\n    `CounterID` UInt32,\\n    `StartDate` Date,\\n    `UserID` UInt32,\\n" \
+    full_engine = engine if not "Replicated" in engine else engine + "(\\'/clickhouse/tables/uuid/{shard}\\', \\'{replica}\\')"
+    expected = "CREATE TABLE {}\\n(\\n    `CounterID` UInt32,\\n    `StartDate` Date,\\n    `UserID` UInt32,\\n" \
                "    `VisitID` UInt32,\\n    `NestedColumn.A` Array(UInt8),\\n    `NestedColumn.S` Array(String),\\n" \
                "    `ToDrop` UInt32,\\n    `Added0` UInt32,\\n    `AddedNested1` UInt32,\\n    `Added2` UInt32,\\n" \
                "    `AddedNested2.A` Array(UInt32),\\n    `AddedNested2.B` Array(UInt64),\\n    `Added3` UInt32\\n)\\n" \
-               "ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192)"
+               "ENGINE = {}\\nPARTITION BY StartDate\\nORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)\\n" \
+               "SETTINGS index_granularity = 8192".format(name, full_engine)
 
-    assert_create_query([main_node, dummy_node, competing_node], "alter_test", expected)
+    assert_create_query([main_node, dummy_node, competing_node], name, expected)
 
 def test_alters_from_different_replicas(started_cluster):
     main_node.query("CREATE TABLE testdb.concurrent_test "
@@ -103,7 +111,7 @@ def test_alters_from_different_replicas(started_cluster):
                "    `AddedNested2.B` Array(UInt64)\\n)\\n" \
                "ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192)"
 
-    assert_create_query([main_node, competing_node], "concurrent_test", expected)
+    assert_create_query([main_node, competing_node], "testdb.concurrent_test", expected)
 
 def test_drop_and_create_table(started_cluster):
     main_node.query("DROP TABLE testdb.concurrent_test")
@@ -115,7 +123,7 @@ def test_drop_and_create_table(started_cluster):
                "    `VisitID` UInt32,\\n    `NestedColumn.A` Array(UInt8),\\n    `NestedColumn.S` Array(String),\\n    `ToDrop` UInt32\\n)\\n" \
                "ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192)"
 
-    assert_create_query([main_node, competing_node], "concurrent_test", expected)
+    assert_create_query([main_node, competing_node], "testdb.concurrent_test", expected)
 
 def test_replica_restart(started_cluster):
     main_node.restart_clickhouse()
@@ -124,7 +132,7 @@ def test_replica_restart(started_cluster):
                "    `VisitID` UInt32,\\n    `NestedColumn.A` Array(UInt8),\\n    `NestedColumn.S` Array(String),\\n    `ToDrop` UInt32\\n)\\n" \
                "ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192)"
 
-    assert_create_query([main_node, competing_node], "concurrent_test", expected)
+    assert_create_query([main_node, competing_node], "testdb.concurrent_test", expected)
 
 def test_snapshot_and_snapshot_recover(started_cluster):
     #FIXME bad test
@@ -142,7 +150,7 @@ def test_drop_and_create_replica(started_cluster):
                "    `VisitID` UInt32,\\n    `NestedColumn.A` Array(UInt8),\\n    `NestedColumn.S` Array(String),\\n    `ToDrop` UInt32\\n)\\n" \
                "ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192)"
 
-    assert_create_query([main_node, competing_node], "concurrent_test", expected)
+    assert_create_query([main_node, competing_node], "testdb.concurrent_test", expected)
 
 #TODO tests with Distributed
 

From f1a52a609bd6ced447fbb2cb4102675c798e32c0 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Fri, 27 Nov 2020 17:04:03 +0300
Subject: [PATCH 0062/1238] separate DatabaseReplicatedDDLWorker

---
 src/Databases/DatabaseAtomic.cpp              |   4 +-
 src/Databases/DatabaseAtomic.h                |   4 +-
 src/Databases/DatabaseLazy.cpp                |   2 +-
 src/Databases/DatabaseLazy.h                  |   2 +-
 src/Databases/DatabaseOnDisk.cpp              |   2 +-
 src/Databases/DatabaseOnDisk.h                |   2 +-
 src/Databases/DatabaseOrdinary.cpp            |   4 +-
 src/Databases/DatabaseOrdinary.h              |   4 +-
 src/Databases/DatabaseReplicated.cpp          |  91 +++--
 src/Databases/DatabaseReplicated.h            |  13 +-
 src/Databases/DatabaseReplicatedWorker.cpp    | 114 ++++++
 src/Databases/DatabaseReplicatedWorker.h      |  26 ++
 src/Databases/DatabaseWithDictionaries.cpp    |   2 +-
 src/Databases/DatabaseWithDictionaries.h      |   2 +-
 src/Interpreters/Context.cpp                  |   3 +-
 src/Interpreters/DDLTask.cpp                  | 280 +++++++++++++
 src/Interpreters/DDLTask.h                    |  85 +++-
 src/Interpreters/DDLWorker.cpp                | 371 ++----------------
 src/Interpreters/DDLWorker.h                  |  64 +--
 .../configs/config.xml                        |   3 +
 .../configs/disable_snapshots.xml             |   3 -
 .../configs/snapshot_each_query.xml           |   3 -
 .../test_replicated_database/test.py          |  21 +-
 23 files changed, 639 insertions(+), 466 deletions(-)
 create mode 100644 src/Databases/DatabaseReplicatedWorker.cpp
 create mode 100644 src/Databases/DatabaseReplicatedWorker.h
 create mode 100644 tests/integration/test_replicated_database/configs/config.xml
 delete mode 100644 tests/integration/test_replicated_database/configs/disable_snapshots.xml
 delete mode 100644 tests/integration/test_replicated_database/configs/snapshot_each_query.xml

diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp
index ca39cefc5c8..a444d9cc200 100644
--- a/src/Databases/DatabaseAtomic.cpp
+++ b/src/Databases/DatabaseAtomic.cpp
@@ -38,12 +38,12 @@ public:
     UUID uuid() const override { return table()->getStorageID().uuid; }
 };
 
-DatabaseAtomic::DatabaseAtomic(String name_, String metadata_path_, UUID uuid, Context & context_)
+DatabaseAtomic::DatabaseAtomic(String name_, String metadata_path_, UUID uuid, const Context & context_)
     : DatabaseAtomic(name_, metadata_path_, uuid, "DatabaseAtomic (" + name_ + ")", context_)
 {
 }
 
-DatabaseAtomic::DatabaseAtomic(String name_, String metadata_path_, UUID uuid, const String & logger, Context & context_)
+DatabaseAtomic::DatabaseAtomic(String name_, String metadata_path_, UUID uuid, const String & logger, const Context & context_)
     : DatabaseOrdinary(name_, std::move(metadata_path_), "store/", logger, context_)
     , path_to_table_symlinks(global_context.getPath() + "data/" + escapeForFileName(name_) + "/")
     , path_to_metadata_symlink(global_context.getPath() + "metadata/" + escapeForFileName(name_))
diff --git a/src/Databases/DatabaseAtomic.h b/src/Databases/DatabaseAtomic.h
index 9cc6a429656..e9cb418c787 100644
--- a/src/Databases/DatabaseAtomic.h
+++ b/src/Databases/DatabaseAtomic.h
@@ -20,8 +20,8 @@ namespace DB
 class DatabaseAtomic : public DatabaseOrdinary
 {
 public:
-    DatabaseAtomic(String name_, String metadata_path_, UUID uuid, Context & context_);
-    DatabaseAtomic(String name_, String metadata_path_, UUID uuid, const String & logger, Context & context_);
+    DatabaseAtomic(String name_, String metadata_path_, UUID uuid, const Context & context_);
+    DatabaseAtomic(String name_, String metadata_path_, UUID uuid, const String & logger, const Context & context_);
 
     String getEngineName() const override { return "Atomic"; }
     UUID getUUID() const override { return db_uuid; }
diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp
index a4ace4bde9b..0119f17f843 100644
--- a/src/Databases/DatabaseLazy.cpp
+++ b/src/Databases/DatabaseLazy.cpp
@@ -27,7 +27,7 @@ namespace ErrorCodes
 }
 
 
-DatabaseLazy::DatabaseLazy(const String & name_, const String & metadata_path_, time_t expiration_time_, Context & context_)
+DatabaseLazy::DatabaseLazy(const String & name_, const String & metadata_path_, time_t expiration_time_, const Context & context_)
     : DatabaseOnDisk(name_, metadata_path_, "data/" + escapeForFileName(name_) + "/", "DatabaseLazy (" + name_ + ")", context_)
     , expiration_time(expiration_time_)
 {
diff --git a/src/Databases/DatabaseLazy.h b/src/Databases/DatabaseLazy.h
index 0893b085fae..2d091297c91 100644
--- a/src/Databases/DatabaseLazy.h
+++ b/src/Databases/DatabaseLazy.h
@@ -18,7 +18,7 @@ class Context;
 class DatabaseLazy final : public DatabaseOnDisk
 {
 public:
-    DatabaseLazy(const String & name_, const String & metadata_path_, time_t expiration_time_, Context & context_);
+    DatabaseLazy(const String & name_, const String & metadata_path_, time_t expiration_time_, const Context & context_);
 
     String getEngineName() const override { return "Lazy"; }
 
diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp
index 8f24f53fc3f..18941ba7c04 100644
--- a/src/Databases/DatabaseOnDisk.cpp
+++ b/src/Databases/DatabaseOnDisk.cpp
@@ -131,7 +131,7 @@ DatabaseOnDisk::DatabaseOnDisk(
     const String & metadata_path_,
     const String & data_path_,
     const String & logger,
-    Context & context)
+    const Context & context)
     : DatabaseWithOwnTablesBase(name, logger, context)
     , metadata_path(metadata_path_)
     , data_path(data_path_)
diff --git a/src/Databases/DatabaseOnDisk.h b/src/Databases/DatabaseOnDisk.h
index a5510ef4810..f5b9ea0c0d5 100644
--- a/src/Databases/DatabaseOnDisk.h
+++ b/src/Databases/DatabaseOnDisk.h
@@ -31,7 +31,7 @@ String getObjectDefinitionFromCreateQuery(const ASTPtr & query);
 class DatabaseOnDisk : public DatabaseWithOwnTablesBase
 {
 public:
-    DatabaseOnDisk(const String & name, const String & metadata_path_, const String & data_path_, const String & logger, Context & context);
+    DatabaseOnDisk(const String & name, const String & metadata_path_, const String & data_path_, const String & logger, const Context & context);
 
     void createTable(
         const Context & context,
diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp
index aaceb640213..470c9e7db29 100644
--- a/src/Databases/DatabaseOrdinary.cpp
+++ b/src/Databases/DatabaseOrdinary.cpp
@@ -99,13 +99,13 @@ namespace
 }
 
 
-DatabaseOrdinary::DatabaseOrdinary(const String & name_, const String & metadata_path_, Context & context_)
+DatabaseOrdinary::DatabaseOrdinary(const String & name_, const String & metadata_path_, const Context & context_)
     : DatabaseOrdinary(name_, metadata_path_, "data/" + escapeForFileName(name_) + "/", "DatabaseOrdinary (" + name_ + ")", context_)
 {
 }
 
 DatabaseOrdinary::DatabaseOrdinary(
-    const String & name_, const String & metadata_path_, const String & data_path_, const String & logger, Context & context_)
+    const String & name_, const String & metadata_path_, const String & data_path_, const String & logger, const Context & context_)
     : DatabaseWithDictionaries(name_, metadata_path_, data_path_, logger, context_)
 {
 }
diff --git a/src/Databases/DatabaseOrdinary.h b/src/Databases/DatabaseOrdinary.h
index 6a21e19d5e2..c1ad32345f6 100644
--- a/src/Databases/DatabaseOrdinary.h
+++ b/src/Databases/DatabaseOrdinary.h
@@ -14,8 +14,8 @@ namespace DB
 class DatabaseOrdinary : public DatabaseWithDictionaries
 {
 public:
-    DatabaseOrdinary(const String & name_, const String & metadata_path_, Context & context);
-    DatabaseOrdinary(const String & name_, const String & metadata_path_, const String & data_path_, const String & logger, Context & context_);
+    DatabaseOrdinary(const String & name_, const String & metadata_path_, const Context & context);
+    DatabaseOrdinary(const String & name_, const String & metadata_path_, const String & data_path_, const String & logger, const Context & context_);
 
     String getEngineName() const override { return "Ordinary"; }
 
diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index 25fb95ba0de..eef1b98afe2 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -13,12 +13,16 @@
 #include <Common/ZooKeeper/KeeperException.h>
 #include <Common/ZooKeeper/Types.h>
 #include <Common/ZooKeeper/ZooKeeper.h>
-#include <Interpreters/DDLWorker.h>
+#include <Databases/DatabaseReplicatedWorker.h>
 #include <Interpreters/DDLTask.h>
 #include <Interpreters/executeDDLQueryOnCluster.h>
 #include <Interpreters/Cluster.h>
 #include <common/getFQDNOrHostName.h>
 #include <Parsers/ASTAlterQuery.h>
+#include <Parsers/ParserCreateQuery.h>
+#include <Parsers/parseQuery.h>
+#include <Interpreters/InterpreterCreateQuery.h>
+#include <Parsers/formatAST.h>
 
 namespace DB
 {
@@ -52,7 +56,7 @@ DatabaseReplicated::DatabaseReplicated(
     const String & zookeeper_path_,
     const String & shard_name_,
     const String & replica_name_,
-    Context & context_)
+    const Context & context_)
     : DatabaseAtomic(name_, metadata_path_, uuid, "DatabaseReplicated (" + name_ + ")", context_)
     , zookeeper_path(zookeeper_path_)
     , shard_name(shard_name_)
@@ -116,8 +120,11 @@ bool DatabaseReplicated::createDatabaseNodesInZooKeeper(const zkutil::ZooKeeperP
     ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/log", "", zkutil::CreateMode::Persistent));
     ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/replicas", "", zkutil::CreateMode::Persistent));
     ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/counter", "", zkutil::CreateMode::Persistent));
+    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/counter/cnt-", "", zkutil::CreateMode::Persistent));
+    ops.emplace_back(zkutil::makeRemoveRequest(zookeeper_path + "/counter/cnt-", -1));
     ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/metadata", "", zkutil::CreateMode::Persistent));
-    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/min_log_ptr", "0", zkutil::CreateMode::Persistent));
+    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/min_log_ptr", "1", zkutil::CreateMode::Persistent));
+    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/max_log_ptr", "1", zkutil::CreateMode::Persistent));
 
     Coordination::Responses responses;
     auto res = current_zookeeper->tryMulti(ops, responses);
@@ -128,6 +135,7 @@ bool DatabaseReplicated::createDatabaseNodesInZooKeeper(const zkutil::ZooKeeperP
 
     zkutil::KeeperMultiException::check(res, ops, responses);
     assert(false);
+    __builtin_unreachable();
 }
 
 void DatabaseReplicated::createReplicaNodesInZooKeeper(const zkutil::ZooKeeperPtr & current_zookeeper)
@@ -135,7 +143,7 @@ void DatabaseReplicated::createReplicaNodesInZooKeeper(const zkutil::ZooKeeperPt
     current_zookeeper->createAncestors(replica_path);
 
     /// When creating new replica, use latest snapshot version as initial value of log_pointer
-    log_entry_to_execute = 0;   //FIXME
+    //log_entry_to_execute = 0;   //FIXME
 
     /// Write host name to replica_path, it will protect from multiple replicas with the same name
     auto host_id = getHostID(global_context);
@@ -153,8 +161,8 @@ void DatabaseReplicated::createReplicaNodesInZooKeeper(const zkutil::ZooKeeperPt
 
     Coordination::Requests ops;
     ops.emplace_back(zkutil::makeCreateRequest(replica_path, host_id, zkutil::CreateMode::Persistent));
-    ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/log_ptr", toString(log_entry_to_execute), zkutil::CreateMode::Persistent));
-    ops.emplace_back(zkutil::makeCreateRequest(query_path, entry.toString(), zkutil::CreateMode::PersistentSequential));
+    ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/log_ptr", "0", zkutil::CreateMode::Persistent));
+    ops.emplace_back(zkutil::makeCreateRequest(query_path, entry.toString(), zkutil::CreateMode::Persistent));
     ops.emplace_back(zkutil::makeRemoveRequest(counter_path, -1));
     current_zookeeper->multi(ops);
 }
@@ -163,22 +171,9 @@ void DatabaseReplicated::loadStoredObjects(Context & context, bool has_force_res
 {
     DatabaseAtomic::loadStoredObjects(context, has_force_restore_data_flag, force_attach);
 
-    recoverLostReplica(global_context.getZooKeeper(), 0, true); //FIXME
+    //recoverLostReplica(global_context.getZooKeeper(), 0, true); //FIXME
 
-    DatabaseReplicatedExtensions ext;
-    ext.database_uuid = getUUID();
-    ext.zookeeper_path = zookeeper_path;
-    ext.database_name = getDatabaseName();
-    ext.shard_name = shard_name;
-    ext.replica_name = replica_name;
-    ext.first_not_executed = log_entry_to_execute;
-    ext.lost_callback     = [this] (const String & entry_name, const ZooKeeperPtr & zookeeper) { onUnexpectedLogEntry(entry_name, zookeeper); };
-    ext.executed_callback = [this] (const String & entry_name, const ZooKeeperPtr & zookeeper) { onExecutedLogEntry(entry_name, zookeeper); };
-
-    /// Pool size must be 1 (to avoid reordering of log entries)
-    constexpr size_t pool_size = 1;
-    ddl_worker = std::make_unique<DDLWorker>(pool_size, zookeeper_path + "/log", global_context, nullptr, "",
-                                             std::make_optional<DatabaseReplicatedExtensions>(std::move(ext)));
+    ddl_worker = std::make_unique<DatabaseReplicatedDDLWorker>(this, global_context);
 }
 
 void DatabaseReplicated::onUnexpectedLogEntry(const String & entry_name, const ZooKeeperPtr & zookeeper)
@@ -314,48 +309,68 @@ BlockIO DatabaseReplicated::propose(const ASTPtr & query)
 }
 
 
-void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 from_snapshot, bool create)
+void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 from_snapshot, bool /*create*/)
 {
-    LOG_WARNING(log, "Will recover replica from snapshot", from_snapshot);
+    LOG_WARNING(log, "Will recover replica");
 
     //FIXME drop old tables
 
     String snapshot_metadata_path = zookeeper_path + "/metadata";
     Strings tables_in_snapshot = current_zookeeper->getChildren(snapshot_metadata_path);
     snapshot_metadata_path += '/';
+    from_snapshot = parse<UInt32>(current_zookeeper->get(zookeeper_path + "/max_log_ptr"));
 
     for (const auto & table_name : tables_in_snapshot)
     {
         //FIXME It's not atomic. We need multiget here (available since ZooKeeper 3.6.0).
-        String query_to_execute = current_zookeeper->get(snapshot_metadata_path + table_name);
+        String query_text = current_zookeeper->get(snapshot_metadata_path + table_name);
+        auto query_ast = parseQueryFromMetadataInZooKeeper(table_name, query_text);
 
+        Context query_context = global_context;
+        query_context.makeQueryContext();
+        query_context.getClientInfo().query_kind = ClientInfo::QueryKind::REPLICATED_LOG_QUERY;
+        query_context.setCurrentDatabase(database_name);
+        query_context.setCurrentQueryId(""); // generate random query_id
 
-        if (!startsWith(query_to_execute, "ATTACH "))
-            throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected query: {}", query_to_execute);
-        query_to_execute = "CREATE " + query_to_execute.substr(strlen("ATTACH "));
+        //FIXME
+        DatabaseCatalog::instance().waitTableFinallyDropped(query_ast->as<ASTCreateQuery>()->uuid);
 
-        Context current_context = global_context;
-        current_context.getClientInfo().query_kind = ClientInfo::QueryKind::REPLICATED_LOG_QUERY;
-        current_context.setCurrentDatabase(database_name);
-        current_context.setCurrentQueryId(""); // generate random query_id
-
-        executeQuery(query_to_execute, current_context);
+        LOG_INFO(log, "Executing {}", serializeAST(*query_ast));
+        InterpreterCreateQuery(query_ast, query_context).execute();
     }
 
-    if (create)
-        return;
+    //if (create)
+    //    return;
 
-    current_zookeeper->set(replica_path + "/log-ptr", toString(from_snapshot));
+    current_zookeeper->set(replica_path + "/log_ptr", toString(from_snapshot));
     last_executed_log_entry = from_snapshot;
-    ddl_worker->setLogPointer(from_snapshot); //FIXME
+    //ddl_worker->setLogPointer(from_snapshot); //FIXME
 
     //writeLastExecutedToDiskAndZK();
 }
 
+ASTPtr DatabaseReplicated::parseQueryFromMetadataInZooKeeper(const String & node_name, const String & query)
+{
+    ParserCreateQuery parser;
+    String description = "in ZooKeeper " + zookeeper_path + "/metadata/" + node_name;
+    auto ast = parseQuery(parser, query, description, 0, global_context.getSettingsRef().max_parser_depth);
+
+    auto & create = ast->as<ASTCreateQuery &>();
+    if (create.uuid == UUIDHelpers::Nil || create.table != TABLE_WITH_UUID_NAME_PLACEHOLDER || ! create.database.empty())
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Got unexpected query from {}: {}", node_name, query);
+
+    create.database = getDatabaseName();
+    create.table = unescapeForFileName(node_name);
+    create.attach = false;
+
+    return ast;
+}
+
 void DatabaseReplicated::drop(const Context & context_)
 {
     auto current_zookeeper = getZooKeeper();
-    current_zookeeper->tryRemove(zookeeper_path + "/replicas/" + replica_name);
+    current_zookeeper->set(replica_path, "DROPPED");
+    current_zookeeper->tryRemoveRecursive(replica_path);
     DatabaseAtomic::drop(context_);
 }
 
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index 663df59ac63..d6cd93773cf 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -12,7 +12,7 @@
 namespace DB
 {
 
-class DDLWorker;
+class DatabaseReplicatedDDLWorker;
 using ZooKeeperPtr = std::shared_ptr<zkutil::ZooKeeper>;
 
 /** DatabaseReplicated engine
@@ -42,7 +42,7 @@ class DatabaseReplicated : public DatabaseAtomic
 public:
     DatabaseReplicated(const String & name_, const String & metadata_path_, UUID uuid,
                        const String & zookeeper_path_, const String & shard_name_, const String & replica_name_,
-                       Context & context);
+                       const Context & context);
 
     ~DatabaseReplicated() override;
 
@@ -56,6 +56,11 @@ public:
 
     void loadStoredObjects(Context & context, bool has_force_restore_data_flag, bool force_attach = false) override;
 
+    String getFullReplicaName() const { return shard_name + '|' + replica_name; }
+
+    //FIXME
+    friend struct DatabaseReplicatedTask;
+    friend class DatabaseReplicatedDDLWorker;
 private:
     bool createDatabaseNodesInZooKeeper(const ZooKeeperPtr & current_zookeeper);
     void createReplicaNodesInZooKeeper(const ZooKeeperPtr & current_zookeeper);
@@ -72,6 +77,8 @@ private:
 
     void onExecutedLogEntry(const String & entry_name, const ZooKeeperPtr & zookeeper);
 
+    ASTPtr parseQueryFromMetadataInZooKeeper(const String & node_name, const String & query);
+
     String zookeeper_path;
     String shard_name;
     String replica_name;
@@ -88,7 +95,7 @@ private:
 
     zkutil::ZooKeeperPtr getZooKeeper() const;
 
-    std::unique_ptr<DDLWorker> ddl_worker;
+    std::unique_ptr<DatabaseReplicatedDDLWorker> ddl_worker;
 
 
diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp
new file mode 100644
index 00000000000..869b888d3ad
--- /dev/null
+++ b/src/Databases/DatabaseReplicatedWorker.cpp
@@ -0,0 +1,114 @@
+#include <Databases/DatabaseReplicatedWorker.h>
+#include <Databases/DatabaseReplicated.h>
+#include <Interpreters/DDLTask.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int LOGICAL_ERROR;
+}
+
+DatabaseReplicatedDDLWorker::DatabaseReplicatedDDLWorker(DatabaseReplicated * db, const Context & context_)
+    : DDLWorker(/* pool_size */ 1, db->zookeeper_path + "/log", context_, nullptr, {}, fmt::format("DDLWorker({})", db->getDatabaseName()))
+    , database(db)
+{
+    /// Pool size must be 1 (to avoid reordering of log entries)
+}
+
+void DatabaseReplicatedDDLWorker::initialize()
+{
+    /// Check if we need to recover replica.
+    /// Invariant: replica is lost if it's log_ptr value is less then min_log_ptr value.
+
+    UInt32 our_log_ptr = parse<UInt32>(current_zookeeper->get(database->replica_path + "/log_ptr"));
+    UInt32 min_log_ptr = parse<UInt32>(current_zookeeper->get(database->zookeeper_path + "/min_log_ptr"));
+    if (our_log_ptr < min_log_ptr)
+        database->recoverLostReplica(current_zookeeper, 0);
+}
+
+String DatabaseReplicatedDDLWorker::enqueueQuery(DDLLogEntry & entry)
+{
+    auto zookeeper = getAndSetZooKeeper();
+    const String query_path_prefix = queue_dir + "/query-";
+
+    /// We cannot create sequential node and it's ephemeral child in a single transaction, so allocate sequential number another way
+    String counter_prefix = database->zookeeper_path + "/counter/cnt-";
+    String counter_path = zookeeper->create(counter_prefix, "", zkutil::CreateMode::EphemeralSequential);
+    String node_path = query_path_prefix + counter_path.substr(counter_prefix.size());
+
+    Coordination::Requests ops;
+    /// Query is not committed yet, but we have to write it into log to avoid reordering
+    ops.emplace_back(zkutil::makeCreateRequest(node_path, entry.toString(), zkutil::CreateMode::Persistent));
+    /// '/try' will be replaced with '/committed' or will be removed due to expired session or other error
+    ops.emplace_back(zkutil::makeCreateRequest(node_path + "/try", database->getFullReplicaName(), zkutil::CreateMode::Ephemeral));
+    /// We don't need it anymore
+    ops.emplace_back(zkutil::makeRemoveRequest(counter_path, -1));
+    /// Create status dirs
+    ops.emplace_back(zkutil::makeCreateRequest(node_path + "/active", "", zkutil::CreateMode::Persistent));
+    ops.emplace_back(zkutil::makeCreateRequest(node_path + "/finished", "", zkutil::CreateMode::Persistent));
+    zookeeper->multi(ops);
+
+    return node_path;
+}
+
+DDLTaskPtr DatabaseReplicatedDDLWorker::initAndCheckTask(const String & entry_name, String & out_reason, const ZooKeeperPtr & zookeeper)
+{
+    UInt32 our_log_ptr = parse<UInt32>(current_zookeeper->get(database->replica_path + "/log_ptr"));
+    UInt32 entry_num = DatabaseReplicatedTask::getLogEntryNumber(entry_name);
+
+    if (entry_num <= our_log_ptr)
+    {
+        out_reason = fmt::format("Task {} already executed according to log pointer {}", entry_name, our_log_ptr);
+        return {};
+    }
+
+    String entry_path = queue_dir + "/" + entry_name;
+    auto task = std::make_unique<DatabaseReplicatedTask>(entry_name, entry_path, database);
+
+    String initiator_name;
+    zkutil::EventPtr wait_committed_or_failed = std::make_shared<Poco::Event>();
+
+    if (zookeeper->tryGet(entry_path + "/try", initiator_name, nullptr, wait_committed_or_failed))
+    {
+        task->we_are_initiator = initiator_name == task->host_id_str;
+        /// Query is not committed yet. We cannot just skip it and execute next one, because reordering may break replication.
+        //FIXME add some timeouts
+        if (!task->we_are_initiator)
+        {
+            LOG_TRACE(log, "Waiting for initiator {} to commit or rollback entry {}", initiator_name, entry_path);
+            wait_committed_or_failed->wait();
+        }
+    }
+
+    if (!task->we_are_initiator && !zookeeper->exists(entry_path + "/committed"))
+    {
+        out_reason = "Entry " + entry_name + " hasn't been committed";
+        return {};
+    }
+
+    String node_data;
+    if (!zookeeper->tryGet(entry_path, node_data))
+    {
+        LOG_ERROR(log, "Cannot get log entry {}", entry_path);
+        database->onUnexpectedLogEntry(entry_name, zookeeper);
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "should be unreachable");
+    }
+
+    auto error = task->tryParseEntry(node_data);
+    if (error)
+    {
+        LOG_ERROR(log, "Cannot parse query from '{}': {}", node_data, *error);
+        database->onUnexpectedLogEntry(entry_name, zookeeper);
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "should be unreachable");
+    }
+
+    task->parseQueryFromEntry(context);
+
+    return task;
+}
+
+
+
+}
diff --git a/src/Databases/DatabaseReplicatedWorker.h b/src/Databases/DatabaseReplicatedWorker.h
new file mode 100644
index 00000000000..d190bd1795d
--- /dev/null
+++ b/src/Databases/DatabaseReplicatedWorker.h
@@ -0,0 +1,26 @@
+#pragma once
+#include <Interpreters/DDLWorker.h>
+
+
+namespace DB
+{
+
+class DatabaseReplicated;
+
+class DatabaseReplicatedDDLWorker : public DDLWorker
+{
+public:
+    DatabaseReplicatedDDLWorker(DatabaseReplicated * db, const Context & context_);
+
+    String enqueueQuery(DDLLogEntry & entry) override;
+
+private:
+    void initialize() override;
+
+    DDLTaskPtr initAndCheckTask(const String & entry_name, String & out_reason, const ZooKeeperPtr & zookeeper) override;
+
+    DatabaseReplicated * database;
+
+};
+
+}
diff --git a/src/Databases/DatabaseWithDictionaries.cpp b/src/Databases/DatabaseWithDictionaries.cpp
index da7f7f9b83e..ee16f4ae15e 100644
--- a/src/Databases/DatabaseWithDictionaries.cpp
+++ b/src/Databases/DatabaseWithDictionaries.cpp
@@ -349,7 +349,7 @@ void DatabaseWithDictionaries::shutdown()
 
 
 DatabaseWithDictionaries::DatabaseWithDictionaries(
-    const String & name, const String & metadata_path_, const String & data_path_, const String & logger, Context & context)
+    const String & name, const String & metadata_path_, const String & data_path_, const String & logger, const Context & context)
     : DatabaseOnDisk(name, metadata_path_, data_path_, logger, context)
     , external_loader(context.getExternalDictionariesLoader())
 {
diff --git a/src/Databases/DatabaseWithDictionaries.h b/src/Databases/DatabaseWithDictionaries.h
index 36cee18e4db..d69289d7456 100644
--- a/src/Databases/DatabaseWithDictionaries.h
+++ b/src/Databases/DatabaseWithDictionaries.h
@@ -38,7 +38,7 @@ public:
     ~DatabaseWithDictionaries() override;
 
 protected:
-    DatabaseWithDictionaries(const String & name, const String & metadata_path_, const String & data_path_, const String & logger, Context & context);
+    DatabaseWithDictionaries(const String & name, const String & metadata_path_, const String & data_path_, const String & logger, const Context & context);
 
     ASTPtr getCreateDictionaryQueryImpl(const String & dictionary_name, bool throw_on_error) const override;
 
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 04bd6b37280..b9283935ec9 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -2437,7 +2437,8 @@ void Context::initMetadataTransaction(MetadataTransactionPtr txn)
 
 MetadataTransactionPtr Context::getMetadataTransaction() const
 {
-    assert(query_context == this);
+    //FIXME
+    //assert(query_context == this);
     return metadata_transaction;
 }
 
diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp
index dfb8f5ff746..0bc98dfd0dd 100644
--- a/src/Interpreters/DDLTask.cpp
+++ b/src/Interpreters/DDLTask.cpp
@@ -6,6 +6,12 @@
 #include <IO/Operators.h>
 #include <IO/ReadBufferFromString.h>
 #include <Poco/Net/NetException.h>
+#include <common/logger_useful.h>
+#include <Parsers/ParserQuery.h>
+#include <Parsers/parseQuery.h>
+#include <Parsers/ASTQueryWithOnCluster.h>
+#include <Parsers/ASTQueryWithTableAndOutput.h>
+#include <Databases/DatabaseReplicated.h>
 
 namespace DB
 {
@@ -13,6 +19,8 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int UNKNOWN_FORMAT_VERSION;
+    extern const int UNKNOWN_TYPE_OF_QUERY;
+    extern const int INCONSISTENT_CLUSTER_DEFINITION;
 }
 
 HostID HostID::fromString(const String & host_port_str)
@@ -78,4 +86,276 @@ void DDLLogEntry::parse(const String & data)
 }
 
 
+std::optional<String> DDLTaskBase::tryParseEntry(const String & data)
+{
+    std::optional<String> error;
+    try
+    {
+        entry.parse(data);
+    }
+    catch (...)
+    {
+        error = ExecutionStatus::fromCurrentException().serializeText();
+    }
+    return error;
+}
+
+void DDLTaskBase::parseQueryFromEntry(const Context & context)
+{
+    const char * begin = entry.query.data();
+    const char * end = begin + entry.query.size();
+
+    ParserQuery parser_query(end);
+    String description;
+    query = parseQuery(parser_query, begin, end, description, 0, context.getSettingsRef().max_parser_depth);
+}
+
+std::unique_ptr<Context> DDLTaskBase::makeQueryContext(Context & from_context) const
+{
+    auto query_context = std::make_unique<Context>(from_context);
+    query_context->makeQueryContext();
+    query_context->setCurrentQueryId(""); // generate random query_id
+    query_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY;
+    return query_context;
+}
+
+
+bool DDLTask::findCurrentHostID(const Context & global_context, Poco::Logger * log)
+{
+    bool host_in_hostlist = false;
+
+    for (const HostID & host : entry.hosts)
+    {
+        auto maybe_secure_port = global_context.getTCPPortSecure();
+
+        /// The port is considered local if it matches TCP or TCP secure port that the server is listening.
+        bool is_local_port = (maybe_secure_port && host.isLocalAddress(*maybe_secure_port))
+                             || host.isLocalAddress(global_context.getTCPPort());
+
+        if (!is_local_port)
+            continue;
+
+        if (host_in_hostlist)
+        {
+            /// This check could be slow a little bit
+            LOG_WARNING(log, "There are two the same ClickHouse instances in task {}: {} and {}. Will use the first one only.",
+                             entry_name, host_id.readableString(), host.readableString());
+        }
+        else
+        {
+            host_in_hostlist = true;
+            host_id = host;
+            host_id_str = host.toString();
+        }
+    }
+
+    return host_in_hostlist;
+}
+
+void DDLTask::setClusterInfo(const Context & context, Poco::Logger * log)
+{
+    auto query_on_cluster = dynamic_cast<ASTQueryWithOnCluster *>(query.get());
+    if (!query_on_cluster)
+        throw Exception("Received unknown DDL query", ErrorCodes::UNKNOWN_TYPE_OF_QUERY);
+
+    cluster_name = query_on_cluster->cluster;
+    cluster = context.tryGetCluster(cluster_name);
+
+    if (!cluster)
+        throw Exception(ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION,
+                        "DDL task {} contains current host {} in cluster {}, but there are no such cluster here.",
+                        entry_name, host_id.readableString(), cluster_name);
+
+    /// Try to find host from task host list in cluster
+    /// At the first, try find exact match (host name and ports should be literally equal)
+    /// If the attempt fails, try find it resolving host name of each instance
+
+    if (!tryFindHostInCluster())
+    {
+        LOG_WARNING(log, "Not found the exact match of host {} from task {} in cluster {} definition. Will try to find it using host name resolving.",
+                         host_id.readableString(), entry_name, cluster_name);
+
+        if (!tryFindHostInClusterViaResolving(context))
+            throw Exception(ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION, "Not found host {} in definition of cluster {}",
+                                                                 host_id.readableString(), cluster_name);
+
+        LOG_INFO(log, "Resolved host {} from task {} as host {} in definition of cluster {}",
+                 host_id.readableString(), entry_name, address_in_cluster.readableString(), cluster_name);
+    }
+
+    query = query_on_cluster->getRewrittenASTWithoutOnCluster(address_in_cluster.default_database);
+    query_on_cluster = nullptr;
+}
+
+bool DDLTask::tryFindHostInCluster()
+{
+    const auto & shards = cluster->getShardsAddresses();
+    bool found_exact_match = false;
+    String default_database;
+
+    for (size_t shard_num = 0; shard_num < shards.size(); ++shard_num)
+    {
+        for (size_t replica_num = 0; replica_num < shards[shard_num].size(); ++replica_num)
+        {
+            const Cluster::Address & address = shards[shard_num][replica_num];
+
+            if (address.host_name == host_id.host_name && address.port == host_id.port)
+            {
+                if (found_exact_match)
+                {
+                    if (default_database == address.default_database)
+                    {
+                        throw Exception(ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION,
+                                        "There are two exactly the same ClickHouse instances {} in cluster {}",
+                                        address.readableString(), cluster_name);
+                    }
+                    else
+                    {
+                        /* Circular replication is used.
+                         * It is when every physical node contains
+                         * replicas of different shards of the same table.
+                         * To distinguish one replica from another on the same node,
+                         * every shard is placed into separate database.
+                         * */
+                        is_circular_replicated = true;
+                        auto * query_with_table = dynamic_cast<ASTQueryWithTableAndOutput *>(query.get());
+                        if (!query_with_table || query_with_table->database.empty())
+                        {
+                            throw Exception(ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION,
+                                            "For a distributed DDL on circular replicated cluster its table name must be qualified by database name.");
+                        }
+                        if (default_database == query_with_table->database)
+                            return true;
+                    }
+                }
+                found_exact_match = true;
+                host_shard_num = shard_num;
+                host_replica_num = replica_num;
+                address_in_cluster = address;
+                default_database = address.default_database;
+            }
+        }
+    }
+
+    return found_exact_match;
+}
+
+bool DDLTask::tryFindHostInClusterViaResolving(const Context & context)
+{
+    const auto & shards = cluster->getShardsAddresses();
+    bool found_via_resolving = false;
+
+    for (size_t shard_num = 0; shard_num < shards.size(); ++shard_num)
+    {
+        for (size_t replica_num = 0; replica_num < shards[shard_num].size(); ++replica_num)
+        {
+            const Cluster::Address & address = shards[shard_num][replica_num];
+
+            if (auto resolved = address.getResolvedAddress();
+                resolved && (isLocalAddress(*resolved, context.getTCPPort())
+                             || (context.getTCPPortSecure() && isLocalAddress(*resolved, *context.getTCPPortSecure()))))
+            {
+                if (found_via_resolving)
+                {
+                    throw Exception(ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION,
+                                    "There are two the same ClickHouse instances in cluster {} : {} and {}",
+                                    cluster_name, address_in_cluster.readableString(), address.readableString());
+                }
+                else
+                {
+                    found_via_resolving = true;
+                    host_shard_num = shard_num;
+                    host_replica_num = replica_num;
+                    address_in_cluster = address;
+                }
+            }
+        }
+    }
+
+    return found_via_resolving;
+}
+
+String DDLTask::getShardID() const
+{
+    /// Generate unique name for shard node, it will be used to execute the query by only single host
+    /// Shard node name has format 'replica_name1,replica_name2,...,replica_nameN'
+    /// Where replica_name is 'replica_config_host_name:replica_port'
+
+    auto shard_addresses = cluster->getShardsAddresses().at(host_shard_num);
+
+    Strings replica_names;
+    for (const Cluster::Address & address : shard_addresses)
+        replica_names.emplace_back(address.readableString());
+    std::sort(replica_names.begin(), replica_names.end());
+
+    String res;
+    for (auto it = replica_names.begin(); it != replica_names.end(); ++it)
+        res += *it + (std::next(it) != replica_names.end() ? "," : "");
+
+    return res;
+}
+
+DatabaseReplicatedTask::DatabaseReplicatedTask(const String & name, const String & path, DatabaseReplicated * database_)
+    : DDLTaskBase(name, path)
+    , database(database_)
+{
+    host_id_str = database->getFullReplicaName();
+}
+
+String DatabaseReplicatedTask::getShardID() const
+{
+    return database->shard_name;
+}
+
+std::unique_ptr<Context> DatabaseReplicatedTask::makeQueryContext(Context & from_context) const
+{
+    auto query_context = DDLTaskBase::makeQueryContext(from_context);
+    query_context->getClientInfo().query_kind = ClientInfo::QueryKind::REPLICATED_LOG_QUERY; //FIXME why do we need separate query kind?
+    query_context->setCurrentDatabase(database->getDatabaseName());
+
+    if (we_are_initiator)
+    {
+        auto txn = std::make_shared<MetadataTransaction>();
+        query_context->initMetadataTransaction(txn);
+        txn->current_zookeeper = from_context.getZooKeeper();
+        txn->zookeeper_path = database->zookeeper_path;
+        txn->ops.emplace_back(zkutil::makeRemoveRequest(entry_path + "/try", -1));
+        txn->ops.emplace_back(zkutil::makeCreateRequest(entry_path + "/committed", host_id_str, zkutil::CreateMode::Persistent));
+        txn->ops.emplace_back(zkutil::makeRemoveRequest(getActiveNodePath(), -1));
+        if (execute_on_leader)
+            txn->ops.emplace_back(zkutil::makeCreateRequest(getShardNodePath() + "/executed", host_id_str, zkutil::CreateMode::Persistent));
+        txn->ops.emplace_back(zkutil::makeCreateRequest(getFinishedNodePath(), execution_status.serializeText(), zkutil::CreateMode::Persistent));
+        txn->ops.emplace_back(zkutil::makeSetRequest(database->replica_path + "/log_ptr", toString(getLogEntryNumber(entry_name)), -1));
+        txn->ops.emplace_back(zkutil::makeSetRequest(database->zookeeper_path + "/max_log_ptr", toString(getLogEntryNumber(entry_name)), -1));
+    }
+
+    return query_context;
+}
+
+String DatabaseReplicatedTask::getLogEntryName(UInt32 log_entry_number)
+{
+    constexpr size_t seq_node_digits = 10;
+    String number = toString(log_entry_number);
+    String name = "query-" + String(seq_node_digits - number.size(), '0') + number;
+    return name;
+}
+
+UInt32 DatabaseReplicatedTask::getLogEntryNumber(const String & log_entry_name)
+{
+    constexpr const char * name = "query-";
+    assert(startsWith(log_entry_name, name));
+    return parse<UInt32>(log_entry_name.substr(strlen(name)));
+}
+
+void DatabaseReplicatedTask::parseQueryFromEntry(const Context & context)
+{
+    if (entry.query.empty())
+    {
+        was_executed = true;
+        return;
+    }
+
+    DDLTaskBase::parseQueryFromEntry(context);
+}
+
 }
diff --git a/src/Interpreters/DDLTask.h b/src/Interpreters/DDLTask.h
index ba58fe3f42e..19d92a1bc78 100644
--- a/src/Interpreters/DDLTask.h
+++ b/src/Interpreters/DDLTask.h
@@ -3,12 +3,17 @@
 #include <Interpreters/Cluster.h>
 #include <Common/ZooKeeper/Types.h>
 
+namespace Poco
+{
+class Logger;
+}
 
 namespace DB
 {
 
 class ASTQueryWithOnCluster;
 using ZooKeeperPtr = std::shared_ptr<zkutil::ZooKeeper>;
+class DatabaseReplicated;
 
 struct HostID
 {
@@ -54,42 +59,88 @@ struct DDLLogEntry
     void parse(const String & data);
 };
 
+struct DDLTaskBase
+{
+    const String entry_name;
+    const String entry_path;
 
-struct DDLTask
+    DDLTaskBase(const String & name, const String & path) : entry_name(name), entry_path(path) {}
+    virtual ~DDLTaskBase() = default;
+
+    std::optional<String> tryParseEntry(const String & data);
+    virtual void parseQueryFromEntry(const Context & context);
+
+    DDLLogEntry entry;
+
+    String host_id_str;
+    ASTPtr query;
+
+    bool is_circular_replicated = false;
+    bool execute_on_leader = false;
+
+    ExecutionStatus execution_status;
+    bool was_executed = false;
+
+    virtual String getShardID() const = 0;
+
+    virtual std::unique_ptr<Context> makeQueryContext(Context & from_context) const;
+
+    inline String getActiveNodePath() const { return entry_path + "/active/" + host_id_str; }
+    inline String getFinishedNodePath() const { return entry_path + "/finished/" + host_id_str; }
+    inline String getShardNodePath() const { return entry_path + "/shards/" + getShardID(); }
+
+};
+
+struct DDLTask : public DDLTaskBase
 {
     /// Stages of task lifetime correspond ordering of these data fields:
 
-    /// Stage 1: parse entry
-    String entry_name;
-    String entry_path;
-    DDLLogEntry entry;
+    DDLTask(const String & name, const String & path) : DDLTaskBase(name, path) {}
+
+    bool findCurrentHostID(const Context & global_context, Poco::Logger * log);
+
+    void setClusterInfo(const Context & context, Poco::Logger * log);
 
-    bool we_are_initiator = false;
 
     /// Stage 2: resolve host_id and check that
-    HostID host_id;
-    String host_id_str;
+
 
     /// Stage 3.1: parse query
-    ASTPtr query;
-    ASTQueryWithOnCluster * query_on_cluster = nullptr;
 
     /// Stage 3.2: check cluster and find the host in cluster
+
+    /// Stage 3.3: execute query
+
+    /// Stage 4: commit results to ZooKeeper
+
+    String getShardID() const override;
+
+private:
+    bool tryFindHostInCluster();
+    bool tryFindHostInClusterViaResolving(const Context & context);
+
+    HostID host_id;
     String cluster_name;
     ClusterPtr cluster;
     Cluster::Address address_in_cluster;
     size_t host_shard_num;
     size_t host_replica_num;
+};
 
-    /// Stage 3.3: execute query
-    ExecutionStatus execution_status;
-    bool was_executed = false;
+struct DatabaseReplicatedTask : public DDLTaskBase
+{
+    DatabaseReplicatedTask(const String & name, const String & path, DatabaseReplicated * database_);
 
-    /// Stage 4: commit results to ZooKeeper
+    void parseQueryFromEntry(const Context & context) override;
 
-    String active_path;
-    String finished_path;
-    String shard_path;
+    String getShardID() const override;
+    std::unique_ptr<Context> makeQueryContext(Context & from_context) const override;
+
+    static String getLogEntryName(UInt32 log_entry_number);
+    static UInt32 getLogEntryNumber(const String & log_entry_name);
+
+    DatabaseReplicated * database;
+    bool we_are_initiator = false;
 };
 
 
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index fc9039be576..0399687a4d8 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -142,33 +142,13 @@ std::unique_ptr<ZooKeeperLock> createSimpleZooKeeperLock(
 }
 
 
-String DatabaseReplicatedExtensions::getLogEntryName(UInt32 log_entry_number)
-{
-    constexpr size_t seq_node_digits = 10;
-    String number = toString(log_entry_number);
-    String name = "query-" + String(seq_node_digits - number.size(), '0') + number;
-    return name;
-}
-
-UInt32 DatabaseReplicatedExtensions::getLogEntryNumber(const String & log_entry_name)
-{
-    constexpr const char * name = "query-";
-    assert(startsWith(log_entry_name, name));
-    return parse<UInt32>(log_entry_name.substr(strlen(name)));
-}
-
-
 DDLWorker::DDLWorker(int pool_size_, const std::string & zk_root_dir, const Context & context_, const Poco::Util::AbstractConfiguration * config, const String & prefix,
-                     std::optional<DatabaseReplicatedExtensions> database_replicated_ext_)
+                     const String & logger_name)
     : context(context_)
-    , log(&Poco::Logger::get(database_replicated_ext_ ? fmt::format("DDLWorker ({})", database_replicated_ext_->database_name) : "DDLWorker"))
-    , database_replicated_ext(std::move(database_replicated_ext_))
-    , pool_size(pool_size_)
+    , log(&Poco::Logger::get(logger_name))
+    , pool_size(pool_size_)     //FIXME make it optional
     , worker_pool(pool_size_)
 {
-    assert(!database_replicated_ext || pool_size == 1);
-    last_tasks.reserve(pool_size);
-
     queue_dir = zk_root_dir;
     if (queue_dir.back() == '/')
         queue_dir.resize(queue_dir.size() - 1);
@@ -252,60 +232,26 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r
     String node_data;
     String entry_path = queue_dir + "/" + entry_name;
 
-    auto task = std::make_unique<DDLTask>();
-    task->entry_name = entry_name;
-    task->entry_path = entry_path;
-
-    if (database_replicated_ext)
-    {
-        String initiator_name;
-        zkutil::EventPtr wait_committed_or_failed = std::make_shared<Poco::Event>();
-
-        if (zookeeper->tryGet(entry_path + "/try", initiator_name, nullptr, wait_committed_or_failed))
-        {
-            task->we_are_initiator = initiator_name == database_replicated_ext->getFullReplicaName();
-            /// Query is not committed yet. We cannot just skip it and execute next one, because reordering may break replication.
-            //FIXME add some timeouts
-            if (!task->we_are_initiator)
-            {
-                LOG_TRACE(log, "Waiting for initiator {} to commit or rollback entry {}", initiator_name, entry_path);
-                wait_committed_or_failed->wait();
-            }
-        }
-
-        if (!task->we_are_initiator && !zookeeper->exists(entry_path + "/committed"))
-        {
-            out_reason = "Entry " + entry_name + " hasn't been committed";
-            return {};
-        }
-    }
+    auto task = std::make_unique<DDLTask>(entry_name, entry_path);
 
     if (!zookeeper->tryGet(entry_path, node_data))
     {
-        if (database_replicated_ext)
-            database_replicated_ext->lost_callback(entry_name, zookeeper);
         /// It is Ok that node could be deleted just now. It means that there are no current host in node's host list.
         out_reason = "The task was deleted";
         return {};
     }
 
-    try
-    {
-        task->entry.parse(node_data);
-    }
-    catch (...)
+    auto error = task->tryParseEntry(node_data);
+    if (error)
     {
         /// What should we do if we even cannot parse host name and therefore cannot properly submit execution status?
         /// We can try to create fail node using FQDN if it equal to host name in cluster config attempt will be successful.
         /// Otherwise, that node will be ignored by DDLQueryStatusInputStream.
-
-        tryLogCurrentException(log, "Cannot parse DDL task " + entry_name + ", will try to send error status");
-
-        String status = ExecutionStatus::fromCurrentException().serializeText();
+        LOG_ERROR(log, "Cannot parse DDL task {}, will try to send error status: {}", entry_name, *error);
         try
         {
             createStatusDirs(entry_path, zookeeper);
-            zookeeper->tryCreate(entry_path + "/finished/" + host_fqdn_id, status, zkutil::CreateMode::Persistent);
+            zookeeper->tryCreate(entry_path + "/finished/" + host_fqdn_id, *error, zkutil::CreateMode::Persistent);
         }
         catch (...)
         {
@@ -316,45 +262,15 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r
         return {};
     }
 
-    if (database_replicated_ext)
-    {
-        task->host_id.host_name = host_fqdn;
-        task->host_id.port = context.getTCPPort();
-        task->host_id_str = database_replicated_ext->shard_name + '|' + database_replicated_ext->replica_name;
-        return task;
-    }
-
-    bool host_in_hostlist = false;
-    for (const HostID & host : task->entry.hosts)
-    {
-        auto maybe_secure_port = context.getTCPPortSecure();
-
-        /// The port is considered local if it matches TCP or TCP secure port that the server is listening.
-        bool is_local_port = (maybe_secure_port && host.isLocalAddress(*maybe_secure_port))
-            || host.isLocalAddress(context.getTCPPort());
-
-        if (!is_local_port)
-            continue;
-
-        if (host_in_hostlist)
-        {
-            /// This check could be slow a little bit
-            LOG_WARNING(log, "There are two the same ClickHouse instances in task {}: {} and {}. Will use the first one only.", entry_name, task->host_id.readableString(), host.readableString());
-        }
-        else
-        {
-            host_in_hostlist = true;
-            task->host_id = host;
-            task->host_id_str = host.toString();
-        }
-    }
-
-    if (!host_in_hostlist)
+    if (!task->findCurrentHostID(context, log))
     {
         out_reason = "There is no a local address in host list";
         return {};
     }
 
+    task->parseQueryFromEntry(context);
+    task->setClusterInfo(context, log);
+
     return task;
 }
 
@@ -378,11 +294,11 @@ void DDLWorker::scheduleTasks()
         return;
     }
 
-    bool server_startup = last_tasks.empty();
+    bool server_startup = !last_entry_name.has_value();
 
     auto begin_node = server_startup
         ? queue_nodes.begin()
-        : std::upper_bound(queue_nodes.begin(), queue_nodes.end(), last_tasks.back());
+        : std::upper_bound(queue_nodes.begin(), queue_nodes.end(), *last_entry_name);
 
     for (auto it = begin_node; it != queue_nodes.end() && !stop_flag; ++it)
     {
@@ -394,7 +310,7 @@ void DDLWorker::scheduleTasks()
         if (!task)
         {
             LOG_DEBUG(log, "Will not execute task {}: {}", entry_name, reason);
-            saveTask(entry_name);
+            last_entry_name = entry_name;
             continue;
         }
 
@@ -408,7 +324,7 @@ void DDLWorker::scheduleTasks()
 
         if (!already_processed)
         {
-            if (database_replicated_ext)
+            if (pool_size == 1)
             {
                 enqueueTask(DDLTaskPtr(task.release()));
             }
@@ -425,143 +341,18 @@ void DDLWorker::scheduleTasks()
             LOG_DEBUG(log, "Task {} ({}) has been already processed", entry_name, task->entry.query);
         }
 
-        saveTask(entry_name);
+        last_entry_name = entry_name;
     }
 }
 
-void DDLWorker::saveTask(const String & entry_name)
-{
-    if (last_tasks.size() == pool_size)
-    {
-        last_tasks.erase(last_tasks.begin());
-    }
-    last_tasks.emplace_back(entry_name);
-}
-
 /// Parses query and resolves cluster and host in cluster
-void DDLWorker::parseQueryAndResolveHost(DDLTask & task)
+void DDLWorker::parseQueryAndResolveHost(DDLTaskBase & /*task*/)
 {
-    {
-        const char * begin = task.entry.query.data();
-        const char * end = begin + task.entry.query.size();
 
-        ParserQuery parser_query(end);
-        String description;
-        task.query = parseQuery(parser_query, begin, end, description, 0, context.getSettingsRef().max_parser_depth);
-    }
-
-    // XXX: serious design flaw since `ASTQueryWithOnCluster` is not inherited from `IAST`!
-    if (!task.query || !(task.query_on_cluster = dynamic_cast<ASTQueryWithOnCluster *>(task.query.get())))
-        throw Exception("Received unknown DDL query", ErrorCodes::UNKNOWN_TYPE_OF_QUERY);
-
-    if (database_replicated_ext)
-        return;
-
-    task.cluster_name = task.query_on_cluster->cluster;
-    task.cluster = context.tryGetCluster(task.cluster_name);
-    if (!task.cluster)
-        throw Exception(ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION,
-            "DDL task {} contains current host {} in cluster {}, but there are no such cluster here.",
-            task.entry_name, task.host_id.readableString(), task.cluster_name);
-
-    /// Try to find host from task host list in cluster
-    /// At the first, try find exact match (host name and ports should be literally equal)
-    /// If the attempt fails, try find it resolving host name of each instance
-    const auto & shards = task.cluster->getShardsAddresses();
-
-    bool found_exact_match = false;
-    String default_database;
-    for (size_t shard_num = 0; shard_num < shards.size(); ++shard_num)
-    {
-        for (size_t replica_num = 0; replica_num < shards[shard_num].size(); ++replica_num)
-        {
-            const Cluster::Address & address = shards[shard_num][replica_num];
-
-            if (address.host_name == task.host_id.host_name && address.port == task.host_id.port)
-            {
-                if (found_exact_match)
-                {
-                    if (default_database == address.default_database)
-                    {
-                        throw Exception(ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION,
-                            "There are two exactly the same ClickHouse instances {} in cluster {}",
-                            address.readableString(), task.cluster_name);
-                    }
-                    else
-                    {
-                        /* Circular replication is used.
-                         * It is when every physical node contains
-                         * replicas of different shards of the same table.
-                         * To distinguish one replica from another on the same node,
-                         * every shard is placed into separate database.
-                         * */
-                        is_circular_replicated = true;
-                        auto * query_with_table = dynamic_cast<ASTQueryWithTableAndOutput *>(task.query.get());
-                        if (!query_with_table || query_with_table->database.empty())
-                        {
-                            throw Exception(ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION,
-                                "For a distributed DDL on circular replicated cluster its table name must be qualified by database name.");
-                        }
-                        if (default_database == query_with_table->database)
-                            return;
-                    }
-                }
-                found_exact_match = true;
-                task.host_shard_num = shard_num;
-                task.host_replica_num = replica_num;
-                task.address_in_cluster = address;
-                default_database = address.default_database;
-            }
-        }
-    }
-
-    if (found_exact_match)
-        return;
-
-    LOG_WARNING(log, "Not found the exact match of host {} from task {} in cluster {} definition. Will try to find it using host name resolving.", task.host_id.readableString(), task.entry_name, task.cluster_name);
-
-    bool found_via_resolving = false;
-    for (size_t shard_num = 0; shard_num < shards.size(); ++shard_num)
-    {
-        for (size_t replica_num = 0; replica_num < shards[shard_num].size(); ++replica_num)
-        {
-            const Cluster::Address & address = shards[shard_num][replica_num];
-
-            if (auto resolved = address.getResolvedAddress();
-                resolved && (isLocalAddress(*resolved, context.getTCPPort())
-                    || (context.getTCPPortSecure() && isLocalAddress(*resolved, *context.getTCPPortSecure()))))
-            {
-                if (found_via_resolving)
-                {
-                    throw Exception(ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION,
-                        "There are two the same ClickHouse instances in cluster {} : {} and {}",
-                        task.cluster_name, task.address_in_cluster.readableString(), address.readableString());
-                }
-                else
-                {
-                    found_via_resolving = true;
-                    task.host_shard_num = shard_num;
-                    task.host_replica_num = replica_num;
-                    task.address_in_cluster = address;
-                }
-            }
-        }
-    }
-
-    if (!found_via_resolving)
-    {
-        throw Exception(ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION,
-            "Not found host {} in definition of cluster {}",
-            task.host_id.readableString(), task.cluster_name);
-    }
-    else
-    {
-        LOG_INFO(log, "Resolved host {} from task {} as host {} in definition of cluster {}", task.host_id.readableString(), task.entry_name, task.address_in_cluster.readableString(), task.cluster_name);
-    }
 }
 
 
-bool DDLWorker::tryExecuteQuery(const String & query, const DDLTask & task, ExecutionStatus & status)
+bool DDLWorker::tryExecuteQuery(const String & query, const DDLTaskBase & task, ExecutionStatus & status)
 {
     /// Add special comment at the start of query to easily identify DDL-produced queries in query_log
     String query_prefix = "/* ddl_entry=" + task.entry_name + " */ ";
@@ -573,36 +364,8 @@ bool DDLWorker::tryExecuteQuery(const String & query, const DDLTask & task, Exec
 
     try
     {
-        auto current_context = std::make_unique<Context>(context);
-        current_context->makeQueryContext();
-        current_context->setCurrentQueryId(""); // generate random query_id
-
-        if (database_replicated_ext)
-        {
-            current_context->getClientInfo().query_kind
-                = ClientInfo::QueryKind::REPLICATED_LOG_QUERY; //FIXME why do we need separate query kind?
-            current_context->setCurrentDatabase(database_replicated_ext->database_name);
-
-            if (task.we_are_initiator)
-            {
-                auto txn = std::make_shared<MetadataTransaction>();
-                current_context->initMetadataTransaction(txn);
-                txn->current_zookeeper = current_zookeeper;
-                txn->zookeeper_path = database_replicated_ext->zookeeper_path;
-                txn->ops.emplace_back(zkutil::makeRemoveRequest(task.entry_path + "/try", -1));
-                txn->ops.emplace_back(zkutil::makeCreateRequest(task.entry_path + "/committed",
-                                                                database_replicated_ext->getFullReplicaName(), zkutil::CreateMode::Persistent));
-                txn->ops.emplace_back(zkutil::makeRemoveRequest(task.active_path, -1));
-                if (!task.shard_path.empty())
-                    txn->ops.emplace_back(zkutil::makeCreateRequest(task.shard_path, task.host_id_str, zkutil::CreateMode::Persistent));
-                txn->ops.emplace_back(zkutil::makeCreateRequest(task.finished_path, task.execution_status.serializeText(), zkutil::CreateMode::Persistent));
-                //txn->ops.emplace_back(zkutil::makeSetRequest(database_replicated_ext->getReplicaPath() + "/log_ptr", toString(database_replicated_ext->first_not_executed), -1));
-            }
-        }
-        else
-            current_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY;
-
-        executeQuery(istr, ostr, false, *current_context, {});
+        auto query_context = task.makeQueryContext(context);
+        executeQuery(istr, ostr, false, *query_context, {});
     }
     catch (...)
     {
@@ -644,6 +407,7 @@ void DDLWorker::enqueueTask(DDLTaskPtr task_ptr)
             processTask(task);
             return;
         }
+        /// TODO recover zk in runMainThread(...) and retry task (why do we need another place where session is recovered?)
         catch (const Coordination::Exception & e)
         {
             if (Coordination::isHardwareError(e.code))
@@ -668,17 +432,16 @@ void DDLWorker::enqueueTask(DDLTaskPtr task_ptr)
     }
 }
 
-void DDLWorker::processTask(DDLTask & task)
+void DDLWorker::processTask(DDLTaskBase & task)
 {
     auto zookeeper = tryGetZooKeeper();
 
     LOG_DEBUG(log, "Processing task {} ({})", task.entry_name, task.entry.query);
 
-    String dummy;
-    //FIXME duplicate
-    String active_node_path = task.active_path = task.entry_path + "/active/" + task.host_id_str;
-    String finished_node_path = task.finished_path = task.entry_path + "/finished/" + task.host_id_str;
+    String active_node_path = task.getActiveNodePath();
+    String finished_node_path = task.getFinishedNodePath();
 
+    String dummy;
     auto code = zookeeper->tryCreate(active_node_path, "", zkutil::CreateMode::Ephemeral, dummy);
 
     if (code == Coordination::Error::ZOK || code == Coordination::Error::ZNODEEXISTS)
@@ -696,22 +459,16 @@ void DDLWorker::processTask(DDLTask & task)
     else
         throw Coordination::Exception(code, active_node_path);
 
-    //FIXME
-    bool is_dummy_query = database_replicated_ext && task.entry.query.empty();
-    if (!task.was_executed && !is_dummy_query)
+    if (!task.was_executed)
     {
         try
         {
-            is_circular_replicated = false;
-            parseQueryAndResolveHost(task);
-
-            ASTPtr rewritten_ast = task.query_on_cluster->getRewrittenASTWithoutOnCluster(task.address_in_cluster.default_database);
-            String rewritten_query = queryToString(rewritten_ast);
+            String rewritten_query = queryToString(task.query);
             LOG_DEBUG(log, "Executing query: {}", rewritten_query);
 
-            if (auto * query_with_table = dynamic_cast<ASTQueryWithTableAndOutput *>(rewritten_ast.get()); query_with_table)
+            StoragePtr storage;
+            if (auto * query_with_table = dynamic_cast<ASTQueryWithTableAndOutput *>(task.query.get()); query_with_table)
             {
-                StoragePtr storage;
                 if (!query_with_table->table.empty())
                 {
                     /// It's not CREATE DATABASE
@@ -719,11 +476,11 @@ void DDLWorker::processTask(DDLTask & task)
                     storage = DatabaseCatalog::instance().tryGetTable(table_id, context);
                 }
 
-                if (storage && taskShouldBeExecutedOnLeader(rewritten_ast, storage)  && !is_circular_replicated)
-                    tryExecuteQueryOnLeaderReplica(task, storage, rewritten_query, task.entry_path, zookeeper);
-                else
-                    tryExecuteQuery(rewritten_query, task, task.execution_status);
+                task.execute_on_leader = storage && taskShouldBeExecutedOnLeader(task.query, storage) && !task.is_circular_replicated;
             }
+
+            if (task.execute_on_leader)
+                tryExecuteQueryOnLeaderReplica(task, storage, rewritten_query, task.entry_path, zookeeper);
             else
                 tryExecuteQuery(rewritten_query, task, task.execution_status);
         }
@@ -753,12 +510,6 @@ void DDLWorker::processTask(DDLTask & task)
     auto res = zookeeper->tryMulti(ops, responses);
     if (res != Coordination::Error::ZNODEEXISTS && res != Coordination::Error::ZNONODE)
         zkutil::KeeperMultiException::check(res, ops, responses);
-
-    if (database_replicated_ext)
-    {
-        database_replicated_ext->executed_callback(task.entry_name, zookeeper);
-        ++(database_replicated_ext->first_not_executed);
-    }
 }
 
 
@@ -775,10 +526,10 @@ bool DDLWorker::taskShouldBeExecutedOnLeader(const ASTPtr ast_ddl, const Storage
 }
 
 bool DDLWorker::tryExecuteQueryOnLeaderReplica(
-    DDLTask & task,
+    DDLTaskBase & task,
     StoragePtr storage,
     const String & rewritten_query,
-    const String & node_path,
+    const String & /*node_path*/,
     const ZooKeeperPtr & zookeeper)
 {
     StorageReplicatedMergeTree * replicated_storage = dynamic_cast<StorageReplicatedMergeTree *>(storage.get());
@@ -787,31 +538,8 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
     if (!replicated_storage)
         throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Storage type '{}' is not supported by distributed DDL", storage->getName());
 
-    /// Generate unique name for shard node, it will be used to execute the query by only single host
-    /// Shard node name has format 'replica_name1,replica_name2,...,replica_nameN'
-    /// Where replica_name is 'replica_config_host_name:replica_port'
-    auto get_shard_name = [] (const Cluster::Addresses & shard_addresses)
-    {
-        Strings replica_names;
-        for (const Cluster::Address & address : shard_addresses)
-            replica_names.emplace_back(address.readableString());
-        std::sort(replica_names.begin(), replica_names.end());
-
-        String res;
-        for (auto it = replica_names.begin(); it != replica_names.end(); ++it)
-            res += *it + (std::next(it) != replica_names.end() ? "," : "");
-
-        return res;
-    };
-
-    String shard_node_name;
-    if (database_replicated_ext)
-        shard_node_name = database_replicated_ext->shard_name;
-    else
-        shard_node_name = get_shard_name(task.cluster->getShardsAddresses().at(task.host_shard_num));
-    String shard_path = node_path + "/shards/" + shard_node_name;
+    String shard_path = task.getShardNodePath();
     String is_executed_path = shard_path + "/executed";
-    task.shard_path = is_executed_path; //FIXME duplicate
     String tries_to_execute_path = shard_path + "/tries_to_execute";
     zookeeper->createAncestors(shard_path + "/");
 
@@ -1035,7 +763,7 @@ void DDLWorker::createStatusDirs(const std::string & node_path, const ZooKeeperP
 
 String DDLWorker::enqueueQuery(DDLLogEntry & entry)
 {
-    if (entry.hosts.empty() && !database_replicated_ext)
+    if (entry.hosts.empty())
         throw Exception("Empty host list in a distributed DDL task", ErrorCodes::LOGICAL_ERROR);
 
     auto zookeeper = getAndSetZooKeeper();
@@ -1043,27 +771,7 @@ String DDLWorker::enqueueQuery(DDLLogEntry & entry)
     String query_path_prefix = queue_dir + "/query-";
     zookeeper->createAncestors(query_path_prefix);
 
-    String node_path;
-    if (database_replicated_ext)
-    {
-        /// We cannot create sequential node and it's ephemeral child in a single transaction, so allocate sequential number another way
-        String counter_prefix = database_replicated_ext->zookeeper_path + "/counter/cnt-";
-        String counter_path = zookeeper->create(counter_prefix, "", zkutil::CreateMode::EphemeralSequential);
-        node_path = query_path_prefix + counter_path.substr(counter_prefix.size());
-
-        Coordination::Requests ops;
-        /// Query is not committed yet, but we have to write it into log to avoid reordering
-        ops.emplace_back(zkutil::makeCreateRequest(node_path, entry.toString(), zkutil::CreateMode::Persistent));
-        /// '/try' will be replaced with '/committed' or will be removed due to expired session or other error
-        ops.emplace_back(zkutil::makeCreateRequest(node_path + "/try", database_replicated_ext->getFullReplicaName(), zkutil::CreateMode::Ephemeral));
-        /// We don't need it anymore
-        ops.emplace_back(zkutil::makeRemoveRequest(counter_path, -1));
-        zookeeper->multi(ops);
-    }
-    else
-    {
-        node_path = zookeeper->create(query_path_prefix, entry.toString(), zkutil::CreateMode::PersistentSequential);
-    }
+    String node_path = zookeeper->create(query_path_prefix, entry.toString(), zkutil::CreateMode::PersistentSequential);
 
     /// Optional step
     try
@@ -1091,6 +799,7 @@ void DDLWorker::runMainThread()
         {
             auto zookeeper = getAndSetZooKeeper();
             zookeeper->createAncestors(queue_dir + "/");
+            initialize();
             initialized = true;
         }
         catch (const Coordination::Exception & e)
diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h
index 86677bfbb19..39087d05fbb 100644
--- a/src/Interpreters/DDLWorker.h
+++ b/src/Interpreters/DDLWorker.h
@@ -29,50 +29,20 @@ namespace DB
 class Context;
 class ASTAlterQuery;
 struct DDLLogEntry;
-struct DDLTask;
-using DDLTaskPtr = std::unique_ptr<DDLTask>;
+struct DDLTaskBase;
+using DDLTaskPtr = std::unique_ptr<DDLTaskBase>;
 using ZooKeeperPtr = std::shared_ptr<zkutil::ZooKeeper>;
 
 
-struct DatabaseReplicatedExtensions
-{
-    UUID database_uuid;
-    String zookeeper_path;
-    String database_name;
-    String shard_name;
-    String replica_name;
-    UInt32 first_not_executed;
-    using EntryLostCallback = std::function<void(const String & entry_name, const ZooKeeperPtr)>;
-    using EntryExecutedCallback = std::function<void(const String & entry_name, const ZooKeeperPtr)>;
-    using EntryErrorCallback = std::function<void(const String & entry_name, const ZooKeeperPtr, const std::exception_ptr &)>;
-    EntryLostCallback lost_callback;
-    EntryExecutedCallback executed_callback;
-    EntryErrorCallback error_callback;
-
-    String getReplicaPath() const
-    {
-        return zookeeper_path + "/replicas/" + shard_name + "/" + replica_name;
-    }
-
-    String getFullReplicaName() const
-    {
-        return shard_name + '|' + replica_name;
-    }
-
-    static String getLogEntryName(UInt32 log_entry_number);
-    static UInt32 getLogEntryNumber(const String & log_entry_name);
-};
-
-
 class DDLWorker
 {
 public:
     DDLWorker(int pool_size_, const std::string & zk_root_dir, const Context & context_, const Poco::Util::AbstractConfiguration * config, const String & prefix,
-              std::optional<DatabaseReplicatedExtensions> database_replicated_ext_ = std::nullopt);
-    ~DDLWorker();
+              const String & logger_name = "DDLWorker");
+    virtual ~DDLWorker();
 
     /// Pushes query into DDL queue, returns path to created node
-    String enqueueQuery(DDLLogEntry & entry);
+    virtual String enqueueQuery(DDLLogEntry & entry);
 
     /// Host ID (name:port) for logging purposes
     /// Note that in each task hosts are identified individually by name:port from initiator server cluster config
@@ -83,10 +53,7 @@ public:
 
     void shutdown();
 
-    //FIXME get rid of this method
-    void setLogPointer(UInt32 log_pointer) { database_replicated_ext->first_not_executed = log_pointer; }
-
-private:
+protected:
 
     /// Returns cached ZooKeeper session (possibly expired).
     ZooKeeperPtr tryGetZooKeeper() const;
@@ -97,14 +64,13 @@ private:
 
     void checkCurrentTasks();
     void scheduleTasks();
-    void saveTask(const String & entry_name);
 
     /// Reads entry and check that the host belongs to host list of the task
     /// Returns non-empty DDLTaskPtr if entry parsed and the check is passed
-    DDLTaskPtr initAndCheckTask(const String & entry_name, String & out_reason, const ZooKeeperPtr & zookeeper);
+    virtual DDLTaskPtr initAndCheckTask(const String & entry_name, String & out_reason, const ZooKeeperPtr & zookeeper);
 
     void enqueueTask(DDLTaskPtr task);
-    void processTask(DDLTask & task);
+    void processTask(DDLTaskBase & task);
 
     /// Check that query should be executed on leader replica only
     static bool taskShouldBeExecutedOnLeader(const ASTPtr ast_ddl, StoragePtr storage);
@@ -115,15 +81,15 @@ private:
     /// query via RemoteBlockOutputStream to leader, so to avoid such "2-phase" query execution we
     /// execute query directly on leader.
     bool tryExecuteQueryOnLeaderReplica(
-        DDLTask & task,
+        DDLTaskBase & task,
         StoragePtr storage,
         const String & rewritten_query,
         const String & node_path,
         const ZooKeeperPtr & zookeeper);
 
-    void parseQueryAndResolveHost(DDLTask & task);
+    void parseQueryAndResolveHost(DDLTaskBase & task);
 
-    bool tryExecuteQuery(const String & query, const DDLTask & task, ExecutionStatus & status);
+    bool tryExecuteQuery(const String & query, const DDLTaskBase & task, ExecutionStatus & status);
 
     /// Checks and cleanups queue's nodes
     void cleanupQueue(Int64 current_time_seconds, const ZooKeeperPtr & zookeeper);
@@ -131,17 +97,16 @@ private:
     /// Init task node
     static void createStatusDirs(const std::string & node_path, const ZooKeeperPtr & zookeeper);
 
+    virtual void initialize() {}
 
     void runMainThread();
     void runCleanupThread();
 
     void attachToThreadGroup();
 
-private:
-    std::atomic<bool> is_circular_replicated = false;
+protected:
     Context context;
     Poco::Logger * log;
-    std::optional<DatabaseReplicatedExtensions> database_replicated_ext;
 
     std::string host_fqdn;      /// current host domain name
     std::string host_fqdn_id;   /// host_name:port
@@ -151,7 +116,8 @@ private:
     ZooKeeperPtr current_zookeeper;
 
     /// Save state of executed task to avoid duplicate execution on ZK error
-    std::vector<std::string> last_tasks;
+    //std::vector<std::string> last_tasks;
+    std::optional<String> last_entry_name;
 
     std::shared_ptr<Poco::Event> queue_updated_event = std::make_shared<Poco::Event>();
     std::shared_ptr<Poco::Event> cleanup_event = std::make_shared<Poco::Event>();
diff --git a/tests/integration/test_replicated_database/configs/config.xml b/tests/integration/test_replicated_database/configs/config.xml
new file mode 100644
index 00000000000..d751454437c
--- /dev/null
+++ b/tests/integration/test_replicated_database/configs/config.xml
@@ -0,0 +1,3 @@
+<yandex>
+    <database_atomic_delay_before_drop_table_sec>10</database_atomic_delay_before_drop_table_sec>
+</yandex>
diff --git a/tests/integration/test_replicated_database/configs/disable_snapshots.xml b/tests/integration/test_replicated_database/configs/disable_snapshots.xml
deleted file mode 100644
index 9a656bdcea1..00000000000
--- a/tests/integration/test_replicated_database/configs/disable_snapshots.xml
+++ /dev/null
@@ -1,3 +0,0 @@
-<yandex>
-    <database_replicated_snapshot_period>0</database_replicated_snapshot_period>
-</yandex>
diff --git a/tests/integration/test_replicated_database/configs/snapshot_each_query.xml b/tests/integration/test_replicated_database/configs/snapshot_each_query.xml
deleted file mode 100644
index 6eae1d9d992..00000000000
--- a/tests/integration/test_replicated_database/configs/snapshot_each_query.xml
+++ /dev/null
@@ -1,3 +0,0 @@
-<yandex>
-    <database_replicated_snapshot_period>1</database_replicated_snapshot_period>
-</yandex>
diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py
index 11bfbad393b..8c5a25b3fe7 100644
--- a/tests/integration/test_replicated_database/test.py
+++ b/tests/integration/test_replicated_database/test.py
@@ -7,11 +7,11 @@ from helpers.test_tools import assert_eq_with_retry
 
 cluster = ClickHouseCluster(__file__)
 
-main_node = cluster.add_instance('main_node', main_configs=['configs/disable_snapshots.xml'], with_zookeeper=True, stay_alive=True, macros={"shard": 1, "replica": 1})
-dummy_node = cluster.add_instance('dummy_node', main_configs=['configs/disable_snapshots.xml'], with_zookeeper=True, macros={"shard": 1, "replica": 2})
-competing_node = cluster.add_instance('competing_node', main_configs=['configs/disable_snapshots.xml'], with_zookeeper=True, macros={"shard": 1, "replica": 3})
-snapshotting_node = cluster.add_instance('snapshotting_node', main_configs=['configs/snapshot_each_query.xml'], with_zookeeper=True, macros={"shard": 2, "replica": 1})
-snapshot_recovering_node = cluster.add_instance('snapshot_recovering_node', main_configs=['configs/disable_snapshots.xml'], with_zookeeper=True, macros={"shard": 2, "replica": 2})
+main_node = cluster.add_instance('main_node', main_configs=['configs/config.xml'], with_zookeeper=True, stay_alive=True, macros={"shard": 1, "replica": 1})
+dummy_node = cluster.add_instance('dummy_node', main_configs=['configs/config.xml'], with_zookeeper=True, macros={"shard": 1, "replica": 2})
+competing_node = cluster.add_instance('competing_node', main_configs=['configs/config.xml'], with_zookeeper=True, macros={"shard": 1, "replica": 3})
+snapshotting_node = cluster.add_instance('snapshotting_node', main_configs=['configs/config.xml'], with_zookeeper=True, macros={"shard": 2, "replica": 1})
+snapshot_recovering_node = cluster.add_instance('snapshot_recovering_node', main_configs=['configs/config.xml'], with_zookeeper=True, macros={"shard": 2, "replica": 2})
 
 uuid_regex = re.compile("[0-9a-f]{8}\-[0-9a-f]{4}\-[0-9a-f]{4}\-[0-9a-f]{4}\-[0-9a-f]{12}")
 def assert_create_query(nodes, table_name, expected):
@@ -70,9 +70,10 @@ def test_simple_alter_table(started_cluster, engine):
     assert_create_query([main_node, dummy_node], name, expected)
 
 
+@pytest.mark.dependency(depends=['test_simple_alter_table'])
 @pytest.mark.parametrize("engine", ['MergeTree', 'ReplicatedMergeTree'])
 def test_create_replica_after_delay(started_cluster, engine):
-    competing_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica3');")
+    competing_node.query("CREATE DATABASE IF NOT EXISTS testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica3');")
 
     name  = "testdb.alter_test_{}".format(engine)
     main_node.query("ALTER TABLE {} ADD COLUMN Added3 UInt32;".format(name))
@@ -113,6 +114,7 @@ def test_alters_from_different_replicas(started_cluster):
 
     assert_create_query([main_node, competing_node], "testdb.concurrent_test", expected)
 
+@pytest.mark.dependency(depends=['test_alters_from_different_replicas'])
 def test_drop_and_create_table(started_cluster):
     main_node.query("DROP TABLE testdb.concurrent_test")
     main_node.query("CREATE TABLE testdb.concurrent_test "
@@ -125,6 +127,7 @@ def test_drop_and_create_table(started_cluster):
 
     assert_create_query([main_node, competing_node], "testdb.concurrent_test", expected)
 
+@pytest.mark.dependency(depends=['test_drop_and_create_table'])
 def test_replica_restart(started_cluster):
     main_node.restart_clickhouse()
 
@@ -134,14 +137,18 @@ def test_replica_restart(started_cluster):
 
     assert_create_query([main_node, competing_node], "testdb.concurrent_test", expected)
 
+
+@pytest.mark.dependency(depends=['test_create_replica_after_delay'])
 def test_snapshot_and_snapshot_recover(started_cluster):
     #FIXME bad test
     snapshotting_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica4');")
     time.sleep(5)
     snapshot_recovering_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica5');")
     time.sleep(5)
-    assert snapshotting_node.query("desc table testdb.alter_test") == snapshot_recovering_node.query("desc table testdb.alter_test")
+    assert snapshotting_node.query("desc table testdb.alter_test_MergeTree") == snapshot_recovering_node.query("desc table testdb.alter_test_MergeTree")
+    assert snapshotting_node.query("desc table testdb.alter_test_ReplicatedMergeTree") == snapshot_recovering_node.query("desc table testdb.alter_test_ReplicatedMergeTree")
 
+@pytest.mark.dependency(depends=['test_replica_restart'])
 def test_drop_and_create_replica(started_cluster):
     main_node.query("DROP DATABASE testdb")
     main_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica1');")

From ab197a49c82db8c9e4aae3984a8da91a0e120728 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Sun, 29 Nov 2020 14:45:32 +0300
Subject: [PATCH 0063/1238] better code, fixes

---
 src/Databases/DatabaseAtomic.cpp              |  72 +++-----
 src/Databases/DatabaseReplicated.cpp          | 160 +++++++++---------
 src/Databases/DatabaseReplicated.h            |  31 ++--
 src/Databases/DatabaseReplicatedWorker.cpp    |  20 +--
 src/Databases/ya.make                         |   1 +
 src/Interpreters/DDLTask.cpp                  |  43 ++---
 src/Interpreters/DDLTask.h                    |  32 +---
 src/Interpreters/DDLWorker.cpp                |  59 ++++---
 src/Interpreters/DDLWorker.h                  |   5 +-
 src/Interpreters/InterpreterCreateQuery.cpp   |   2 +-
 src/Interpreters/executeDDLQueryOnCluster.cpp |  12 +-
 src/Interpreters/executeDDLQueryOnCluster.h   |   1 +
 .../test_replicated_database/test.py          |   9 +-
 13 files changed, 194 insertions(+), 253 deletions(-)

diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp
index a444d9cc200..b60adf44e51 100644
--- a/src/Databases/DatabaseAtomic.cpp
+++ b/src/Databases/DatabaseAtomic.cpp
@@ -120,13 +120,10 @@ void DatabaseAtomic::dropTable(const Context & context, const String & table_nam
         table_metadata_path_drop = DatabaseCatalog::instance().getPathForDroppedMetadata(table->getStorageID());
 
         if (auto txn = context.getMetadataTransaction())
-        {
-            String metadata_zk_path = txn->zookeeper_path + "/metadata/" + escapeForFileName(table_name);
-            txn->ops.emplace_back(zkutil::makeRemoveRequest(metadata_zk_path, -1));
-            txn->current_zookeeper->multi(txn->ops);     /// Commit point (a sort of) for Replicated database
-            /// NOTE: replica will be lost if server crashes before the following rename
-            /// TODO better detection and recovery
-        }
+            txn->commit();      /// Commit point (a sort of) for Replicated database
+
+        /// NOTE: replica will be lost if server crashes before the following rename
+        /// TODO better detection and recovery
 
         Poco::File(table_metadata_path).renameTo(table_metadata_path_drop);    /// Mark table as dropped
         DatabaseWithDictionaries::detachTableUnlocked(table_name, lock);       /// Should never throw
@@ -245,31 +242,10 @@ void DatabaseAtomic::renameTable(const Context & context, const String & table_n
 
     /// Table renaming actually begins here
     if (auto txn = context.getMetadataTransaction())
-    {
-        String statement;
-        String statement_to;
-        {
-            ReadBufferFromFile in(old_metadata_path, 4096);
-            readStringUntilEOF(statement, in);
-            if (exchange)
-            {
-                ReadBufferFromFile in_to(new_metadata_path, 4096);
-                readStringUntilEOF(statement_to, in_to);
-            }
-        }
-        String metadata_zk_path = txn->zookeeper_path + "/metadata/" + escapeForFileName(table_name);
-        String metadata_zk_path_to = txn->zookeeper_path + "/metadata/" + escapeForFileName(to_table_name);
-        txn->ops.emplace_back(zkutil::makeRemoveRequest(metadata_zk_path, -1));
-        if (exchange)
-        {
-            txn->ops.emplace_back(zkutil::makeRemoveRequest(metadata_zk_path_to, -1));
-            txn->ops.emplace_back(zkutil::makeCreateRequest(metadata_zk_path, statement_to, zkutil::CreateMode::Persistent));
-        }
-        txn->ops.emplace_back(zkutil::makeCreateRequest(metadata_zk_path_to, statement, zkutil::CreateMode::Persistent));
-        txn->current_zookeeper->multi(txn->ops);     /// Commit point (a sort of) for Replicated database
-        /// NOTE: replica will be lost if server crashes before the following rename
-        /// TODO better detection and recovery
-    }
+        txn->commit();     /// Commit point (a sort of) for Replicated database
+
+    /// NOTE: replica will be lost if server crashes before the following rename
+    /// TODO better detection and recovery
 
     if (exchange)
         renameExchange(old_metadata_path, new_metadata_path);
@@ -326,15 +302,10 @@ void DatabaseAtomic::commitCreateTable(const ASTCreateQuery & query, const Stora
         locked_uuid = true;
 
         if (auto txn = query_context.getMetadataTransaction())
-        {
-            String metadata_zk_path = txn->zookeeper_path + "/metadata/" + escapeForFileName(query.table);
-            String statement = getObjectDefinitionFromCreateQuery(query.clone());
-            /// zk::multi(...) will throw if `metadata_zk_path` exists
-            txn->ops.emplace_back(zkutil::makeCreateRequest(metadata_zk_path, statement, zkutil::CreateMode::Persistent));
-            txn->current_zookeeper->multi(txn->ops);     /// Commit point (a sort of) for Replicated database
-            /// NOTE: replica will be lost if server crashes before the following renameNoReplace(...)
-            /// TODO better detection and recovery
-        }
+            txn->commit();     /// Commit point (a sort of) for Replicated database
+
+        /// NOTE: replica will be lost if server crashes before the following renameNoReplace(...)
+        /// TODO better detection and recovery
 
         /// It throws if `table_metadata_path` already exists (it's possible if table was detached)
         renameNoReplace(table_metadata_tmp_path, table_metadata_path);  /// Commit point (a sort of)
@@ -352,7 +323,8 @@ void DatabaseAtomic::commitCreateTable(const ASTCreateQuery & query, const Stora
         tryCreateSymlink(query.table, table_data_path);
 }
 
-void DatabaseAtomic::commitAlterTable(const StorageID & table_id, const String & table_metadata_tmp_path, const String & table_metadata_path, const String & statement, const Context & query_context)
+void DatabaseAtomic::commitAlterTable(const StorageID & table_id, const String & table_metadata_tmp_path, const String & table_metadata_path,
+                                      const String & /*statement*/, const Context & query_context)
 {
     bool check_file_exists = true;
     SCOPE_EXIT({ std::error_code code; if (check_file_exists) std::filesystem::remove(table_metadata_tmp_path, code); });
@@ -363,17 +335,11 @@ void DatabaseAtomic::commitAlterTable(const StorageID & table_id, const String &
     if (table_id.uuid != actual_table_id.uuid)
         throw Exception("Cannot alter table because it was renamed", ErrorCodes::CANNOT_ASSIGN_ALTER);
 
-    if (&query_context != &query_context.getGlobalContext())    // FIXME
-    {
-        if (auto txn = query_context.getMetadataTransaction())
-        {
-            String metadata_zk_path = txn->zookeeper_path + "/metadata/" + escapeForFileName(table_id.table_name);
-            txn->ops.emplace_back(zkutil::makeSetRequest(metadata_zk_path, statement, -1));
-            txn->current_zookeeper->multi(txn->ops); /// Commit point (a sort of) for Replicated database
-            /// NOTE: replica will be lost if server crashes before the following rename
-            /// TODO better detection and recovery
-        }
-    }
+    if (auto txn = query_context.getMetadataTransaction())
+        txn->commit();      /// Commit point (a sort of) for Replicated database
+
+    /// NOTE: replica will be lost if server crashes before the following rename
+    /// TODO better detection and recovery
 
     check_file_exists = renameExchangeIfSupported(table_metadata_tmp_path, table_metadata_path);
     if (!check_file_exists)
diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index eef1b98afe2..418eaf567a4 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -34,6 +34,7 @@ namespace ErrorCodes
     extern const int REPLICA_IS_ALREADY_EXIST;
     extern const int DATABASE_REPLICATION_FAILED;
     extern const int UNKNOWN_DATABASE;
+    extern const int NOT_IMPLEMENTED;
 }
 
 zkutil::ZooKeeperPtr DatabaseReplicated::getZooKeeper() const
@@ -106,9 +107,6 @@ DatabaseReplicated::DatabaseReplicated(
         /// Throws if replica with the same name was created concurrently
         createReplicaNodesInZooKeeper(current_zookeeper);
     }
-
-    snapshot_period = 1; //context_.getConfigRef().getInt("database_replicated_snapshot_period", 10);
-    LOG_DEBUG(log, "Snapshot period is set to {} log entries per one snapshot", snapshot_period);
 }
 
 bool DatabaseReplicated::createDatabaseNodesInZooKeeper(const zkutil::ZooKeeperPtr & current_zookeeper)
@@ -171,8 +169,6 @@ void DatabaseReplicated::loadStoredObjects(Context & context, bool has_force_res
 {
     DatabaseAtomic::loadStoredObjects(context, has_force_restore_data_flag, force_attach);
 
-    //recoverLostReplica(global_context.getZooKeeper(), 0, true); //FIXME
-
     ddl_worker = std::make_unique<DatabaseReplicatedDDLWorker>(this, global_context);
 }
 
@@ -209,71 +205,6 @@ void DatabaseReplicated::onUnexpectedLogEntry(const String & entry_name, const Z
                                                "Got log entry '{}' when expected entry number {}");
 }
 
-void DatabaseReplicated::removeOutdatedSnapshotsAndLog()
-{
-    /// This method removes all snapshots and logged queries
-    /// that no longer will be in use by current replicas or
-    /// new coming ones.
-    /// Each registered replica has its state in ZooKeeper.
-    /// Therefore, snapshots and logged queries that are less
-    /// than a least advanced replica are removed.
-    /// It does not interfere with a new coming replica
-    /// metadata loading from snapshot
-    /// because the replica will use the latest snapshot available
-    /// and this snapshot will set the last executed log query
-    /// to a greater one than the least advanced current replica.
-    auto current_zookeeper = getZooKeeper();
-    Strings replica_states = current_zookeeper->getChildren(zookeeper_path + "/replicas");
-    //TODO do not use log pointers to determine which entries to remove if there are staled pointers.
-    // We can just remove all entries older than previous snapshot version.
-    // Possible invariant: store all entries since last snapshot, replica becomes lost when it cannot get log entry.
-    auto least_advanced = std::min_element(replica_states.begin(), replica_states.end());
-    Strings snapshots = current_zookeeper->getChildren(zookeeper_path + "/snapshots");
-
-    if (snapshots.size() < 2)
-    {
-        return;
-    }
-
-    std::sort(snapshots.begin(), snapshots.end());
-    auto still_useful = std::lower_bound(snapshots.begin(), snapshots.end(), *least_advanced);
-    snapshots.erase(still_useful, snapshots.end());
-    for (const String & snapshot : snapshots)
-    {
-        current_zookeeper->tryRemoveRecursive(zookeeper_path + "/snapshots/" + snapshot);
-    }
-
-    Strings log_entry_names = current_zookeeper->getChildren(zookeeper_path + "/log");
-    std::sort(log_entry_names.begin(), log_entry_names.end());
-    auto still_useful_log = std::upper_bound(log_entry_names.begin(), log_entry_names.end(), *still_useful);
-    log_entry_names.erase(still_useful_log, log_entry_names.end());
-    for (const String & log_entry_name : log_entry_names)
-    {
-        String log_entry_path = zookeeper_path + "/log/" + log_entry_name;
-        current_zookeeper->tryRemove(log_entry_path);
-    }
-}
-
-void DatabaseReplicated::onExecutedLogEntry(const String & /*entry_name*/, const ZooKeeperPtr & /*zookeeper*/)
-{
-
-}
-
-void DatabaseReplicated::writeLastExecutedToDiskAndZK()
-{
-    auto current_zookeeper = getZooKeeper();
-    current_zookeeper->createOrUpdate(
-        zookeeper_path + "/replicas/" + replica_name, last_executed_log_entry, zkutil::CreateMode::Persistent);
-
-    String metadata_file = getMetadataPath() + ".last_entry";
-    WriteBufferFromFile out(metadata_file, last_executed_log_entry.size(), O_WRONLY | O_CREAT);
-    writeString(last_executed_log_entry, out);
-    out.next();
-    if (global_context.getSettingsRef().fsync_metadata)
-        out.sync();
-    out.close();
-}
-
 
 BlockIO DatabaseReplicated::propose(const ASTPtr & query)
 {
@@ -302,14 +233,14 @@ BlockIO DatabaseReplicated::propose(const ASTPtr & query)
 
     //FIXME need list of all replicas, we can obtain it from zk
     Strings hosts_to_wait;
-    hosts_to_wait.emplace_back(shard_name + '|' +replica_name);
-    auto stream = std::make_shared<DDLQueryStatusInputStream>(node_path, entry, global_context);
+    hosts_to_wait.emplace_back(getFullReplicaName());
+    auto stream = std::make_shared<DDLQueryStatusInputStream>(node_path, entry, global_context, hosts_to_wait);
     io.in = std::move(stream);
     return io;
 }
 
 
-void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 from_snapshot, bool /*create*/)
+void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 from_snapshot)
 {
     LOG_WARNING(log, "Will recover replica");
 
@@ -339,14 +270,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
         InterpreterCreateQuery(query_ast, query_context).execute();
     }
 
-    //if (create)
-    //    return;
-
     current_zookeeper->set(replica_path + "/log_ptr", toString(from_snapshot));
-    last_executed_log_entry = from_snapshot;
-    //ddl_worker->setLogPointer(from_snapshot); //FIXME
-
-    //writeLastExecutedToDiskAndZK();
 }
 
 ASTPtr DatabaseReplicated::parseQueryFromMetadataInZooKeeper(const String & node_name, const String & query)
@@ -384,4 +308,80 @@ void DatabaseReplicated::shutdown()
     DatabaseAtomic::shutdown();
 }
 
+
+void DatabaseReplicated::dropTable(const Context & context, const String & table_name, bool no_delay)
+{
+    auto txn = context.getMetadataTransaction();
+    //assert(!ddl_worker->isCurrentlyActive() || txn /*|| called from DROP DATABASE */);
+    if (txn && txn->is_initial_query)
+    {
+        String metadata_zk_path = zookeeper_path + "/metadata/" + escapeForFileName(table_name);
+        txn->ops.emplace_back(zkutil::makeRemoveRequest(metadata_zk_path, -1));
+    }
+    DatabaseAtomic::dropTable(context, table_name, no_delay);
+}
+
+void DatabaseReplicated::renameTable(const Context & context, const String & table_name, IDatabase & to_database,
+                                     const String & to_table_name, bool exchange, bool dictionary)
+{
+    auto txn = context.getMetadataTransaction();
+    assert(txn);
+
+    if (txn->is_initial_query)
+    {
+        String statement;
+        String statement_to;
+        {
+            //FIXME It's not atomic (however we have only one thread)
+            ReadBufferFromFile in(getObjectMetadataPath(table_name), 4096);
+            readStringUntilEOF(statement, in);
+            if (exchange)
+            {
+                ReadBufferFromFile in_to(to_database.getObjectMetadataPath(to_table_name), 4096);
+                readStringUntilEOF(statement_to, in_to);
+            }
+        }
+        String metadata_zk_path = txn->zookeeper_path + "/metadata/" + escapeForFileName(table_name);
+        String metadata_zk_path_to = txn->zookeeper_path + "/metadata/" + escapeForFileName(to_table_name);
+        txn->ops.emplace_back(zkutil::makeRemoveRequest(metadata_zk_path, -1));
+        if (exchange)
+        {
+            txn->ops.emplace_back(zkutil::makeRemoveRequest(metadata_zk_path_to, -1));
+            txn->ops.emplace_back(zkutil::makeCreateRequest(metadata_zk_path, statement_to, zkutil::CreateMode::Persistent));
+        }
+        txn->ops.emplace_back(zkutil::makeCreateRequest(metadata_zk_path_to, statement, zkutil::CreateMode::Persistent));
+    }
+
+    DatabaseAtomic::renameTable(context, table_name, to_database, to_table_name, exchange, dictionary);
+}
+
+void DatabaseReplicated::commitCreateTable(const ASTCreateQuery & query, const StoragePtr & table,
+                       const String & table_metadata_tmp_path, const String & table_metadata_path,
+                       const Context & query_context)
+{
+    auto txn = query_context.getMetadataTransaction();
+    assert(!ddl_worker->isCurrentlyActive() || txn);
+    if (txn && txn->is_initial_query)
+    {
+        String metadata_zk_path = txn->zookeeper_path + "/metadata/" + escapeForFileName(query.table);
+        String statement = getObjectDefinitionFromCreateQuery(query.clone());
+        /// zk::multi(...) will throw if `metadata_zk_path` exists
+        txn->ops.emplace_back(zkutil::makeCreateRequest(metadata_zk_path, statement, zkutil::CreateMode::Persistent));
+    }
+    DatabaseAtomic::commitCreateTable(query, table, table_metadata_tmp_path, table_metadata_path, query_context);
+}
+
+void DatabaseReplicated::commitAlterTable(const StorageID & table_id,
+                                          const String & table_metadata_tmp_path, const String & table_metadata_path,
+                                          const String & statement, const Context & query_context)
+{
+    auto txn = query_context.getMetadataTransaction();
+    if (txn && txn->is_initial_query)
+    {
+        String metadata_zk_path = txn->zookeeper_path + "/metadata/" + escapeForFileName(table_id.table_name);
+        txn->ops.emplace_back(zkutil::makeSetRequest(metadata_zk_path, statement, -1));
+    }
+    DatabaseAtomic::commitAlterTable(table_id, table_metadata_tmp_path, table_metadata_path, statement, query_context);
+}
+
 }
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index d6cd93773cf..8085c234af4 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -46,6 +46,16 @@ public:
 
     ~DatabaseReplicated() override;
 
+    void dropTable(const Context &, const String & table_name, bool no_delay) override;
+    void renameTable(const Context & context, const String & table_name, IDatabase & to_database,
+                     const String & to_table_name, bool exchange, bool dictionary) override;
+    void commitCreateTable(const ASTCreateQuery & query, const StoragePtr & table,
+                           const String & table_metadata_tmp_path, const String & table_metadata_path,
+                           const Context & query_context) override;
+    void commitAlterTable(const StorageID & table_id,
+                          const String & table_metadata_tmp_path, const String & table_metadata_path,
+                          const String & statement, const Context & query_context) override;
+
     void drop(const Context & /*context*/) override;
 
     String getEngineName() const override { return "Replicated"; }
@@ -65,17 +75,8 @@ private:
     bool createDatabaseNodesInZooKeeper(const ZooKeeperPtr & current_zookeeper);
     void createReplicaNodesInZooKeeper(const ZooKeeperPtr & current_zookeeper);
 
-    //void runBackgroundLogExecutor();
-    void writeLastExecutedToDiskAndZK();
-
-    //void loadMetadataFromSnapshot();
-    void removeOutdatedSnapshotsAndLog();
-
-
     void onUnexpectedLogEntry(const String & entry_name, const ZooKeeperPtr & zookeeper);
-    void recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 from_snapshot, bool create = false);
-
-    void onExecutedLogEntry(const String & entry_name, const ZooKeeperPtr & zookeeper);
+    void recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 from_snapshot);
 
     ASTPtr parseQueryFromMetadataInZooKeeper(const String & node_name, const String & query);
 
@@ -86,19 +87,9 @@ private:
 
     UInt32 log_entry_to_execute;
 
-    std::mutex log_name_mutex;
-    String log_name_to_exec_with_result;
-
-    int snapshot_period;
-
-    String last_executed_log_entry = "";
-
     zkutil::ZooKeeperPtr getZooKeeper() const;
 
     std::unique_ptr<DatabaseReplicatedDDLWorker> ddl_worker;
-
-
-
 };
 
 }
diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp
index 869b888d3ad..29599d4d66d 100644
--- a/src/Databases/DatabaseReplicatedWorker.cpp
+++ b/src/Databases/DatabaseReplicatedWorker.cpp
@@ -96,19 +96,19 @@ DDLTaskPtr DatabaseReplicatedDDLWorker::initAndCheckTask(const String & entry_na
         throw Exception(ErrorCodes::LOGICAL_ERROR, "should be unreachable");
     }
 
-    auto error = task->tryParseEntry(node_data);
-    if (error)
-    {
-        LOG_ERROR(log, "Cannot parse query from '{}': {}", node_data, *error);
-        database->onUnexpectedLogEntry(entry_name, zookeeper);
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "should be unreachable");
-    }
+    task->entry.parse(node_data);
 
-    task->parseQueryFromEntry(context);
+    if (task->entry.query.empty())
+    {
+        //TODO better way to determine special entries
+        task->was_executed = true;
+    }
+    else
+    {
+        task->parseQueryFromEntry(context);
+    }
 
     return task;
 }
 
-
-
 }
diff --git a/src/Databases/ya.make b/src/Databases/ya.make
index 09d3dc38cb2..38f79532080 100644
--- a/src/Databases/ya.make
+++ b/src/Databases/ya.make
@@ -17,6 +17,7 @@ SRCS(
     DatabaseOnDisk.cpp
     DatabaseOrdinary.cpp
     DatabaseReplicated.cpp
+    DatabaseReplicatedWorker.cpp
     DatabaseWithDictionaries.cpp
     DatabasesCommon.cpp
     MySQL/ConnectionMySQLSettings.cpp
diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp
index 0bc98dfd0dd..9ef7352ceb4 100644
--- a/src/Interpreters/DDLTask.cpp
+++ b/src/Interpreters/DDLTask.cpp
@@ -86,20 +86,6 @@ void DDLLogEntry::parse(const String & data)
 }
 
 
-std::optional<String> DDLTaskBase::tryParseEntry(const String & data)
-{
-    std::optional<String> error;
-    try
-    {
-        entry.parse(data);
-    }
-    catch (...)
-    {
-        error = ExecutionStatus::fromCurrentException().serializeText();
-    }
-    return error;
-}
-
 void DDLTaskBase::parseQueryFromEntry(const Context & context)
 {
     const char * begin = entry.query.data();
@@ -313,22 +299,25 @@ std::unique_ptr<Context> DatabaseReplicatedTask::makeQueryContext(Context & from
     query_context->getClientInfo().query_kind = ClientInfo::QueryKind::REPLICATED_LOG_QUERY; //FIXME why do we need separate query kind?
     query_context->setCurrentDatabase(database->getDatabaseName());
 
+    auto txn = std::make_shared<MetadataTransaction>();
+    query_context->initMetadataTransaction(txn);
+    txn->current_zookeeper = from_context.getZooKeeper();
+    txn->zookeeper_path = database->zookeeper_path;
+    txn->is_initial_query = we_are_initiator;
+
     if (we_are_initiator)
     {
-        auto txn = std::make_shared<MetadataTransaction>();
-        query_context->initMetadataTransaction(txn);
-        txn->current_zookeeper = from_context.getZooKeeper();
-        txn->zookeeper_path = database->zookeeper_path;
         txn->ops.emplace_back(zkutil::makeRemoveRequest(entry_path + "/try", -1));
         txn->ops.emplace_back(zkutil::makeCreateRequest(entry_path + "/committed", host_id_str, zkutil::CreateMode::Persistent));
         txn->ops.emplace_back(zkutil::makeRemoveRequest(getActiveNodePath(), -1));
-        if (execute_on_leader)
-            txn->ops.emplace_back(zkutil::makeCreateRequest(getShardNodePath() + "/executed", host_id_str, zkutil::CreateMode::Persistent));
-        txn->ops.emplace_back(zkutil::makeCreateRequest(getFinishedNodePath(), execution_status.serializeText(), zkutil::CreateMode::Persistent));
-        txn->ops.emplace_back(zkutil::makeSetRequest(database->replica_path + "/log_ptr", toString(getLogEntryNumber(entry_name)), -1));
         txn->ops.emplace_back(zkutil::makeSetRequest(database->zookeeper_path + "/max_log_ptr", toString(getLogEntryNumber(entry_name)), -1));
     }
 
+    if (execute_on_leader)
+        txn->ops.emplace_back(zkutil::makeCreateRequest(getShardNodePath() + "/executed", host_id_str, zkutil::CreateMode::Persistent));
+    txn->ops.emplace_back(zkutil::makeCreateRequest(getFinishedNodePath(), execution_status.serializeText(), zkutil::CreateMode::Persistent));
+    txn->ops.emplace_back(zkutil::makeSetRequest(database->replica_path + "/log_ptr", toString(getLogEntryNumber(entry_name)), -1));
+
     return query_context;
 }
 
@@ -347,15 +336,9 @@ UInt32 DatabaseReplicatedTask::getLogEntryNumber(const String & log_entry_name)
     return parse<UInt32>(log_entry_name.substr(strlen(name)));
 }
 
-void DatabaseReplicatedTask::parseQueryFromEntry(const Context & context)
+void MetadataTransaction::commit()
 {
-    if (entry.query.empty())
-    {
-        was_executed = true;
-        return;
-    }
-
-    DDLTaskBase::parseQueryFromEntry(context);
+    current_zookeeper->multi(ops);
 }
 
 }
diff --git a/src/Interpreters/DDLTask.h b/src/Interpreters/DDLTask.h
index 19d92a1bc78..2db1a696384 100644
--- a/src/Interpreters/DDLTask.h
+++ b/src/Interpreters/DDLTask.h
@@ -64,12 +64,6 @@ struct DDLTaskBase
     const String entry_name;
     const String entry_path;
 
-    DDLTaskBase(const String & name, const String & path) : entry_name(name), entry_path(path) {}
-    virtual ~DDLTaskBase() = default;
-
-    std::optional<String> tryParseEntry(const String & data);
-    virtual void parseQueryFromEntry(const Context & context);
-
     DDLLogEntry entry;
 
     String host_id_str;
@@ -81,6 +75,11 @@ struct DDLTaskBase
     ExecutionStatus execution_status;
     bool was_executed = false;
 
+    DDLTaskBase(const String & name, const String & path) : entry_name(name), entry_path(path) {}
+    virtual ~DDLTaskBase() = default;
+
+    void parseQueryFromEntry(const Context & context);
+
     virtual String getShardID() const = 0;
 
     virtual std::unique_ptr<Context> makeQueryContext(Context & from_context) const;
@@ -93,26 +92,12 @@ struct DDLTaskBase
 
 struct DDLTask : public DDLTaskBase
 {
-    /// Stages of task lifetime correspond ordering of these data fields:
-
     DDLTask(const String & name, const String & path) : DDLTaskBase(name, path) {}
 
     bool findCurrentHostID(const Context & global_context, Poco::Logger * log);
 
     void setClusterInfo(const Context & context, Poco::Logger * log);
 
-
-    /// Stage 2: resolve host_id and check that
-
-
-    /// Stage 3.1: parse query
-
-    /// Stage 3.2: check cluster and find the host in cluster
-
-    /// Stage 3.3: execute query
-
-    /// Stage 4: commit results to ZooKeeper
-
     String getShardID() const override;
 
 private:
@@ -131,8 +116,6 @@ struct DatabaseReplicatedTask : public DDLTaskBase
 {
     DatabaseReplicatedTask(const String & name, const String & path, DatabaseReplicated * database_);
 
-    void parseQueryFromEntry(const Context & context) override;
-
     String getShardID() const override;
     std::unique_ptr<Context> makeQueryContext(Context & from_context) const override;
 
@@ -148,14 +131,15 @@ struct MetadataTransaction
 {
     ZooKeeperPtr current_zookeeper;
     String zookeeper_path;
+    bool is_initial_query;
     Coordination::Requests ops;
 
-
-
     void addOps(Coordination::Requests & other_ops)
     {
         std::move(ops.begin(), ops.end(), std::back_inserter(other_ops));
     }
+
+    void commit();
 };
 
 }
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 0399687a4d8..12f4c42b467 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -36,11 +36,8 @@ namespace ErrorCodes
 {
     extern const int NOT_IMPLEMENTED;
     extern const int LOGICAL_ERROR;
-    extern const int INCONSISTENT_CLUSTER_DEFINITION;
     extern const int TIMEOUT_EXCEEDED;
-    extern const int UNKNOWN_TYPE_OF_QUERY;
     extern const int UNFINISHED;
-    extern const int QUERY_IS_PROHIBITED;
 }
 
 
@@ -226,7 +223,6 @@ void DDLWorker::recoverZooKeeper()
     }
 }
 
-
 DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_reason, const ZooKeeperPtr & zookeeper)
 {
     String node_data;
@@ -241,36 +237,50 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r
         return {};
     }
 
-    auto error = task->tryParseEntry(node_data);
-    if (error)
+    auto write_error_status = [&](const String & host_id, const String & error_message, const String & reason)
+    {
+        LOG_ERROR(log, "Cannot parse DDL task {}: {}. Will try to send error status: {}", entry_name, reason, error_message);
+        createStatusDirs(entry_path, zookeeper);
+        zookeeper->tryCreate(entry_path + "/finished/" + host_id, error_message, zkutil::CreateMode::Persistent);
+    };
+
+    try
+    {
+        /// Stage 1: parse entry
+        task->entry.parse(node_data);
+    }
+    catch (...)
     {
         /// What should we do if we even cannot parse host name and therefore cannot properly submit execution status?
         /// We can try to create fail node using FQDN if it equal to host name in cluster config attempt will be successful.
         /// Otherwise, that node will be ignored by DDLQueryStatusInputStream.
-        LOG_ERROR(log, "Cannot parse DDL task {}, will try to send error status: {}", entry_name, *error);
-        try
-        {
-            createStatusDirs(entry_path, zookeeper);
-            zookeeper->tryCreate(entry_path + "/finished/" + host_fqdn_id, *error, zkutil::CreateMode::Persistent);
-        }
-        catch (...)
-        {
-            tryLogCurrentException(log, "Can't report the task has invalid format");
-        }
-
         out_reason = "Incorrect task format";
+        write_error_status(host_fqdn_id, ExecutionStatus::fromCurrentException().serializeText(), out_reason);
         return {};
     }
 
+    /// Stage 2: resolve host_id and check if we should execute query or not
     if (!task->findCurrentHostID(context, log))
     {
         out_reason = "There is no a local address in host list";
         return {};
     }
 
-    task->parseQueryFromEntry(context);
-    task->setClusterInfo(context, log);
+    try
+    {
+        /// Stage 3.1: parse query
+        task->parseQueryFromEntry(context);
+        /// Stage 3.2: check cluster and find the host in cluster
+        task->setClusterInfo(context, log);
+    }
+    catch (...)
+    {
+        out_reason = "Cannot parse query or obtain cluster info";
+        write_error_status(task->host_id_str, ExecutionStatus::fromCurrentException().serializeText(), out_reason);
+        return {};
+    }
 
+    /// Now task is ready for execution
     return task;
 }
 
@@ -330,7 +340,8 @@ void DDLWorker::scheduleTasks()
             }
             else
             {
-                worker_pool.scheduleOrThrowOnError([this, task_ptr = task.release()]() {
+                worker_pool.scheduleOrThrowOnError([this, task_ptr = task.release()]()
+                {
                     setThreadName("DDLWorkerExec");
                     enqueueTask(DDLTaskPtr(task_ptr));
                 });
@@ -345,13 +356,6 @@ void DDLWorker::scheduleTasks()
     }
 }
 
-/// Parses query and resolves cluster and host in cluster
-void DDLWorker::parseQueryAndResolveHost(DDLTaskBase & /*task*/)
-{
-
-}
-
-
 bool DDLWorker::tryExecuteQuery(const String & query, const DDLTaskBase & task, ExecutionStatus & status)
 {
     /// Add special comment at the start of query to easily identify DDL-produced queries in query_log
@@ -792,7 +796,6 @@ void DDLWorker::runMainThread()
     setThreadName("DDLWorker");
     LOG_DEBUG(log, "Started DDLWorker thread");
 
-    bool initialized = false;
     do
     {
         try
diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h
index 39087d05fbb..02076ae1df1 100644
--- a/src/Interpreters/DDLWorker.h
+++ b/src/Interpreters/DDLWorker.h
@@ -53,6 +53,8 @@ public:
 
     void shutdown();
 
+    bool isCurrentlyActive() const { return initialized && !stop_flag; }
+
 protected:
 
     /// Returns cached ZooKeeper session (possibly expired).
@@ -87,8 +89,6 @@ protected:
         const String & node_path,
         const ZooKeeperPtr & zookeeper);
 
-    void parseQueryAndResolveHost(DDLTaskBase & task);
-
     bool tryExecuteQuery(const String & query, const DDLTaskBase & task, ExecutionStatus & status);
 
     /// Checks and cleanups queue's nodes
@@ -121,6 +121,7 @@ protected:
 
     std::shared_ptr<Poco::Event> queue_updated_event = std::make_shared<Poco::Event>();
     std::shared_ptr<Poco::Event> cleanup_event = std::make_shared<Poco::Event>();
+    std::atomic<bool> initialized = false;
     std::atomic<bool> stop_flag = false;
 
     ThreadFromGlobalPool main_thread;
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index 8d695b29793..f79eb800b66 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -731,7 +731,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
 
     //TODO make code better if possible
     bool need_add_to_database = !create.temporary;
-    if(need_add_to_database && database->getEngineName() == "Replicated")
+    if (need_add_to_database && database->getEngineName() == "Replicated")
     {
         auto guard = DatabaseCatalog::instance().getDDLGuard(create.database, create.table);
         database = DatabaseCatalog::instance().getDatabase(create.database);
diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp
index 03065245766..24405a5be27 100644
--- a/src/Interpreters/executeDDLQueryOnCluster.cpp
+++ b/src/Interpreters/executeDDLQueryOnCluster.cpp
@@ -23,6 +23,7 @@ namespace ErrorCodes
     extern const int TIMEOUT_EXCEEDED;
     extern const int UNFINISHED;
     extern const int QUERY_IS_PROHIBITED;
+    extern const int LOGICAL_ERROR;
 }
 
 bool isSupportedAlterType(int type)
@@ -189,6 +190,7 @@ DDLQueryStatusInputStream::DDLQueryStatusInputStream(const String & zk_node_path
     if (hosts_to_wait)
     {
         waiting_hosts = NameSet(hosts_to_wait->begin(), hosts_to_wait->end());
+        by_hostname = false;
     }
     else
     {
@@ -267,7 +269,15 @@ Block DDLQueryStatusInputStream::readImpl()
                     status.tryDeserializeText(status_data);
             }
 
-            auto [host, port] = Cluster::Address::fromString(host_id);
+            //FIXME
+            String host = host_id;
+            UInt16 port = 0;
+            if (by_hostname)
+            {
+                auto host_and_port = Cluster::Address::fromString(host_id);
+                host = host_and_port.first;
+                port = host_and_port.second;
+            }
 
             if (status.code != 0 && first_exception == nullptr)
                 first_exception = std::make_unique<Exception>(status.code, "There was an error on [{}:{}]: {}", host, port, status.message);
diff --git a/src/Interpreters/executeDDLQueryOnCluster.h b/src/Interpreters/executeDDLQueryOnCluster.h
index 0f7a411ed92..f65abf33c4f 100644
--- a/src/Interpreters/executeDDLQueryOnCluster.h
+++ b/src/Interpreters/executeDDLQueryOnCluster.h
@@ -61,6 +61,7 @@ private:
     std::unique_ptr<Exception> first_exception;
 
     Int64 timeout_seconds = 120;
+    bool by_hostname = true;
 };
 
 }
diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py
index 8c5a25b3fe7..f99f4517e5a 100644
--- a/tests/integration/test_replicated_database/test.py
+++ b/tests/integration/test_replicated_database/test.py
@@ -90,6 +90,7 @@ def test_create_replica_after_delay(started_cluster, engine):
 
     assert_create_query([main_node, dummy_node, competing_node], name, expected)
 
+@pytest.mark.dependency(depends=['test_create_replica_after_delay'])
 def test_alters_from_different_replicas(started_cluster):
     main_node.query("CREATE TABLE testdb.concurrent_test "
                     "(CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) "
@@ -138,13 +139,13 @@ def test_replica_restart(started_cluster):
     assert_create_query([main_node, competing_node], "testdb.concurrent_test", expected)
 
 
-@pytest.mark.dependency(depends=['test_create_replica_after_delay'])
+@pytest.mark.dependency(depends=['test_replica_restart'])
 def test_snapshot_and_snapshot_recover(started_cluster):
-    #FIXME bad test
     snapshotting_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica4');")
-    time.sleep(5)
     snapshot_recovering_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica5');")
-    time.sleep(5)
+
+    assert_eq_with_retry(snapshotting_node, "select count() from system.tables where name like 'alter_test_%'", "2\n")
+    assert_eq_with_retry(snapshot_recovering_node, "select count() from system.tables where name like 'alter_test_%'", "2\n")
     assert snapshotting_node.query("desc table testdb.alter_test_MergeTree") == snapshot_recovering_node.query("desc table testdb.alter_test_MergeTree")
     assert snapshotting_node.query("desc table testdb.alter_test_ReplicatedMergeTree") == snapshot_recovering_node.query("desc table testdb.alter_test_ReplicatedMergeTree")
 

From c955542dce00478321a424e05f0ef777dfcc00e2 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Mon, 30 Nov 2020 23:22:25 +0300
Subject: [PATCH 0064/1238] run functional tests with Replicated engine

---
 src/Interpreters/InterpreterCreateQuery.cpp   | 10 +++++++++-
 src/Interpreters/executeDDLQueryOnCluster.cpp |  7 ++++++-
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index f79eb800b66..0b7fb3e5431 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -132,7 +132,15 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create)
         bool old_style_database = context.getSettingsRef().default_database_engine.value == DefaultDatabaseEngine::Ordinary;
         auto engine = std::make_shared<ASTFunction>();
         auto storage = std::make_shared<ASTStorage>();
-        engine->name = old_style_database ? "Ordinary" : "Atomic";
+        //FIXME revert it before merge
+        engine->name = "Atomic";
+        if (old_style_database)
+        {
+            engine = makeASTFunction("Replicated",
+                                     std::make_shared<ASTLiteral>(fmt::format("/clickhouse/db/{}/", create.database)),
+                                     std::make_shared<ASTLiteral>("s1"),
+                                     std::make_shared<ASTLiteral>("r1"));
+        }
         storage->set(storage->engine, engine);
         create.set(create.storage, storage);
     }
diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp
index 24405a5be27..0b44206a2b2 100644
--- a/src/Interpreters/executeDDLQueryOnCluster.cpp
+++ b/src/Interpreters/executeDDLQueryOnCluster.cpp
@@ -294,7 +294,12 @@ Block DDLQueryStatusInputStream::readImpl()
         res = sample.cloneWithColumns(std::move(columns));
     }
 
-    return res;
+    //FIXME revert it before merge
+    bool is_functional_tests = !by_hostname && context.getSettingsRef().default_database_engine.value == DefaultDatabaseEngine::Ordinary;
+    if (is_functional_tests)
+        return {};
+    else
+        return res;
 }
 
 Strings DDLQueryStatusInputStream::getChildrenAllowNoNode(const std::shared_ptr<zkutil::ZooKeeper> & zookeeper, const String & node_path)

From 1a4bd67736df1fdaec41df52bb4ca9d6ea5c4f81 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Tue, 1 Dec 2020 20:20:42 +0300
Subject: [PATCH 0065/1238] fixes

---
 src/Common/ZooKeeper/TestKeeper.cpp           |  8 ++++----
 src/Databases/DatabaseReplicated.cpp          |  1 +
 src/Interpreters/Context.cpp                  |  1 +
 src/Interpreters/DDLWorker.cpp                | 16 +++++++++++++---
 src/Interpreters/DDLWorker.h                  |  1 +
 src/Interpreters/InterpreterCreateQuery.cpp   |  5 ++++-
 src/Interpreters/executeDDLQueryOnCluster.cpp |  4 ++++
 7 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/src/Common/ZooKeeper/TestKeeper.cpp b/src/Common/ZooKeeper/TestKeeper.cpp
index 5f34a60c34e..2d89228c7ae 100644
--- a/src/Common/ZooKeeper/TestKeeper.cpp
+++ b/src/Common/ZooKeeper/TestKeeper.cpp
@@ -213,10 +213,11 @@ std::pair<ResponsePtr, Undo> TestKeeperCreateRequest::process(TestKeeper::Contai
             created_node.is_sequental = is_sequential;
             std::string path_created = path;
 
+            ++it->second.seq_num;
+
             if (is_sequential)
             {
                 auto seq_num = it->second.seq_num;
-                ++it->second.seq_num;
 
                 std::stringstream seq_num_str;      // STYLE_CHECK_ALLOW_STD_STRING_STREAM
                 seq_num_str.exceptions(std::ios::failbit);
@@ -228,15 +229,14 @@ std::pair<ResponsePtr, Undo> TestKeeperCreateRequest::process(TestKeeper::Contai
             response.path_created = path_created;
             container.emplace(path_created, std::move(created_node));
 
-            undo = [&container, path_created, is_sequential = is_sequential, parent_path = it->first]
+            undo = [&container, path_created, parent_path = it->first]
             {
                 container.erase(path_created);
                 auto & undo_parent = container.at(parent_path);
                 --undo_parent.stat.cversion;
                 --undo_parent.stat.numChildren;
 
-                if (is_sequential)
-                    --undo_parent.seq_num;
+                --undo_parent.seq_num;
             };
 
             ++it->second.stat.cversion;
diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index 418eaf567a4..a7e6c11ca4c 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -170,6 +170,7 @@ void DatabaseReplicated::loadStoredObjects(Context & context, bool has_force_res
     DatabaseAtomic::loadStoredObjects(context, has_force_restore_data_flag, force_attach);
 
     ddl_worker = std::make_unique<DatabaseReplicatedDDLWorker>(this, global_context);
+    ddl_worker->startup();
 }
 
 void DatabaseReplicated::onUnexpectedLogEntry(const String & entry_name, const ZooKeeperPtr & zookeeper)
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 27deb07d296..ef19c134854 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -1487,6 +1487,7 @@ void Context::setDDLWorker(std::unique_ptr<DDLWorker> ddl_worker)
     auto lock = getLock();
     if (shared->ddl_worker)
         throw Exception("DDL background thread has already been initialized", ErrorCodes::LOGICAL_ERROR);
+    ddl_worker->startup();
     shared->ddl_worker = std::move(ddl_worker);
 }
 
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 12f4c42b467..188d38b8647 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -167,7 +167,10 @@ DDLWorker::DDLWorker(int pool_size_, const std::string & zk_root_dir, const Cont
 
     host_fqdn = getFQDNOrHostName();
     host_fqdn_id = Cluster::Address::toString(host_fqdn, context.getTCPPort());
+}
 
+void DDLWorker::startup()
+{
     main_thread = ThreadFromGlobalPool(&DDLWorker::runMainThread, this);
     cleanup_thread = ThreadFromGlobalPool(&DDLWorker::runCleanupThread, this);
 }
@@ -183,8 +186,10 @@ DDLWorker::~DDLWorker()
 {
     shutdown();
     worker_pool.wait();
-    main_thread.join();
-    cleanup_thread.join();
+    if (main_thread.joinable())
+        main_thread.join();
+    if (cleanup_thread.joinable())
+        cleanup_thread.join();
 }
 
 
@@ -421,7 +426,12 @@ void DDLWorker::enqueueTask(DDLTaskPtr task_ptr)
             else if (e.code == Coordination::Error::ZNONODE)
             {
                 LOG_ERROR(log, "ZooKeeper error: {}", getCurrentExceptionMessage(true));
-                // TODO: retry?
+                if (!current_zookeeper->exists(task_ptr->entry_path))
+                {
+                    //FIXME race condition with cleanup thread
+                    LOG_ERROR(log, "Task {} is lost. It probably was removed by other server.", task_ptr->entry_path);
+                    return;
+                }
             }
             else
             {
diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h
index 02076ae1df1..f41ca0fce8f 100644
--- a/src/Interpreters/DDLWorker.h
+++ b/src/Interpreters/DDLWorker.h
@@ -51,6 +51,7 @@ public:
         return host_fqdn_id;
     }
 
+    void startup();
     void shutdown();
 
     bool isCurrentlyActive() const { return initialized && !stop_flag; }
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index 0b7fb3e5431..f201e38be2e 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -136,7 +136,10 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create)
         engine->name = "Atomic";
         if (old_style_database)
         {
-            engine = makeASTFunction("Replicated",
+            if (database_name == "test")
+                engine->name = "Ordinary";      // for stateful tests
+            else
+                engine = makeASTFunction("Replicated",
                                      std::make_shared<ASTLiteral>(fmt::format("/clickhouse/db/{}/", create.database)),
                                      std::make_shared<ASTLiteral>("s1"),
                                      std::make_shared<ASTLiteral>("r1"));
diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp
index 0b44206a2b2..2ca07349cbc 100644
--- a/src/Interpreters/executeDDLQueryOnCluster.cpp
+++ b/src/Interpreters/executeDDLQueryOnCluster.cpp
@@ -201,6 +201,10 @@ DDLQueryStatusInputStream::DDLQueryStatusInputStream(const String & zk_node_path
     addTotalRowsApprox(waiting_hosts.size());
 
     timeout_seconds = context.getSettingsRef().distributed_ddl_task_timeout;
+
+    //FIXME revert it before merge
+    if (context.getSettingsRef().default_database_engine.value == DefaultDatabaseEngine::Ordinary)
+        timeout_seconds = 10;
 }
 
 Block DDLQueryStatusInputStream::readImpl()

From 39532f7d9e47204a499ffa9200b91eaae9763aae Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Thu, 3 Dec 2020 21:14:27 +0300
Subject: [PATCH 0066/1238] slightly better DDLWorker initialization and
 restarting

---
 src/Common/ZooKeeper/TestKeeper.cpp        |   4 +-
 src/Databases/DatabaseAtomic.cpp           |   3 -
 src/Databases/DatabaseReplicatedWorker.cpp |  32 +++-
 src/Databases/DatabaseReplicatedWorker.h   |   3 +-
 src/Interpreters/DDLTask.h                 |   2 +
 src/Interpreters/DDLWorker.cpp             | 187 ++++++++-------------
 src/Interpreters/DDLWorker.h               |  15 +-
 7 files changed, 114 insertions(+), 132 deletions(-)

diff --git a/src/Common/ZooKeeper/TestKeeper.cpp b/src/Common/ZooKeeper/TestKeeper.cpp
index 2d89228c7ae..86387417a3c 100644
--- a/src/Common/ZooKeeper/TestKeeper.cpp
+++ b/src/Common/ZooKeeper/TestKeeper.cpp
@@ -213,8 +213,6 @@ std::pair<ResponsePtr, Undo> TestKeeperCreateRequest::process(TestKeeper::Contai
             created_node.is_sequental = is_sequential;
             std::string path_created = path;
 
-            ++it->second.seq_num;
-
             if (is_sequential)
             {
                 auto seq_num = it->second.seq_num;
@@ -226,6 +224,8 @@ std::pair<ResponsePtr, Undo> TestKeeperCreateRequest::process(TestKeeper::Contai
                 path_created += seq_num_str.str();
             }
 
+            ++it->second.seq_num;
+
             response.path_created = path_created;
             container.emplace(path_created, std::move(created_node));
 
diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp
index b60adf44e51..438fa2d97bd 100644
--- a/src/Databases/DatabaseAtomic.cpp
+++ b/src/Databases/DatabaseAtomic.cpp
@@ -11,10 +11,7 @@
 #include <Interpreters/Context.h>
 #include <Interpreters/ExternalDictionariesLoader.h>
 #include <filesystem>
-
-//FIXME it shouldn't be here
 #include <Interpreters/DDLTask.h>
-#include <Common/ZooKeeper/ZooKeeper.h>
 
 namespace DB
 {
diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp
index 29599d4d66d..0c2368cdcf6 100644
--- a/src/Databases/DatabaseReplicatedWorker.cpp
+++ b/src/Databases/DatabaseReplicatedWorker.cpp
@@ -17,7 +17,26 @@ DatabaseReplicatedDDLWorker::DatabaseReplicatedDDLWorker(DatabaseReplicated * db
     /// Pool size must be 1 (to avoid reordering of log entries)
 }
 
-void DatabaseReplicatedDDLWorker::initialize()
+void DatabaseReplicatedDDLWorker::initializeMainThread()
+{
+    do
+    {
+        try
+        {
+            auto zookeeper = getAndSetZooKeeper();
+            initializeReplication();
+            initialized = true;
+        }
+        catch (...)
+        {
+            tryLogCurrentException(log, fmt::format("Error on initialization of {}", database->getDatabaseName()));
+            sleepForSeconds(5);
+        }
+    }
+    while (!initialized && !stop_flag);
+}
+
+void DatabaseReplicatedDDLWorker::initializeReplication()
 {
     /// Check if we need to recover replica.
     /// Invariant: replica is lost if it's log_ptr value is less then min_log_ptr value.
@@ -101,11 +120,16 @@ DDLTaskPtr DatabaseReplicatedDDLWorker::initAndCheckTask(const String & entry_na
     if (task->entry.query.empty())
     {
         //TODO better way to determine special entries
-        task->was_executed = true;
+        out_reason = "It's dummy task";
+        return {};
     }
-    else
+
+    task->parseQueryFromEntry(context);
+
+    if (zookeeper->exists(task->getFinishedNodePath()))
     {
-        task->parseQueryFromEntry(context);
+        out_reason = "Task has been already processed";
+        return {};
     }
 
     return task;
diff --git a/src/Databases/DatabaseReplicatedWorker.h b/src/Databases/DatabaseReplicatedWorker.h
index d190bd1795d..7994104331e 100644
--- a/src/Databases/DatabaseReplicatedWorker.h
+++ b/src/Databases/DatabaseReplicatedWorker.h
@@ -15,7 +15,8 @@ public:
     String enqueueQuery(DDLLogEntry & entry) override;
 
 private:
-    void initialize() override;
+    void initializeMainThread() override;
+    void initializeReplication();
 
     DDLTaskPtr initAndCheckTask(const String & entry_name, String & out_reason, const ZooKeeperPtr & zookeeper) override;
 
diff --git a/src/Interpreters/DDLTask.h b/src/Interpreters/DDLTask.h
index 2db1a696384..94127b39b84 100644
--- a/src/Interpreters/DDLTask.h
+++ b/src/Interpreters/DDLTask.h
@@ -76,6 +76,8 @@ struct DDLTaskBase
     bool was_executed = false;
 
     DDLTaskBase(const String & name, const String & path) : entry_name(name), entry_path(path) {}
+    DDLTaskBase(const DDLTaskBase &) = delete;
+    DDLTaskBase(DDLTaskBase &&) = default;
     virtual ~DDLTaskBase() = default;
 
     void parseQueryFromEntry(const Context & context);
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 188d38b8647..e4ea5f8db17 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -143,9 +143,14 @@ DDLWorker::DDLWorker(int pool_size_, const std::string & zk_root_dir, const Cont
                      const String & logger_name)
     : context(context_)
     , log(&Poco::Logger::get(logger_name))
-    , pool_size(pool_size_)     //FIXME make it optional
-    , worker_pool(pool_size_)
+    , pool_size(pool_size_)
 {
+    if (1 < pool_size)
+    {
+        LOG_WARNING(log, "DDLWorker is configured to use multiple threads. "
+                         "It's not recommended because queries can be reordered. Also it may cause some unknown issues to appear.");
+        worker_pool.emplace(pool_size);
+    }
     queue_dir = zk_root_dir;
     if (queue_dir.back() == '/')
         queue_dir.resize(queue_dir.size() - 1);
@@ -185,7 +190,8 @@ void DDLWorker::shutdown()
 DDLWorker::~DDLWorker()
 {
     shutdown();
-    worker_pool.wait();
+    if (worker_pool)
+        worker_pool->wait();
     if (main_thread.joinable())
         main_thread.join();
     if (cleanup_thread.joinable())
@@ -209,24 +215,6 @@ ZooKeeperPtr DDLWorker::getAndSetZooKeeper()
     return current_zookeeper;
 }
 
-void DDLWorker::recoverZooKeeper()
-{
-    LOG_DEBUG(log, "Recovering ZooKeeper session after: {}", getCurrentExceptionMessage(false));
-
-    while (!stop_flag)
-    {
-        try
-        {
-            getAndSetZooKeeper();
-            break;
-        }
-        catch (...)
-        {
-            tryLogCurrentException(__PRETTY_FUNCTION__);
-            sleepForSeconds(5);
-        }
-    }
-}
 
 DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_reason, const ZooKeeperPtr & zookeeper)
 {
@@ -285,6 +273,12 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r
         return {};
     }
 
+    if (zookeeper->exists(task->getFinishedNodePath()))
+    {
+        out_reason = "Task has been already processed";
+        return {};
+    }
+
     /// Now task is ready for execution
     return task;
 }
@@ -309,11 +303,11 @@ void DDLWorker::scheduleTasks()
         return;
     }
 
-    bool server_startup = !last_entry_name.has_value();
+    bool server_startup = current_tasks.empty();
 
     auto begin_node = server_startup
         ? queue_nodes.begin()
-        : std::upper_bound(queue_nodes.begin(), queue_nodes.end(), *last_entry_name);
+        : std::upper_bound(queue_nodes.begin(), queue_nodes.end(), current_tasks.back()->entry_name);
 
     for (auto it = begin_node; it != queue_nodes.end() && !stop_flag; ++it)
     {
@@ -325,42 +319,39 @@ void DDLWorker::scheduleTasks()
         if (!task)
         {
             LOG_DEBUG(log, "Will not execute task {}: {}", entry_name, reason);
-            last_entry_name = entry_name;
+            task->was_executed = true;
+            saveTask(std::move(task)); //FIXME questionable
             continue;
         }
 
-        bool already_processed = zookeeper->exists(task->entry_path + "/finished/" + task->host_id_str);
-        if (!server_startup && !task->was_executed && already_processed)
-        {
-            throw Exception(ErrorCodes::LOGICAL_ERROR,
-                "Server expects that DDL task {} should be processed, but it was already processed according to ZK",
-                entry_name);
-        }
+        auto & saved_task = saveTask(std::move(task));
 
-        if (!already_processed)
+        if (worker_pool)
         {
-            if (pool_size == 1)
+            worker_pool->scheduleOrThrowOnError([this, &saved_task]()
             {
-                enqueueTask(DDLTaskPtr(task.release()));
-            }
-            else
-            {
-                worker_pool.scheduleOrThrowOnError([this, task_ptr = task.release()]()
-                {
-                    setThreadName("DDLWorkerExec");
-                    enqueueTask(DDLTaskPtr(task_ptr));
-                });
-            }
+                setThreadName("DDLWorkerExec");
+                processTask(saved_task);
+            });
         }
         else
         {
-            LOG_DEBUG(log, "Task {} ({}) has been already processed", entry_name, task->entry.query);
+            processTask(saved_task);
         }
-
-        last_entry_name = entry_name;
     }
 }
 
+DDLTaskBase & DDLWorker::saveTask(DDLTaskPtr && task)
+{
+    if (current_tasks.size() == pool_size)
+    {
+        assert(current_tasks.front()->was_executed);
+        current_tasks.pop_front();
+    }
+    current_tasks.emplace_back(std::move(task));
+    return *current_tasks.back();
+}
+
 bool DDLWorker::tryExecuteQuery(const String & query, const DDLTaskBase & task, ExecutionStatus & status)
 {
     /// Add special comment at the start of query to easily identify DDL-produced queries in query_log
@@ -404,48 +395,6 @@ void DDLWorker::attachToThreadGroup()
     }
 }
 
-
-void DDLWorker::enqueueTask(DDLTaskPtr task_ptr)
-{
-    auto & task = *task_ptr;
-
-    while (!stop_flag)
-    {
-        try
-        {
-            processTask(task);
-            return;
-        }
-        /// TODO recover zk in runMainThread(...) and retry task (why do we need another place where session is recovered?)
-        catch (const Coordination::Exception & e)
-        {
-            if (Coordination::isHardwareError(e.code))
-            {
-                recoverZooKeeper();
-            }
-            else if (e.code == Coordination::Error::ZNONODE)
-            {
-                LOG_ERROR(log, "ZooKeeper error: {}", getCurrentExceptionMessage(true));
-                if (!current_zookeeper->exists(task_ptr->entry_path))
-                {
-                    //FIXME race condition with cleanup thread
-                    LOG_ERROR(log, "Task {} is lost. It probably was removed by other server.", task_ptr->entry_path);
-                    return;
-                }
-            }
-            else
-            {
-                LOG_ERROR(log, "Unexpected ZooKeeper error: {}.", getCurrentExceptionMessage(true));
-                return;
-            }
-        }
-        catch (...)
-        {
-            LOG_WARNING(log, "An error occurred while processing task {} ({}) : {}", task.entry_name, task.entry.query, getCurrentExceptionMessage(true));
-        }
-    }
-}
-
 void DDLWorker::processTask(DDLTaskBase & task)
 {
     auto zookeeper = tryGetZooKeeper();
@@ -458,22 +407,16 @@ void DDLWorker::processTask(DDLTaskBase & task)
     String dummy;
     auto code = zookeeper->tryCreate(active_node_path, "", zkutil::CreateMode::Ephemeral, dummy);
 
-    if (code == Coordination::Error::ZOK || code == Coordination::Error::ZNODEEXISTS)
-    {
-        // Ok
-    }
-    else if (code == Coordination::Error::ZNONODE)
+    if (code == Coordination::Error::ZNONODE)
     {
         /// There is no parent
-        //TODO why not to create parent before active_node?
         createStatusDirs(task.entry_path, zookeeper);
-        if (Coordination::Error::ZOK != zookeeper->tryCreate(active_node_path, "", zkutil::CreateMode::Ephemeral, dummy))
-            throw Coordination::Exception(code, active_node_path);
+        zookeeper->create(active_node_path, "", zkutil::CreateMode::Ephemeral);
     }
     else
         throw Coordination::Exception(code, active_node_path);
 
-    if (!task.was_executed)
+    if (!task.was_executed)     // FIXME always true
     {
         try
         {
@@ -513,6 +456,9 @@ void DDLWorker::processTask(DDLTaskBase & task)
     }
 
     /// FIXME: if server fails right here, the task will be executed twice. We need WAL here.
+    /// Another possible issue: if ZooKeeper session is lost here, we will recover connection and execute the task second time.
+
+
 
     /// Delete active flag and create finish flag
     Coordination::Requests ops;
@@ -787,7 +733,9 @@ String DDLWorker::enqueueQuery(DDLLogEntry & entry)
 
     String node_path = zookeeper->create(query_path_prefix, entry.toString(), zkutil::CreateMode::PersistentSequential);
 
-    /// Optional step
+    /// We cannot create status dirs in a single transaction with previous request,
+    /// because we don't know node_path until previous request is executed.
+    /// Se we try to create status dirs here or later when we will execute entry.
     try
     {
         createStatusDirs(node_path, zookeeper);
@@ -801,70 +749,80 @@ String DDLWorker::enqueueQuery(DDLLogEntry & entry)
 }
 
 
-void DDLWorker::runMainThread()
+void DDLWorker::initializeMainThread()
 {
-    setThreadName("DDLWorker");
-    LOG_DEBUG(log, "Started DDLWorker thread");
-
     do
     {
         try
         {
             auto zookeeper = getAndSetZooKeeper();
             zookeeper->createAncestors(queue_dir + "/");
-            initialize();
             initialized = true;
         }
         catch (const Coordination::Exception & e)
         {
             if (!Coordination::isHardwareError(e.code))
-                throw;  /// A logical error.
+                throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected ZooKeeper error: {}", e.message());
 
             tryLogCurrentException(__PRETTY_FUNCTION__);
 
             /// Avoid busy loop when ZooKeeper is not available.
-            sleepForSeconds(1);
+            sleepForSeconds(5);
         }
         catch (...)
         {
-            tryLogCurrentException(log, "Terminating. Cannot initialize DDL queue.");
-            return;
+            tryLogCurrentException(log, "Cannot initialize main thread of DDLWorker, will try again");
+            sleepForSeconds(5);
         }
     }
     while (!initialized && !stop_flag);
+}
+
+void DDLWorker::runMainThread()
+{
+    setThreadName("DDLWorker");
+    attachToThreadGroup();
+    LOG_DEBUG(log, "Starting DDLWorker thread");
 
     while (!stop_flag)
     {
         try
         {
-            attachToThreadGroup();
+            /// Reinitialize DDLWorker state (including ZooKeeper connection) if required
+            if (!initialized)
+            {
+                initializeMainThread();
+                LOG_DEBUG(log, "Initialized DDLWorker thread");
+            }
 
             cleanup_event->set();
             scheduleTasks();
 
-            LOG_DEBUG(log, "Waiting a watch");
+            LOG_DEBUG(log, "Waiting for queue updates");
             queue_updated_event->wait();
         }
         catch (const Coordination::Exception & e)
         {
             if (Coordination::isHardwareError(e.code))
             {
-                recoverZooKeeper();
+                initialized = false;
             }
             else if (e.code == Coordination::Error::ZNONODE)
             {
+                // TODO add comment: when it happens and why it's expected?
+                // maybe because cleanup thread may remove nodes inside queue entry which are currently processed
                 LOG_ERROR(log, "ZooKeeper error: {}", getCurrentExceptionMessage(true));
             }
             else
             {
-                LOG_ERROR(log, "Unexpected ZooKeeper error: {}. Terminating.", getCurrentExceptionMessage(true));
-                return;
+                LOG_ERROR(log, "Unexpected ZooKeeper error: {}.", getCurrentExceptionMessage(true));
+                assert(false);
             }
         }
         catch (...)
         {
-            tryLogCurrentException(log, "Unexpected error, will terminate:");
-            return;
+            tryLogCurrentException(log, "Unexpected error, will try to restart main thread:");
+            initialized = false;
         }
     }
 }
@@ -891,6 +849,7 @@ void DDLWorker::runCleanupThread()
                 continue;
             }
 
+            /// ZooKeeper connection is recovered by main thread. We will wait for it on cleanup_event.
             auto zookeeper = tryGetZooKeeper();
             if (zookeeper->expired())
                 continue;
diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h
index f41ca0fce8f..78921fa60e3 100644
--- a/src/Interpreters/DDLWorker.h
+++ b/src/Interpreters/DDLWorker.h
@@ -62,17 +62,16 @@ protected:
     ZooKeeperPtr tryGetZooKeeper() const;
     /// If necessary, creates a new session and caches it.
     ZooKeeperPtr getAndSetZooKeeper();
-    /// ZooKeeper recover loop (while not stopped).
-    void recoverZooKeeper();
 
-    void checkCurrentTasks();
+    /// Iterates through queue tasks in ZooKeeper, runs execution of new tasks
     void scheduleTasks();
 
+    DDLTaskBase & saveTask(DDLTaskPtr && task);
+
     /// Reads entry and check that the host belongs to host list of the task
     /// Returns non-empty DDLTaskPtr if entry parsed and the check is passed
     virtual DDLTaskPtr initAndCheckTask(const String & entry_name, String & out_reason, const ZooKeeperPtr & zookeeper);
 
-    void enqueueTask(DDLTaskPtr task);
     void processTask(DDLTaskBase & task);
 
     /// Check that query should be executed on leader replica only
@@ -98,7 +97,7 @@ protected:
     /// Init task node
     static void createStatusDirs(const std::string & node_path, const ZooKeeperPtr & zookeeper);
 
-    virtual void initialize() {}
+    virtual void initializeMainThread();
 
     void runMainThread();
     void runCleanupThread();
@@ -117,8 +116,8 @@ protected:
     ZooKeeperPtr current_zookeeper;
 
     /// Save state of executed task to avoid duplicate execution on ZK error
-    //std::vector<std::string> last_tasks;
-    std::optional<String> last_entry_name;
+    //std::optional<String> last_entry_name;
+    std::list<DDLTaskPtr> current_tasks;
 
     std::shared_ptr<Poco::Event> queue_updated_event = std::make_shared<Poco::Event>();
     std::shared_ptr<Poco::Event> cleanup_event = std::make_shared<Poco::Event>();
@@ -130,7 +129,7 @@ protected:
 
     /// Size of the pool for query execution.
     size_t pool_size = 1;
-    ThreadPool worker_pool;
+    std::optional<ThreadPool> worker_pool;
 
     /// Cleaning starts after new node event is received if the last cleaning wasn't made sooner than N seconds ago
     Int64 cleanup_delay_period = 60; // minute (in seconds)

From 9f3c77f62e281fbb6c14e23ec81bde5e7000f416 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Fri, 4 Dec 2020 23:12:32 +0300
Subject: [PATCH 0067/1238] add zk ops into task

---
 src/Common/ZooKeeper/ZooKeeper.h |   8 ++
 src/Interpreters/DDLTask.cpp     |  18 ++--
 src/Interpreters/DDLTask.h       |  18 +++-
 src/Interpreters/DDLWorker.cpp   | 172 ++++++++++++++++++++++---------
 src/Interpreters/DDLWorker.h     |   2 +-
 5 files changed, 160 insertions(+), 58 deletions(-)

diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h
index 1ad744102c6..e79553ed4d9 100644
--- a/src/Common/ZooKeeper/ZooKeeper.h
+++ b/src/Common/ZooKeeper/ZooKeeper.h
@@ -314,8 +314,15 @@ public:
         return std::make_shared<EphemeralNodeHolder>(path, zookeeper, false, false, "");
     }
 
+    void reset()
+    {
+        need_remove = false;
+    }
+
     ~EphemeralNodeHolder()
     {
+        if (!need_remove)
+            return;
         try
         {
             zookeeper.tryRemove(path);
@@ -331,6 +338,7 @@ private:
     std::string path;
     ZooKeeper & zookeeper;
     CurrentMetrics::Increment metric_increment{CurrentMetrics::EphemeralNode};
+    bool need_remove = true;
 };
 
 using EphemeralNodeHolderPtr = EphemeralNodeHolder::Ptr;
diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp
index 9ef7352ceb4..3d9297880c1 100644
--- a/src/Interpreters/DDLTask.cpp
+++ b/src/Interpreters/DDLTask.cpp
@@ -96,7 +96,7 @@ void DDLTaskBase::parseQueryFromEntry(const Context & context)
     query = parseQuery(parser_query, begin, end, description, 0, context.getSettingsRef().max_parser_depth);
 }
 
-std::unique_ptr<Context> DDLTaskBase::makeQueryContext(Context & from_context) const
+std::unique_ptr<Context> DDLTaskBase::makeQueryContext(Context & from_context)
 {
     auto query_context = std::make_unique<Context>(from_context);
     query_context->makeQueryContext();
@@ -293,7 +293,7 @@ String DatabaseReplicatedTask::getShardID() const
     return database->shard_name;
 }
 
-std::unique_ptr<Context> DatabaseReplicatedTask::makeQueryContext(Context & from_context) const
+std::unique_ptr<Context> DatabaseReplicatedTask::makeQueryContext(Context & from_context)
 {
     auto query_context = DDLTaskBase::makeQueryContext(from_context);
     query_context->getClientInfo().query_kind = ClientInfo::QueryKind::REPLICATED_LOG_QUERY; //FIXME why do we need separate query kind?
@@ -309,15 +309,18 @@ std::unique_ptr<Context> DatabaseReplicatedTask::makeQueryContext(Context & from
     {
         txn->ops.emplace_back(zkutil::makeRemoveRequest(entry_path + "/try", -1));
         txn->ops.emplace_back(zkutil::makeCreateRequest(entry_path + "/committed", host_id_str, zkutil::CreateMode::Persistent));
-        txn->ops.emplace_back(zkutil::makeRemoveRequest(getActiveNodePath(), -1));
+        //txn->ops.emplace_back(zkutil::makeRemoveRequest(getActiveNodePath(), -1));
         txn->ops.emplace_back(zkutil::makeSetRequest(database->zookeeper_path + "/max_log_ptr", toString(getLogEntryNumber(entry_name)), -1));
     }
 
-    if (execute_on_leader)
-        txn->ops.emplace_back(zkutil::makeCreateRequest(getShardNodePath() + "/executed", host_id_str, zkutil::CreateMode::Persistent));
-    txn->ops.emplace_back(zkutil::makeCreateRequest(getFinishedNodePath(), execution_status.serializeText(), zkutil::CreateMode::Persistent));
+    //if (execute_on_leader)
+    //    txn->ops.emplace_back(zkutil::makeCreateRequest(getShardNodePath() + "/executed", host_id_str, zkutil::CreateMode::Persistent));
+    //txn->ops.emplace_back(zkutil::makeCreateRequest(getFinishedNodePath(), execution_status.serializeText(), zkutil::CreateMode::Persistent));
     txn->ops.emplace_back(zkutil::makeSetRequest(database->replica_path + "/log_ptr", toString(getLogEntryNumber(entry_name)), -1));
 
+    std::move(ops.begin(), ops.end(), std::back_inserter(txn->ops));
+    ops.clear();
+
     return query_context;
 }
 
@@ -338,7 +341,10 @@ UInt32 DatabaseReplicatedTask::getLogEntryNumber(const String & log_entry_name)
 
 void MetadataTransaction::commit()
 {
+    assert(state == CREATED);
+    state = FAILED;
     current_zookeeper->multi(ops);
+    state = COMMITED;
 }
 
 }
diff --git a/src/Interpreters/DDLTask.h b/src/Interpreters/DDLTask.h
index 94127b39b84..aa234d1bfdd 100644
--- a/src/Interpreters/DDLTask.h
+++ b/src/Interpreters/DDLTask.h
@@ -15,6 +15,9 @@ class ASTQueryWithOnCluster;
 using ZooKeeperPtr = std::shared_ptr<zkutil::ZooKeeper>;
 class DatabaseReplicated;
 
+struct MetadataTransaction;
+using MetadataTransactionPtr = std::shared_ptr<MetadataTransaction>;
+
 struct HostID
 {
     String host_name;
@@ -72,6 +75,8 @@ struct DDLTaskBase
     bool is_circular_replicated = false;
     bool execute_on_leader = false;
 
+    //MetadataTransactionPtr txn;
+    Coordination::Requests ops;
     ExecutionStatus execution_status;
     bool was_executed = false;
 
@@ -84,7 +89,7 @@ struct DDLTaskBase
 
     virtual String getShardID() const = 0;
 
-    virtual std::unique_ptr<Context> makeQueryContext(Context & from_context) const;
+    virtual std::unique_ptr<Context> makeQueryContext(Context & from_context);
 
     inline String getActiveNodePath() const { return entry_path + "/active/" + host_id_str; }
     inline String getFinishedNodePath() const { return entry_path + "/finished/" + host_id_str; }
@@ -119,7 +124,7 @@ struct DatabaseReplicatedTask : public DDLTaskBase
     DatabaseReplicatedTask(const String & name, const String & path, DatabaseReplicated * database_);
 
     String getShardID() const override;
-    std::unique_ptr<Context> makeQueryContext(Context & from_context) const override;
+    std::unique_ptr<Context> makeQueryContext(Context & from_context) override;
 
     static String getLogEntryName(UInt32 log_entry_number);
     static UInt32 getLogEntryNumber(const String & log_entry_name);
@@ -131,6 +136,14 @@ struct DatabaseReplicatedTask : public DDLTaskBase
 
 struct MetadataTransaction
 {
+    enum State
+    {
+        CREATED,
+        COMMITED,
+        FAILED
+    };
+
+    State state = CREATED;
     ZooKeeperPtr current_zookeeper;
     String zookeeper_path;
     bool is_initial_query;
@@ -142,6 +155,7 @@ struct MetadataTransaction
     }
 
     void commit();
+
 };
 
 }
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index e4ea5f8db17..a3262c238fc 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -38,6 +38,11 @@ namespace ErrorCodes
     extern const int LOGICAL_ERROR;
     extern const int TIMEOUT_EXCEEDED;
     extern const int UNFINISHED;
+    extern const int NOT_A_LEADER;
+    extern const int KEEPER_EXCEPTION;
+    extern const int CANNOT_ASSIGN_ALTER;
+    extern const int CANNOT_ALLOCATE_MEMORY;
+    extern const int MEMORY_LIMIT_EXCEEDED;
 }
 
 
@@ -295,6 +300,19 @@ void DDLWorker::scheduleTasks()
     LOG_DEBUG(log, "Scheduling tasks");
     auto zookeeper = tryGetZooKeeper();
 
+    for (auto & task : current_tasks)
+    {
+        /// Main thread of DDLWorker was restarted, probably due to lost connection with ZooKeeper.
+        /// We have some unfinished tasks. To avoid duplication of some queries, try to write execution status.
+        bool status_written = task->ops.empty();
+        bool task_still_exists = zookeeper->exists(task->entry_path);
+        if (task->was_executed && !status_written && task_still_exists)
+        {
+            assert(!zookeeper->exists(task->getFinishedNodePath()));
+            processTask(*task);
+        }
+    }
+
     Strings queue_nodes = zookeeper->getChildren(queue_dir, nullptr, queue_updated_event);
     filterAndSortQueueNodes(queue_nodes);
     if (queue_nodes.empty())
@@ -304,10 +322,16 @@ void DDLWorker::scheduleTasks()
     }
 
     bool server_startup = current_tasks.empty();
+    auto begin_node = queue_nodes.begin();
 
-    auto begin_node = server_startup
-        ? queue_nodes.begin()
-        : std::upper_bound(queue_nodes.begin(), queue_nodes.end(), current_tasks.back()->entry_name);
+    if (!server_startup)
+    {
+        /// We will recheck status of last executed tasks. It's useful if main thread was just restarted.
+        auto & min_task = *std::min_element(current_tasks.begin(), current_tasks.end());
+        begin_node = std::upper_bound(queue_nodes.begin(), queue_nodes.end(), min_task->entry_name);
+        current_tasks.clear();
+        //FIXME better way of maintaning current tasks list and min_task name;
+    }
 
     for (auto it = begin_node; it != queue_nodes.end() && !stop_flag; ++it)
     {
@@ -319,8 +343,8 @@ void DDLWorker::scheduleTasks()
         if (!task)
         {
             LOG_DEBUG(log, "Will not execute task {}: {}", entry_name, reason);
-            task->was_executed = true;
-            saveTask(std::move(task)); //FIXME questionable
+            //task->was_executed = true;
+            //saveTask(std::move(task));
             continue;
         }
 
@@ -343,16 +367,17 @@ void DDLWorker::scheduleTasks()
 
 DDLTaskBase & DDLWorker::saveTask(DDLTaskPtr && task)
 {
-    if (current_tasks.size() == pool_size)
-    {
-        assert(current_tasks.front()->was_executed);
-        current_tasks.pop_front();
-    }
+    //assert(current_tasks.size() <= pool_size + 1);
+    //if (current_tasks.size() == pool_size)
+    //{
+    //    assert(current_tasks.front()->ops.empty()); //FIXME
+    //    current_tasks.pop_front();
+    //}
     current_tasks.emplace_back(std::move(task));
     return *current_tasks.back();
 }
 
-bool DDLWorker::tryExecuteQuery(const String & query, const DDLTaskBase & task, ExecutionStatus & status)
+bool DDLWorker::tryExecuteQuery(const String & query, DDLTaskBase & task)
 {
     /// Add special comment at the start of query to easily identify DDL-produced queries in query_log
     String query_prefix = "/* ddl_entry=" + task.entry_name + " */ ";
@@ -367,15 +392,34 @@ bool DDLWorker::tryExecuteQuery(const String & query, const DDLTaskBase & task,
         auto query_context = task.makeQueryContext(context);
         executeQuery(istr, ostr, false, *query_context, {});
     }
-    catch (...)
+    catch (const DB::Exception & e)
     {
-        status = ExecutionStatus::fromCurrentException();
+        task.execution_status = ExecutionStatus::fromCurrentException();
         tryLogCurrentException(log, "Query " + query + " wasn't finished successfully");
 
+        /// We use return value of tryExecuteQuery(...) in tryExecuteQueryOnLeaderReplica(...) to determine
+        /// if replica has stopped being leader and we should retry query.
+        /// However, for the majority of exceptions there is no sense to retry, because most likely we will just
+        /// get the same exception again. So we return false only for several special exception codes,
+        /// and consider query as executed with status "failed" and return true in other cases.
+        bool no_sense_to_retry = e.code() != ErrorCodes::KEEPER_EXCEPTION &&
+                                 e.code() != ErrorCodes::NOT_A_LEADER &&
+                                 e.code() != ErrorCodes::CANNOT_ASSIGN_ALTER &&
+                                 e.code() != ErrorCodes::CANNOT_ALLOCATE_MEMORY &&
+                                 e.code() != ErrorCodes::MEMORY_LIMIT_EXCEEDED;
+        return no_sense_to_retry;
+    }
+    catch (...)
+    {
+        task.execution_status = ExecutionStatus::fromCurrentException();
+        tryLogCurrentException(log, "Query " + query + " wasn't finished successfully");
+
+        /// We don't know what exactly happened, but maybe it's Poco::NetException or std::bad_alloc,
+        /// so we consider unknown exception as retryable error.
         return false;
     }
 
-    status = ExecutionStatus(0);
+    task.execution_status = ExecutionStatus(0);
     LOG_DEBUG(log, "Executed query: {}", query);
 
     return true;
@@ -405,19 +449,18 @@ void DDLWorker::processTask(DDLTaskBase & task)
     String finished_node_path = task.getFinishedNodePath();
 
     String dummy;
-    auto code = zookeeper->tryCreate(active_node_path, "", zkutil::CreateMode::Ephemeral, dummy);
+    zookeeper->createAncestors(active_node_path);
+    auto active_node = zkutil::EphemeralNodeHolder::create(active_node_path, *zookeeper, "");
 
-    if (code == Coordination::Error::ZNONODE)
+    if (!task.was_executed)
     {
-        /// There is no parent
-        createStatusDirs(task.entry_path, zookeeper);
-        zookeeper->create(active_node_path, "", zkutil::CreateMode::Ephemeral);
-    }
-    else
-        throw Coordination::Exception(code, active_node_path);
+        /// If table and database engine supports it, they will execute task.ops by their own in a single transaction
+        /// with other zk operations (such as appending something to ReplicatedMergeTree log, or
+        /// updating metadata in Replicated database), so we make create request for finished_node_path with status "0",
+        /// which means that query executed successfully.
+        task.ops.emplace_back(zkutil::makeRemoveRequest(active_node_path, -1));
+        task.ops.emplace_back(zkutil::makeCreateRequest(finished_node_path, "0", zkutil::CreateMode::Persistent));
 
-    if (!task.was_executed)     // FIXME always true
-    {
         try
         {
             String rewritten_query = queryToString(task.query);
@@ -439,7 +482,7 @@ void DDLWorker::processTask(DDLTaskBase & task)
             if (task.execute_on_leader)
                 tryExecuteQueryOnLeaderReplica(task, storage, rewritten_query, task.entry_path, zookeeper);
             else
-                tryExecuteQuery(rewritten_query, task, task.execution_status);
+                tryExecuteQuery(rewritten_query, task);
         }
         catch (const Coordination::Exception &)
         {
@@ -451,25 +494,35 @@ void DDLWorker::processTask(DDLTaskBase & task)
             task.execution_status = ExecutionStatus::fromCurrentException("An error occurred before execution");
         }
 
+        if (task.execution_status.code != 0)
+        {
+            bool status_written_by_table_or_db = task.ops.empty();
+            if (status_written_by_table_or_db)
+            {
+                throw Exception(ErrorCodes::UNFINISHED, "Unexpected error: {}", task.execution_status.serializeText());
+            }
+            else
+            {
+                /// task.ops where not executed by table or database engine, se DDLWorker is responsible for
+                /// writing query execution status into ZooKeeper.
+                task.ops.emplace_back(zkutil::makeSetRequest(finished_node_path, task.execution_status.serializeText(), -1));
+            }
+        }
+
         /// We need to distinguish ZK errors occurred before and after query executing
         task.was_executed = true;
     }
 
     /// FIXME: if server fails right here, the task will be executed twice. We need WAL here.
-    /// Another possible issue: if ZooKeeper session is lost here, we will recover connection and execute the task second time.
+    /// If ZooKeeper connection is lost here, we will try again to write query status.
 
-
-
-    /// Delete active flag and create finish flag
-    Coordination::Requests ops;
-    ops.emplace_back(zkutil::makeRemoveRequest(active_node_path, -1));
-    ops.emplace_back(zkutil::makeCreateRequest(finished_node_path, task.execution_status.serializeText(), zkutil::CreateMode::Persistent));
-
-    //FIXME replace with multi(...) or use MetadataTransaction
-    Coordination::Responses responses;
-    auto res = zookeeper->tryMulti(ops, responses);
-    if (res != Coordination::Error::ZNODEEXISTS && res != Coordination::Error::ZNONODE)
-        zkutil::KeeperMultiException::check(res, ops, responses);
+    bool status_written = task.ops.empty();
+    if (!status_written)
+    {
+        zookeeper->multi(task.ops);
+        active_node->reset();
+        task.ops.clear();
+    }
 }
 
 
@@ -496,13 +549,17 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
 
     /// If we will develop new replicated storage
     if (!replicated_storage)
-        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Storage type '{}' is not supported by distributed DDL", storage->getName());
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Storage type '{}' is not supported by distributed DDL", storage->getName());
 
     String shard_path = task.getShardNodePath();
     String is_executed_path = shard_path + "/executed";
     String tries_to_execute_path = shard_path + "/tries_to_execute";
     zookeeper->createAncestors(shard_path + "/");
 
+    /// Leader replica creates is_executed_path node on successful query execution.
+    /// We will remove create_shard_flag from zk operations list, if current replica is just waiting for leader to execute the query.
+    auto create_shard_flag = zkutil::makeCreateRequest(is_executed_path, task.host_id_str, zkutil::CreateMode::Persistent);
+
     /// Node exists, or we will create or we will get an exception
     zookeeper->tryCreate(tries_to_execute_path, "0", zkutil::CreateMode::Persistent);
 
@@ -526,7 +583,9 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
 
     Stopwatch stopwatch;
 
-    bool executed_by_leader = false;
+    bool executed_by_us = false;
+    bool executed_by_other_leader = false;
+
     /// Defensive programming. One hour is more than enough to execute almost all DDL queries.
     /// If it will be very long query like ALTER DELETE for a huge table it's still will be executed,
     /// but DDL worker can continue processing other queries.
@@ -544,7 +603,7 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
             if (zookeeper->tryGet(is_executed_path, executed_by))
             {
                 LOG_DEBUG(log, "Task {} has already been executed by replica ({}) of the same shard.", task.entry_name, executed_by);
-                executed_by_leader = true;
+                executed_by_other_leader = true;
                 break;
             }
 
@@ -555,13 +614,14 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
 
             zookeeper->set(tries_to_execute_path, toString(counter + 1));
 
+            task.ops.push_back(create_shard_flag);
+            SCOPE_EXIT({ if (!executed_by_us && !task.ops.empty()) task.ops.pop_back(); });
+
             /// If the leader will unexpectedly changed this method will return false
             /// and on the next iteration new leader will take lock
-            if (tryExecuteQuery(rewritten_query, task, task.execution_status))
+            if (tryExecuteQuery(rewritten_query, task))
             {
-                //FIXME replace with create(...) or remove and use MetadataTransaction
-                zookeeper->createIfNotExists(is_executed_path, task.host_id_str);
-                executed_by_leader = true;
+                executed_by_us = true;
                 break;
             }
 
@@ -572,7 +632,7 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
         if (event->tryWait(std::uniform_int_distribution<int>(0, 1000)(rng)))
         {
             LOG_DEBUG(log, "Task {} has already been executed by replica ({}) of the same shard.", task.entry_name, zookeeper->get(is_executed_path));
-            executed_by_leader = true;
+            executed_by_other_leader = true;
             break;
         }
         else
@@ -593,8 +653,10 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
         }
     }
 
+    assert(!(executed_by_us && executed_by_other_leader));
+
     /// Not executed by leader so was not executed at all
-    if (!executed_by_leader)
+    if (!executed_by_us && !executed_by_other_leader)
     {
         /// If we failed with timeout
         if (stopwatch.elapsedSeconds() >= MAX_EXECUTION_TIMEOUT_SEC)
@@ -610,7 +672,11 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
         return false;
     }
 
-    LOG_DEBUG(log, "Task {} has already been executed by replica ({}) of the same shard.", task.entry_name, zookeeper->get(is_executed_path));
+    if (executed_by_us)
+        LOG_DEBUG(log, "Task {} executed by current replica", task.entry_name);
+    else // if (executed_by_other_leader)
+        LOG_DEBUG(log, "Task {} has already been executed by replica ({}) of the same shard.", task.entry_name, zookeeper->get(is_executed_path));
+
     return true;
 }
 
@@ -816,9 +882,17 @@ void DDLWorker::runMainThread()
             else
             {
                 LOG_ERROR(log, "Unexpected ZooKeeper error: {}.", getCurrentExceptionMessage(true));
-                assert(false);
+                //assert(false);
             }
         }
+        catch (const Exception & e)
+        {
+            if (e.code() == ErrorCodes::LOGICAL_ERROR)
+                throw;  /// Something terrible happened. Will terminate DDLWorker.
+
+            tryLogCurrentException(log, "Unexpected error, will try to restart main thread:");
+            initialized = false;
+        }
         catch (...)
         {
             tryLogCurrentException(log, "Unexpected error, will try to restart main thread:");
diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h
index 78921fa60e3..4145e0754e8 100644
--- a/src/Interpreters/DDLWorker.h
+++ b/src/Interpreters/DDLWorker.h
@@ -89,7 +89,7 @@ protected:
         const String & node_path,
         const ZooKeeperPtr & zookeeper);
 
-    bool tryExecuteQuery(const String & query, const DDLTaskBase & task, ExecutionStatus & status);
+    bool tryExecuteQuery(const String & query, DDLTaskBase & task);
 
     /// Checks and cleanups queue's nodes
     void cleanupQueue(Int64 current_time_seconds, const ZooKeeperPtr & zookeeper);

From 18fe1c796b6e2995d4de51e28f769bc0ae0ebf58 Mon Sep 17 00:00:00 2001
From: Pavel Kovalenko <jokserfn@yandex-team.ru>
Date: Tue, 22 Dec 2020 21:47:47 +0300
Subject: [PATCH 0068/1238] Ability to backup-restore metadata files for DiskS3
 (WIP)

---
 src/Disks/DiskCacheWrapper.cpp |  13 --
 src/Disks/DiskCacheWrapper.h   |   1 -
 src/Disks/DiskLocal.cpp        |   5 -
 src/Disks/DiskLocal.h          |   2 -
 src/Disks/DiskMemory.cpp       |   5 -
 src/Disks/DiskMemory.h         |   2 -
 src/Disks/IDisk.h              |   3 -
 src/Disks/S3/DiskS3.cpp        | 342 +++++++++++++++++++++++++++++----
 src/Disks/S3/DiskS3.h          |  31 ++-
 9 files changed, 331 insertions(+), 73 deletions(-)

diff --git a/src/Disks/DiskCacheWrapper.cpp b/src/Disks/DiskCacheWrapper.cpp
index 7ce963380d4..89bab7cfa98 100644
--- a/src/Disks/DiskCacheWrapper.cpp
+++ b/src/Disks/DiskCacheWrapper.cpp
@@ -239,19 +239,6 @@ void DiskCacheWrapper::replaceFile(const String & from_path, const String & to_p
     DiskDecorator::replaceFile(from_path, to_path);
 }
 
-void DiskCacheWrapper::copyFile(const String & from_path, const String & to_path)
-{
-    if (cache_disk->exists(from_path))
-    {
-        auto dir_path = getDirectoryPath(to_path);
-        if (!cache_disk->exists(dir_path))
-            cache_disk->createDirectories(dir_path);
-
-        cache_disk->copyFile(from_path, to_path);
-    }
-    DiskDecorator::copyFile(from_path, to_path);
-}
-
 void DiskCacheWrapper::remove(const String & path)
 {
     if (cache_disk->exists(path))
diff --git a/src/Disks/DiskCacheWrapper.h b/src/Disks/DiskCacheWrapper.h
index b0b373d900c..711ad5280ec 100644
--- a/src/Disks/DiskCacheWrapper.h
+++ b/src/Disks/DiskCacheWrapper.h
@@ -32,7 +32,6 @@ public:
     void moveDirectory(const String & from_path, const String & to_path) override;
     void moveFile(const String & from_path, const String & to_path) override;
     void replaceFile(const String & from_path, const String & to_path) override;
-    void copyFile(const String & from_path, const String & to_path) override;
     std::unique_ptr<ReadBufferFromFileBase>
     readFile(const String & path, size_t buf_size, size_t estimated_size, size_t aio_threshold, size_t mmap_threshold) const override;
     std::unique_ptr<WriteBufferFromFileBase>
diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp
index cde9b3c5a41..364b5bf4e2f 100644
--- a/src/Disks/DiskLocal.cpp
+++ b/src/Disks/DiskLocal.cpp
@@ -220,11 +220,6 @@ void DiskLocal::replaceFile(const String & from_path, const String & to_path)
         from_file.renameTo(to_file.path());
 }
 
-void DiskLocal::copyFile(const String & from_path, const String & to_path)
-{
-    Poco::File(disk_path + from_path).copyTo(disk_path + to_path);
-}
-
 std::unique_ptr<ReadBufferFromFileBase>
 DiskLocal::readFile(const String & path, size_t buf_size, size_t estimated_size, size_t aio_threshold, size_t mmap_threshold) const
 {
diff --git a/src/Disks/DiskLocal.h b/src/Disks/DiskLocal.h
index 762a8502faa..eac95c543ef 100644
--- a/src/Disks/DiskLocal.h
+++ b/src/Disks/DiskLocal.h
@@ -67,8 +67,6 @@ public:
 
     void replaceFile(const String & from_path, const String & to_path) override;
 
-    void copyFile(const String & from_path, const String & to_path) override;
-
     void copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path) override;
 
     void listFiles(const String & path, std::vector<String> & file_names) override;
diff --git a/src/Disks/DiskMemory.cpp b/src/Disks/DiskMemory.cpp
index d185263d48c..ef68ad19191 100644
--- a/src/Disks/DiskMemory.cpp
+++ b/src/Disks/DiskMemory.cpp
@@ -314,11 +314,6 @@ void DiskMemory::replaceFileImpl(const String & from_path, const String & to_pat
     files.insert(std::move(node));
 }
 
-void DiskMemory::copyFile(const String & /*from_path*/, const String & /*to_path*/)
-{
-    throw Exception("Method copyFile is not implemented for memory disks", ErrorCodes::NOT_IMPLEMENTED);
-}
-
 std::unique_ptr<ReadBufferFromFileBase> DiskMemory::readFile(const String & path, size_t /*buf_size*/, size_t, size_t, size_t) const
 {
     std::lock_guard lock(mutex);
diff --git a/src/Disks/DiskMemory.h b/src/Disks/DiskMemory.h
index 4d4b947098b..5c81051eaa4 100644
--- a/src/Disks/DiskMemory.h
+++ b/src/Disks/DiskMemory.h
@@ -60,8 +60,6 @@ public:
 
     void replaceFile(const String & from_path, const String & to_path) override;
 
-    void copyFile(const String & from_path, const String & to_path) override;
-
     void listFiles(const String & path, std::vector<String> & file_names) override;
 
     std::unique_ptr<ReadBufferFromFileBase> readFile(
diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h
index ac0f5a2ae8f..d20c1327509 100644
--- a/src/Disks/IDisk.h
+++ b/src/Disks/IDisk.h
@@ -127,9 +127,6 @@ public:
     /// If a file with `to_path` path already exists, it will be replaced.
     virtual void replaceFile(const String & from_path, const String & to_path) = 0;
 
-    /// Copy the file from `from_path` to `to_path`.
-    virtual void copyFile(const String & from_path, const String & to_path) = 0;
-
     /// Recursively copy data containing at `from_path` to `to_path` located at `to_disk`.
     virtual void copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path);
 
diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp
index 4786c05f8b0..d4b2f43b70a 100644
--- a/src/Disks/S3/DiskS3.cpp
+++ b/src/Disks/S3/DiskS3.cpp
@@ -23,6 +23,8 @@
 #include <aws/s3/model/CopyObjectRequest.h>
 #include <aws/s3/model/DeleteObjectsRequest.h>
 #include <aws/s3/model/GetObjectRequest.h>
+#include <aws/s3/model/ListObjectsV2Request.h>
+#include <aws/s3/model/HeadObjectRequest.h>
 
 #include <boost/algorithm/string.hpp>
 
@@ -32,6 +34,7 @@ namespace DB
 
 namespace ErrorCodes
 {
+    extern const int S3_ERROR;
     extern const int FILE_ALREADY_EXISTS;
     extern const int CANNOT_SEEK_THROUGH_FILE;
     extern const int UNKNOWN_FORMAT;
@@ -76,12 +79,12 @@ String getRandomName()
 }
 
 template <typename Result, typename Error>
-void throwIfError(Aws::Utils::Outcome<Result, Error> && response)
+void throwIfError(Aws::Utils::Outcome<Result, Error> & response)
 {
     if (!response.IsSuccess())
     {
         const auto & err = response.GetError();
-        throw Exception(err.GetMessage(), static_cast<int>(err.GetErrorType()));
+        throw Exception(std::to_string(static_cast<int>(err.GetErrorType())) + ": " + err.GetMessage(), ErrorCodes::S3_ERROR);
     }
 }
 
@@ -613,45 +616,31 @@ void DiskS3::moveFile(const String & from_path, const String & to_path)
 {
     if (exists(to_path))
         throw Exception("File already exists: " + to_path, ErrorCodes::FILE_ALREADY_EXISTS);
+
+    if (send_metadata)
+    {
+        auto revision = ++revision_counter;
+        const DiskS3::ObjectMetadata object_metadata {
+            {"from_path", from_path},
+            {"to_path", to_path}
+        };
+        createFileOperationObject("rename", revision, object_metadata);
+    }
+
     Poco::File(metadata_path + from_path).renameTo(metadata_path + to_path);
 }
 
 void DiskS3::replaceFile(const String & from_path, const String & to_path)
 {
-    Poco::File from_file(metadata_path + from_path);
-    Poco::File to_file(metadata_path + to_path);
-    if (to_file.exists())
+    if (exists(to_path))
     {
-        Poco::File tmp_file(metadata_path + to_path + ".old");
-        to_file.renameTo(tmp_file.path());
-        from_file.renameTo(metadata_path + to_path);
-        remove(to_path + ".old");
+        const String tmp_path = to_path + ".old";
+        moveFile(to_path, tmp_path);
+        moveFile(from_path, to_path);
+        remove(tmp_path);
     }
     else
-        from_file.renameTo(to_file.path());
-}
-
-void DiskS3::copyFile(const String & from_path, const String & to_path)
-{
-    if (exists(to_path))
-        remove(to_path);
-
-    auto from = readMeta(from_path);
-    auto to = createMeta(to_path);
-
-    for (const auto & [path, size] : from.s3_objects)
-    {
-        auto new_path = getRandomName();
-        Aws::S3::Model::CopyObjectRequest req;
-        req.SetCopySource(bucket + "/" + s3_root_path + path);
-        req.SetBucket(bucket);
-        req.SetKey(s3_root_path + new_path);
-        throwIfError(client->CopyObject(req));
-
-        to.addObject(new_path, size);
-    }
-
-    to.save();
+        moveFile(from_path, to_path);
 }
 
 std::unique_ptr<ReadBufferFromFileBase> DiskS3::readFile(const String & path, size_t buf_size, size_t, size_t, size_t) const
@@ -673,7 +662,17 @@ std::unique_ptr<WriteBufferFromFileBase> DiskS3::writeFile(const String & path,
 
     /// Path to store new S3 object.
     auto s3_path = getRandomName();
-    auto object_metadata = createObjectMetadata(path);
+
+    std::optional<ObjectMetadata> object_metadata;
+    if (send_metadata)
+    {
+        auto revision = ++revision_counter;
+        object_metadata = {
+            {"path", path}
+        };
+        s3_path = "r" + revisionToString(revision) + "-file-" + s3_path;
+    }
+
     if (!exist || mode == WriteMode::Rewrite)
     {
         /// If metadata file exists - remove and create new.
@@ -727,6 +726,15 @@ void DiskS3::removeMeta(const String & path, AwsS3KeyKeeper & keys)
         }
         else /// In other case decrement number of references, save metadata and delete file.
         {
+            if (send_metadata)
+            {
+                auto revision = ++revision_counter;
+                const ObjectMetadata object_metadata {
+                    {"path", path}
+                };
+                createFileOperationObject("remove", revision, object_metadata);
+            }
+
             --metadata.ref_count;
             metadata.save();
             file.remove();
@@ -780,7 +788,8 @@ void DiskS3::removeAws(const AwsS3KeyKeeper & keys)
             Aws::S3::Model::DeleteObjectsRequest request;
             request.SetBucket(bucket);
             request.SetDelete(delkeys);
-            throwIfError(client->DeleteObjects(request));
+            auto outcome = client->DeleteObjects(request);
+            throwIfError(outcome);
         }
     }
 }
@@ -840,6 +849,16 @@ Poco::Timestamp DiskS3::getLastModified(const String & path)
 
 void DiskS3::createHardLink(const String & src_path, const String & dst_path)
 {
+    if (send_metadata)
+    {
+        auto revision = ++revision_counter;
+        const ObjectMetadata object_metadata {
+            {"src_path", src_path},
+            {"dst_path", dst_path}
+        };
+        createFileOperationObject("hardlink", revision, object_metadata);
+    }
+
     /// Increment number of references.
     auto src = readMeta(src_path);
     ++src.ref_count;
@@ -889,12 +908,257 @@ void DiskS3::shutdown()
     client->DisableRequestProcessing();
 }
 
-std::optional<DiskS3::ObjectMetadata> DiskS3::createObjectMetadata(const String & path) const
+void DiskS3::createFileOperationObject(const String & operation_name, UInt64 revision, const DiskS3::ObjectMetadata & metadata)
 {
-    if (send_metadata)
-        return (DiskS3::ObjectMetadata){{"path", path}};
+    const String key = "meta/r" + revisionToString(revision) + "-" + operation_name;
+    WriteBufferFromS3 buffer(client, bucket, s3_root_path + key, min_upload_part_size, max_single_part_upload_size, metadata);
+    buffer.write('0');
+    buffer.finalize();
+}
 
-    return {};
+void DiskS3::startup()
+{
+    if (!send_metadata)
+        return;
+
+    LOG_INFO(&Poco::Logger::get("DiskS3"), "Starting up disk {}", name);
+
+    /// Find last revision.
+    UInt64 l = 0, r = (static_cast<UInt64>(1)) << 63;
+    while (r - l > 1)
+    {
+        auto revision = (r - l) >> 1;
+        auto revision_str = revisionToString(revision);
+        /// Check that object or metaobject with such revision exists.
+        if (checkObjectExists(s3_root_path + "r" + revision_str)
+            || checkObjectExists(s3_root_path + "meta/r" + revision_str))
+            l = revision;
+        else
+            r = revision;
+    }
+    revision_counter = l;
+    LOG_INFO(&Poco::Logger::get("DiskS3"), "Found last revision number {}", revision_counter);
+}
+
+bool DiskS3::checkObjectExists(const String & prefix)
+{
+    Aws::S3::Model::ListObjectsV2Request request;
+    request.SetBucket(bucket);
+    request.SetPrefix(prefix);
+    request.SetMaxKeys(1);
+
+    auto outcome = client->ListObjectsV2(request);
+    throwIfError(outcome);
+
+    return !outcome.GetResult().GetContents().empty();
+}
+
+struct DiskS3::RestoreInformation
+{
+    UInt64 revision = (static_cast<UInt64>(1)) << 63;
+    String bucket;
+    String path;
+};
+
+void DiskS3::restore()
+{
+    if (!exists(restore_file))
+        return;
+
+    RestoreInformation information;
+    ///TODO: read restore information from restore_file.
+
+    restoreFiles(information.bucket, information.path, information.revision);
+    restoreFileOperations(information.bucket, information.path, information.revision);
+}
+
+Aws::S3::Model::HeadObjectResult DiskS3::headObject(const String & source_bucket, const String & key)
+{
+    Aws::S3::Model::HeadObjectRequest request;
+    request.SetBucket(source_bucket);
+    request.SetKey(key);
+
+    auto outcome = client->HeadObject(request);
+    throwIfError(outcome);
+
+    return outcome.GetResultWithOwnership();
+}
+
+void DiskS3::listObjects(const String & source_bucket, const String & source_path, std::function<bool(const Aws::S3::Model::ListObjectsV2Result &)> callback)
+{
+    Aws::S3::Model::ListObjectsV2Request request;
+    request.SetBucket(source_bucket);
+    request.SetPrefix(source_path);
+    request.SetMaxKeys(1000);
+
+    Aws::S3::Model::ListObjectsV2Outcome outcome;
+    do
+    {
+        outcome = client->ListObjectsV2(request);
+        throwIfError(outcome);
+
+        bool should_continue = callback(outcome.GetResult());
+
+        if (!should_continue)
+            break;
+
+        request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken());
+    } while (outcome.GetResult().GetIsTruncated());
+}
+
+void DiskS3::restoreFiles(const String & source_bucket, const String & source_path, UInt64 revision)
+{
+    LOG_INFO(&Poco::Logger::get("DiskS3"), "Starting restore files for disk {}", name);
+
+    std::vector<std::future<void>> results;
+
+    listObjects(source_bucket, source_path, [this, &source_bucket, &revision, &results](auto list_result) {
+        std::vector<String> keys;
+        for (const auto & row : list_result.GetContents())
+        {
+            const String & key = row.GetKey();
+
+            /// Skip meta objects. They will be processed separately.
+            if (key.find("/meta/") != String::npos)
+                continue;
+
+            /// Filter early if it's possible to get revision from key.
+            if (extractRevisionFromKey(key) > revision)
+                continue;
+
+            keys.push_back(key);
+        }
+
+        if (!keys.empty())
+        {
+            auto result = getExecutor().execute([this, &source_bucket, keys]() { processRestoreFiles(source_bucket, keys);
+            });
+
+            results.push_back(std::move(result));
+        }
+
+        return true;
+    });
+
+    for (auto & result : results)
+        result.wait();
+    for (auto & result : results)
+        result.get();
+
+    LOG_INFO(&Poco::Logger::get("DiskS3"), "Files are restored for disk {}", name);
+}
+
+inline String getDirectoryPath(const String & path)
+{
+    return Poco::Path{path}.setFileName("").toString();
+}
+
+void DiskS3::processRestoreFiles(const String & source_bucket, Strings keys)
+{
+    for (const auto & key : keys)
+    {
+        Aws::S3::Model::HeadObjectRequest request;
+        request.SetBucket(source_bucket);
+        request.SetKey(key);
+
+        auto outcome = client->HeadObject(request);
+        throwIfError(outcome);
+
+        auto object_metadata = outcome.GetResult().GetMetadata();
+
+        /// If object has 'path' in metadata then restore it.
+        auto path = object_metadata.find("path");
+        if (path == object_metadata.end())
+            continue;
+
+        createDirectories(getDirectoryPath(path->second));
+        auto metadata = createMeta(path->second);
+
+        /// TODO: shrink common prefix of s3_root_path and key.
+        auto relative_key = key;
+        metadata.addObject(relative_key, outcome.GetResult().GetContentLength());
+
+        /// TODO: Copy object to configured bucket if source_bucket is different.
+
+        metadata.save();
+    }
+}
+
+void DiskS3::restoreFileOperations(const String & source_bucket, const String & source_path, UInt64 revision)
+{
+    LOG_INFO(&Poco::Logger::get("DiskS3"), "Starting restore file operations for disk {}", name);
+
+    /// Temporarily disable sending metadata.
+    send_metadata = false;
+
+    listObjects(source_bucket, source_path + "meta/", [this, &source_bucket, &revision](auto list_result) {
+        const String rename = "rename";
+        const String remove = "remove";
+        const String hardlink = "hardlink";
+
+        for (const auto & row : list_result.GetContents())
+        {
+            const String & key = row.GetKey();
+
+            /// Stop processing when get revision more than required.
+            /// S3 ensures that keys will be listed in ascending UTF-8 bytes order.
+            if (extractRevisionFromKey(key) > revision)
+                return false;
+
+            auto operation = extractOperationFromKey(key);
+            auto object_metadata = headObject(source_bucket, key).GetMetadata();
+            if (operation == rename)
+            {
+                auto from_path = object_metadata["from_path"];
+                auto to_path = object_metadata["to_path"];
+                if (exists(from_path))
+                    moveFile(from_path, to_path);
+            }
+            else if (operation == remove)
+            {
+                removeIfExists(object_metadata["path"]);
+            }
+            else if (operation == hardlink)
+            {
+                auto src_path = object_metadata["src_path"];
+                auto dst_path = object_metadata["dst_path"];
+                /// Skip hardlinks to shadow (backup) directory.
+                if (exists(src_path) && dst_path.find("/shadow/") != String::npos)
+                    createHardLink(src_path, dst_path);
+            }
+        }
+
+        return true;
+    });
+
+    send_metadata = true;
+
+    LOG_INFO(&Poco::Logger::get("DiskS3"), "File operations restored for disk {}", name);
+}
+
+UInt64 DiskS3::extractRevisionFromKey(const String & key)
+{
+    /// TODO: Implement.
+    return 0;
+}
+
+String DiskS3::extractOperationFromKey(const String & key)
+{
+    /// TODO: Implement.
+    return "";
+}
+
+String DiskS3::revisionToString(UInt64 revision)
+{
+    static constexpr size_t max_digits = 19;
+
+    /// Align revision number with leading zeroes to have strict lexicographical order of them.
+    auto revision_str = std::to_string(revision);
+    auto digits_to_align = max_digits - revision_str.length();
+    for (size_t i = 0; i < digits_to_align; ++i)
+        revision_str = "0" + revision_str;
+
+    return revision_str;
 }
 
 }
diff --git a/src/Disks/S3/DiskS3.h b/src/Disks/S3/DiskS3.h
index f62c603adda..dfaa3136642 100644
--- a/src/Disks/S3/DiskS3.h
+++ b/src/Disks/S3/DiskS3.h
@@ -1,10 +1,14 @@
 #pragma once
 
+#include <atomic>
 #include "Disks/DiskFactory.h"
 #include "Disks/Executor.h"
 #include "ProxyConfiguration.h"
 
 #include <aws/s3/S3Client.h>
+#include <aws/s3/model/HeadObjectResult.h>
+#include <aws/s3/model/ListObjectsV2Result.h>
+
 #include <Poco/DirectoryIterator.h>
 
 
@@ -19,12 +23,16 @@ namespace DB
 class DiskS3 : public IDisk
 {
 public:
+    /// File contains restore information
+    const String restore_file = "restore";
+
     using ObjectMetadata = std::map<std::string, std::string>;
 
     friend class DiskS3Reservation;
 
     class AwsS3KeyKeeper;
     struct Metadata;
+    struct RestoreInformation;
 
     DiskS3(
         String name_,
@@ -74,8 +82,6 @@ public:
 
     void replaceFile(const String & from_path, const String & to_path) override;
 
-    void copyFile(const String & from_path, const String & to_path) override;
-
     void listFiles(const String & path, std::vector<String> & file_names) override;
 
     std::unique_ptr<ReadBufferFromFileBase> readFile(
@@ -114,17 +120,34 @@ public:
 
     void shutdown() override;
 
+    /// Actions performed after disk creation.
+    void startup();
+
+    /// Restore S3 metadata files on file system.
+    void restore();
+
 private:
     bool tryReserve(UInt64 bytes);
 
     void removeMeta(const String & path, AwsS3KeyKeeper & keys);
     void removeMetaRecursive(const String & path, AwsS3KeyKeeper & keys);
     void removeAws(const AwsS3KeyKeeper & keys);
-    std::optional<ObjectMetadata> createObjectMetadata(const String & path) const;
 
     Metadata readMeta(const String & path) const;
     Metadata createMeta(const String & path) const;
 
+    void createFileOperationObject(const String & operation_name, UInt64 revision, const ObjectMetadata & metadata);
+    String revisionToString(UInt64 revision);
+    bool checkObjectExists(const String & prefix);
+
+    Aws::S3::Model::HeadObjectResult headObject(const String & source_bucket, const String & key);
+    void listObjects(const String & source_bucket, const String & source_path, std::function<bool(const Aws::S3::Model::ListObjectsV2Result &)> callback);
+    void restoreFiles(const String & source_bucket, const String & source_path, UInt64 revision);
+    void processRestoreFiles(const String & source_bucket, std::vector<String> keys);
+    void restoreFileOperations(const String & source_bucket, const String & source_path, UInt64 revision);
+    UInt64 extractRevisionFromKey(const String & key);
+    String extractOperationFromKey(const String & key);
+
 private:
     const String name;
     std::shared_ptr<Aws::S3::S3Client> client;
@@ -140,6 +163,8 @@ private:
     UInt64 reserved_bytes = 0;
     UInt64 reservation_count = 0;
     std::mutex reservation_mutex;
+
+    std::atomic<UInt64> revision_counter;
 };
 
 }

From cc3b5958b047fc7c7f41557a9148deb63330e38f Mon Sep 17 00:00:00 2001
From: Pavel Kovalenko <jokserfn@yandex-team.ru>
Date: Wed, 23 Dec 2020 15:35:52 +0300
Subject: [PATCH 0069/1238] Ability to backup-restore metadata files for DiskS3
 (WIP)

---
 src/Disks/DiskCacheWrapper.cpp  |  15 +--
 src/Disks/DiskCacheWrapper.h    |   1 -
 src/Disks/DiskDecorator.cpp     |   5 -
 src/Disks/DiskDecorator.h       |   1 -
 src/Disks/IDisk.h               |   7 ++
 src/Disks/S3/DiskS3.cpp         | 207 +++++++++++++++++++++-----------
 src/Disks/S3/DiskS3.h           |  23 ++--
 src/Disks/S3/registerDiskS3.cpp |   3 +
 8 files changed, 171 insertions(+), 91 deletions(-)

diff --git a/src/Disks/DiskCacheWrapper.cpp b/src/Disks/DiskCacheWrapper.cpp
index 89bab7cfa98..d44f5a8e0d4 100644
--- a/src/Disks/DiskCacheWrapper.cpp
+++ b/src/Disks/DiskCacheWrapper.cpp
@@ -139,7 +139,7 @@ DiskCacheWrapper::readFile(const String & path, size_t buf_size, size_t estimate
         {
             try
             {
-                auto dir_path = getDirectoryPath(path);
+                auto dir_path = directoryPath(path);
                 if (!cache_disk->exists(dir_path))
                     cache_disk->createDirectories(dir_path);
 
@@ -182,7 +182,7 @@ DiskCacheWrapper::writeFile(const String & path, size_t buf_size, WriteMode mode
 
     LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Write file {} to cache", backQuote(path));
 
-    auto dir_path = getDirectoryPath(path);
+    auto dir_path = directoryPath(path);
     if (!cache_disk->exists(dir_path))
         cache_disk->createDirectories(dir_path);
 
@@ -217,7 +217,7 @@ void DiskCacheWrapper::moveFile(const String & from_path, const String & to_path
 {
     if (cache_disk->exists(from_path))
     {
-        auto dir_path = getDirectoryPath(to_path);
+        auto dir_path = directoryPath(to_path);
         if (!cache_disk->exists(dir_path))
             cache_disk->createDirectories(dir_path);
 
@@ -230,7 +230,7 @@ void DiskCacheWrapper::replaceFile(const String & from_path, const String & to_p
 {
     if (cache_disk->exists(from_path))
     {
-        auto dir_path = getDirectoryPath(to_path);
+        auto dir_path = directoryPath(to_path);
         if (!cache_disk->exists(dir_path))
             cache_disk->createDirectories(dir_path);
 
@@ -257,7 +257,7 @@ void DiskCacheWrapper::createHardLink(const String & src_path, const String & ds
 {
     if (cache_disk->exists(src_path))
     {
-        auto dir_path = getDirectoryPath(dst_path);
+        auto dir_path = directoryPath(dst_path);
         if (!cache_disk->exists(dir_path))
             cache_disk->createDirectories(dir_path);
 
@@ -278,11 +278,6 @@ void DiskCacheWrapper::createDirectories(const String & path)
     DiskDecorator::createDirectories(path);
 }
 
-inline String DiskCacheWrapper::getDirectoryPath(const String & path)
-{
-    return Poco::Path{path}.setFileName("").toString();
-}
-
 /// TODO: Current reservation mechanism leaks IDisk abstraction details.
 /// This hack is needed to return proper disk pointer (wrapper instead of implementation) from reservation object.
 class ReservationDelegate : public IReservation
diff --git a/src/Disks/DiskCacheWrapper.h b/src/Disks/DiskCacheWrapper.h
index 711ad5280ec..0722c2dab84 100644
--- a/src/Disks/DiskCacheWrapper.h
+++ b/src/Disks/DiskCacheWrapper.h
@@ -43,7 +43,6 @@ public:
 
 private:
     std::shared_ptr<FileDownloadMetadata> acquireDownloadMetadata(const String & path) const;
-    static String getDirectoryPath(const String & path);
 
     /// Disk to cache files.
     std::shared_ptr<DiskLocal> cache_disk;
diff --git a/src/Disks/DiskDecorator.cpp b/src/Disks/DiskDecorator.cpp
index aaa54005f6f..8441803a2af 100644
--- a/src/Disks/DiskDecorator.cpp
+++ b/src/Disks/DiskDecorator.cpp
@@ -103,11 +103,6 @@ void DiskDecorator::replaceFile(const String & from_path, const String & to_path
     delegate->replaceFile(from_path, to_path);
 }
 
-void DiskDecorator::copyFile(const String & from_path, const String & to_path)
-{
-    delegate->copyFile(from_path, to_path);
-}
-
 void DiskDecorator::copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path)
 {
     delegate->copy(from_path, to_disk, to_path);
diff --git a/src/Disks/DiskDecorator.h b/src/Disks/DiskDecorator.h
index 1ce3c3ea773..eed3c77abf6 100644
--- a/src/Disks/DiskDecorator.h
+++ b/src/Disks/DiskDecorator.h
@@ -32,7 +32,6 @@ public:
     void createFile(const String & path) override;
     void moveFile(const String & from_path, const String & to_path) override;
     void replaceFile(const String & from_path, const String & to_path) override;
-    void copyFile(const String & from_path, const String & to_path) override;
     void copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path) override;
     void listFiles(const String & path, std::vector<String> & file_names) override;
     std::unique_ptr<ReadBufferFromFileBase>
diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h
index d20c1327509..7d3e498a40b 100644
--- a/src/Disks/IDisk.h
+++ b/src/Disks/IDisk.h
@@ -262,4 +262,11 @@ inline String fileName(const String & path)
 {
     return Poco::Path(path).getFileName();
 }
+
+/// Return directory path for the specified path.
+inline String directoryPath(const String & path)
+{
+    return Poco::Path(path).setFileName("").toString();
+}
+
 }
diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp
index d4b2f43b70a..318fda72368 100644
--- a/src/Disks/S3/DiskS3.cpp
+++ b/src/Disks/S3/DiskS3.cpp
@@ -41,6 +41,7 @@ namespace ErrorCodes
     extern const int INCORRECT_DISK_INDEX;
     extern const int NOT_IMPLEMENTED;
     extern const int PATH_ACCESS_DENIED;
+    extern const int LOGICAL_ERROR;
 }
 
 
@@ -849,7 +850,8 @@ Poco::Timestamp DiskS3::getLastModified(const String & path)
 
 void DiskS3::createHardLink(const String & src_path, const String & dst_path)
 {
-    if (send_metadata)
+    /// We don't need to record hardlinks created to shadow folder.
+    if (send_metadata && dst_path.find("/shadow/") != String::npos)
     {
         auto revision = ++revision_counter;
         const ObjectMetadata object_metadata {
@@ -910,7 +912,7 @@ void DiskS3::shutdown()
 
 void DiskS3::createFileOperationObject(const String & operation_name, UInt64 revision, const DiskS3::ObjectMetadata & metadata)
 {
-    const String key = "meta/r" + revisionToString(revision) + "-" + operation_name;
+    const String key = "operations/r" + revisionToString(revision) + "-" + operation_name;
     WriteBufferFromS3 buffer(client, bucket, s3_root_path + key, min_upload_part_size, max_single_part_upload_size, metadata);
     buffer.write('0');
     buffer.finalize();
@@ -929,9 +931,9 @@ void DiskS3::startup()
     {
         auto revision = (r - l) >> 1;
         auto revision_str = revisionToString(revision);
-        /// Check that object or metaobject with such revision exists.
+        /// Check that file or operation with such revision exists.
         if (checkObjectExists(s3_root_path + "r" + revision_str)
-            || checkObjectExists(s3_root_path + "meta/r" + revision_str))
+            || checkObjectExists(s3_root_path + "operations/r" + revision_str))
             l = revision;
         else
             r = revision;
@@ -953,25 +955,6 @@ bool DiskS3::checkObjectExists(const String & prefix)
     return !outcome.GetResult().GetContents().empty();
 }
 
-struct DiskS3::RestoreInformation
-{
-    UInt64 revision = (static_cast<UInt64>(1)) << 63;
-    String bucket;
-    String path;
-};
-
-void DiskS3::restore()
-{
-    if (!exists(restore_file))
-        return;
-
-    RestoreInformation information;
-    ///TODO: read restore information from restore_file.
-
-    restoreFiles(information.bucket, information.path, information.revision);
-    restoreFileOperations(information.bucket, information.path, information.revision);
-}
-
 Aws::S3::Model::HeadObjectResult DiskS3::headObject(const String & source_bucket, const String & key)
 {
     Aws::S3::Model::HeadObjectRequest request;
@@ -1006,24 +989,102 @@ void DiskS3::listObjects(const String & source_bucket, const String & source_pat
     } while (outcome.GetResult().GetIsTruncated());
 }
 
-void DiskS3::restoreFiles(const String & source_bucket, const String & source_path, UInt64 revision)
+void DiskS3::copyObject(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key)
+{
+    Aws::S3::Model::CopyObjectRequest request;
+    request.SetCopySource(src_bucket + "/" + src_key);
+    request.SetBucket(dst_bucket);
+    request.SetKey(dst_key);
+
+    auto outcome = client->CopyObject(request);
+    throwIfError(outcome);
+}
+
+struct DiskS3::RestoreInformation
+{
+    UInt64 revision = (static_cast<UInt64>(1)) << 63;
+    String bucket;
+    String path;
+};
+
+void DiskS3::readRestoreInformation(DiskS3::RestoreInformation & restore_information)
+{
+    ReadBufferFromFile buffer(metadata_path + restore_file, 512);
+    buffer.next();
+
+    /// Empty file - just restore all metadata.
+    if (!buffer.hasPendingData())
+        return;
+
+    try
+    {
+        readIntText(restore_information.revision, buffer);
+        assertChar('\n', buffer);
+
+        if (!buffer.hasPendingData())
+            return;
+
+        readText(restore_information.bucket, buffer);
+        assertChar('\n', buffer);
+
+        if (!buffer.hasPendingData())
+            return;
+
+        readText(restore_information.path, buffer);
+        assertChar('\n', buffer);
+
+        if (buffer.hasPendingData())
+            throw Exception("Extra information at the end of restore file", ErrorCodes::UNKNOWN_FORMAT);
+    }
+    catch (const Exception & e)
+    {
+        throw Exception("Failed to read restore information", e, ErrorCodes::UNKNOWN_FORMAT);
+    }
+}
+
+void DiskS3::restore()
+{
+    if (!exists(restore_file))
+        return;
+
+    try
+    {
+        RestoreInformation information;
+        information.bucket = bucket;
+        information.path = s3_root_path;
+
+        readRestoreInformation(information);
+
+        ///TODO: Cleanup FS and bucket if previous restore was failed.
+
+        restoreFiles(information.bucket, information.path, information.revision);
+        restoreFileOperations(information.bucket, information.path, information.revision);
+    }
+    catch (const Exception & e)
+    {
+        throw Exception("Failed to restore disk: " + name, e, ErrorCodes::LOGICAL_ERROR);
+    }
+}
+
+void DiskS3::restoreFiles(const String & source_bucket, const String & source_path, UInt64 target_revision)
 {
     LOG_INFO(&Poco::Logger::get("DiskS3"), "Starting restore files for disk {}", name);
 
     std::vector<std::future<void>> results;
-
-    listObjects(source_bucket, source_path, [this, &source_bucket, &revision, &results](auto list_result) {
+    listObjects(source_bucket, source_path, [this, &source_bucket, &source_path, &target_revision, &results](auto list_result)
+    {
         std::vector<String> keys;
         for (const auto & row : list_result.GetContents())
         {
             const String & key = row.GetKey();
 
-            /// Skip meta objects. They will be processed separately.
-            if (key.find("/meta/") != String::npos)
+            /// Skip file operations objects. They will be processed separately.
+            if (key.find("/operations/") != String::npos)
                 continue;
 
+            auto [revision, _] = extractRevisionAndOperationFromKey(key);
             /// Filter early if it's possible to get revision from key.
-            if (extractRevisionFromKey(key) > revision)
+            if (revision > target_revision)
                 continue;
 
             keys.push_back(key);
@@ -1031,7 +1092,9 @@ void DiskS3::restoreFiles(const String & source_bucket, const String & source_pa
 
         if (!keys.empty())
         {
-            auto result = getExecutor().execute([this, &source_bucket, keys]() { processRestoreFiles(source_bucket, keys);
+            auto result = getExecutor().execute([this, &source_bucket, &source_path, keys]()
+            {
+                processRestoreFiles(source_bucket, source_path, keys);
             });
 
             results.push_back(std::move(result));
@@ -1048,50 +1111,45 @@ void DiskS3::restoreFiles(const String & source_bucket, const String & source_pa
     LOG_INFO(&Poco::Logger::get("DiskS3"), "Files are restored for disk {}", name);
 }
 
-inline String getDirectoryPath(const String & path)
-{
-    return Poco::Path{path}.setFileName("").toString();
-}
-
-void DiskS3::processRestoreFiles(const String & source_bucket, Strings keys)
+void DiskS3::processRestoreFiles(const String & source_bucket, const String & source_path, Strings keys)
 {
     for (const auto & key : keys)
     {
-        Aws::S3::Model::HeadObjectRequest request;
-        request.SetBucket(source_bucket);
-        request.SetKey(key);
-
-        auto outcome = client->HeadObject(request);
-        throwIfError(outcome);
-
-        auto object_metadata = outcome.GetResult().GetMetadata();
+        auto head_result = headObject(source_bucket, key);
+        auto object_metadata = head_result.GetMetadata();
 
         /// If object has 'path' in metadata then restore it.
-        auto path = object_metadata.find("path");
-        if (path == object_metadata.end())
+        auto path_entry = object_metadata.find("path");
+        if (path_entry == object_metadata.end())
+        {
+            LOG_WARNING(&Poco::Logger::get("DiskS3"), "Skip key {} because it doesn't have path key in metadata", key);
             continue;
+        }
 
-        createDirectories(getDirectoryPath(path->second));
-        auto metadata = createMeta(path->second);
+        const auto & path = path_entry->second;
 
-        /// TODO: shrink common prefix of s3_root_path and key.
-        auto relative_key = key;
-        metadata.addObject(relative_key, outcome.GetResult().GetContentLength());
+        createDirectories(directoryPath(path));
+        auto metadata = createMeta(path);
 
-        /// TODO: Copy object to configured bucket if source_bucket is different.
+        auto relative_key = shrinkKey(source_path, key);
+        metadata.addObject(relative_key, head_result.GetContentLength());
+
+        /// Copy object to bucket configured for current DiskS3 instance.
+        if (bucket != source_bucket)
+            copyObject(source_bucket, key, bucket, s3_root_path + relative_key);
 
         metadata.save();
     }
 }
 
-void DiskS3::restoreFileOperations(const String & source_bucket, const String & source_path, UInt64 revision)
+void DiskS3::restoreFileOperations(const String & source_bucket, const String & source_path, UInt64 target_revision)
 {
     LOG_INFO(&Poco::Logger::get("DiskS3"), "Starting restore file operations for disk {}", name);
 
-    /// Temporarily disable sending metadata.
-    send_metadata = false;
+    /// Disable sending metadata if we restore metadata to the same bucket.
+    send_metadata = bucket != source_bucket;
 
-    listObjects(source_bucket, source_path + "meta/", [this, &source_bucket, &revision](auto list_result) {
+    listObjects(source_bucket, source_path + "operations/", [this, &source_bucket, &target_revision](auto list_result) {
         const String rename = "rename";
         const String remove = "remove";
         const String hardlink = "hardlink";
@@ -1100,12 +1158,22 @@ void DiskS3::restoreFileOperations(const String & source_bucket, const String &
         {
             const String & key = row.GetKey();
 
+            auto [revision, operation] = extractRevisionAndOperationFromKey(key);
+            if (revision == 0)
+            {
+                LOG_WARNING(&Poco::Logger::get("DiskS3"), "Skip key {} with unknown revision", revision);
+                continue;
+            }
+
             /// Stop processing when get revision more than required.
             /// S3 ensures that keys will be listed in ascending UTF-8 bytes order.
-            if (extractRevisionFromKey(key) > revision)
+            if (revision > target_revision)
                 return false;
 
-            auto operation = extractOperationFromKey(key);
+            /// Keep original revision if restore to different bucket.
+            if (send_metadata)
+                revision_counter = revision - 1;
+
             auto object_metadata = headObject(source_bucket, key).GetMetadata();
             if (operation == rename)
             {
@@ -1122,8 +1190,7 @@ void DiskS3::restoreFileOperations(const String & source_bucket, const String &
             {
                 auto src_path = object_metadata["src_path"];
                 auto dst_path = object_metadata["dst_path"];
-                /// Skip hardlinks to shadow (backup) directory.
-                if (exists(src_path) && dst_path.find("/shadow/") != String::npos)
+                if (exists(src_path))
                     createHardLink(src_path, dst_path);
             }
         }
@@ -1136,21 +1203,27 @@ void DiskS3::restoreFileOperations(const String & source_bucket, const String &
     LOG_INFO(&Poco::Logger::get("DiskS3"), "File operations restored for disk {}", name);
 }
 
-UInt64 DiskS3::extractRevisionFromKey(const String & key)
+std::tuple<UInt64, String> DiskS3::extractRevisionAndOperationFromKey(const String & key)
 {
-    /// TODO: Implement.
-    return 0;
+    UInt64 revision = 0;
+    String operation;
+
+    re2::RE2::FullMatch(key, key_regexp, &revision, &operation);
+
+    return {revision, operation};
 }
 
-String DiskS3::extractOperationFromKey(const String & key)
+String DiskS3::shrinkKey(const String & path, const String & key)
 {
-    /// TODO: Implement.
-    return "";
+    if (!key.starts_with(path))
+        throw Exception("The key " + key + " prefix mismatch with given " + path, ErrorCodes::LOGICAL_ERROR);
+
+    return key.substr(path.length());
 }
 
 String DiskS3::revisionToString(UInt64 revision)
 {
-    static constexpr size_t max_digits = 19;
+    static constexpr size_t max_digits = 19; /// UInt64 max digits in decimal representation.
 
     /// Align revision number with leading zeroes to have strict lexicographical order of them.
     auto revision_str = std::to_string(revision);
diff --git a/src/Disks/S3/DiskS3.h b/src/Disks/S3/DiskS3.h
index dfaa3136642..532ddcbd858 100644
--- a/src/Disks/S3/DiskS3.h
+++ b/src/Disks/S3/DiskS3.h
@@ -10,6 +10,7 @@
 #include <aws/s3/model/ListObjectsV2Result.h>
 
 #include <Poco/DirectoryIterator.h>
+#include <re2/re2.h>
 
 
 namespace DB
@@ -137,16 +138,22 @@ private:
     Metadata createMeta(const String & path) const;
 
     void createFileOperationObject(const String & operation_name, UInt64 revision, const ObjectMetadata & metadata);
-    String revisionToString(UInt64 revision);
-    bool checkObjectExists(const String & prefix);
+    static String revisionToString(UInt64 revision);
 
+    bool checkObjectExists(const String & prefix);
     Aws::S3::Model::HeadObjectResult headObject(const String & source_bucket, const String & key);
     void listObjects(const String & source_bucket, const String & source_path, std::function<bool(const Aws::S3::Model::ListObjectsV2Result &)> callback);
-    void restoreFiles(const String & source_bucket, const String & source_path, UInt64 revision);
-    void processRestoreFiles(const String & source_bucket, std::vector<String> keys);
-    void restoreFileOperations(const String & source_bucket, const String & source_path, UInt64 revision);
-    UInt64 extractRevisionFromKey(const String & key);
-    String extractOperationFromKey(const String & key);
+    void copyObject(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key);
+
+    void readRestoreInformation(RestoreInformation & restore_information);
+    void restoreFiles(const String & source_bucket, const String & source_path, UInt64 target_revision);
+    void processRestoreFiles(const String & source_bucket, const String & source_path, std::vector<String> keys);
+    void restoreFileOperations(const String & source_bucket, const String & source_path, UInt64 target_revision);
+
+    /// Remove 'path' prefix from 'key' to get relative key.
+    /// It's needed to store keys to metadata files in RELATIVE_PATHS version.
+    static String shrinkKey(const String & path, const String & key);
+    std::tuple<UInt64, String> extractRevisionAndOperationFromKey(const String & key);
 
 private:
     const String name;
@@ -165,6 +172,8 @@ private:
     std::mutex reservation_mutex;
 
     std::atomic<UInt64> revision_counter;
+    /// Key has format: ../../r{revision}-{operation}
+    const re2::RE2 key_regexp {".*/r(\\d+)-(\\w+).*"};
 };
 
 }
diff --git a/src/Disks/S3/registerDiskS3.cpp b/src/Disks/S3/registerDiskS3.cpp
index fd658d95327..14aecb89517 100644
--- a/src/Disks/S3/registerDiskS3.cpp
+++ b/src/Disks/S3/registerDiskS3.cpp
@@ -160,6 +160,9 @@ void registerDiskS3(DiskFactory & factory)
             checkRemoveAccess(*s3disk);
         }
 
+        s3disk->restore();
+        s3disk->startup();
+
         bool cache_enabled = config.getBool(config_prefix + ".cache_enabled", true);
 
         if (cache_enabled)

From 2848b32af1768ad0b681550a7b967c72d4e6a0fb Mon Sep 17 00:00:00 2001
From: Pavel Kovalenko <jokserfn@yandex-team.ru>
Date: Wed, 23 Dec 2020 18:11:37 +0300
Subject: [PATCH 0070/1238] Ability to backup-restore metadata files for DiskS3
 (WIP)

---
 src/Disks/S3/DiskS3.cpp         | 71 ++++++++++++++++++++-------------
 src/Disks/S3/DiskS3.h           | 14 +++++--
 src/Disks/S3/registerDiskS3.cpp |  4 +-
 3 files changed, 56 insertions(+), 33 deletions(-)

diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp
index 318fda72368..97a7dc4939f 100644
--- a/src/Disks/S3/DiskS3.cpp
+++ b/src/Disks/S3/DiskS3.cpp
@@ -504,17 +504,17 @@ private:
     CurrentMetrics::Increment metric_increment;
 };
 
-/// Runs tasks asynchronously using global thread pool.
+/// Runs tasks asynchronously using thread pool.
 class AsyncExecutor : public Executor
 {
 public:
-    explicit AsyncExecutor() = default;
+    explicit AsyncExecutor(int thread_pool_size) : pool(ThreadPool(thread_pool_size)) { }
 
     std::future<void> execute(std::function<void()> task) override
     {
         auto promise = std::make_shared<std::promise<void>>();
 
-        GlobalThreadPool::instance().scheduleOrThrowOnError(
+        pool.scheduleOrThrowOnError(
             [promise, task]()
             {
                 try
@@ -535,6 +535,9 @@ public:
 
         return promise->get_future();
     }
+
+private:
+    ThreadPool pool;
 };
 
 
@@ -548,8 +551,10 @@ DiskS3::DiskS3(
     size_t min_upload_part_size_,
     size_t max_single_part_upload_size_,
     size_t min_bytes_for_seek_,
-    bool send_metadata_)
-    : IDisk(std::make_unique<AsyncExecutor>())
+    bool send_metadata_,
+    int thread_pool_size_,
+    int list_object_keys_size_)
+    : IDisk(std::make_unique<AsyncExecutor>(thread_pool_size_))
     , name(std::move(name_))
     , client(std::move(client_))
     , proxy_configuration(std::move(proxy_configuration_))
@@ -560,6 +565,7 @@ DiskS3::DiskS3(
     , max_single_part_upload_size(max_single_part_upload_size_)
     , min_bytes_for_seek(min_bytes_for_seek_)
     , send_metadata(send_metadata_)
+    , list_object_keys_size(list_object_keys_size_)
 {
 }
 
@@ -727,15 +733,6 @@ void DiskS3::removeMeta(const String & path, AwsS3KeyKeeper & keys)
         }
         else /// In other case decrement number of references, save metadata and delete file.
         {
-            if (send_metadata)
-            {
-                auto revision = ++revision_counter;
-                const ObjectMetadata object_metadata {
-                    {"path", path}
-                };
-                createFileOperationObject("remove", revision, object_metadata);
-            }
-
             --metadata.ref_count;
             metadata.save();
             file.remove();
@@ -926,7 +923,7 @@ void DiskS3::startup()
     LOG_INFO(&Poco::Logger::get("DiskS3"), "Starting up disk {}", name);
 
     /// Find last revision.
-    UInt64 l = 0, r = (static_cast<UInt64>(1)) << 63;
+    UInt64 l = 0, r = LATEST_REVISION;
     while (r - l > 1)
     {
         auto revision = (r - l) >> 1;
@@ -1002,7 +999,7 @@ void DiskS3::copyObject(const String & src_bucket, const String & src_key, const
 
 struct DiskS3::RestoreInformation
 {
-    UInt64 revision = (static_cast<UInt64>(1)) << 63;
+    UInt64 revision = LATEST_REVISION;
     String bucket;
     String path;
 };
@@ -1054,6 +1051,20 @@ void DiskS3::restore()
         information.path = s3_root_path;
 
         readRestoreInformation(information);
+        if (information.revision == 0)
+            information.revision = LATEST_REVISION;
+
+        if (information.bucket == bucket)
+        {
+            /// In this case we need to additionally cleanup S3 from objects with later revision.
+            /// Will be simply just restore to different path.
+            if (information.path == s3_root_path && information.revision != LATEST_REVISION)
+                throw Exception("Restoring to the same bucket and path is allowed if revision is latest (0)", ErrorCodes::BAD_ARGUMENTS);
+
+            /// This case complicates S3 cleanup in case of unsuccessful restore.
+            if (information.path != s3_root_path && (information.path.starts_with(s3_root_path) || s3_root_path.starts_with(information.path)))
+                throw Exception("Restoring to the same bucket is allowed only if restore paths are same or not prefixes of each other", ErrorCodes::BAD_ARGUMENTS);
+        }
 
         ///TODO: Cleanup FS and bucket if previous restore was failed.
 
@@ -1122,7 +1133,7 @@ void DiskS3::processRestoreFiles(const String & source_bucket, const String & so
         auto path_entry = object_metadata.find("path");
         if (path_entry == object_metadata.end())
         {
-            LOG_WARNING(&Poco::Logger::get("DiskS3"), "Skip key {} because it doesn't have path key in metadata", key);
+            LOG_WARNING(&Poco::Logger::get("DiskS3"), "Skip key {} because it doesn't have 'path' key in metadata", key);
             continue;
         }
 
@@ -1134,11 +1145,13 @@ void DiskS3::processRestoreFiles(const String & source_bucket, const String & so
         auto relative_key = shrinkKey(source_path, key);
         metadata.addObject(relative_key, head_result.GetContentLength());
 
-        /// Copy object to bucket configured for current DiskS3 instance.
-        if (bucket != source_bucket)
+        /// Copy object if we restore to different bucket / path.
+        if (bucket != source_bucket || s3_root_path != source_path)
             copyObject(source_bucket, key, bucket, s3_root_path + relative_key);
 
         metadata.save();
+
+        LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Restored {} file", path);
     }
 }
 
@@ -1146,12 +1159,12 @@ void DiskS3::restoreFileOperations(const String & source_bucket, const String &
 {
     LOG_INFO(&Poco::Logger::get("DiskS3"), "Starting restore file operations for disk {}", name);
 
-    /// Disable sending metadata if we restore metadata to the same bucket.
-    send_metadata = bucket != source_bucket;
+    /// Enable record file operations if we restore to different bucket / path.
+    send_metadata = bucket != source_bucket || s3_root_path != source_path;
 
-    listObjects(source_bucket, source_path + "operations/", [this, &source_bucket, &target_revision](auto list_result) {
+    listObjects(source_bucket, source_path + "operations/", [this, &source_bucket, &target_revision](auto list_result)
+    {
         const String rename = "rename";
-        const String remove = "remove";
         const String hardlink = "hardlink";
 
         for (const auto & row : list_result.GetContents())
@@ -1170,7 +1183,7 @@ void DiskS3::restoreFileOperations(const String & source_bucket, const String &
             if (revision > target_revision)
                 return false;
 
-            /// Keep original revision if restore to different bucket.
+            /// Keep original revision if restore to different bucket / path.
             if (send_metadata)
                 revision_counter = revision - 1;
 
@@ -1180,18 +1193,20 @@ void DiskS3::restoreFileOperations(const String & source_bucket, const String &
                 auto from_path = object_metadata["from_path"];
                 auto to_path = object_metadata["to_path"];
                 if (exists(from_path))
+                {
                     moveFile(from_path, to_path);
-            }
-            else if (operation == remove)
-            {
-                removeIfExists(object_metadata["path"]);
+                    LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Restored rename {} -> {}", from_path, to_path);
+                }
             }
             else if (operation == hardlink)
             {
                 auto src_path = object_metadata["src_path"];
                 auto dst_path = object_metadata["dst_path"];
                 if (exists(src_path))
+                {
                     createHardLink(src_path, dst_path);
+                    LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Restored hardlink {} -> {}", src_path, dst_path);
+                }
             }
         }
 
diff --git a/src/Disks/S3/DiskS3.h b/src/Disks/S3/DiskS3.h
index 532ddcbd858..0140104c10f 100644
--- a/src/Disks/S3/DiskS3.h
+++ b/src/Disks/S3/DiskS3.h
@@ -24,9 +24,6 @@ namespace DB
 class DiskS3 : public IDisk
 {
 public:
-    /// File contains restore information
-    const String restore_file = "restore";
-
     using ObjectMetadata = std::map<std::string, std::string>;
 
     friend class DiskS3Reservation;
@@ -45,7 +42,9 @@ public:
         size_t min_upload_part_size_,
         size_t max_single_part_upload_size_,
         size_t min_bytes_for_seek_,
-        bool send_metadata_);
+        bool send_metadata_,
+        int thread_pool_size_,
+        int list_object_keys_size_);
 
     const String & getName() const override { return name; }
 
@@ -172,6 +171,13 @@ private:
     std::mutex reservation_mutex;
 
     std::atomic<UInt64> revision_counter;
+    static constexpr UInt64 LATEST_REVISION = (static_cast<UInt64>(1)) << 63;
+
+    /// File contains restore information
+    const String restore_file = "restore";
+    /// The number of keys listed in one request (1000 is max value).
+    int list_object_keys_size;
+
     /// Key has format: ../../r{revision}-{operation}
     const re2::RE2 key_regexp {".*/r(\\d+)-(\\w+).*"};
 };
diff --git a/src/Disks/S3/registerDiskS3.cpp b/src/Disks/S3/registerDiskS3.cpp
index 14aecb89517..88344b975bd 100644
--- a/src/Disks/S3/registerDiskS3.cpp
+++ b/src/Disks/S3/registerDiskS3.cpp
@@ -150,7 +150,9 @@ void registerDiskS3(DiskFactory & factory)
             context.getSettingsRef().s3_min_upload_part_size,
             context.getSettingsRef().s3_max_single_part_upload_size,
             config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024),
-            config.getBool(config_prefix + ".send_object_metadata", false));
+            config.getBool(config_prefix + ".send_object_metadata", false),
+            config.getInt(config_prefix + ".thread_pool_size", 16),
+            config.getInt(config_prefix + ".list_object_keys_size", 1000));
 
         /// This code is used only to check access to the corresponding disk.
         if (!config.getBool(config_prefix + ".skip_access_check", false))

From ded199ce2768246467a001abff74ae2b3b547d95 Mon Sep 17 00:00:00 2001
From: Daria Mozhaeva <dmozhaeva@yandex-team.ru>
Date: Wed, 23 Dec 2020 18:32:35 +0300
Subject: [PATCH 0071/1238] Edit and translate to Russia

---
 .../integrations/embedded-rocksdb.md          |  2 +-
 docs/en/operations/settings/settings.md       |  2 +-
 .../integrations/embedded-rocksdb.md          | 45 +++++++++++++++++++
 docs/ru/operations/settings/settings.md       | 25 +++++++++++
 4 files changed, 72 insertions(+), 2 deletions(-)
 create mode 100644 docs/ru/engines/table-engines/integrations/embedded-rocksdb.md

diff --git a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md
index 857e148277c..79e0e040377 100644
--- a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md
+++ b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md
@@ -40,6 +40,6 @@ PRIMARY KEY key
 
 ## Description {#description}
 
-- `primary key` must be specified, it only supports one column in primary key. The primary key will serialized in binary as rocksdb key.
+- `primary key` must be specified, it supports only one column in the primary key. The primary key will be serialized in binary as a rocksdb key.
 - columns other than the primary key will be serialized in binary as rocksdb value in corresponding order.
 - queries with key `equals` or `in` filtering will be optimized to multi keys lookup from rocksdb.
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index fc921f2ef7e..1ff2ea77fd0 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -445,7 +445,7 @@ Possible values:
 
 -   `'simple'` - Simple output format.
 
-    Clickhouse output date and time `YYYY-MM-DD hh:mm:ss` format. For example, `'2019-08-20 10:18:56'`. Calculation is performed according to the data type's time zone (if present) or server time zone.
+    Clickhouse output date and time `YYYY-MM-DD hh:mm:ss` format. For example, `'2019-08-20 10:18:56'`. The calculation is performed according to the data type's time zone (if present) or server time zone.
 
 -   `'iso'` - ISO output format.
 
diff --git a/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md b/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md
new file mode 100644
index 00000000000..e160eb2bdf5
--- /dev/null
+++ b/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md
@@ -0,0 +1,45 @@
+---
+toc_priority: 6
+toc_title: EmbeddedRocksDB
+---
+
+# EmbeddedRocksDB Engine {#EmbeddedRocksDB-engine}
+
+Этот движок позволяет интегрировать ClickHouse с [rocksdb](http://rocksdb.org/).
+
+`EmbeddedRocksDB` дает возможность:
+
+## Создавать таблицу {#table_engine-EmbeddedRocksDB-creating-a-table}
+
+``` sql
+CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
+(
+    name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
+    name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
+    ...
+) ENGINE = EmbeddedRocksDB PRIMARY KEY(primary_key_name)
+```
+
+Обязательные параметры:
+
+-  `primary_key_name` – любое имя столбца в списке столбцов.
+
+Пример:
+
+``` sql
+CREATE TABLE test
+(
+    `key` String,
+    `v1` UInt32,
+    `v2` String,
+    `v3` Float32,
+)
+ENGINE = EmbeddedRocksDB
+PRIMARY KEY key
+```
+
+## Описание {#description}
+
+- должен быть указан `primary key`, он поддерживает только один столбец в первичном ключе. Первичный ключ будет сериализован в двоичном формате как ключ rocksdb.
+- столбцы, отличные от первичного ключа, будут сериализованы в двоичном формате как значение rockdb в соответствующем порядке.
+- запросы с фильтрацией по ключу `equals` или `in` будут оптимизированы для поиска по нескольким ключам из rocksdb.
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index 0a8094231c2..82051a9f999 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -421,6 +421,31 @@ INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102	2;
 -   [Тип данных DateTime.](../../sql-reference/data-types/datetime.md)
 -   [Функции для работы с датой и временем.](../../sql-reference/functions/date-time-functions.md)
 
+## date_time_output_format {#settings-date_time_output_format}
+
+Позволяет выбрать разные выходные форматы текстового представления даты и времени.
+
+Возможные значения:
+
+-   `'simple'` - простой выходной формат.
+
+    Выходные дата и время Clickhouse в формате `YYYY-MM-DD hh:mm:ss`. Например, `'2019-08-20 10:18:56'`. Расчет выполняется в соответствии с часовым поясом типа данных (если он есть) или часовым поясом сервера.
+
+-   `'iso'` - выходной формат ISO.
+
+    Выходные дата и время Clickhouse в формате [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ`. Например, `'2019-08-20T10:18:56Z'`. Обратите внимание, что выходные данные отображаются в формате UTC (`Z` означает UTC).
+
+-   `'unix_timestamp'` - выходной формат Unix.
+
+    Выходные дата и время в формате [Unix](https://en.wikipedia.org/wiki/Unix_time). Например `'1566285536'`.
+
+Значение по умолчанию: `'simple'`.
+
+См. также:
+
+-   [Тип данных DateTime.](../../sql-reference/data-types/datetime.md)
+-   [Функции для работы с датой и временем.](../../sql-reference/functions/date-time-functions.md)
+
 ## join_default_strictness {#settings-join_default_strictness}
 
 Устанавливает строгость по умолчанию для [JOIN](../../sql-reference/statements/select/join.md#select-join).

From 5c9fe8ff7e6c826bfbcb7fbb42a757ab33728afe Mon Sep 17 00:00:00 2001
From: Daria Mozhaeva <dmozhaeva@yandex-team.ru>
Date: Wed, 23 Dec 2020 18:35:32 +0300
Subject: [PATCH 0072/1238] Edit and translate to Russian.

---
 docs/ru/sql-reference/data-types/datetime.md   | 3 ++-
 docs/ru/sql-reference/data-types/datetime64.md | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/docs/ru/sql-reference/data-types/datetime.md b/docs/ru/sql-reference/data-types/datetime.md
index 87c5da68f35..74cec551c3f 100644
--- a/docs/ru/sql-reference/data-types/datetime.md
+++ b/docs/ru/sql-reference/data-types/datetime.md
@@ -27,7 +27,7 @@ DateTime([timezone])
 
 Консольный клиент ClickHouse по умолчанию использует часовой пояс сервера, если для значения `DateTime` часовой пояс не был задан в явном виде при инициализации типа данных. Чтобы использовать часовой пояс клиента, запустите [clickhouse-client](../../interfaces/cli.md) с параметром `--use_client_time_zone`.
 
-ClickHouse отображает значения типа `DateTime` в формате `YYYY-MM-DD hh:mm:ss`. Отображение можно поменять с помощью функции [formatDateTime](../../sql-reference/data-types/datetime.md#formatdatetime).
+ClickHouse отображает значения в зависимости от значения параметра [date\_time\_output\_format](../../operations/settings/settings.md#settings-date_time_output_format). Текстовый формат по умолчанию `YYYY-MM-DD hh:mm:ss`. Кроме того, вы можете поменять отображение с помощью функции [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime).
 
 При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date_time_input_format](../../operations/settings/settings.md#settings-date_time_input_format).
 
@@ -120,6 +120,7 @@ FROM dt
 -   [Функции для работы с датой и временем](../../sql-reference/data-types/datetime.md)
 -   [Функции для работы с массивами](../../sql-reference/data-types/datetime.md)
 -   [Настройка `date_time_input_format`](../../operations/settings/settings.md#settings-date_time_input_format)
+-   [Настройка `date_time_output_format`](../../operations/settings/settings.md#settings-date_time_output_format)
 -   [Конфигурационный параметр сервера `timezone`](../../sql-reference/data-types/datetime.md#server_configuration_parameters-timezone)
 -   [Операторы для работы с датой и временем](../../sql-reference/data-types/datetime.md#operators-datetime)
 -   [Тип данных `Date`](date.md)
diff --git a/docs/ru/sql-reference/data-types/datetime64.md b/docs/ru/sql-reference/data-types/datetime64.md
index 0a602e44636..275783f0097 100644
--- a/docs/ru/sql-reference/data-types/datetime64.md
+++ b/docs/ru/sql-reference/data-types/datetime64.md
@@ -96,6 +96,7 @@ FROM dt
 -   [Функции для работы с датой и временем](../../sql-reference/data-types/datetime64.md)
 -   [Функции для работы с массивами](../../sql-reference/data-types/datetime64.md)
 -   [Настройка `date_time_input_format`](../../operations/settings/settings.md#settings-date_time_input_format)
+-   [Настройка `date_time_output_format`](../../operations/settings/settings.md#settings-date_time_output_format)
 -   [Конфигурационный параметр сервера `timezone`](../../sql-reference/data-types/datetime64.md#server_configuration_parameters-timezone)
 -   [Операторы для работы с датой и временем](../../sql-reference/data-types/datetime64.md#operators-datetime)
 -   [Тип данных `Date`](date.md)

From 49631a39ae843426a87bd94baa2398b125838e3a Mon Sep 17 00:00:00 2001
From: damozhaeva <68770561+damozhaeva@users.noreply.github.com>
Date: Fri, 25 Dec 2020 15:05:23 +0300
Subject: [PATCH 0073/1238] Update
 docs/ru/engines/table-engines/integrations/embedded-rocksdb.md

Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com>
---
 docs/ru/engines/table-engines/integrations/embedded-rocksdb.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md b/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md
index e160eb2bdf5..2074021121a 100644
--- a/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md
+++ b/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md
@@ -9,7 +9,7 @@ toc_title: EmbeddedRocksDB
 
 `EmbeddedRocksDB` дает возможность:
 
-## Создавать таблицу {#table_engine-EmbeddedRocksDB-creating-a-table}
+## Создание таблицы {#table_engine-EmbeddedRocksDB-creating-a-table}
 
 ``` sql
 CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]

From e00b0117410d7e024889e82f825757bf769b8a18 Mon Sep 17 00:00:00 2001
From: damozhaeva <68770561+damozhaeva@users.noreply.github.com>
Date: Fri, 25 Dec 2020 15:05:52 +0300
Subject: [PATCH 0074/1238] Update docs/ru/operations/settings/settings.md

Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com>
---
 docs/ru/operations/settings/settings.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index 82051a9f999..2f940758e09 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -443,7 +443,7 @@ INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102	2;
 
 См. также:
 
--   [Тип данных DateTime.](../../sql-reference/data-types/datetime.md)
+-   [Тип данных DateTime](../../sql-reference/data-types/datetime.md)
 -   [Функции для работы с датой и временем.](../../sql-reference/functions/date-time-functions.md)
 
 ## join_default_strictness {#settings-join_default_strictness}

From 5bc3d563d56bc837c28d177af7eb5066e4a24970 Mon Sep 17 00:00:00 2001
From: damozhaeva <68770561+damozhaeva@users.noreply.github.com>
Date: Fri, 25 Dec 2020 15:06:54 +0300
Subject: [PATCH 0075/1238] Update docs/ru/operations/settings/settings.md

Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com>
---
 docs/ru/operations/settings/settings.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index 2f940758e09..b48ca668aa4 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -444,7 +444,7 @@ INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102	2;
 См. также:
 
 -   [Тип данных DateTime](../../sql-reference/data-types/datetime.md)
--   [Функции для работы с датой и временем.](../../sql-reference/functions/date-time-functions.md)
+-   [Функции для работы с датой и временем](../../sql-reference/functions/date-time-functions.md)
 
 ## join_default_strictness {#settings-join_default_strictness}
 

From bc3e8f77f67ec0bd76533abefd0f4707185e82d6 Mon Sep 17 00:00:00 2001
From: damozhaeva <68770561+damozhaeva@users.noreply.github.com>
Date: Fri, 25 Dec 2020 15:07:20 +0300
Subject: [PATCH 0076/1238] Update
 docs/ru/engines/table-engines/integrations/embedded-rocksdb.md

Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com>
---
 docs/ru/engines/table-engines/integrations/embedded-rocksdb.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md b/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md
index 2074021121a..e57b83070dc 100644
--- a/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md
+++ b/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md
@@ -22,7 +22,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
 
 Обязательные параметры:
 
--  `primary_key_name` – любое имя столбца в списке столбцов.
+-  `primary_key_name` – любое имя столбца из списка столбцов.
 
 Пример:
 

From 8088b17ae25a76ae10ea74ac01aaba172500b38a Mon Sep 17 00:00:00 2001
From: damozhaeva <68770561+damozhaeva@users.noreply.github.com>
Date: Fri, 25 Dec 2020 15:08:22 +0300
Subject: [PATCH 0077/1238] Update
 docs/ru/engines/table-engines/integrations/embedded-rocksdb.md

Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com>
---
 docs/ru/engines/table-engines/integrations/embedded-rocksdb.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md b/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md
index e57b83070dc..3fd1b1e8d89 100644
--- a/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md
+++ b/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md
@@ -17,7 +17,8 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
     name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
     name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
     ...
-) ENGINE = EmbeddedRocksDB PRIMARY KEY(primary_key_name)
+) ENGINE = EmbeddedRocksDB 
+PRIMARY KEY(primary_key_name);
 ```
 
 Обязательные параметры:

From 47e8783f5be5a133ab133a18b90ced056aa00b4c Mon Sep 17 00:00:00 2001
From: damozhaeva <68770561+damozhaeva@users.noreply.github.com>
Date: Fri, 25 Dec 2020 15:08:35 +0300
Subject: [PATCH 0078/1238] Update
 docs/ru/engines/table-engines/integrations/embedded-rocksdb.md

Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com>
---
 docs/ru/engines/table-engines/integrations/embedded-rocksdb.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md b/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md
index 3fd1b1e8d89..575fc279b74 100644
--- a/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md
+++ b/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md
@@ -36,7 +36,7 @@ CREATE TABLE test
     `v3` Float32,
 )
 ENGINE = EmbeddedRocksDB
-PRIMARY KEY key
+PRIMARY KEY key;
 ```
 
 ## Описание {#description}

From b60c00ba7477ff4db5a9c9b7c962332c5248a4ce Mon Sep 17 00:00:00 2001
From: Anton Popov <pad11rus@gmail.com>
Date: Fri, 25 Dec 2020 17:52:46 +0300
Subject: [PATCH 0079/1238] refactoring of TTL stream

---
 src/DataStreams/ITTLAlgorithm.cpp             |  61 +++
 src/DataStreams/ITTLAlgorithm.h               |  43 ++
 src/DataStreams/TTLAggregationAlgorithm.cpp   | 173 +++++++
 src/DataStreams/TTLAggregationAlgorithm.h     |  40 ++
 src/DataStreams/TTLBlockInputStream.cpp       | 441 +++---------------
 src/DataStreams/TTLBlockInputStream.h         |  61 +--
 src/DataStreams/TTLColumnAlgorithm.cpp        |  88 ++++
 src/DataStreams/TTLColumnAlgorithm.h          |  29 ++
 src/DataStreams/TTLDeleteAlgorithm.cpp        |  58 +++
 src/DataStreams/TTLDeleteAlgorithm.h          |  21 +
 src/DataStreams/TTLUpdateInfoAlgorithm.cpp    |  47 ++
 src/DataStreams/TTLUpdateInfoAlgorithm.h      |  31 ++
 src/DataStreams/ya.make                       |   5 +
 src/Storages/MergeTree/IMergeTreeDataPart.cpp |   6 +
 .../MergeTree/MergeTreeDataPartTTLInfo.cpp    |  87 ++--
 .../MergeTree/MergeTreeDataPartTTLInfo.h      |   5 +-
 .../MergeTree/MergeTreeDataWriter.cpp         |   3 +
 src/Storages/StorageInMemoryMetadata.cpp      |  12 +-
 src/Storages/StorageInMemoryMetadata.h        |   4 +
 src/Storages/System/StorageSystemParts.cpp    |   5 +
 src/Storages/TTLDescription.cpp               |   8 +-
 src/Storages/TTLDescription.h                 |   2 +
 22 files changed, 759 insertions(+), 471 deletions(-)
 create mode 100644 src/DataStreams/ITTLAlgorithm.cpp
 create mode 100644 src/DataStreams/ITTLAlgorithm.h
 create mode 100644 src/DataStreams/TTLAggregationAlgorithm.cpp
 create mode 100644 src/DataStreams/TTLAggregationAlgorithm.h
 create mode 100644 src/DataStreams/TTLColumnAlgorithm.cpp
 create mode 100644 src/DataStreams/TTLColumnAlgorithm.h
 create mode 100644 src/DataStreams/TTLDeleteAlgorithm.cpp
 create mode 100644 src/DataStreams/TTLDeleteAlgorithm.h
 create mode 100644 src/DataStreams/TTLUpdateInfoAlgorithm.cpp
 create mode 100644 src/DataStreams/TTLUpdateInfoAlgorithm.h

diff --git a/src/DataStreams/ITTLAlgorithm.cpp b/src/DataStreams/ITTLAlgorithm.cpp
new file mode 100644
index 00000000000..f0e98e9ab1c
--- /dev/null
+++ b/src/DataStreams/ITTLAlgorithm.cpp
@@ -0,0 +1,61 @@
+#include <DataStreams/ITTLAlgorithm.h>
+#include <Columns/ColumnVector.h>
+#include <Columns/ColumnConst.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int LOGICAL_ERROR;
+}
+
+ITTLAlgorithm::ITTLAlgorithm(
+    const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_)
+    : description(description_)
+    , old_ttl_info(old_ttl_info_)
+    , current_time(current_time_)
+    , force(force_)
+    , date_lut(DateLUT::instance())
+{
+}
+
+bool ITTLAlgorithm::isTTLExpired(time_t ttl) const
+{
+    return (ttl && (ttl <= current_time));
+}
+
+ColumnPtr ITTLAlgorithm::extractRequieredColumn(const ExpressionActionsPtr & expression, const Block & block, const String & result_column)
+{
+    if (!expression)
+        return nullptr;
+
+    if (block.has(result_column))
+        return block.getByName(result_column).column;
+
+    Block block_copy;
+    for (const auto & column_name : expression->getRequiredColumns())
+        block_copy.insert(block.getByName(column_name));
+
+    expression->execute(block_copy);
+    return block_copy.getByName(result_column).column;
+}
+
+UInt32 ITTLAlgorithm::getTimestampByIndex(const IColumn * column, size_t index) const
+{
+    if (const ColumnUInt16 * column_date = typeid_cast<const ColumnUInt16 *>(column))
+        return date_lut.fromDayNum(DayNum(column_date->getData()[index]));
+    else if (const ColumnUInt32 * column_date_time = typeid_cast<const ColumnUInt32 *>(column))
+        return column_date_time->getData()[index];
+    else if (const ColumnConst * column_const = typeid_cast<const ColumnConst *>(column))
+    {
+        if (typeid_cast<const ColumnUInt16 *>(&column_const->getDataColumn()))
+            return date_lut.fromDayNum(DayNum(column_const->getValue<UInt16>()));
+        else if (typeid_cast<const ColumnUInt32 *>(&column_const->getDataColumn()))
+            return column_const->getValue<UInt32>();
+    }
+
+    throw Exception("Unexpected type of result TTL column", ErrorCodes::LOGICAL_ERROR);
+}
+
+}
diff --git a/src/DataStreams/ITTLAlgorithm.h b/src/DataStreams/ITTLAlgorithm.h
new file mode 100644
index 00000000000..28a371e9289
--- /dev/null
+++ b/src/DataStreams/ITTLAlgorithm.h
@@ -0,0 +1,43 @@
+#pragma once
+
+#include <Storages/TTLDescription.h>
+#include <Storages/MergeTree/MergeTreeDataPartTTLInfo.h>
+#include <Storages/MergeTree/IMergeTreeDataPart.h>
+#include <common/DateLUT.h>
+
+namespace DB
+{
+
+class ITTLAlgorithm
+{
+public:
+    using TTLInfo = IMergeTreeDataPart::TTLInfo;
+    using MutableDataPartPtr = MergeTreeMutableDataPartPtr;
+
+    ITTLAlgorithm(const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_);
+    virtual ~ITTLAlgorithm() = default;
+
+    virtual void execute(Block & block) = 0;
+    virtual void finalize(const MutableDataPartPtr & data_part) const = 0;
+
+    bool isMinTTLExpired() const { return force || isTTLExpired(old_ttl_info.min); }
+    bool isMaxTTLExpired() const { return isTTLExpired(old_ttl_info.max); }
+
+protected:
+    bool isTTLExpired(time_t ttl) const;
+    UInt32 getTimestampByIndex(const IColumn * column, size_t index) const;
+    static ColumnPtr extractRequieredColumn(const ExpressionActionsPtr & expression, const Block & block, const String & result_column);
+
+    const TTLDescription description;
+    const TTLInfo old_ttl_info;
+    const time_t current_time;
+    const bool force;
+    TTLInfo new_ttl_info;
+
+private:
+    const DateLUTImpl & date_lut;
+};
+
+using TTLAlgorithmPtr = std::unique_ptr<ITTLAlgorithm>;
+
+}
diff --git a/src/DataStreams/TTLAggregationAlgorithm.cpp b/src/DataStreams/TTLAggregationAlgorithm.cpp
new file mode 100644
index 00000000000..6cc1ac00b7e
--- /dev/null
+++ b/src/DataStreams/TTLAggregationAlgorithm.cpp
@@ -0,0 +1,173 @@
+#include <DataStreams/TTLAggregationAlgorithm.h>
+
+namespace DB
+{
+
+TTLAggregationAlgorithm::TTLAggregationAlgorithm(
+    const TTLDescription & description_,
+    const TTLInfo & old_ttl_info_,
+    time_t current_time_,
+    bool force_,
+    const Block & header_,
+    const MergeTreeData & storage_)
+    : ITTLAlgorithm(description_, old_ttl_info_, current_time_, force_)
+    , header(header_)
+{
+    current_key_value.resize(description.group_by_keys.size());
+
+    ColumnNumbers keys;
+    for (const auto & key : description.group_by_keys)
+        keys.push_back(header.getPositionByName(key));
+
+    key_columns.resize(description.group_by_keys.size());
+    AggregateDescriptions aggregates = description.aggregate_descriptions;
+
+    for (auto & descr : aggregates)
+        if (descr.arguments.empty())
+            for (const auto & name : descr.argument_names)
+                descr.arguments.push_back(header.getPositionByName(name));
+
+    columns_for_aggregator.resize(description.aggregate_descriptions.size());
+    const Settings & settings = storage_.global_context.getSettingsRef();
+
+    Aggregator::Params params(header, keys, aggregates,
+        false, settings.max_rows_to_group_by, settings.group_by_overflow_mode, 0, 0,
+        settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set,
+        storage_.global_context.getTemporaryVolume(), settings.max_threads, settings.min_free_disk_space_for_temporary_data);
+
+    aggregator = std::make_unique<Aggregator>(params);
+}
+
+void TTLAggregationAlgorithm::execute(Block & block)
+{
+    if (!block)
+    {
+        if (!aggregation_result.empty())
+        {
+            MutableColumns result_columns = header.cloneEmptyColumns();
+            finalizeAggregates(result_columns);
+            block = header.cloneWithColumns(std::move(result_columns));
+        }
+
+        return;
+    }
+
+    const auto & column_names = header.getNames();
+    MutableColumns result_columns = header.cloneEmptyColumns();
+    MutableColumns aggregate_columns = header.cloneEmptyColumns();
+
+    auto ttl_column = extractRequieredColumn(description.expression, block, description.result_column);
+    auto where_column = extractRequieredColumn(description.where_expression, block, description.where_result_column);
+
+    size_t rows_aggregated = 0;
+    size_t current_key_start = 0;
+    size_t rows_with_current_key = 0;
+
+    for (size_t i = 0; i < block.rows(); ++i)
+    {
+        UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i);
+        bool where_filter_passed = !where_column || where_column->getBool(i);
+        bool ttl_expired = isTTLExpired(cur_ttl) && where_filter_passed;
+
+        bool same_as_current = true;
+        for (size_t j = 0; j < description.group_by_keys.size(); ++j)
+        {
+            const String & key_column = description.group_by_keys[j];
+            const IColumn * values_column = block.getByName(key_column).column.get();
+            if (!same_as_current || (*values_column)[i] != current_key_value[j])
+            {
+                values_column->get(i, current_key_value[j]);
+                same_as_current = false;
+            }
+        }
+
+        if (!same_as_current)
+        {
+            if (rows_with_current_key)
+                calculateAggregates(aggregate_columns, current_key_start, rows_with_current_key);
+            finalizeAggregates(result_columns);
+
+            current_key_start = rows_aggregated;
+            rows_with_current_key = 0;
+        }
+
+        if (ttl_expired)
+        {
+            ++rows_with_current_key;
+            ++rows_aggregated;
+            for (const auto & name : column_names)
+            {
+                const IColumn * values_column = block.getByName(name).column.get();
+                auto & column = aggregate_columns[header.getPositionByName(name)];
+                column->insertFrom(*values_column, i);
+            }
+        }
+        else
+        {
+            new_ttl_info.update(cur_ttl);
+            for (const auto & name : column_names)
+            {
+                const IColumn * values_column = block.getByName(name).column.get();
+                auto & column = result_columns[header.getPositionByName(name)];
+                column->insertFrom(*values_column, i);
+            }
+        }
+    }
+
+    if (rows_with_current_key)
+        calculateAggregates(aggregate_columns, current_key_start, rows_with_current_key);
+
+    block = header.cloneWithColumns(std::move(result_columns));
+}
+
+void TTLAggregationAlgorithm::calculateAggregates(const MutableColumns & aggregate_columns, size_t start_pos, size_t length)
+{
+    Columns aggregate_chunk;
+    aggregate_chunk.reserve(aggregate_columns.size());
+    for (const auto & name : header.getNames())
+    {
+        const auto & column = aggregate_columns[header.getPositionByName(name)];
+        ColumnPtr chunk_column = column->cut(start_pos, length);
+        aggregate_chunk.emplace_back(std::move(chunk_column));
+    }
+
+    aggregator->executeOnBlock(aggregate_chunk, length, aggregation_result, key_columns,
+                               columns_for_aggregator, no_more_keys);
+}
+
+void TTLAggregationAlgorithm::finalizeAggregates(MutableColumns & result_columns)
+{
+    if (!aggregation_result.empty())
+    {
+        auto aggregated_res = aggregator->convertToBlocks(aggregation_result, true, 1);
+        for (auto & agg_block : aggregated_res)
+        {
+            for (const auto & it : description.set_parts)
+                it.expression->execute(agg_block);
+
+            for (const auto & name : description.group_by_keys)
+            {
+                const IColumn * values_column = agg_block.getByName(name).column.get();
+                auto & result_column = result_columns[header.getPositionByName(name)];
+                result_column->insertRangeFrom(*values_column, 0, agg_block.rows());
+            }
+
+            for (const auto & it : description.set_parts)
+            {
+                const IColumn * values_column = agg_block.getByName(it.expression_result_column_name).column.get();
+                auto & result_column = result_columns[header.getPositionByName(it.column_name)];
+                result_column->insertRangeFrom(*values_column, 0, agg_block.rows());
+            }
+        }
+    }
+
+    aggregation_result.invalidate();
+}
+
+void TTLAggregationAlgorithm::finalize(const MutableDataPartPtr & data_part) const
+{
+    data_part->ttl_infos.group_by_ttl[description.result_column] = new_ttl_info;
+    data_part->ttl_infos.updatePartMinMaxTTL(new_ttl_info.min, new_ttl_info.max);
+}
+
+}
diff --git a/src/DataStreams/TTLAggregationAlgorithm.h b/src/DataStreams/TTLAggregationAlgorithm.h
new file mode 100644
index 00000000000..977e755ca8b
--- /dev/null
+++ b/src/DataStreams/TTLAggregationAlgorithm.h
@@ -0,0 +1,40 @@
+#pragma once
+
+#include <DataStreams/ITTLAlgorithm.h>
+#include <Interpreters/Aggregator.h>
+#include <Storages/MergeTree/MergeTreeData.h>
+
+namespace DB
+{
+
+class TTLAggregationAlgorithm final : public ITTLAlgorithm
+{
+public:
+    TTLAggregationAlgorithm(
+        const TTLDescription & description_,
+        const TTLInfo & old_ttl_info_,
+        time_t current_time_,
+        bool force_,
+        const Block & header_,
+        const MergeTreeData & storage_);
+
+    void execute(Block & block) override;
+    void finalize(const MutableDataPartPtr & data_part) const override;
+
+private:
+    // Calculate aggregates of aggregate_columns into aggregation_result
+    void calculateAggregates(const MutableColumns & aggregate_columns, size_t start_pos, size_t length);
+
+    /// Finalize aggregation_result into result_columns
+    void finalizeAggregates(MutableColumns & result_columns);
+
+    const Block header;
+    std::unique_ptr<Aggregator> aggregator;
+    Row current_key_value;
+    AggregatedDataVariants aggregation_result;
+    ColumnRawPtrs key_columns;
+    Aggregator::AggregateColumns columns_for_aggregator;
+    bool no_more_keys = false;
+};
+
+}
diff --git a/src/DataStreams/TTLBlockInputStream.cpp b/src/DataStreams/TTLBlockInputStream.cpp
index ab80f69d4d2..8c680f6875b 100644
--- a/src/DataStreams/TTLBlockInputStream.cpp
+++ b/src/DataStreams/TTLBlockInputStream.cpp
@@ -8,15 +8,14 @@
 #include <Storages/TTLMode.h>
 #include <Interpreters/Context.h>
 
+#include <DataStreams/TTLDeleteAlgorithm.h>
+#include <DataStreams/TTLColumnAlgorithm.h>
+#include <DataStreams/TTLAggregationAlgorithm.h>
+#include <DataStreams/TTLUpdateInfoAlgorithm.h>
+
 namespace DB
 {
 
-namespace ErrorCodes
-{
-    extern const int LOGICAL_ERROR;
-}
-
-
 TTLBlockInputStream::TTLBlockInputStream(
     const BlockInputStreamPtr & input_,
     const MergeTreeData & storage_,
@@ -24,83 +23,62 @@ TTLBlockInputStream::TTLBlockInputStream(
     const MergeTreeData::MutableDataPartPtr & data_part_,
     time_t current_time_,
     bool force_)
-    : storage(storage_)
-    , metadata_snapshot(metadata_snapshot_)
-    , data_part(data_part_)
-    , current_time(current_time_)
-    , force(force_)
-    , old_ttl_infos(data_part->ttl_infos)
-    , log(&Poco::Logger::get(storage.getLogName() + " (TTLBlockInputStream)"))
-    , date_lut(DateLUT::instance())
+    : data_part(data_part_)
+    , log(&Poco::Logger::get(storage_.getLogName() + " (TTLBlockInputStream)"))
 {
     children.push_back(input_);
     header = children.at(0)->getHeader();
+    auto old_ttl_infos = data_part->ttl_infos;
 
-    const auto & storage_columns = metadata_snapshot->getColumns();
-    const auto & column_defaults = storage_columns.getDefaults();
-
-    ASTPtr default_expr_list = std::make_shared<ASTExpressionList>();
-    for (const auto & [name, _] : metadata_snapshot->getColumnTTLs())
+    if (metadata_snapshot_->hasRowsTTL())
     {
-        auto it = column_defaults.find(name);
-        if (it != column_defaults.end())
+        const auto & rows_ttl = metadata_snapshot_->getRowsTTL();
+        auto algorithm = std::make_unique<TTLDeleteAlgorithm>(
+            rows_ttl, old_ttl_infos.table_ttl, current_time_, force_);
+
+        /// Skip all data if table ttl is expired for part
+        if (algorithm->isMaxTTLExpired() && !rows_ttl.where_expression)
+            all_data_dropped = true;
+
+        delete_algorithm = algorithm.get();
+        algorithms.emplace_back(std::move(algorithm));
+    }
+
+    for (const auto & group_by_ttl : metadata_snapshot_->getGroupByTTLs())
+        algorithms.emplace_back(std::make_unique<TTLAggregationAlgorithm>(
+            group_by_ttl, old_ttl_infos.group_by_ttl[group_by_ttl.result_column], current_time_, force_, header, storage_));
+
+    if (metadata_snapshot_->hasAnyColumnTTL())
+    {
+        const auto & storage_columns = metadata_snapshot_->getColumns();
+        const auto & column_defaults = storage_columns.getDefaults();
+
+        for (const auto & [name, description] : metadata_snapshot_->getColumnTTLs())
         {
-            auto column = storage_columns.get(name);
-            auto expression = it->second.expression->clone();
-            default_expr_list->children.emplace_back(setAlias(addTypeConversionToAST(std::move(expression), column.type->getName()), it->first));
+            ExpressionActionsPtr default_expression;
+            auto it = column_defaults.find(name);
+            if (it != column_defaults.end())
+            {
+                const auto & column = storage_columns.get(name);
+                auto default_ast = it->second.expression->clone();
+                default_ast = setAlias(addTypeConversionToAST(std::move(default_ast), column.type->getName()), it->first);
+
+                auto syntax_result = TreeRewriter(storage_.global_context).analyze(default_ast, metadata_snapshot_->getColumns().getAllPhysical());
+                default_expression = ExpressionAnalyzer{default_ast, syntax_result, storage_.global_context}.getActions(true);
+            }
+
+            algorithms.emplace_back(std::make_unique<TTLColumnAlgorithm>(
+                description, old_ttl_infos.columns_ttl[name], current_time_, force_, name, default_expression));
         }
     }
 
-    for (const auto & [name, ttl_info] : old_ttl_infos.columns_ttl)
-    {
-        if (force || isTTLExpired(ttl_info.min))
-        {
-            new_ttl_infos.columns_ttl.emplace(name, IMergeTreeDataPart::TTLInfo{});
-            empty_columns.emplace(name);
-        }
-        else
-            new_ttl_infos.columns_ttl.emplace(name, ttl_info);
-    }
+    for (const auto & move_ttl : metadata_snapshot_->getMoveTTLs())
+        algorithms.emplace_back(std::make_unique<TTLMoveAlgorithm>(
+            move_ttl, old_ttl_infos.moves_ttl[move_ttl.result_column], current_time_, force_));
 
-    if (!force && !isTTLExpired(old_ttl_infos.table_ttl.min))
-        new_ttl_infos.table_ttl = old_ttl_infos.table_ttl;
-
-    if (!default_expr_list->children.empty())
-    {
-        auto syntax_result = TreeRewriter(storage.global_context).analyze(default_expr_list, metadata_snapshot->getColumns().getAllPhysical());
-        defaults_expression = ExpressionAnalyzer{default_expr_list, syntax_result, storage.global_context}.getActions(true);
-    }
-
-    auto storage_rows_ttl = metadata_snapshot->getRowsTTL();
-    if (metadata_snapshot->hasRowsTTL() && storage_rows_ttl.mode == TTLMode::GROUP_BY)
-    {
-        current_key_value.resize(storage_rows_ttl.group_by_keys.size());
-
-        ColumnNumbers keys;
-        for (const auto & key : storage_rows_ttl.group_by_keys)
-            keys.push_back(header.getPositionByName(key));
-        agg_key_columns.resize(storage_rows_ttl.group_by_keys.size());
-
-        AggregateDescriptions aggregates = storage_rows_ttl.aggregate_descriptions;
-        for (auto & descr : aggregates)
-            if (descr.arguments.empty())
-                for (const auto & name : descr.argument_names)
-                    descr.arguments.push_back(header.getPositionByName(name));
-
-        agg_aggregate_columns.resize(storage_rows_ttl.aggregate_descriptions.size());
-        const Settings & settings = storage.global_context.getSettingsRef();
-
-        Aggregator::Params params(header, keys, aggregates,
-            false, settings.max_rows_to_group_by, settings.group_by_overflow_mode, 0, 0,
-            settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set,
-            storage.global_context.getTemporaryVolume(), settings.max_threads, settings.min_free_disk_space_for_temporary_data);
-        aggregator = std::make_unique<Aggregator>(params);
-    }
-}
-
-bool TTLBlockInputStream::isTTLExpired(time_t ttl) const
-{
-    return (ttl && (ttl <= current_time));
+    for (const auto & recompression_ttl : metadata_snapshot_->getRecompressionTTLs())
+        algorithms.emplace_back(std::make_unique<TTLRecompressionAlgorithm>(
+            recompression_ttl, old_ttl_infos.recompression_ttl[recompression_ttl.result_column], current_time_, force_));
 }
 
 Block reorderColumns(Block block, const Block & header)
@@ -114,325 +92,30 @@ Block reorderColumns(Block block, const Block & header)
 
 Block TTLBlockInputStream::readImpl()
 {
-    /// Skip all data if table ttl is expired for part
-    auto storage_rows_ttl = metadata_snapshot->getRowsTTL();
-    if (metadata_snapshot->hasRowsTTL()
-        && !storage_rows_ttl.where_expression
-        && storage_rows_ttl.mode != TTLMode::GROUP_BY
-        && isTTLExpired(old_ttl_infos.table_ttl.max))
-    {
-        rows_removed = data_part->rows_count;
+    if (all_data_dropped)
         return {};
-    }
 
-    Block block = children.at(0)->read();
+    auto block = children.at(0)->read();
+    for (const auto & algorithm : algorithms)
+        algorithm->execute(block);
+
     if (!block)
-    {
-        if (aggregator && !agg_result.empty())
-        {
-            MutableColumns result_columns = header.cloneEmptyColumns();
-            finalizeAggregates(result_columns);
-            block = header.cloneWithColumns(std::move(result_columns));
-        }
-
         return block;
-    }
-
-    if (metadata_snapshot->hasRowsTTL() && (force || isTTLExpired(old_ttl_infos.table_ttl.min)))
-        executeRowsTTL(block);
-
-    removeValuesWithExpiredColumnTTL(block);
-    updateMovesTTL(block);
-    updateRecompressionTTL(block);
 
     return reorderColumns(std::move(block), header);
 }
 
 void TTLBlockInputStream::readSuffixImpl()
 {
-    for (const auto & elem : new_ttl_infos.columns_ttl)
-        new_ttl_infos.updatePartMinMaxTTL(elem.second.min, elem.second.max);
+    data_part->ttl_infos = {};
+    for (const auto & algorithm : algorithms)
+        algorithm->finalize(data_part);
 
-    new_ttl_infos.updatePartMinMaxTTL(new_ttl_infos.table_ttl.min, new_ttl_infos.table_ttl.max);
-
-    data_part->ttl_infos = std::move(new_ttl_infos);
-    data_part->expired_columns = std::move(empty_columns);
-
-    if (rows_removed)
+    if (delete_algorithm)
+    {
+        size_t rows_removed = all_data_dropped ? data_part->rows_count : delete_algorithm->getNumberOfRemovedRows();
         LOG_DEBUG(log, "Removed {} rows with expired TTL from part {}", rows_removed, data_part->name);
-}
-
-static ColumnPtr extractRequieredColumn(const ExpressionActions & expression, const Block & block, const String & result_column)
-{
-    if (block.has(result_column))
-        return block.getByName(result_column).column;
-
-    Block block_copy;
-    for (const auto & column_name : expression.getRequiredColumns())
-        block_copy.insert(block.getByName(column_name));
-
-    expression.execute(block_copy);
-    return block_copy.getByName(result_column).column;
-}
-
-void TTLBlockInputStream::executeRowsTTL(Block & block)
-{
-    auto rows_ttl = metadata_snapshot->getRowsTTL();
-    auto ttl_column = extractRequieredColumn(*rows_ttl.expression, block, rows_ttl.result_column);
-
-    auto where_result_column = rows_ttl.where_expression ?
-        extractRequieredColumn(*rows_ttl.where_expression, block, rows_ttl.where_result_column): nullptr;
-
-    if (aggregator)
-        aggregateRowsWithExpiredTTL(block, ttl_column, where_result_column);
-    else
-        removeRowsWithExpiredTTL(block, ttl_column, where_result_column);
-}
-
-void TTLBlockInputStream::removeRowsWithExpiredTTL(Block & block, ColumnPtr ttl_column, ColumnPtr where_column)
-{
-    MutableColumns result_columns;
-    const auto & column_names = header.getNames();
-
-    result_columns.reserve(column_names.size());
-    for (auto it = column_names.begin(); it != column_names.end(); ++it)
-    {
-        const IColumn * values_column = block.getByName(*it).column.get();
-        MutableColumnPtr result_column = values_column->cloneEmpty();
-        result_column->reserve(block.rows());
-
-        for (size_t i = 0; i < block.rows(); ++i)
-        {
-            UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i);
-            bool where_filter_passed = !where_column || where_column->getBool(i);
-            if (!isTTLExpired(cur_ttl) || !where_filter_passed)
-            {
-                new_ttl_infos.table_ttl.update(cur_ttl);
-                result_column->insertFrom(*values_column, i);
-            }
-            else if (it == column_names.begin())
-                ++rows_removed;
-        }
-
-        result_columns.emplace_back(std::move(result_column));
-    }
-
-    block = header.cloneWithColumns(std::move(result_columns));
-}
-
-void TTLBlockInputStream::aggregateRowsWithExpiredTTL(Block & block, ColumnPtr ttl_column, ColumnPtr where_column)
-{
-    const auto & column_names = header.getNames();
-    MutableColumns result_columns = header.cloneEmptyColumns();
-    MutableColumns aggregate_columns = header.cloneEmptyColumns();
-
-    size_t rows_aggregated = 0;
-    size_t current_key_start = 0;
-    size_t rows_with_current_key = 0;
-    auto storage_rows_ttl = metadata_snapshot->getRowsTTL();
-
-    for (size_t i = 0; i < block.rows(); ++i)
-    {
-        UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i);
-        bool where_filter_passed = !where_column || where_column->getBool(i);
-        bool ttl_expired = isTTLExpired(cur_ttl) && where_filter_passed;
-
-        bool same_as_current = true;
-        for (size_t j = 0; j < storage_rows_ttl.group_by_keys.size(); ++j)
-        {
-            const String & key_column = storage_rows_ttl.group_by_keys[j];
-            const IColumn * values_column = block.getByName(key_column).column.get();
-            if (!same_as_current || (*values_column)[i] != current_key_value[j])
-            {
-                values_column->get(i, current_key_value[j]);
-                same_as_current = false;
-            }
-        }
-
-        if (!same_as_current)
-        {
-            if (rows_with_current_key)
-                calculateAggregates(aggregate_columns, current_key_start, rows_with_current_key);
-            finalizeAggregates(result_columns);
-
-            current_key_start = rows_aggregated;
-            rows_with_current_key = 0;
-        }
-
-        if (ttl_expired)
-        {
-            ++rows_with_current_key;
-            ++rows_aggregated;
-            for (const auto & name : column_names)
-            {
-                const IColumn * values_column = block.getByName(name).column.get();
-                auto & column = aggregate_columns[header.getPositionByName(name)];
-                column->insertFrom(*values_column, i);
-            }
-        }
-        else
-        {
-            new_ttl_infos.table_ttl.update(cur_ttl);
-            for (const auto & name : column_names)
-            {
-                const IColumn * values_column = block.getByName(name).column.get();
-                auto & column = result_columns[header.getPositionByName(name)];
-                column->insertFrom(*values_column, i);
-            }
-        }
-    }
-
-    if (rows_with_current_key)
-        calculateAggregates(aggregate_columns, current_key_start, rows_with_current_key);
-
-    block = header.cloneWithColumns(std::move(result_columns));
-}
-
-void TTLBlockInputStream::calculateAggregates(const MutableColumns & aggregate_columns, size_t start_pos, size_t length)
-{
-    Columns aggregate_chunk;
-    aggregate_chunk.reserve(aggregate_columns.size());
-    for (const auto & name : header.getNames())
-    {
-        const auto & column = aggregate_columns[header.getPositionByName(name)];
-        ColumnPtr chunk_column = column->cut(start_pos, length);
-        aggregate_chunk.emplace_back(std::move(chunk_column));
-    }
-    aggregator->executeOnBlock(aggregate_chunk, length, agg_result, agg_key_columns,
-                               agg_aggregate_columns, agg_no_more_keys);
-}
-
-void TTLBlockInputStream::finalizeAggregates(MutableColumns & result_columns)
-{
-    if (!agg_result.empty())
-    {
-        auto aggregated_res = aggregator->convertToBlocks(agg_result, true, 1);
-        auto storage_rows_ttl = metadata_snapshot->getRowsTTL();
-        for (auto & agg_block : aggregated_res)
-        {
-            for (const auto & it : storage_rows_ttl.set_parts)
-                it.expression->execute(agg_block);
-
-            for (const auto & name : storage_rows_ttl.group_by_keys)
-            {
-                const IColumn * values_column = agg_block.getByName(name).column.get();
-                auto & result_column = result_columns[header.getPositionByName(name)];
-                result_column->insertRangeFrom(*values_column, 0, agg_block.rows());
-            }
-
-            for (const auto & it : storage_rows_ttl.set_parts)
-            {
-                const IColumn * values_column = agg_block.getByName(it.expression_result_column_name).column.get();
-                auto & result_column = result_columns[header.getPositionByName(it.column_name)];
-                result_column->insertRangeFrom(*values_column, 0, agg_block.rows());
-            }
-        }
-    }
-
-    agg_result.invalidate();
-}
-
-void TTLBlockInputStream::removeValuesWithExpiredColumnTTL(Block & block)
-{
-    Block block_with_defaults;
-    if (defaults_expression)
-    {
-        block_with_defaults = block;
-        defaults_expression->execute(block_with_defaults);
-    }
-
-    for (const auto & [name, ttl_entry] : metadata_snapshot->getColumnTTLs())
-    {
-        /// If we read not all table columns. E.g. while mutation.
-        if (!block.has(name))
-            continue;
-
-        const auto & old_ttl_info = old_ttl_infos.columns_ttl[name];
-        auto & new_ttl_info = new_ttl_infos.columns_ttl[name];
-
-        /// Nothing to do
-        if (!force && !isTTLExpired(old_ttl_info.min))
-            continue;
-
-        /// Later drop full column
-        if (isTTLExpired(old_ttl_info.max))
-            continue;
-
-        auto ttl_column = extractRequieredColumn(*ttl_entry.expression, block, ttl_entry.result_column);
-
-        ColumnPtr default_column = nullptr;
-        if (block_with_defaults.has(name))
-            default_column = block_with_defaults.getByName(name).column->convertToFullColumnIfConst();
-
-        auto & column_with_type = block.getByName(name);
-        const IColumn * values_column = column_with_type.column.get();
-        MutableColumnPtr result_column = values_column->cloneEmpty();
-        result_column->reserve(block.rows());
-
-        for (size_t i = 0; i < block.rows(); ++i)
-        {
-            UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i);
-            if (isTTLExpired(cur_ttl))
-            {
-                if (default_column)
-                    result_column->insertFrom(*default_column, i);
-                else
-                    result_column->insertDefault();
-            }
-            else
-            {
-                new_ttl_info.update(cur_ttl);
-                empty_columns.erase(name);
-                result_column->insertFrom(*values_column, i);
-            }
-        }
-        column_with_type.column = std::move(result_column);
     }
 }
 
-void TTLBlockInputStream::updateTTLWithDescriptions(Block & block, const TTLDescriptions & descriptions, TTLInfoMap & ttl_info_map)
-{
-    for (const auto & ttl_entry : descriptions)
-    {
-        auto & new_ttl_info = ttl_info_map[ttl_entry.result_column];
-        if (!block.has(ttl_entry.result_column))
-            ttl_entry.expression->execute(block);
-
-        auto ttl_column = extractRequieredColumn(*ttl_entry.expression, block, ttl_entry.result_column);
-
-        for (size_t i = 0; i < block.rows(); ++i)
-        {
-            UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i);
-            new_ttl_info.update(cur_ttl);
-        }
-    }
-}
-
-void TTLBlockInputStream::updateMovesTTL(Block & block)
-{
-    updateTTLWithDescriptions(block, metadata_snapshot->getMoveTTLs(), new_ttl_infos.moves_ttl);
-}
-
-void TTLBlockInputStream::updateRecompressionTTL(Block & block)
-{
-    updateTTLWithDescriptions(block, metadata_snapshot->getRecompressionTTLs(), new_ttl_infos.recompression_ttl);
-}
-
-UInt32 TTLBlockInputStream::getTimestampByIndex(const IColumn * column, size_t ind)
-{
-    if (const ColumnUInt16 * column_date = typeid_cast<const ColumnUInt16 *>(column))
-        return date_lut.fromDayNum(DayNum(column_date->getData()[ind]));
-    else if (const ColumnUInt32 * column_date_time = typeid_cast<const ColumnUInt32 *>(column))
-        return column_date_time->getData()[ind];
-    else if (const ColumnConst * column_const = typeid_cast<const ColumnConst *>(column))
-    {
-        if (typeid_cast<const ColumnUInt16 *>(&column_const->getDataColumn()))
-            return date_lut.fromDayNum(DayNum(column_const->getValue<UInt16>()));
-        else if (typeid_cast<const ColumnUInt32 *>(&column_const->getDataColumn()))
-            return column_const->getValue<UInt32>();
-    }
-
-    throw Exception("Unexpected type of result TTL column", ErrorCodes::LOGICAL_ERROR);
-}
-
 }
diff --git a/src/DataStreams/TTLBlockInputStream.h b/src/DataStreams/TTLBlockInputStream.h
index bbe1f8782a4..da86b8d5710 100644
--- a/src/DataStreams/TTLBlockInputStream.h
+++ b/src/DataStreams/TTLBlockInputStream.h
@@ -3,8 +3,9 @@
 #include <Storages/MergeTree/MergeTreeData.h>
 #include <Storages/MergeTree/IMergeTreeDataPart.h>
 #include <Core/Block.h>
-#include <Interpreters/Aggregator.h>
 #include <Storages/MergeTree/MergeTreeDataPartTTLInfo.h>
+#include <DataStreams/ITTLAlgorithm.h>
+#include <DataStreams/TTLDeleteAlgorithm.h>
 
 #include <common/DateLUT.h>
 
@@ -24,7 +25,6 @@ public:
     );
 
     String getName() const override { return "TTL"; }
-
     Block getHeader() const override { return header; }
 
 protected:
@@ -34,65 +34,14 @@ protected:
     void readSuffixImpl() override;
 
 private:
-    const MergeTreeData & storage;
-    StorageMetadataPtr metadata_snapshot;
+    std::vector<TTLAlgorithmPtr> algorithms;
+    const TTLDeleteAlgorithm * delete_algorithm = nullptr;
+    bool all_data_dropped = false;
 
     /// ttl_infos and empty_columns are updating while reading
     const MergeTreeData::MutableDataPartPtr & data_part;
-
-    time_t current_time;
-    bool force;
-
-    std::unique_ptr<Aggregator> aggregator;
-    std::vector<Field> current_key_value;
-    AggregatedDataVariants agg_result;
-    ColumnRawPtrs agg_key_columns;
-    Aggregator::AggregateColumns agg_aggregate_columns;
-    bool agg_no_more_keys = false;
-
-    IMergeTreeDataPart::TTLInfos old_ttl_infos;
-    IMergeTreeDataPart::TTLInfos new_ttl_infos;
-    NameSet empty_columns;
-
-    size_t rows_removed = 0;
     Poco::Logger * log;
-    const DateLUTImpl & date_lut;
-
-    /// TODO rewrite defaults logic to evaluteMissingDefaults
-    std::unordered_map<String, String> defaults_result_column;
-    ExpressionActionsPtr defaults_expression;
-
     Block header;
-private:
-    /// Removes values with expired ttl and computes new_ttl_infos and empty_columns for part
-    void removeValuesWithExpiredColumnTTL(Block & block);
-
-    void executeRowsTTL(Block & block);
-
-    /// Removes rows with expired table ttl and computes new ttl_infos for part
-    void removeRowsWithExpiredTTL(Block & block, ColumnPtr ttl_column, ColumnPtr where_column);
-
-    /// Aggregates rows with expired table ttl and computes new ttl_infos for part
-    void aggregateRowsWithExpiredTTL(Block & block, ColumnPtr ttl_column, ColumnPtr where_column);
-
-    // Calculate aggregates of aggregate_columns into agg_result
-    void calculateAggregates(const MutableColumns & aggregate_columns, size_t start_pos, size_t length);
-
-    /// Finalize agg_result into result_columns
-    void finalizeAggregates(MutableColumns & result_columns);
-
-    /// Execute description expressions on block and update ttl's in
-    /// ttl_info_map with expression results.
-    void updateTTLWithDescriptions(Block & block, const TTLDescriptions & descriptions, TTLInfoMap & ttl_info_map);
-
-    /// Updates TTL for moves
-    void updateMovesTTL(Block & block);
-
-    /// Update values for recompression TTL using data from block.
-    void updateRecompressionTTL(Block & block);
-
-    UInt32 getTimestampByIndex(const IColumn * column, size_t ind);
-    bool isTTLExpired(time_t ttl) const;
 };
 
 }
diff --git a/src/DataStreams/TTLColumnAlgorithm.cpp b/src/DataStreams/TTLColumnAlgorithm.cpp
new file mode 100644
index 00000000000..4747a605e3b
--- /dev/null
+++ b/src/DataStreams/TTLColumnAlgorithm.cpp
@@ -0,0 +1,88 @@
+#include <DataStreams/TTLColumnAlgorithm.h>
+
+namespace DB
+{
+
+TTLColumnAlgorithm::TTLColumnAlgorithm(
+    const TTLDescription & description_,
+    const TTLInfo & old_ttl_info_,
+    time_t current_time_,
+    bool force_,
+    const String & column_name_,
+    const ExpressionActionsPtr & default_expression_)
+    : ITTLAlgorithm(description_, old_ttl_info_, current_time_, force_)
+    , column_name(column_name_)
+    , default_expression(default_expression_)
+{
+    if (!isMinTTLExpired())
+    {
+        new_ttl_info = old_ttl_info;
+        is_fully_empty = false;
+    }
+}
+
+void TTLColumnAlgorithm::execute(Block & block)
+{
+    if (!block)
+        return;
+
+
+    /// If we read not all table columns. E.g. while mutation.
+    if (!block.has(column_name))
+        return;
+
+    /// Nothing to do
+    if (!isMinTTLExpired())
+        return;
+
+    /// Later drop full column
+    if (isMaxTTLExpired())
+        return;
+
+    //// TODO: use extractRequiredColumn
+    ColumnPtr default_column;
+    if (default_expression)
+    {
+        Block block_with_defaults;
+        block_with_defaults = block;
+        default_expression->execute(block_with_defaults);
+        default_column = block_with_defaults.getByName(column_name).column->convertToFullColumnIfConst();
+    }
+
+    auto ttl_column = extractRequieredColumn(description.expression, block, description.result_column);
+
+    auto & column_with_type = block.getByName(column_name);
+    const IColumn * values_column = column_with_type.column.get();
+    MutableColumnPtr result_column = values_column->cloneEmpty();
+    result_column->reserve(block.rows());
+
+    for (size_t i = 0; i < block.rows(); ++i)
+    {
+        UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i);
+        if (isTTLExpired(cur_ttl))
+        {
+            if (default_column)
+                result_column->insertFrom(*default_column, i);
+            else
+                result_column->insertDefault();
+        }
+        else
+        {
+            new_ttl_info.update(cur_ttl);
+            is_fully_empty = false;
+            result_column->insertFrom(*values_column, i);
+        }
+    }
+
+    column_with_type.column = std::move(result_column);
+}
+
+void TTLColumnAlgorithm::finalize(const MutableDataPartPtr & data_part) const
+{
+    data_part->ttl_infos.columns_ttl[column_name] = new_ttl_info;
+    data_part->ttl_infos.updatePartMinMaxTTL(new_ttl_info.min, new_ttl_info.max);
+    if (is_fully_empty)
+        data_part->expired_columns.insert(column_name);
+}
+
+}
diff --git a/src/DataStreams/TTLColumnAlgorithm.h b/src/DataStreams/TTLColumnAlgorithm.h
new file mode 100644
index 00000000000..b2824dba9b0
--- /dev/null
+++ b/src/DataStreams/TTLColumnAlgorithm.h
@@ -0,0 +1,29 @@
+#pragma once
+
+#include <DataStreams/ITTLAlgorithm.h>
+
+namespace DB
+{
+
+class TTLColumnAlgorithm final : public ITTLAlgorithm
+{
+public:
+    TTLColumnAlgorithm(
+        const TTLDescription & description_,
+        const TTLInfo & old_ttl_info_,
+        time_t current_time_,
+        bool force_,
+        const String & column_name_,
+        const ExpressionActionsPtr & default_expression_);
+
+    void execute(Block & block) override;
+    void finalize(const MutableDataPartPtr & data_part) const override;
+
+private:
+    const String column_name;
+    const ExpressionActionsPtr default_expression;
+
+    bool is_fully_empty = true;
+};
+
+}
diff --git a/src/DataStreams/TTLDeleteAlgorithm.cpp b/src/DataStreams/TTLDeleteAlgorithm.cpp
new file mode 100644
index 00000000000..9ff4eb767df
--- /dev/null
+++ b/src/DataStreams/TTLDeleteAlgorithm.cpp
@@ -0,0 +1,58 @@
+#include <DataStreams/TTLDeleteAlgorithm.h>
+
+namespace DB
+{
+
+TTLDeleteAlgorithm::TTLDeleteAlgorithm(
+    const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_)
+    : ITTLAlgorithm(description_, old_ttl_info_, current_time_, force_)
+{
+    if (!isMinTTLExpired())
+        new_ttl_info = old_ttl_info;
+}
+
+void TTLDeleteAlgorithm::execute(Block & block)
+{
+    if (!block || !isMinTTLExpired())
+        return;
+
+    auto ttl_column = extractRequieredColumn(description.expression, block, description.result_column);
+    auto where_column = extractRequieredColumn(description.where_expression, block, description.where_result_column);
+
+    MutableColumns result_columns;
+    const auto & column_names = block.getNames();
+
+    result_columns.reserve(column_names.size());
+    for (auto it = column_names.begin(); it != column_names.end(); ++it)
+    {
+        const IColumn * values_column = block.getByName(*it).column.get();
+        MutableColumnPtr result_column = values_column->cloneEmpty();
+        result_column->reserve(block.rows());
+
+        for (size_t i = 0; i < block.rows(); ++i)
+        {
+            UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i);
+            bool where_filter_passed = !where_column || where_column->getBool(i);
+
+            if (!isTTLExpired(cur_ttl) || !where_filter_passed)
+            {
+                new_ttl_info.update(cur_ttl);
+                result_column->insertFrom(*values_column, i);
+            }
+            else if (it == column_names.begin())
+                ++rows_removed;
+        }
+
+        result_columns.emplace_back(std::move(result_column));
+    }
+
+    block = block.cloneWithColumns(std::move(result_columns));
+}
+
+void TTLDeleteAlgorithm::finalize(const MutableDataPartPtr & data_part) const
+{
+    data_part->ttl_infos.table_ttl = new_ttl_info;
+    data_part->ttl_infos.updatePartMinMaxTTL(new_ttl_info.min, new_ttl_info.max);
+}
+
+}
diff --git a/src/DataStreams/TTLDeleteAlgorithm.h b/src/DataStreams/TTLDeleteAlgorithm.h
new file mode 100644
index 00000000000..36da59da46e
--- /dev/null
+++ b/src/DataStreams/TTLDeleteAlgorithm.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include <DataStreams/ITTLAlgorithm.h>
+
+namespace DB
+{
+
+class TTLDeleteAlgorithm final : public ITTLAlgorithm
+{
+public:
+    TTLDeleteAlgorithm(const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_);
+
+    void execute(Block & block) override;
+    void finalize(const MutableDataPartPtr & data_part) const override;
+    size_t getNumberOfRemovedRows() const { return rows_removed; }
+
+private:
+    size_t rows_removed = 0;
+};
+
+}
diff --git a/src/DataStreams/TTLUpdateInfoAlgorithm.cpp b/src/DataStreams/TTLUpdateInfoAlgorithm.cpp
new file mode 100644
index 00000000000..ce4d4128eec
--- /dev/null
+++ b/src/DataStreams/TTLUpdateInfoAlgorithm.cpp
@@ -0,0 +1,47 @@
+#include <DataStreams/TTLUpdateInfoAlgorithm.h>
+
+namespace DB
+{
+
+TTLUpdateInfoAlgorithm::TTLUpdateInfoAlgorithm(
+    const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_)
+    : ITTLAlgorithm(description_, old_ttl_info_, current_time_, force_)
+{
+}
+
+void TTLUpdateInfoAlgorithm::execute(Block & block)
+{
+    if (!block)
+        return;
+
+    auto ttl_column = extractRequieredColumn(description.expression, block, description.result_column);
+    for (size_t i = 0; i < block.rows(); ++i)
+    {
+        UInt32 cur_ttl = ITTLAlgorithm::getTimestampByIndex(ttl_column.get(), i);
+        new_ttl_info.update(cur_ttl);
+    }
+}
+
+TTLMoveAlgorithm::TTLMoveAlgorithm(
+    const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_)
+    : TTLUpdateInfoAlgorithm(description_, old_ttl_info_, current_time_, force_)
+{
+}
+
+void TTLMoveAlgorithm::finalize(const MutableDataPartPtr & data_part) const
+{
+    data_part->ttl_infos.moves_ttl[description.result_column] = new_ttl_info;
+}
+
+TTLRecompressionAlgorithm::TTLRecompressionAlgorithm(
+    const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_)
+    : TTLUpdateInfoAlgorithm(description_, old_ttl_info_, current_time_, force_)
+{
+}
+
+void TTLRecompressionAlgorithm::finalize(const MutableDataPartPtr & data_part) const
+{
+    data_part->ttl_infos.recompression_ttl[description.result_column] = new_ttl_info;
+}
+
+}
diff --git a/src/DataStreams/TTLUpdateInfoAlgorithm.h b/src/DataStreams/TTLUpdateInfoAlgorithm.h
new file mode 100644
index 00000000000..4a680c5bb3a
--- /dev/null
+++ b/src/DataStreams/TTLUpdateInfoAlgorithm.h
@@ -0,0 +1,31 @@
+#pragma once
+
+#include <DataStreams/ITTLAlgorithm.h>
+
+namespace DB
+{
+
+class TTLUpdateInfoAlgorithm : public ITTLAlgorithm
+{
+public:
+    TTLUpdateInfoAlgorithm(const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_);
+
+    void execute(Block & block) override;
+    void finalize(const MutableDataPartPtr & data_part) const override = 0;
+};
+
+class TTLMoveAlgorithm final : public TTLUpdateInfoAlgorithm
+{
+public:
+    TTLMoveAlgorithm(const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_);
+    void finalize(const MutableDataPartPtr & data_part) const override;
+};
+
+class TTLRecompressionAlgorithm final : public TTLUpdateInfoAlgorithm
+{
+public:
+    TTLRecompressionAlgorithm(const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_);
+    void finalize(const MutableDataPartPtr & data_part) const override;
+};
+
+}
diff --git a/src/DataStreams/ya.make b/src/DataStreams/ya.make
index 858bf7081e7..b0a7755c7f9 100644
--- a/src/DataStreams/ya.make
+++ b/src/DataStreams/ya.make
@@ -27,6 +27,7 @@ SRCS(
     ExecutionSpeedLimits.cpp
     ExpressionBlockInputStream.cpp
     IBlockInputStream.cpp
+    ITTLAlgorithm.cpp
     InputStreamFromASTInsertQuery.cpp
     InternalTextLogsRowOutputStream.cpp
     LimitBlockInputStream.cpp
@@ -44,7 +45,11 @@ SRCS(
     SquashingBlockInputStream.cpp
     SquashingBlockOutputStream.cpp
     SquashingTransform.cpp
+    TTLAggregationAlgorithm.cpp
     TTLBlockInputStream.cpp
+    TTLColumnAlgorithm.cpp
+    TTLDeleteAlgorithm.cpp
+    TTLUpdateInfoAlgorithm.cpp
     copyData.cpp
     finalizeBlock.cpp
     materializeBlock.cpp
diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp
index 104eedf060e..cdf66ec43f6 100644
--- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp
+++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp
@@ -1136,6 +1136,12 @@ bool IMergeTreeDataPart::checkAllTTLCalculated(const StorageMetadataPtr & metada
             return false;
     }
 
+    for (const auto & group_by_desc : metadata_snapshot->getGroupByTTLs())
+    {
+        if (!ttl_infos.group_by_ttl.count(group_by_desc.result_column))
+            return false;
+    }
+
     return true;
 }
 
diff --git a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp
index 92c8a66e828..3a0bb283b63 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp
@@ -17,13 +17,17 @@ void MergeTreeDataPartTTLInfos::update(const MergeTreeDataPartTTLInfos & other_i
         updatePartMinMaxTTL(ttl_info.min, ttl_info.max);
     }
 
+    for (const auto & [name, ttl_info] : other_infos.group_by_ttl)
+    {
+        group_by_ttl[name].update(ttl_info);
+        updatePartMinMaxTTL(ttl_info.min, ttl_info.max);
+    }
+
     for (const auto & [name, ttl_info] : other_infos.recompression_ttl)
         recompression_ttl[name].update(ttl_info);
 
     for (const auto & [expression, ttl_info] : other_infos.moves_ttl)
-    {
         moves_ttl[expression].update(ttl_info);
-    }
 
     table_ttl.update(other_infos.table_ttl);
     updatePartMinMaxTTL(table_ttl.min, table_ttl.max);
@@ -59,29 +63,33 @@ void MergeTreeDataPartTTLInfos::read(ReadBuffer & in)
 
         updatePartMinMaxTTL(table_ttl.min, table_ttl.max);
     }
+
+    auto fill_ttl_info_map = [](const JSON & json_part, TTLInfoMap & ttl_info_map)
+    {
+        for (auto elem : json_part) // NOLINT
+        {
+            MergeTreeDataPartTTLInfo ttl_info;
+            ttl_info.min = elem["min"].getUInt();
+            ttl_info.max = elem["max"].getUInt();
+            String expression = elem["expression"].getString();
+            ttl_info_map.emplace(expression, ttl_info);
+        }
+    };
+
     if (json.has("moves"))
     {
         const JSON & moves = json["moves"];
-        for (auto move : moves) // NOLINT
-        {
-            MergeTreeDataPartTTLInfo ttl_info;
-            ttl_info.min = move["min"].getUInt();
-            ttl_info.max = move["max"].getUInt();
-            String expression = move["expression"].getString();
-            moves_ttl.emplace(expression, ttl_info);
-        }
+        fill_ttl_info_map(moves, moves_ttl);
     }
     if (json.has("recompression"))
     {
         const JSON & recompressions = json["recompression"];
-        for (auto recompression : recompressions) // NOLINT
-        {
-            MergeTreeDataPartTTLInfo ttl_info;
-            ttl_info.min = recompression["min"].getUInt();
-            ttl_info.max = recompression["max"].getUInt();
-            String expression = recompression["expression"].getString();
-            recompression_ttl.emplace(expression, ttl_info);
-        }
+        fill_ttl_info_map(recompressions, recompression_ttl);
+    }
+    if (json.has("group_by"))
+    {
+        const JSON & group_by = json["group_by"];
+        fill_ttl_info_map(group_by, group_by_ttl);
     }
 }
 
@@ -118,6 +126,18 @@ void MergeTreeDataPartTTLInfos::write(WriteBuffer & out) const
         writeIntText(table_ttl.max, out);
         writeString("}", out);
     }
+
+    auto write_info_for_expression = [&out](const auto & name, const auto & info)
+    {
+        writeString(R"({"expression":)", out);
+        writeString(doubleQuoteString(name), out);
+        writeString(R"(,"min":)", out);
+        writeIntText(info.min, out);
+        writeString(R"(,"max":)", out);
+        writeIntText(info.max, out);
+        writeString("}", out);
+    };
+
     if (!moves_ttl.empty())
     {
         if (!columns_ttl.empty() || table_ttl.min)
@@ -128,13 +148,7 @@ void MergeTreeDataPartTTLInfos::write(WriteBuffer & out) const
             if (it != moves_ttl.begin())
                 writeString(",", out);
 
-            writeString(R"({"expression":)", out);
-            writeString(doubleQuoteString(it->first), out);
-            writeString(R"(,"min":)", out);
-            writeIntText(it->second.min, out);
-            writeString(R"(,"max":)", out);
-            writeIntText(it->second.max, out);
-            writeString("}", out);
+            write_info_for_expression(it->first, it->second);
         }
         writeString("]", out);
     }
@@ -149,13 +163,22 @@ void MergeTreeDataPartTTLInfos::write(WriteBuffer & out) const
             if (it != recompression_ttl.begin())
                 writeString(",", out);
 
-            writeString(R"({"expression":)", out);
-            writeString(doubleQuoteString(it->first), out);
-            writeString(R"(,"min":)", out);
-            writeIntText(it->second.min, out);
-            writeString(R"(,"max":)", out);
-            writeIntText(it->second.max, out);
-            writeString("}", out);
+            write_info_for_expression(it->first, it->second);
+        }
+        writeString("]", out);
+    }
+    if (!group_by_ttl.empty())
+    {
+        if (!moves_ttl.empty() || !columns_ttl.empty() || !recompression_ttl.empty() || table_ttl.min)
+            writeString(",", out);
+
+        writeString(R"("group_by":[)", out);
+        for (auto it = group_by_ttl.begin(); it != group_by_ttl.end(); ++it)
+        {
+            if (it != group_by_ttl.begin())
+                writeString(",", out);
+
+            write_info_for_expression(it->first, it->second);
         }
         writeString("]", out);
     }
diff --git a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h
index 17239e2618a..8ab6d6089db 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h
+++ b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h
@@ -49,10 +49,11 @@ struct MergeTreeDataPartTTLInfos
 
     TTLInfoMap recompression_ttl;
 
+    TTLInfoMap group_by_ttl;
+
     /// Return the smallest max recompression TTL value
     time_t getMinimalMaxRecompressionTTL() const;
 
-
     void read(ReadBuffer & in);
     void write(WriteBuffer & out) const;
     void update(const MergeTreeDataPartTTLInfos & other_infos);
@@ -68,7 +69,7 @@ struct MergeTreeDataPartTTLInfos
 
     bool empty() const
     {
-        return !part_min_ttl && moves_ttl.empty() && recompression_ttl.empty();
+        return !part_min_ttl && moves_ttl.empty() && recompression_ttl.empty() && group_by_ttl.empty();
     }
 };
 
diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp
index e1284fe8d92..68c409eb85c 100644
--- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp
@@ -376,6 +376,9 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa
     if (metadata_snapshot->hasRowsTTL())
         updateTTL(metadata_snapshot->getRowsTTL(), new_data_part->ttl_infos, new_data_part->ttl_infos.table_ttl, block, true);
 
+    for (const auto & ttl_entry : metadata_snapshot->getGroupByTTLs())
+        updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.group_by_ttl[ttl_entry.result_column], block, true);
+
     for (const auto & [name, ttl_entry] : metadata_snapshot->getColumnTTLs())
         updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.columns_ttl[name], block, true);
 
diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp
index a4500e2aa7b..463a7c3b382 100644
--- a/src/Storages/StorageInMemoryMetadata.cpp
+++ b/src/Storages/StorageInMemoryMetadata.cpp
@@ -125,7 +125,7 @@ TTLTableDescription StorageInMemoryMetadata::getTableTTLs() const
 
 bool StorageInMemoryMetadata::hasAnyTableTTL() const
 {
-    return hasAnyMoveTTL() || hasRowsTTL() || hasAnyRecompressionTTL();
+    return hasAnyMoveTTL() || hasRowsTTL() || hasAnyRecompressionTTL() || hasAnyGroupByTTL();
 }
 
 TTLColumnsDescription StorageInMemoryMetadata::getColumnTTLs() const
@@ -168,6 +168,16 @@ bool StorageInMemoryMetadata::hasAnyRecompressionTTL() const
     return !table_ttl.recompression_ttl.empty();
 }
 
+TTLDescriptions StorageInMemoryMetadata::getGroupByTTLs() const
+{
+    return table_ttl.group_by_ttl;
+}
+
+bool StorageInMemoryMetadata::hasAnyGroupByTTL() const
+{
+    return !table_ttl.group_by_ttl.empty();
+}
+
 ColumnDependencies StorageInMemoryMetadata::getColumnDependencies(const NameSet & updated_columns) const
 {
     if (updated_columns.empty())
diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h
index 3656edf71f4..cf9f38fe135 100644
--- a/src/Storages/StorageInMemoryMetadata.h
+++ b/src/Storages/StorageInMemoryMetadata.h
@@ -118,6 +118,10 @@ struct StorageInMemoryMetadata
     TTLDescriptions getRecompressionTTLs() const;
     bool hasAnyRecompressionTTL() const;
 
+    // Just wrapper for table TTLs, return info about recompression ttl
+    TTLDescriptions getGroupByTTLs() const;
+    bool hasAnyGroupByTTL() const;
+
     /// Returns columns, which will be needed to calculate dependencies (skip
     /// indices, TTL expressions) if we update @updated_columns set of columns.
     ColumnDependencies getColumnDependencies(const NameSet & updated_columns) const;
diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp
index 7ae20ed024e..d890551893c 100644
--- a/src/Storages/System/StorageSystemParts.cpp
+++ b/src/Storages/System/StorageSystemParts.cpp
@@ -68,6 +68,10 @@ StorageSystemParts::StorageSystemParts(const StorageID & table_id_)
         {"recompression_ttl_info.expression",           std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
         {"recompression_ttl_info.min",                  std::make_shared<DataTypeArray>(std::make_shared<DataTypeDateTime>())},
         {"recompression_ttl_info.max",                  std::make_shared<DataTypeArray>(std::make_shared<DataTypeDateTime>())},
+
+        {"group_by_ttl_info.expression",                std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
+        {"group_by_ttl_info.min",                       std::make_shared<DataTypeArray>(std::make_shared<DataTypeDateTime>())},
+        {"group_by_ttl_info.max",                       std::make_shared<DataTypeArray>(std::make_shared<DataTypeDateTime>())}
     }
     )
 {
@@ -184,6 +188,7 @@ void StorageSystemParts::processNextStorage(MutableColumns & columns_, const Sto
         columns_[i++]->insert(queryToString(part->default_codec->getCodecDesc()));
 
         add_ttl_info_map(part->ttl_infos.recompression_ttl);
+        add_ttl_info_map(part->ttl_infos.group_by_ttl);
     }
 }
 
diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp
index c2c5898c70c..d8731dd4ab3 100644
--- a/src/Storages/TTLDescription.cpp
+++ b/src/Storages/TTLDescription.cpp
@@ -259,6 +259,7 @@ TTLTableDescription::TTLTableDescription(const TTLTableDescription & other)
  , rows_ttl(other.rows_ttl)
  , move_ttl(other.move_ttl)
  , recompression_ttl(other.recompression_ttl)
+ , group_by_ttl(other.group_by_ttl)
 {
 }
 
@@ -275,6 +276,7 @@ TTLTableDescription & TTLTableDescription::operator=(const TTLTableDescription &
     rows_ttl = other.rows_ttl;
     move_ttl = other.move_ttl;
     recompression_ttl = other.recompression_ttl;
+    group_by_ttl = other.group_by_ttl;
 
     return *this;
 }
@@ -295,7 +297,7 @@ TTLTableDescription TTLTableDescription::getTTLForTableFromAST(
     for (const auto & ttl_element_ptr : definition_ast->children)
     {
         auto ttl = TTLDescription::getTTLFromAST(ttl_element_ptr, columns, context, primary_key);
-        if (ttl.mode == TTLMode::DELETE || ttl.mode == TTLMode::GROUP_BY)
+        if (ttl.mode == TTLMode::DELETE)
         {
             if (seen_delete_ttl)
                 throw Exception("More than one DELETE TTL expression is not allowed", ErrorCodes::BAD_TTL_EXPRESSION);
@@ -306,6 +308,10 @@ TTLTableDescription TTLTableDescription::getTTLForTableFromAST(
         {
             result.recompression_ttl.emplace_back(std::move(ttl));
         }
+        else if (ttl.mode == TTLMode::GROUP_BY)
+        {
+            result.group_by_ttl.emplace_back(std::move(ttl));
+        }
         else
         {
             result.move_ttl.emplace_back(std::move(ttl));
diff --git a/src/Storages/TTLDescription.h b/src/Storages/TTLDescription.h
index 4b0d4370a70..1cc3a832447 100644
--- a/src/Storages/TTLDescription.h
+++ b/src/Storages/TTLDescription.h
@@ -107,6 +107,8 @@ struct TTLTableDescription
 
     TTLDescriptions recompression_ttl;
 
+    TTLDescriptions group_by_ttl;
+
     TTLTableDescription() = default;
     TTLTableDescription(const TTLTableDescription & other);
     TTLTableDescription & operator=(const TTLTableDescription & other);

From a8f1786d952482e0e4224537ad27e6cf8bd92ae2 Mon Sep 17 00:00:00 2001
From: Anton Popov <pad11rus@gmail.com>
Date: Tue, 29 Dec 2020 18:19:11 +0300
Subject: [PATCH 0080/1238] fix TTL with GROUP BY

---
 src/DataStreams/TTLColumnAlgorithm.cpp | 1 -
 src/Storages/TTLDescription.cpp        | 9 ++++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/DataStreams/TTLColumnAlgorithm.cpp b/src/DataStreams/TTLColumnAlgorithm.cpp
index 4747a605e3b..afab3af62a7 100644
--- a/src/DataStreams/TTLColumnAlgorithm.cpp
+++ b/src/DataStreams/TTLColumnAlgorithm.cpp
@@ -26,7 +26,6 @@ void TTLColumnAlgorithm::execute(Block & block)
     if (!block)
         return;
 
-
     /// If we read not all table columns. E.g. while mutation.
     if (!block.has(column_name))
         return;
diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp
index d8731dd4ab3..f0c936b10c2 100644
--- a/src/Storages/TTLDescription.cpp
+++ b/src/Storages/TTLDescription.cpp
@@ -211,9 +211,12 @@ TTLDescription TTLDescription::getTTLFromAST(
             const auto & primary_key_expressions = primary_key.expression_list_ast->children;
             for (size_t i = ttl_element->group_by_key.size(); i < primary_key_expressions.size(); ++i)
             {
-                ASTPtr expr = makeASTFunction("any", primary_key_expressions[i]->clone());
-                aggregations.emplace_back(pk_columns[i], std::move(expr));
-                aggregation_columns_set.insert(pk_columns[i]);
+                if (!aggregation_columns_set.count(pk_columns[i]))
+                {
+                    ASTPtr expr = makeASTFunction("any", primary_key_expressions[i]->clone());
+                    aggregations.emplace_back(pk_columns[i], std::move(expr));
+                    aggregation_columns_set.insert(pk_columns[i]);
+                }
             }
 
             for (const auto & column : columns.getOrdinary())

From 0856b2c5144171f73eb36afcec500a261ed34258 Mon Sep 17 00:00:00 2001
From: Pavel Kovalenko <jokserfn@yandex-team.ru>
Date: Mon, 11 Jan 2021 20:37:08 +0300
Subject: [PATCH 0081/1238] Ability to backup-restore metadata files for DiskS3
 (fixes and tests)

---
 src/Disks/DiskCacheWrapper.cpp                |  12 +-
 src/Disks/IDisk.h                             |   3 +
 src/Disks/S3/DiskS3.cpp                       |  83 ++++++++------
 src/Disks/S3/DiskS3.h                         |   7 +-
 src/Disks/S3/registerDiskS3.cpp               |   2 +-
 src/Storages/MergeTree/MergeTreeData.cpp      |  10 +-
 tests/integration/helpers/cluster.py          |  41 +++----
 .../config.d/bg_processing_pool_conf.xml      |   5 +
 .../configs/config.d/log_conf.xml             |  12 ++
 .../configs/config.d/storage_conf.xml         |  34 ++++++
 .../configs/config.d/users.xml                |   5 +
 .../configs/config.xml                        |  20 ++++
 .../test_merge_tree_s3_restore/test.py        | 106 ++++++++++++++++++
 13 files changed, 269 insertions(+), 71 deletions(-)
 create mode 100644 tests/integration/test_merge_tree_s3_restore/configs/config.d/bg_processing_pool_conf.xml
 create mode 100644 tests/integration/test_merge_tree_s3_restore/configs/config.d/log_conf.xml
 create mode 100644 tests/integration/test_merge_tree_s3_restore/configs/config.d/storage_conf.xml
 create mode 100644 tests/integration/test_merge_tree_s3_restore/configs/config.d/users.xml
 create mode 100644 tests/integration/test_merge_tree_s3_restore/configs/config.xml
 create mode 100644 tests/integration/test_merge_tree_s3_restore/test.py

diff --git a/src/Disks/DiskCacheWrapper.cpp b/src/Disks/DiskCacheWrapper.cpp
index d44f5a8e0d4..8dc8a005f57 100644
--- a/src/Disks/DiskCacheWrapper.cpp
+++ b/src/Disks/DiskCacheWrapper.cpp
@@ -108,7 +108,7 @@ DiskCacheWrapper::readFile(const String & path, size_t buf_size, size_t estimate
     if (!cache_file_predicate(path))
         return DiskDecorator::readFile(path, buf_size, estimated_size, aio_threshold, mmap_threshold);
 
-    LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Read file {} from cache", backQuote(path));
+    LOG_DEBUG(&Poco::Logger::get("DiskCache"), "Read file {} from cache", backQuote(path));
 
     if (cache_disk->exists(path))
         return cache_disk->readFile(path, buf_size, estimated_size, aio_threshold, mmap_threshold);
@@ -122,11 +122,11 @@ DiskCacheWrapper::readFile(const String & path, size_t buf_size, size_t estimate
         {
             /// This thread will responsible for file downloading to cache.
             metadata->status = DOWNLOADING;
-            LOG_DEBUG(&Poco::Logger::get("DiskS3"), "File {} doesn't exist in cache. Will download it", backQuote(path));
+            LOG_DEBUG(&Poco::Logger::get("DiskCache"), "File {} doesn't exist in cache. Will download it", backQuote(path));
         }
         else if (metadata->status == DOWNLOADING)
         {
-            LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Waiting for file {} download to cache", backQuote(path));
+            LOG_DEBUG(&Poco::Logger::get("DiskCache"), "Waiting for file {} download to cache", backQuote(path));
             metadata->condition.wait(lock, [metadata] { return metadata->status == DOWNLOADED || metadata->status == ERROR; });
         }
     }
@@ -151,11 +151,11 @@ DiskCacheWrapper::readFile(const String & path, size_t buf_size, size_t estimate
                 }
                 cache_disk->moveFile(tmp_path, path);
 
-                LOG_DEBUG(&Poco::Logger::get("DiskS3"), "File {} downloaded to cache", backQuote(path));
+                LOG_DEBUG(&Poco::Logger::get("DiskCache"), "File {} downloaded to cache", backQuote(path));
             }
             catch (...)
             {
-                tryLogCurrentException("DiskS3", "Failed to download file + " + backQuote(path) + " to cache");
+                tryLogCurrentException("DiskCache", "Failed to download file + " + backQuote(path) + " to cache");
                 result_status = ERROR;
             }
         }
@@ -180,7 +180,7 @@ DiskCacheWrapper::writeFile(const String & path, size_t buf_size, WriteMode mode
     if (!cache_file_predicate(path))
         return DiskDecorator::writeFile(path, buf_size, mode, estimated_size, aio_threshold);
 
-    LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Write file {} to cache", backQuote(path));
+    LOG_DEBUG(&Poco::Logger::get("DiskCache"), "Write file {} to cache", backQuote(path));
 
     auto dir_path = directoryPath(path);
     if (!cache_disk->exists(dir_path))
diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h
index 7d3e498a40b..a26d5015ba0 100644
--- a/src/Disks/IDisk.h
+++ b/src/Disks/IDisk.h
@@ -195,6 +195,9 @@ public:
     /// Returns executor to perform asynchronous operations.
     virtual Executor & getExecutor() { return *executor; }
 
+    /// Invoked when partitions freeze is invoked.
+    virtual void onFreeze(const String &) { }
+
 private:
     std::unique_ptr<Executor> executor;
 };
diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp
index 97a7dc4939f..a13fa148413 100644
--- a/src/Disks/S3/DiskS3.cpp
+++ b/src/Disks/S3/DiskS3.cpp
@@ -924,19 +924,24 @@ void DiskS3::startup()
 
     /// Find last revision.
     UInt64 l = 0, r = LATEST_REVISION;
-    while (r - l > 1)
+    while (l < r)
     {
-        auto revision = (r - l) >> 1;
+        LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Check revision in bounds {}-{}", l, r);
+
+        auto revision = l + (r - l + 1) / 2;
         auto revision_str = revisionToString(revision);
-        /// Check that file or operation with such revision exists.
+
+        LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Check object with revision {}", revision);
+
+        /// Check file or operation with such revision exists.
         if (checkObjectExists(s3_root_path + "r" + revision_str)
             || checkObjectExists(s3_root_path + "operations/r" + revision_str))
             l = revision;
         else
-            r = revision;
+            r = revision - 1;
     }
     revision_counter = l;
-    LOG_INFO(&Poco::Logger::get("DiskS3"), "Found last revision number {}", revision_counter);
+    LOG_INFO(&Poco::Logger::get("DiskS3"), "Found last revision number {} for disk {}", revision_counter, name);
 }
 
 bool DiskS3::checkObjectExists(const String & prefix)
@@ -969,7 +974,7 @@ void DiskS3::listObjects(const String & source_bucket, const String & source_pat
     Aws::S3::Model::ListObjectsV2Request request;
     request.SetBucket(source_bucket);
     request.SetPrefix(source_path);
-    request.SetMaxKeys(1000);
+    request.SetMaxKeys(list_object_keys_size);
 
     Aws::S3::Model::ListObjectsV2Outcome outcome;
     do
@@ -1000,13 +1005,13 @@ void DiskS3::copyObject(const String & src_bucket, const String & src_key, const
 struct DiskS3::RestoreInformation
 {
     UInt64 revision = LATEST_REVISION;
-    String bucket;
-    String path;
+    String source_bucket;
+    String source_path;
 };
 
 void DiskS3::readRestoreInformation(DiskS3::RestoreInformation & restore_information)
 {
-    ReadBufferFromFile buffer(metadata_path + restore_file, 512);
+    ReadBufferFromFile buffer(metadata_path + restore_file_name, 512);
     buffer.next();
 
     /// Empty file - just restore all metadata.
@@ -1021,13 +1026,13 @@ void DiskS3::readRestoreInformation(DiskS3::RestoreInformation & restore_informa
         if (!buffer.hasPendingData())
             return;
 
-        readText(restore_information.bucket, buffer);
+        readText(restore_information.source_bucket, buffer);
         assertChar('\n', buffer);
 
         if (!buffer.hasPendingData())
             return;
 
-        readText(restore_information.path, buffer);
+        readText(restore_information.source_path, buffer);
         assertChar('\n', buffer);
 
         if (buffer.hasPendingData())
@@ -1041,35 +1046,42 @@ void DiskS3::readRestoreInformation(DiskS3::RestoreInformation & restore_informa
 
 void DiskS3::restore()
 {
-    if (!exists(restore_file))
+    if (!exists(restore_file_name))
         return;
 
     try
     {
         RestoreInformation information;
-        information.bucket = bucket;
-        information.path = s3_root_path;
+        information.source_bucket = bucket;
+        information.source_path = s3_root_path;
 
         readRestoreInformation(information);
         if (information.revision == 0)
             information.revision = LATEST_REVISION;
+        if (!information.source_path.ends_with('/'))
+            information.source_path += '/';
 
-        if (information.bucket == bucket)
+        if (information.source_bucket == bucket)
         {
             /// In this case we need to additionally cleanup S3 from objects with later revision.
             /// Will be simply just restore to different path.
-            if (information.path == s3_root_path && information.revision != LATEST_REVISION)
+            if (information.source_path == s3_root_path && information.revision != LATEST_REVISION)
                 throw Exception("Restoring to the same bucket and path is allowed if revision is latest (0)", ErrorCodes::BAD_ARGUMENTS);
 
             /// This case complicates S3 cleanup in case of unsuccessful restore.
-            if (information.path != s3_root_path && (information.path.starts_with(s3_root_path) || s3_root_path.starts_with(information.path)))
-                throw Exception("Restoring to the same bucket is allowed only if restore paths are same or not prefixes of each other", ErrorCodes::BAD_ARGUMENTS);
+            if (information.source_path != s3_root_path && s3_root_path.starts_with(information.source_path))
+                throw Exception("Restoring to the same bucket is allowed only if source path is not a sub-path of configured path in S3 disk", ErrorCodes::BAD_ARGUMENTS);
         }
 
         ///TODO: Cleanup FS and bucket if previous restore was failed.
 
-        restoreFiles(information.bucket, information.path, information.revision);
-        restoreFileOperations(information.bucket, information.path, information.revision);
+        restoreFiles(information.source_bucket, information.source_path, information.revision);
+        restoreFileOperations(information.source_bucket, information.source_path, information.revision);
+
+        Poco::File restore_file(metadata_path + restore_file_name);
+        restore_file.remove();
+
+        LOG_INFO(&Poco::Logger::get("DiskS3"), "Restore disk {} finished", name);
     }
     catch (const Exception & e)
     {
@@ -1093,7 +1105,7 @@ void DiskS3::restoreFiles(const String & source_bucket, const String & source_pa
             if (key.find("/operations/") != String::npos)
                 continue;
 
-            auto [revision, _] = extractRevisionAndOperationFromKey(key);
+            const auto [revision, _] = extractRevisionAndOperationFromKey(key);
             /// Filter early if it's possible to get revision from key.
             if (revision > target_revision)
                 continue;
@@ -1129,11 +1141,11 @@ void DiskS3::processRestoreFiles(const String & source_bucket, const String & so
         auto head_result = headObject(source_bucket, key);
         auto object_metadata = head_result.GetMetadata();
 
-        /// If object has 'path' in metadata then restore it.
+        /// Restore file if object has 'path' in metadata.
         auto path_entry = object_metadata.find("path");
         if (path_entry == object_metadata.end())
         {
-            LOG_WARNING(&Poco::Logger::get("DiskS3"), "Skip key {} because it doesn't have 'path' key in metadata", key);
+            LOG_WARNING(&Poco::Logger::get("DiskS3"), "Skip key {} because it doesn't have 'path' in metadata", key);
             continue;
         }
 
@@ -1141,17 +1153,16 @@ void DiskS3::processRestoreFiles(const String & source_bucket, const String & so
 
         createDirectories(directoryPath(path));
         auto metadata = createMeta(path);
-
         auto relative_key = shrinkKey(source_path, key);
-        metadata.addObject(relative_key, head_result.GetContentLength());
 
         /// Copy object if we restore to different bucket / path.
         if (bucket != source_bucket || s3_root_path != source_path)
             copyObject(source_bucket, key, bucket, s3_root_path + relative_key);
 
+        metadata.addObject(relative_key, head_result.GetContentLength());
         metadata.save();
 
-        LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Restored {} file", path);
+        LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Restored file {}", path);
     }
 }
 
@@ -1159,7 +1170,7 @@ void DiskS3::restoreFileOperations(const String & source_bucket, const String &
 {
     LOG_INFO(&Poco::Logger::get("DiskS3"), "Starting restore file operations for disk {}", name);
 
-    /// Enable record file operations if we restore to different bucket / path.
+    /// Enable recording file operations if we restore to different bucket / path.
     send_metadata = bucket != source_bucket || s3_root_path != source_path;
 
     listObjects(source_bucket, source_path + "operations/", [this, &source_bucket, &target_revision](auto list_result)
@@ -1171,15 +1182,15 @@ void DiskS3::restoreFileOperations(const String & source_bucket, const String &
         {
             const String & key = row.GetKey();
 
-            auto [revision, operation] = extractRevisionAndOperationFromKey(key);
-            if (revision == 0)
+            const auto [revision, operation] = extractRevisionAndOperationFromKey(key);
+            if (revision == UNKNOWN_REVISION)
             {
-                LOG_WARNING(&Poco::Logger::get("DiskS3"), "Skip key {} with unknown revision", revision);
+                LOG_WARNING(&Poco::Logger::get("DiskS3"), "Skip key {} with unknown revision", key);
                 continue;
             }
 
-            /// Stop processing when get revision more than required.
-            /// S3 ensures that keys will be listed in ascending UTF-8 bytes order.
+            /// S3 ensures that keys will be listed in ascending UTF-8 bytes order (revision order).
+            /// We can stop processing if revision of the object is already more than required.
             if (revision > target_revision)
                 return false;
 
@@ -1220,7 +1231,7 @@ void DiskS3::restoreFileOperations(const String & source_bucket, const String &
 
 std::tuple<UInt64, String> DiskS3::extractRevisionAndOperationFromKey(const String & key)
 {
-    UInt64 revision = 0;
+    UInt64 revision = UNKNOWN_REVISION;
     String operation;
 
     re2::RE2::FullMatch(key, key_regexp, &revision, &operation);
@@ -1249,4 +1260,10 @@ String DiskS3::revisionToString(UInt64 revision)
     return revision_str;
 }
 
+void DiskS3::onFreeze(const String & path)
+{
+    WriteBufferFromFile revision_file_buf(metadata_path + path + "revision.txt", 32);
+    writeIntText(revision_counter.load(), revision_file_buf);
+}
+
 }
diff --git a/src/Disks/S3/DiskS3.h b/src/Disks/S3/DiskS3.h
index 0140104c10f..c330bf0c4e6 100644
--- a/src/Disks/S3/DiskS3.h
+++ b/src/Disks/S3/DiskS3.h
@@ -126,6 +126,8 @@ public:
     /// Restore S3 metadata files on file system.
     void restore();
 
+    void onFreeze(const String & path) override;
+
 private:
     bool tryReserve(UInt64 bytes);
 
@@ -172,9 +174,10 @@ private:
 
     std::atomic<UInt64> revision_counter;
     static constexpr UInt64 LATEST_REVISION = (static_cast<UInt64>(1)) << 63;
+    static constexpr UInt64 UNKNOWN_REVISION = 0;
 
-    /// File contains restore information
-    const String restore_file = "restore";
+    /// File at path {metadata_path}/restore indicates that metadata restore is needed and contains restore information
+    const String restore_file_name = "restore";
     /// The number of keys listed in one request (1000 is max value).
     int list_object_keys_size;
 
diff --git a/src/Disks/S3/registerDiskS3.cpp b/src/Disks/S3/registerDiskS3.cpp
index 88344b975bd..ad5ab15e30e 100644
--- a/src/Disks/S3/registerDiskS3.cpp
+++ b/src/Disks/S3/registerDiskS3.cpp
@@ -150,7 +150,7 @@ void registerDiskS3(DiskFactory & factory)
             context.getSettingsRef().s3_min_upload_part_size,
             context.getSettingsRef().s3_max_single_part_upload_size,
             config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024),
-            config.getBool(config_prefix + ".send_object_metadata", false),
+            config.getBool(config_prefix + ".send_metadata", false),
             config.getInt(config_prefix + ".thread_pool_size", 16),
             config.getInt(config_prefix + ".list_object_keys_size", 1000));
 
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 240759b29c7..ddc0e7c7808 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -3604,6 +3604,10 @@ PartitionCommandsResultInfo MergeTreeData::freezePartitionsByMatcher(MatcherFn m
     const auto data_parts = getDataParts();
 
     String backup_name = (!with_name.empty() ? escapeForFileName(with_name) : toString(increment));
+    String backup_path = shadow_path + backup_name + "/";
+
+    for (const auto & disk : getStoragePolicy()->getDisks())
+        disk->onFreeze(backup_path);
 
     PartitionCommandsResultInfo result;
 
@@ -3613,12 +3617,10 @@ PartitionCommandsResultInfo MergeTreeData::freezePartitionsByMatcher(MatcherFn m
         if (!matcher(part))
             continue;
 
-        part->volume->getDisk()->createDirectories(shadow_path);
-
-        String backup_path = shadow_path + backup_name + "/";
-
         LOG_DEBUG(log, "Freezing part {} snapshot will be placed at {}", part->name, backup_path);
 
+        part->volume->getDisk()->createDirectories(shadow_path);
+
         String backup_part_path = backup_path + relative_data_path + part->relative_path;
         if (auto part_in_memory = asInMemoryPart(part))
             part_in_memory->flushToDisk(backup_path + relative_data_path, part->relative_path, metadata_snapshot);
diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py
index a65a420cd5b..3a2723d29e3 100644
--- a/tests/integration/helpers/cluster.py
+++ b/tests/integration/helpers/cluster.py
@@ -1040,32 +1040,25 @@ class ClickHouseInstance:
         return self.http_query(sql=sql, data=data, params=params, user=user, password=password,
                                expect_fail_and_get_error=True)
 
-    def kill_clickhouse(self, stop_start_wait_sec=5):
-        pid = self.get_process_pid("clickhouse")
-        if not pid:
-            raise Exception("No clickhouse found")
-        self.exec_in_container(["bash", "-c", "kill -9 {}".format(pid)], user='root')
-        time.sleep(stop_start_wait_sec)
-
-    def restore_clickhouse(self, retries=100):
-        pid = self.get_process_pid("clickhouse")
-        if pid:
-            raise Exception("ClickHouse has already started")
-        self.exec_in_container(["bash", "-c", "{} --daemon".format(CLICKHOUSE_START_COMMAND)], user=str(os.getuid()))
-        from helpers.test_tools import assert_eq_with_retry
-        # wait start
-        assert_eq_with_retry(self, "select 1", "1", retry_count=retries)
-
-    def restart_clickhouse(self, stop_start_wait_sec=5, kill=False):
+    def stop_clickhouse(self, start_wait_sec=5, kill=False):
         if not self.stay_alive:
-            raise Exception("clickhouse can be restarted only with stay_alive=True instance")
+            raise Exception("clickhouse can be stopped only with stay_alive=True instance")
 
         self.exec_in_container(["bash", "-c", "pkill {} clickhouse".format("-9" if kill else "")], user='root')
-        time.sleep(stop_start_wait_sec)
+        time.sleep(start_wait_sec)
+
+    def start_clickhouse(self, stop_wait_sec=5):
+        if not self.stay_alive:
+            raise Exception("clickhouse can be started again only with stay_alive=True instance")
+
         self.exec_in_container(["bash", "-c", "{} --daemon".format(CLICKHOUSE_START_COMMAND)], user=str(os.getuid()))
         # wait start
         from helpers.test_tools import assert_eq_with_retry
-        assert_eq_with_retry(self, "select 1", "1", retry_count=int(stop_start_wait_sec / 0.5), sleep_time=0.5)
+        assert_eq_with_retry(self, "select 1", "1", retry_count=int(stop_wait_sec / 0.5), sleep_time=0.5)
+
+    def restart_clickhouse(self, stop_start_wait_sec=5, kill=False):
+        self.stop_clickhouse(stop_start_wait_sec, kill)
+        self.start_clickhouse(stop_start_wait_sec)
 
     def exec_in_container(self, cmd, detach=False, nothrow=False, **kwargs):
         container_id = self.get_docker_handle().id
@@ -1085,9 +1078,7 @@ class ClickHouseInstance:
         return self.cluster.copy_file_to_container(container_id, local_path, dest_path)
 
     def get_process_pid(self, process_name):
-        output = self.exec_in_container(["bash", "-c",
-                                         "ps ax | grep '{}' | grep -v 'grep' | grep -v 'bash -c' | awk '{{print $1}}'".format(
-                                             process_name)])
+        output = self.exec_in_container(["pidof", "{}".format(process_name)])
         if output:
             try:
                 pid = int(output.split('\n')[0].strip())
@@ -1403,7 +1394,7 @@ class ClickHouseKiller(object):
         self.clickhouse_node = clickhouse_node
 
     def __enter__(self):
-        self.clickhouse_node.kill_clickhouse()
+        self.clickhouse_node.stop_clickhouse()
 
     def __exit__(self, exc_type, exc_val, exc_tb):
-        self.clickhouse_node.restore_clickhouse()
+        self.clickhouse_node.start_clickhouse()
diff --git a/tests/integration/test_merge_tree_s3_restore/configs/config.d/bg_processing_pool_conf.xml b/tests/integration/test_merge_tree_s3_restore/configs/config.d/bg_processing_pool_conf.xml
new file mode 100644
index 00000000000..a756c4434ea
--- /dev/null
+++ b/tests/integration/test_merge_tree_s3_restore/configs/config.d/bg_processing_pool_conf.xml
@@ -0,0 +1,5 @@
+<yandex>
+    <background_processing_pool_thread_sleep_seconds>0.5</background_processing_pool_thread_sleep_seconds>
+    <background_processing_pool_task_sleep_seconds_when_no_work_min>0.5</background_processing_pool_task_sleep_seconds_when_no_work_min>
+    <background_processing_pool_task_sleep_seconds_when_no_work_max>0.5</background_processing_pool_task_sleep_seconds_when_no_work_max>
+</yandex>
diff --git a/tests/integration/test_merge_tree_s3_restore/configs/config.d/log_conf.xml b/tests/integration/test_merge_tree_s3_restore/configs/config.d/log_conf.xml
new file mode 100644
index 00000000000..318a6bca95d
--- /dev/null
+++ b/tests/integration/test_merge_tree_s3_restore/configs/config.d/log_conf.xml
@@ -0,0 +1,12 @@
+<yandex>
+    <shutdown_wait_unfinished>3</shutdown_wait_unfinished>
+    <logger>
+        <level>trace</level>
+        <log>/var/log/clickhouse-server/log.log</log>
+        <errorlog>/var/log/clickhouse-server/log.err.log</errorlog>
+        <size>1000M</size>
+        <count>10</count>
+        <stderr>/var/log/clickhouse-server/stderr.log</stderr>
+        <stdout>/var/log/clickhouse-server/stdout.log</stdout>
+    </logger>
+</yandex>
diff --git a/tests/integration/test_merge_tree_s3_restore/configs/config.d/storage_conf.xml b/tests/integration/test_merge_tree_s3_restore/configs/config.d/storage_conf.xml
new file mode 100644
index 00000000000..9361a21efca
--- /dev/null
+++ b/tests/integration/test_merge_tree_s3_restore/configs/config.d/storage_conf.xml
@@ -0,0 +1,34 @@
+<yandex>
+    <storage_configuration>
+        <disks>
+            <s3>
+                <type>s3</type>
+                <endpoint>http://minio1:9001/root/data/</endpoint>
+                <access_key_id>minio</access_key_id>
+                <secret_access_key>minio123</secret_access_key>
+                <send_metadata>true</send_metadata>
+                <list_object_keys_size>1</list_object_keys_size> <!-- To effectively test restore parallelism -->
+            </s3>
+            <hdd>
+                <type>local</type>
+                <path>/</path>
+            </hdd>
+        </disks>
+        <policies>
+            <s3>
+                <volumes>
+                    <main>
+                        <disk>s3</disk>
+                    </main>
+                    <external>
+                        <disk>hdd</disk>
+                    </external>
+                </volumes>
+            </s3>
+        </policies>
+    </storage_configuration>
+
+    <merge_tree>
+        <min_bytes_for_wide_part>0</min_bytes_for_wide_part>
+    </merge_tree>
+</yandex>
diff --git a/tests/integration/test_merge_tree_s3_restore/configs/config.d/users.xml b/tests/integration/test_merge_tree_s3_restore/configs/config.d/users.xml
new file mode 100644
index 00000000000..797113053f4
--- /dev/null
+++ b/tests/integration/test_merge_tree_s3_restore/configs/config.d/users.xml
@@ -0,0 +1,5 @@
+<yandex>
+    <profiles>
+        <default/>
+    </profiles>
+</yandex>
diff --git a/tests/integration/test_merge_tree_s3_restore/configs/config.xml b/tests/integration/test_merge_tree_s3_restore/configs/config.xml
new file mode 100644
index 00000000000..24b7344df3a
--- /dev/null
+++ b/tests/integration/test_merge_tree_s3_restore/configs/config.xml
@@ -0,0 +1,20 @@
+<?xml version="1.0"?>
+<yandex>
+    <tcp_port>9000</tcp_port>
+    <listen_host>127.0.0.1</listen_host>
+
+    <openSSL>
+        <client>
+            <cacheSessions>true</cacheSessions>
+            <verificationMode>none</verificationMode>
+            <invalidCertificateHandler>
+                <name>AcceptCertificateHandler</name>
+            </invalidCertificateHandler>
+        </client>
+    </openSSL>
+
+    <max_concurrent_queries>500</max_concurrent_queries>
+    <mark_cache_size>5368709120</mark_cache_size>
+    <path>./clickhouse/</path>
+    <users_config>users.xml</users_config>
+</yandex>
diff --git a/tests/integration/test_merge_tree_s3_restore/test.py b/tests/integration/test_merge_tree_s3_restore/test.py
new file mode 100644
index 00000000000..6cafc077e81
--- /dev/null
+++ b/tests/integration/test_merge_tree_s3_restore/test.py
@@ -0,0 +1,106 @@
+import logging
+import random
+import string
+import time
+
+import pytest
+from helpers.cluster import ClickHouseCluster
+
+logging.getLogger().setLevel(logging.INFO)
+logging.getLogger().addHandler(logging.StreamHandler())
+
+
+@pytest.fixture(scope="module")
+def cluster():
+    try:
+        cluster = ClickHouseCluster(__file__)
+        cluster.add_instance("node", main_configs=["configs/config.d/storage_conf.xml",
+                                                   "configs/config.d/bg_processing_pool_conf.xml",
+                                                   "configs/config.d/log_conf.xml"], user_configs=[], with_minio=True, stay_alive=True)
+        logging.info("Starting cluster...")
+        cluster.start()
+        logging.info("Cluster started")
+
+        yield cluster
+    finally:
+        cluster.shutdown()
+
+
+def random_string(length):
+    letters = string.ascii_letters
+    return ''.join(random.choice(letters) for i in range(length))
+
+
+def generate_values(date_str, count, sign=1):
+    data = [[date_str, sign * (i + 1), random_string(10)] for i in range(count)]
+    data.sort(key=lambda tup: tup[1])
+    return ",".join(["('{}',{},'{}')".format(x, y, z) for x, y, z in data])
+
+
+def create_table(cluster, table_name, additional_settings=None):
+    node = cluster.instances["node"]
+
+    create_table_statement = """
+        CREATE TABLE {} (
+            dt Date,
+            id Int64,
+            data String,
+            INDEX min_max (id) TYPE minmax GRANULARITY 3
+        ) ENGINE=MergeTree()
+        PARTITION BY dt
+        ORDER BY (dt, id)
+        SETTINGS
+            storage_policy='s3',
+            old_parts_lifetime=600,
+            index_granularity=512
+        """.format(table_name)
+
+    if additional_settings:
+        create_table_statement += ","
+        create_table_statement += additional_settings
+
+    node.query(create_table_statement)
+
+
+@pytest.fixture(autouse=True)
+def drop_table(cluster):
+    yield
+    node = cluster.instances["node"]
+    minio = cluster.minio_client
+
+    node.query("DROP TABLE IF EXISTS s3_test NO DELAY")
+
+    for obj in list(minio.list_objects(cluster.minio_bucket, 'data/')):
+        minio.remove_object(cluster.minio_bucket, obj.object_name)
+
+
+# Restore to the same bucket and path with latest revision.
+def test_simple_full_restore(cluster):
+    create_table(cluster, "s3_test")
+
+    node = cluster.instances["node"]
+
+    node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-03', 4096)))
+    node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-04', 4096, -1)))
+    node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-05', 4096)))
+    node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-05', 4096, -1)))
+
+    # To ensure parts have merged
+    node.query("OPTIMIZE TABLE s3_test")
+
+    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "({})".format(4096 * 4)
+    assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "({})".format(0)
+
+    node.stop_clickhouse()
+    node.exec_in_container(['bash', '-c', 'rm -r /var/lib/clickhouse/disks/s3/*'], user='root')
+    node.start_clickhouse()
+
+    # All data is removed.
+    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "({})".format(0)
+
+    node.stop_clickhouse()
+    node.exec_in_container(['bash', '-c', 'touch /var/lib/clickhouse/disks/s3/restore'], user='root')
+    node.start_clickhouse()
+
+    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "({})".format(4096 * 4)
+    assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "({})".format(0)

From 0164965bc0cd7557871bf53c11eb11dd4b934bb4 Mon Sep 17 00:00:00 2001
From: Pavel Kovalenko <jokserfn@yandex-team.ru>
Date: Mon, 11 Jan 2021 20:40:11 +0300
Subject: [PATCH 0082/1238] Fix get_process_pid

---
 tests/integration/helpers/cluster.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py
index 3a2723d29e3..43c553f5318 100644
--- a/tests/integration/helpers/cluster.py
+++ b/tests/integration/helpers/cluster.py
@@ -1078,7 +1078,7 @@ class ClickHouseInstance:
         return self.cluster.copy_file_to_container(container_id, local_path, dest_path)
 
     def get_process_pid(self, process_name):
-        output = self.exec_in_container(["pidof", "{}".format(process_name)])
+        output = self.exec_in_container(["bash", "-c", "pidof {}".format(process_name)])
         if output:
             try:
                 pid = int(output.split('\n')[0].strip())

From 5822ee1f01e124a19ab9ab03e0ba85fd79914982 Mon Sep 17 00:00:00 2001
From: Anton Popov <pad11rus@gmail.com>
Date: Tue, 12 Jan 2021 02:07:21 +0300
Subject: [PATCH 0083/1238] allow multiple rows TTL with WHERE expression

---
 src/DataStreams/TTLBlockInputStream.cpp       |  4 +
 src/DataStreams/TTLDeleteAlgorithm.cpp        |  6 +-
 src/Storages/MergeTree/IMergeTreeDataPart.cpp |  6 ++
 .../MergeTree/MergeTreeDataPartTTLInfo.cpp    | 87 +++++++++----------
 .../MergeTree/MergeTreeDataPartTTLInfo.h      |  2 +
 .../MergeTree/MergeTreeDataWriter.cpp         |  3 +
 src/Storages/StorageInMemoryMetadata.cpp      | 10 +++
 src/Storages/StorageInMemoryMetadata.h        |  3 +
 src/Storages/TTLDescription.cpp               | 20 +++--
 src/Storages/TTLDescription.h                 |  5 +-
 .../0_stateless/01622_multiple_ttls.reference |  9 ++
 .../0_stateless/01622_multiple_ttls.sql       | 20 +++++
 12 files changed, 120 insertions(+), 55 deletions(-)
 create mode 100644 tests/queries/0_stateless/01622_multiple_ttls.reference
 create mode 100644 tests/queries/0_stateless/01622_multiple_ttls.sql

diff --git a/src/DataStreams/TTLBlockInputStream.cpp b/src/DataStreams/TTLBlockInputStream.cpp
index 8c680f6875b..5154949ae71 100644
--- a/src/DataStreams/TTLBlockInputStream.cpp
+++ b/src/DataStreams/TTLBlockInputStream.cpp
@@ -44,6 +44,10 @@ TTLBlockInputStream::TTLBlockInputStream(
         algorithms.emplace_back(std::move(algorithm));
     }
 
+    for (const auto & where_ttl : metadata_snapshot_->getRowsWhereTTL())
+        algorithms.emplace_back(std::make_unique<TTLDeleteAlgorithm>(
+            where_ttl, old_ttl_infos.rows_where_ttl[where_ttl.result_column], current_time_, force_));
+
     for (const auto & group_by_ttl : metadata_snapshot_->getGroupByTTLs())
         algorithms.emplace_back(std::make_unique<TTLAggregationAlgorithm>(
             group_by_ttl, old_ttl_infos.group_by_ttl[group_by_ttl.result_column], current_time_, force_, header, storage_));
diff --git a/src/DataStreams/TTLDeleteAlgorithm.cpp b/src/DataStreams/TTLDeleteAlgorithm.cpp
index 9ff4eb767df..7227b40dad2 100644
--- a/src/DataStreams/TTLDeleteAlgorithm.cpp
+++ b/src/DataStreams/TTLDeleteAlgorithm.cpp
@@ -51,7 +51,11 @@ void TTLDeleteAlgorithm::execute(Block & block)
 
 void TTLDeleteAlgorithm::finalize(const MutableDataPartPtr & data_part) const
 {
-    data_part->ttl_infos.table_ttl = new_ttl_info;
+    if (description.where_expression)
+        data_part->ttl_infos.rows_where_ttl[description.result_column] = new_ttl_info;
+    else
+        data_part->ttl_infos.table_ttl = new_ttl_info;
+
     data_part->ttl_infos.updatePartMinMaxTTL(new_ttl_info.min, new_ttl_info.max);
 }
 
diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp
index cdf66ec43f6..e78ff09cfc4 100644
--- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp
+++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp
@@ -1142,6 +1142,12 @@ bool IMergeTreeDataPart::checkAllTTLCalculated(const StorageMetadataPtr & metada
             return false;
     }
 
+    for (const auto & rows_where_desc : metadata_snapshot->getRowsWhereTTL())
+    {
+        if (!ttl_infos.rows_where_ttl.count(rows_where_desc.result_column))
+            return false;
+    }
+
     return true;
 }
 
diff --git a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp
index 3a0bb283b63..138e38e3b78 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp
@@ -17,6 +17,12 @@ void MergeTreeDataPartTTLInfos::update(const MergeTreeDataPartTTLInfos & other_i
         updatePartMinMaxTTL(ttl_info.min, ttl_info.max);
     }
 
+    for (const auto & [name, ttl_info] : other_infos.rows_where_ttl)
+    {
+        rows_where_ttl[name].update(ttl_info);
+        updatePartMinMaxTTL(ttl_info.min, ttl_info.max);
+    }
+
     for (const auto & [name, ttl_info] : other_infos.group_by_ttl)
     {
         group_by_ttl[name].update(ttl_info);
@@ -91,6 +97,11 @@ void MergeTreeDataPartTTLInfos::read(ReadBuffer & in)
         const JSON & group_by = json["group_by"];
         fill_ttl_info_map(group_by, group_by_ttl);
     }
+    if (json.has("rows_where"))
+    {
+        const JSON & rows_where = json["rows_where"];
+        fill_ttl_info_map(rows_where, rows_where_ttl);
+    }
 }
 
 
@@ -127,61 +138,41 @@ void MergeTreeDataPartTTLInfos::write(WriteBuffer & out) const
         writeString("}", out);
     }
 
-    auto write_info_for_expression = [&out](const auto & name, const auto & info)
+    auto write_infos = [&out](const auto & infos, const auto & type, bool is_first)
     {
-        writeString(R"({"expression":)", out);
-        writeString(doubleQuoteString(name), out);
-        writeString(R"(,"min":)", out);
-        writeIntText(info.min, out);
-        writeString(R"(,"max":)", out);
-        writeIntText(info.max, out);
-        writeString("}", out);
+        if (!is_first)
+            writeString(",", out);
+
+        writeString(type, out);
+        writeString(R"(:[)", out);
+        for (auto it = infos.begin(); it != infos.end(); ++it)
+        {
+            if (it != infos.begin())
+                writeString(",", out);
+
+            writeString(R"({"expression":)", out);
+            writeString(doubleQuoteString(it->first), out);
+            writeString(R"(,"min":)", out);
+            writeIntText(it->second.min, out);
+            writeString(R"(,"max":)", out);
+            writeIntText(it->second.max, out);
+            writeString("}", out);
+        }
+        writeString("]", out);
     };
 
-    if (!moves_ttl.empty())
-    {
-        if (!columns_ttl.empty() || table_ttl.min)
-            writeString(",", out);
-        writeString(R"("moves":[)", out);
-        for (auto it = moves_ttl.begin(); it != moves_ttl.end(); ++it)
-        {
-            if (it != moves_ttl.begin())
-                writeString(",", out);
+    bool is_first = columns_ttl.empty() && !table_ttl.min;
+    write_infos(moves_ttl, "moves", is_first);
 
-            write_info_for_expression(it->first, it->second);
-        }
-        writeString("]", out);
-    }
-    if (!recompression_ttl.empty())
-    {
-        if (!moves_ttl.empty() || !columns_ttl.empty() || table_ttl.min)
-            writeString(",", out);
+    is_first &= moves_ttl.empty();
+    write_infos(recompression_ttl, "recompression", is_first);
 
-        writeString(R"("recompression":[)", out);
-        for (auto it = recompression_ttl.begin(); it != recompression_ttl.end(); ++it)
-        {
-            if (it != recompression_ttl.begin())
-                writeString(",", out);
+    is_first &= recompression_ttl.empty();
+    write_infos(group_by_ttl, "group_by", is_first);
 
-            write_info_for_expression(it->first, it->second);
-        }
-        writeString("]", out);
-    }
-    if (!group_by_ttl.empty())
-    {
-        if (!moves_ttl.empty() || !columns_ttl.empty() || !recompression_ttl.empty() || table_ttl.min)
-            writeString(",", out);
+    is_first &= group_by_ttl.empty();
+    write_infos(rows_where_ttl, "rows_where", is_first);
 
-        writeString(R"("group_by":[)", out);
-        for (auto it = group_by_ttl.begin(); it != group_by_ttl.end(); ++it)
-        {
-            if (it != group_by_ttl.begin())
-                writeString(",", out);
-
-            write_info_for_expression(it->first, it->second);
-        }
-        writeString("]", out);
-    }
     writeString("}", out);
 }
 
diff --git a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h
index 8ab6d6089db..8b972116384 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h
+++ b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h
@@ -45,6 +45,8 @@ struct MergeTreeDataPartTTLInfos
     time_t part_min_ttl = 0;
     time_t part_max_ttl = 0;
 
+    TTLInfoMap rows_where_ttl;
+
     TTLInfoMap moves_ttl;
 
     TTLInfoMap recompression_ttl;
diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp
index 68c409eb85c..7c733c660d6 100644
--- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp
@@ -379,6 +379,9 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa
     for (const auto & ttl_entry : metadata_snapshot->getGroupByTTLs())
         updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.group_by_ttl[ttl_entry.result_column], block, true);
 
+    for (const auto & ttl_entry : metadata_snapshot->getRowsWhereTTL())
+        updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.rows_where_ttl[ttl_entry.result_column], block, true);
+
     for (const auto & [name, ttl_entry] : metadata_snapshot->getColumnTTLs())
         updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.columns_ttl[name], block, true);
 
diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp
index 463a7c3b382..36947706474 100644
--- a/src/Storages/StorageInMemoryMetadata.cpp
+++ b/src/Storages/StorageInMemoryMetadata.cpp
@@ -148,6 +148,16 @@ bool StorageInMemoryMetadata::hasRowsTTL() const
     return table_ttl.rows_ttl.expression != nullptr;
 }
 
+TTLDescriptions StorageInMemoryMetadata::getRowsWhereTTL() const
+{
+    return table_ttl.rows_where_ttl;
+}
+
+bool StorageInMemoryMetadata::hasRowsWhereTTL() const
+{
+    return !table_ttl.rows_where_ttl.empty();
+}
+
 TTLDescriptions StorageInMemoryMetadata::getMoveTTLs() const
 {
     return table_ttl.move_ttl;
diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h
index cf9f38fe135..4a00457f7eb 100644
--- a/src/Storages/StorageInMemoryMetadata.h
+++ b/src/Storages/StorageInMemoryMetadata.h
@@ -109,6 +109,9 @@ struct StorageInMemoryMetadata
     TTLDescription getRowsTTL() const;
     bool hasRowsTTL() const;
 
+    TTLDescriptions getRowsWhereTTL() const;
+    bool hasRowsWhereTTL() const;
+
     /// Just wrapper for table TTLs, return moves (to disks or volumes) parts of
     /// table TTL.
     TTLDescriptions getMoveTTLs() const;
diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp
index f0c936b10c2..6cef9e53097 100644
--- a/src/Storages/TTLDescription.cpp
+++ b/src/Storages/TTLDescription.cpp
@@ -260,6 +260,7 @@ TTLDescription TTLDescription::getTTLFromAST(
 TTLTableDescription::TTLTableDescription(const TTLTableDescription & other)
  : definition_ast(other.definition_ast ? other.definition_ast->clone() : nullptr)
  , rows_ttl(other.rows_ttl)
+ , rows_where_ttl(other.rows_where_ttl)
  , move_ttl(other.move_ttl)
  , recompression_ttl(other.recompression_ttl)
  , group_by_ttl(other.group_by_ttl)
@@ -277,6 +278,7 @@ TTLTableDescription & TTLTableDescription::operator=(const TTLTableDescription &
         definition_ast.reset();
 
     rows_ttl = other.rows_ttl;
+    rows_where_ttl = other.rows_where_ttl;
     move_ttl = other.move_ttl;
     recompression_ttl = other.recompression_ttl;
     group_by_ttl = other.group_by_ttl;
@@ -296,16 +298,24 @@ TTLTableDescription TTLTableDescription::getTTLForTableFromAST(
 
     result.definition_ast = definition_ast->clone();
 
-    bool seen_delete_ttl = false;
+    bool have_unconditional_delete_ttl = false;
     for (const auto & ttl_element_ptr : definition_ast->children)
     {
         auto ttl = TTLDescription::getTTLFromAST(ttl_element_ptr, columns, context, primary_key);
         if (ttl.mode == TTLMode::DELETE)
         {
-            if (seen_delete_ttl)
-                throw Exception("More than one DELETE TTL expression is not allowed", ErrorCodes::BAD_TTL_EXPRESSION);
-            result.rows_ttl = ttl;
-            seen_delete_ttl = true;
+            if (!ttl.where_expression)
+            {
+                if (have_unconditional_delete_ttl)
+                    throw Exception("More than one DELETE TTL expression without WHERE expression is not allowed", ErrorCodes::BAD_TTL_EXPRESSION);
+
+                have_unconditional_delete_ttl = true;
+                result.rows_ttl = ttl;
+            }
+            else
+            {
+                result.rows_where_ttl.emplace_back(std::move(ttl));
+            }
         }
         else if (ttl.mode == TTLMode::RECOMPRESS)
         {
diff --git a/src/Storages/TTLDescription.h b/src/Storages/TTLDescription.h
index 1cc3a832447..a2340ad6bcd 100644
--- a/src/Storages/TTLDescription.h
+++ b/src/Storages/TTLDescription.h
@@ -99,9 +99,12 @@ struct TTLTableDescription
     /// ^~~~~~~~~~~~~~~definition~~~~~~~~~~~~~~~^
     ASTPtr definition_ast;
 
-    /// Rows removing TTL
+    /// Unconditional main removing rows TTL. Can be only one for table.
     TTLDescription rows_ttl;
 
+    /// Conditional removing rows TTLs.
+    TTLDescriptions rows_where_ttl;
+
     /// Moving data TTL (to other disks or volumes)
     TTLDescriptions move_ttl;
 
diff --git a/tests/queries/0_stateless/01622_multiple_ttls.reference b/tests/queries/0_stateless/01622_multiple_ttls.reference
new file mode 100644
index 00000000000..9b3ac02560c
--- /dev/null
+++ b/tests/queries/0_stateless/01622_multiple_ttls.reference
@@ -0,0 +1,9 @@
+1970-10-10	2
+1970-10-10	5
+1970-10-10	8
+2000-10-10	1
+2000-10-10	2
+2000-10-10	4
+2000-10-10	5
+2000-10-10	7
+2000-10-10	8
diff --git a/tests/queries/0_stateless/01622_multiple_ttls.sql b/tests/queries/0_stateless/01622_multiple_ttls.sql
new file mode 100644
index 00000000000..f86256150b5
--- /dev/null
+++ b/tests/queries/0_stateless/01622_multiple_ttls.sql
@@ -0,0 +1,20 @@
+DROP TABLE IF EXISTS ttl_where;
+
+CREATE TABLE ttl_where
+(
+    `d` Date,
+    `i` UInt32
+)
+ENGINE = MergeTree
+ORDER BY tuple()
+TTL d + toIntervalYear(10) DELETE WHERE i % 3 = 0,
+    d + toIntervalYear(40) DELETE WHERE i % 3 = 1;
+
+INSERT INTO ttl_where SELECT toDate('2000-10-10'), number FROM numbers(10);
+INSERT INTO ttl_where SELECT toDate('1970-10-10'), number FROM numbers(10);
+
+OPTIMIZE TABLE ttl_where FINAL;
+
+SELECT * FROM ttl_where ORDER BY d, i;
+
+DROP TABLE ttl_where;

From 61d6a323dddd0c049c10ee1602c5fe75adf49f5b Mon Sep 17 00:00:00 2001
From: Anton Popov <pad11rus@gmail.com>
Date: Tue, 12 Jan 2021 03:40:07 +0300
Subject: [PATCH 0084/1238] multiple TTL with GROUP BY

---
 src/Parsers/ASTTTLElement.cpp                 | 12 ++--
 src/Parsers/ASTTTLElement.h                   |  2 +-
 src/Parsers/ExpressionElementParsers.cpp      | 64 +++++++++++--------
 src/Parsers/ExpressionElementParsers.h        |  8 +++
 src/Parsers/ParserAlterQuery.cpp              | 29 ---------
 src/Parsers/ParserAlterQuery.h                |  8 ---
 src/Storages/TTLDescription.cpp               | 25 ++++++--
 .../0_stateless/01622_multiple_ttls.reference | 13 ++++
 .../0_stateless/01622_multiple_ttls.sql       | 26 +++++++-
 9 files changed, 113 insertions(+), 74 deletions(-)

diff --git a/src/Parsers/ASTTTLElement.cpp b/src/Parsers/ASTTTLElement.cpp
index 39283a3168e..2d22c1b4307 100644
--- a/src/Parsers/ASTTTLElement.cpp
+++ b/src/Parsers/ASTTTLElement.cpp
@@ -20,7 +20,7 @@ ASTPtr ASTTTLElement::clone() const
 
     for (auto & expr : clone->group_by_key)
         expr = expr->clone();
-    for (auto & [name, expr] : clone->group_by_aggregations)
+    for (auto & expr : clone->group_by_assignments)
         expr = expr->clone();
 
     return clone;
@@ -46,15 +46,15 @@ void ASTTTLElement::formatImpl(const FormatSettings & settings, FormatState & st
                 settings.ostr << ", ";
             (*it)->formatImpl(settings, state, frame);
         }
-        if (!group_by_aggregations.empty())
+
+        if (!group_by_assignments.empty())
         {
             settings.ostr << " SET ";
-            for (auto it = group_by_aggregations.begin(); it != group_by_aggregations.end(); ++it)
+            for (auto it = group_by_assignments.begin(); it != group_by_assignments.end(); ++it)
             {
-                if (it != group_by_aggregations.begin())
+                if (it != group_by_assignments.begin())
                     settings.ostr << ", ";
-                settings.ostr << it->first << " = ";
-                it->second->formatImpl(settings, state, frame);
+                (*it)->formatImpl(settings, state, frame);
             }
         }
     }
diff --git a/src/Parsers/ASTTTLElement.h b/src/Parsers/ASTTTLElement.h
index aadd019b59c..ce011d76c7b 100644
--- a/src/Parsers/ASTTTLElement.h
+++ b/src/Parsers/ASTTTLElement.h
@@ -18,7 +18,7 @@ public:
     String destination_name;
 
     ASTs group_by_key;
-    std::vector<std::pair<String, ASTPtr>> group_by_aggregations;
+    ASTs group_by_assignments;
 
     ASTPtr recompression_codec;
 
diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp
index 726e28005e3..0bcbcac302a 100644
--- a/src/Parsers/ExpressionElementParsers.cpp
+++ b/src/Parsers/ExpressionElementParsers.cpp
@@ -23,6 +23,7 @@
 #include <Parsers/ASTSubquery.h>
 #include <Parsers/ASTFunctionWithKeyValueArguments.h>
 #include <Parsers/ASTColumnsTransformers.h>
+#include <Parsers/ASTAssignment.h>
 
 #include <Parsers/parseIdentifierOrStringLiteral.h>
 #include <Parsers/parseIntervalKind.h>
@@ -1875,9 +1876,12 @@ bool ParserTTLElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
     ParserIdentifier parser_identifier;
     ParserStringLiteral parser_string_literal;
     ParserExpression parser_exp;
-    ParserExpressionList parser_expression_list(false);
+    ParserExpressionList parser_keys_list(false);
     ParserCodec parser_codec;
 
+    ParserList parser_assignment_list(
+        std::make_unique<ParserAssignment>(), std::make_unique<ParserToken>(TokenType::Comma));
+
     ASTPtr ttl_expr;
     if (!parser_exp.parse(pos, ttl_expr, expected))
         return false;
@@ -1911,9 +1915,9 @@ bool ParserTTLElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
     }
 
     ASTPtr where_expr;
-    ASTPtr ast_group_by_key;
+    ASTPtr group_by_key;
     ASTPtr recompression_codec;
-    std::vector<std::pair<String, ASTPtr>> group_by_aggregations;
+    ASTPtr group_by_assignments;
 
     if (mode == TTLMode::MOVE)
     {
@@ -1925,30 +1929,13 @@ bool ParserTTLElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
     }
     else if (mode == TTLMode::GROUP_BY)
     {
-        if (!parser_expression_list.parse(pos, ast_group_by_key, expected))
+        if (!parser_keys_list.parse(pos, group_by_key, expected))
             return false;
 
         if (s_set.ignore(pos))
         {
-            while (true)
-            {
-                if (!group_by_aggregations.empty() && !s_comma.ignore(pos))
-                    break;
-
-                ASTPtr name;
-                ASTPtr value;
-                if (!parser_identifier.parse(pos, name, expected))
-                    return false;
-                if (!s_eq.ignore(pos))
-                    return false;
-                if (!parser_exp.parse(pos, value, expected))
-                    return false;
-
-                String name_str;
-                if (!tryGetIdentifierNameInto(name, name_str))
-                    return false;
-                group_by_aggregations.emplace_back(name_str, std::move(value));
-            }
+            if (!parser_assignment_list.parse(pos, group_by_assignments, expected))
+                return false;
         }
     }
     else if (mode == TTLMode::DELETE && s_where.ignore(pos))
@@ -1972,8 +1959,8 @@ bool ParserTTLElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
 
     if (mode == TTLMode::GROUP_BY)
     {
-        ttl_element->group_by_key = std::move(ast_group_by_key->children);
-        ttl_element->group_by_aggregations = std::move(group_by_aggregations);
+        ttl_element->group_by_key = std::move(group_by_key->children);
+        ttl_element->group_by_assignments = std::move(group_by_assignments->children);
     }
 
     if (mode == TTLMode::RECOMPRESS)
@@ -2008,4 +1995,31 @@ bool ParserIdentifierWithOptionalParameters::parseImpl(Pos & pos, ASTPtr & node,
     return false;
 }
 
+bool ParserAssignment::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+    auto assignment = std::make_shared<ASTAssignment>();
+    node = assignment;
+
+    ParserIdentifier p_identifier;
+    ParserToken s_equals(TokenType::Equals);
+    ParserExpression p_expression;
+
+    ASTPtr column;
+    if (!p_identifier.parse(pos, column, expected))
+        return false;
+
+    if (!s_equals.ignore(pos, expected))
+        return false;
+
+    ASTPtr expression;
+    if (!p_expression.parse(pos, expression, expected))
+        return false;
+
+    tryGetIdentifierNameInto(column, assignment->column_name);
+    if (expression)
+        assignment->children.push_back(expression);
+
+    return true;
+}
+
 }
diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h
index 917f084a700..1eb17bfb0bd 100644
--- a/src/Parsers/ExpressionElementParsers.h
+++ b/src/Parsers/ExpressionElementParsers.h
@@ -468,4 +468,12 @@ protected:
     bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
 };
 
+/// Part of the UPDATE command or TTL with GROUP BY of the form: col_name = expr
+class ParserAssignment : public IParserBase
+{
+protected:
+    const char * getName() const  override{ return "column assignment"; }
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+};
+
 }
diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp
index f916537f438..5d20e27e486 100644
--- a/src/Parsers/ParserAlterQuery.cpp
+++ b/src/Parsers/ParserAlterQuery.cpp
@@ -11,7 +11,6 @@
 #include <Parsers/ASTIndexDeclaration.h>
 #include <Parsers/ASTAlterQuery.h>
 #include <Parsers/ASTLiteral.h>
-#include <Parsers/ASTAssignment.h>
 #include <Parsers/parseDatabaseAndTableName.h>
 
 
@@ -651,34 +650,6 @@ bool ParserAlterCommandList::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
 }
 
 
-bool ParserAssignment::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
-{
-    auto assignment = std::make_shared<ASTAssignment>();
-    node = assignment;
-
-    ParserIdentifier p_identifier;
-    ParserToken s_equals(TokenType::Equals);
-    ParserExpression p_expression;
-
-    ASTPtr column;
-    if (!p_identifier.parse(pos, column, expected))
-        return false;
-
-    if (!s_equals.ignore(pos, expected))
-        return false;
-
-    ASTPtr expression;
-    if (!p_expression.parse(pos, expression, expected))
-        return false;
-
-    tryGetIdentifierNameInto(column, assignment->column_name);
-    if (expression)
-        assignment->children.push_back(expression);
-
-    return true;
-}
-
-
 bool ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
 {
     auto query = std::make_shared<ASTAlterQuery>();
diff --git a/src/Parsers/ParserAlterQuery.h b/src/Parsers/ParserAlterQuery.h
index 514ef876430..b22b1c6ded2 100644
--- a/src/Parsers/ParserAlterQuery.h
+++ b/src/Parsers/ParserAlterQuery.h
@@ -63,12 +63,4 @@ public:
 };
 
 
-/// Part of the UPDATE command of the form: col_name = expr
-class ParserAssignment : public IParserBase
-{
-protected:
-    const char * getName() const  override{ return "column assignment"; }
-    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-};
-
 }
diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp
index 6cef9e53097..06416bfbf36 100644
--- a/src/Storages/TTLDescription.cpp
+++ b/src/Storages/TTLDescription.cpp
@@ -1,5 +1,6 @@
 #include <Storages/TTLDescription.h>
 
+#include <AggregateFunctions/AggregateFunctionFactory.h>
 #include <Functions/IFunction.h>
 #include <Interpreters/ExpressionAnalyzer.h>
 #include <Interpreters/TreeRewriter.h>
@@ -7,12 +8,13 @@
 #include <Parsers/ASTFunction.h>
 #include <Parsers/ASTTTLElement.h>
 #include <Parsers/ASTIdentifier.h>
+#include <Parsers/ASTAssignment.h>
+#include <Parsers/ASTLiteral.h>
 #include <Storages/ColumnsDescription.h>
 #include <Interpreters/Context.h>
 
 #include <Parsers/queryToString.h>
 
-
 #include <DataTypes/DataTypeDate.h>
 #include <DataTypes/DataTypeDateTime.h>
 
@@ -197,16 +199,31 @@ TTLDescription TTLDescription::getTTLFromAST(
                 used_primary_key_columns_set.insert(pk_columns[i]);
             }
 
-            for (const auto & [name, _] : ttl_element->group_by_aggregations)
+            std::vector<std::pair<String, ASTPtr>> aggregations;
+            for (const auto & ast : ttl_element->group_by_assignments)
+            {
+                const auto assignment = ast->as<const ASTAssignment &>();
+                auto expression = assignment.expression();
+
+                const auto * expression_func = expression->as<const ASTFunction>();
+                if (!expression_func || !AggregateFunctionFactory::instance().isAggregateFunctionName(expression_func->name))
+                    throw Exception(ErrorCodes::BAD_TTL_EXPRESSION,
+                    "Invalid expression for assignment of column {}. Should be an aggregate function", assignment.column_name);
+
+                auto type_literal = std::make_shared<ASTLiteral>(columns.getPhysical(assignment.column_name).type->getName());
+                expression = makeASTFunction("cast", expression->clone(), type_literal);
+                aggregations.emplace_back(assignment.column_name, std::move(expression));
+            }
+
+            for (const auto & [name, _] : aggregations)
                 aggregation_columns_set.insert(name);
 
-            if (aggregation_columns_set.size() != ttl_element->group_by_aggregations.size())
+            if (aggregation_columns_set.size() != ttl_element->group_by_assignments.size())
                 throw Exception(
                     "Multiple aggregations set for one column in TTL Expression",
                     ErrorCodes::BAD_TTL_EXPRESSION);
 
             result.group_by_keys = Names(pk_columns.begin(), pk_columns.begin() + ttl_element->group_by_key.size());
-            auto aggregations = ttl_element->group_by_aggregations;
 
             const auto & primary_key_expressions = primary_key.expression_list_ast->children;
             for (size_t i = ttl_element->group_by_key.size(); i < primary_key_expressions.size(); ++i)
diff --git a/tests/queries/0_stateless/01622_multiple_ttls.reference b/tests/queries/0_stateless/01622_multiple_ttls.reference
index 9b3ac02560c..d9ebb694584 100644
--- a/tests/queries/0_stateless/01622_multiple_ttls.reference
+++ b/tests/queries/0_stateless/01622_multiple_ttls.reference
@@ -1,3 +1,4 @@
+TTL WHERE
 1970-10-10	2
 1970-10-10	5
 1970-10-10	8
@@ -7,3 +8,15 @@
 2000-10-10	5
 2000-10-10	7
 2000-10-10	8
+TTL GROUP BY
+1970-10-01	0	4950
+2000-10-01	0	450
+2000-10-01	1	460
+2000-10-01	2	470
+2000-10-01	3	480
+2000-10-01	4	490
+2000-10-01	5	500
+2000-10-01	6	510
+2000-10-01	7	520
+2000-10-01	8	530
+2000-10-01	9	540
diff --git a/tests/queries/0_stateless/01622_multiple_ttls.sql b/tests/queries/0_stateless/01622_multiple_ttls.sql
index f86256150b5..aa2eeb5759b 100644
--- a/tests/queries/0_stateless/01622_multiple_ttls.sql
+++ b/tests/queries/0_stateless/01622_multiple_ttls.sql
@@ -1,3 +1,4 @@
+SELECT 'TTL WHERE';
 DROP TABLE IF EXISTS ttl_where;
 
 CREATE TABLE ttl_where
@@ -10,11 +11,34 @@ ORDER BY tuple()
 TTL d + toIntervalYear(10) DELETE WHERE i % 3 = 0,
     d + toIntervalYear(40) DELETE WHERE i % 3 = 1;
 
+-- This test will fail at 2040-10-10
+
 INSERT INTO ttl_where SELECT toDate('2000-10-10'), number FROM numbers(10);
 INSERT INTO ttl_where SELECT toDate('1970-10-10'), number FROM numbers(10);
-
 OPTIMIZE TABLE ttl_where FINAL;
 
 SELECT * FROM ttl_where ORDER BY d, i;
 
 DROP TABLE ttl_where;
+
+SELECT 'TTL GROUP BY';
+DROP TABLE IF EXISTS ttl_group_by;
+
+CREATE TABLE ttl_group_by
+(
+    `d` Date,
+    `i` UInt32,
+    `v` UInt64
+)
+ENGINE = MergeTree
+ORDER BY (toStartOfMonth(d), i % 10)
+TTL d + toIntervalYear(10) GROUP BY toStartOfMonth(d), i % 10 SET d = any(toStartOfMonth(d)), i = any(i % 10), v = sum(v),
+    d + toIntervalYear(40) GROUP BY toStartOfMonth(d) SET d = any(toStartOfMonth(d)), v = sum(v);
+
+INSERT INTO ttl_group_by SELECT toDate('2000-10-10'), number, number FROM numbers(100);
+INSERT INTO ttl_group_by SELECT toDate('1970-10-10'), number, number FROM numbers(100);
+OPTIMIZE TABLE ttl_group_by FINAL;
+
+SELECT * FROM ttl_group_by ORDER BY d, i;
+
+DROP TABLE ttl_group_by;

From 58b9ef5a10a6d208b3ba68798015b87096ed42c3 Mon Sep 17 00:00:00 2001
From: Anton Popov <pad11rus@gmail.com>
Date: Tue, 12 Jan 2021 17:04:03 +0300
Subject: [PATCH 0085/1238] fix TTL info serialization

---
 .../MergeTree/MergeTreeDataPartTTLInfo.cpp    | 28 +++++++++++++------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp
index 138e38e3b78..d1916f31cc3 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp
@@ -143,8 +143,8 @@ void MergeTreeDataPartTTLInfos::write(WriteBuffer & out) const
         if (!is_first)
             writeString(",", out);
 
-        writeString(type, out);
-        writeString(R"(:[)", out);
+        writeDoubleQuotedString(type, out);
+        writeString(":[", out);
         for (auto it = infos.begin(); it != infos.end(); ++it)
         {
             if (it != infos.begin())
@@ -162,16 +162,26 @@ void MergeTreeDataPartTTLInfos::write(WriteBuffer & out) const
     };
 
     bool is_first = columns_ttl.empty() && !table_ttl.min;
-    write_infos(moves_ttl, "moves", is_first);
+    if (!moves_ttl.empty())
+    {
+        write_infos(moves_ttl, "moves", is_first);
+        is_first = false;
+    }
 
-    is_first &= moves_ttl.empty();
-    write_infos(recompression_ttl, "recompression", is_first);
+    if (!recompression_ttl.empty())
+    {
+        write_infos(recompression_ttl, "recompression", is_first);
+        is_first = false;
+    }
 
-    is_first &= recompression_ttl.empty();
-    write_infos(group_by_ttl, "group_by", is_first);
+    if (!group_by_ttl.empty())
+    {
+        write_infos(group_by_ttl, "group_by", is_first);
+        is_first = false;
+    }
 
-    is_first &= group_by_ttl.empty();
-    write_infos(rows_where_ttl, "rows_where", is_first);
+    if (!rows_where_ttl.empty())
+        write_infos(rows_where_ttl, "rows_where", is_first);
 
     writeString("}", out);
 }

From aed8c78d0d5ac77d7070bc39cda580ca6e92668f Mon Sep 17 00:00:00 2001
From: Anton Popov <pad11rus@gmail.com>
Date: Tue, 12 Jan 2021 18:35:07 +0300
Subject: [PATCH 0086/1238] better check for existence of aggregate function

---
 src/Parsers/ExpressionElementParsers.cpp |  3 ++-
 src/Storages/TTLDescription.cpp          | 25 ++++++++++++++++++++++--
 2 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp
index 0bcbcac302a..df67417d218 100644
--- a/src/Parsers/ExpressionElementParsers.cpp
+++ b/src/Parsers/ExpressionElementParsers.cpp
@@ -1960,7 +1960,8 @@ bool ParserTTLElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
     if (mode == TTLMode::GROUP_BY)
     {
         ttl_element->group_by_key = std::move(group_by_key->children);
-        ttl_element->group_by_assignments = std::move(group_by_assignments->children);
+        if (group_by_assignments)
+            ttl_element->group_by_assignments = std::move(group_by_assignments->children);
     }
 
     if (mode == TTLMode::RECOMPRESS)
diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp
index 06416bfbf36..42fdd76fc83 100644
--- a/src/Storages/TTLDescription.cpp
+++ b/src/Storages/TTLDescription.cpp
@@ -4,6 +4,7 @@
 #include <Functions/IFunction.h>
 #include <Interpreters/ExpressionAnalyzer.h>
 #include <Interpreters/TreeRewriter.h>
+#include <Interpreters/InDepthNodeVisitor.h>
 #include <Parsers/ASTExpressionList.h>
 #include <Parsers/ASTFunction.h>
 #include <Parsers/ASTTTLElement.h>
@@ -81,6 +82,24 @@ void checkTTLExpression(const ExpressionActionsPtr & ttl_expression, const Strin
     }
 }
 
+class FindAggregateFunctionData
+{
+public:
+    using TypeToVisit = ASTFunction;
+    bool has_aggregate_function = false;
+
+    void visit(const ASTFunction & func, ASTPtr &)
+    {
+        /// Do not throw if found aggregate function inside another aggregate function,
+        /// because it will be checked, while creating expressions.
+        if (AggregateFunctionFactory::instance().isAggregateFunctionName(func.name))
+            has_aggregate_function = true;
+    }
+};
+
+using FindAggregateFunctionFinderMatcher = OneTypeMatcher<FindAggregateFunctionData>;
+using FindAggregateFunctionVisitor = InDepthNodeVisitor<FindAggregateFunctionFinderMatcher, true>;
+
 }
 
 TTLDescription::TTLDescription(const TTLDescription & other)
@@ -205,8 +224,10 @@ TTLDescription TTLDescription::getTTLFromAST(
                 const auto assignment = ast->as<const ASTAssignment &>();
                 auto expression = assignment.expression();
 
-                const auto * expression_func = expression->as<const ASTFunction>();
-                if (!expression_func || !AggregateFunctionFactory::instance().isAggregateFunctionName(expression_func->name))
+                FindAggregateFunctionVisitor::Data data{false};
+                FindAggregateFunctionVisitor(data).visit(expression);
+
+                if (!data.has_aggregate_function)
                     throw Exception(ErrorCodes::BAD_TTL_EXPRESSION,
                     "Invalid expression for assignment of column {}. Should be an aggregate function", assignment.column_name);
 

From 60b88986bf5e0a30412e0b4cbcbd822914ca6a18 Mon Sep 17 00:00:00 2001
From: Anton Popov <pad11rus@gmail.com>
Date: Tue, 12 Jan 2021 19:42:49 +0300
Subject: [PATCH 0087/1238] minor changes near TTL computation

---
 src/DataStreams/ITTLAlgorithm.cpp              |  8 ++++++--
 src/DataStreams/ITTLAlgorithm.h                |  7 ++++++-
 src/DataStreams/TTLAggregationAlgorithm.cpp    |  4 ++--
 src/DataStreams/TTLBlockInputStream.cpp        |  7 +++++--
 src/DataStreams/TTLColumnAlgorithm.cpp         | 18 +++++++-----------
 src/DataStreams/TTLColumnAlgorithm.h           |  4 +++-
 src/DataStreams/TTLDeleteAlgorithm.cpp         |  4 ++--
 src/DataStreams/TTLUpdateInfoAlgorithm.cpp     |  2 +-
 src/Storages/MergeTree/MergeTreeDataWriter.cpp | 12 +++++-------
 src/Storages/TTLDescription.cpp                |  4 ++--
 10 files changed, 39 insertions(+), 31 deletions(-)

diff --git a/src/DataStreams/ITTLAlgorithm.cpp b/src/DataStreams/ITTLAlgorithm.cpp
index f0e98e9ab1c..7513e0c6ce0 100644
--- a/src/DataStreams/ITTLAlgorithm.cpp
+++ b/src/DataStreams/ITTLAlgorithm.cpp
@@ -25,7 +25,8 @@ bool ITTLAlgorithm::isTTLExpired(time_t ttl) const
     return (ttl && (ttl <= current_time));
 }
 
-ColumnPtr ITTLAlgorithm::extractRequieredColumn(const ExpressionActionsPtr & expression, const Block & block, const String & result_column)
+ColumnPtr ITTLAlgorithm::executeExpressionAndGetColumn(
+    const ExpressionActionsPtr & expression, const Block & block, const String & result_column)
 {
     if (!expression)
         return nullptr;
@@ -37,7 +38,10 @@ ColumnPtr ITTLAlgorithm::extractRequieredColumn(const ExpressionActionsPtr & exp
     for (const auto & column_name : expression->getRequiredColumns())
         block_copy.insert(block.getByName(column_name));
 
-    expression->execute(block_copy);
+    /// Keep number of rows for const expression.
+    size_t num_rows = block.rows();
+    expression->execute(block_copy, num_rows);
+
     return block_copy.getByName(result_column).column;
 }
 
diff --git a/src/DataStreams/ITTLAlgorithm.h b/src/DataStreams/ITTLAlgorithm.h
index 28a371e9289..429ca4bcc61 100644
--- a/src/DataStreams/ITTLAlgorithm.h
+++ b/src/DataStreams/ITTLAlgorithm.h
@@ -23,10 +23,15 @@ public:
     bool isMinTTLExpired() const { return force || isTTLExpired(old_ttl_info.min); }
     bool isMaxTTLExpired() const { return isTTLExpired(old_ttl_info.max); }
 
+    /** This function is needed to avoid a conflict between already calculated columns and columns that needed to execute TTL.
+      * If result column is absent in block, all required columns are copied to new block and expression is executed on new block.
+      */
+    static ColumnPtr executeExpressionAndGetColumn(
+        const ExpressionActionsPtr & expression, const Block & block, const String & result_column);
+
 protected:
     bool isTTLExpired(time_t ttl) const;
     UInt32 getTimestampByIndex(const IColumn * column, size_t index) const;
-    static ColumnPtr extractRequieredColumn(const ExpressionActionsPtr & expression, const Block & block, const String & result_column);
 
     const TTLDescription description;
     const TTLInfo old_ttl_info;
diff --git a/src/DataStreams/TTLAggregationAlgorithm.cpp b/src/DataStreams/TTLAggregationAlgorithm.cpp
index 6cc1ac00b7e..ebe08159c55 100644
--- a/src/DataStreams/TTLAggregationAlgorithm.cpp
+++ b/src/DataStreams/TTLAggregationAlgorithm.cpp
@@ -56,8 +56,8 @@ void TTLAggregationAlgorithm::execute(Block & block)
     MutableColumns result_columns = header.cloneEmptyColumns();
     MutableColumns aggregate_columns = header.cloneEmptyColumns();
 
-    auto ttl_column = extractRequieredColumn(description.expression, block, description.result_column);
-    auto where_column = extractRequieredColumn(description.where_expression, block, description.where_result_column);
+    auto ttl_column = executeExpressionAndGetColumn(description.expression, block, description.result_column);
+    auto where_column = executeExpressionAndGetColumn(description.where_expression, block, description.where_result_column);
 
     size_t rows_aggregated = 0;
     size_t current_key_start = 0;
diff --git a/src/DataStreams/TTLBlockInputStream.cpp b/src/DataStreams/TTLBlockInputStream.cpp
index 5154949ae71..7dd5952bb07 100644
--- a/src/DataStreams/TTLBlockInputStream.cpp
+++ b/src/DataStreams/TTLBlockInputStream.cpp
@@ -60,19 +60,22 @@ TTLBlockInputStream::TTLBlockInputStream(
         for (const auto & [name, description] : metadata_snapshot_->getColumnTTLs())
         {
             ExpressionActionsPtr default_expression;
+            String default_column_name;
             auto it = column_defaults.find(name);
             if (it != column_defaults.end())
             {
                 const auto & column = storage_columns.get(name);
                 auto default_ast = it->second.expression->clone();
-                default_ast = setAlias(addTypeConversionToAST(std::move(default_ast), column.type->getName()), it->first);
+                default_ast = addTypeConversionToAST(std::move(default_ast), column.type->getName());
 
                 auto syntax_result = TreeRewriter(storage_.global_context).analyze(default_ast, metadata_snapshot_->getColumns().getAllPhysical());
                 default_expression = ExpressionAnalyzer{default_ast, syntax_result, storage_.global_context}.getActions(true);
+                default_column_name = default_ast->getColumnName();
             }
 
             algorithms.emplace_back(std::make_unique<TTLColumnAlgorithm>(
-                description, old_ttl_infos.columns_ttl[name], current_time_, force_, name, default_expression));
+                description, old_ttl_infos.columns_ttl[name], current_time_,
+                force_, name, default_expression, default_column_name));
         }
     }
 
diff --git a/src/DataStreams/TTLColumnAlgorithm.cpp b/src/DataStreams/TTLColumnAlgorithm.cpp
index afab3af62a7..140631ac0bf 100644
--- a/src/DataStreams/TTLColumnAlgorithm.cpp
+++ b/src/DataStreams/TTLColumnAlgorithm.cpp
@@ -9,10 +9,12 @@ TTLColumnAlgorithm::TTLColumnAlgorithm(
     time_t current_time_,
     bool force_,
     const String & column_name_,
-    const ExpressionActionsPtr & default_expression_)
+    const ExpressionActionsPtr & default_expression_,
+    const String & default_column_name_)
     : ITTLAlgorithm(description_, old_ttl_info_, current_time_, force_)
     , column_name(column_name_)
     , default_expression(default_expression_)
+    , default_column_name(default_column_name_)
 {
     if (!isMinTTLExpired())
     {
@@ -38,17 +40,11 @@ void TTLColumnAlgorithm::execute(Block & block)
     if (isMaxTTLExpired())
         return;
 
-    //// TODO: use extractRequiredColumn
-    ColumnPtr default_column;
-    if (default_expression)
-    {
-        Block block_with_defaults;
-        block_with_defaults = block;
-        default_expression->execute(block_with_defaults);
-        default_column = block_with_defaults.getByName(column_name).column->convertToFullColumnIfConst();
-    }
+    auto default_column = executeExpressionAndGetColumn(default_expression, block, default_column_name);
+    if (default_column)
+        default_column = default_column->convertToFullColumnIfConst();
 
-    auto ttl_column = extractRequieredColumn(description.expression, block, description.result_column);
+    auto ttl_column = executeExpressionAndGetColumn(description.expression, block, description.result_column);
 
     auto & column_with_type = block.getByName(column_name);
     const IColumn * values_column = column_with_type.column.get();
diff --git a/src/DataStreams/TTLColumnAlgorithm.h b/src/DataStreams/TTLColumnAlgorithm.h
index b2824dba9b0..3b1c199292d 100644
--- a/src/DataStreams/TTLColumnAlgorithm.h
+++ b/src/DataStreams/TTLColumnAlgorithm.h
@@ -14,7 +14,8 @@ public:
         time_t current_time_,
         bool force_,
         const String & column_name_,
-        const ExpressionActionsPtr & default_expression_);
+        const ExpressionActionsPtr & default_expression_,
+        const String & default_column_name_);
 
     void execute(Block & block) override;
     void finalize(const MutableDataPartPtr & data_part) const override;
@@ -22,6 +23,7 @@ public:
 private:
     const String column_name;
     const ExpressionActionsPtr default_expression;
+    const String default_column_name;
 
     bool is_fully_empty = true;
 };
diff --git a/src/DataStreams/TTLDeleteAlgorithm.cpp b/src/DataStreams/TTLDeleteAlgorithm.cpp
index 7227b40dad2..c364bb06f3e 100644
--- a/src/DataStreams/TTLDeleteAlgorithm.cpp
+++ b/src/DataStreams/TTLDeleteAlgorithm.cpp
@@ -16,8 +16,8 @@ void TTLDeleteAlgorithm::execute(Block & block)
     if (!block || !isMinTTLExpired())
         return;
 
-    auto ttl_column = extractRequieredColumn(description.expression, block, description.result_column);
-    auto where_column = extractRequieredColumn(description.where_expression, block, description.where_result_column);
+    auto ttl_column = executeExpressionAndGetColumn(description.expression, block, description.result_column);
+    auto where_column = executeExpressionAndGetColumn(description.where_expression, block, description.where_result_column);
 
     MutableColumns result_columns;
     const auto & column_names = block.getNames();
diff --git a/src/DataStreams/TTLUpdateInfoAlgorithm.cpp b/src/DataStreams/TTLUpdateInfoAlgorithm.cpp
index ce4d4128eec..d5feb14658b 100644
--- a/src/DataStreams/TTLUpdateInfoAlgorithm.cpp
+++ b/src/DataStreams/TTLUpdateInfoAlgorithm.cpp
@@ -14,7 +14,7 @@ void TTLUpdateInfoAlgorithm::execute(Block & block)
     if (!block)
         return;
 
-    auto ttl_column = extractRequieredColumn(description.expression, block, description.result_column);
+    auto ttl_column = executeExpressionAndGetColumn(description.expression, block, description.result_column);
     for (size_t i = 0; i < block.rows(); ++i)
     {
         UInt32 cur_ttl = ITTLAlgorithm::getTimestampByIndex(ttl_column.get(), i);
diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp
index 7c733c660d6..42fc24c8c8e 100644
--- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp
@@ -13,6 +13,7 @@
 #include <Poco/File.h>
 #include <Common/typeid_cast.h>
 #include <Common/FileSyncGuard.h>
+#include <DataStreams/ITTLAlgorithm.h>
 
 #include <Parsers/queryToString.h>
 
@@ -95,23 +96,20 @@ void updateTTL(
     const Block & block,
     bool update_part_min_max_ttls)
 {
-    Block block_copy = block;
-    if (!block_copy.has(ttl_entry.result_column))
-        ttl_entry.expression->execute(block_copy);
+    auto ttl_column = ITTLAlgorithm::executeExpressionAndGetColumn(ttl_entry.expression, block, ttl_entry.result_column);
 
-    const IColumn * column = block_copy.getByName(ttl_entry.result_column).column.get();
-    if (const ColumnUInt16 * column_date = typeid_cast<const ColumnUInt16 *>(column))
+    if (const ColumnUInt16 * column_date = typeid_cast<const ColumnUInt16 *>(ttl_column.get()))
     {
         const auto & date_lut = DateLUT::instance();
         for (const auto & val : column_date->getData())
             ttl_info.update(date_lut.fromDayNum(DayNum(val)));
     }
-    else if (const ColumnUInt32 * column_date_time = typeid_cast<const ColumnUInt32 *>(column))
+    else if (const ColumnUInt32 * column_date_time = typeid_cast<const ColumnUInt32 *>(ttl_column.get()))
     {
         for (const auto & val : column_date_time->getData())
             ttl_info.update(val);
     }
-    else if (const ColumnConst * column_const = typeid_cast<const ColumnConst *>(column))
+    else if (const ColumnConst * column_const = typeid_cast<const ColumnConst *>(ttl_column.get()))
     {
         if (typeid_cast<const ColumnUInt16 *>(&column_const->getDataColumn()))
         {
diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp
index 42fdd76fc83..19195e6ba6d 100644
--- a/src/Storages/TTLDescription.cpp
+++ b/src/Storages/TTLDescription.cpp
@@ -5,6 +5,7 @@
 #include <Interpreters/ExpressionAnalyzer.h>
 #include <Interpreters/TreeRewriter.h>
 #include <Interpreters/InDepthNodeVisitor.h>
+#include <Interpreters/addTypeConversionToAST.h>
 #include <Parsers/ASTExpressionList.h>
 #include <Parsers/ASTFunction.h>
 #include <Parsers/ASTTTLElement.h>
@@ -231,8 +232,7 @@ TTLDescription TTLDescription::getTTLFromAST(
                     throw Exception(ErrorCodes::BAD_TTL_EXPRESSION,
                     "Invalid expression for assignment of column {}. Should be an aggregate function", assignment.column_name);
 
-                auto type_literal = std::make_shared<ASTLiteral>(columns.getPhysical(assignment.column_name).type->getName());
-                expression = makeASTFunction("cast", expression->clone(), type_literal);
+                expression = addTypeConversionToAST(std::move(expression), columns.getPhysical(assignment.column_name).type->getName());
                 aggregations.emplace_back(assignment.column_name, std::move(expression));
             }
 

From b09862b7b92d37238202871897b2897d15a86d72 Mon Sep 17 00:00:00 2001
From: Pavel Kovalenko <jokserfn@yandex-team.ru>
Date: Tue, 12 Jan 2021 20:18:40 +0300
Subject: [PATCH 0088/1238] Ability to backup-restore metadata files for DiskS3
 (fixes and tests)

---
 src/Disks/DiskCacheWrapper.cpp                |   3 +-
 src/Disks/DiskDecorator.cpp                   |   5 +
 src/Disks/DiskDecorator.h                     |   1 +
 src/Disks/S3/DiskS3.cpp                       |  15 +-
 src/Disks/S3/DiskS3.h                         |   6 +-
 src/Storages/MergeTree/MergeTreeData.cpp      |   2 +-
 .../MergeTree/MergeTreeDataMergerMutator.cpp  |   2 +-
 tests/integration/helpers/cluster.py          |  18 +-
 .../config.d/storage_conf_another_bucket.xml  |  34 +++
 .../storage_conf_another_bucket_path.xml      |  34 +++
 .../test_merge_tree_s3_restore/test.py        | 260 ++++++++++++++++--
 11 files changed, 336 insertions(+), 44 deletions(-)
 create mode 100644 tests/integration/test_merge_tree_s3_restore/configs/config.d/storage_conf_another_bucket.xml
 create mode 100644 tests/integration/test_merge_tree_s3_restore/configs/config.d/storage_conf_another_bucket_path.xml

diff --git a/src/Disks/DiskCacheWrapper.cpp b/src/Disks/DiskCacheWrapper.cpp
index 8dc8a005f57..eab3f1fddd7 100644
--- a/src/Disks/DiskCacheWrapper.cpp
+++ b/src/Disks/DiskCacheWrapper.cpp
@@ -255,7 +255,8 @@ void DiskCacheWrapper::removeRecursive(const String & path)
 
 void DiskCacheWrapper::createHardLink(const String & src_path, const String & dst_path)
 {
-    if (cache_disk->exists(src_path))
+    /// Don't create hardlinks for cache files to shadow directory as it just waste cache disk space.
+    if (cache_disk->exists(src_path) && !dst_path.starts_with("shadow/"))
     {
         auto dir_path = directoryPath(dst_path);
         if (!cache_disk->exists(dir_path))
diff --git a/src/Disks/DiskDecorator.cpp b/src/Disks/DiskDecorator.cpp
index 8441803a2af..a7154e12e8e 100644
--- a/src/Disks/DiskDecorator.cpp
+++ b/src/Disks/DiskDecorator.cpp
@@ -180,4 +180,9 @@ Executor & DiskDecorator::getExecutor()
     return delegate->getExecutor();
 }
 
+void DiskDecorator::onFreeze(const String & path)
+{
+    delegate->onFreeze(path);
+}
+
 }
diff --git a/src/Disks/DiskDecorator.h b/src/Disks/DiskDecorator.h
index eed3c77abf6..e3c036cf3e1 100644
--- a/src/Disks/DiskDecorator.h
+++ b/src/Disks/DiskDecorator.h
@@ -50,6 +50,7 @@ public:
     void sync(int fd) const override;
     const String getType() const override { return delegate->getType(); }
     Executor & getExecutor() override;
+    void onFreeze(const String & path) override;
 
 protected:
     DiskPtr delegate;
diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp
index a13fa148413..5787457bf11 100644
--- a/src/Disks/S3/DiskS3.cpp
+++ b/src/Disks/S3/DiskS3.cpp
@@ -40,6 +40,7 @@ namespace ErrorCodes
     extern const int UNKNOWN_FORMAT;
     extern const int INCORRECT_DISK_INDEX;
     extern const int NOT_IMPLEMENTED;
+    extern const int BAD_ARGUMENTS;
     extern const int PATH_ACCESS_DENIED;
     extern const int LOGICAL_ERROR;
 }
@@ -848,7 +849,7 @@ Poco::Timestamp DiskS3::getLastModified(const String & path)
 void DiskS3::createHardLink(const String & src_path, const String & dst_path)
 {
     /// We don't need to record hardlinks created to shadow folder.
-    if (send_metadata && dst_path.find("/shadow/") != String::npos)
+    if (send_metadata && !dst_path.starts_with("shadow/"))
     {
         auto revision = ++revision_counter;
         const ObjectMetadata object_metadata {
@@ -1075,6 +1076,9 @@ void DiskS3::restore()
 
         ///TODO: Cleanup FS and bucket if previous restore was failed.
 
+        LOG_INFO(&Poco::Logger::get("DiskS3"), "Starting to restore disk {}. Revision: {}, Source bucket: {}, Source path: {}",
+                 name, information.revision, information.source_bucket, information.source_path);
+
         restoreFiles(information.source_bucket, information.source_path, information.revision);
         restoreFileOperations(information.source_bucket, information.source_path, information.revision);
 
@@ -1085,6 +1089,8 @@ void DiskS3::restore()
     }
     catch (const Exception & e)
     {
+        LOG_ERROR(&Poco::Logger::get("DiskS3"), "Failed to restore disk. Code: {}, e.displayText() = {}, Stack trace:\n\n{}", e.code(), e.displayText(), e.getStackTraceString());
+
         throw Exception("Failed to restore disk: " + name, e, ErrorCodes::LOGICAL_ERROR);
     }
 }
@@ -1206,7 +1212,7 @@ void DiskS3::restoreFileOperations(const String & source_bucket, const String &
                 if (exists(from_path))
                 {
                     moveFile(from_path, to_path);
-                    LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Restored rename {} -> {}", from_path, to_path);
+                    LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Revision {}. Restored rename {} -> {}", revision, from_path, to_path);
                 }
             }
             else if (operation == hardlink)
@@ -1215,8 +1221,9 @@ void DiskS3::restoreFileOperations(const String & source_bucket, const String &
                 auto dst_path = object_metadata["dst_path"];
                 if (exists(src_path))
                 {
+                    createDirectories(directoryPath(dst_path));
                     createHardLink(src_path, dst_path);
-                    LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Restored hardlink {} -> {}", src_path, dst_path);
+                    LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Revision {}. Restored hardlink {} -> {}", revision, src_path, dst_path);
                 }
             }
         }
@@ -1262,8 +1269,10 @@ String DiskS3::revisionToString(UInt64 revision)
 
 void DiskS3::onFreeze(const String & path)
 {
+    createDirectories(path);
     WriteBufferFromFile revision_file_buf(metadata_path + path + "revision.txt", 32);
     writeIntText(revision_counter.load(), revision_file_buf);
+    revision_file_buf.finalize();
 }
 
 }
diff --git a/src/Disks/S3/DiskS3.h b/src/Disks/S3/DiskS3.h
index c330bf0c4e6..bc5055b942a 100644
--- a/src/Disks/S3/DiskS3.h
+++ b/src/Disks/S3/DiskS3.h
@@ -126,6 +126,7 @@ public:
     /// Restore S3 metadata files on file system.
     void restore();
 
+    /// Dumps current revision counter into file 'revision.txt' at given path.
     void onFreeze(const String & path) override;
 
 private:
@@ -156,7 +157,6 @@ private:
     static String shrinkKey(const String & path, const String & key);
     std::tuple<UInt64, String> extractRevisionAndOperationFromKey(const String & key);
 
-private:
     const String name;
     std::shared_ptr<Aws::S3::S3Client> client;
     std::shared_ptr<S3::ProxyConfiguration> proxy_configuration;
@@ -176,9 +176,9 @@ private:
     static constexpr UInt64 LATEST_REVISION = (static_cast<UInt64>(1)) << 63;
     static constexpr UInt64 UNKNOWN_REVISION = 0;
 
-    /// File at path {metadata_path}/restore indicates that metadata restore is needed and contains restore information
+    /// File at path {metadata_path}/restore contains metadata restore information
     const String restore_file_name = "restore";
-    /// The number of keys listed in one request (1000 is max value).
+    /// The number of keys listed in one request (1000 is max value)
     int list_object_keys_size;
 
     /// Key has format: ../../r{revision}-{operation}
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index ddc0e7c7808..6a64c69c987 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -3619,7 +3619,7 @@ PartitionCommandsResultInfo MergeTreeData::freezePartitionsByMatcher(MatcherFn m
 
         LOG_DEBUG(log, "Freezing part {} snapshot will be placed at {}", part->name, backup_path);
 
-        part->volume->getDisk()->createDirectories(shadow_path);
+        part->volume->getDisk()->createDirectories(backup_path);
 
         String backup_part_path = backup_path + relative_data_path + part->relative_path;
         if (auto part_in_memory = asInMemoryPart(part))
diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
index 2365ef141b6..9b0daba0749 100644
--- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
@@ -1234,7 +1234,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor
             if (files_to_skip.count(it->name()))
                 continue;
 
-            String destination = new_part_tmp_path + "/";
+            String destination = new_part_tmp_path;
             String file_name = it->name();
             auto rename_it = std::find_if(files_to_rename.begin(), files_to_rename.end(), [&file_name](const auto & rename_pair) { return rename_pair.first == file_name; });
             if (rename_it != files_to_rename.end())
diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py
index 43c553f5318..65f438b6575 100644
--- a/tests/integration/helpers/cluster.py
+++ b/tests/integration/helpers/cluster.py
@@ -147,6 +147,7 @@ class ClickHouseCluster:
         self.minio_certs_dir = None
         self.minio_host = "minio1"
         self.minio_bucket = "root"
+        self.minio_bucket_2 = "root2"
         self.minio_port = 9001
         self.minio_client = None  # type: Minio
         self.minio_redirect_host = "proxy1"
@@ -549,17 +550,18 @@ class ClickHouseCluster:
 
                 print("Connected to Minio.")
 
-                if minio_client.bucket_exists(self.minio_bucket):
-                    minio_client.remove_bucket(self.minio_bucket)
+                buckets = [self.minio_bucket, self.minio_bucket_2]
 
-                minio_client.make_bucket(self.minio_bucket)
-
-                print(("S3 bucket '%s' created", self.minio_bucket))
+                for bucket in buckets:
+                    if minio_client.bucket_exists(bucket):
+                        minio_client.remove_bucket(bucket)
+                    minio_client.make_bucket(bucket)
+                    print("S3 bucket '%s' created", bucket)
 
                 self.minio_client = minio_client
                 return
             except Exception as ex:
-                print(("Can't connect to Minio: %s", str(ex)))
+                print("Can't connect to Minio: %s", str(ex))
                 time.sleep(1)
 
         raise Exception("Can't wait Minio to start")
@@ -1078,7 +1080,9 @@ class ClickHouseInstance:
         return self.cluster.copy_file_to_container(container_id, local_path, dest_path)
 
     def get_process_pid(self, process_name):
-        output = self.exec_in_container(["bash", "-c", "pidof {}".format(process_name)])
+        output = self.exec_in_container(["bash", "-c",
+                                         "ps ax | grep '{}' | grep -v 'grep' | grep -v 'bash -c' | awk '{{print $1}}'".format(
+                                             process_name)])
         if output:
             try:
                 pid = int(output.split('\n')[0].strip())
diff --git a/tests/integration/test_merge_tree_s3_restore/configs/config.d/storage_conf_another_bucket.xml b/tests/integration/test_merge_tree_s3_restore/configs/config.d/storage_conf_another_bucket.xml
new file mode 100644
index 00000000000..645d1111ab8
--- /dev/null
+++ b/tests/integration/test_merge_tree_s3_restore/configs/config.d/storage_conf_another_bucket.xml
@@ -0,0 +1,34 @@
+<yandex>
+    <storage_configuration>
+        <disks>
+            <s3>
+                <type>s3</type>
+                <endpoint>http://minio1:9001/root2/data/</endpoint>
+                <access_key_id>minio</access_key_id>
+                <secret_access_key>minio123</secret_access_key>
+                <send_metadata>true</send_metadata>
+                <list_object_keys_size>1</list_object_keys_size> <!-- To effectively test restore parallelism -->
+            </s3>
+            <hdd>
+                <type>local</type>
+                <path>/</path>
+            </hdd>
+        </disks>
+        <policies>
+            <s3>
+                <volumes>
+                    <main>
+                        <disk>s3</disk>
+                    </main>
+                    <external>
+                        <disk>hdd</disk>
+                    </external>
+                </volumes>
+            </s3>
+        </policies>
+    </storage_configuration>
+
+    <merge_tree>
+        <min_bytes_for_wide_part>0</min_bytes_for_wide_part>
+    </merge_tree>
+</yandex>
diff --git a/tests/integration/test_merge_tree_s3_restore/configs/config.d/storage_conf_another_bucket_path.xml b/tests/integration/test_merge_tree_s3_restore/configs/config.d/storage_conf_another_bucket_path.xml
new file mode 100644
index 00000000000..42207674c79
--- /dev/null
+++ b/tests/integration/test_merge_tree_s3_restore/configs/config.d/storage_conf_another_bucket_path.xml
@@ -0,0 +1,34 @@
+<yandex>
+    <storage_configuration>
+        <disks>
+            <s3>
+                <type>s3</type>
+                <endpoint>http://minio1:9001/root2/another_data/</endpoint>
+                <access_key_id>minio</access_key_id>
+                <secret_access_key>minio123</secret_access_key>
+                <send_metadata>true</send_metadata>
+                <list_object_keys_size>1</list_object_keys_size> <!-- To effectively test restore parallelism -->
+            </s3>
+            <hdd>
+                <type>local</type>
+                <path>/</path>
+            </hdd>
+        </disks>
+        <policies>
+            <s3>
+                <volumes>
+                    <main>
+                        <disk>s3</disk>
+                    </main>
+                    <external>
+                        <disk>hdd</disk>
+                    </external>
+                </volumes>
+            </s3>
+        </policies>
+    </storage_configuration>
+
+    <merge_tree>
+        <min_bytes_for_wide_part>0</min_bytes_for_wide_part>
+    </merge_tree>
+</yandex>
diff --git a/tests/integration/test_merge_tree_s3_restore/test.py b/tests/integration/test_merge_tree_s3_restore/test.py
index 6cafc077e81..8859fa73299 100644
--- a/tests/integration/test_merge_tree_s3_restore/test.py
+++ b/tests/integration/test_merge_tree_s3_restore/test.py
@@ -14,9 +14,18 @@ logging.getLogger().addHandler(logging.StreamHandler())
 def cluster():
     try:
         cluster = ClickHouseCluster(__file__)
-        cluster.add_instance("node", main_configs=["configs/config.d/storage_conf.xml",
-                                                   "configs/config.d/bg_processing_pool_conf.xml",
-                                                   "configs/config.d/log_conf.xml"], user_configs=[], with_minio=True, stay_alive=True)
+        cluster.add_instance("node", main_configs=[
+            "configs/config.d/storage_conf.xml",
+            "configs/config.d/bg_processing_pool_conf.xml",
+            "configs/config.d/log_conf.xml"], user_configs=[], with_minio=True, stay_alive=True)
+        cluster.add_instance("node_another_bucket", main_configs=[
+            "configs/config.d/storage_conf_another_bucket.xml",
+            "configs/config.d/bg_processing_pool_conf.xml",
+            "configs/config.d/log_conf.xml"], user_configs=[], stay_alive=True)
+        cluster.add_instance("node_another_bucket_path", main_configs=[
+            "configs/config.d/storage_conf_another_bucket_path.xml",
+            "configs/config.d/bg_processing_pool_conf.xml",
+            "configs/config.d/log_conf.xml"], user_configs=[], stay_alive=True)
         logging.info("Starting cluster...")
         cluster.start()
         logging.info("Cluster started")
@@ -34,17 +43,18 @@ def random_string(length):
 def generate_values(date_str, count, sign=1):
     data = [[date_str, sign * (i + 1), random_string(10)] for i in range(count)]
     data.sort(key=lambda tup: tup[1])
-    return ",".join(["('{}',{},'{}')".format(x, y, z) for x, y, z in data])
+    return ",".join(["('{}',{},'{}',{})".format(x, y, z, 0) for x, y, z in data])
 
 
-def create_table(cluster, table_name, additional_settings=None):
-    node = cluster.instances["node"]
+def create_table(node, table_name, additional_settings=None):
+    node.query("CREATE DATABASE IF NOT EXISTS s3 ENGINE = Ordinary")
 
     create_table_statement = """
-        CREATE TABLE {} (
+        CREATE TABLE s3.{} (
             dt Date,
             id Int64,
             data String,
+            counter Int64,
             INDEX min_max (id) TYPE minmax GRANULARITY 3
         ) ENGINE=MergeTree()
         PARTITION BY dt
@@ -62,45 +72,239 @@ def create_table(cluster, table_name, additional_settings=None):
     node.query(create_table_statement)
 
 
+def purge_s3(cluster, bucket):
+    minio = cluster.minio_client
+    for obj in list(minio.list_objects(bucket, recursive=True)):
+        minio.remove_object(bucket, obj.object_name)
+
+
+def drop_s3_metadata(node):
+    node.exec_in_container(['bash', '-c', 'rm -rf /var/lib/clickhouse/disks/s3/*'], user='root')
+
+
+def drop_shadow_information(node):
+    node.exec_in_container(['bash', '-c', 'rm -rf /var/lib/clickhouse/shadow/*'], user='root')
+
+
+def create_restore_file(node, revision='0', bucket=None, path=None):
+    add_restore_option = 'echo -en "{}\n" >> /var/lib/clickhouse/disks/s3/restore'
+    node.exec_in_container(['bash', '-c', add_restore_option.format(revision)], user='root')
+    if bucket:
+        node.exec_in_container(['bash', '-c', add_restore_option.format(bucket)], user='root')
+    if path:
+        node.exec_in_container(['bash', '-c', add_restore_option.format(path)], user='root')
+
+
+def get_revision_counter(node, backup_number):
+    return node.exec_in_container(['bash', '-c', 'cat /var/lib/clickhouse/disks/s3/shadow/{}/revision.txt'.format(backup_number)], user='root')
+
+
 @pytest.fixture(autouse=True)
 def drop_table(cluster):
     yield
-    node = cluster.instances["node"]
-    minio = cluster.minio_client
 
-    node.query("DROP TABLE IF EXISTS s3_test NO DELAY")
+    node_names = ["node", "node_another_bucket", "node_another_bucket_path"]
 
-    for obj in list(minio.list_objects(cluster.minio_bucket, 'data/')):
-        minio.remove_object(cluster.minio_bucket, obj.object_name)
+    for node_name in node_names:
+        node = cluster.instances[node_name]
+        node.query("DROP TABLE IF EXISTS s3.test NO DELAY")
+
+        drop_s3_metadata(node)
+        drop_shadow_information(node)
+
+    buckets = [cluster.minio_bucket, cluster.minio_bucket_2]
+    for bucket in buckets:
+        purge_s3(cluster, bucket)
 
 
-# Restore to the same bucket and path with latest revision.
-def test_simple_full_restore(cluster):
-    create_table(cluster, "s3_test")
-
+def test_full_restore(cluster):
     node = cluster.instances["node"]
 
-    node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-03', 4096)))
-    node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-04', 4096, -1)))
-    node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-05', 4096)))
-    node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-05', 4096, -1)))
+    create_table(node, "test")
+
+    node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-03', 4096)))
+    node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-04', 4096, -1)))
+    node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-05', 4096)))
+    node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-05', 4096, -1)))
 
     # To ensure parts have merged
-    node.query("OPTIMIZE TABLE s3_test")
+    node.query("OPTIMIZE TABLE s3.test")
 
-    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "({})".format(4096 * 4)
-    assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "({})".format(0)
+    assert node.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 4)
+    assert node.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0)
 
     node.stop_clickhouse()
-    node.exec_in_container(['bash', '-c', 'rm -r /var/lib/clickhouse/disks/s3/*'], user='root')
+    drop_s3_metadata(node)
     node.start_clickhouse()
 
     # All data is removed.
-    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "({})".format(0)
+    assert node.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(0)
 
     node.stop_clickhouse()
-    node.exec_in_container(['bash', '-c', 'touch /var/lib/clickhouse/disks/s3/restore'], user='root')
+    create_restore_file(node)
     node.start_clickhouse()
 
-    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "({})".format(4096 * 4)
-    assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "({})".format(0)
+    assert node.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 4)
+    assert node.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0)
+
+
+def test_restore_another_bucket_path(cluster):
+    node = cluster.instances["node"]
+
+    create_table(node, "test")
+
+    node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-03', 4096)))
+    node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-04', 4096, -1)))
+    node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-05', 4096)))
+    node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-05', 4096, -1)))
+
+    # To ensure parts have merged
+    node.query("OPTIMIZE TABLE s3.test")
+
+    assert node.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 4)
+    assert node.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0)
+
+    node_another_bucket = cluster.instances["node_another_bucket"]
+
+    create_table(node_another_bucket, "test")
+
+    node_another_bucket.stop_clickhouse()
+    create_restore_file(node_another_bucket, bucket="root")
+    node_another_bucket.start_clickhouse()
+
+    assert node_another_bucket.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 4)
+    assert node_another_bucket.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0)
+
+    node_another_bucket_path = cluster.instances["node_another_bucket_path"]
+
+    create_table(node_another_bucket_path, "test")
+
+    node_another_bucket_path.stop_clickhouse()
+    create_restore_file(node_another_bucket_path, bucket="root2", path="data")
+    node_another_bucket_path.start_clickhouse()
+
+    assert node_another_bucket_path.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 4)
+    assert node_another_bucket_path.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0)
+
+
+def test_restore_different_revisions(cluster):
+    node = cluster.instances["node"]
+
+    create_table(node, "test")
+
+    node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-03', 4096)))
+    node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-04', 4096, -1)))
+
+    node.query("ALTER TABLE s3.test FREEZE")
+    revision1 = get_revision_counter(node, 1)
+
+    node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-05', 4096)))
+    node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-05', 4096, -1)))
+
+    node.query("ALTER TABLE s3.test FREEZE")
+    revision2 = get_revision_counter(node, 2)
+
+    # To ensure parts have merged
+    node.query("OPTIMIZE TABLE s3.test")
+
+    node.query("ALTER TABLE s3.test FREEZE")
+    revision3 = get_revision_counter(node, 3)
+
+    assert node.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 4)
+    assert node.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0)
+    assert node.query("SELECT count(*) from system.parts where table = 'test'") == '5\n'
+
+    node_another_bucket = cluster.instances["node_another_bucket"]
+
+    create_table(node_another_bucket, "test")
+
+    # Restore to revision 1 (2 parts).
+    node_another_bucket.stop_clickhouse()
+    drop_s3_metadata(node_another_bucket)
+    purge_s3(cluster, cluster.minio_bucket_2)
+    create_restore_file(node_another_bucket, revision=revision1, bucket="root")
+    node_another_bucket.start_clickhouse()
+
+    assert node_another_bucket.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 2)
+    assert node_another_bucket.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0)
+    assert node_another_bucket.query("SELECT count(*) from system.parts where table = 'test'") == '2\n'
+
+    # Restore to revision 2 (4 parts).
+    node_another_bucket.stop_clickhouse()
+    drop_s3_metadata(node_another_bucket)
+    purge_s3(cluster, cluster.minio_bucket_2)
+    create_restore_file(node_another_bucket, revision=revision2, bucket="root")
+    node_another_bucket.start_clickhouse()
+
+    assert node_another_bucket.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 4)
+    assert node_another_bucket.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0)
+    assert node_another_bucket.query("SELECT count(*) from system.parts where table = 'test'") == '4\n'
+
+    # Restore to revision 3 (4 parts + 1 merged).
+    node_another_bucket.stop_clickhouse()
+    drop_s3_metadata(node_another_bucket)
+    purge_s3(cluster, cluster.minio_bucket_2)
+    create_restore_file(node_another_bucket, revision=revision3, bucket="root")
+    node_another_bucket.start_clickhouse()
+
+    assert node_another_bucket.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 4)
+    assert node_another_bucket.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0)
+    assert node_another_bucket.query("SELECT count(*) from system.parts where table = 'test'") == '5\n'
+
+
+def test_restore_mutations(cluster):
+    node = cluster.instances["node"]
+
+    create_table(node, "test")
+
+    node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-03', 4096)))
+    node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-03', 4096, -1)))
+
+    node.query("ALTER TABLE s3.test FREEZE")
+    revision_before_mutation = get_revision_counter(node, 1)
+
+    node.query("ALTER TABLE s3.test UPDATE counter = 1 WHERE 1", settings={"mutations_sync": 2})
+
+    node.query("ALTER TABLE s3.test FREEZE")
+    revision_after_mutation = get_revision_counter(node, 2)
+
+    node_another_bucket = cluster.instances["node_another_bucket"]
+
+    create_table(node_another_bucket, "test")
+
+    # Restore to revision before mutation.
+    node_another_bucket.stop_clickhouse()
+    drop_s3_metadata(node_another_bucket)
+    purge_s3(cluster, cluster.minio_bucket_2)
+    create_restore_file(node_another_bucket, revision=revision_before_mutation, bucket="root")
+    node_another_bucket.start_clickhouse()
+
+    assert node_another_bucket.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 2)
+    assert node_another_bucket.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0)
+    assert node_another_bucket.query("SELECT sum(counter) FROM s3.test FORMAT Values") == "({})".format(0)
+
+    # Restore to revision after mutation.
+    node_another_bucket.stop_clickhouse()
+    drop_s3_metadata(node_another_bucket)
+    purge_s3(cluster, cluster.minio_bucket_2)
+    create_restore_file(node_another_bucket, revision=revision_after_mutation, bucket="root")
+    node_another_bucket.start_clickhouse()
+
+    assert node_another_bucket.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 2)
+    assert node_another_bucket.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0)
+    assert node_another_bucket.query("SELECT sum(counter) FROM s3.test FORMAT Values") == "({})".format(4096 * 2)
+    assert node_another_bucket.query("SELECT sum(counter) FROM s3.test WHERE id > 0 FORMAT Values") == "({})".format(4096)
+
+    # Restore to revision in the middle of mutation.
+    # Unfinished mutation should be completed after table startup.
+    node_another_bucket.stop_clickhouse()
+    drop_s3_metadata(node_another_bucket)
+    purge_s3(cluster, cluster.minio_bucket_2)
+    revision = str((int(revision_before_mutation) + int(revision_after_mutation)) // 2)
+    create_restore_file(node_another_bucket, revision=revision, bucket="root")
+    node_another_bucket.start_clickhouse()
+
+    assert node_another_bucket.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 2)
+    assert node_another_bucket.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0)
+    assert node_another_bucket.query("SELECT sum(counter) FROM s3.test FORMAT Values") == "({})".format(4096 * 2)
+    assert node_another_bucket.query("SELECT sum(counter) FROM s3.test WHERE id > 0 FORMAT Values") == "({})".format(4096)

From d7e6c8393fe2d55c246cae55fafdcc1faf34c6f9 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Wed, 13 Jan 2021 13:32:20 +0300
Subject: [PATCH 0089/1238] Some useless code

---
 src/CMakeLists.txt                         |   4 +-
 src/Coordination/CMakeLists.txt            |   0
 src/Coordination/InMemoryLogStore.cpp      | 193 +++++++++++++++++++++
 src/Coordination/InMemoryLogStore.h        |  47 +++++
 src/Coordination/InMemoryStateManager.cpp  |  32 ++++
 src/Coordination/InMemoryStateManager.h    |  41 +++++
 src/Coordination/tests/gtest_for_build.cpp |  11 ++
 7 files changed, 327 insertions(+), 1 deletion(-)
 create mode 100644 src/Coordination/CMakeLists.txt
 create mode 100644 src/Coordination/InMemoryLogStore.cpp
 create mode 100644 src/Coordination/InMemoryLogStore.h
 create mode 100644 src/Coordination/InMemoryStateManager.cpp
 create mode 100644 src/Coordination/InMemoryStateManager.h
 create mode 100644 src/Coordination/tests/gtest_for_build.cpp

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 4e04f5607df..2027f527bae 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -60,6 +60,7 @@ add_subdirectory (Processors)
 add_subdirectory (Formats)
 add_subdirectory (Compression)
 add_subdirectory (Server)
+add_subdirectory (Coordination)
 
 
 set(dbms_headers)
@@ -185,6 +186,7 @@ add_object_library(clickhouse_processors_sources Processors/Sources)
 add_object_library(clickhouse_processors_merges Processors/Merges)
 add_object_library(clickhouse_processors_merges_algorithms Processors/Merges/Algorithms)
 add_object_library(clickhouse_processors_queryplan Processors/QueryPlan)
+add_object_library(clickhouse_coordination Coordination)
 
 set (DBMS_COMMON_LIBRARIES)
 # libgcc_s does not provide an implementation of an atomics library. Instead,
@@ -308,7 +310,7 @@ if (USE_KRB5)
 endif()
 
 if (USE_NURAFT)
-    dbms_target_link_libraries(PRIVATE ${NURAFT_LIBRARY})
+    dbms_target_link_libraries(PUBLIC ${NURAFT_LIBRARY})
 endif()
 
 if(RE2_INCLUDE_DIR)
diff --git a/src/Coordination/CMakeLists.txt b/src/Coordination/CMakeLists.txt
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/src/Coordination/InMemoryLogStore.cpp b/src/Coordination/InMemoryLogStore.cpp
new file mode 100644
index 00000000000..3b9ad3fe18a
--- /dev/null
+++ b/src/Coordination/InMemoryLogStore.cpp
@@ -0,0 +1,193 @@
+#include <Coordination/InMemoryLogStore.h>
+
+namespace DB
+{
+
+namespace
+{
+using namespace nuraft;
+ptr<log_entry> makeClone(const ptr<log_entry>& entry) {
+    ptr<log_entry> clone = cs_new<log_entry>
+                           ( entry->get_term(),
+                             buffer::clone( entry->get_buf() ),
+                             entry->get_val_type() );
+    return clone;
+}
+}
+
+InMemoryLogStore::InMemoryLogStore()
+    : start_idx(1)
+{}
+
+size_t InMemoryLogStore::start_index() const
+{
+    return start_idx;
+}
+
+size_t InMemoryLogStore::next_slot() const
+{
+    std::lock_guard<std::mutex> l(logs_lock);
+    // Exclude the dummy entry.
+    return start_idx + logs.size() - 1;
+}
+
+nuraft::ptr<nuraft::log_entry> InMemoryLogStore::last_entry() const
+{
+    ulong next_idx = next_slot();
+    std::lock_guard<std::mutex> lock(logs_lock);
+    auto entry = logs.find(next_idx - 1);
+    if (entry == logs.end())
+        entry = logs.find(0);
+
+    return makeClone(entry->second);
+}
+
+size_t InMemoryLogStore::append(nuraft::ptr<nuraft::log_entry> & entry)
+{
+    ptr<log_entry> clone = makeClone(entry);
+
+    std::lock_guard<std::mutex> l(logs_lock);
+    size_t idx = start_idx + logs.size() - 1;
+    logs[idx] = clone;
+    return idx;
+}
+
+void InMemoryLogStore::write_at(size_t index, nuraft::ptr<nuraft::log_entry> & entry)
+{
+    nuraft::ptr<log_entry> clone = makeClone(entry);
+
+    // Discard all logs equal to or greater than `index.
+    std::lock_guard<std::mutex> l(logs_lock);
+    auto itr = logs.lower_bound(index);
+    while (itr != logs.end())
+        itr = logs.erase(itr);
+    logs[index] = clone;
+}
+
+nuraft::ptr<std::vector<nuraft::ptr<nuraft::log_entry>>> InMemoryLogStore::log_entries(size_t start, size_t end)
+{
+    nuraft::ptr<std::vector<nuraft::ptr<nuraft::log_entry>>> ret =
+        nuraft::cs_new<std::vector<nuraft::ptr<nuraft::log_entry>>>();
+
+    ret->resize(end - start);
+    size_t cc = 0;
+    for (size_t ii = start; ii < end; ++ii)
+    {
+        nuraft::ptr<nuraft::log_entry> src = nullptr;
+        {
+            std::lock_guard<std::mutex> l(logs_lock);
+            auto entry = logs.find(ii);
+            if (entry == logs.end())
+            {
+                entry = logs.find(0);
+                assert(0);
+            }
+            src = entry->second;
+        }
+        (*ret)[cc++] = makeClone(src);
+    }
+    return ret;
+}
+
+nuraft::ptr<nuraft::log_entry> InMemoryLogStore::entry_at(size_t index)
+{
+    nuraft::ptr<nuraft::log_entry> src = nullptr;
+    {
+        std::lock_guard<std::mutex> l(logs_lock);
+        auto entry = logs.find(index);
+        if (entry == logs.end())
+            entry = logs.find(0);
+        src = entry->second;
+    }
+    return makeClone(src);
+}
+
+size_t InMemoryLogStore::term_at(size_t index)
+{
+    ulong term = 0;
+    {
+        std::lock_guard<std::mutex> l(logs_lock);
+        auto entry = logs.find(index);
+        if (entry == logs.end())
+            entry = logs.find(0);
+        term = entry->second->get_term();
+    }
+    return term;
+}
+
+nuraft::ptr<nuraft::buffer> InMemoryLogStore::pack(size_t index, Int32 cnt)
+{
+    std::vector<nuraft::ptr<nuraft::buffer>> returned_logs;
+
+    size_t size_total = 0;
+    for (ulong ii = index; ii < index + cnt; ++ii)
+    {
+        ptr<log_entry> le = nullptr;
+        {
+            std::lock_guard<std::mutex> l(logs_lock);
+            le = logs[ii];
+        }
+        assert(le.get());
+        nuraft::ptr<nuraft::buffer> buf = le->serialize();
+        size_total += buf->size();
+        returned_logs.push_back(buf);
+    }
+
+    nuraft::ptr<buffer> buf_out = nuraft::buffer::alloc(sizeof(int32) + cnt * sizeof(int32) + size_total);
+    buf_out->pos(0);
+    buf_out->put(static_cast<Int32>(cnt));
+
+    for (auto & entry : returned_logs)
+    {
+        nuraft::ptr<nuraft::buffer> & bb = entry;
+        buf_out->put(static_cast<Int32>(bb->size()));
+        buf_out->put(*bb);
+    }
+    return buf_out;
+}
+
+void InMemoryLogStore::apply_pack(size_t index, nuraft::buffer & pack)
+{
+    pack.pos(0);
+    Int32 num_logs = pack.get_int();
+
+    for (Int32 ii = 0; ii < num_logs; ++ii)
+    {
+        size_t cur_idx = index + ii;
+        Int32 buf_size = pack.get_int();
+
+        nuraft::ptr<nuraft::buffer> buf_local = nuraft::buffer::alloc(buf_size);
+        pack.get(buf_local);
+
+        nuraft::ptr<nuraft::log_entry> le = nuraft::log_entry::deserialize(*buf_local);
+        {
+            std::lock_guard<std::mutex> l(logs_lock);
+            logs[cur_idx] = le;
+        }
+    }
+
+    {
+        std::lock_guard<std::mutex> l(logs_lock);
+        auto entry = logs.upper_bound(0);
+        if (entry != logs.end())
+            start_idx = entry->first;
+        else
+            start_idx = 1;
+    }
+}
+
+bool InMemoryLogStore::compact(size_t last_log_index)
+{
+    std::lock_guard<std::mutex> l(logs_lock);
+    for (ulong ii = start_idx; ii <= last_log_index; ++ii)
+    {
+        auto entry = logs.find(ii);
+        if (entry != logs.end())
+            logs.erase(entry);
+    }
+
+    start_idx = last_log_index + 1;
+    return true;
+}
+
+}
diff --git a/src/Coordination/InMemoryLogStore.h b/src/Coordination/InMemoryLogStore.h
new file mode 100644
index 00000000000..e9c41b50cf6
--- /dev/null
+++ b/src/Coordination/InMemoryLogStore.h
@@ -0,0 +1,47 @@
+#pragma once
+
+#include <atomic>
+#include <map>
+#include <mutex>
+#include <Core/Types.h>
+#include <libnuraft/log_store.hxx>
+
+namespace DB
+{
+
+class InMemoryLogStore : public nuraft::log_store
+{
+public:
+    InMemoryLogStore();
+
+    size_t start_index() const override;
+
+    size_t next_slot() const override;
+
+    nuraft::ptr<nuraft::log_entry> last_entry() const override;
+
+    size_t append(nuraft::ptr<nuraft::log_entry> & entry) override;
+
+    void write_at(size_t index, nuraft::ptr<nuraft::log_entry> & entry) override;
+
+    nuraft::ptr<std::vector<nuraft::ptr<nuraft::log_entry>>> log_entries(size_t start, size_t end) override;
+
+    nuraft::ptr<nuraft::log_entry> entry_at(size_t index) override;
+
+    size_t term_at(size_t index) override;
+
+    nuraft::ptr<nuraft::buffer> pack(size_t index, Int32 cnt) override;
+
+    void apply_pack(size_t index, nuraft::buffer & pack) override;
+
+    bool compact(size_t last_log_index) override;
+
+    bool flush() override { return true; }
+
+private:
+    std::map<ulong, nuraft::ptr<nuraft::log_entry>> logs;
+    mutable std::mutex logs_lock;
+    std::atomic<size_t> start_idx;
+};
+
+}
diff --git a/src/Coordination/InMemoryStateManager.cpp b/src/Coordination/InMemoryStateManager.cpp
new file mode 100644
index 00000000000..15a1f7aa622
--- /dev/null
+++ b/src/Coordination/InMemoryStateManager.cpp
@@ -0,0 +1,32 @@
+#include <Coordination/InMemoryStateManager.h>
+
+namespace DB
+{
+
+InMemoryStateManager::InMemoryStateManager(int my_server_id_, const std::string & endpoint_)
+    : my_server_id(my_server_id_)
+    , endpoint(endpoint_)
+    , log_store(nuraft::cs_new<InMemoryLogStore>())
+    , server_config(nuraft::cs_new<nuraft::srv_config>(my_server_id, endpoint))
+    , cluster_config(nuraft::cs_new<nuraft::cluster_config>())
+{
+    cluster_config->get_servers().push_back(server_config);
+}
+
+void InMemoryStateManager::save_config(const nuraft::cluster_config & config)
+{
+    // Just keep in memory in this example.
+    // Need to write to disk here, if want to make it durable.
+    nuraft::ptr<nuraft::buffer> buf = config.serialize();
+    cluster_config = nuraft::cluster_config::deserialize(*buf);
+}
+
+void InMemoryStateManager::save_state(const nuraft::srv_state & state)
+{
+     // Just keep in memory in this example.
+     // Need to write to disk here, if want to make it durable.
+     nuraft::ptr<nuraft::buffer> buf = state.serialize();
+     server_state = nuraft::srv_state::deserialize(*buf);
+ }
+
+}
diff --git a/src/Coordination/InMemoryStateManager.h b/src/Coordination/InMemoryStateManager.h
new file mode 100644
index 00000000000..32eea343465
--- /dev/null
+++ b/src/Coordination/InMemoryStateManager.h
@@ -0,0 +1,41 @@
+#pragma once
+
+#include <Core/Types.h>
+#include <string>
+#include <Coordination/InMemoryLogStore.h>
+#include <libnuraft/nuraft.hxx>
+
+namespace DB
+{
+
+class InMemoryStateManager : public nuraft::state_mgr
+{
+public:
+    InMemoryStateManager(int server_id_, const std::string & endpoint_);
+
+    nuraft::ptr<nuraft::cluster_config> load_config() override { return cluster_config; }
+
+    void save_config(const nuraft::cluster_config & config) override;
+
+    void save_state(const nuraft::srv_state & state) override;
+
+    nuraft::ptr<nuraft::srv_state> read_state() override { return server_state; }
+
+    nuraft::ptr<nuraft::log_store> load_log_store() override { return log_store; }
+
+    Int32 server_id() override { return my_server_id; }
+
+    nuraft::ptr<nuraft::srv_config> get_srv_config() const { return server_config; }
+
+    void system_exit(const int /* exit_code */) override {}
+
+private:
+    int my_server_id;
+    std::string endpoint;
+    nuraft::ptr<InMemoryLogStore> log_store;
+    nuraft::ptr<nuraft::srv_config> server_config;
+    nuraft::ptr<nuraft::cluster_config> cluster_config;
+    nuraft::ptr<nuraft::srv_state> server_state;
+};
+
+}
diff --git a/src/Coordination/tests/gtest_for_build.cpp b/src/Coordination/tests/gtest_for_build.cpp
new file mode 100644
index 00000000000..1026b779cdf
--- /dev/null
+++ b/src/Coordination/tests/gtest_for_build.cpp
@@ -0,0 +1,11 @@
+#include <gtest/gtest.h>
+
+#include <Coordination/InMemoryLogStore.h>
+#include <Coordination/InMemoryStateManager.h>
+
+TEST(CoordinationTest, BuildTest)
+{
+    DB::InMemoryLogStore store;
+    DB::InMemoryStateManager state_manager(1, "localhost:12345");
+    EXPECT_EQ(1, 1);
+}

From 53389f79c0c433f566bfc69b71971c6bc9e8adac Mon Sep 17 00:00:00 2001
From: Pavel Kovalenko <jokserfn@yandex-team.ru>
Date: Wed, 13 Jan 2021 15:05:32 +0300
Subject: [PATCH 0090/1238] Ability to backup-restore metadata files for DiskS3
 (minor fixes)

---
 src/Disks/IDisk.h                                    | 2 +-
 src/Disks/S3/DiskS3.cpp                              | 3 ++-
 tests/integration/helpers/cluster.py                 | 2 +-
 tests/integration/test_merge_tree_s3_restore/test.py | 9 ++++++---
 4 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h
index a26d5015ba0..983f0dd6808 100644
--- a/src/Disks/IDisk.h
+++ b/src/Disks/IDisk.h
@@ -195,7 +195,7 @@ public:
     /// Returns executor to perform asynchronous operations.
     virtual Executor & getExecutor() { return *executor; }
 
-    /// Invoked when partitions freeze is invoked.
+    /// Invoked on partitions freeze query.
     virtual void onFreeze(const String &) { }
 
 private:
diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp
index 5787457bf11..831296032a5 100644
--- a/src/Disks/S3/DiskS3.cpp
+++ b/src/Disks/S3/DiskS3.cpp
@@ -566,6 +566,7 @@ DiskS3::DiskS3(
     , max_single_part_upload_size(max_single_part_upload_size_)
     , min_bytes_for_seek(min_bytes_for_seek_)
     , send_metadata(send_metadata_)
+    , revision_counter(0)
     , list_object_keys_size(list_object_keys_size_)
 {
 }
@@ -1091,7 +1092,7 @@ void DiskS3::restore()
     {
         LOG_ERROR(&Poco::Logger::get("DiskS3"), "Failed to restore disk. Code: {}, e.displayText() = {}, Stack trace:\n\n{}", e.code(), e.displayText(), e.getStackTraceString());
 
-        throw Exception("Failed to restore disk: " + name, e, ErrorCodes::LOGICAL_ERROR);
+        throw;
     }
 }
 
diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py
index 65f438b6575..7dc847005c3 100644
--- a/tests/integration/helpers/cluster.py
+++ b/tests/integration/helpers/cluster.py
@@ -1398,7 +1398,7 @@ class ClickHouseKiller(object):
         self.clickhouse_node = clickhouse_node
 
     def __enter__(self):
-        self.clickhouse_node.stop_clickhouse()
+        self.clickhouse_node.stop_clickhouse(kill=True)
 
     def __exit__(self, exc_type, exc_val, exc_tb):
         self.clickhouse_node.start_clickhouse()
diff --git a/tests/integration/test_merge_tree_s3_restore/test.py b/tests/integration/test_merge_tree_s3_restore/test.py
index 8859fa73299..9f4aab9f35d 100644
--- a/tests/integration/test_merge_tree_s3_restore/test.py
+++ b/tests/integration/test_merge_tree_s3_restore/test.py
@@ -86,7 +86,7 @@ def drop_shadow_information(node):
     node.exec_in_container(['bash', '-c', 'rm -rf /var/lib/clickhouse/shadow/*'], user='root')
 
 
-def create_restore_file(node, revision='0', bucket=None, path=None):
+def create_restore_file(node, revision=0, bucket=None, path=None):
     add_restore_option = 'echo -en "{}\n" >> /var/lib/clickhouse/disks/s3/restore'
     node.exec_in_container(['bash', '-c', add_restore_option.format(revision)], user='root')
     if bucket:
@@ -96,7 +96,7 @@ def create_restore_file(node, revision='0', bucket=None, path=None):
 
 
 def get_revision_counter(node, backup_number):
-    return node.exec_in_container(['bash', '-c', 'cat /var/lib/clickhouse/disks/s3/shadow/{}/revision.txt'.format(backup_number)], user='root')
+    return int(node.exec_in_container(['bash', '-c', 'cat /var/lib/clickhouse/disks/s3/shadow/{}/revision.txt'.format(backup_number)], user='root'))
 
 
 @pytest.fixture(autouse=True)
@@ -300,10 +300,13 @@ def test_restore_mutations(cluster):
     node_another_bucket.stop_clickhouse()
     drop_s3_metadata(node_another_bucket)
     purge_s3(cluster, cluster.minio_bucket_2)
-    revision = str((int(revision_before_mutation) + int(revision_after_mutation)) // 2)
+    revision = (revision_before_mutation + revision_after_mutation) // 2
     create_restore_file(node_another_bucket, revision=revision, bucket="root")
     node_another_bucket.start_clickhouse()
 
+    # Wait for unfinished mutation completion.
+    time.sleep(3)
+
     assert node_another_bucket.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 2)
     assert node_another_bucket.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0)
     assert node_another_bucket.query("SELECT sum(counter) FROM s3.test FORMAT Values") == "({})".format(4096 * 2)

From 0e903552a06b1628a1dc9b2ca7e6b0383d856fba Mon Sep 17 00:00:00 2001
From: Anton Popov <pad11rus@gmail.com>
Date: Wed, 13 Jan 2021 17:04:27 +0300
Subject: [PATCH 0091/1238] fix TTLs with WHERE

---
 src/DataStreams/TTLBlockInputStream.cpp        | 2 +-
 src/Storages/MergeTree/IMergeTreeDataPart.cpp  | 2 +-
 src/Storages/MergeTree/MergeTreeDataWriter.cpp | 2 +-
 src/Storages/StorageInMemoryMetadata.cpp       | 6 +++---
 src/Storages/StorageInMemoryMetadata.h         | 4 ++--
 5 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/DataStreams/TTLBlockInputStream.cpp b/src/DataStreams/TTLBlockInputStream.cpp
index 7dd5952bb07..4f141a03475 100644
--- a/src/DataStreams/TTLBlockInputStream.cpp
+++ b/src/DataStreams/TTLBlockInputStream.cpp
@@ -44,7 +44,7 @@ TTLBlockInputStream::TTLBlockInputStream(
         algorithms.emplace_back(std::move(algorithm));
     }
 
-    for (const auto & where_ttl : metadata_snapshot_->getRowsWhereTTL())
+    for (const auto & where_ttl : metadata_snapshot_->getRowsWhereTTLs())
         algorithms.emplace_back(std::make_unique<TTLDeleteAlgorithm>(
             where_ttl, old_ttl_infos.rows_where_ttl[where_ttl.result_column], current_time_, force_));
 
diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp
index 6ba351a4459..a937208b66a 100644
--- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp
+++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp
@@ -1142,7 +1142,7 @@ bool IMergeTreeDataPart::checkAllTTLCalculated(const StorageMetadataPtr & metada
             return false;
     }
 
-    for (const auto & rows_where_desc : metadata_snapshot->getRowsWhereTTL())
+    for (const auto & rows_where_desc : metadata_snapshot->getRowsWhereTTLs())
     {
         if (!ttl_infos.rows_where_ttl.count(rows_where_desc.result_column))
             return false;
diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp
index b28f0979dc0..c3eafd2423e 100644
--- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp
@@ -377,7 +377,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa
     for (const auto & ttl_entry : metadata_snapshot->getGroupByTTLs())
         updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.group_by_ttl[ttl_entry.result_column], block, true);
 
-    for (const auto & ttl_entry : metadata_snapshot->getRowsWhereTTL())
+    for (const auto & ttl_entry : metadata_snapshot->getRowsWhereTTLs())
         updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.rows_where_ttl[ttl_entry.result_column], block, true);
 
     for (const auto & [name, ttl_entry] : metadata_snapshot->getColumnTTLs())
diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp
index 36947706474..f810c73c02a 100644
--- a/src/Storages/StorageInMemoryMetadata.cpp
+++ b/src/Storages/StorageInMemoryMetadata.cpp
@@ -125,7 +125,7 @@ TTLTableDescription StorageInMemoryMetadata::getTableTTLs() const
 
 bool StorageInMemoryMetadata::hasAnyTableTTL() const
 {
-    return hasAnyMoveTTL() || hasRowsTTL() || hasAnyRecompressionTTL() || hasAnyGroupByTTL();
+    return hasAnyMoveTTL() || hasRowsTTL() || hasAnyRecompressionTTL() || hasAnyGroupByTTL() || hasAnyRowsWhereTTL();
 }
 
 TTLColumnsDescription StorageInMemoryMetadata::getColumnTTLs() const
@@ -148,12 +148,12 @@ bool StorageInMemoryMetadata::hasRowsTTL() const
     return table_ttl.rows_ttl.expression != nullptr;
 }
 
-TTLDescriptions StorageInMemoryMetadata::getRowsWhereTTL() const
+TTLDescriptions StorageInMemoryMetadata::getRowsWhereTTLs() const
 {
     return table_ttl.rows_where_ttl;
 }
 
-bool StorageInMemoryMetadata::hasRowsWhereTTL() const
+bool StorageInMemoryMetadata::hasAnyRowsWhereTTL() const
 {
     return !table_ttl.rows_where_ttl.empty();
 }
diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h
index 9a0f730f1f4..038416aff7d 100644
--- a/src/Storages/StorageInMemoryMetadata.h
+++ b/src/Storages/StorageInMemoryMetadata.h
@@ -109,8 +109,8 @@ struct StorageInMemoryMetadata
     TTLDescription getRowsTTL() const;
     bool hasRowsTTL() const;
 
-    TTLDescriptions getRowsWhereTTL() const;
-    bool hasRowsWhereTTL() const;
+    TTLDescriptions getRowsWhereTTLs() const;
+    bool hasAnyRowsWhereTTL() const;
 
     /// Just wrapper for table TTLs, return moves (to disks or volumes) parts of
     /// table TTL.

From 294e8f095d7cec5ef825c9c22dcfb5f9261e3f39 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Wed, 13 Jan 2021 18:00:39 +0300
Subject: [PATCH 0092/1238] I was able to replicate single number at localhost

---
 src/Coordination/InMemoryLogStore.cpp      |  12 +-
 src/Coordination/tests/gtest_for_build.cpp | 175 +++++++++++++++++++++
 2 files changed, 181 insertions(+), 6 deletions(-)

diff --git a/src/Coordination/InMemoryLogStore.cpp b/src/Coordination/InMemoryLogStore.cpp
index 3b9ad3fe18a..9f8d398a110 100644
--- a/src/Coordination/InMemoryLogStore.cpp
+++ b/src/Coordination/InMemoryLogStore.cpp
@@ -6,18 +6,18 @@ namespace DB
 namespace
 {
 using namespace nuraft;
-ptr<log_entry> makeClone(const ptr<log_entry>& entry) {
-    ptr<log_entry> clone = cs_new<log_entry>
-                           ( entry->get_term(),
-                             buffer::clone( entry->get_buf() ),
-                             entry->get_val_type() );
+ptr<log_entry> makeClone(const ptr<log_entry> & entry) {
+    ptr<log_entry> clone = cs_new<log_entry>(entry->get_term(), buffer::clone(entry->get_buf()), entry->get_val_type());
     return clone;
 }
 }
 
 InMemoryLogStore::InMemoryLogStore()
     : start_idx(1)
-{}
+{
+    nuraft::ptr<nuraft::buffer> buf = nuraft::buffer::alloc(sizeof(size_t));
+    logs[0] = nuraft::cs_new<nuraft::log_entry>(0, buf);
+}
 
 size_t InMemoryLogStore::start_index() const
 {
diff --git a/src/Coordination/tests/gtest_for_build.cpp b/src/Coordination/tests/gtest_for_build.cpp
index 1026b779cdf..f9856eb275a 100644
--- a/src/Coordination/tests/gtest_for_build.cpp
+++ b/src/Coordination/tests/gtest_for_build.cpp
@@ -2,10 +2,185 @@
 
 #include <Coordination/InMemoryLogStore.h>
 #include <Coordination/InMemoryStateManager.h>
+#include <Coordination/SummingStateMachine.h>
+#include <Common/Exception.h>
+#include <libnuraft/nuraft.hxx>
+#include <thread>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int LOGICAL_ERROR;
+}
+
+}
 
 TEST(CoordinationTest, BuildTest)
 {
     DB::InMemoryLogStore store;
     DB::InMemoryStateManager state_manager(1, "localhost:12345");
+    DB::SummingStateMachine machine;
     EXPECT_EQ(1, 1);
 }
+
+struct SummingRaftServer
+{
+    SummingRaftServer(int server_id_, const std::string & hostname_, int port_)
+        : server_id(server_id_)
+        , hostname(hostname_)
+        , port(port_)
+        , endpoint(hostname + ":" + std::to_string(port))
+        , state_machine(nuraft::cs_new<DB::SummingStateMachine>())
+        , state_manager(nuraft::cs_new<DB::InMemoryStateManager>(server_id, endpoint))
+    {
+        nuraft::raft_params params;
+        params.heart_beat_interval_ = 100;
+        params.election_timeout_lower_bound_ = 200;
+        params.election_timeout_upper_bound_ = 400;
+        params.reserved_log_items_ = 5;
+        params.snapshot_distance_ = 5;
+        params.client_req_timeout_ = 3000;
+        params.return_method_ = nuraft::raft_params::blocking;
+
+        raft_instance = launcher.init(
+            state_machine, state_manager, nuraft::cs_new<nuraft::logger>(), port,
+            nuraft::asio_service::options{}, params);
+
+        if (!raft_instance)
+        {
+            std::cerr << "Failed to initialize launcher (see the message "
+                         "in the log file)." << std::endl;
+            exit(-1);
+        }
+        std::cout << "init Raft instance " << server_id;
+        for (size_t ii = 0; ii < 20; ++ii)
+        {
+            if (raft_instance->is_initialized())
+            {
+                std::cout << " done" << std::endl;
+                break;
+            }
+            std::cout << ".";
+            fflush(stdout);
+            std::this_thread::sleep_for(std::chrono::milliseconds(100));
+        }
+    }
+
+    // Server ID.
+    int server_id;
+
+    // Server address.
+    std::string hostname;
+
+    // Server port.
+    int port;
+
+    std::string endpoint;
+
+    // State machine.
+    nuraft::ptr<DB::SummingStateMachine> state_machine;
+
+    // State manager.
+    nuraft::ptr<nuraft::state_mgr> state_manager;
+
+    // Raft launcher.
+    nuraft::raft_launcher launcher;
+
+    // Raft server instance.
+    nuraft::ptr<nuraft::raft_server> raft_instance;
+};
+
+nuraft::ptr<nuraft::buffer> getLogEntry(int64_t number)
+{
+    nuraft::ptr<nuraft::buffer> ret = nuraft::buffer::alloc(sizeof(number));
+    nuraft::buffer_serializer bs(ret);
+    // WARNING: We don't consider endian-safety in this example.
+    bs.put_raw(&number, sizeof(number));
+    return ret;
+}
+
+TEST(CoordinationTest, TestSummingRaft)
+{
+    SummingRaftServer s1(1, "localhost", 44444);
+    SummingRaftServer s2(2, "localhost", 44445);
+    SummingRaftServer s3(3, "localhost", 44446);
+
+    nuraft::srv_config first_config(1, "localhost:44444");
+    auto ret1 = s2.raft_instance->add_srv(first_config);
+    if (!ret1->get_accepted())
+    {
+        std::cout << "failed to add server: "
+                  << ret1->get_result_str() << std::endl;
+        EXPECT_TRUE(false);
+    }
+
+    while(s1.raft_instance->get_leader() != 2)
+    {
+        std::cout << "Waiting s1 to join to s2 quorum\n";
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    }
+
+    nuraft::srv_config third_config(3, "localhost:44446");
+    auto ret3 = s2.raft_instance->add_srv(third_config);
+    if (!ret3->get_accepted())
+    {
+        std::cout << "failed to add server: "
+                  << ret3->get_result_str() << std::endl;
+        EXPECT_TRUE(false);
+    }
+
+    while(s3.raft_instance->get_leader() != 2)
+    {
+        std::cout << "Waiting s3 to join to s2 quorum\n";
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    }
+
+    /// S2 is leader
+    EXPECT_EQ(s1.raft_instance->get_leader(), 2);
+    EXPECT_EQ(s2.raft_instance->get_leader(), 2);
+    EXPECT_EQ(s3.raft_instance->get_leader(), 2);
+
+    std::cerr << "Starting to add entries\n";
+    auto entry = getLogEntry(1);
+    auto ret = s2.raft_instance->append_entries({entry});
+    if (!ret->get_accepted())
+    {
+        // Log append rejected, usually because this node is not a leader.
+        std::cout << "failed to replicate: entry 1" << ret->get_result_code() << std::endl;
+        EXPECT_TRUE(false);
+    }
+    if (ret->get_result_code() != nuraft::cmd_result_code::OK)
+    {
+        // Something went wrong.
+        // This means committing this log failed,
+        // but the log itself is still in the log store.
+        std::cout << "failed to replicate: entry 1" << ret->get_result_code() << std::endl;
+        EXPECT_TRUE(false);
+    }
+    else
+    {
+        std::cout << "Append ok\n";
+    }
+
+    while (s1.state_machine->getValue() != 1)
+    {
+        std::cout << "Waiting s1 to apply entry\n";
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    }
+
+    while (s2.state_machine->getValue() != 1)
+    {
+        std::cout << "Waiting s2 to apply entry\n";
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    }
+
+    EXPECT_EQ(s1.state_machine->getValue(), 1);
+    EXPECT_EQ(s2.state_machine->getValue(), 1);
+    EXPECT_EQ(s3.state_machine->getValue(), 1);
+
+    s1.launcher.shutdown(5);
+    s2.launcher.shutdown(5);
+    s3.launcher.shutdown(5);
+}

From 66e1072c2cac2bd6a716f4d5286244031863e2c2 Mon Sep 17 00:00:00 2001
From: root <keenwolf@gmail.com>
Date: Thu, 14 Jan 2021 00:46:55 +0800
Subject: [PATCH 0093/1238] Add the function to read file as a String.

---
 src/Functions/FunctionFile.cpp        | 121 ++++++++++++++++++++++++++
 src/Functions/FunctionsConversion.cpp |   4 +-
 2 files changed, 124 insertions(+), 1 deletion(-)
 create mode 100644 src/Functions/FunctionFile.cpp

diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp
new file mode 100644
index 00000000000..8c29a9a39df
--- /dev/null
+++ b/src/Functions/FunctionFile.cpp
@@ -0,0 +1,121 @@
+//#include <Interpreters/Context.h>
+#include <Columns/ColumnString.h>
+#include <Columns/IColumn.h>
+#include <Columns/ColumnFixedString.h>
+#include <DataTypes/DataTypeFixedString.h>
+#include <Functions/FunctionFactory.h>
+#include <Functions/FunctionStringToString.h>
+#include <common/find_symbols.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int ILLEGAL_COLUMN;
+    extern const int TOO_LARGE_STRING_SIZE;
+    extern const int NOT_IMPLEMENTED;
+}
+
+
+/** Conversion to fixed string is implemented only for strings.
+  */
+class FunctionFromFile : public IFunction
+{
+public:
+    static constexpr auto name = "file";
+    static FunctionPtr create(const Context &) { return std::make_shared<FunctionFromFile>(); }
+    static FunctionPtr create() { return std::make_shared<FunctionFromFile>(); }
+    //static FunctionPtr create(const Context & context) { return std::make_shared<ConcatImpl>(context); }
+
+    String getName() const override { return name; }
+
+    size_t getNumberOfArguments() const override { return 1; }
+    //bool isInjective(const ColumnsWithTypeAndName &) const override { return true; }
+
+    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
+    {
+        if (!isStringOrFixedString(arguments[0].type))
+            throw Exception(getName() + " is only implemented for types String and FixedString", ErrorCodes::NOT_IMPLEMENTED);
+        //??how to get accurate length  here? or should we return normal string type?
+        //return std::make_shared<DataTypeFixedString>(1);
+        return std::make_shared<DataTypeString>();
+    }
+
+    bool useDefaultImplementationForConstants() const override { return true; }
+    //ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
+
+    ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
+    {
+        const auto & column = arguments[0].column;
+        const char * filename = nullptr;
+        // if (const auto * column_string = checkAndGetColumnConst<ColumnString>(column.get()))
+        if (const auto * column_string = checkAndGetColumn<ColumnString>(column.get()))
+        {
+            const auto & filename_chars = column_string->getChars();
+            filename = reinterpret_cast<const char *>(&filename_chars[0]);
+
+ /*
+            //get file path
+            auto user_files_path = Context::getUserFilesPath();
+
+
+            String user_files_absolute_path = Poco::Path(user_files_path).makeAbsolute().makeDirectory().toString();
+            Poco::Path poco_path = Poco::Path(table_path);
+            if (poco_path.isRelative())
+                poco_path = Poco::Path(user_files_absolute_path, poco_path);
+            else //need to judge if the absolute path is in userfilespath?
+            const String path = poco_path.absolute().toString();
+
+*/
+            auto fd = open(filename, O_RDONLY);
+            if (fd == -1)
+            {//arguments[0].column->getName()
+                throw Exception("Can't open " + std::string(filename), ErrorCodes::ILLEGAL_COLUMN); //ErrorCode need to be rectify
+            }
+            struct stat file_stat;
+            if (fstat(fd, &file_stat) == -1)
+            {
+                throw Exception("Can't stat " + std::string(filename), ErrorCodes::ILLEGAL_COLUMN);
+            }
+            auto file_length = static_cast<uint64_t>(file_stat.st_size);
+            auto res = ColumnString::create();
+            auto & res_chars = res->getChars();
+            auto & res_offsets = res->getOffsets();
+            //res_chars.resize_fill(file_length + 1);
+            //omit the copy op to only once.
+            res_chars.resize_exact(file_length + 1);
+            res_offsets.push_back(file_length + 1);
+            char * buf = reinterpret_cast<char *>(&res_chars[0]);
+            ssize_t bytes_read = pread(fd, buf, file_length, 0);
+
+            if (bytes_read == -1)
+            {
+                throw Exception("Bad read of " + std::string(filename), ErrorCodes::ILLEGAL_COLUMN);
+            }
+            if (static_cast<uint64_t>(bytes_read) != file_length)
+            {
+                throw Exception("Short read of " + std::string(filename), ErrorCodes::ILLEGAL_COLUMN);
+            }
+            buf[file_length] = '\0';
+            close(fd);
+            return res;
+        }
+        else
+        {
+            throw Exception("Bad Function arguments for file() " + std::string(filename), ErrorCodes::ILLEGAL_COLUMN);
+        }
+    }
+};
+
+
+
+void registerFunctionFromFile(FunctionFactory & factory)
+{
+    factory.registerFunction<FunctionFromFile>();
+}
+
+}
\ No newline at end of file
diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp
index 257b852ecd8..a6866ce0939 100644
--- a/src/Functions/FunctionsConversion.cpp
+++ b/src/Functions/FunctionsConversion.cpp
@@ -6,6 +6,7 @@ namespace DB
 {
 
 void registerFunctionFixedString(FunctionFactory & factory);
+void registerFunctionFromFile(FunctionFactory & factory);
 
 void registerFunctionsConversion(FunctionFactory & factory)
 {
@@ -36,7 +37,8 @@ void registerFunctionsConversion(FunctionFactory & factory)
     factory.registerFunction<FunctionToString>();
 
     registerFunctionFixedString(factory);
-
+    registerFunctionFromFile(factory);
+    
     factory.registerFunction<FunctionToUnixTimestamp>();
 
     factory.registerFunction<CastOverloadResolver<CastType::nonAccurate>>(FunctionFactory::CaseInsensitive);

From 701b61dcedef91f88808647cbcb141369a47bf24 Mon Sep 17 00:00:00 2001
From: root <keenwolf@gmail.com>
Date: Thu, 14 Jan 2021 13:36:22 +0800
Subject: [PATCH 0094/1238] Function arguments declaration Upgrade with super
 class

---
 src/Functions/FunctionFile.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp
index 8c29a9a39df..2a524adde47 100644
--- a/src/Functions/FunctionFile.cpp
+++ b/src/Functions/FunctionFile.cpp
@@ -47,8 +47,7 @@ public:
 
     bool useDefaultImplementationForConstants() const override { return true; }
     //ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
-
-    ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
     {
         const auto & column = arguments[0].column;
         const char * filename = nullptr;

From e95b8089cd0384090b8808d98723a4ad4cd414be Mon Sep 17 00:00:00 2001
From: keenwolf <keenwolf@gmail.com>
Date: Thu, 14 Jan 2021 18:44:16 +0800
Subject: [PATCH 0095/1238] Make code clean including properly exception handle

---
 src/Functions/FunctionFile.cpp | 75 +++++++++++++---------------------
 1 file changed, 29 insertions(+), 46 deletions(-)

diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp
index 2a524adde47..e856befa9d1 100644
--- a/src/Functions/FunctionFile.cpp
+++ b/src/Functions/FunctionFile.cpp
@@ -1,7 +1,5 @@
-//#include <Interpreters/Context.h>
 #include <Columns/ColumnString.h>
 #include <Columns/IColumn.h>
-#include <Columns/ColumnFixedString.h>
 #include <DataTypes/DataTypeFixedString.h>
 #include <Functions/FunctionFactory.h>
 #include <Functions/FunctionStringToString.h>
@@ -18,88 +16,74 @@ namespace ErrorCodes
     extern const int ILLEGAL_COLUMN;
     extern const int TOO_LARGE_STRING_SIZE;
     extern const int NOT_IMPLEMENTED;
+    extern const int FILE_DOESNT_EXIST;
+    extern const int CANNOT_OPEN_FILE;
+    extern const int CANNOT_CLOSE_FILE;
+    extern const int CANNOT_FSTAT;
+    extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR;
 }
 
 
-/** Conversion to fixed string is implemented only for strings.
+/** A function to read file as a string.
   */
-class FunctionFromFile : public IFunction
+class FunctionFile : public IFunction
 {
 public:
     static constexpr auto name = "file";
-    static FunctionPtr create(const Context &) { return std::make_shared<FunctionFromFile>(); }
-    static FunctionPtr create() { return std::make_shared<FunctionFromFile>(); }
-    //static FunctionPtr create(const Context & context) { return std::make_shared<ConcatImpl>(context); }
+    static FunctionPtr create(const Context &) { return std::make_shared<FunctionFile>(); }
+    static FunctionPtr create() { return std::make_shared<FunctionFile>(); }
 
     String getName() const override { return name; }
 
     size_t getNumberOfArguments() const override { return 1; }
-    //bool isInjective(const ColumnsWithTypeAndName &) const override { return true; }
+    bool isInjective(const ColumnsWithTypeAndName &) const override { return true; }
 
     DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
     {
         if (!isStringOrFixedString(arguments[0].type))
             throw Exception(getName() + " is only implemented for types String and FixedString", ErrorCodes::NOT_IMPLEMENTED);
-        //??how to get accurate length  here? or should we return normal string type?
-        //return std::make_shared<DataTypeFixedString>(1);
         return std::make_shared<DataTypeString>();
     }
 
     bool useDefaultImplementationForConstants() const override { return true; }
-    //ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
+    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
+
     ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
     {
         const auto & column = arguments[0].column;
         const char * filename = nullptr;
-        // if (const auto * column_string = checkAndGetColumnConst<ColumnString>(column.get()))
+
         if (const auto * column_string = checkAndGetColumn<ColumnString>(column.get()))
         {
             const auto & filename_chars = column_string->getChars();
             filename = reinterpret_cast<const char *>(&filename_chars[0]);
 
- /*
-            //get file path
-            auto user_files_path = Context::getUserFilesPath();
-
-
-            String user_files_absolute_path = Poco::Path(user_files_path).makeAbsolute().makeDirectory().toString();
-            Poco::Path poco_path = Poco::Path(table_path);
-            if (poco_path.isRelative())
-                poco_path = Poco::Path(user_files_absolute_path, poco_path);
-            else //need to judge if the absolute path is in userfilespath?
-            const String path = poco_path.absolute().toString();
-
-*/
             auto fd = open(filename, O_RDONLY);
-            if (fd == -1)
-            {//arguments[0].column->getName()
-                throw Exception("Can't open " + std::string(filename), ErrorCodes::ILLEGAL_COLUMN); //ErrorCode need to be rectify
-            }
+            if (-1 == fd)
+                throwFromErrnoWithPath("Cannot open file " + std::string(filename), std::string(filename),
+                                       errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE);
             struct stat file_stat;
-            if (fstat(fd, &file_stat) == -1)
-            {
-                throw Exception("Can't stat " + std::string(filename), ErrorCodes::ILLEGAL_COLUMN);
-            }
+            if (-1 == fstat(fd, &file_stat))
+                throwFromErrnoWithPath("Cannot stat file " + std::string(filename), std::string(filename),
+                                       ErrorCodes::CANNOT_FSTAT);
+
             auto file_length = static_cast<uint64_t>(file_stat.st_size);
             auto res = ColumnString::create();
             auto & res_chars = res->getChars();
             auto & res_offsets = res->getOffsets();
-            //res_chars.resize_fill(file_length + 1);
-            //omit the copy op to only once.
             res_chars.resize_exact(file_length + 1);
             res_offsets.push_back(file_length + 1);
-            char * buf = reinterpret_cast<char *>(&res_chars[0]);
-            ssize_t bytes_read = pread(fd, buf, file_length, 0);
+            char * res_buf = reinterpret_cast<char *>(&res_chars[0]);
 
+            //To read directly into the String buf, avoiding one redundant copy
+            ssize_t bytes_read = pread(fd, res_buf, file_length, 0);
             if (bytes_read == -1)
-            {
-                throw Exception("Bad read of " + std::string(filename), ErrorCodes::ILLEGAL_COLUMN);
-            }
+                throwFromErrnoWithPath("Read failed for " + std::string(filename), std::string(filename),
+                                   errno == EBADF ? ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR : ErrorCodes::ILLEGAL_COLUMN);
             if (static_cast<uint64_t>(bytes_read) != file_length)
-            {
-                throw Exception("Short read of " + std::string(filename), ErrorCodes::ILLEGAL_COLUMN);
-            }
-            buf[file_length] = '\0';
+                throwFromErrnoWithPath("Cannot read all bytes from " + std::string(filename), std::string(filename), ErrorCodes::ILLEGAL_COLUMN);
+
+            res_buf[file_length] = '\0';
             close(fd);
             return res;
         }
@@ -111,10 +95,9 @@ public:
 };
 
 
-
 void registerFunctionFromFile(FunctionFactory & factory)
 {
-    factory.registerFunction<FunctionFromFile>();
+    factory.registerFunction<FunctionFile>();
 }
 
 }
\ No newline at end of file

From 791a4cfb52b27d511a24c9e74a479bef8a15f20d Mon Sep 17 00:00:00 2001
From: keenwolf <keenwolf@gmail.com>
Date: Thu, 14 Jan 2021 19:46:19 +0800
Subject: [PATCH 0096/1238] Small fix

---
 src/Functions/FunctionFile.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp
index e856befa9d1..f491ad54bf2 100644
--- a/src/Functions/FunctionFile.cpp
+++ b/src/Functions/FunctionFile.cpp
@@ -77,7 +77,7 @@ public:
 
             //To read directly into the String buf, avoiding one redundant copy
             ssize_t bytes_read = pread(fd, res_buf, file_length, 0);
-            if (bytes_read == -1)
+            if (-1 == bytes_read)
                 throwFromErrnoWithPath("Read failed for " + std::string(filename), std::string(filename),
                                    errno == EBADF ? ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR : ErrorCodes::ILLEGAL_COLUMN);
             if (static_cast<uint64_t>(bytes_read) != file_length)

From 53e483d36c24c821e714d3c5224ea8b9d1e17670 Mon Sep 17 00:00:00 2001
From: keenwolf <keenwolf@gmail.com>
Date: Thu, 14 Jan 2021 20:09:13 +0800
Subject: [PATCH 0097/1238] Small fix

---
 src/Functions/FunctionFile.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp
index f491ad54bf2..317bc46364a 100644
--- a/src/Functions/FunctionFile.cpp
+++ b/src/Functions/FunctionFile.cpp
@@ -21,6 +21,7 @@ namespace ErrorCodes
     extern const int CANNOT_CLOSE_FILE;
     extern const int CANNOT_FSTAT;
     extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR;
+    extern const int CANNOT_CLOSE_FILE;
 }
 
 
@@ -84,7 +85,10 @@ public:
                 throwFromErrnoWithPath("Cannot read all bytes from " + std::string(filename), std::string(filename), ErrorCodes::ILLEGAL_COLUMN);
 
             res_buf[file_length] = '\0';
-            close(fd);
+            if (0 != close(fd))
+                throw Exception("Cannot close file " + std::string(filename), ErrorCodes::CANNOT_CLOSE_FILE);
+            fd = -1;
+
             return res;
         }
         else

From 4b6cc4ea4bf6ff293207f3fbbf91a53ff6ce4528 Mon Sep 17 00:00:00 2001
From: keenwolf <keenwolf@gmail.com>
Date: Thu, 14 Jan 2021 23:48:38 +0800
Subject: [PATCH 0098/1238] Add Function to read file as a String, Using
 ReadBuffer.

---
 src/Functions/FunctionFile.cpp | 159 ++++++++++++++-------------------
 1 file changed, 67 insertions(+), 92 deletions(-)

diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp
index 317bc46364a..c2757798584 100644
--- a/src/Functions/FunctionFile.cpp
+++ b/src/Functions/FunctionFile.cpp
@@ -1,107 +1,82 @@
 #include <Columns/ColumnString.h>
 #include <Columns/IColumn.h>
-#include <DataTypes/DataTypeFixedString.h>
 #include <Functions/FunctionFactory.h>
-#include <Functions/FunctionStringToString.h>
-#include <common/find_symbols.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <sys/stat.h>
+#include <DataTypes/DataTypeString.h>
+#include <IO/ReadBufferFromFile.h>
+
 
 namespace DB
 {
 
-namespace ErrorCodes
-{
-    extern const int ILLEGAL_COLUMN;
-    extern const int TOO_LARGE_STRING_SIZE;
-    extern const int NOT_IMPLEMENTED;
-    extern const int FILE_DOESNT_EXIST;
-    extern const int CANNOT_OPEN_FILE;
-    extern const int CANNOT_CLOSE_FILE;
-    extern const int CANNOT_FSTAT;
-    extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR;
-    extern const int CANNOT_CLOSE_FILE;
-}
+    namespace ErrorCodes
+    {
+        extern const int ILLEGAL_COLUMN;
+        extern const int NOT_IMPLEMENTED;
+    }
 
 
 /** A function to read file as a string.
   */
-class FunctionFile : public IFunction
-{
-public:
-    static constexpr auto name = "file";
-    static FunctionPtr create(const Context &) { return std::make_shared<FunctionFile>(); }
-    static FunctionPtr create() { return std::make_shared<FunctionFile>(); }
-
-    String getName() const override { return name; }
-
-    size_t getNumberOfArguments() const override { return 1; }
-    bool isInjective(const ColumnsWithTypeAndName &) const override { return true; }
-
-    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
+    class FunctionFile : public IFunction
     {
-        if (!isStringOrFixedString(arguments[0].type))
-            throw Exception(getName() + " is only implemented for types String and FixedString", ErrorCodes::NOT_IMPLEMENTED);
-        return std::make_shared<DataTypeString>();
+    public:
+        static constexpr auto name = "file";
+        static FunctionPtr create(const Context &) { return std::make_shared<FunctionFile>(); }
+        static FunctionPtr create() { return std::make_shared<FunctionFile>(); }
+
+        String getName() const override { return name; }
+
+        size_t getNumberOfArguments() const override { return 1; }
+        bool isInjective(const ColumnsWithTypeAndName &) const override { return true; }
+
+        DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
+        {
+            if (!isStringOrFixedString(arguments[0].type))
+                throw Exception(getName() + " is only implemented for types String and FixedString", ErrorCodes::NOT_IMPLEMENTED);
+            return std::make_shared<DataTypeString>();
+        }
+
+        bool useDefaultImplementationForConstants() const override { return true; }
+        ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
+
+        ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
+        {
+            const auto & column = arguments[0].column;
+            const char * filename = nullptr;
+            if (const auto * column_string = checkAndGetColumn<ColumnString>(column.get()))
+            {
+                const auto & filename_chars = column_string->getChars();
+                filename = reinterpret_cast<const char *>(&filename_chars[0]);
+                auto res = ColumnString::create();
+                auto & res_chars = res->getChars();
+                auto & res_offsets = res->getOffsets();
+		
+                ReadBufferFromFile in(filename);
+                char *res_buf;
+                size_t file_len = 0, rlen = 0;
+                while (0 == file_len || 4096 == rlen)
+                {
+                    file_len += rlen;
+                    res_chars.resize(4096 + file_len);
+                    res_buf = reinterpret_cast<char *>(&res_chars[0]);
+                    rlen = in.read(res_buf + file_len, 4096);
+                }
+                file_len += rlen;
+                res_offsets.push_back(file_len + 1);
+                res_buf[file_len] = '\0';
+
+                return res;
+            }
+            else
+            {
+                throw Exception("Bad Function arguments for file() " + std::string(filename), ErrorCodes::ILLEGAL_COLUMN);
+            }
+        }
+    };
+
+    void registerFunctionFromFile(FunctionFactory & factory)
+    {
+        factory.registerFunction<FunctionFile>();
     }
 
-    bool useDefaultImplementationForConstants() const override { return true; }
-    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
-
-    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
-    {
-        const auto & column = arguments[0].column;
-        const char * filename = nullptr;
-
-        if (const auto * column_string = checkAndGetColumn<ColumnString>(column.get()))
-        {
-            const auto & filename_chars = column_string->getChars();
-            filename = reinterpret_cast<const char *>(&filename_chars[0]);
-
-            auto fd = open(filename, O_RDONLY);
-            if (-1 == fd)
-                throwFromErrnoWithPath("Cannot open file " + std::string(filename), std::string(filename),
-                                       errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE);
-            struct stat file_stat;
-            if (-1 == fstat(fd, &file_stat))
-                throwFromErrnoWithPath("Cannot stat file " + std::string(filename), std::string(filename),
-                                       ErrorCodes::CANNOT_FSTAT);
-
-            auto file_length = static_cast<uint64_t>(file_stat.st_size);
-            auto res = ColumnString::create();
-            auto & res_chars = res->getChars();
-            auto & res_offsets = res->getOffsets();
-            res_chars.resize_exact(file_length + 1);
-            res_offsets.push_back(file_length + 1);
-            char * res_buf = reinterpret_cast<char *>(&res_chars[0]);
-
-            //To read directly into the String buf, avoiding one redundant copy
-            ssize_t bytes_read = pread(fd, res_buf, file_length, 0);
-            if (-1 == bytes_read)
-                throwFromErrnoWithPath("Read failed for " + std::string(filename), std::string(filename),
-                                   errno == EBADF ? ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR : ErrorCodes::ILLEGAL_COLUMN);
-            if (static_cast<uint64_t>(bytes_read) != file_length)
-                throwFromErrnoWithPath("Cannot read all bytes from " + std::string(filename), std::string(filename), ErrorCodes::ILLEGAL_COLUMN);
-
-            res_buf[file_length] = '\0';
-            if (0 != close(fd))
-                throw Exception("Cannot close file " + std::string(filename), ErrorCodes::CANNOT_CLOSE_FILE);
-            fd = -1;
-
-            return res;
-        }
-        else
-        {
-            throw Exception("Bad Function arguments for file() " + std::string(filename), ErrorCodes::ILLEGAL_COLUMN);
-        }
-    }
-};
-
-
-void registerFunctionFromFile(FunctionFactory & factory)
-{
-    factory.registerFunction<FunctionFile>();
 }
-
-}
\ No newline at end of file

From a2070bf13010d57e5614749177c1e7da3160c0a7 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Thu, 14 Jan 2021 19:20:33 +0300
Subject: [PATCH 0099/1238] Add some missed files

---
 src/Coordination/LoggerWrapper.h           |  40 +++++
 src/Coordination/SummingStateMachine.cpp   | 163 +++++++++++++++++++++
 src/Coordination/SummingStateMachine.h     |  77 ++++++++++
 src/Coordination/tests/gtest_for_build.cpp |  91 +++++++++---
 4 files changed, 351 insertions(+), 20 deletions(-)
 create mode 100644 src/Coordination/LoggerWrapper.h
 create mode 100644 src/Coordination/SummingStateMachine.cpp
 create mode 100644 src/Coordination/SummingStateMachine.h

diff --git a/src/Coordination/LoggerWrapper.h b/src/Coordination/LoggerWrapper.h
new file mode 100644
index 00000000000..51718eaee8b
--- /dev/null
+++ b/src/Coordination/LoggerWrapper.h
@@ -0,0 +1,40 @@
+#pragma once
+
+#include <libnuraft/nuraft.hxx>
+#include <common/logger_useful.h>
+
+namespace DB
+{
+
+class LoggerWrapper : public nuraft::logger
+{
+    LoggerWrapper(const std::string & name)
+        : log(&Poco::Logger::get(name))
+    {}
+
+    void put_details(
+        int level,
+        const char * /* source_file */,
+        const char * /* func_name */,
+        size_t /* line_number */,
+        const std::string & msg) override
+    {
+        LOG_IMPL(log, level, level, msg);
+    }
+
+    void set_level(int level) override
+    {
+        level = std::max(6, std::min(1, level));
+        log->setLevel(level);
+    }
+
+    int get_level() override
+    {
+        return log->getLevel();
+    }
+
+pivate:
+    Poco::Logger * log;
+};
+
+}
diff --git a/src/Coordination/SummingStateMachine.cpp b/src/Coordination/SummingStateMachine.cpp
new file mode 100644
index 00000000000..16154ca8cd4
--- /dev/null
+++ b/src/Coordination/SummingStateMachine.cpp
@@ -0,0 +1,163 @@
+#include <Coordination/SummingStateMachine.h>
+#include <iostream>
+
+namespace DB
+{
+
+static int64_t deserializeValue(nuraft::buffer & buffer)
+{
+    nuraft::buffer_serializer bs(buffer);
+    int64_t result;
+    memcpy(&result, bs.get_raw(buffer.size()), sizeof(result));
+    return result;
+}
+
+SummingStateMachine::SummingStateMachine()
+    : value(0)
+    , last_committed_idx(0)
+{
+}
+
+nuraft::ptr<nuraft::buffer> SummingStateMachine::commit(const size_t log_idx, nuraft::buffer & data)
+{
+    int64_t value_to_add = deserializeValue(data);
+
+    value += value_to_add;
+    last_committed_idx = log_idx;
+
+    // Return Raft log number as a return result.
+    nuraft::ptr<nuraft::buffer> ret = nuraft::buffer::alloc(sizeof(log_idx));
+    nuraft::buffer_serializer bs(ret);
+    bs.put_u64(log_idx);
+    return ret;
+}
+
+bool SummingStateMachine::apply_snapshot(nuraft::snapshot & s)
+{
+    std::lock_guard<std::mutex> ll(snapshots_lock);
+    auto entry = snapshots.find(s.get_last_log_idx());
+    if (entry == snapshots.end())
+        return false;
+
+    auto ctx = entry->second;
+    value = ctx->value;
+    return true;
+}
+
+nuraft::ptr<nuraft::snapshot> SummingStateMachine::last_snapshot()
+{
+    // Just return the latest snapshot.
+    std::lock_guard<std::mutex> ll(snapshots_lock);
+    auto entry = snapshots.rbegin();
+    if (entry == snapshots.rend()) return nullptr;
+
+    auto ctx = entry->second;
+    return ctx->snapshot;
+}
+
+
+void SummingStateMachine::createSnapshotInternal(nuraft::snapshot & s)
+{
+    // Clone snapshot from `s`.
+    nuraft::ptr<nuraft::buffer> snp_buf = s.serialize();
+    nuraft::ptr<nuraft::snapshot> ss = nuraft::snapshot::deserialize(*snp_buf);
+
+    // Put into snapshot map.
+    auto ctx = cs_new<SingleValueSnapshotContext>(ss, value);
+    snapshots[s.get_last_log_idx()] = ctx;
+
+    // Maintain last 3 snapshots only.
+    const int MAX_SNAPSHOTS = 3;
+    int num = snapshots.size();
+    auto entry = snapshots.begin();
+
+    for (int ii = 0; ii < num - MAX_SNAPSHOTS; ++ii)
+    {
+        if (entry == snapshots.end())
+            break;
+        entry = snapshots.erase(entry);
+    }
+}
+
+void SummingStateMachine::save_logical_snp_obj(
+    nuraft::snapshot & s,
+    size_t & obj_id,
+    nuraft::buffer & data,
+    bool /*is_first_obj*/,
+    bool /*is_last_obj*/)
+{
+    if (obj_id == 0)
+    {
+        // Object ID == 0: it contains dummy value, create snapshot context.
+        createSnapshotInternal(s);
+    }
+    else
+    {
+        // Object ID > 0: actual snapshot value.
+        nuraft::buffer_serializer bs(data);
+        int64_t local_value = static_cast<int64_t>(bs.get_u64());
+
+        std::lock_guard<std::mutex> ll(snapshots_lock);
+        auto entry = snapshots.find(s.get_last_log_idx());
+        assert(entry != snapshots.end());
+        entry->second->value = local_value;
+    }
+    // Request next object.
+    obj_id++;
+}
+
+int SummingStateMachine::read_logical_snp_obj(
+    nuraft::snapshot & s,
+    void* & /*user_snp_ctx*/,
+    ulong obj_id,
+    nuraft::ptr<nuraft::buffer> & data_out,
+    bool & is_last_obj)
+{
+    nuraft::ptr<SingleValueSnapshotContext> ctx = nullptr;
+    {
+        std::lock_guard<std::mutex> ll(snapshots_lock);
+        auto entry = snapshots.find(s.get_last_log_idx());
+        if (entry == snapshots.end()) {
+            // Snapshot doesn't exist.
+            data_out = nullptr;
+            is_last_obj = true;
+            return 0;
+        }
+        ctx = entry->second;
+    }
+
+    if (obj_id == 0)
+    {
+        // Object ID == 0: first object, put dummy data.
+        data_out = nuraft::buffer::alloc(sizeof(Int32));
+        nuraft::buffer_serializer bs(data_out);
+        bs.put_i32(0);
+        is_last_obj = false;
+
+    }
+    else
+    {
+        // Object ID > 0: second object, put actual value.
+        data_out = nuraft::buffer::alloc(sizeof(size_t));
+        nuraft::buffer_serializer bs(data_out);
+        bs.put_u64(ctx->value);
+        is_last_obj = true;
+    }
+    return 0;
+}
+
+void SummingStateMachine::create_snapshot(
+    nuraft::snapshot & s,
+    nuraft::async_result<bool>::handler_type & when_done)
+{
+    {
+        std::lock_guard<std::mutex> ll(snapshots_lock);
+        createSnapshotInternal(s);
+    }
+    nuraft::ptr<std::exception> except(nullptr);
+    bool ret = true;
+    when_done(ret, except);
+}
+
+
+}
diff --git a/src/Coordination/SummingStateMachine.h b/src/Coordination/SummingStateMachine.h
new file mode 100644
index 00000000000..df343378408
--- /dev/null
+++ b/src/Coordination/SummingStateMachine.h
@@ -0,0 +1,77 @@
+#pragma once
+
+#include <libnuraft/nuraft.hxx>
+#include <Core/Types.h>
+#include <atomic>
+#include <map>
+#include <mutex>
+
+namespace DB
+{
+
+class SummingStateMachine : public nuraft::state_machine
+{
+public:
+    SummingStateMachine();
+
+    nuraft::ptr<nuraft::buffer> pre_commit(const size_t /*log_idx*/, nuraft::buffer & /*data*/) override { return nullptr; }
+
+    nuraft::ptr<nuraft::buffer> commit(const size_t log_idx, nuraft::buffer & data) override;
+
+    void rollback(const size_t /*log_idx*/, nuraft::buffer & /*data*/) override {}
+
+    size_t last_commit_index() override { return last_committed_idx; }
+
+    bool apply_snapshot(nuraft::snapshot & s) override;
+
+    nuraft::ptr<nuraft::snapshot> last_snapshot() override;
+
+    void create_snapshot(
+        nuraft::snapshot & s,
+        nuraft::async_result<bool>::handler_type & when_done) override;
+
+    void save_logical_snp_obj(
+        nuraft::snapshot & s,
+        size_t & obj_id,
+        nuraft::buffer & data,
+        bool is_first_obj,
+        bool is_last_obj) override;
+
+    int read_logical_snp_obj(
+        nuraft::snapshot & s,
+        void* & user_snp_ctx,
+        ulong obj_id,
+        nuraft::ptr<nuraft::buffer> & data_out,
+        bool & is_last_obj) override;
+
+    int64_t getValue() const { return value; }
+
+private:
+    struct SingleValueSnapshotContext
+    {
+        SingleValueSnapshotContext(nuraft::ptr<nuraft::snapshot> & s, int64_t v)
+            : snapshot(s)
+            , value(v)
+        {}
+
+        nuraft::ptr<nuraft::snapshot> snapshot;
+        int64_t value;
+    };
+
+    void createSnapshotInternal(nuraft::snapshot & s);
+
+    // State machine's current value.
+    std::atomic<int64_t> value;
+
+    // Last committed Raft log number.
+    std::atomic<uint64_t> last_committed_idx;
+
+    // Keeps the last 3 snapshots, by their Raft log numbers.
+    std::map<uint64_t, nuraft::ptr<SingleValueSnapshotContext>> snapshots;
+
+    // Mutex for `snapshots_`.
+    std::mutex snapshots_lock;
+
+};
+
+}
diff --git a/src/Coordination/tests/gtest_for_build.cpp b/src/Coordination/tests/gtest_for_build.cpp
index f9856eb275a..5785c9adb27 100644
--- a/src/Coordination/tests/gtest_for_build.cpp
+++ b/src/Coordination/tests/gtest_for_build.cpp
@@ -3,6 +3,7 @@
 #include <Coordination/InMemoryLogStore.h>
 #include <Coordination/InMemoryStateManager.h>
 #include <Coordination/SummingStateMachine.h>
+#include <Coordination/LoggerWrapper.h>
 #include <Common/Exception.h>
 #include <libnuraft/nuraft.hxx>
 #include <thread>
@@ -45,7 +46,7 @@ struct SummingRaftServer
         params.return_method_ = nuraft::raft_params::blocking;
 
         raft_instance = launcher.init(
-            state_machine, state_manager, nuraft::cs_new<nuraft::logger>(), port,
+            state_machine, state_manager, nuraft::cs_new<LoggerWrapper>(), port,
             nuraft::asio_service::options{}, params);
 
         if (!raft_instance)
@@ -101,7 +102,31 @@ nuraft::ptr<nuraft::buffer> getLogEntry(int64_t number)
     return ret;
 }
 
-TEST(CoordinationTest, TestSummingRaft)
+
+TEST(CoordinationTest, TestSummingRaft1)
+{
+    SummingRaftServer s1(1, "localhost", 44444);
+
+    /// Single node is leader
+    EXPECT_EQ(s1.raft_instance->get_leader(), 1);
+
+    auto entry1 = getLogEntry(143);
+    auto ret = s1.raft_instance->append_entries({entry});
+    EXPECT_TRUE(ret->get_accepted()) << "failed to replicate: entry 1" << ret->get_result_code();
+    EXPECT_EQ(ret->get_result_code(), nuraft::cmd_result_code::OK) << "failed to replicate: entry 1" << ret->get_result_code();
+
+    while (s1.state_machine->getValue() != 143)
+    {
+        std::cout << "Waiting s1 to apply entry\n";
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    }
+
+    EXPECT_EQ(s1.state_machine->getValue(), 143);
+
+    s1.launcher.shutdown(5);
+}
+
+TEST(CoordinationTest, TestSummingRaft3)
 {
     SummingRaftServer s1(1, "localhost", 44444);
     SummingRaftServer s2(2, "localhost", 44445);
@@ -145,24 +170,8 @@ TEST(CoordinationTest, TestSummingRaft)
     std::cerr << "Starting to add entries\n";
     auto entry = getLogEntry(1);
     auto ret = s2.raft_instance->append_entries({entry});
-    if (!ret->get_accepted())
-    {
-        // Log append rejected, usually because this node is not a leader.
-        std::cout << "failed to replicate: entry 1" << ret->get_result_code() << std::endl;
-        EXPECT_TRUE(false);
-    }
-    if (ret->get_result_code() != nuraft::cmd_result_code::OK)
-    {
-        // Something went wrong.
-        // This means committing this log failed,
-        // but the log itself is still in the log store.
-        std::cout << "failed to replicate: entry 1" << ret->get_result_code() << std::endl;
-        EXPECT_TRUE(false);
-    }
-    else
-    {
-        std::cout << "Append ok\n";
-    }
+    EXPECT_TRUE(ret->get_accepted()) << "failed to replicate: entry 1" << ret->get_result_code();
+    EXPECT_EQ(ret->get_result_code(), nuraft::cmd_result_code::OK) << "failed to replicate: entry 1" << ret->get_result_code();
 
     while (s1.state_machine->getValue() != 1)
     {
@@ -176,10 +185,52 @@ TEST(CoordinationTest, TestSummingRaft)
         std::this_thread::sleep_for(std::chrono::milliseconds(100));
     }
 
+    while (s3.state_machine->getValue() != 1)
+    {
+        std::cout << "Waiting s3 to apply entry\n";
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    }
+
     EXPECT_EQ(s1.state_machine->getValue(), 1);
     EXPECT_EQ(s2.state_machine->getValue(), 1);
     EXPECT_EQ(s3.state_machine->getValue(), 1);
 
+    auto non_leader_entry = getLogEntry(3);
+    auto ret_non_leader1 = s1.raft_instance->append_entries({non_leader_entry});
+
+    EXPECT_FALSE(ret_non_leader1->get_accepted());
+
+    auto ret_non_leader3 = s3.raft_instance->append_entries({non_leader_entry});
+
+    EXPECT_FALSE(ret_non_leader3->get_accepted());
+
+    auto leader_entry = getLogEntry(77);
+    auto ret_leader = s2.raft_instance->append_entries({leader_entry});
+    EXPECT_TRUE(ret_leader->get_accepted()) << "failed to replicate: entry 78" << ret_leader->get_result_code();
+    EXPECT_EQ(ret_leader->get_result_code(), nuraft::cmd_result_code::OK) << "failed to replicate: entry 78" << ret_leader->get_result_code();
+
+    while (s1.state_machine->getValue() != 78)
+    {
+        std::cout << "Waiting s1 to apply entry\n";
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    }
+
+    while (s2.state_machine->getValue() != 78)
+    {
+        std::cout << "Waiting s2 to apply entry\n";
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    }
+
+    while (s3.state_machine->getValue() != 78)
+    {
+        std::cout << "Waiting s3 to apply entry\n";
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    }
+
+    EXPECT_EQ(s1.state_machine->getValue(), 78);
+    EXPECT_EQ(s2.state_machine->getValue(), 78);
+    EXPECT_EQ(s3.state_machine->getValue(), 78);
+
     s1.launcher.shutdown(5);
     s2.launcher.shutdown(5);
     s3.launcher.shutdown(5);

From 1cc5be3b68d725919d812756f47f880316f26c69 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Thu, 14 Jan 2021 23:43:52 +0300
Subject: [PATCH 0100/1238] Compileable code

---
 src/Coordination/LoggerWrapper.h           | 5 +++--
 src/Coordination/tests/gtest_for_build.cpp | 4 ++--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/Coordination/LoggerWrapper.h b/src/Coordination/LoggerWrapper.h
index 51718eaee8b..37de7806e9d 100644
--- a/src/Coordination/LoggerWrapper.h
+++ b/src/Coordination/LoggerWrapper.h
@@ -8,6 +8,7 @@ namespace DB
 
 class LoggerWrapper : public nuraft::logger
 {
+public:
     LoggerWrapper(const std::string & name)
         : log(&Poco::Logger::get(name))
     {}
@@ -19,7 +20,7 @@ class LoggerWrapper : public nuraft::logger
         size_t /* line_number */,
         const std::string & msg) override
     {
-        LOG_IMPL(log, level, level, msg);
+        LOG_IMPL(log, static_cast<DB::LogsLevel>(level), static_cast<Poco::Message::Priority>(level), msg);
     }
 
     void set_level(int level) override
@@ -33,7 +34,7 @@ class LoggerWrapper : public nuraft::logger
         return log->getLevel();
     }
 
-pivate:
+private:
     Poco::Logger * log;
 };
 
diff --git a/src/Coordination/tests/gtest_for_build.cpp b/src/Coordination/tests/gtest_for_build.cpp
index 5785c9adb27..c13c5799ff7 100644
--- a/src/Coordination/tests/gtest_for_build.cpp
+++ b/src/Coordination/tests/gtest_for_build.cpp
@@ -46,7 +46,7 @@ struct SummingRaftServer
         params.return_method_ = nuraft::raft_params::blocking;
 
         raft_instance = launcher.init(
-            state_machine, state_manager, nuraft::cs_new<LoggerWrapper>(), port,
+            state_machine, state_manager, nuraft::cs_new<DB::LoggerWrapper>("ToyRaftLogger"), port,
             nuraft::asio_service::options{}, params);
 
         if (!raft_instance)
@@ -111,7 +111,7 @@ TEST(CoordinationTest, TestSummingRaft1)
     EXPECT_EQ(s1.raft_instance->get_leader(), 1);
 
     auto entry1 = getLogEntry(143);
-    auto ret = s1.raft_instance->append_entries({entry});
+    auto ret = s1.raft_instance->append_entries({entry1});
     EXPECT_TRUE(ret->get_accepted()) << "failed to replicate: entry 1" << ret->get_result_code();
     EXPECT_EQ(ret->get_result_code(), nuraft::cmd_result_code::OK) << "failed to replicate: entry 1" << ret->get_result_code();
 

From d98cac0dd32b26e56ac0f40a3df074fafe0e1be4 Mon Sep 17 00:00:00 2001
From: keenwolf <keenwolf@gmail.com>
Date: Fri, 15 Jan 2021 14:27:38 +0800
Subject: [PATCH 0101/1238] Add another method for reading file at once to
 avoid frequently realloc and mem move

---
 src/Functions/FunctionFile.cpp | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp
index c2757798584..1450b748955 100644
--- a/src/Functions/FunctionFile.cpp
+++ b/src/Functions/FunctionFile.cpp
@@ -3,6 +3,7 @@
 #include <Functions/FunctionFactory.h>
 #include <DataTypes/DataTypeString.h>
 #include <IO/ReadBufferFromFile.h>
+#include <Poco/File.h>
 
 
 namespace DB
@@ -50,18 +51,33 @@ namespace DB
                 auto res = ColumnString::create();
                 auto & res_chars = res->getChars();
                 auto & res_offsets = res->getOffsets();
-		
+
+                //TBD: Here, need to restrict the access permission for only user_path...
+
                 ReadBufferFromFile in(filename);
+
+                // Method-1: Read the whole file at once
+                size_t file_len = Poco::File(filename).getSize();
+                res_chars.resize(file_len + 1);
+                char *res_buf = reinterpret_cast<char *>(&res_chars[0]);
+                in.readStrict(res_buf, file_len);
+
+                /*
+                //Method-2: Read with loop
+
                 char *res_buf;
-                size_t file_len = 0, rlen = 0;
-                while (0 == file_len || 4096 == rlen)
+                size_t file_len = 0, rlen = 0, bsize = 4096;
+                while (0 == file_len || rlen == bsize)
                 {
                     file_len += rlen;
-                    res_chars.resize(4096 + file_len);
+                    res_chars.resize(1 + bsize + file_len);
                     res_buf = reinterpret_cast<char *>(&res_chars[0]);
-                    rlen = in.read(res_buf + file_len, 4096);
+                    rlen = in.read(res_buf + file_len, bsize);
                 }
                 file_len += rlen;
+                */
+
+
                 res_offsets.push_back(file_len + 1);
                 res_buf[file_len] = '\0';
 

From 2d2277245535d1dda55c64ad4535d1ffacb5e707 Mon Sep 17 00:00:00 2001
From: keenwolf <keenwolf@gmail.com>
Date: Sat, 16 Jan 2021 11:27:31 +0800
Subject: [PATCH 0102/1238] Handle with context pass

---
 CMakeLists.txt                 |  4 +---
 src/Functions/FunctionFile.cpp | 27 ++++++++++++++++++++++-----
 2 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 853b2df7aca..3a37ba4c28e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -375,9 +375,7 @@ else ()
     option(WERROR "Enable -Werror compiler option" ON)
 endif ()
 
-if (WERROR)
-    add_warning(error)
-endif ()
+option(WERROR "Enable -Werror compiler option" OFF)
 
 # Make this extra-checks for correct library dependencies.
 if (OS_LINUX AND NOT SANITIZE)
diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp
index 1450b748955..0d8f315cdea 100644
--- a/src/Functions/FunctionFile.cpp
+++ b/src/Functions/FunctionFile.cpp
@@ -4,6 +4,8 @@
 #include <DataTypes/DataTypeString.h>
 #include <IO/ReadBufferFromFile.h>
 #include <Poco/File.h>
+#include <Poco/Path.h>
+#include <Interpreters/Context.h>
 
 
 namespace DB
@@ -15,15 +17,19 @@ namespace DB
         extern const int NOT_IMPLEMENTED;
     }
 
+    void checkCreationIsAllowed(const Context & context_global, const std::string & db_dir_path, const std::string & table_path);
 
-/** A function to read file as a string.
+
+    /** A function to read file as a string.
   */
     class FunctionFile : public IFunction
     {
     public:
         static constexpr auto name = "file";
-        static FunctionPtr create(const Context &) { return std::make_shared<FunctionFile>(); }
-        static FunctionPtr create() { return std::make_shared<FunctionFile>(); }
+        static FunctionPtr create(const Context &context) { return std::make_shared<FunctionFile>(context); }
+        //static FunctionPtr create() { return std::make_shared<FunctionFile>(); }
+        explicit FunctionFile(const Context &context_) : context(context_) {};
+        //FunctionFile() {};
 
         String getName() const override { return name; }
 
@@ -52,13 +58,21 @@ namespace DB
                 auto & res_chars = res->getChars();
                 auto & res_offsets = res->getOffsets();
 
-                //TBD: Here, need to restrict the access permission for only user_path...
+                //File_path access permission check.
+                const String user_files_path = context.getUserFilesPath();
+                String user_files_absolute_path = Poco::Path(user_files_path).makeAbsolute().makeDirectory().toString();
+                Poco::Path poco_filepath = Poco::Path(filename);
+                if (poco_filepath.isRelative())
+                    poco_filepath = Poco::Path(user_files_absolute_path, poco_filepath);
+                const String file_absolute_path = poco_filepath.absolute().toString();
+                checkCreationIsAllowed(context, user_files_absolute_path, file_absolute_path);
 
+                //Start read from file.
                 ReadBufferFromFile in(filename);
 
                 // Method-1: Read the whole file at once
                 size_t file_len = Poco::File(filename).getSize();
-                res_chars.resize(file_len + 1);
+                res_chars.resize_exact(file_len + 1);
                 char *res_buf = reinterpret_cast<char *>(&res_chars[0]);
                 in.readStrict(res_buf, file_len);
 
@@ -88,6 +102,9 @@ namespace DB
                 throw Exception("Bad Function arguments for file() " + std::string(filename), ErrorCodes::ILLEGAL_COLUMN);
             }
         }
+
+    private:
+        const Context & context;
     };
 
     void registerFunctionFromFile(FunctionFactory & factory)

From 29aa0da28c7099771121924e23743910e1e666b9 Mon Sep 17 00:00:00 2001
From: keenwolf <keenwolf@gmail.com>
Date: Sat, 16 Jan 2021 14:55:59 +0800
Subject: [PATCH 0103/1238] Make filepath check done but with infile func, need
 to modify the ld path

---
 src/Functions/FunctionFile.cpp | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp
index 0d8f315cdea..7e362ca539b 100644
--- a/src/Functions/FunctionFile.cpp
+++ b/src/Functions/FunctionFile.cpp
@@ -6,7 +6,8 @@
 #include <Poco/File.h>
 #include <Poco/Path.h>
 #include <Interpreters/Context.h>
-
+#include <Storages/StorageFile.h>
+#include <Common/StringUtils/StringUtils.h>
 
 namespace DB
 {
@@ -20,6 +21,25 @@ namespace DB
     void checkCreationIsAllowed(const Context & context_global, const std::string & db_dir_path, const std::string & table_path);
 
 
+    inline bool startsWith2(const std::string & s, const std::string & prefix)
+    {
+        return s.size() >= prefix.size() && 0 == memcmp(s.data(), prefix.data(), prefix.size());
+    }
+
+    void checkCreationIsAllowed(const Context & context_global, const std::string & db_dir_path, const std::string & table_path)
+    {
+        if (context_global.getApplicationType() != Context::ApplicationType::SERVER)
+            return;
+
+        /// "/dev/null" is allowed for perf testing
+        if (!startsWith2(table_path, db_dir_path) && table_path != "/dev/null")
+            throw Exception("File is not inside " + db_dir_path, 9);
+
+        Poco::File table_path_poco_file = Poco::File(table_path);
+        if (table_path_poco_file.exists() && table_path_poco_file.isDirectory())
+            throw Exception("File must not be a directory", 9);
+    }
+
     /** A function to read file as a string.
   */
     class FunctionFile : public IFunction

From 77e74b397c30efbdfaf4a139facdcdbcc4919cd4 Mon Sep 17 00:00:00 2001
From: keenwolf <keenwolf@gmail.com>
Date: Sat, 16 Jan 2021 18:43:56 +0800
Subject: [PATCH 0104/1238] Add file access check, also give another read
 method in comments for reference

---
 src/Functions/FunctionFile.cpp | 84 +++++++++++++++-------------------
 1 file changed, 38 insertions(+), 46 deletions(-)

diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp
index 7e362ca539b..1de98cc3f38 100644
--- a/src/Functions/FunctionFile.cpp
+++ b/src/Functions/FunctionFile.cpp
@@ -6,8 +6,8 @@
 #include <Poco/File.h>
 #include <Poco/Path.h>
 #include <Interpreters/Context.h>
-#include <Storages/StorageFile.h>
-#include <Common/StringUtils/StringUtils.h>
+#include <fcntl.h>
+#include <unistd.h>
 
 namespace DB
 {
@@ -15,29 +15,14 @@ namespace DB
     namespace ErrorCodes
     {
         extern const int ILLEGAL_COLUMN;
+        extern const int TOO_LARGE_STRING_SIZE;
         extern const int NOT_IMPLEMENTED;
-    }
-
-    void checkCreationIsAllowed(const Context & context_global, const std::string & db_dir_path, const std::string & table_path);
-
-
-    inline bool startsWith2(const std::string & s, const std::string & prefix)
-    {
-        return s.size() >= prefix.size() && 0 == memcmp(s.data(), prefix.data(), prefix.size());
-    }
-
-    void checkCreationIsAllowed(const Context & context_global, const std::string & db_dir_path, const std::string & table_path)
-    {
-        if (context_global.getApplicationType() != Context::ApplicationType::SERVER)
-            return;
-
-        /// "/dev/null" is allowed for perf testing
-        if (!startsWith2(table_path, db_dir_path) && table_path != "/dev/null")
-            throw Exception("File is not inside " + db_dir_path, 9);
-
-        Poco::File table_path_poco_file = Poco::File(table_path);
-        if (table_path_poco_file.exists() && table_path_poco_file.isDirectory())
-            throw Exception("File must not be a directory", 9);
+        extern const int FILE_DOESNT_EXIST;
+        extern const int CANNOT_OPEN_FILE;
+        extern const int CANNOT_CLOSE_FILE;
+        extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR;
+        extern const int INCORRECT_FILE_NAME;
+        extern const int DATABASE_ACCESS_DENIED;
     }
 
     /** A function to read file as a string.
@@ -47,9 +32,7 @@ namespace DB
     public:
         static constexpr auto name = "file";
         static FunctionPtr create(const Context &context) { return std::make_shared<FunctionFile>(context); }
-        //static FunctionPtr create() { return std::make_shared<FunctionFile>(); }
         explicit FunctionFile(const Context &context_) : context(context_) {};
-        //FunctionFile() {};
 
         String getName() const override { return name; }
 
@@ -78,40 +61,36 @@ namespace DB
                 auto & res_chars = res->getChars();
                 auto & res_offsets = res->getOffsets();
 
-                //File_path access permission check.
+                //File access permission check
                 const String user_files_path = context.getUserFilesPath();
                 String user_files_absolute_path = Poco::Path(user_files_path).makeAbsolute().makeDirectory().toString();
                 Poco::Path poco_filepath = Poco::Path(filename);
                 if (poco_filepath.isRelative())
                     poco_filepath = Poco::Path(user_files_absolute_path, poco_filepath);
                 const String file_absolute_path = poco_filepath.absolute().toString();
-                checkCreationIsAllowed(context, user_files_absolute_path, file_absolute_path);
+                checkReadIsAllowed(user_files_absolute_path, file_absolute_path);
 
-                //Start read from file.
-                ReadBufferFromFile in(filename);
-
-                // Method-1: Read the whole file at once
-                size_t file_len = Poco::File(filename).getSize();
+                //Method-1: Read file with ReadBuffer
+                ReadBufferFromFile in(file_absolute_path);
+                ssize_t file_len = Poco::File(file_absolute_path).getSize();
                 res_chars.resize_exact(file_len + 1);
                 char *res_buf = reinterpret_cast<char *>(&res_chars[0]);
                 in.readStrict(res_buf, file_len);
 
                 /*
-                //Method-2: Read with loop
-
-                char *res_buf;
-                size_t file_len = 0, rlen = 0, bsize = 4096;
-                while (0 == file_len || rlen == bsize)
-                {
-                    file_len += rlen;
-                    res_chars.resize(1 + bsize + file_len);
-                    res_buf = reinterpret_cast<char *>(&res_chars[0]);
-                    rlen = in.read(res_buf + file_len, bsize);
-                }
-                file_len += rlen;
+                //Method-2: Read directly into the String buf, which avoiding one copy from PageCache to ReadBuffer
+                int fd;
+                if (-1 == (fd = open(file_absolute_path.c_str(), O_RDONLY)))
+                     throwFromErrnoWithPath("Cannot open file " + std::string(file_absolute_path), std::string(file_absolute_path),
+                                           errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE);
+                if (file_len != pread(fd, res_buf, file_len, 0))
+                    throwFromErrnoWithPath("Read failed with " + std::string(file_absolute_path), std::string(file_absolute_path),
+                                           ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR);
+                if (0 != close(fd))
+                    throw Exception("Cannot close file " + std::string(file_absolute_path), ErrorCodes::CANNOT_CLOSE_FILE);
+                fd = -1;
                 */
 
-
                 res_offsets.push_back(file_len + 1);
                 res_buf[file_len] = '\0';
 
@@ -124,9 +103,22 @@ namespace DB
         }
 
     private:
+        void checkReadIsAllowed(const std::string & user_files_path, const std::string & file_path) const
+        {
+            // If run in Local mode, no need for path checking.
+            if (context.getApplicationType() != Context::ApplicationType::LOCAL)
+                if (file_path.find(user_files_path) != 0)
+                    throw Exception("File is not inside " + user_files_path, ErrorCodes::DATABASE_ACCESS_DENIED);
+
+            Poco::File path_poco_file = Poco::File(file_path);
+            if (path_poco_file.exists() && path_poco_file.isDirectory())
+                throw Exception("File can't be a directory", ErrorCodes::INCORRECT_FILE_NAME);
+        }
+
         const Context & context;
     };
 
+
     void registerFunctionFromFile(FunctionFactory & factory)
     {
         factory.registerFunction<FunctionFile>();

From 85e4bfa566f35d6a4ab87639610f59c628599c38 Mon Sep 17 00:00:00 2001
From: keenwolf <keenwolf@gmail.com>
Date: Sat, 16 Jan 2021 19:31:15 +0800
Subject: [PATCH 0105/1238] Remove CMakefile from vcs

---
 CMakeLists.txt | 565 -------------------------------------------------
 1 file changed, 565 deletions(-)
 delete mode 100644 CMakeLists.txt

diff --git a/CMakeLists.txt b/CMakeLists.txt
deleted file mode 100644
index 3a37ba4c28e..00000000000
--- a/CMakeLists.txt
+++ /dev/null
@@ -1,565 +0,0 @@
-cmake_minimum_required(VERSION 3.3)
-
-foreach(policy
-        CMP0023
-        CMP0048 # CMake 3.0
-        CMP0074 # CMake 3.12
-        CMP0077
-        CMP0079
-    )
-    if(POLICY ${policy})
-        cmake_policy(SET ${policy} NEW)
-    endif()
-endforeach()
-
-# set default policy
-foreach(default_policy_var_name
-        # make option() honor normal variables for BUILD_SHARED_LIBS:
-        # - re2
-        # - snappy
-        CMAKE_POLICY_DEFAULT_CMP0077
-        # Google Test from sources uses too old cmake, 2.6.x, and CMP0022 should
-        # set, to avoid using deprecated LINK_INTERFACE_LIBRARIES(_<CONFIG>)? over
-        # INTERFACE_LINK_LIBRARIES.
-        CMAKE_POLICY_DEFAULT_CMP0022
-    )
-    set(${default_policy_var_name} NEW)
-endforeach()
-
-project(ClickHouse)
-
-# If turned off: e.g. when ENABLE_FOO is ON, but FOO tool was not found, the CMake will continue.
-option(FAIL_ON_UNSUPPORTED_OPTIONS_COMBINATION
-   "Stop/Fail CMake configuration if some ENABLE_XXX option is defined (either ON or OFF)
-   but is not possible to satisfy" ON)
-
-if(FAIL_ON_UNSUPPORTED_OPTIONS_COMBINATION)
-    set(RECONFIGURE_MESSAGE_LEVEL FATAL_ERROR)
-else()
-    set(RECONFIGURE_MESSAGE_LEVEL STATUS)
-endif()
-
-include (cmake/arch.cmake)
-include (cmake/target.cmake)
-include (cmake/tools.cmake)
-include (cmake/analysis.cmake)
-
-# Ignore export() since we don't use it,
-# but it gets broken with a global targets via link_libraries()
-macro (export)
-endmacro ()
-
-set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/")
-set(CMAKE_EXPORT_COMPILE_COMMANDS 1) # Write compile_commands.json
-set(CMAKE_LINK_DEPENDS_NO_SHARED 1) # Do not relink all depended targets on .so
-set(CMAKE_CONFIGURATION_TYPES "RelWithDebInfo;Debug;Release;MinSizeRel" CACHE STRING "" FORCE)
-set(CMAKE_DEBUG_POSTFIX "d" CACHE STRING "Generate debug library name with a postfix.")    # To be consistent with CMakeLists from contrib libs.
-
-# Enable the ability to organize targets into hierarchies of "folders" for capable GUI-based IDEs.
-# For more info see https://cmake.org/cmake/help/latest/prop_gbl/USE_FOLDERS.html
-set_property(GLOBAL PROPERTY USE_FOLDERS ON)
-
-# Check that submodules are present only if source was downloaded with git
-if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git" AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/boost/boost")
-    message (FATAL_ERROR "Submodules are not initialized. Run\n\tgit submodule update --init --recursive")
-endif ()
-
-include (cmake/find/ccache.cmake)
-
-option(ENABLE_CHECK_HEAVY_BUILDS "Don't allow C++ translation units to compile too long or to take too much memory while compiling" OFF)
-if (ENABLE_CHECK_HEAVY_BUILDS)
-    # set DATA (since RSS does not work since 2.6.x+) to 2G
-    set (RLIMIT_DATA 5000000000)
-    # set VIRT (RLIMIT_AS) to 10G (DATA*10)
-    set (RLIMIT_AS 10000000000)
-    # gcc10/gcc10/clang -fsanitize=memory is too heavy
-    if (SANITIZE STREQUAL "memory" OR COMPILER_GCC)
-       set (RLIMIT_DATA 10000000000)
-    endif()
-    set (CMAKE_CXX_COMPILER_LAUNCHER prlimit --as=${RLIMIT_AS} --data=${RLIMIT_DATA} --cpu=600)
-endif ()
-
-if (NOT CMAKE_BUILD_TYPE OR CMAKE_BUILD_TYPE STREQUAL "None")
-    set (CMAKE_BUILD_TYPE "RelWithDebInfo")
-    message (STATUS "CMAKE_BUILD_TYPE is not set, set to default = ${CMAKE_BUILD_TYPE}")
-endif ()
-message (STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
-
-string (TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UC)
-
-option(USE_STATIC_LIBRARIES "Disable to use shared libraries" ON)
-option(MAKE_STATIC_LIBRARIES "Disable to make shared libraries" ${USE_STATIC_LIBRARIES})
-
-if (NOT MAKE_STATIC_LIBRARIES)
-    # DEVELOPER ONLY.
-    # Faster linking if turned on.
-    option(SPLIT_SHARED_LIBRARIES "Keep all internal libraries as separate .so files")
-
-    option(CLICKHOUSE_SPLIT_BINARY
-        "Make several binaries (clickhouse-server, clickhouse-client etc.) instead of one bundled")
-endif ()
-
-if (MAKE_STATIC_LIBRARIES AND SPLIT_SHARED_LIBRARIES)
-    message(FATAL_ERROR "Defining SPLIT_SHARED_LIBRARIES=1 without MAKE_STATIC_LIBRARIES=0 has no effect.")
-endif()
-
-if (NOT MAKE_STATIC_LIBRARIES AND SPLIT_SHARED_LIBRARIES)
-    set(BUILD_SHARED_LIBS 1 CACHE INTERNAL "")
-endif ()
-
-if (USE_STATIC_LIBRARIES)
-    list(REVERSE CMAKE_FIND_LIBRARY_SUFFIXES)
-endif ()
-
-# Implies ${WITH_COVERAGE}
-option (ENABLE_FUZZING "Fuzzy testing using libfuzzer" OFF)
-
-if (ENABLE_FUZZING)
-    message (STATUS "Fuzzing instrumentation enabled")
-    set (WITH_COVERAGE ON)
-    set (FUZZER "libfuzzer")
-endif()
-
-# Global libraries
-# See:
-# - default_libs.cmake
-# - sanitize.cmake
-add_library(global-libs INTERFACE)
-
-include (cmake/fuzzer.cmake)
-include (cmake/sanitize.cmake)
-
-if (CMAKE_GENERATOR STREQUAL "Ninja" AND NOT DISABLE_COLORED_BUILD)
-    # Turn on colored output. https://github.com/ninja-build/ninja/wiki/FAQ
-    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-color=always")
-    set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fdiagnostics-color=always")
-endif ()
-
-include (cmake/add_warning.cmake)
-
-if (NOT MSVC)
-    set (COMMON_WARNING_FLAGS "${COMMON_WARNING_FLAGS} -Wall")    # -Werror and many more is also added inside cmake/warnings.cmake
-endif ()
-
-if (COMPILER_CLANG)
-    # clang: warning: argument unused during compilation: '-specs=/usr/share/dpkg/no-pie-compile.specs' [-Wunused-command-line-argument]
-    set (COMMON_WARNING_FLAGS "${COMMON_WARNING_FLAGS} -Wno-unused-command-line-argument")
-    # generate ranges for fast "addr2line" search
-    if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE")
-        set(COMPILER_FLAGS "${COMPILER_FLAGS} -gdwarf-aranges")
-    endif ()
-endif ()
-
-# If turned `ON`, assumes the user has either the system GTest library or the bundled one.
-option(ENABLE_TESTS "Provide unit_test_dbms target with Google.Test unit tests" ON)
-
-if (OS_LINUX AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND NOT SPLIT_SHARED_LIBRARIES AND CMAKE_VERSION VERSION_GREATER "3.9.0")
-    # Only for Linux, x86_64.
-    # Implies ${ENABLE_FASTMEMCPY}
-    option(GLIBC_COMPATIBILITY "Enable compatibility with older glibc libraries." ON)
-elseif(GLIBC_COMPATIBILITY)
-    message (${RECONFIGURE_MESSAGE_LEVEL} "Glibc compatibility cannot be enabled in current configuration")
-endif ()
-
-if (NOT CMAKE_VERSION VERSION_GREATER "3.9.0")
-    message (WARNING "CMake version must be greater than 3.9.0 for production builds.")
-endif ()
-
-# Make sure the final executable has symbols exported
-set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic")
-
-if (OS_LINUX)
-    find_program (OBJCOPY_PATH NAMES "llvm-objcopy" "llvm-objcopy-11" "llvm-objcopy-10" "llvm-objcopy-9" "llvm-objcopy-8" "objcopy")
-    if (OBJCOPY_PATH)
-        message(STATUS "Using objcopy: ${OBJCOPY_PATH}.")
-
-        if (ARCH_AMD64)
-            set(OBJCOPY_ARCH_OPTIONS -O elf64-x86-64 -B i386)
-        elseif (ARCH_AARCH64)
-            set(OBJCOPY_ARCH_OPTIONS -O elf64-aarch64 -B aarch64)
-        endif ()
-    else ()
-        message(FATAL_ERROR "Cannot find objcopy.")
-    endif ()
-endif ()
-
-if (OS_DARWIN)
-    set(WHOLE_ARCHIVE -all_load)
-    set(NO_WHOLE_ARCHIVE -noall_load)
-else ()
-    set(WHOLE_ARCHIVE --whole-archive)
-    set(NO_WHOLE_ARCHIVE --no-whole-archive)
-endif ()
-
-# Ignored if `lld` is used
-option(ADD_GDB_INDEX_FOR_GOLD "Add .gdb-index to resulting binaries for gold linker.")
-
-if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE")
-    # Can be lld or ld-lld.
-    if (LINKER_NAME MATCHES "lld$")
-        set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gdb-index")
-        set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--gdb-index")
-        message (STATUS "Adding .gdb-index via --gdb-index linker option.")
-    # we use another tool for gdb-index, because gold linker removes section .debug_aranges, which used inside clickhouse stacktraces
-    # http://sourceware-org.1504.n7.nabble.com/gold-No-debug-aranges-section-when-linking-with-gdb-index-td540965.html#a556932
-    elseif (LINKER_NAME MATCHES "gold$" AND ADD_GDB_INDEX_FOR_GOLD)
-        find_program (GDB_ADD_INDEX_EXE NAMES "gdb-add-index" DOC "Path to gdb-add-index executable")
-        if (NOT GDB_ADD_INDEX_EXE)
-            set (USE_GDB_ADD_INDEX 0)
-            message (WARNING "Cannot add gdb index to binaries, because gold linker is used, but gdb-add-index executable not found.")
-        else()
-            set (USE_GDB_ADD_INDEX 1)
-            message (STATUS "gdb-add-index found: ${GDB_ADD_INDEX_EXE}")
-        endif()
-    endif ()
-endif()
-
-# Create BuildID when using lld. For other linkers it is created by default.
-if (LINKER_NAME MATCHES "lld$")
-    # SHA1 is not cryptographically secure but it is the best what lld is offering.
-    set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--build-id=sha1")
-endif ()
-
-# Add a section with the hash of the compiled machine code for integrity checks.
-# Only for official builds, because adding a section can be time consuming (rewrite of several GB).
-# And cross compiled binaries are not supported (since you cannot execute clickhouse hash-binary)
-if (OBJCOPY_PATH AND YANDEX_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE))
-    set (USE_BINARY_HASH 1)
-endif ()
-
-cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd
-
-
-if(NOT AVAILABLE_PHYSICAL_MEMORY OR AVAILABLE_PHYSICAL_MEMORY GREATER 8000)
-    # Less `/tmp` usage, more RAM usage.
-    option(COMPILER_PIPE "-pipe compiler option" ON)
-endif()
-
-if(COMPILER_PIPE)
-    set(COMPILER_FLAGS "${COMPILER_FLAGS} -pipe")
-else()
-    message(STATUS "Disabling compiler -pipe option (have only ${AVAILABLE_PHYSICAL_MEMORY} mb of memory)")
-endif()
-
-if(NOT DISABLE_CPU_OPTIMIZE)
-    include(cmake/cpu_features.cmake)
-endif()
-
-option(ARCH_NATIVE "Add -march=native compiler flag")
-
-if (ARCH_NATIVE)
-    set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=native")
-endif ()
-
-if (COMPILER_GCC OR COMPILER_CLANG)
-    # to make numeric_limits<__int128> works with GCC
-    set (_CXX_STANDARD "gnu++2a")
-else()
-    set (_CXX_STANDARD "c++2a")
-endif()
-
-# cmake < 3.12 doesn't support 20. We'll set CMAKE_CXX_FLAGS for now
-# set (CMAKE_CXX_STANDARD 20)
-set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=${_CXX_STANDARD}")
-
-set (CMAKE_CXX_EXTENSIONS 0) # https://cmake.org/cmake/help/latest/prop_tgt/CXX_EXTENSIONS.html#prop_tgt:CXX_EXTENSIONS
-set (CMAKE_CXX_STANDARD_REQUIRED ON)
-
-if (COMPILER_GCC OR COMPILER_CLANG)
-    # Enable C++14 sized global deallocation functions. It should be enabled by setting -std=c++14 but I'm not sure.
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsized-deallocation")
-endif ()
-
-# Compiler-specific coverage flags e.g. -fcoverage-mapping for gcc
-option(WITH_COVERAGE "Profile the resulting binary/binaries" OFF)
-
-if (WITH_COVERAGE AND COMPILER_CLANG)
-    set(COMPILER_FLAGS "${COMPILER_FLAGS} -fprofile-instr-generate -fcoverage-mapping")
-    # If we want to disable coverage for specific translation units
-    set(WITHOUT_COVERAGE "-fno-profile-instr-generate -fno-coverage-mapping")
-endif()
-
-if (WITH_COVERAGE AND COMPILER_GCC)
-    set(COMPILER_FLAGS "${COMPILER_FLAGS} -fprofile-arcs -ftest-coverage")
-    set(COVERAGE_OPTION "-lgcov")
-    set(WITHOUT_COVERAGE "-fno-profile-arcs -fno-test-coverage")
-endif()
-
-set(COMPILER_FLAGS "${COMPILER_FLAGS}")
-
-set (CMAKE_BUILD_COLOR_MAKEFILE          ON)
-set (CMAKE_CXX_FLAGS                     "${CMAKE_CXX_FLAGS} ${COMPILER_FLAGS} ${PLATFORM_EXTRA_CXX_FLAG} ${COMMON_WARNING_FLAGS} ${CXX_WARNING_FLAGS}")
-set (CMAKE_CXX_FLAGS_RELWITHDEBINFO      "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3 ${CMAKE_CXX_FLAGS_ADD}")
-set (CMAKE_CXX_FLAGS_DEBUG               "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g3 -ggdb3 -fno-inline ${CMAKE_CXX_FLAGS_ADD}")
-
-set (CMAKE_C_FLAGS                       "${CMAKE_C_FLAGS} ${COMPILER_FLAGS} ${COMMON_WARNING_FLAGS} ${CMAKE_C_FLAGS_ADD}")
-set (CMAKE_C_FLAGS_RELWITHDEBINFO        "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3 ${CMAKE_C_FLAGS_ADD}")
-set (CMAKE_C_FLAGS_DEBUG                 "${CMAKE_C_FLAGS_DEBUG} -O0 -g3 -ggdb3 -fno-inline ${CMAKE_C_FLAGS_ADD}")
-
-if (COMPILER_CLANG)
-    if (OS_DARWIN)
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
-        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-U,_inside_main")
-    endif()
-
-    # Display absolute paths in error messages. Otherwise KDevelop fails to navigate to correct file and opens a new file instead.
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-absolute-paths")
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fdiagnostics-absolute-paths")
-
-    if (NOT ENABLE_TESTS AND NOT SANITIZE)
-        # https://clang.llvm.org/docs/ThinLTO.html
-        # Applies to clang only.
-        # Disabled when building with tests or sanitizers.
-        option(ENABLE_THINLTO "Clang-specific link time optimization" ON)
-    endif()
-
-    # Set new experimental pass manager, it's a performance, build time and binary size win.
-    # Can be removed after https://reviews.llvm.org/D66490 merged and released to at least two versions of clang.
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexperimental-new-pass-manager")
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fexperimental-new-pass-manager")
-
-    # We cannot afford to use LTO when compiling unit tests, and it's not enough
-    # to only supply -fno-lto at the final linking stage. So we disable it
-    # completely.
-    if (ENABLE_THINLTO AND NOT ENABLE_TESTS AND NOT SANITIZE)
-        # Link time optimization
-        set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -flto=thin")
-        set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -flto=thin")
-        set (CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} -flto=thin")
-    elseif (ENABLE_THINLTO)
-        message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot enable ThinLTO")
-    endif ()
-
-    # Always prefer llvm tools when using clang. For instance, we cannot use GNU ar when llvm LTO is enabled
-    find_program (LLVM_AR_PATH NAMES "llvm-ar" "llvm-ar-11" "llvm-ar-10" "llvm-ar-9" "llvm-ar-8")
-
-    if (LLVM_AR_PATH)
-        message(STATUS "Using llvm-ar: ${LLVM_AR_PATH}.")
-        set (CMAKE_AR ${LLVM_AR_PATH})
-    else ()
-        message(WARNING "Cannot find llvm-ar. System ar will be used instead. It does not work with ThinLTO.")
-    endif ()
-
-    find_program (LLVM_RANLIB_PATH NAMES "llvm-ranlib" "llvm-ranlib-11" "llvm-ranlib-10" "llvm-ranlib-9" "llvm-ranlib-8")
-
-    if (LLVM_RANLIB_PATH)
-        message(STATUS "Using llvm-ranlib: ${LLVM_RANLIB_PATH}.")
-        set (CMAKE_RANLIB ${LLVM_RANLIB_PATH})
-    else ()
-        message(WARNING "Cannot find llvm-ranlib. System ranlib will be used instead. It does not work with ThinLTO.")
-    endif ()
-
-elseif (ENABLE_THINLTO)
-    message (${RECONFIGURE_MESSAGE_LEVEL} "ThinLTO is only available with CLang")
-endif ()
-
-# Turns on all external libs like s3, kafka, ODBC, ...
-option(ENABLE_LIBRARIES "Enable all external libraries by default" ON)
-
-# We recommend avoiding this mode for production builds because we can't guarantee all needed libraries exist in your
-# system.
-# This mode exists for enthusiastic developers who are searching for trouble.
-# Useful for maintainers of OS packages.
-option (UNBUNDLED "Use system libraries instead of ones in contrib/" OFF)
-
-if (UNBUNDLED)
-    set(NOT_UNBUNDLED OFF)
-else ()
-    set(NOT_UNBUNDLED ON)
-endif ()
-
-if (UNBUNDLED OR NOT (OS_LINUX OR OS_DARWIN))
-    # Using system libs can cause a lot of warnings in includes (on macro expansion).
-    option(WERROR "Enable -Werror compiler option" OFF)
-else ()
-    option(WERROR "Enable -Werror compiler option" ON)
-endif ()
-
-option(WERROR "Enable -Werror compiler option" OFF)
-
-# Make this extra-checks for correct library dependencies.
-if (OS_LINUX AND NOT SANITIZE)
-    set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-undefined")
-    set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-undefined")
-endif ()
-
-include(cmake/dbms_glob_sources.cmake)
-
-if (OS_LINUX OR OS_ANDROID)
-    include(cmake/linux/default_libs.cmake)
-elseif (OS_DARWIN)
-    include(cmake/darwin/default_libs.cmake)
-elseif (OS_FREEBSD)
-    include(cmake/freebsd/default_libs.cmake)
-endif ()
-
-######################################
-### Add targets below this comment ###
-######################################
-
-set (CMAKE_POSTFIX_VARIABLE "CMAKE_${CMAKE_BUILD_TYPE_UC}_POSTFIX")
-
-if (MAKE_STATIC_LIBRARIES)
-    set (CMAKE_POSITION_INDEPENDENT_CODE OFF)
-    if (OS_LINUX AND NOT ARCH_ARM)
-        # Slightly more efficient code can be generated
-        # It's disabled for ARM because otherwise ClickHouse cannot run on Android.
-        set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-pie")
-        set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -fno-pie")
-        set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-no-pie")
-    endif ()
-else ()
-    set (CMAKE_POSITION_INDEPENDENT_CODE ON)
-endif ()
-
-# https://github.com/include-what-you-use/include-what-you-use
-option (USE_INCLUDE_WHAT_YOU_USE "Automatically reduce unneeded includes in source code (external tool)" OFF)
-
-if (USE_INCLUDE_WHAT_YOU_USE)
-    find_program(IWYU_PATH NAMES include-what-you-use iwyu)
-    if (NOT IWYU_PATH)
-        message(FATAL_ERROR "Could not find the program include-what-you-use")
-    endif()
-    if (${CMAKE_VERSION} VERSION_LESS "3.3.0")
-        message(FATAL_ERROR "include-what-you-use requires CMake version at least 3.3.")
-    endif()
-endif ()
-
-if (ENABLE_TESTS)
-    message (STATUS "Unit tests are enabled")
-else()
-    message(STATUS "Unit tests are disabled")
-endif ()
-
-enable_testing() # Enable for tests without binary
-
-# when installing to /usr - place configs to /etc but for /usr/local place to /usr/local/etc
-if (CMAKE_INSTALL_PREFIX STREQUAL "/usr")
-    set (CLICKHOUSE_ETC_DIR "/etc")
-else ()
-    set (CLICKHOUSE_ETC_DIR "${CMAKE_INSTALL_PREFIX}/etc")
-endif ()
-
-message (STATUS
-    "Building for: ${CMAKE_SYSTEM} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE_LIBRARY_ARCHITECTURE} ;
-    USE_STATIC_LIBRARIES=${USE_STATIC_LIBRARIES}
-    MAKE_STATIC_LIBRARIES=${MAKE_STATIC_LIBRARIES}
-    SPLIT_SHARED=${SPLIT_SHARED_LIBRARIES}
-    UNBUNDLED=${UNBUNDLED}
-    CCACHE=${CCACHE_FOUND} ${CCACHE_VERSION}")
-
-include (GNUInstallDirs)
-include (cmake/contrib_finder.cmake)
-
-find_contrib_lib(double-conversion) # Must be before parquet
-include (cmake/find/ssl.cmake)
-include (cmake/find/ldap.cmake) # after ssl
-include (cmake/find/icu.cmake)
-include (cmake/find/zlib.cmake)
-include (cmake/find/zstd.cmake)
-include (cmake/find/ltdl.cmake) # for odbc
-# openssl, zlib before poco
-include (cmake/find/sparsehash.cmake)
-include (cmake/find/re2.cmake)
-include (cmake/find/krb5.cmake)
-include (cmake/find/libgsasl.cmake)
-include (cmake/find/cyrus-sasl.cmake)
-include (cmake/find/rdkafka.cmake)
-include (cmake/find/amqpcpp.cmake)
-include (cmake/find/capnp.cmake)
-include (cmake/find/llvm.cmake)
-include (cmake/find/termcap.cmake) # for external static llvm
-include (cmake/find/h3.cmake)
-include (cmake/find/libxml2.cmake)
-include (cmake/find/brotli.cmake)
-include (cmake/find/protobuf.cmake)
-include (cmake/find/grpc.cmake)
-include (cmake/find/pdqsort.cmake)
-include (cmake/find/miniselect.cmake)
-include (cmake/find/hdfs3.cmake) # uses protobuf
-include (cmake/find/poco.cmake)
-include (cmake/find/curl.cmake)
-include (cmake/find/s3.cmake)
-include (cmake/find/base64.cmake)
-include (cmake/find/parquet.cmake)
-include (cmake/find/simdjson.cmake)
-include (cmake/find/fast_float.cmake)
-include (cmake/find/rapidjson.cmake)
-include (cmake/find/fastops.cmake)
-include (cmake/find/odbc.cmake)
-include (cmake/find/rocksdb.cmake)
-include (cmake/find/nuraft.cmake)
-
-
-if(NOT USE_INTERNAL_PARQUET_LIBRARY)
-    set (ENABLE_ORC OFF CACHE INTERNAL "")
-endif()
-include (cmake/find/orc.cmake)
-
-include (cmake/find/avro.cmake)
-include (cmake/find/msgpack.cmake)
-include (cmake/find/cassandra.cmake)
-include (cmake/find/sentry.cmake)
-include (cmake/find/stats.cmake)
-
-set (USE_INTERNAL_CITYHASH_LIBRARY ON CACHE INTERNAL "")
-find_contrib_lib(cityhash)
-
-find_contrib_lib(farmhash)
-
-if (ENABLE_TESTS)
-    include (cmake/find/gtest.cmake)
-endif ()
-
-# Need to process before "contrib" dir:
-include (cmake/find/mysqlclient.cmake)
-
-# When testing for memory leaks with Valgrind, don't link tcmalloc or jemalloc.
-
-include (cmake/print_flags.cmake)
-
-if (TARGET global-group)
-    install (EXPORT global DESTINATION cmake)
-endif ()
-
-add_subdirectory (contrib EXCLUDE_FROM_ALL)
-
-if (NOT ENABLE_JEMALLOC)
-    message (WARNING "Non default allocator is disabled. This is not recommended for production builds.")
-endif ()
-
-macro (add_executable target)
-    # invoke built-in add_executable
-    # explicitly acquire and interpose malloc symbols by clickhouse_malloc
-    # if GLIBC_COMPATIBILITY is ON and ENABLE_THINLTO is on than provide memcpy symbol explicitly to neutrialize thinlto's libcall generation.
-    if (GLIBC_COMPATIBILITY AND ENABLE_THINLTO)
-        _add_executable (${ARGV} $<TARGET_OBJECTS:clickhouse_malloc> $<TARGET_OBJECTS:clickhouse_memcpy>)
-    else ()
-        _add_executable (${ARGV} $<TARGET_OBJECTS:clickhouse_malloc>)
-    endif ()
-
-    get_target_property (type ${target} TYPE)
-    if (${type} STREQUAL EXECUTABLE)
-        # disabled for TSAN and gcc since libtsan.a provides overrides too
-        if (TARGET clickhouse_new_delete)
-            # operator::new/delete for executables (MemoryTracker stuff)
-            target_link_libraries (${target} PRIVATE clickhouse_new_delete ${MALLOC_LIBRARIES})
-        endif()
-    endif()
-endmacro()
-
-set(ConfigIncludePath ${CMAKE_CURRENT_BINARY_DIR}/includes/configs CACHE INTERNAL "Path to generated configuration files.")
-include_directories(${ConfigIncludePath})
-
-# Add as many warnings as possible for our own code.
-include (cmake/warnings.cmake)
-
-add_subdirectory (base)
-add_subdirectory (src)
-add_subdirectory (programs)
-add_subdirectory (tests)
-add_subdirectory (utils)
-
-include (cmake/print_include_directories.cmake)
-
-include (cmake/sanitize_target_link_libraries.cmake)

From fe78b31ed4d85e17b38aa16d1f4ea31502f0dc5b Mon Sep 17 00:00:00 2001
From: keenwolf <keenwolf@gmail.com>
Date: Sat, 16 Jan 2021 20:35:41 +0800
Subject: [PATCH 0106/1238] Move register to the Misc group

---
 src/Functions/FunctionFile.cpp                   | 2 +-
 src/Functions/FunctionsConversion.cpp            | 2 --
 src/Functions/registerFunctionsMiscellaneous.cpp | 2 ++
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp
index 1de98cc3f38..d1e35c1d31e 100644
--- a/src/Functions/FunctionFile.cpp
+++ b/src/Functions/FunctionFile.cpp
@@ -119,7 +119,7 @@ namespace DB
     };
 
 
-    void registerFunctionFromFile(FunctionFactory & factory)
+    void registerFunctionFile(FunctionFactory & factory)
     {
         factory.registerFunction<FunctionFile>();
     }
diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp
index a6866ce0939..c59452ebab0 100644
--- a/src/Functions/FunctionsConversion.cpp
+++ b/src/Functions/FunctionsConversion.cpp
@@ -6,7 +6,6 @@ namespace DB
 {
 
 void registerFunctionFixedString(FunctionFactory & factory);
-void registerFunctionFromFile(FunctionFactory & factory);
 
 void registerFunctionsConversion(FunctionFactory & factory)
 {
@@ -37,7 +36,6 @@ void registerFunctionsConversion(FunctionFactory & factory)
     factory.registerFunction<FunctionToString>();
 
     registerFunctionFixedString(factory);
-    registerFunctionFromFile(factory);
     
     factory.registerFunction<FunctionToUnixTimestamp>();
 
diff --git a/src/Functions/registerFunctionsMiscellaneous.cpp b/src/Functions/registerFunctionsMiscellaneous.cpp
index 653922bbced..de6d093e2b0 100644
--- a/src/Functions/registerFunctionsMiscellaneous.cpp
+++ b/src/Functions/registerFunctionsMiscellaneous.cpp
@@ -67,6 +67,7 @@ void registerFunctionInitializeAggregation(FunctionFactory &);
 void registerFunctionErrorCodeToName(FunctionFactory &);
 void registerFunctionTcpPort(FunctionFactory &);
 void registerFunctionByteSize(FunctionFactory &);
+void registerFunctionFile(FunctionFactory & factory);
 
 #if USE_ICU
 void registerFunctionConvertCharset(FunctionFactory &);
@@ -134,6 +135,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory)
     registerFunctionErrorCodeToName(factory);
     registerFunctionTcpPort(factory);
     registerFunctionByteSize(factory);
+    registerFunctionFile(factory);
 
 #if USE_ICU
     registerFunctionConvertCharset(factory);

From 5ba67b11132457b932b8f608522d8677a9ab4228 Mon Sep 17 00:00:00 2001
From: keenwolf <keenwolf@gmail.com>
Date: Sun, 17 Jan 2021 02:55:07 +0800
Subject: [PATCH 0107/1238] Add test case.

---
 .../01658_read_file_to_stringcolumn.reference | 20 +++++
 .../01658_read_file_to_stringcolumn.sh        | 76 +++++++++++++++++++
 2 files changed, 96 insertions(+)
 create mode 100644 tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference
 create mode 100755 tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh

diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference
new file mode 100644
index 00000000000..82bc7c9ca90
--- /dev/null
+++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference
@@ -0,0 +1,20 @@
+aaaaaaaaa	bbbbbbbbb
+:0
+:0
+:0
+ccccccccc	aaaaaaaaa	bbbbbbbbb
+ccccccccc	aaaaaaaaa	bbbbbbbbb
+:0
+:107
+:79
+:35
+699415
+aaaaaaaaa	bbbbbbbbb
+ccccccccc	aaaaaaaaa	bbbbbbbbb
+ccccccccc	aaaaaaaaa	bbbbbbbbb
+ccccccccc	aaaaaaaaa	bbbbbbbbb
+ccccccccc	aaaaaaaaa	bbbbbbbbb
+699415	0
+:0
+:107
+:79
diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
new file mode 100755
index 00000000000..1ee68b3ff11
--- /dev/null
+++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
@@ -0,0 +1,76 @@
+#!/usr/bin/env bash
+set -eu
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+# Data preparation
+# When run with client mode on different machine to the server, the data-file creation maybe implemented in SQL. Now we just make it simple
+echo -n aaaaaaaaa > /var/lib/clickhouse/user_files/a.txt
+echo -n bbbbbbbbb > /var/lib/clickhouse/user_files/b.txt
+echo -n ccccccccc > /var/lib/clickhouse/user_files/c.txt
+echo -n ccccccccc > /tmp/c.txt
+mkdir /var/lib/clickhouse/user_files/dir
+
+### 1st TEST in CLIENT mode.
+${CLICKHOUSE_CLIENT} --query "drop table if exists data;"
+${CLICKHOUSE_CLIENT} --query "create table data (A String, B String) engine=MergeTree() order by A;"
+
+
+# Valid cases:
+${CLICKHOUSE_CLIENT} --query "select file('a.txt'), file('b.txt');";echo ":"$?
+${CLICKHOUSE_CLIENT} --query "insert into data select file('a.txt'), file('b.txt');";echo ":"$?
+${CLICKHOUSE_CLIENT} --query "insert into data select file('a.txt'), file('b.txt');";echo ":"$?
+${CLICKHOUSE_CLIENT} --query "select file('c.txt'), * from data";echo ":"$?
+
+
+# Invalid cases: (Here using sub-shell to catch exception avoiding the test quit)
+# Test non-exists file
+echo "clickhouse-client --query "'"select file('"'nonexist.txt'), file('b.txt')"'";echo :$?' | bash 2>/dev/null
+# Test isDir
+echo "clickhouse-client --query "'"select file('"'dir'), file('b.txt')"'";echo :$?' | bash 2>/dev/null
+# Test path out of the user_files directory. It's not allowed in client mode
+echo "clickhouse-client --query "'"select file('"'/tmp/c.txt'), file('b.txt')"'";echo :$?' | bash 2>/dev/null
+
+
+
+### 2nd TEST in LOCAL mode.
+
+echo -n aaaaaaaaa > a.txt
+echo -n bbbbbbbbb > b.txt
+echo -n ccccccccc > c.txt
+mkdir dir
+#Test for large files, with length : 699415
+c_count=$(wc -c ${CURDIR}/01518_nullable_aggregate_states2.reference | awk '{print $1}')
+echo $c_count
+
+# Valid cases:
+# The default dir is the CWD path in LOCAL mode
+${CLICKHOUSE_LOCAL} --query "
+	drop table if exists data;
+	create table data (A String, B String) engine=MergeTree() order by A;
+	select file('a.txt'), file('b.txt');
+	insert into data select file('a.txt'), file('b.txt');
+	insert into data select file('a.txt'), file('b.txt');
+	select file('c.txt'), * from data;
+	select file('/tmp/c.txt'), * from data;
+	select $c_count, $c_count -length(file('${CURDIR}/01518_nullable_aggregate_states2.reference'))
+"
+echo ":"$?
+
+
+# Invalid cases: (Here using sub-shell to catch exception avoiding the test quit)
+# Test non-exists file
+echo "clickhouse-local --query "'"select file('"'nonexist.txt'), file('b.txt')"'";echo :$?' | bash 2>/dev/null
+
+# Test isDir
+echo "clickhouse-local --query "'"select file('"'dir'), file('b.txt')"'";echo :$?' | bash 2>/dev/null
+
+# Restore
+rm -rf a.txt b.txt c.txt dir
+rm -rf /var/lib/clickhouse/user_files/a.txt
+rm -rf /var/lib/clickhouse/user_files/b.txt
+rm -rf /var/lib/clickhouse/user_files/c.txt
+rm -rf /tmp/c.txt
+rm -rf /var/lib/clickhouse/user_files/dir

From 8f3cdb69e6ee9f72e8fecfd3dca4cc527903faef Mon Sep 17 00:00:00 2001
From: keenwolf <keenwolf@gmail.com>
Date: Sun, 17 Jan 2021 03:07:42 +0800
Subject: [PATCH 0108/1238] Delete several spaces just formatting

---
 src/Functions/FunctionsConversion.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 mode change 100644 => 100755 src/Functions/FunctionsConversion.cpp

diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp
old mode 100644
new mode 100755
index c59452ebab0..257b852ecd8
--- a/src/Functions/FunctionsConversion.cpp
+++ b/src/Functions/FunctionsConversion.cpp
@@ -36,7 +36,7 @@ void registerFunctionsConversion(FunctionFactory & factory)
     factory.registerFunction<FunctionToString>();
 
     registerFunctionFixedString(factory);
-    
+
     factory.registerFunction<FunctionToUnixTimestamp>();
 
     factory.registerFunction<CastOverloadResolver<CastType::nonAccurate>>(FunctionFactory::CaseInsensitive);

From 2379902e2adf789433989abdbf241f19e052597e Mon Sep 17 00:00:00 2001
From: keenwolf <keenwolf@gmail.com>
Date: Sun, 17 Jan 2021 14:27:18 +0800
Subject: [PATCH 0109/1238] Return data type revise

---
 src/Functions/FunctionFile.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp
index d1e35c1d31e..e84fd15fbbd 100644
--- a/src/Functions/FunctionFile.cpp
+++ b/src/Functions/FunctionFile.cpp
@@ -41,8 +41,8 @@ namespace DB
 
         DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
         {
-            if (!isStringOrFixedString(arguments[0].type))
-                throw Exception(getName() + " is only implemented for types String and FixedString", ErrorCodes::NOT_IMPLEMENTED);
+            if (!isString(arguments[0].type))
+                throw Exception(getName() + " is only implemented for types String", ErrorCodes::NOT_IMPLEMENTED);
             return std::make_shared<DataTypeString>();
         }
 
@@ -78,7 +78,7 @@ namespace DB
                 in.readStrict(res_buf, file_len);
 
                 /*
-                //Method-2: Read directly into the String buf, which avoiding one copy from PageCache to ReadBuffer
+                //Method-2(Just for reference): Read directly into the String buf, which avoiding one copy from PageCache to ReadBuffer
                 int fd;
                 if (-1 == (fd = open(file_absolute_path.c_str(), O_RDONLY)))
                      throwFromErrnoWithPath("Cannot open file " + std::string(file_absolute_path), std::string(file_absolute_path),

From b3e44f202bad10356d5640585abb1f3054c8c26d Mon Sep 17 00:00:00 2001
From: keenwolf <keenwolf@gmail.com>
Date: Mon, 18 Jan 2021 11:10:52 +0800
Subject: [PATCH 0110/1238] add back CmakeLists.txt

---
 CMakeLists.txt | 568 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 568 insertions(+)
 create mode 100644 CMakeLists.txt

diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 00000000000..9002f1df140
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,568 @@
+cmake_minimum_required(VERSION 3.3)
+
+foreach(policy
+        CMP0023
+        CMP0048 # CMake 3.0
+        CMP0074 # CMake 3.12
+        CMP0077
+        CMP0079
+    )
+    if(POLICY ${policy})
+        cmake_policy(SET ${policy} NEW)
+    endif()
+endforeach()
+
+# set default policy
+foreach(default_policy_var_name
+        # make option() honor normal variables for BUILD_SHARED_LIBS:
+        # - re2
+        # - snappy
+        CMAKE_POLICY_DEFAULT_CMP0077
+        # Google Test from sources uses too old cmake, 2.6.x, and CMP0022 should
+        # set, to avoid using deprecated LINK_INTERFACE_LIBRARIES(_<CONFIG>)? over
+        # INTERFACE_LINK_LIBRARIES.
+        CMAKE_POLICY_DEFAULT_CMP0022
+    )
+    set(${default_policy_var_name} NEW)
+endforeach()
+
+project(ClickHouse)
+
+# If turned off: e.g. when ENABLE_FOO is ON, but FOO tool was not found, the CMake will continue.
+option(FAIL_ON_UNSUPPORTED_OPTIONS_COMBINATION
+   "Stop/Fail CMake configuration if some ENABLE_XXX option is defined (either ON or OFF)
+   but is not possible to satisfy" ON)
+
+if(FAIL_ON_UNSUPPORTED_OPTIONS_COMBINATION)
+    set(RECONFIGURE_MESSAGE_LEVEL FATAL_ERROR)
+else()
+    set(RECONFIGURE_MESSAGE_LEVEL STATUS)
+endif()
+
+include (cmake/arch.cmake)
+include (cmake/target.cmake)
+include (cmake/tools.cmake)
+include (cmake/analysis.cmake)
+
+# Ignore export() since we don't use it,
+# but it gets broken with a global targets via link_libraries()
+macro (export)
+endmacro ()
+
+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/")
+set(CMAKE_EXPORT_COMPILE_COMMANDS 1) # Write compile_commands.json
+set(CMAKE_LINK_DEPENDS_NO_SHARED 1) # Do not relink all depended targets on .so
+set(CMAKE_CONFIGURATION_TYPES "RelWithDebInfo;Debug;Release;MinSizeRel" CACHE STRING "" FORCE)
+set(CMAKE_DEBUG_POSTFIX "d" CACHE STRING "Generate debug library name with a postfix.")    # To be consistent with CMakeLists from contrib libs.
+
+# Enable the ability to organize targets into hierarchies of "folders" for capable GUI-based IDEs.
+# For more info see https://cmake.org/cmake/help/latest/prop_gbl/USE_FOLDERS.html
+set_property(GLOBAL PROPERTY USE_FOLDERS ON)
+
+# Check that submodules are present only if source was downloaded with git
+if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git" AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/boost/boost")
+    message (FATAL_ERROR "Submodules are not initialized. Run\n\tgit submodule update --init --recursive")
+endif ()
+
+include (cmake/find/ccache.cmake)
+
+option(ENABLE_CHECK_HEAVY_BUILDS "Don't allow C++ translation units to compile too long or to take too much memory while compiling" OFF)
+if (ENABLE_CHECK_HEAVY_BUILDS)
+    # set DATA (since RSS does not work since 2.6.x+) to 2G
+    set (RLIMIT_DATA 5000000000)
+    # set VIRT (RLIMIT_AS) to 10G (DATA*10)
+    set (RLIMIT_AS 10000000000)
+    # gcc10/gcc10/clang -fsanitize=memory is too heavy
+    if (SANITIZE STREQUAL "memory" OR COMPILER_GCC)
+       set (RLIMIT_DATA 10000000000)
+    endif()
+    set (CMAKE_CXX_COMPILER_LAUNCHER prlimit --as=${RLIMIT_AS} --data=${RLIMIT_DATA} --cpu=600)
+endif ()
+
+if (NOT CMAKE_BUILD_TYPE OR CMAKE_BUILD_TYPE STREQUAL "None")
+    set (CMAKE_BUILD_TYPE "RelWithDebInfo")
+    message (STATUS "CMAKE_BUILD_TYPE is not set, set to default = ${CMAKE_BUILD_TYPE}")
+endif ()
+message (STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
+
+string (TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UC)
+
+option(USE_STATIC_LIBRARIES "Disable to use shared libraries" ON)
+option(MAKE_STATIC_LIBRARIES "Disable to make shared libraries" ${USE_STATIC_LIBRARIES})
+
+if (NOT MAKE_STATIC_LIBRARIES)
+    # DEVELOPER ONLY.
+    # Faster linking if turned on.
+    option(SPLIT_SHARED_LIBRARIES "Keep all internal libraries as separate .so files")
+
+    option(CLICKHOUSE_SPLIT_BINARY
+        "Make several binaries (clickhouse-server, clickhouse-client etc.) instead of one bundled")
+endif ()
+
+if (MAKE_STATIC_LIBRARIES AND SPLIT_SHARED_LIBRARIES)
+    message(FATAL_ERROR "Defining SPLIT_SHARED_LIBRARIES=1 without MAKE_STATIC_LIBRARIES=0 has no effect.")
+endif()
+
+if (NOT MAKE_STATIC_LIBRARIES AND SPLIT_SHARED_LIBRARIES)
+    set(BUILD_SHARED_LIBS 1 CACHE INTERNAL "")
+endif ()
+
+if (USE_STATIC_LIBRARIES)
+    list(REVERSE CMAKE_FIND_LIBRARY_SUFFIXES)
+endif ()
+
+# Implies ${WITH_COVERAGE}
+option (ENABLE_FUZZING "Fuzzy testing using libfuzzer" OFF)
+
+if (ENABLE_FUZZING)
+    message (STATUS "Fuzzing instrumentation enabled")
+    set (WITH_COVERAGE ON)
+    set (FUZZER "libfuzzer")
+endif()
+
+# Global libraries
+# See:
+# - default_libs.cmake
+# - sanitize.cmake
+add_library(global-libs INTERFACE)
+
+include (cmake/fuzzer.cmake)
+include (cmake/sanitize.cmake)
+
+if (CMAKE_GENERATOR STREQUAL "Ninja" AND NOT DISABLE_COLORED_BUILD)
+    # Turn on colored output. https://github.com/ninja-build/ninja/wiki/FAQ
+    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-color=always")
+    set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fdiagnostics-color=always")
+endif ()
+
+include (cmake/add_warning.cmake)
+
+if (NOT MSVC)
+    set (COMMON_WARNING_FLAGS "${COMMON_WARNING_FLAGS} -Wall")    # -Werror and many more is also added inside cmake/warnings.cmake
+endif ()
+
+if (COMPILER_CLANG)
+    # clang: warning: argument unused during compilation: '-specs=/usr/share/dpkg/no-pie-compile.specs' [-Wunused-command-line-argument]
+    set (COMMON_WARNING_FLAGS "${COMMON_WARNING_FLAGS} -Wno-unused-command-line-argument")
+    # generate ranges for fast "addr2line" search
+    if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE")
+        set(COMPILER_FLAGS "${COMPILER_FLAGS} -gdwarf-aranges")
+    endif ()
+endif ()
+
+# If turned `ON`, assumes the user has either the system GTest library or the bundled one.
+option(ENABLE_TESTS "Provide unit_test_dbms target with Google.Test unit tests" ON)
+
+if (OS_LINUX AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND NOT SPLIT_SHARED_LIBRARIES AND CMAKE_VERSION VERSION_GREATER "3.9.0")
+    # Only for Linux, x86_64.
+    # Implies ${ENABLE_FASTMEMCPY}
+    option(GLIBC_COMPATIBILITY "Enable compatibility with older glibc libraries." ON)
+elseif(GLIBC_COMPATIBILITY)
+    message (${RECONFIGURE_MESSAGE_LEVEL} "Glibc compatibility cannot be enabled in current configuration")
+endif ()
+
+if (NOT CMAKE_VERSION VERSION_GREATER "3.9.0")
+    message (WARNING "CMake version must be greater than 3.9.0 for production builds.")
+endif ()
+
+# Make sure the final executable has symbols exported
+set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic")
+
+if (OS_LINUX)
+    find_program (OBJCOPY_PATH NAMES "llvm-objcopy" "llvm-objcopy-11" "llvm-objcopy-10" "llvm-objcopy-9" "llvm-objcopy-8" "objcopy")
+    if (OBJCOPY_PATH)
+        message(STATUS "Using objcopy: ${OBJCOPY_PATH}.")
+
+        if (ARCH_AMD64)
+            set(OBJCOPY_ARCH_OPTIONS -O elf64-x86-64 -B i386)
+        elseif (ARCH_AARCH64)
+            set(OBJCOPY_ARCH_OPTIONS -O elf64-aarch64 -B aarch64)
+        endif ()
+    else ()
+        message(FATAL_ERROR "Cannot find objcopy.")
+    endif ()
+endif ()
+
+if (OS_DARWIN)
+    set(WHOLE_ARCHIVE -all_load)
+    set(NO_WHOLE_ARCHIVE -noall_load)
+else ()
+    set(WHOLE_ARCHIVE --whole-archive)
+    set(NO_WHOLE_ARCHIVE --no-whole-archive)
+endif ()
+
+# Ignored if `lld` is used
+option(ADD_GDB_INDEX_FOR_GOLD "Add .gdb-index to resulting binaries for gold linker.")
+
+if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE")
+    # Can be lld or ld-lld.
+    if (LINKER_NAME MATCHES "lld$")
+        set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gdb-index")
+        set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--gdb-index")
+        message (STATUS "Adding .gdb-index via --gdb-index linker option.")
+    # we use another tool for gdb-index, because gold linker removes section .debug_aranges, which used inside clickhouse stacktraces
+    # http://sourceware-org.1504.n7.nabble.com/gold-No-debug-aranges-section-when-linking-with-gdb-index-td540965.html#a556932
+    elseif (LINKER_NAME MATCHES "gold$" AND ADD_GDB_INDEX_FOR_GOLD)
+        find_program (GDB_ADD_INDEX_EXE NAMES "gdb-add-index" DOC "Path to gdb-add-index executable")
+        if (NOT GDB_ADD_INDEX_EXE)
+            set (USE_GDB_ADD_INDEX 0)
+            message (WARNING "Cannot add gdb index to binaries, because gold linker is used, but gdb-add-index executable not found.")
+        else()
+            set (USE_GDB_ADD_INDEX 1)
+            message (STATUS "gdb-add-index found: ${GDB_ADD_INDEX_EXE}")
+        endif()
+    endif ()
+endif()
+
+# Create BuildID when using lld. For other linkers it is created by default.
+if (LINKER_NAME MATCHES "lld$")
+    # SHA1 is not cryptographically secure but it is the best what lld is offering.
+    set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--build-id=sha1")
+endif ()
+
+# Add a section with the hash of the compiled machine code for integrity checks.
+# Only for official builds, because adding a section can be time consuming (rewrite of several GB).
+# And cross compiled binaries are not supported (since you cannot execute clickhouse hash-binary)
+if (OBJCOPY_PATH AND YANDEX_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE))
+    set (USE_BINARY_HASH 1)
+endif ()
+
+cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd
+
+
+if(NOT AVAILABLE_PHYSICAL_MEMORY OR AVAILABLE_PHYSICAL_MEMORY GREATER 8000)
+    # Less `/tmp` usage, more RAM usage.
+    option(COMPILER_PIPE "-pipe compiler option" ON)
+endif()
+
+if(COMPILER_PIPE)
+    set(COMPILER_FLAGS "${COMPILER_FLAGS} -pipe")
+else()
+    message(STATUS "Disabling compiler -pipe option (have only ${AVAILABLE_PHYSICAL_MEMORY} mb of memory)")
+endif()
+
+if(NOT DISABLE_CPU_OPTIMIZE)
+    include(cmake/cpu_features.cmake)
+endif()
+
+option(ARCH_NATIVE "Add -march=native compiler flag")
+
+if (ARCH_NATIVE)
+    set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=native")
+endif ()
+
+if (COMPILER_GCC OR COMPILER_CLANG)
+    # to make numeric_limits<__int128> works with GCC
+    set (_CXX_STANDARD "gnu++2a")
+else()
+    set (_CXX_STANDARD "c++2a")
+endif()
+
+# cmake < 3.12 doesn't support 20. We'll set CMAKE_CXX_FLAGS for now
+# set (CMAKE_CXX_STANDARD 20)
+set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=${_CXX_STANDARD}")
+
+set (CMAKE_CXX_EXTENSIONS 0) # https://cmake.org/cmake/help/latest/prop_tgt/CXX_EXTENSIONS.html#prop_tgt:CXX_EXTENSIONS
+set (CMAKE_CXX_STANDARD_REQUIRED ON)
+
+if (COMPILER_GCC OR COMPILER_CLANG)
+    # Enable C++14 sized global deallocation functions. It should be enabled by setting -std=c++14 but I'm not sure.
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsized-deallocation")
+endif ()
+
+# Compiler-specific coverage flags e.g. -fcoverage-mapping for gcc
+option(WITH_COVERAGE "Profile the resulting binary/binaries" OFF)
+
+if (WITH_COVERAGE AND COMPILER_CLANG)
+    set(COMPILER_FLAGS "${COMPILER_FLAGS} -fprofile-instr-generate -fcoverage-mapping")
+    # If we want to disable coverage for specific translation units
+    set(WITHOUT_COVERAGE "-fno-profile-instr-generate -fno-coverage-mapping")
+endif()
+
+if (WITH_COVERAGE AND COMPILER_GCC)
+    set(COMPILER_FLAGS "${COMPILER_FLAGS} -fprofile-arcs -ftest-coverage")
+    set(COVERAGE_OPTION "-lgcov")
+    set(WITHOUT_COVERAGE "-fno-profile-arcs -fno-test-coverage")
+endif()
+
+set(COMPILER_FLAGS "${COMPILER_FLAGS}")
+
+set (CMAKE_BUILD_COLOR_MAKEFILE          ON)
+set (CMAKE_CXX_FLAGS                     "${CMAKE_CXX_FLAGS} ${COMPILER_FLAGS} ${PLATFORM_EXTRA_CXX_FLAG} ${COMMON_WARNING_FLAGS} ${CXX_WARNING_FLAGS}")
+set (CMAKE_CXX_FLAGS_RELWITHDEBINFO      "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3 ${CMAKE_CXX_FLAGS_ADD}")
+set (CMAKE_CXX_FLAGS_DEBUG               "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g3 -ggdb3 -fno-inline ${CMAKE_CXX_FLAGS_ADD}")
+
+set (CMAKE_C_FLAGS                       "${CMAKE_C_FLAGS} ${COMPILER_FLAGS} ${COMMON_WARNING_FLAGS} ${CMAKE_C_FLAGS_ADD}")
+set (CMAKE_C_FLAGS_RELWITHDEBINFO        "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3 ${CMAKE_C_FLAGS_ADD}")
+set (CMAKE_C_FLAGS_DEBUG                 "${CMAKE_C_FLAGS_DEBUG} -O0 -g3 -ggdb3 -fno-inline ${CMAKE_C_FLAGS_ADD}")
+
+if (COMPILER_CLANG)
+    if (OS_DARWIN)
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
+        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-U,_inside_main")
+    endif()
+
+    # Display absolute paths in error messages. Otherwise KDevelop fails to navigate to correct file and opens a new file instead.
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-absolute-paths")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fdiagnostics-absolute-paths")
+
+    if (NOT ENABLE_TESTS AND NOT SANITIZE)
+        # https://clang.llvm.org/docs/ThinLTO.html
+        # Applies to clang only.
+        # Disabled when building with tests or sanitizers.
+        option(ENABLE_THINLTO "Clang-specific link time optimization" ON)
+    endif()
+
+    # Set new experimental pass manager, it's a performance, build time and binary size win.
+    # Can be removed after https://reviews.llvm.org/D66490 merged and released to at least two versions of clang.
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexperimental-new-pass-manager")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fexperimental-new-pass-manager")
+
+    # We cannot afford to use LTO when compiling unit tests, and it's not enough
+    # to only supply -fno-lto at the final linking stage. So we disable it
+    # completely.
+    if (ENABLE_THINLTO AND NOT ENABLE_TESTS AND NOT SANITIZE)
+        # Link time optimization
+        set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -flto=thin")
+        set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -flto=thin")
+        set (CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} -flto=thin")
+    elseif (ENABLE_THINLTO)
+        message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot enable ThinLTO")
+    endif ()
+
+    # Always prefer llvm tools when using clang. For instance, we cannot use GNU ar when llvm LTO is enabled
+    find_program (LLVM_AR_PATH NAMES "llvm-ar" "llvm-ar-11" "llvm-ar-10" "llvm-ar-9" "llvm-ar-8")
+
+    if (LLVM_AR_PATH)
+        message(STATUS "Using llvm-ar: ${LLVM_AR_PATH}.")
+        set (CMAKE_AR ${LLVM_AR_PATH})
+    else ()
+        message(WARNING "Cannot find llvm-ar. System ar will be used instead. It does not work with ThinLTO.")
+    endif ()
+
+    find_program (LLVM_RANLIB_PATH NAMES "llvm-ranlib" "llvm-ranlib-11" "llvm-ranlib-10" "llvm-ranlib-9" "llvm-ranlib-8")
+
+    if (LLVM_RANLIB_PATH)
+        message(STATUS "Using llvm-ranlib: ${LLVM_RANLIB_PATH}.")
+        set (CMAKE_RANLIB ${LLVM_RANLIB_PATH})
+    else ()
+        message(WARNING "Cannot find llvm-ranlib. System ranlib will be used instead. It does not work with ThinLTO.")
+    endif ()
+
+elseif (ENABLE_THINLTO)
+    message (${RECONFIGURE_MESSAGE_LEVEL} "ThinLTO is only available with CLang")
+endif ()
+
+# Turns on all external libs like s3, kafka, ODBC, ...
+option(ENABLE_LIBRARIES "Enable all external libraries by default" ON)
+
+# We recommend avoiding this mode for production builds because we can't guarantee all needed libraries exist in your
+# system.
+# This mode exists for enthusiastic developers who are searching for trouble.
+# Useful for maintainers of OS packages.
+option (UNBUNDLED "Use system libraries instead of ones in contrib/" OFF)
+
+if (UNBUNDLED)
+    set(NOT_UNBUNDLED OFF)
+else ()
+    set(NOT_UNBUNDLED ON)
+endif ()
+
+if (UNBUNDLED OR NOT (OS_LINUX OR OS_DARWIN))
+    # Using system libs can cause a lot of warnings in includes (on macro expansion).
+    option(WERROR "Enable -Werror compiler option" OFF)
+else ()
+    option(WERROR "Enable -Werror compiler option" ON)
+endif ()
+
+if (WERROR)
+    add_warning(error)
+endif ()
+
+# Make this extra-checks for correct library dependencies.
+if (OS_LINUX AND NOT SANITIZE)
+    set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-undefined")
+    set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-undefined")
+endif ()
+
+include(cmake/dbms_glob_sources.cmake)
+
+if (OS_LINUX OR OS_ANDROID)
+    include(cmake/linux/default_libs.cmake)
+elseif (OS_DARWIN)
+    include(cmake/darwin/default_libs.cmake)
+elseif (OS_FREEBSD)
+    include(cmake/freebsd/default_libs.cmake)
+endif ()
+
+######################################
+### Add targets below this comment ###
+######################################
+
+set (CMAKE_POSTFIX_VARIABLE "CMAKE_${CMAKE_BUILD_TYPE_UC}_POSTFIX")
+
+if (MAKE_STATIC_LIBRARIES)
+    set (CMAKE_POSITION_INDEPENDENT_CODE OFF)
+    if (OS_LINUX AND NOT ARCH_ARM)
+        # Slightly more efficient code can be generated
+        # It's disabled for ARM because otherwise ClickHouse cannot run on Android.
+        set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-pie")
+        set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -fno-pie")
+        set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-no-pie")
+    endif ()
+else ()
+    set (CMAKE_POSITION_INDEPENDENT_CODE ON)
+endif ()
+
+# https://github.com/include-what-you-use/include-what-you-use
+option (USE_INCLUDE_WHAT_YOU_USE "Automatically reduce unneeded includes in source code (external tool)" OFF)
+
+if (USE_INCLUDE_WHAT_YOU_USE)
+    find_program(IWYU_PATH NAMES include-what-you-use iwyu)
+    if (NOT IWYU_PATH)
+        message(FATAL_ERROR "Could not find the program include-what-you-use")
+    endif()
+    if (${CMAKE_VERSION} VERSION_LESS "3.3.0")
+        message(FATAL_ERROR "include-what-you-use requires CMake version at least 3.3.")
+    endif()
+endif ()
+
+if (ENABLE_TESTS)
+    message (STATUS "Unit tests are enabled")
+else()
+    message(STATUS "Unit tests are disabled")
+endif ()
+
+enable_testing() # Enable for tests without binary
+
+# when installing to /usr - place configs to /etc but for /usr/local place to /usr/local/etc
+if (CMAKE_INSTALL_PREFIX STREQUAL "/usr")
+    set (CLICKHOUSE_ETC_DIR "/etc")
+else ()
+    set (CLICKHOUSE_ETC_DIR "${CMAKE_INSTALL_PREFIX}/etc")
+endif ()
+
+message (STATUS
+    "Building for: ${CMAKE_SYSTEM} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE_LIBRARY_ARCHITECTURE} ;
+    USE_STATIC_LIBRARIES=${USE_STATIC_LIBRARIES}
+    MAKE_STATIC_LIBRARIES=${MAKE_STATIC_LIBRARIES}
+    SPLIT_SHARED=${SPLIT_SHARED_LIBRARIES}
+    UNBUNDLED=${UNBUNDLED}
+    CCACHE=${CCACHE_FOUND} ${CCACHE_VERSION}")
+
+include (GNUInstallDirs)
+include (cmake/contrib_finder.cmake)
+
+find_contrib_lib(double-conversion) # Must be before parquet
+include (cmake/find/ssl.cmake)
+include (cmake/find/ldap.cmake) # after ssl
+include (cmake/find/icu.cmake)
+include (cmake/find/zlib.cmake)
+include (cmake/find/zstd.cmake)
+include (cmake/find/ltdl.cmake) # for odbc
+# openssl, zlib before poco
+include (cmake/find/sparsehash.cmake)
+include (cmake/find/re2.cmake)
+include (cmake/find/krb5.cmake)
+include (cmake/find/libgsasl.cmake)
+include (cmake/find/cyrus-sasl.cmake)
+include (cmake/find/rdkafka.cmake)
+include (cmake/find/amqpcpp.cmake)
+include (cmake/find/capnp.cmake)
+include (cmake/find/llvm.cmake)
+include (cmake/find/termcap.cmake) # for external static llvm
+include (cmake/find/h3.cmake)
+include (cmake/find/libxml2.cmake)
+include (cmake/find/brotli.cmake)
+include (cmake/find/protobuf.cmake)
+include (cmake/find/grpc.cmake)
+include (cmake/find/pdqsort.cmake)
+include (cmake/find/miniselect.cmake)
+include (cmake/find/hdfs3.cmake) # uses protobuf
+include (cmake/find/poco.cmake)
+include (cmake/find/curl.cmake)
+include (cmake/find/s3.cmake)
+include (cmake/find/base64.cmake)
+include (cmake/find/parquet.cmake)
+include (cmake/find/simdjson.cmake)
+include (cmake/find/fast_float.cmake)
+include (cmake/find/rapidjson.cmake)
+include (cmake/find/fastops.cmake)
+include (cmake/find/odbc.cmake)
+include (cmake/find/rocksdb.cmake)
+include (cmake/find/libpqxx.cmake)
+include (cmake/find/nuraft.cmake)
+
+
+if(NOT USE_INTERNAL_PARQUET_LIBRARY)
+    set (ENABLE_ORC OFF CACHE INTERNAL "")
+endif()
+include (cmake/find/orc.cmake)
+
+include (cmake/find/avro.cmake)
+include (cmake/find/msgpack.cmake)
+include (cmake/find/cassandra.cmake)
+include (cmake/find/sentry.cmake)
+include (cmake/find/stats.cmake)
+
+set (USE_INTERNAL_CITYHASH_LIBRARY ON CACHE INTERNAL "")
+find_contrib_lib(cityhash)
+
+find_contrib_lib(farmhash)
+
+if (ENABLE_TESTS)
+    include (cmake/find/gtest.cmake)
+endif ()
+
+# Need to process before "contrib" dir:
+include (cmake/find/mysqlclient.cmake)
+
+# When testing for memory leaks with Valgrind, don't link tcmalloc or jemalloc.
+
+include (cmake/print_flags.cmake)
+
+if (TARGET global-group)
+    install (EXPORT global DESTINATION cmake)
+endif ()
+
+add_subdirectory (contrib EXCLUDE_FROM_ALL)
+
+if (NOT ENABLE_JEMALLOC)
+    message (WARNING "Non default allocator is disabled. This is not recommended for production builds.")
+endif ()
+
+macro (add_executable target)
+    # invoke built-in add_executable
+    # explicitly acquire and interpose malloc symbols by clickhouse_malloc
+    # if GLIBC_COMPATIBILITY is ON and ENABLE_THINLTO is on than provide memcpy symbol explicitly to neutrialize thinlto's libcall generation.
+    if (GLIBC_COMPATIBILITY AND ENABLE_THINLTO)
+        _add_executable (${ARGV} $<TARGET_OBJECTS:clickhouse_malloc> $<TARGET_OBJECTS:clickhouse_memcpy>)
+    else ()
+        _add_executable (${ARGV} $<TARGET_OBJECTS:clickhouse_malloc>)
+    endif ()
+
+    get_target_property (type ${target} TYPE)
+    if (${type} STREQUAL EXECUTABLE)
+        # disabled for TSAN and gcc since libtsan.a provides overrides too
+        if (TARGET clickhouse_new_delete)
+            # operator::new/delete for executables (MemoryTracker stuff)
+            target_link_libraries (${target} PRIVATE clickhouse_new_delete ${MALLOC_LIBRARIES})
+        endif()
+    endif()
+endmacro()
+
+set(ConfigIncludePath ${CMAKE_CURRENT_BINARY_DIR}/includes/configs CACHE INTERNAL "Path to generated configuration files.")
+include_directories(${ConfigIncludePath})
+
+# Add as many warnings as possible for our own code.
+include (cmake/warnings.cmake)
+
+add_subdirectory (base)
+add_subdirectory (src)
+add_subdirectory (programs)
+add_subdirectory (tests)
+add_subdirectory (utils)
+
+include (cmake/print_include_directories.cmake)
+
+include (cmake/sanitize_target_link_libraries.cmake)

From 2dda8ed1e046364b63933b2b990ea27089e4d298 Mon Sep 17 00:00:00 2001
From: Pavel Kovalenko <jokserfn@yandex-team.ru>
Date: Mon, 18 Jan 2021 13:37:09 +0300
Subject: [PATCH 0111/1238] Ability to backup-restore metadata files for DiskS3
 (minor fixes)

---
 .../test_merge_tree_s3_restore/test.py         | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/tests/integration/test_merge_tree_s3_restore/test.py b/tests/integration/test_merge_tree_s3_restore/test.py
index 9f4aab9f35d..346d9aced3f 100644
--- a/tests/integration/test_merge_tree_s3_restore/test.py
+++ b/tests/integration/test_merge_tree_s3_restore/test.py
@@ -142,7 +142,7 @@ def test_full_restore(cluster):
 
     node.stop_clickhouse()
     create_restore_file(node)
-    node.start_clickhouse()
+    node.start_clickhouse(10)
 
     assert node.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 4)
     assert node.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0)
@@ -170,7 +170,7 @@ def test_restore_another_bucket_path(cluster):
 
     node_another_bucket.stop_clickhouse()
     create_restore_file(node_another_bucket, bucket="root")
-    node_another_bucket.start_clickhouse()
+    node_another_bucket.start_clickhouse(10)
 
     assert node_another_bucket.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 4)
     assert node_another_bucket.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0)
@@ -181,7 +181,7 @@ def test_restore_another_bucket_path(cluster):
 
     node_another_bucket_path.stop_clickhouse()
     create_restore_file(node_another_bucket_path, bucket="root2", path="data")
-    node_another_bucket_path.start_clickhouse()
+    node_another_bucket_path.start_clickhouse(10)
 
     assert node_another_bucket_path.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 4)
     assert node_another_bucket_path.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0)
@@ -223,7 +223,7 @@ def test_restore_different_revisions(cluster):
     drop_s3_metadata(node_another_bucket)
     purge_s3(cluster, cluster.minio_bucket_2)
     create_restore_file(node_another_bucket, revision=revision1, bucket="root")
-    node_another_bucket.start_clickhouse()
+    node_another_bucket.start_clickhouse(10)
 
     assert node_another_bucket.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 2)
     assert node_another_bucket.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0)
@@ -234,7 +234,7 @@ def test_restore_different_revisions(cluster):
     drop_s3_metadata(node_another_bucket)
     purge_s3(cluster, cluster.minio_bucket_2)
     create_restore_file(node_another_bucket, revision=revision2, bucket="root")
-    node_another_bucket.start_clickhouse()
+    node_another_bucket.start_clickhouse(10)
 
     assert node_another_bucket.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 4)
     assert node_another_bucket.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0)
@@ -245,7 +245,7 @@ def test_restore_different_revisions(cluster):
     drop_s3_metadata(node_another_bucket)
     purge_s3(cluster, cluster.minio_bucket_2)
     create_restore_file(node_another_bucket, revision=revision3, bucket="root")
-    node_another_bucket.start_clickhouse()
+    node_another_bucket.start_clickhouse(10)
 
     assert node_another_bucket.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 4)
     assert node_another_bucket.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0)
@@ -277,7 +277,7 @@ def test_restore_mutations(cluster):
     drop_s3_metadata(node_another_bucket)
     purge_s3(cluster, cluster.minio_bucket_2)
     create_restore_file(node_another_bucket, revision=revision_before_mutation, bucket="root")
-    node_another_bucket.start_clickhouse()
+    node_another_bucket.start_clickhouse(10)
 
     assert node_another_bucket.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 2)
     assert node_another_bucket.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0)
@@ -288,7 +288,7 @@ def test_restore_mutations(cluster):
     drop_s3_metadata(node_another_bucket)
     purge_s3(cluster, cluster.minio_bucket_2)
     create_restore_file(node_another_bucket, revision=revision_after_mutation, bucket="root")
-    node_another_bucket.start_clickhouse()
+    node_another_bucket.start_clickhouse(10)
 
     assert node_another_bucket.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 2)
     assert node_another_bucket.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0)
@@ -302,7 +302,7 @@ def test_restore_mutations(cluster):
     purge_s3(cluster, cluster.minio_bucket_2)
     revision = (revision_before_mutation + revision_after_mutation) // 2
     create_restore_file(node_another_bucket, revision=revision, bucket="root")
-    node_another_bucket.start_clickhouse()
+    node_another_bucket.start_clickhouse(10)
 
     # Wait for unfinished mutation completion.
     time.sleep(3)

From 689655842419acf79351d7f79b960e48a4c3af7c Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Mon, 18 Jan 2021 19:03:26 +0300
Subject: [PATCH 0112/1238] Some code movements

---
 .../ZooKeeper => Coordination}/TestKeeperStorage.cpp        | 2 +-
 src/{Common/ZooKeeper => Coordination}/TestKeeperStorage.h  | 0
 src/Coordination/tests/gtest_for_build.cpp                  | 6 ------
 src/Interpreters/Context.cpp                                | 2 +-
 src/Server/TestKeeperTCPHandler.h                           | 2 +-
 5 files changed, 3 insertions(+), 9 deletions(-)
 rename src/{Common/ZooKeeper => Coordination}/TestKeeperStorage.cpp (99%)
 rename src/{Common/ZooKeeper => Coordination}/TestKeeperStorage.h (100%)

diff --git a/src/Common/ZooKeeper/TestKeeperStorage.cpp b/src/Coordination/TestKeeperStorage.cpp
similarity index 99%
rename from src/Common/ZooKeeper/TestKeeperStorage.cpp
rename to src/Coordination/TestKeeperStorage.cpp
index daadba6519e..00ce884ae7f 100644
--- a/src/Common/ZooKeeper/TestKeeperStorage.cpp
+++ b/src/Coordination/TestKeeperStorage.cpp
@@ -1,4 +1,4 @@
-#include <Common/ZooKeeper/TestKeeperStorage.h>
+#include <Coordination/TestKeeperStorage.h>
 #include <Common/ZooKeeper/IKeeper.h>
 #include <Common/setThreadName.h>
 #include <mutex>
diff --git a/src/Common/ZooKeeper/TestKeeperStorage.h b/src/Coordination/TestKeeperStorage.h
similarity index 100%
rename from src/Common/ZooKeeper/TestKeeperStorage.h
rename to src/Coordination/TestKeeperStorage.h
diff --git a/src/Coordination/tests/gtest_for_build.cpp b/src/Coordination/tests/gtest_for_build.cpp
index c13c5799ff7..188565de4ce 100644
--- a/src/Coordination/tests/gtest_for_build.cpp
+++ b/src/Coordination/tests/gtest_for_build.cpp
@@ -215,12 +215,6 @@ TEST(CoordinationTest, TestSummingRaft3)
         std::this_thread::sleep_for(std::chrono::milliseconds(100));
     }
 
-    while (s2.state_machine->getValue() != 78)
-    {
-        std::cout << "Waiting s2 to apply entry\n";
-        std::this_thread::sleep_for(std::chrono::milliseconds(100));
-    }
-
     while (s3.state_machine->getValue() != 78)
     {
         std::cout << "Waiting s3 to apply entry\n";
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 2a8fdce869b..d1fdcd2955b 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -12,7 +12,7 @@
 #include <Common/Stopwatch.h>
 #include <Common/formatReadable.h>
 #include <Common/thread_local_rng.h>
-#include <Common/ZooKeeper/TestKeeperStorage.h>
+#include <Coordination/TestKeeperStorage.h>
 #include <Compression/ICompressionCodec.h>
 #include <Core/BackgroundSchedulePool.h>
 #include <Formats/FormatFactory.h>
diff --git a/src/Server/TestKeeperTCPHandler.h b/src/Server/TestKeeperTCPHandler.h
index 14e38ae6bd5..03d5ba40ab4 100644
--- a/src/Server/TestKeeperTCPHandler.h
+++ b/src/Server/TestKeeperTCPHandler.h
@@ -6,7 +6,7 @@
 #include <Interpreters/Context.h>
 #include <Common/ZooKeeper/ZooKeeperCommon.h>
 #include <Common/ZooKeeper/ZooKeeperConstants.h>
-#include <Common/ZooKeeper/TestKeeperStorage.h>
+#include <Coordination/TestKeeperStorage.h>
 #include <IO/WriteBufferFromPocoSocket.h>
 #include <IO/ReadBufferFromPocoSocket.h>
 #include <unordered_map>

From f346a9bf8b6e1cacaee538d376da668139b995e6 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Tue, 19 Jan 2021 04:00:39 +0300
Subject: [PATCH 0113/1238] frame grammar tmp

---
 src/Interpreters/AggregateDescription.h       | 22 ++++++-
 src/Interpreters/ExpressionAnalyzer.cpp       | 14 ++++
 src/Parsers/ASTWindowDefinition.cpp           | 21 ++++--
 src/Parsers/ASTWindowDefinition.h             |  4 ++
 src/Parsers/ExpressionElementParsers.cpp      | 65 +++++++++++++++++++
 .../0_stateless/01591_window_functions.sql    | 12 ++++
 6 files changed, 132 insertions(+), 6 deletions(-)

diff --git a/src/Interpreters/AggregateDescription.h b/src/Interpreters/AggregateDescription.h
index f1fc232d04d..89d1cdf4cb4 100644
--- a/src/Interpreters/AggregateDescription.h
+++ b/src/Interpreters/AggregateDescription.h
@@ -39,6 +39,26 @@ struct WindowFunctionDescription
     std::string dump() const;
 };
 
+struct WindowFrame
+{
+    enum class FrameType { Rows, Groups, Range };
+    enum class OffsetType { Unbounded, Current, Offset };
+
+    // This flag signifies that the frame properties were not set explicitly by
+    // user, but the fields of this structure still have to contain proper values
+    // for the default frame of ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW.
+    bool is_default = true;
+
+    FrameType type = FrameType::Rows;
+
+    /*
+     * We don't need these yet.
+     * OffsetType begin_offset = Unbounded;
+
+     * OffsetType end_offset = Current;
+     */
+};
+
 struct WindowDescription
 {
     std::string window_name;
@@ -54,7 +74,7 @@ struct WindowDescription
     // then by ORDER BY. This field holds this combined sort order.
     SortDescription full_sort_description;
 
-    // No frame info as of yet.
+    WindowFrame frame;
 
     // The window functions that are calculated for this window.
     std::vector<WindowFunctionDescription> window_functions;
diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp
index 76fc0cf419f..97beff6b365 100644
--- a/src/Interpreters/ExpressionAnalyzer.cpp
+++ b/src/Interpreters/ExpressionAnalyzer.cpp
@@ -515,6 +515,20 @@ void makeWindowDescription(WindowDescription & desc, const IAST * ast)
     desc.full_sort_description = desc.partition_by;
     desc.full_sort_description.insert(desc.full_sort_description.end(),
         desc.order_by.begin(), desc.order_by.end());
+
+    if (definition.frame.type != WindowFrame::FrameType::Rows)
+    {
+        std::string name = definition.frame.type == WindowFrame::FrameType::Rows
+            ? "ROWS"
+            : definition.frame.type == WindowFrame::FrameType::Groups
+                ? "GROUPS" : "RANGE";
+
+        throw Exception(ErrorCodes::NOT_IMPLEMENTED,
+            "Window frame '{}' is not implemented (while processing '{}')",
+            name, ast->formatForErrorMessage());
+    }
+
+    desc.frame = definition.frame;
 }
 
 void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions)
diff --git a/src/Parsers/ASTWindowDefinition.cpp b/src/Parsers/ASTWindowDefinition.cpp
index 79a4b4bf1c6..c726629d31b 100644
--- a/src/Parsers/ASTWindowDefinition.cpp
+++ b/src/Parsers/ASTWindowDefinition.cpp
@@ -22,6 +22,8 @@ ASTPtr ASTWindowDefinition::clone() const
         result->children.push_back(result->order_by);
     }
 
+    result->frame = frame;
+
     return result;
 }
 
@@ -31,12 +33,12 @@ String ASTWindowDefinition::getID(char) const
 }
 
 void ASTWindowDefinition::formatImpl(const FormatSettings & settings,
-    FormatState & state, FormatStateStacked frame) const
+    FormatState & state, FormatStateStacked format_frame) const
 {
     if (partition_by)
     {
         settings.ostr << "PARTITION BY ";
-        partition_by->formatImpl(settings, state, frame);
+        partition_by->formatImpl(settings, state, format_frame);
     }
 
     if (partition_by && order_by)
@@ -47,7 +49,16 @@ void ASTWindowDefinition::formatImpl(const FormatSettings & settings,
     if (order_by)
     {
         settings.ostr << "ORDER BY ";
-        order_by->formatImpl(settings, state, frame);
+        order_by->formatImpl(settings, state, format_frame);
+    }
+
+    if (!frame.is_default)
+    {
+        const auto name = frame.type == WindowFrame::FrameType::Rows
+            ? "ROWS" : frame.type == WindowFrame::FrameType::Groups
+                ? "GROUPS" : "RANGE";
+
+        settings.ostr << name << " BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW";
     }
 }
 
@@ -56,8 +67,8 @@ std::string ASTWindowDefinition::getDefaultWindowName() const
     WriteBufferFromOwnString ostr;
     FormatSettings settings{ostr, true /* one_line */};
     FormatState state;
-    FormatStateStacked frame;
-    formatImpl(settings, state, frame);
+    FormatStateStacked format_frame;
+    formatImpl(settings, state, format_frame);
     return ostr.str();
 }
 
diff --git a/src/Parsers/ASTWindowDefinition.h b/src/Parsers/ASTWindowDefinition.h
index bf74cf809f9..fa20b74b0fc 100644
--- a/src/Parsers/ASTWindowDefinition.h
+++ b/src/Parsers/ASTWindowDefinition.h
@@ -1,5 +1,7 @@
 #pragma once
 
+#include <Interpreters/AggregateDescription.h>
+
 #include <Parsers/IAST.h>
 
 
@@ -12,6 +14,8 @@ struct ASTWindowDefinition : public IAST
 
     ASTPtr order_by;
 
+    WindowFrame frame;
+
 
     ASTPtr clone() const override;
 
diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp
index 39f8a3c951c..a80f4561eb0 100644
--- a/src/Parsers/ExpressionElementParsers.cpp
+++ b/src/Parsers/ExpressionElementParsers.cpp
@@ -504,6 +504,65 @@ bool ParserWindowReference::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
     return parser_definition.parse(pos, function->window_definition, expected);
 }
 
+static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & pos,
+    Expected & expected)
+{
+    ParserKeyword keyword_rows("ROWS");
+    ParserKeyword keyword_groups("GROUPS");
+    ParserKeyword keyword_range("RANGE");
+
+    if (keyword_rows.ignore(pos, expected))
+    {
+        node->frame.type = WindowFrame::FrameType::Rows;
+    }
+    else if (keyword_groups.ignore(pos, expected))
+    {
+        node->frame.type = WindowFrame::FrameType::Groups;
+    }
+    else if (keyword_range.ignore(pos, expected))
+    {
+        node->frame.type = WindowFrame::FrameType::Range;
+    }
+    else
+    {
+        /* No frame clause. */
+        return true;
+    }
+
+    ParserKeyword keyword_between("BETWEEN");
+    ParserKeyword keyword_unbounded("UNBOUNDED");
+    ParserKeyword keyword_preceding("PRECEDING");
+    ParserKeyword keyword_and("AND");
+    ParserKeyword keyword_current_row("CURRENT ROW");
+
+    if (!keyword_between.ignore(pos, expected))
+    {
+        return false;
+    }
+
+    if (!keyword_unbounded.ignore(pos, expected))
+    {
+        return false;
+    }
+
+    if (!keyword_preceding.ignore(pos, expected))
+    {
+        return false;
+    }
+
+    if (!keyword_and.ignore(pos, expected))
+    {
+        return false;
+    }
+
+    if (!keyword_current_row.ignore(pos, expected))
+    {
+        return false;
+    }
+
+    return true;
+}
+
 bool ParserWindowDefinition::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
 {
     auto result = std::make_shared<ASTWindowDefinition>();
@@ -548,6 +607,12 @@ bool ParserWindowDefinition::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
         }
     }
 
+    if (!tryParseFrameDefinition(result.get(), pos, expected))
+    {
+        /* Broken frame definition. */
+        return false;
+    }
+
     ParserToken parser_closing_bracket(TokenType::ClosingRoundBracket);
     if (!parser_closing_bracket.ignore(pos, expected))
     {
diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql
index 95afb9be408..e4858cd6dc6 100644
--- a/tests/queries/0_stateless/01591_window_functions.sql
+++ b/tests/queries/0_stateless/01591_window_functions.sql
@@ -92,3 +92,15 @@ from numbers(10)
 window
     w1 as (partition by intDiv(number, 3))
 ;
+
+-- ROWS frame
+select
+    sum(number)
+        over (order by number rows between unbounded preceding and current row)
+from numbers(3);
+
+
+select
+    sum(number)
+        over (order by number groups between unbounded preceding and current row)
+from numbers(3);

From 8463835c41a4d13d156dede6362069c051ad0e5f Mon Sep 17 00:00:00 2001
From: keenwolf <keenwolf@gmail.com>
Date: Tue, 19 Jan 2021 11:47:40 +0800
Subject: [PATCH 0114/1238] Remove extra semicolon

---
 src/Functions/FunctionFile.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp
index e84fd15fbbd..c24d6aef890 100644
--- a/src/Functions/FunctionFile.cpp
+++ b/src/Functions/FunctionFile.cpp
@@ -32,7 +32,7 @@ namespace DB
     public:
         static constexpr auto name = "file";
         static FunctionPtr create(const Context &context) { return std::make_shared<FunctionFile>(context); }
-        explicit FunctionFile(const Context &context_) : context(context_) {};
+        explicit FunctionFile(const Context &context_) : context(context_) {}
 
         String getName() const override { return name; }
 

From 47fb320651dd0db9fcc27e36f5e03661c1c0a53a Mon Sep 17 00:00:00 2001
From: keenwolf <keenwolf@gmail.com>
Date: Tue, 19 Jan 2021 14:04:25 +0800
Subject: [PATCH 0115/1238] Do little fix for Style check

---
 src/Functions/FunctionFile.cpp        | 2 --
 src/Functions/FunctionsConversion.cpp | 0
 2 files changed, 2 deletions(-)
 mode change 100755 => 100644 src/Functions/FunctionsConversion.cpp

diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp
index c24d6aef890..c493b2a2b88 100644
--- a/src/Functions/FunctionFile.cpp
+++ b/src/Functions/FunctionFile.cpp
@@ -6,7 +6,6 @@
 #include <Poco/File.h>
 #include <Poco/Path.h>
 #include <Interpreters/Context.h>
-#include <fcntl.h>
 #include <unistd.h>
 
 namespace DB
@@ -15,7 +14,6 @@ namespace DB
     namespace ErrorCodes
     {
         extern const int ILLEGAL_COLUMN;
-        extern const int TOO_LARGE_STRING_SIZE;
         extern const int NOT_IMPLEMENTED;
         extern const int FILE_DOESNT_EXIST;
         extern const int CANNOT_OPEN_FILE;
diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp
old mode 100755
new mode 100644

From 6eefa7a0a04e698dcb4f6676947c033f4df949c9 Mon Sep 17 00:00:00 2001
From: keenwolf <keenwolf@gmail.com>
Date: Tue, 19 Jan 2021 15:14:15 +0800
Subject: [PATCH 0116/1238] Add mkdir

---
 tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
index 1ee68b3ff11..863f39e7bdf 100755
--- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
+++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
@@ -7,6 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 
 # Data preparation
 # When run with client mode on different machine to the server, the data-file creation maybe implemented in SQL. Now we just make it simple
+mkidr -p /var/lib/clickhouse/user_files/
 echo -n aaaaaaaaa > /var/lib/clickhouse/user_files/a.txt
 echo -n bbbbbbbbb > /var/lib/clickhouse/user_files/b.txt
 echo -n ccccccccc > /var/lib/clickhouse/user_files/c.txt

From 7c7dd69a88b79c2d07f1a564f34c30a99d57afa1 Mon Sep 17 00:00:00 2001
From: keenwolf <keenwolf@gmail.com>
Date: Tue, 19 Jan 2021 17:18:21 +0800
Subject: [PATCH 0117/1238] Fix mkdir

---
 tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
index 863f39e7bdf..1696fc710ad 100755
--- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
+++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
@@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 
 # Data preparation
 # When run with client mode on different machine to the server, the data-file creation maybe implemented in SQL. Now we just make it simple
-mkidr -p /var/lib/clickhouse/user_files/
+mkdir -p /var/lib/clickhouse/user_files/
 echo -n aaaaaaaaa > /var/lib/clickhouse/user_files/a.txt
 echo -n bbbbbbbbb > /var/lib/clickhouse/user_files/b.txt
 echo -n ccccccccc > /var/lib/clickhouse/user_files/c.txt

From 1063b22b4c62b498d232f8acc10017663debdf21 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Tue, 19 Jan 2021 12:40:25 +0300
Subject: [PATCH 0118/1238] Add write buffer from nuraft

---
 src/Coordination/ReadBufferFromNuraftBuffer.h | 17 +++++
 .../WriteBufferFromNuraftBuffer.cpp           | 66 +++++++++++++++++++
 .../WriteBufferFromNuraftBuffer.h             | 30 +++++++++
 src/Coordination/tests/gtest_for_build.cpp    | 37 +++++++++++
 4 files changed, 150 insertions(+)
 create mode 100644 src/Coordination/ReadBufferFromNuraftBuffer.h
 create mode 100644 src/Coordination/WriteBufferFromNuraftBuffer.cpp
 create mode 100644 src/Coordination/WriteBufferFromNuraftBuffer.h

diff --git a/src/Coordination/ReadBufferFromNuraftBuffer.h b/src/Coordination/ReadBufferFromNuraftBuffer.h
new file mode 100644
index 00000000000..392a97bdd8f
--- /dev/null
+++ b/src/Coordination/ReadBufferFromNuraftBuffer.h
@@ -0,0 +1,17 @@
+#pragma once
+#include <IO/ReadBufferFromMemory.h>
+
+#include <libnuraft/nuraft.hxx>
+
+namespace DB
+{
+
+class ReadBufferFromNuraftBuffer : public ReadBufferFromMemory
+{
+public:
+    explicit ReadBufferFromNuraftBuffer(nuraft::ptr<nuraft::buffer> buffer)
+        : ReadBufferFromMemory(buffer->data_begin(), buffer->size())
+    {}
+};
+
+}
diff --git a/src/Coordination/WriteBufferFromNuraftBuffer.cpp b/src/Coordination/WriteBufferFromNuraftBuffer.cpp
new file mode 100644
index 00000000000..09e1034ae8f
--- /dev/null
+++ b/src/Coordination/WriteBufferFromNuraftBuffer.cpp
@@ -0,0 +1,66 @@
+#include <Coordination/WriteBufferFromNuraftBuffer.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int CANNOT_WRITE_AFTER_END_OF_BUFFER;
+}
+
+void WriteBufferFromNuraftBuffer::nextImpl()
+{
+    if (is_finished)
+        throw Exception("WriteBufferFromNuraftBuffer is finished", ErrorCodes::CANNOT_WRITE_AFTER_END_OF_BUFFER);
+
+    size_t old_size = buffer->size();
+    /// pos may not be equal to vector.data() + old_size, because WriteBuffer::next() can be used to flush data
+    size_t pos_offset = pos - reinterpret_cast<Position>(buffer->data_begin());
+    nuraft::ptr<nuraft::buffer> new_buffer = nuraft::buffer::alloc(old_size * size_multiplier);
+    memcpy(new_buffer->data_begin(), buffer->data_begin(), buffer->size());
+    buffer = new_buffer;
+    internal_buffer = Buffer(reinterpret_cast<Position>(buffer->data_begin() + pos_offset), reinterpret_cast<Position>(buffer->data_begin() + buffer->size()));
+    working_buffer = internal_buffer;
+}
+
+WriteBufferFromNuraftBuffer::WriteBufferFromNuraftBuffer()
+    : WriteBuffer(nullptr, 0)
+{
+    buffer = nuraft::buffer::alloc(initial_size);
+    set(reinterpret_cast<Position>(buffer->data_begin()), buffer->size());
+}
+
+void WriteBufferFromNuraftBuffer::finalize()
+{
+    if (is_finished)
+        return;
+
+    is_finished = true;
+    size_t real_size = position() - reinterpret_cast<Position>(buffer->data_begin());
+    nuraft::ptr<nuraft::buffer> new_buffer = nuraft::buffer::alloc(real_size);
+    memcpy(new_buffer->data_begin(), buffer->data_begin(), real_size);
+    buffer = new_buffer;
+
+    /// Prevent further writes.
+    set(nullptr, 0);
+}
+
+nuraft::ptr<nuraft::buffer> WriteBufferFromNuraftBuffer::getBuffer()
+{
+    finalize();
+    return buffer;
+}
+
+ WriteBufferFromNuraftBuffer::~WriteBufferFromNuraftBuffer()
+{
+    try
+    {
+        finalize();
+    }
+    catch (...)
+    {
+        tryLogCurrentException(__PRETTY_FUNCTION__);
+    }
+}
+
+}
diff --git a/src/Coordination/WriteBufferFromNuraftBuffer.h b/src/Coordination/WriteBufferFromNuraftBuffer.h
new file mode 100644
index 00000000000..47a01fbc2a4
--- /dev/null
+++ b/src/Coordination/WriteBufferFromNuraftBuffer.h
@@ -0,0 +1,30 @@
+#pragma once
+
+#include <IO/WriteBuffer.h>
+#include <libnuraft/nuraft.hxx>
+
+namespace DB
+{
+
+class WriteBufferFromNuraftBuffer : public WriteBuffer
+{
+private:
+    nuraft::ptr<nuraft::buffer> buffer;
+    bool is_finished = false;
+
+    static constexpr size_t initial_size = 32;
+    static constexpr size_t size_multiplier = 2;
+
+    void nextImpl() override;
+
+public:
+    WriteBufferFromNuraftBuffer();
+
+    void finalize() override final;
+    nuraft::ptr<nuraft::buffer> getBuffer();
+    bool isFinished() const { return is_finished; }
+
+    ~WriteBufferFromNuraftBuffer() override;
+};
+
+}
diff --git a/src/Coordination/tests/gtest_for_build.cpp b/src/Coordination/tests/gtest_for_build.cpp
index 188565de4ce..38602e48fae 100644
--- a/src/Coordination/tests/gtest_for_build.cpp
+++ b/src/Coordination/tests/gtest_for_build.cpp
@@ -4,6 +4,10 @@
 #include <Coordination/InMemoryStateManager.h>
 #include <Coordination/SummingStateMachine.h>
 #include <Coordination/LoggerWrapper.h>
+#include <Coordination/WriteBufferFromNuraftBuffer.h>
+#include <Coordination/ReadBufferFromNuraftBuffer.h>
+#include <Common/ZooKeeper/ZooKeeperCommon.h>
+#include <Common/ZooKeeper/ZooKeeperIO.h>
 #include <Common/Exception.h>
 #include <libnuraft/nuraft.hxx>
 #include <thread>
@@ -26,6 +30,39 @@ TEST(CoordinationTest, BuildTest)
     EXPECT_EQ(1, 1);
 }
 
+TEST(CoordinationTest, BufferSerde)
+{
+    Coordination::ZooKeeperRequestPtr request = Coordination::ZooKeeperRequestFactory::instance().get(Coordination::OpNum::Get);
+    request->xid = 3;
+    dynamic_cast<Coordination::ZooKeeperGetRequest *>(request.get())->path = "/path/value";
+
+    DB::WriteBufferFromNuraftBuffer wbuf;
+    request->write(wbuf);
+    auto nuraft_buffer = wbuf.getBuffer();
+    EXPECT_EQ(nuraft_buffer->size(), 28);
+
+    DB::ReadBufferFromNuraftBuffer rbuf(nuraft_buffer);
+
+    int32_t length;
+    Coordination::read(length, rbuf);
+    EXPECT_EQ(length + sizeof(length), nuraft_buffer->size());
+
+    int32_t xid;
+    Coordination::read(xid, rbuf);
+    EXPECT_EQ(xid, request->xid);
+
+    Coordination::OpNum opnum;
+    Coordination::read(opnum, rbuf);
+
+    Coordination::ZooKeeperRequestPtr request_read = Coordination::ZooKeeperRequestFactory::instance().get(opnum);
+    request_read->xid = xid;
+    request_read->readImpl(rbuf);
+
+    EXPECT_EQ(request_read->getOpNum(), Coordination::OpNum::Get);
+    EXPECT_EQ(request_read->xid, 3);
+    EXPECT_EQ(dynamic_cast<Coordination::ZooKeeperGetRequest *>(request_read.get())->path, "/path/value");
+}
+
 struct SummingRaftServer
 {
     SummingRaftServer(int server_id_, const std::string & hostname_, int port_)

From 3fb50dfa1b56cea7fb831870e24a28d46459c44c Mon Sep 17 00:00:00 2001
From: Ivan Lezhankin <ilezhankin@yandex-team.ru>
Date: Tue, 19 Jan 2021 15:34:27 +0300
Subject: [PATCH 0119/1238] Initial implementation of inline frames

---
 base/daemon/BaseDaemon.cpp        |   5 +-
 src/Common/Dwarf.cpp              | 691 ++++++++++++++++++++++++++----
 src/Common/Dwarf.h                | 186 +++++++-
 src/Common/StackTrace.cpp         |  22 +-
 src/Common/StackTrace.h           |   5 +-
 src/Common/tests/symbol_index.cpp |   3 +-
 src/Functions/addressToLine.cpp   |   3 +-
 7 files changed, 813 insertions(+), 102 deletions(-)

diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp
index 4cf8a8d7ce9..c51609cc171 100644
--- a/base/daemon/BaseDaemon.cpp
+++ b/base/daemon/BaseDaemon.cpp
@@ -311,7 +311,8 @@ private:
         if (stack_trace.getSize())
         {
             /// Write bare stack trace (addresses) just in case if we will fail to print symbolized stack trace.
-            /// NOTE This still require memory allocations and mutex lock inside logger. BTW we can also print it to stderr using write syscalls.
+            /// NOTE: This still require memory allocations and mutex lock inside logger.
+            ///       BTW we can also print it to stderr using write syscalls.
 
             std::stringstream bare_stacktrace;
             bare_stacktrace << "Stack trace:";
@@ -324,7 +325,7 @@ private:
         /// Write symbolized stack trace line by line for better grep-ability.
         stack_trace.toStringEveryLine([&](const std::string & s) { LOG_FATAL(log, s); });
 
-#if defined(__linux__)
+#if defined(OS_LINUX)
         /// Write information about binary checksum. It can be difficult to calculate, so do it only after printing stack trace.
         String calculated_binary_hash = getHashOfLoadedBinaryHex();
         if (daemon.stored_binary_hash.empty())
diff --git a/src/Common/Dwarf.cpp b/src/Common/Dwarf.cpp
index 7a697a2c9ef..53eb9e8ec63 100644
--- a/src/Common/Dwarf.cpp
+++ b/src/Common/Dwarf.cpp
@@ -19,8 +19,6 @@
 /** This file was edited for ClickHouse.
   */
 
-#include <optional>
-
 #include <string.h>
 
 #include <Common/Elf.h>
@@ -43,6 +41,7 @@
 #define DW_FORM_ref4 0x13
 #define DW_FORM_data8 0x07
 #define DW_FORM_ref8 0x14
+#define DW_FORM_ref_sig8 0x20
 #define DW_FORM_sdata 0x0d
 #define DW_FORM_udata 0x0f
 #define DW_FORM_ref_udata 0x15
@@ -54,9 +53,24 @@
 #define DW_FORM_strp 0x0e
 #define DW_FORM_indirect 0x16
 #define DW_TAG_compile_unit 0x11
+#define DW_TAG_subprogram 0x2e
+#define DW_TAG_try_block 0x32
+#define DW_TAG_catch_block 0x25
+#define DW_TAG_entry_point 0x03
+#define DW_TAG_common_block 0x1a
+#define DW_TAG_lexical_block 0x0b
 #define DW_AT_stmt_list 0x10
 #define DW_AT_comp_dir 0x1b
 #define DW_AT_name 0x03
+#define DW_AT_high_pc 0x12
+#define DW_AT_low_pc 0x11
+#define DW_AT_entry_pc 0x52
+#define DW_AT_ranges 0x55
+#define DW_AT_abstract_origin 0x31
+#define DW_AT_call_line 0x59
+#define DW_AT_call_file 0x58
+#define DW_AT_linkage_name 0x6e
+#define DW_AT_specification 0x47
 #define DW_LNE_define_file 0x03
 #define DW_LNS_copy 0x01
 #define DW_LNS_advance_pc 0x02
@@ -99,6 +113,10 @@ Dwarf::Section::Section(std::string_view d) : is64Bit_(false), data_(d)
 
 namespace
 {
+// Maximum number of DIEAbbreviation to cache in a compilation unit. Used to
+// speed up inline function lookup.
+const uint32_t kMaxAbbreviationEntries = 1000;
+
 // All following read* functions read from a std::string_view, advancing the
 // std::string_view, and aborting if there's not enough room.
 
@@ -371,8 +389,11 @@ void Dwarf::init()
     // Optional: fast address range lookup. If missing .debug_info can
     // be used - but it's much slower (linear scan).
     getSection(".debug_aranges", &aranges_);
+
+    getSection(".debug_ranges", &ranges_);
 }
 
+// static
 bool Dwarf::readAbbreviation(std::string_view & section, DIEAbbreviation & abbr)
 {
     // abbreviation code
@@ -384,14 +405,14 @@ bool Dwarf::readAbbreviation(std::string_view & section, DIEAbbreviation & abbr)
     abbr.tag = readULEB(section);
 
     // does this entry have children?
-    abbr.hasChildren = (read<uint8_t>(section) != DW_CHILDREN_no);
+    abbr.has_children = (read<uint8_t>(section) != DW_CHILDREN_no);
 
     // attributes
     const char * attribute_begin = section.data();
     for (;;)
     {
         SAFE_CHECK(!section.empty(), "invalid attribute section");
-        auto attr = readAttribute(section);
+        auto attr = readAttributeSpec(section);
         if (attr.name == 0 && attr.form == 0)
             break;
     }
@@ -400,11 +421,161 @@ bool Dwarf::readAbbreviation(std::string_view & section, DIEAbbreviation & abbr)
     return true;
 }
 
-Dwarf::DIEAbbreviation::Attribute Dwarf::readAttribute(std::string_view & sp)
+// static
+void Dwarf::readCompilationUnitAbbrs(std::string_view abbrev, CompilationUnit & cu)
+{
+    abbrev.remove_prefix(cu.abbrev_offset);
+
+    DIEAbbreviation abbr;
+    while (readAbbreviation(abbrev, abbr))
+    {
+        // Abbreviation code 0 is reserved for null debugging information entries.
+        if (abbr.code != 0 && abbr.code <= kMaxAbbreviationEntries)
+        {
+            cu.abbr_cache[abbr.code - 1] = abbr;
+        }
+    }
+}
+
+size_t Dwarf::forEachChild(const CompilationUnit & cu, const Die & die, std::function<bool(const Die & die)> f) const
+{
+    size_t next_die_offset = forEachAttribute(cu, die, [&](const Attribute &) { return true; });
+    if (!die.abbr.has_children)
+    {
+        return next_die_offset;
+    }
+
+    auto child_die = getDieAtOffset(cu, next_die_offset);
+    while (child_die.code != 0)
+    {
+        if (!f(child_die))
+        {
+            return child_die.offset;
+        }
+
+        // NOTE: Don't run `f` over grandchildren, just skip over them.
+        size_t sibling_offset = forEachChild(cu, child_die, [](const Die &) { return true; });
+        child_die = getDieAtOffset(cu, sibling_offset);
+    }
+
+    // childDie is now a dummy die whose offset is to the code 0 marking the
+    // end of the children. Need to add one to get the offset of the next die.
+    return child_die.offset + 1;
+}
+
+/*
+ * Iterate over all attributes of the given DIE, calling the given callable
+ * for each. Iteration is stopped early if any of the calls return false.
+ */
+size_t Dwarf::forEachAttribute(const CompilationUnit & cu, const Die & die, std::function<bool(const Attribute & die)> f) const
+{
+    auto attrs = die.abbr.attributes;
+    auto values = std::string_view{info_.data() + die.offset + die.attr_offset, cu.offset + cu.size - die.offset - die.attr_offset};
+    while (auto spec = readAttributeSpec(attrs))
+    {
+        auto attr = readAttribute(die, spec, values);
+        if (!f(attr))
+        {
+            return static_cast<size_t>(-1);
+        }
+    }
+    return values.data() - info_.data();
+}
+
+Dwarf::Attribute Dwarf::readAttribute(const Die & die, AttributeSpec spec, std::string_view & info) const
+{
+    switch (spec.form)
+    {
+        case DW_FORM_addr:
+            return {spec, die, read<uintptr_t>(info)};
+        case DW_FORM_block1:
+            return {spec, die, readBytes(info, read<uint8_t>(info))};
+        case DW_FORM_block2:
+            return {spec, die, readBytes(info, read<uint16_t>(info))};
+        case DW_FORM_block4:
+            return {spec, die, readBytes(info, read<uint32_t>(info))};
+        case DW_FORM_block:
+            [[fallthrough]];
+        case DW_FORM_exprloc:
+            return {spec, die, readBytes(info, readULEB(info))};
+        case DW_FORM_data1:
+            [[fallthrough]];
+        case DW_FORM_ref1:
+            return {spec, die, read<uint8_t>(info)};
+        case DW_FORM_data2:
+            [[fallthrough]];
+        case DW_FORM_ref2:
+            return {spec, die, read<uint16_t>(info)};
+        case DW_FORM_data4:
+            [[fallthrough]];
+        case DW_FORM_ref4:
+            return {spec, die, read<uint32_t>(info)};
+        case DW_FORM_data8:
+            [[fallthrough]];
+        case DW_FORM_ref8:
+            [[fallthrough]];
+        case DW_FORM_ref_sig8:
+            return {spec, die, read<uint64_t>(info)};
+        case DW_FORM_sdata:
+            return {spec, die, uint64_t(readSLEB(info))};
+        case DW_FORM_udata:
+            [[fallthrough]];
+        case DW_FORM_ref_udata:
+            return {spec, die, readULEB(info)};
+        case DW_FORM_flag:
+            return {spec, die, read<uint8_t>(info)};
+        case DW_FORM_flag_present:
+            return {spec, die, 1u};
+        case DW_FORM_sec_offset:
+            [[fallthrough]];
+        case DW_FORM_ref_addr:
+            return {spec, die, readOffset(info, die.is64Bit)};
+        case DW_FORM_string:
+            return {spec, die, readNullTerminated(info)};
+        case DW_FORM_strp:
+            return {spec, die, getStringFromStringSection(readOffset(info, die.is64Bit))};
+        case DW_FORM_indirect: // form is explicitly specified
+            // Update spec with the actual FORM.
+            spec.form = readULEB(info);
+            return readAttribute(die, spec, info);
+        default:
+            SAFE_CHECK(false, "invalid attribute form");
+    }
+
+    return {spec, die, 0u};
+}
+
+// static
+Dwarf::AttributeSpec Dwarf::readAttributeSpec(std::string_view & sp)
 {
     return {readULEB(sp), readULEB(sp)};
 }
 
+// static
+Dwarf::CompilationUnit Dwarf::getCompilationUnit(std::string_view info, uint64_t offset)
+{
+    SAFE_CHECK(offset < info.size(), "unexpected offset");
+    CompilationUnit cu;
+    std::string_view chunk(info);
+    cu.offset = offset;
+    chunk.remove_prefix(offset);
+
+    auto initial_length = read<uint32_t>(chunk);
+    cu.is64Bit = (initial_length == uint32_t(-1));
+    cu.size = cu.is64Bit ? read<uint64_t>(chunk) : initial_length;
+    SAFE_CHECK(cu.size <= chunk.size(), "invalid chunk size");
+    cu.size += cu.is64Bit ? 12 : 4;
+
+    cu.version = read<uint16_t>(chunk);
+    SAFE_CHECK(cu.version >= 2 && cu.version <= 4, "invalid info version");
+    cu.abbrev_offset = readOffset(chunk, cu.is64Bit);
+    cu.addr_size = read<uint8_t>(chunk);
+    SAFE_CHECK(cu.addr_size == sizeof(uintptr_t), "invalid address size");
+
+    cu.first_die = chunk.data() - info.data();
+    return cu;
+}
+
 Dwarf::DIEAbbreviation Dwarf::getAbbreviation(uint64_t code, uint64_t offset) const
 {
     // Linear search in the .debug_abbrev section, starting at offset
@@ -516,104 +687,403 @@ bool Dwarf::findDebugInfoOffset(uintptr_t address, std::string_view aranges, uin
     return false;
 }
 
+Dwarf::Die Dwarf::getDieAtOffset(const CompilationUnit & cu, uint64_t offset) const
+{
+    SAFE_CHECK(offset < info_.size(), "unexpected offset");
+    Die die;
+    std::string_view sp{info_.data() + offset, cu.offset + cu.size - offset};
+    die.offset = offset;
+    die.is64Bit = cu.is64Bit;
+    auto code = readULEB(sp);
+    die.code = code;
+    if (code == 0)
+    {
+        return die;
+    }
+    die.attr_offset = sp.data() - info_.data() - offset;
+    die.abbr = !cu.abbr_cache.empty() && die.code < kMaxAbbreviationEntries ? cu.abbr_cache[die.code - 1]
+                                                                            : getAbbreviation(die.code, cu.abbrev_offset);
+
+    return die;
+}
+
+Dwarf::Die Dwarf::findDefinitionDie(const CompilationUnit & cu, const Die & die) const
+{
+    // Find the real definition instead of declaration.
+    // DW_AT_specification: Incomplete, non-defining, or separate declaration
+    // corresponding to a declaration
+    auto offset = getAttribute<uint64_t>(cu, die, DW_AT_specification);
+    if (!offset)
+    {
+        return die;
+    }
+    return getDieAtOffset(cu, cu.offset + offset.value());
+}
+
 /**
  * Find the @locationInfo for @address in the compilation unit represented
  * by the @sp .debug_info entry.
  * Returns whether the address was found.
  * Advances @sp to the next entry in .debug_info.
  */
-bool Dwarf::findLocation(uintptr_t address, std::string_view & infoEntry, LocationInfo & locationInfo) const
+bool Dwarf::findLocation(
+    uintptr_t address,
+    const LocationInfoMode mode,
+    CompilationUnit & cu,
+    LocationInfo & info,
+    std::vector<SymbolizedFrame> & inline_frames) const
 {
-    // For each compilation unit compiled with a DWARF producer, a
-    // contribution is made to the .debug_info section of the object
-    // file. Each such contribution consists of a compilation unit
-    // header (see Section 7.5.1.1) followed by a single
-    // DW_TAG_compile_unit or DW_TAG_partial_unit debugging information
-    // entry, together with its children.
-
-    // 7.5.1.1 Compilation Unit Header
-    //  1. unit_length (4B or 12B): read by Section::next
-    //  2. version (2B)
-    //  3. debug_abbrev_offset (4B or 8B): offset into the .debug_abbrev section
-    //  4. address_size (1B)
-
-    Section debug_info_section(infoEntry);
-    std::string_view chunk;
-    SAFE_CHECK(debug_info_section.next(chunk), "invalid debug info");
-
-    auto version = read<uint16_t>(chunk);
-    SAFE_CHECK(version >= 2 && version <= 4, "invalid info version");
-    uint64_t abbrev_offset = readOffset(chunk, debug_info_section.is64Bit());
-    auto address_size = read<uint8_t>(chunk);
-    SAFE_CHECK(address_size == sizeof(uintptr_t), "invalid address size");
-
-    // We survived so far. The first (and only) DIE should be DW_TAG_compile_unit
-    // NOTE: - binutils <= 2.25 does not issue DW_TAG_partial_unit.
-    //       - dwarf compression tools like `dwz` may generate it.
-    // TODO(tudorb): Handle DW_TAG_partial_unit?
-    auto code = readULEB(chunk);
-    SAFE_CHECK(code != 0, "invalid code");
-    auto abbr = getAbbreviation(code, abbrev_offset);
-    SAFE_CHECK(abbr.tag == DW_TAG_compile_unit, "expecting compile unit entry");
-    // Skip children entries, remove_prefix to the next compilation unit entry.
-    infoEntry.remove_prefix(chunk.end() - infoEntry.begin());
+    Die die = getDieAtOffset(cu, cu.first_die);
+    // Partial compilation unit (DW_TAG_partial_unit) is not supported.
+    SAFE_CHECK(die.abbr.tag == DW_TAG_compile_unit, "expecting compile unit entry");
 
     // Read attributes, extracting the few we care about
-    bool found_line_offset = false;
-    uint64_t line_offset = 0;
+    std::optional<uint64_t> line_offset = 0;
     std::string_view compilation_directory;
-    std::string_view main_file_name;
+    std::optional<std::string_view> main_file_name;
+    std::optional<uint64_t> base_addr_cu;
 
-    DIEAbbreviation::Attribute attr;
-    std::string_view attributes = abbr.attributes;
-    for (;;)
-    {
-        attr = readAttribute(attributes);
-        if (attr.name == 0 && attr.form == 0)
-        {
-            break;
-        }
-        auto val = readAttributeValue(chunk, attr.form, debug_info_section.is64Bit());
-        switch (attr.name)
+    forEachAttribute(cu, die, [&](const Attribute & attr) {
+        switch (attr.spec.name)
         {
             case DW_AT_stmt_list:
                 // Offset in .debug_line for the line number VM program for this
                 // compilation unit
-                line_offset = std::get<uint64_t>(val);
-                found_line_offset = true;
+                line_offset = std::get<uint64_t>(attr.attr_value);
                 break;
             case DW_AT_comp_dir:
                 // Compilation directory
-                compilation_directory = std::get<std::string_view>(val);
+                compilation_directory = std::get<std::string_view>(attr.attr_value);
                 break;
             case DW_AT_name:
                 // File name of main file being compiled
-                main_file_name = std::get<std::string_view>(val);
+                main_file_name = std::get<std::string_view>(attr.attr_value);
+                break;
+            case DW_AT_low_pc:
+            case DW_AT_entry_pc:
+                // 2.17.1: historically DW_AT_low_pc was used. DW_AT_entry_pc was
+                // introduced in DWARF3. Support either to determine the base address of
+                // the CU.
+                base_addr_cu = std::get<uint64_t>(attr.attr_value);
                 break;
         }
-    }
+        // Iterate through all attributes until find all above.
+        return true;
+    });
 
-    if (!main_file_name.empty())
+    if (main_file_name)
     {
-        locationInfo.hasMainFile = true;
-        locationInfo.mainFile = Path(compilation_directory, "", main_file_name);
+        info.has_main_file = true;
+        info.main_file = Path(compilation_directory, "", *main_file_name);
     }
 
-    if (!found_line_offset)
+    if (!line_offset)
     {
         return false;
     }
 
     std::string_view line_section(line_);
-    line_section.remove_prefix(line_offset);
+    line_section.remove_prefix(*line_offset);
     LineNumberVM line_vm(line_section, compilation_directory);
 
     // Execute line number VM program to find file and line
-    locationInfo.hasFileAndLine = line_vm.findAddress(address, locationInfo.file, locationInfo.line);
-    return locationInfo.hasFileAndLine;
+    info.has_file_and_line = line_vm.findAddress(address, info.file, info.line);
+
+    bool check_inline = (mode == LocationInfoMode::FULL_WITH_INLINE);
+
+    if (info.has_file_and_line && check_inline)
+    {
+        // Re-get the compilation unit with abbreviation cached.
+        cu.abbr_cache.clear();
+        readCompilationUnitAbbrs(abbrev_, cu);
+
+        // Find the subprogram that matches the given address.
+        Die subprogram;
+        findSubProgramDieForAddress(cu, die, address, base_addr_cu, subprogram);
+
+        // Subprogram is the DIE of caller function.
+        if (check_inline && subprogram.abbr.has_children)
+        {
+            // Use an extra location and get its call file and call line, so that
+            // they can be used for the second last location when we don't have
+            // enough inline frames for all inline functions call stack.
+            const size_t max_size = Dwarf::kMaxInlineLocationInfoPerFrame + 1;
+            std::vector<CallLocation> call_locations;
+            call_locations.reserve(Dwarf::kMaxInlineLocationInfoPerFrame + 1);
+
+            findInlinedSubroutineDieForAddress(cu, subprogram, line_vm, address, base_addr_cu, call_locations, max_size);
+            size_t num_found = call_locations.size();
+
+            if (num_found > 0)
+            {
+                const auto inner_most_file = info.file;
+                const auto inner_most_line = info.line;
+
+                // Earlier we filled in locationInfo:
+                // - mainFile: the path to the CU -- the file where the non-inlined
+                //   call is made from.
+                // - file + line: the location of the inner-most inlined call.
+                // Here we already find inlined info so mainFile would be redundant.
+                info.has_main_file = false;
+                info.main_file = Path{};
+                // @findInlinedSubroutineDieForAddress fills inlineLocations[0] with the
+                // file+line of the non-inlined outer function making the call.
+                // locationInfo.name is already set by the caller by looking up the
+                // non-inlined function @address belongs to.
+                info.has_file_and_line = true;
+                info.file = call_locations[0].file;
+                info.line = call_locations[0].line;
+
+                // The next inlined subroutine's call file and call line is the current
+                // caller's location.
+                for (size_t i = 0; i < num_found - 1; i++)
+                {
+                    call_locations[i].file = call_locations[i + 1].file;
+                    call_locations[i].line = call_locations[i + 1].line;
+                }
+                // CallLocation for the inner-most inlined function:
+                // - will be computed if enough space was available in the passed
+                //   buffer.
+                // - will have a .name, but no !.file && !.line
+                // - its corresponding file+line is the one returned by LineVM based
+                //   on @address.
+                // Use the inner-most inlined file+line info we got from the LineVM.
+                call_locations[num_found - 1].file = inner_most_file;
+                call_locations[num_found - 1].line = inner_most_line;
+
+                // Fill in inline frames in reverse order (as expected by the caller).
+                std::reverse(call_locations.begin(), call_locations.end());
+                for (const auto & call_location : call_locations)
+                {
+                    SymbolizedFrame inline_frame;
+                    inline_frame.found = true;
+                    inline_frame.addr = address;
+                    inline_frame.name = call_location.name.data();
+                    inline_frame.location.has_file_and_line = true;
+                    inline_frame.location.file = call_location.file;
+                    inline_frame.location.line = call_location.line;
+                    inline_frames.push_back(inline_frame);
+                }
+            }
+        }
+    }
+
+    return info.has_file_and_line;
 }
 
-bool Dwarf::findAddress(uintptr_t address, LocationInfo & locationInfo, LocationInfoMode mode) const
+void Dwarf::findSubProgramDieForAddress(
+    const CompilationUnit & cu, const Die & die, uint64_t address, std::optional<uint64_t> base_addr_cu, Die & subprogram) const
+{
+    forEachChild(cu, die, [&](const Die & child_die) {
+        if (child_die.abbr.tag == DW_TAG_subprogram)
+        {
+            std::optional<uint64_t> low_pc;
+            std::optional<uint64_t> high_pc;
+            std::optional<bool> is_high_pc_addr;
+            std::optional<uint64_t> range_offset;
+            forEachAttribute(cu, child_die, [&](const Attribute & attr) {
+                switch (attr.spec.name)
+                {
+                    case DW_AT_ranges:
+                        range_offset = std::get<uint64_t>(attr.attr_value);
+                        break;
+                    case DW_AT_low_pc:
+                        low_pc = std::get<uint64_t>(attr.attr_value);
+                        break;
+                    case DW_AT_high_pc:
+                        // Value of DW_AT_high_pc attribute can be an address
+                        // (DW_FORM_addr) or an offset (DW_FORM_data).
+                        is_high_pc_addr = (attr.spec.form == DW_FORM_addr);
+                        high_pc = std::get<uint64_t>(attr.attr_value);
+                        break;
+                }
+                // Iterate through all attributes until find all above.
+                return true;
+            });
+            bool pc_match = low_pc && high_pc && is_high_pc_addr && address >= *low_pc
+                && (address < (*is_high_pc_addr ? *high_pc : *low_pc + *high_pc));
+            bool range_match = range_offset && isAddrInRangeList(address, base_addr_cu, range_offset.value(), cu.addr_size);
+            if (pc_match || range_match)
+            {
+                subprogram = child_die;
+                return false;
+            }
+        }
+
+        findSubProgramDieForAddress(cu, child_die, address, base_addr_cu, subprogram);
+
+        // Iterates through children until find the inline subprogram.
+        return true;
+    });
+}
+
+/**
+ * Find DW_TAG_inlined_subroutine child DIEs that contain @address and
+ * then extract:
+ * - Where was it called from (DW_AT_call_file & DW_AT_call_line):
+ *   the statement or expression that caused the inline expansion.
+ * - The inlined function's name. As a function may be inlined multiple
+ *   times, common attributes like DW_AT_linkage_name or DW_AT_name
+ *   are only stored in its "concrete out-of-line instance" (a
+ *   DW_TAG_subprogram) which we find using DW_AT_abstract_origin.
+ */
+void Dwarf::findInlinedSubroutineDieForAddress(
+    const CompilationUnit & cu,
+    const Die & die,
+    const LineNumberVM & line_vm,
+    uint64_t address,
+    std::optional<uint64_t> base_addr_cu,
+    std::vector<CallLocation> & locations,
+    const size_t max_size) const
+{
+    if (locations.size() >= max_size)
+    {
+        return;
+    }
+
+    forEachChild(cu, die, [&](const Die & child_die) {
+        // Between a DW_TAG_subprogram and and DW_TAG_inlined_subroutine we might
+        // have arbitrary intermediary "nodes", including DW_TAG_common_block,
+        // DW_TAG_lexical_block, DW_TAG_try_block, DW_TAG_catch_block and
+        // DW_TAG_with_stmt, etc.
+        // We can't filter with locationhere since its range may be not specified.
+        // See section 2.6.2: A location list containing only an end of list entry
+        // describes an object that exists in the source code but not in the
+        // executable program.
+        if (child_die.abbr.tag == DW_TAG_try_block || child_die.abbr.tag == DW_TAG_catch_block || child_die.abbr.tag == DW_TAG_entry_point
+            || child_die.abbr.tag == DW_TAG_common_block || child_die.abbr.tag == DW_TAG_lexical_block)
+        {
+            findInlinedSubroutineDieForAddress(cu, child_die, line_vm, address, base_addr_cu, locations, max_size);
+            return true;
+        }
+
+        std::optional<uint64_t> low_pc;
+        std::optional<uint64_t> high_pc;
+        std::optional<bool> is_high_pc_addr;
+        std::optional<uint64_t> abstract_origin;
+        std::optional<uint64_t> abstract_origin_ref_type;
+        std::optional<uint64_t> call_file;
+        std::optional<uint64_t> call_line;
+        std::optional<uint64_t> range_offset;
+        forEachAttribute(cu, child_die, [&](const Attribute & attr) {
+            switch (attr.spec.name)
+            {
+                case DW_AT_ranges:
+                    range_offset = std::get<uint64_t>(attr.attr_value);
+                    break;
+                case DW_AT_low_pc:
+                    low_pc = std::get<uint64_t>(attr.attr_value);
+                    break;
+                case DW_AT_high_pc:
+                    // Value of DW_AT_high_pc attribute can be an address
+                    // (DW_FORM_addr) or an offset (DW_FORM_data).
+                    is_high_pc_addr = (attr.spec.form == DW_FORM_addr);
+                    high_pc = std::get<uint64_t>(attr.attr_value);
+                    break;
+                case DW_AT_abstract_origin:
+                    abstract_origin_ref_type = attr.spec.form;
+                    abstract_origin = std::get<uint64_t>(attr.attr_value);
+                    break;
+                case DW_AT_call_line:
+                    call_line = std::get<uint64_t>(attr.attr_value);
+                    break;
+                case DW_AT_call_file:
+                    call_file = std::get<uint64_t>(attr.attr_value);
+                    break;
+            }
+            // Iterate through all until find all above attributes.
+            return true;
+        });
+
+        // 2.17 Code Addresses and Ranges
+        // Any debugging information entry describing an entity that has a
+        // machine code address or range of machine code addresses,
+        // which includes compilation units, module initialization, subroutines,
+        // ordinary blocks, try/catch blocks, labels and the like, may have
+        //  - A DW_AT_low_pc attribute for a single address,
+        //  - A DW_AT_low_pc and DW_AT_high_pc pair of attributes for a
+        //    single contiguous range of addresses, or
+        //  - A DW_AT_ranges attribute for a non-contiguous range of addresses.
+        // TODO: Support DW_TAG_entry_point and DW_TAG_common_block that don't
+        // have DW_AT_low_pc/DW_AT_high_pc pairs and DW_AT_ranges.
+        // TODO: Support relocated address which requires lookup in relocation map.
+        bool pc_match
+            = low_pc && high_pc && is_high_pc_addr && address >= *low_pc && (address < (*is_high_pc_addr ? *high_pc : *low_pc + *high_pc));
+        bool range_match = range_offset && isAddrInRangeList(address, base_addr_cu, range_offset.value(), cu.addr_size);
+        if (!pc_match && !range_match)
+        {
+            // Address doesn't match. Keep searching other children.
+            return true;
+        }
+
+        if (!abstract_origin || !abstract_origin_ref_type || !call_line || !call_file)
+        {
+            // We expect a single sibling DIE to match on addr, but it's missing
+            // required fields. Stop searching for other DIEs.
+            return false;
+        }
+
+        CallLocation location;
+        location.file = line_vm.getFullFileName(*call_file);
+        location.line = *call_line;
+
+        auto get_function_name = [&](const CompilationUnit & srcu, uint64_t die_offset) {
+            auto decl_die = getDieAtOffset(srcu, die_offset);
+            // Jump to the actual function definition instead of declaration for name
+            // and line info.
+            auto def_die = findDefinitionDie(srcu, decl_die);
+
+            std::string_view name;
+            // The file and line will be set in the next inline subroutine based on
+            // its DW_AT_call_file and DW_AT_call_line.
+            forEachAttribute(srcu, def_die, [&](const Attribute & attr) {
+                switch (attr.spec.name)
+                {
+                    case DW_AT_linkage_name:
+                        name = std::get<std::string_view>(attr.attr_value);
+                        break;
+                    case DW_AT_name:
+                        // NOTE: when DW_AT_linkage_name and DW_AT_name match, dwarf
+                        // emitters omit DW_AT_linkage_name (to save space). If present
+                        // DW_AT_linkage_name should always be preferred (mangled C++ name
+                        // vs just the function name).
+                        if (name.empty())
+                        {
+                            name = std::get<std::string_view>(attr.attr_value);
+                        }
+                        break;
+                }
+                return true;
+            });
+            return name;
+        };
+
+        // DW_AT_abstract_origin is a reference. There a 3 types of references:
+        // - the reference can identify any debugging information entry within the
+        //   compilation unit (DW_FORM_ref1, DW_FORM_ref2, DW_FORM_ref4,
+        //   DW_FORM_ref8, DW_FORM_ref_udata). This type of reference is an offset
+        //   from the first byte of the compilation header for the compilation unit
+        //   containing the reference.
+        // - the reference can identify any debugging information entry within a
+        //   .debug_info section; in particular, it may refer to an entry in a
+        //   different compilation unit (DW_FORM_ref_addr)
+        // - the reference can identify any debugging information type entry that
+        //   has been placed in its own type unit.
+        //   Not applicable for DW_AT_abstract_origin.
+        location.name = (*abstract_origin_ref_type != DW_FORM_ref_addr)
+            ? get_function_name(cu, cu.offset + *abstract_origin)
+            : get_function_name(findCompilationUnit(info_, *abstract_origin), *abstract_origin);
+
+        locations.push_back(location);
+
+        findInlinedSubroutineDieForAddress(cu, child_die, line_vm, address, base_addr_cu, locations, max_size);
+
+        return false;
+    });
+}
+
+bool Dwarf::findAddress(
+    uintptr_t address, LocationInfo & locationInfo, LocationInfoMode mode, std::vector<SymbolizedFrame> & inline_frames) const
 {
     locationInfo = LocationInfo();
 
@@ -635,10 +1105,9 @@ bool Dwarf::findAddress(uintptr_t address, LocationInfo & locationInfo, Location
         if (findDebugInfoOffset(address, aranges_, offset))
         {
             // Read compilation unit header from .debug_info
-            std::string_view info_entry(info_);
-            info_entry.remove_prefix(offset);
-            findLocation(address, info_entry, locationInfo);
-            return locationInfo.hasFileAndLine;
+            auto unit = getCompilationUnit(info_, offset);
+            findLocation(address, mode, unit, locationInfo, inline_frames);
+            return locationInfo.has_file_and_line;
         }
         else if (mode == LocationInfoMode::FAST)
         {
@@ -650,20 +1119,92 @@ bool Dwarf::findAddress(uintptr_t address, LocationInfo & locationInfo, Location
         }
         else
         {
-            SAFE_CHECK(mode == LocationInfoMode::FULL, "unexpected mode");
+            SAFE_CHECK(mode == LocationInfoMode::FULL || mode == LocationInfoMode::FULL_WITH_INLINE, "unexpected mode");
             // Fall back to the linear scan.
         }
     }
 
     // Slow path (linear scan): Iterate over all .debug_info entries
     // and look for the address in each compilation unit.
-    std::string_view info_entry(info_);
-    while (!info_entry.empty() && !locationInfo.hasFileAndLine)
-        findLocation(address, info_entry, locationInfo);
+    uint64_t offset = 0;
+    while (offset < info_.size() && !locationInfo.has_file_and_line)
+    {
+        auto unit = getCompilationUnit(info_, offset);
+        offset += unit.size;
+        findLocation(address, mode, unit, locationInfo, inline_frames);
+    }
 
-    return locationInfo.hasFileAndLine;
+    return locationInfo.has_file_and_line;
 }
 
+bool Dwarf::isAddrInRangeList(uint64_t address, std::optional<uint64_t> base_addr, size_t offset, uint8_t addr_size) const
+{
+    SAFE_CHECK(addr_size == 4 || addr_size == 8, "wrong address size");
+    if (ranges_.empty())
+    {
+        return false;
+    }
+
+    const bool is64BitAddr = addr_size == 8;
+    std::string_view sp = ranges_;
+    sp.remove_prefix(offset);
+    const uint64_t max_addr = is64BitAddr ? std::numeric_limits<uint64_t>::max() : std::numeric_limits<uint32_t>::max();
+    while (!sp.empty())
+    {
+        uint64_t begin = readOffset(sp, is64BitAddr);
+        uint64_t end = readOffset(sp, is64BitAddr);
+        // The range list entry is a base address selection entry.
+        if (begin == max_addr)
+        {
+            base_addr = end;
+            continue;
+        }
+        // The range list entry is an end of list entry.
+        if (begin == 0 && end == 0)
+        {
+            break;
+        }
+        // Check if the given address falls in the range list entry.
+        // 2.17.3 Non-Contiguous Address Ranges
+        // The applicable base address of a range list entry is determined by the
+        // closest preceding base address selection entry (see below) in the same
+        // range list. If there is no such selection entry, then the applicable base
+        // address defaults to the base address of the compilation unit.
+        if (base_addr && address >= begin + *base_addr && address < end + *base_addr)
+        {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+// static
+Dwarf::CompilationUnit Dwarf::findCompilationUnit(std::string_view info, uint64_t targetOffset)
+{
+    SAFE_CHECK(targetOffset < info.size(), "unexpected target address");
+    uint64_t offset = 0;
+    while (offset < info.size())
+    {
+        std::string_view chunk(info);
+        chunk.remove_prefix(offset);
+
+        auto initial_length = read<uint32_t>(chunk);
+        auto is64Bit = (initial_length == uint32_t(-1));
+        auto size = is64Bit ? read<uint64_t>(chunk) : initial_length;
+        SAFE_CHECK(size <= chunk.size(), "invalid chunk size");
+        size += is64Bit ? 12 : 4;
+
+        if (offset + size > targetOffset)
+        {
+            break;
+        }
+        offset += size;
+    }
+    return getCompilationUnit(info, offset);
+}
+
+
 Dwarf::LineNumberVM::LineNumberVM(std::string_view data, std::string_view compilationDirectory)
     : compilationDirectory_(compilationDirectory)
 {
diff --git a/src/Common/Dwarf.h b/src/Common/Dwarf.h
index 40badc1c5a4..fce65648b70 100644
--- a/src/Common/Dwarf.h
+++ b/src/Common/Dwarf.h
@@ -21,9 +21,11 @@
 /** This file was edited for ClickHouse.
   */
 
+#include <optional>
 #include <string>
 #include <string_view>
 #include <variant>
+#include <vector>
 
 
 namespace DB
@@ -63,6 +65,12 @@ public:
     /** Create a DWARF parser around an ELF file. */
     explicit Dwarf(const Elf & elf);
 
+    /**
+     * More than one location info may exist if current frame is an inline
+     * function call.
+     */
+    static constexpr uint32_t kMaxInlineLocationInfoPerFrame = 10;
+
     /**
       * Represent a file path a s collection of three parts (base directory,
       * subdirectory, and file).
@@ -107,6 +115,14 @@ public:
         std::string_view file_;
     };
 
+    // Indicates inline funtion `name` is called  at `line@file`.
+    struct CallLocation
+    {
+        Path file = {};
+        uint64_t line;
+        std::string_view name;
+    };
+
     enum class LocationInfoMode
     {
         // Don't resolve location info.
@@ -115,28 +131,45 @@ public:
         FAST,
         // Scan all CU in .debug_info (slow!) on .debug_aranges lookup failure.
         FULL,
+        // Scan .debug_info (super slower, use with caution) for inline functions in
+        // addition to FULL.
+        FULL_WITH_INLINE,
     };
 
     struct LocationInfo
     {
-        bool hasMainFile = false;
-        Path mainFile;
+        bool has_main_file = false;
+        Path main_file;
 
-        bool hasFileAndLine = false;
+        bool has_file_and_line = false;
         Path file;
         uint64_t line = 0;
     };
 
+    /**
+     * Frame information: symbol name and location.
+     */
+    struct SymbolizedFrame
+    {
+        bool found = false;
+        uintptr_t addr = 0;
+        // Mangled symbol name. Use `folly::demangle()` to demangle it.
+        const char * name = nullptr;
+        LocationInfo location;
+        std::shared_ptr<Elf> file;
+
+        void clear() { *this = SymbolizedFrame(); }
+    };
+
     /** Find the file and line number information corresponding to address.
       * The address must be physical - offset in object file without offset in virtual memory where the object is loaded.
       */
-    bool findAddress(uintptr_t address, LocationInfo & info, LocationInfoMode mode) const;
+    bool findAddress(uintptr_t address, LocationInfo & info, LocationInfoMode mode, std::vector<SymbolizedFrame> & inline_frames) const;
 
 private:
     static bool findDebugInfoOffset(uintptr_t address, std::string_view aranges, uint64_t & offset);
 
     void init();
-    bool findLocation(uintptr_t address, std::string_view & infoEntry, LocationInfo & info) const;
 
     const Elf * elf_;
 
@@ -169,17 +202,81 @@ private:
     {
         uint64_t code;
         uint64_t tag;
-        bool hasChildren;
-
-        struct Attribute
-        {
-            uint64_t name;
-            uint64_t form;
-        };
+        bool has_children = false;
 
         std::string_view attributes;
     };
 
+    // Debugging information entry to define a low-level representation of a
+    // source program. Each debugging information entry consists of an identifying
+    // tag and a series of attributes. An entry, or group of entries together,
+    // provide a description of a corresponding entity in the source program.
+    struct Die
+    {
+        bool is64Bit;
+        // Offset from start to first attribute
+        uint8_t attr_offset;
+        // Offset within debug info.
+        uint32_t offset;
+        uint64_t code;
+        DIEAbbreviation abbr;
+    };
+
+    struct AttributeSpec
+    {
+        uint64_t name = 0;
+        uint64_t form = 0;
+
+        explicit operator bool() const { return name != 0 || form != 0; }
+    };
+
+    struct Attribute
+    {
+        AttributeSpec spec;
+        const Die & die;
+        std::variant<uint64_t, std::string_view> attr_value;
+    };
+
+    struct CompilationUnit
+    {
+        bool is64Bit;
+        uint8_t version;
+        uint8_t addr_size;
+        // Offset in .debug_info of this compilation unit.
+        uint32_t offset;
+        uint32_t size;
+        // Offset in .debug_info for the first DIE in this compilation unit.
+        uint32_t first_die;
+        uint64_t abbrev_offset;
+        // Only the CompilationUnit that contains the caller functions needs this cache.
+        // Indexed by (abbr.code - 1) if (abbr.code - 1) < abbrCache.size();
+        std::vector<DIEAbbreviation> abbr_cache;
+    };
+
+    static CompilationUnit getCompilationUnit(std::string_view info, uint64_t offset);
+
+    /** cu must exist during the life cycle of created detail::Die. */
+    Die getDieAtOffset(const CompilationUnit & cu, uint64_t offset) const;
+
+    /**
+     * Find the actual definition DIE instead of declaration for the given die.
+     */
+    Die findDefinitionDie(const CompilationUnit & cu, const Die & die) const;
+
+    bool findLocation(
+        uintptr_t address,
+        LocationInfoMode mode,
+        CompilationUnit & cu,
+        LocationInfo & info,
+        std::vector<SymbolizedFrame> & inline_frames) const;
+
+    /**
+     * Finds a subprogram debugging info entry that contains a given address among
+     * children of given die. Depth first search.
+     */
+    void findSubProgramDieForAddress(
+        const CompilationUnit & cu, const Die & die, uint64_t address, std::optional<uint64_t> base_addr_cu, Die & subprogram) const;
+
     // Interpreter for the line number bytecode VM
     class LineNumberVM
     {
@@ -188,6 +285,13 @@ private:
 
         bool findAddress(uintptr_t target, Path & file, uint64_t & line);
 
+        /** Gets full file name at given index including directory. */
+        Path getFullFileName(uint64_t index) const
+        {
+            auto fn = getFileName(index);
+            return Path({}, getIncludeDirectory(fn.directoryIndex), fn.relativeName);
+        }
+
     private:
         void init();
         void reset();
@@ -259,18 +363,50 @@ private:
         uint64_t discriminator_;
     };
 
+    /**
+     * Finds inlined subroutine DIEs and their caller lines that contains a given
+     * address among children of given die. Depth first search.
+     */
+    void findInlinedSubroutineDieForAddress(
+        const CompilationUnit & cu,
+        const Die & die,
+        const LineNumberVM & line_vm,
+        uint64_t address,
+        std::optional<uint64_t> base_addr_cu,
+        std::vector<CallLocation> & locations,
+        size_t max_size) const;
+
     // Read an abbreviation from a std::string_view, return true if at end; remove_prefix section
     static bool readAbbreviation(std::string_view & section, DIEAbbreviation & abbr);
 
+    static void readCompilationUnitAbbrs(std::string_view abbrev, CompilationUnit & cu);
+
+    /**
+     * Iterates over all children of a debugging info entry, calling the given
+     * callable for each. Iteration is stopped early if any of the calls return
+     * false. Returns the offset of next DIE after iterations.
+     */
+    size_t forEachChild(const CompilationUnit & cu, const Die & die, std::function<bool(const Die & die)> f) const;
+
     // Get abbreviation corresponding to a code, in the chunk starting at
     // offset in the .debug_abbrev section
     DIEAbbreviation getAbbreviation(uint64_t code, uint64_t offset) const;
 
+    /**
+     * Iterates over all attributes of a debugging info entry,  calling the given
+     * callable for each. If all attributes are visited, then return the offset of
+     * next DIE, or else iteration is stopped early and return size_t(-1) if any
+     * of the calls return false.
+     */
+    size_t forEachAttribute(const CompilationUnit & cu, const Die & die, std::function<bool(const Attribute & die)> f) const;
+
+    Attribute readAttribute(const Die & die, AttributeSpec spec, std::string_view & info) const;
+
     // Read one attribute <name, form> pair, remove_prefix sp; returns <0, 0> at end.
-    static DIEAbbreviation::Attribute readAttribute(std::string_view & sp);
+    static AttributeSpec readAttributeSpec(std::string_view & sp);
 
     // Read one attribute value, remove_prefix sp
-    typedef std::variant<uint64_t, std::string_view> AttributeValue;
+    using AttributeValue = std::variant<uint64_t, std::string_view>;
     AttributeValue readAttributeValue(std::string_view & sp, uint64_t form, bool is64Bit) const;
 
     // Get an ELF section by name, return true if found
@@ -279,11 +415,33 @@ private:
     // Get a string from the .debug_str section
     std::string_view getStringFromStringSection(uint64_t offset) const;
 
+    template <class T>
+    std::optional<T> getAttribute(const CompilationUnit & cu, const Die & die, uint64_t attr_name) const
+    {
+        std::optional<T> result;
+        forEachAttribute(cu, die, [&](const Attribute & attr) {
+            if (attr.spec.name == attr_name)
+            {
+                result = std::get<T>(attr.attr_value);
+                return false;
+            }
+            return true;
+        });
+        return result;
+    }
+
+    // Check if the given address is in the range list at the given offset in .debug_ranges.
+    bool isAddrInRangeList(uint64_t address, std::optional<uint64_t> base_addr, size_t offset, uint8_t addr_size) const;
+
+    // Finds the Compilation Unit starting at offset.
+    static CompilationUnit findCompilationUnit(std::string_view info, uint64_t targetOffset);
+
     std::string_view info_; // .debug_info
     std::string_view abbrev_; // .debug_abbrev
     std::string_view aranges_; // .debug_aranges
     std::string_view line_; // .debug_line
     std::string_view strings_; // .debug_str
+    std::string_view ranges_; // .debug_ranges
 };
 
 }
diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp
index b285a45bdc5..88d3a66ba72 100644
--- a/src/Common/StackTrace.cpp
+++ b/src/Common/StackTrace.cpp
@@ -220,7 +220,9 @@ void StackTrace::symbolize(const StackTrace::FramePointers & frame_pointers, siz
                 auto dwarf_it = dwarfs.try_emplace(object->name, *object->elf).first;
 
                 DB::Dwarf::LocationInfo location;
-                if (dwarf_it->second.findAddress(uintptr_t(current_frame.physical_addr), location, DB::Dwarf::LocationInfoMode::FAST))
+                std::vector<DB::Dwarf::SymbolizedFrame> inline_frames;
+                if (dwarf_it->second.findAddress(
+                        uintptr_t(current_frame.physical_addr), location, DB::Dwarf::LocationInfoMode::FAST, inline_frames))
                 {
                     current_frame.file = location.file.toString();
                     current_frame.line = location.line;
@@ -311,7 +313,11 @@ const StackTrace::FramePointers & StackTrace::getFramePointers() const
 }
 
 static void toStringEveryLineImpl(
-    const StackTrace::FramePointers & frame_pointers, size_t offset, size_t size, std::function<void(const std::string &)> callback)
+    bool fatal,
+    const StackTrace::FramePointers & frame_pointers,
+    size_t offset,
+    size_t size,
+    std::function<void(const std::string &)> callback)
 {
     if (size == 0)
         return callback("<Empty trace>");
@@ -321,7 +327,7 @@ static void toStringEveryLineImpl(
     const DB::SymbolIndex & symbol_index = *symbol_index_ptr;
     std::unordered_map<std::string, DB::Dwarf> dwarfs;
 
-    std::stringstream out;      // STYLE_CHECK_ALLOW_STD_STRING_STREAM
+    std::stringstream out;  // STYLE_CHECK_ALLOW_STD_STRING_STREAM
     out.exceptions(std::ios::failbit);
 
     for (size_t i = offset; i < size; ++i)
@@ -340,7 +346,9 @@ static void toStringEveryLineImpl(
                 auto dwarf_it = dwarfs.try_emplace(object->name, *object->elf).first;
 
                 DB::Dwarf::LocationInfo location;
-                if (dwarf_it->second.findAddress(uintptr_t(physical_addr), location, DB::Dwarf::LocationInfoMode::FAST))
+                std::vector<DB::Dwarf::SymbolizedFrame> inline_frames;  // TODO: mix with StackTrace frames
+                auto mode = fatal ? DB::Dwarf::LocationInfoMode::FULL_WITH_INLINE : DB::Dwarf::LocationInfoMode::FAST;
+                if (dwarf_it->second.findAddress(uintptr_t(physical_addr), location, mode, inline_frames))
                     out << location.file.toString() << ":" << location.line << ": ";
             }
         }
@@ -361,7 +369,7 @@ static void toStringEveryLineImpl(
         out.str({});
     }
 #else
-    std::stringstream out;      // STYLE_CHECK_ALLOW_STD_STRING_STREAM
+    std::stringstream out;  // STYLE_CHECK_ALLOW_STD_STRING_STREAM
     out.exceptions(std::ios::failbit);
 
     for (size_t i = offset; i < size; ++i)
@@ -379,13 +387,13 @@ static std::string toStringImpl(const StackTrace::FramePointers & frame_pointers
 {
     std::stringstream out;      // STYLE_CHECK_ALLOW_STD_STRING_STREAM
     out.exceptions(std::ios::failbit);
-    toStringEveryLineImpl(frame_pointers, offset, size, [&](const std::string & str) { out << str << '\n'; });
+    toStringEveryLineImpl(false, frame_pointers, offset, size, [&](const std::string & str) { out << str << '\n'; });
     return out.str();
 }
 
 void StackTrace::toStringEveryLine(std::function<void(const std::string &)> callback) const
 {
-    toStringEveryLineImpl(frame_pointers, offset, size, std::move(callback));
+    toStringEveryLineImpl(true, frame_pointers, offset, size, std::move(callback));
 }
 
 
diff --git a/src/Common/StackTrace.h b/src/Common/StackTrace.h
index 3ae4b964838..26def2f32b2 100644
--- a/src/Common/StackTrace.h
+++ b/src/Common/StackTrace.h
@@ -43,10 +43,10 @@ public:
 
     /// Tries to capture stack trace. Fallbacks on parsing caller address from
     /// signal context if no stack trace could be captured
-    StackTrace(const ucontext_t & signal_context);
+    explicit StackTrace(const ucontext_t & signal_context);
 
     /// Creates empty object for deferred initialization
-    StackTrace(NoCapture);
+    explicit StackTrace(NoCapture);
 
     size_t getSize() const;
     size_t getOffset() const;
@@ -57,6 +57,7 @@ public:
     static void symbolize(const FramePointers & frame_pointers, size_t offset, size_t size, StackTrace::Frames & frames);
 
     void toStringEveryLine(std::function<void(const std::string &)> callback) const;
+
 protected:
     void tryCapture();
 
diff --git a/src/Common/tests/symbol_index.cpp b/src/Common/tests/symbol_index.cpp
index 3811bbbdd71..bb634bee49e 100644
--- a/src/Common/tests/symbol_index.cpp
+++ b/src/Common/tests/symbol_index.cpp
@@ -50,7 +50,8 @@ int main(int argc, char ** argv)
     Dwarf dwarf(*object->elf);
 
     Dwarf::LocationInfo location;
-    if (dwarf.findAddress(uintptr_t(address) - uintptr_t(info.dli_fbase), location, Dwarf::LocationInfoMode::FAST))
+    std::vector<Dwarf::SymbolizedFrame> frames;
+    if (dwarf.findAddress(uintptr_t(address) - uintptr_t(info.dli_fbase), location, Dwarf::LocationInfoMode::FAST, frames))
         std::cerr << location.file.toString() << ":" << location.line << "\n";
     else
         std::cerr << "Dwarf: Not found\n";
diff --git a/src/Functions/addressToLine.cpp b/src/Functions/addressToLine.cpp
index 59e347dd348..6f529de77ed 100644
--- a/src/Functions/addressToLine.cpp
+++ b/src/Functions/addressToLine.cpp
@@ -116,7 +116,8 @@ private:
                 return {};
 
             Dwarf::LocationInfo location;
-            if (dwarf_it->second.findAddress(addr - uintptr_t(object->address_begin), location, Dwarf::LocationInfoMode::FAST))
+            std::vector<Dwarf::SymbolizedFrame> frames;  // NOTE: not used in FAST mode.
+            if (dwarf_it->second.findAddress(addr - uintptr_t(object->address_begin), location, Dwarf::LocationInfoMode::FAST, frames))
             {
                 const char * arena_begin = nullptr;
                 WriteBufferFromArena out(cache.arena, arena_begin);

From 4879adb527554afeaa0cb1bf3e20049b0e3309ee Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Wed, 20 Jan 2021 08:27:42 +0300
Subject: [PATCH 0120/1238] Avoid mixing output from parallel test runs.

Also mark some tests as sequential-only.
---
 tests/clickhouse-test                         | 88 +++++++++++--------
 ...ionary_invalidate_query_switchover_long.sh | 34 +++----
 ...em_reload_dictionary_reloads_completely.sh | 34 +++----
 ...dictionary_attribute_properties_values.sql | 22 ++---
 .../01045_dictionaries_restrictions.sql       | 12 +--
 .../0_stateless/01451_detach_drop_part.sql    | 44 +++++-----
 tests/queries/skip_list.json                  |  5 +-
 7 files changed, 128 insertions(+), 111 deletions(-)

diff --git a/tests/clickhouse-test b/tests/clickhouse-test
index 78affcf8da0..14a979db349 100755
--- a/tests/clickhouse-test
+++ b/tests/clickhouse-test
@@ -274,7 +274,9 @@ def run_tests_array(all_tests_with_params):
 
     def print_test_time(test_time):
         if args.print_time:
-            print(" {0:.2f} sec.".format(test_time), end='')
+            return " {0:.2f} sec.".format(test_time)
+        else:
+            return ''
 
     if len(all_tests):
         print("\nRunning {} {} tests.".format(len(all_tests), suite) + "\n")
@@ -291,36 +293,43 @@ def run_tests_array(all_tests_with_params):
         (name, ext) = os.path.splitext(case)
 
         try:
-            sys.stdout.flush()
-            sys.stdout.write("{0:72}".format(name + ": "))
-            # This flush is needed so you can see the test name of the long running test before it will finish.
-            sys.stdout.flush()
+            status = ''
+            is_concurrent = multiprocessing.current_process().name != "MainProcess";
+            if not is_concurrent:
+                sys.stdout.flush()
+                sys.stdout.write("{0:72}".format(name + ": "))
+                # This flush is needed so you can see the test name of the long
+                # running test before it will finish. But don't do it in parallel
+                # mode, so that the lines don't mix.
+                sys.stdout.flush()
+            else:
+                status = "{0:72}".format(name + ": ");
 
             if args.skip and any(s in name for s in args.skip):
-                print(MSG_SKIPPED + " - skip")
+                status += MSG_SKIPPED + " - skip\n"
                 skipped_total += 1
             elif not args.zookeeper and ('zookeeper' in name
                     or 'replica' in name):
-                print(MSG_SKIPPED + " - no zookeeper")
+                status += MSG_SKIPPED + " - no zookeeper\n"
                 skipped_total += 1
             elif not args.shard and ('shard' in name
                     or 'distributed' in name
                     or 'global' in name):
-                print(MSG_SKIPPED + " - no shard")
+                status += MSG_SKIPPED + " - no shard\n"
                 skipped_total += 1
             elif not args.no_long and ('long' in name
                     # Tests for races and deadlocks usually are runned in loop
                     #  for significant amount of time
                     or 'deadlock' in name
                     or 'race' in name):
-                print(MSG_SKIPPED + " - no long")
+                status += MSG_SKIPPED + " - no long\n"
                 skipped_total += 1
             else:
                 disabled_file = os.path.join(suite_dir, name) + '.disabled'
 
                 if os.path.exists(disabled_file) and not args.disabled:
                     message = open(disabled_file, 'r').read()
-                    print(MSG_SKIPPED + " - " + message)
+                    status += MSG_SKIPPED + " - " + message + "\n"
                 else:
 
                     if args.testname:
@@ -347,11 +356,11 @@ def run_tests_array(all_tests_with_params):
                                 raise
 
                         failures += 1
-                        print(MSG_FAIL, end='')
-                        print_test_time(total_time)
-                        print(" - Timeout!")
+                        status += MSG_FAIL
+                        status += print_test_time(total_time)
+                        status += " - Timeout!\n"
                         if stderr:
-                            print(stderr)
+                            status += stderr
                     else:
                         counter = 1
                         while proc.returncode != 0 and need_retry(stderr):
@@ -364,12 +373,12 @@ def run_tests_array(all_tests_with_params):
                         if proc.returncode != 0:
                             failures += 1
                             failures_chain += 1
-                            print(MSG_FAIL, end='')
-                            print_test_time(total_time)
-                            print(" - return code {}".format(proc.returncode))
+                            status += MSG_FAIL
+                            status += print_test_time(total_time)
+                            status += f' - return code {proc.returncode}\n'
 
                             if stderr:
-                                print(stderr)
+                                status += stderr
 
                             # Stop on fatal errors like segmentation fault. They are sent to client via logs.
                             if ' <Fatal> ' in stderr:
@@ -379,46 +388,51 @@ def run_tests_array(all_tests_with_params):
                                 SERVER_DIED = True
 
                             if os.path.isfile(stdout_file):
-                                print(", result:\n")
-                                print('\n'.join(open(stdout_file).read().split('\n')[:100]))
+                                status += ", result:\n\n"
+                                status += '\n'.join(
+                                    open(stdout_file).read().split('\n')[:100])
+                                status += '\n';
 
                         elif stderr:
                             failures += 1
                             failures_chain += 1
-                            print(MSG_FAIL, end='')
-                            print_test_time(total_time)
-                            print(" - having stderror:\n{}".format(
-                                '\n'.join(stderr.split('\n')[:100])))
+                            status += MSG_FAIL
+                            status += print_test_time(total_time)
+                            status += " - having stderror:\n{}\n".format(
+                                '\n'.join(stderr.split('\n')[:100]))
                         elif 'Exception' in stdout:
                             failures += 1
                             failures_chain += 1
-                            print(MSG_FAIL, end='')
-                            print_test_time(total_time)
-                            print(" - having exception:\n{}".format(
-                                '\n'.join(stdout.split('\n')[:100])))
+                            status += MSG_FAIL
+                            status += print_test_time(total_time)
+                            status += " - having exception:\n{}\n".format(
+                                '\n'.join(stdout.split('\n')[:100]))
                         elif not os.path.isfile(reference_file):
-                            print(MSG_UNKNOWN, end='')
-                            print_test_time(total_time)
-                            print(" - no reference file")
+                            status += MSG_UNKNOWN
+                            status += print_test_time(total_time)
+                            status += " - no reference file\n"
                         else:
                             result_is_different = subprocess.call(['diff', '-q', reference_file, stdout_file], stdout=PIPE)
 
                             if result_is_different:
                                 diff = Popen(['diff', '-U', str(args.unified), reference_file, stdout_file], stdout=PIPE, universal_newlines=True).communicate()[0]
                                 failures += 1
-                                print(MSG_FAIL, end='')
-                                print_test_time(total_time)
-                                print(" - result differs with reference:\n{}".format(diff))
+                                status += MSG_FAIL
+                                status += print_test_time(total_time)
+                                status += " - result differs with reference:\n{}\n".format(diff)
                             else:
                                 passed_total += 1
                                 failures_chain = 0
-                                print(MSG_OK, end='')
-                                print_test_time(total_time)
-                                print()
+                                status += MSG_OK
+                                status += print_test_time(total_time)
+                                status += "\n"
                                 if os.path.exists(stdout_file):
                                     os.remove(stdout_file)
                                 if os.path.exists(stderr_file):
                                     os.remove(stderr_file)
+
+            sys.stdout.write(status)
+            sys.stdout.flush()
         except KeyboardInterrupt as e:
             print(colored("Break tests execution", args, "red"))
             raise e
diff --git a/tests/queries/0_stateless/01040_dictionary_invalidate_query_switchover_long.sh b/tests/queries/0_stateless/01040_dictionary_invalidate_query_switchover_long.sh
index 93a807a923e..66571f456d9 100755
--- a/tests/queries/0_stateless/01040_dictionary_invalidate_query_switchover_long.sh
+++ b/tests/queries/0_stateless/01040_dictionary_invalidate_query_switchover_long.sh
@@ -5,12 +5,12 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 . "$CURDIR"/../shell_config.sh
 
 
-$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS dictdb"
+$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS dictdb_01041_01040"
 
-$CLICKHOUSE_CLIENT --query "CREATE DATABASE dictdb"
+$CLICKHOUSE_CLIENT --query "CREATE DATABASE dictdb_01041_01040"
 
 $CLICKHOUSE_CLIENT --query "
-CREATE TABLE dictdb.dict_invalidate
+CREATE TABLE dictdb_01041_01040.dict_invalidate
 ENGINE = Memory AS
 SELECT
     122 as dummy,
@@ -19,31 +19,31 @@ FROM system.one"
 
 
 $CLICKHOUSE_CLIENT --query "
-CREATE DICTIONARY dictdb.invalidate
+CREATE DICTIONARY dictdb_01041_01040.invalidate
 (
   dummy UInt64,
   two UInt8 EXPRESSION dummy
 )
 PRIMARY KEY dummy
-SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_invalidate' DB 'dictdb' INVALIDATE_QUERY 'select max(last_time) from dictdb.dict_invalidate'))
+SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_invalidate' DB 'dictdb_01041_01040' INVALIDATE_QUERY 'select max(last_time) from dictdb_01041_01040.dict_invalidate'))
 LIFETIME(MIN 0 MAX 1)
 LAYOUT(FLAT())"
 
-$CLICKHOUSE_CLIENT --query "SELECT dictGetUInt8('dictdb.invalidate', 'two', toUInt64(122))"
+$CLICKHOUSE_CLIENT --query "SELECT dictGetUInt8('dictdb_01041_01040.invalidate', 'two', toUInt64(122))"
 
 # No exception happened
-$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb' AND name = 'invalidate'"
+$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb_01041_01040' AND name = 'invalidate'"
 
-$CLICKHOUSE_CLIENT --query "DROP TABLE dictdb.dict_invalidate"
+$CLICKHOUSE_CLIENT --query "DROP TABLE dictdb_01041_01040.dict_invalidate"
 
 function check_exception_detected()
 {
 
-    query_result=$($CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb' AND name = 'invalidate'" 2>&1)
+    query_result=$($CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb_01041_01040' AND name = 'invalidate'" 2>&1)
 
     while [ -z "$query_result" ]
     do
-        query_result=$($CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb' AND name = 'invalidate'" 2>&1)
+        query_result=$($CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb_01041_01040' AND name = 'invalidate'" 2>&1)
         sleep 0.1
     done
 }
@@ -52,10 +52,10 @@ function check_exception_detected()
 export -f check_exception_detected;
 timeout 30 bash -c check_exception_detected 2> /dev/null
 
-$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb' AND name = 'invalidate'" 2>&1 | grep -Eo "Table dictdb.dict_invalidate .* exist"
+$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb_01041_01040' AND name = 'invalidate'" 2>&1 | grep -Eo "Table dictdb_01041_01040.dict_invalidate .* exist"
 
 $CLICKHOUSE_CLIENT --query "
-CREATE TABLE dictdb.dict_invalidate
+CREATE TABLE dictdb_01041_01040.dict_invalidate
 ENGINE = Memory AS
 SELECT
     133 as dummy,
@@ -64,11 +64,11 @@ FROM system.one"
 
 function check_exception_fixed()
 {
-    query_result=$($CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb' AND name = 'invalidate'" 2>&1)
+    query_result=$($CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb_01041_01040' AND name = 'invalidate'" 2>&1)
 
     while [ "$query_result" ]
     do
-        query_result=$($CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb' AND name = 'invalidate'" 2>&1)
+        query_result=$($CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb_01041_01040' AND name = 'invalidate'" 2>&1)
         sleep 0.1
     done
 }
@@ -77,7 +77,7 @@ export -f check_exception_fixed;
 # it may take a while until dictionary reloads
 timeout 60 bash -c check_exception_fixed 2> /dev/null
 
-$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb' AND name = 'invalidate'" 2>&1
-$CLICKHOUSE_CLIENT --query "SELECT dictGetUInt8('dictdb.invalidate', 'two', toUInt64(133))"
+$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb_01041_01040' AND name = 'invalidate'" 2>&1
+$CLICKHOUSE_CLIENT --query "SELECT dictGetUInt8('dictdb_01041_01040.invalidate', 'two', toUInt64(133))"
 
-$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS dictdb"
+$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS dictdb_01041_01040"
diff --git a/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh b/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh
index b466b863f3b..512d12866c4 100755
--- a/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh
+++ b/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh
@@ -8,40 +8,40 @@ set -e -o pipefail
 
 # Run the client.
 $CLICKHOUSE_CLIENT --multiquery <<'EOF'
-DROP DATABASE IF EXISTS dictdb;
-CREATE DATABASE dictdb;
-CREATE TABLE dictdb.table(x Int64, y Int64, insert_time DateTime) ENGINE = MergeTree ORDER BY tuple();
-INSERT INTO dictdb.table VALUES (12, 102, now());
+DROP DATABASE IF EXISTS dictdb_01042;
+CREATE DATABASE dictdb_01042;
+CREATE TABLE dictdb_01042.table(x Int64, y Int64, insert_time DateTime) ENGINE = MergeTree ORDER BY tuple();
+INSERT INTO dictdb_01042.table VALUES (12, 102, now());
 
-CREATE DICTIONARY dictdb.dict
+CREATE DICTIONARY dictdb_01042.dict
 (
   x Int64 DEFAULT -1,
   y Int64 DEFAULT -1,
   insert_time DateTime
 )
 PRIMARY KEY x
-SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table' DB 'dictdb' UPDATE_FIELD 'insert_time'))
+SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table' DB 'dictdb_01042' UPDATE_FIELD 'insert_time'))
 LAYOUT(FLAT())
 LIFETIME(1);
 EOF
 
-$CLICKHOUSE_CLIENT --query "SELECT '12 -> ', dictGetInt64('dictdb.dict', 'y', toUInt64(12))"
+$CLICKHOUSE_CLIENT --query "SELECT '12 -> ', dictGetInt64('dictdb_01042.dict', 'y', toUInt64(12))"
 
-$CLICKHOUSE_CLIENT --query "INSERT INTO dictdb.table VALUES (13, 103, now())"
-$CLICKHOUSE_CLIENT --query "INSERT INTO dictdb.table VALUES (14, 104, now() - INTERVAL 1 DAY)"
+$CLICKHOUSE_CLIENT --query "INSERT INTO dictdb_01042.table VALUES (13, 103, now())"
+$CLICKHOUSE_CLIENT --query "INSERT INTO dictdb_01042.table VALUES (14, 104, now() - INTERVAL 1 DAY)"
 
-while [ "$(${CLICKHOUSE_CLIENT} --query "SELECT dictGetInt64('dictdb.dict', 'y', toUInt64(13))")" = -1 ]
+while [ "$(${CLICKHOUSE_CLIENT} --query "SELECT dictGetInt64('dictdb_01042.dict', 'y', toUInt64(13))")" = -1 ]
     do
         sleep 0.5
     done
 
-$CLICKHOUSE_CLIENT --query "SELECT '13 -> ', dictGetInt64('dictdb.dict', 'y', toUInt64(13))"
-$CLICKHOUSE_CLIENT --query "SELECT '14 -> ', dictGetInt64('dictdb.dict', 'y', toUInt64(14))"
+$CLICKHOUSE_CLIENT --query "SELECT '13 -> ', dictGetInt64('dictdb_01042.dict', 'y', toUInt64(13))"
+$CLICKHOUSE_CLIENT --query "SELECT '14 -> ', dictGetInt64('dictdb_01042.dict', 'y', toUInt64(14))"
 
-$CLICKHOUSE_CLIENT --query "SYSTEM RELOAD DICTIONARY 'dictdb.dict'"
+$CLICKHOUSE_CLIENT --query "SYSTEM RELOAD DICTIONARY 'dictdb_01042.dict'"
 
-$CLICKHOUSE_CLIENT --query "SELECT '12(r) -> ', dictGetInt64('dictdb.dict', 'y', toUInt64(12))"
-$CLICKHOUSE_CLIENT --query "SELECT '13(r) -> ', dictGetInt64('dictdb.dict', 'y', toUInt64(13))"
-$CLICKHOUSE_CLIENT --query "SELECT '14(r) -> ', dictGetInt64('dictdb.dict', 'y', toUInt64(14))"
+$CLICKHOUSE_CLIENT --query "SELECT '12(r) -> ', dictGetInt64('dictdb_01042.dict', 'y', toUInt64(12))"
+$CLICKHOUSE_CLIENT --query "SELECT '13(r) -> ', dictGetInt64('dictdb_01042.dict', 'y', toUInt64(13))"
+$CLICKHOUSE_CLIENT --query "SELECT '14(r) -> ', dictGetInt64('dictdb_01042.dict', 'y', toUInt64(14))"
 
-$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS dictdb"
+$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS dictdb_01042"
diff --git a/tests/queries/0_stateless/01043_dictionary_attribute_properties_values.sql b/tests/queries/0_stateless/01043_dictionary_attribute_properties_values.sql
index 5e448862603..5e150cfed9c 100644
--- a/tests/queries/0_stateless/01043_dictionary_attribute_properties_values.sql
+++ b/tests/queries/0_stateless/01043_dictionary_attribute_properties_values.sql
@@ -1,11 +1,11 @@
-DROP DATABASE IF EXISTS dictdb;
-CREATE DATABASE dictdb;
+DROP DATABASE IF EXISTS dictdb_01043;
+CREATE DATABASE dictdb_01043;
 
-CREATE TABLE dictdb.dicttbl(key Int64, value_default String, value_expression String) ENGINE = MergeTree ORDER BY tuple();
-INSERT INTO dictdb.dicttbl VALUES (12, 'hello', '55:66:77');
+CREATE TABLE dictdb_01043.dicttbl(key Int64, value_default String, value_expression String) ENGINE = MergeTree ORDER BY tuple();
+INSERT INTO dictdb_01043.dicttbl VALUES (12, 'hello', '55:66:77');
 
 
-CREATE DICTIONARY dictdb.dict
+CREATE DICTIONARY dictdb_01043.dict
 (
   key Int64 DEFAULT -1,
   value_default String DEFAULT 'world',
@@ -13,15 +13,15 @@ CREATE DICTIONARY dictdb.dict
 
 )
 PRIMARY KEY key
-SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dicttbl' DB 'dictdb'))
+SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dicttbl' DB 'dictdb_01043'))
 LAYOUT(FLAT())
 LIFETIME(1);
 
 
-SELECT dictGetString('dictdb.dict', 'value_default', toUInt64(12));
-SELECT dictGetString('dictdb.dict', 'value_default', toUInt64(14));
+SELECT dictGetString('dictdb_01043.dict', 'value_default', toUInt64(12));
+SELECT dictGetString('dictdb_01043.dict', 'value_default', toUInt64(14));
 
-SELECT dictGetString('dictdb.dict', 'value_expression', toUInt64(12));
-SELECT dictGetString('dictdb.dict', 'value_expression', toUInt64(14));
+SELECT dictGetString('dictdb_01043.dict', 'value_expression', toUInt64(12));
+SELECT dictGetString('dictdb_01043.dict', 'value_expression', toUInt64(14));
 
-DROP DATABASE IF EXISTS dictdb;
+DROP DATABASE IF EXISTS dictdb_01043;
diff --git a/tests/queries/0_stateless/01045_dictionaries_restrictions.sql b/tests/queries/0_stateless/01045_dictionaries_restrictions.sql
index 909e2fe8ad4..0bc2f6f9f13 100644
--- a/tests/queries/0_stateless/01045_dictionaries_restrictions.sql
+++ b/tests/queries/0_stateless/01045_dictionaries_restrictions.sql
@@ -1,8 +1,8 @@
-DROP DATABASE IF EXISTS dictdb;
+DROP DATABASE IF EXISTS dictdb_01045;
 
-CREATE DATABASE dictdb;
+CREATE DATABASE dictdb_01045;
 
-CREATE DICTIONARY dictdb.restricted_dict (
+CREATE DICTIONARY dictdb_01045.restricted_dict (
   key UInt64,
   value String
 )
@@ -12,10 +12,10 @@ LIFETIME(MIN 0 MAX 1)
 LAYOUT(CACHE(SIZE_IN_CELLS 10));
 
 -- because of lazy load we can check only in dictGet query
-select dictGetString('dictdb.restricted_dict', 'value', toUInt64(1));  -- {serverError 482}
+select dictGetString('dictdb_01045.restricted_dict', 'value', toUInt64(1));  -- {serverError 482}
 
 select 'Ok.';
 
-DROP DICTIONARY IF EXISTS dictdb.restricted_dict;
+DROP DICTIONARY IF EXISTS dictdb_01045.restricted_dict;
 
-DROP DATABASE IF EXISTS dictdb;
+DROP DATABASE IF EXISTS dictdb_01045;
diff --git a/tests/queries/0_stateless/01451_detach_drop_part.sql b/tests/queries/0_stateless/01451_detach_drop_part.sql
index 84973da5f25..d70f4e37de4 100644
--- a/tests/queries/0_stateless/01451_detach_drop_part.sql
+++ b/tests/queries/0_stateless/01451_detach_drop_part.sql
@@ -1,42 +1,42 @@
-DROP TABLE IF EXISTS mt;
+DROP TABLE IF EXISTS mt_01451;
 
-CREATE TABLE mt (v UInt8) ENGINE = MergeTree() order by tuple();
-SYSTEM STOP MERGES mt;
+CREATE TABLE mt_01451 (v UInt8) ENGINE = MergeTree() order by tuple();
+SYSTEM STOP MERGES mt_01451;
 
-INSERT INTO mt VALUES (0);
-INSERT INTO mt VALUES (1);
-INSERT INTO mt VALUES (2);
+INSERT INTO mt_01451 VALUES (0);
+INSERT INTO mt_01451 VALUES (1);
+INSERT INTO mt_01451 VALUES (2);
 
-SELECT v FROM mt ORDER BY v;
+SELECT v FROM mt_01451 ORDER BY v;
 
-ALTER TABLE mt DETACH PART 'all_100_100_0'; -- { serverError 232 }
+ALTER TABLE mt_01451 DETACH PART 'all_100_100_0'; -- { serverError 232 }
 
-ALTER TABLE mt DETACH PART 'all_2_2_0';
+ALTER TABLE mt_01451 DETACH PART 'all_2_2_0';
 
-SELECT v FROM mt ORDER BY v;
+SELECT v FROM mt_01451 ORDER BY v;
 
-SELECT name FROM system.detached_parts WHERE table = 'mt';
+SELECT name FROM system.detached_parts WHERE table = 'mt_01451';
 
-ALTER TABLE mt ATTACH PART 'all_2_2_0';
+ALTER TABLE mt_01451 ATTACH PART 'all_2_2_0';
 
-SELECT v FROM mt ORDER BY v;
+SELECT v FROM mt_01451 ORDER BY v;
 
-SELECT name FROM system.detached_parts WHERE table = 'mt';
+SELECT name FROM system.detached_parts WHERE table = 'mt_01451';
 
 SELECT '-- drop part --';
 
-ALTER TABLE mt DROP PART 'all_4_4_0';
+ALTER TABLE mt_01451 DROP PART 'all_4_4_0';
 
-ALTER TABLE mt ATTACH PART 'all_4_4_0'; -- { serverError 233 }
+ALTER TABLE mt_01451 ATTACH PART 'all_4_4_0'; -- { serverError 233 }
 
-SELECT v FROM mt ORDER BY v;
+SELECT v FROM mt_01451 ORDER BY v;
 
 SELECT '-- resume merges --';
-SYSTEM START MERGES mt;
-OPTIMIZE TABLE mt FINAL;
+SYSTEM START MERGES mt_01451;
+OPTIMIZE TABLE mt_01451 FINAL;
 
-SELECT v FROM mt ORDER BY v;
+SELECT v FROM mt_01451 ORDER BY v;
 
-SELECT name FROM system.parts WHERE table = 'mt' AND active;
+SELECT name FROM system.parts WHERE table = 'mt_01451' AND active;
 
-DROP TABLE mt;
+DROP TABLE mt_01451;
diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json
index c2e8d1263f1..568577edff2 100644
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@@ -476,12 +476,15 @@
         "01494_storage_join_persistency",
         "01516_drop_table_stress",
         "01541_max_memory_usage_for_user",
+        "01646_system_restart_replicas_smoke", // system restart replicas is a global query
+        "01600_count_of_parts_metrics", // tests global system metrics
         "attach",
         "ddl_dictionaries",
         "dictionary",
         "limit_memory",
         "live_view",
         "memory_leak",
-        "memory_limit"
+        "memory_limit",
+        "polygon_dicts" // they use an explicitly specified database
     ]
 }

From 6e4b9aa862bb1f364d3a7d5f03d8db241f257d67 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Wed, 20 Jan 2021 10:10:54 +0300
Subject: [PATCH 0121/1238] don't use simple transform

---
 src/Processors/Transforms/WindowTransform.cpp | 95 +++++++++++++++++--
 src/Processors/Transforms/WindowTransform.h   | 27 +++++-
 .../01591_window_functions.reference          |  8 ++
 .../0_stateless/01591_window_functions.sql    |  8 +-
 4 files changed, 126 insertions(+), 12 deletions(-)

diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index b200e306213..aac7c336c84 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -10,15 +10,15 @@ namespace DB
 WindowTransform::WindowTransform(const Block & input_header_,
         const Block & output_header_,
         const WindowDescription & window_description_,
-        const std::vector<WindowFunctionDescription> & window_function_descriptions
-        )
-    : ISimpleTransform(input_header_, output_header_,
-        false /* skip_empty_chunks */)
+        const std::vector<WindowFunctionDescription> & functions)
+    : IProcessor({input_header_}, {output_header_})
+    , input(inputs.front())
+    , output(outputs.front())
     , input_header(input_header_)
     , window_description(window_description_)
 {
-    workspaces.reserve(window_function_descriptions.size());
-    for (const auto & f : window_function_descriptions)
+    workspaces.reserve(functions.size());
+    for (const auto & f : functions)
     {
         WindowFunctionWorkspace workspace;
         workspace.window_function = f;
@@ -186,4 +186,87 @@ void WindowTransform::transform(Chunk & chunk)
     chunk.setColumns(std::move(columns), num_rows);
 }
 
+IProcessor::Status WindowTransform::prepare()
+{
+    /// Check can output.
+    if (output.isFinished())
+    {
+        input.close();
+        return Status::Finished;
+    }
+
+    if (!output.canPush())
+    {
+        input.setNotNeeded();
+        return Status::PortFull;
+    }
+
+    /// Output if has data.
+    if (has_output)
+    {
+        output.pushData(std::move(output_data));
+        has_output = false;
+
+        return Status::PortFull;
+    }
+
+    /// Check can input.
+    if (!has_input)
+    {
+        if (input.isFinished())
+        {
+            output.finish();
+            return Status::Finished;
+        }
+
+        input.setNeeded();
+
+        if (!input.hasData())
+            return Status::NeedData;
+
+        input_data = input.pullData(true /* set_not_needed */);
+        has_input = true;
+
+        if (input_data.exception)
+        {
+            /// No more data needed. Exception will be thrown (or swallowed) later.
+            input.setNotNeeded();
+        }
+    }
+
+    /// Now transform.
+    return Status::Ready;
+}
+
+void WindowTransform::work()
+{
+    if (input_data.exception)
+    {
+        /// Skip transform in case of exception.
+        output_data = std::move(input_data);
+        has_input = false;
+        has_output = true;
+        return;
+    }
+
+    try
+    {
+        transform(input_data.chunk);
+        output_data.chunk.swap(input_data.chunk);
+    }
+    catch (DB::Exception &)
+    {
+        output_data.exception = std::current_exception();
+        has_output = true;
+        has_input = false;
+        return;
+    }
+
+    has_input = false;
+
+    if (output_data.chunk)
+        has_output = true;
+}
+
+
 }
diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h
index 3d284263171..ce7723846c9 100644
--- a/src/Processors/Transforms/WindowTransform.h
+++ b/src/Processors/Transforms/WindowTransform.h
@@ -29,7 +29,7 @@ struct WindowFunctionWorkspace
  * Computes several window functions that share the same window. The input must
  * be sorted correctly for this window (PARTITION BY, then ORDER BY).
  */
-class WindowTransform : public ISimpleTransform
+class WindowTransform : public IProcessor /* public ISimpleTransform */
 {
 public:
     WindowTransform(
@@ -48,9 +48,32 @@ public:
 
     static Block transformHeader(Block header, const ExpressionActionsPtr & expression);
 
-    void transform(Chunk & chunk) override;
+    /*
+     * (former) Implemetation of ISimpleTransform.
+     */
+    void transform(Chunk & chunk) /*override*/;
+
+    /*
+     * Implementation of IProcessor;
+     */
+    Status prepare() override;
+    void work() override;
 
 public:
+    /*
+     * Data (formerly) inherited from ISimpleTransform.
+     */
+    InputPort & input;
+    OutputPort & output;
+
+    bool has_input = false;
+    Port::Data input_data;
+    bool has_output = false;
+    Port::Data output_data;
+
+    /*
+     * Data for window transform itself.
+     */
     Block input_header;
 
     WindowDescription window_description;
diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference
index 45cb4ac3994..3b4405ff865 100644
--- a/tests/queries/0_stateless/01591_window_functions.reference
+++ b/tests/queries/0_stateless/01591_window_functions.reference
@@ -222,3 +222,11 @@ window
 13	13
 21	21
 9	9
+-- ROWS frame
+select
+    sum(number)
+        over (order by number rows between unbounded preceding and current row)
+from numbers(3);
+0
+1
+3
diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql
index e4858cd6dc6..c942befa658 100644
--- a/tests/queries/0_stateless/01591_window_functions.sql
+++ b/tests/queries/0_stateless/01591_window_functions.sql
@@ -100,7 +100,7 @@ select
 from numbers(3);
 
 
-select
-    sum(number)
-        over (order by number groups between unbounded preceding and current row)
-from numbers(3);
+--select
+--    sum(number)
+--        over (order by number groups between unbounded preceding and current row)
+--from numbers(3);

From 2bb28fbc14f7667d6ab6e3ef942595054a1a4621 Mon Sep 17 00:00:00 2001
From: Ivan Lezhankin <ilezhankin@yandex-team.ru>
Date: Wed, 20 Jan 2021 16:03:25 +0300
Subject: [PATCH 0122/1238] Print inline frames augmenting usual ones

---
 src/Common/Dwarf.cpp              |  7 ++++---
 src/Common/Dwarf.h                |  8 ++++----
 src/Common/StackTrace.cpp         | 14 +++++++++++---
 src/Common/SymbolIndex.h          |  2 +-
 src/Common/tests/symbol_index.cpp |  2 +-
 src/Functions/addressToLine.cpp   |  2 +-
 6 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/src/Common/Dwarf.cpp b/src/Common/Dwarf.cpp
index 53eb9e8ec63..14e6e1072b6 100644
--- a/src/Common/Dwarf.cpp
+++ b/src/Common/Dwarf.cpp
@@ -98,7 +98,7 @@ namespace ErrorCodes
 }
 
 
-Dwarf::Dwarf(const Elf & elf) : elf_(&elf)
+Dwarf::Dwarf(const std::shared_ptr<Elf> & elf) : elf_(elf)
 {
     init();
 }
@@ -176,7 +176,7 @@ uint64_t readOffset(std::string_view & sp, bool is64Bit)
 // Read "len" bytes
 std::string_view readBytes(std::string_view & sp, uint64_t len)
 {
-    SAFE_CHECK(len >= sp.size(), "invalid string length");
+    SAFE_CHECK(len <= sp.size(), "invalid string length: " + std::to_string(len) + " vs. " + std::to_string(sp.size()));
     std::string_view ret(sp.data(), len);
     sp.remove_prefix(len);
     return ret;
@@ -382,7 +382,7 @@ void Dwarf::init()
         || !getSection(".debug_line", &line_)
         || !getSection(".debug_str", &strings_))
     {
-        elf_ = nullptr;
+        elf_.reset();
         return;
     }
 
@@ -795,6 +795,7 @@ bool Dwarf::findLocation(
     {
         // Re-get the compilation unit with abbreviation cached.
         cu.abbr_cache.clear();
+        cu.abbr_cache.resize(kMaxAbbreviationEntries);
         readCompilationUnitAbbrs(abbrev_, cu);
 
         // Find the subprogram that matches the given address.
diff --git a/src/Common/Dwarf.h b/src/Common/Dwarf.h
index fce65648b70..065ef6e3f5b 100644
--- a/src/Common/Dwarf.h
+++ b/src/Common/Dwarf.h
@@ -63,7 +63,7 @@ class Dwarf final
     // be live for as long as the passed-in Elf is live.
 public:
     /** Create a DWARF parser around an ELF file. */
-    explicit Dwarf(const Elf & elf);
+    explicit Dwarf(const std::shared_ptr<Elf> & elf);
 
     /**
      * More than one location info may exist if current frame is an inline
@@ -78,7 +78,7 @@ public:
     class Path
     {
     public:
-        Path() {}
+        Path() = default;
 
         Path(std::string_view baseDir, std::string_view subDir, std::string_view file);
 
@@ -156,7 +156,7 @@ public:
         // Mangled symbol name. Use `folly::demangle()` to demangle it.
         const char * name = nullptr;
         LocationInfo location;
-        std::shared_ptr<Elf> file;
+        std::shared_ptr<const Elf> file;
 
         void clear() { *this = SymbolizedFrame(); }
     };
@@ -171,7 +171,7 @@ private:
 
     void init();
 
-    const Elf * elf_;
+    std::shared_ptr<const Elf> elf_;
 
     // DWARF section made up of chunks, each prefixed with a length header.
     // The length indicates whether the chunk is DWARF-32 or DWARF-64, which
diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp
index 88d3a66ba72..b1032786eca 100644
--- a/src/Common/StackTrace.cpp
+++ b/src/Common/StackTrace.cpp
@@ -217,7 +217,7 @@ void StackTrace::symbolize(const StackTrace::FramePointers & frame_pointers, siz
             current_frame.object = object->name;
             if (std::filesystem::exists(current_frame.object.value()))
             {
-                auto dwarf_it = dwarfs.try_emplace(object->name, *object->elf).first;
+                auto dwarf_it = dwarfs.try_emplace(object->name, object->elf).first;
 
                 DB::Dwarf::LocationInfo location;
                 std::vector<DB::Dwarf::SymbolizedFrame> inline_frames;
@@ -332,6 +332,7 @@ static void toStringEveryLineImpl(
 
     for (size_t i = offset; i < size; ++i)
     {
+        std::vector<DB::Dwarf::SymbolizedFrame> inline_frames;
         const void * virtual_addr = frame_pointers[i];
         const auto * object = symbol_index.findObject(virtual_addr);
         uintptr_t virtual_offset = object ? uintptr_t(object->address_begin) : 0;
@@ -343,10 +344,9 @@ static void toStringEveryLineImpl(
         {
             if (std::filesystem::exists(object->name))
             {
-                auto dwarf_it = dwarfs.try_emplace(object->name, *object->elf).first;
+                auto dwarf_it = dwarfs.try_emplace(object->name, object->elf).first;
 
                 DB::Dwarf::LocationInfo location;
-                std::vector<DB::Dwarf::SymbolizedFrame> inline_frames;  // TODO: mix with StackTrace frames
                 auto mode = fatal ? DB::Dwarf::LocationInfoMode::FULL_WITH_INLINE : DB::Dwarf::LocationInfoMode::FAST;
                 if (dwarf_it->second.findAddress(uintptr_t(physical_addr), location, mode, inline_frames))
                     out << location.file.toString() << ":" << location.line << ": ";
@@ -365,6 +365,14 @@ static void toStringEveryLineImpl(
         out << " @ " << physical_addr;
         out << " in " << (object ? object->name : "?");
 
+        for (size_t j = 0; j < inline_frames.size(); ++j)
+        {
+            const auto & frame = inline_frames[j];
+            int status = 0;
+            callback(fmt::format("{}.{}. inlined from {}:{}: {}",
+                     i, j+1, frame.location.file.toString(), frame.location.line, demangle(frame.name, status)));
+        }
+
         callback(out.str());
         out.str({});
     }
diff --git a/src/Common/SymbolIndex.h b/src/Common/SymbolIndex.h
index b310f90988e..65e446a7fc4 100644
--- a/src/Common/SymbolIndex.h
+++ b/src/Common/SymbolIndex.h
@@ -36,7 +36,7 @@ public:
         const void * address_begin;
         const void * address_end;
         std::string name;
-        std::unique_ptr<Elf> elf;
+        std::shared_ptr<Elf> elf;
     };
 
     /// Address in virtual memory should be passed. These addresses include offset where the object is loaded in memory.
diff --git a/src/Common/tests/symbol_index.cpp b/src/Common/tests/symbol_index.cpp
index bb634bee49e..496fa7dc3fe 100644
--- a/src/Common/tests/symbol_index.cpp
+++ b/src/Common/tests/symbol_index.cpp
@@ -47,7 +47,7 @@ int main(int argc, char ** argv)
         std::cerr << "dladdr: Not found\n";
 
     const auto * object = symbol_index.findObject(getAddress());
-    Dwarf dwarf(*object->elf);
+    Dwarf dwarf(object->elf);
 
     Dwarf::LocationInfo location;
     std::vector<Dwarf::SymbolizedFrame> frames;
diff --git a/src/Functions/addressToLine.cpp b/src/Functions/addressToLine.cpp
index 6f529de77ed..a115b13e54a 100644
--- a/src/Functions/addressToLine.cpp
+++ b/src/Functions/addressToLine.cpp
@@ -111,7 +111,7 @@ private:
 
         if (const auto * object = symbol_index.findObject(reinterpret_cast<const void *>(addr)))
         {
-            auto dwarf_it = cache.dwarfs.try_emplace(object->name, *object->elf).first;
+            auto dwarf_it = cache.dwarfs.try_emplace(object->name, object->elf).first;
             if (!std::filesystem::exists(object->name))
                 return {};
 

From d5a3adffbd5159845dd522c1d3df2070e6a840e4 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Wed, 20 Jan 2021 19:25:30 +0300
Subject: [PATCH 0123/1238] Replicate something in test keeper storage with
 raft

---
 src/Common/ya.make                            |   2 -
 src/Coordination/InMemoryLogStore.cpp         |   3 +-
 src/Coordination/ReadBufferFromNuraftBuffer.h |   3 +
 src/Coordination/SummingStateMachine.cpp      |   6 +-
 .../TestKeeperStorageDispatcher.cpp           |   2 +-
 .../TestKeeperStorageDispatcher.h             |   2 +-
 .../WriteBufferFromNuraftBuffer.cpp           |   2 +-
 src/Coordination/tests/gtest_for_build.cpp    | 142 ++++++++++++++++--
 8 files changed, 139 insertions(+), 23 deletions(-)
 rename src/{Common/ZooKeeper => Coordination}/TestKeeperStorageDispatcher.cpp (98%)
 rename src/{Common/ZooKeeper => Coordination}/TestKeeperStorageDispatcher.h (96%)

diff --git a/src/Common/ya.make b/src/Common/ya.make
index 4f2f1892a88..a17b57ebb04 100644
--- a/src/Common/ya.make
+++ b/src/Common/ya.make
@@ -84,8 +84,6 @@ SRCS(
     WeakHash.cpp
     ZooKeeper/IKeeper.cpp
     ZooKeeper/TestKeeper.cpp
-    ZooKeeper/TestKeeperStorage.cpp
-    ZooKeeper/TestKeeperStorageDispatcher.cpp
     ZooKeeper/ZooKeeper.cpp
     ZooKeeper/ZooKeeperCommon.cpp
     ZooKeeper/ZooKeeperConstants.cpp
diff --git a/src/Coordination/InMemoryLogStore.cpp b/src/Coordination/InMemoryLogStore.cpp
index 9f8d398a110..b9e2e502fc7 100644
--- a/src/Coordination/InMemoryLogStore.cpp
+++ b/src/Coordination/InMemoryLogStore.cpp
@@ -6,7 +6,8 @@ namespace DB
 namespace
 {
 using namespace nuraft;
-ptr<log_entry> makeClone(const ptr<log_entry> & entry) {
+ptr<log_entry> makeClone(const ptr<log_entry> & entry)
+{
     ptr<log_entry> clone = cs_new<log_entry>(entry->get_term(), buffer::clone(entry->get_buf()), entry->get_val_type());
     return clone;
 }
diff --git a/src/Coordination/ReadBufferFromNuraftBuffer.h b/src/Coordination/ReadBufferFromNuraftBuffer.h
index 392a97bdd8f..cc01d3c8f39 100644
--- a/src/Coordination/ReadBufferFromNuraftBuffer.h
+++ b/src/Coordination/ReadBufferFromNuraftBuffer.h
@@ -12,6 +12,9 @@ public:
     explicit ReadBufferFromNuraftBuffer(nuraft::ptr<nuraft::buffer> buffer)
         : ReadBufferFromMemory(buffer->data_begin(), buffer->size())
     {}
+    explicit ReadBufferFromNuraftBuffer(nuraft::buffer & buffer)
+        : ReadBufferFromMemory(buffer.data_begin(), buffer.size())
+    {}
 };
 
 }
diff --git a/src/Coordination/SummingStateMachine.cpp b/src/Coordination/SummingStateMachine.cpp
index 16154ca8cd4..bf2a5bb818f 100644
--- a/src/Coordination/SummingStateMachine.cpp
+++ b/src/Coordination/SummingStateMachine.cpp
@@ -49,7 +49,8 @@ nuraft::ptr<nuraft::snapshot> SummingStateMachine::last_snapshot()
     // Just return the latest snapshot.
     std::lock_guard<std::mutex> ll(snapshots_lock);
     auto entry = snapshots.rbegin();
-    if (entry == snapshots.rend()) return nullptr;
+    if (entry == snapshots.rend())
+        return nullptr;
 
     auto ctx = entry->second;
     return ctx->snapshot;
@@ -117,7 +118,8 @@ int SummingStateMachine::read_logical_snp_obj(
     {
         std::lock_guard<std::mutex> ll(snapshots_lock);
         auto entry = snapshots.find(s.get_last_log_idx());
-        if (entry == snapshots.end()) {
+        if (entry == snapshots.end())
+        {
             // Snapshot doesn't exist.
             data_out = nullptr;
             is_last_obj = true;
diff --git a/src/Common/ZooKeeper/TestKeeperStorageDispatcher.cpp b/src/Coordination/TestKeeperStorageDispatcher.cpp
similarity index 98%
rename from src/Common/ZooKeeper/TestKeeperStorageDispatcher.cpp
rename to src/Coordination/TestKeeperStorageDispatcher.cpp
index b1233fc47e3..1700fa76092 100644
--- a/src/Common/ZooKeeper/TestKeeperStorageDispatcher.cpp
+++ b/src/Coordination/TestKeeperStorageDispatcher.cpp
@@ -1,4 +1,4 @@
-#include <Common/ZooKeeper/TestKeeperStorageDispatcher.h>
+#include <Coordination/TestKeeperStorageDispatcher.h>
 #include <Common/setThreadName.h>
 
 namespace DB
diff --git a/src/Common/ZooKeeper/TestKeeperStorageDispatcher.h b/src/Coordination/TestKeeperStorageDispatcher.h
similarity index 96%
rename from src/Common/ZooKeeper/TestKeeperStorageDispatcher.h
rename to src/Coordination/TestKeeperStorageDispatcher.h
index 27abf17ac73..f8cb06c3ced 100644
--- a/src/Common/ZooKeeper/TestKeeperStorageDispatcher.h
+++ b/src/Coordination/TestKeeperStorageDispatcher.h
@@ -2,7 +2,7 @@
 
 #include <Common/ThreadPool.h>
 #include <Common/ConcurrentBoundedQueue.h>
-#include <Common/ZooKeeper/TestKeeperStorage.h>
+#include <Coordination/TestKeeperStorage.h>
 #include <functional>
 
 namespace zkutil
diff --git a/src/Coordination/WriteBufferFromNuraftBuffer.cpp b/src/Coordination/WriteBufferFromNuraftBuffer.cpp
index 09e1034ae8f..7d0a1dbcbb1 100644
--- a/src/Coordination/WriteBufferFromNuraftBuffer.cpp
+++ b/src/Coordination/WriteBufferFromNuraftBuffer.cpp
@@ -51,7 +51,7 @@ nuraft::ptr<nuraft::buffer> WriteBufferFromNuraftBuffer::getBuffer()
     return buffer;
 }
 
- WriteBufferFromNuraftBuffer::~WriteBufferFromNuraftBuffer()
+WriteBufferFromNuraftBuffer::~WriteBufferFromNuraftBuffer()
 {
     try
     {
diff --git a/src/Coordination/tests/gtest_for_build.cpp b/src/Coordination/tests/gtest_for_build.cpp
index 38602e48fae..fa330903ae2 100644
--- a/src/Coordination/tests/gtest_for_build.cpp
+++ b/src/Coordination/tests/gtest_for_build.cpp
@@ -3,6 +3,7 @@
 #include <Coordination/InMemoryLogStore.h>
 #include <Coordination/InMemoryStateManager.h>
 #include <Coordination/SummingStateMachine.h>
+#include <Coordination/NuKeeperStateMachine.h>
 #include <Coordination/LoggerWrapper.h>
 #include <Coordination/WriteBufferFromNuraftBuffer.h>
 #include <Coordination/ReadBufferFromNuraftBuffer.h>
@@ -12,15 +13,6 @@
 #include <libnuraft/nuraft.hxx>
 #include <thread>
 
-namespace DB
-{
-
-namespace ErrorCodes
-{
-    extern const int LOGICAL_ERROR;
-}
-
-}
 
 TEST(CoordinationTest, BuildTest)
 {
@@ -63,14 +55,15 @@ TEST(CoordinationTest, BufferSerde)
     EXPECT_EQ(dynamic_cast<Coordination::ZooKeeperGetRequest *>(request_read.get())->path, "/path/value");
 }
 
-struct SummingRaftServer
+template <typename StateMachine>
+struct SimpliestRaftServer
 {
-    SummingRaftServer(int server_id_, const std::string & hostname_, int port_)
+    SimpliestRaftServer(int server_id_, const std::string & hostname_, int port_)
         : server_id(server_id_)
         , hostname(hostname_)
         , port(port_)
         , endpoint(hostname + ":" + std::to_string(port))
-        , state_machine(nuraft::cs_new<DB::SummingStateMachine>())
+        , state_machine(nuraft::cs_new<StateMachine>())
         , state_manager(nuraft::cs_new<DB::InMemoryStateManager>(server_id, endpoint))
     {
         nuraft::raft_params params;
@@ -118,7 +111,7 @@ struct SummingRaftServer
     std::string endpoint;
 
     // State machine.
-    nuraft::ptr<DB::SummingStateMachine> state_machine;
+    nuraft::ptr<StateMachine> state_machine;
 
     // State manager.
     nuraft::ptr<nuraft::state_mgr> state_manager;
@@ -130,6 +123,8 @@ struct SummingRaftServer
     nuraft::ptr<nuraft::raft_server> raft_instance;
 };
 
+using SummingRaftServer = SimpliestRaftServer<DB::SummingStateMachine>;
+
 nuraft::ptr<nuraft::buffer> getLogEntry(int64_t number)
 {
     nuraft::ptr<nuraft::buffer> ret = nuraft::buffer::alloc(sizeof(number));
@@ -178,7 +173,7 @@ TEST(CoordinationTest, TestSummingRaft3)
         EXPECT_TRUE(false);
     }
 
-    while(s1.raft_instance->get_leader() != 2)
+    while (s1.raft_instance->get_leader() != 2)
     {
         std::cout << "Waiting s1 to join to s2 quorum\n";
         std::this_thread::sleep_for(std::chrono::milliseconds(100));
@@ -193,7 +188,7 @@ TEST(CoordinationTest, TestSummingRaft3)
         EXPECT_TRUE(false);
     }
 
-    while(s3.raft_instance->get_leader() != 2)
+    while (s3.raft_instance->get_leader() != 2)
     {
         std::cout << "Waiting s3 to join to s2 quorum\n";
         std::this_thread::sleep_for(std::chrono::milliseconds(100));
@@ -266,3 +261,120 @@ TEST(CoordinationTest, TestSummingRaft3)
     s2.launcher.shutdown(5);
     s3.launcher.shutdown(5);
 }
+
+using NuKeeperRaftServer = SimpliestRaftServer<DB::NuKeeperStateMachine>;
+
+
+nuraft::ptr<nuraft::buffer> getZooKeeperLogEntry(int64_t session_id, const Coordination::ZooKeeperRequestPtr & request)
+{
+    DB::WriteBufferFromNuraftBuffer buf;
+    DB::writeIntBinary(session_id, buf);
+    request->write(buf);
+    return buf.getBuffer();
+}
+
+zkutil::TestKeeperStorage::ResponsesForSessions getZooKeeperResponses(nuraft::ptr<nuraft::buffer> & buffer, const Coordination::ZooKeeperRequestPtr & request)
+{
+    zkutil::TestKeeperStorage::ResponsesForSessions results;
+    DB::ReadBufferFromNuraftBuffer buf(buffer);
+    while (!buf.eof())
+    {
+        int64_t session_id;
+        DB::readIntBinary(session_id, buf);
+
+        int32_t length;
+        Coordination::XID xid;
+        int64_t zxid;
+        Coordination::Error err;
+
+        Coordination::read(length, buf);
+        Coordination::read(xid, buf);
+        Coordination::read(zxid, buf);
+        Coordination::read(err, buf);
+        auto response = request->makeResponse();
+        response->readImpl(buf);
+        results.push_back(zkutil::TestKeeperStorage::ResponseForSession{session_id, response});
+    }
+    return results;
+}
+
+TEST(CoordinationTest, TestNuKeeperRaft)
+{
+    NuKeeperRaftServer s1(1, "localhost", 44447);
+    NuKeeperRaftServer s2(2, "localhost", 44448);
+    NuKeeperRaftServer s3(3, "localhost", 44449);
+
+    nuraft::srv_config first_config(1, "localhost:44447");
+    auto ret1 = s2.raft_instance->add_srv(first_config);
+
+    EXPECT_TRUE(ret1->get_accepted()) << "failed to add server: " << ret1->get_result_str() << std::endl;
+
+    while (s1.raft_instance->get_leader() != 2)
+    {
+        std::cout << "Waiting s1 to join to s2 quorum\n";
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    }
+
+    nuraft::srv_config third_config(3, "localhost:44449");
+    auto ret3 = s2.raft_instance->add_srv(third_config);
+
+    EXPECT_TRUE(ret3->get_accepted()) << "failed to add server: " << ret3->get_result_str() << std::endl;
+
+    while (s3.raft_instance->get_leader() != 2)
+    {
+        std::cout << "Waiting s3 to join to s2 quorum\n";
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    }
+
+    /// S2 is leader
+    EXPECT_EQ(s1.raft_instance->get_leader(), 2);
+    EXPECT_EQ(s2.raft_instance->get_leader(), 2);
+    EXPECT_EQ(s3.raft_instance->get_leader(), 2);
+
+    int64_t session_id = 34;
+    std::shared_ptr<Coordination::ZooKeeperCreateRequest> create_request = std::make_shared<Coordination::ZooKeeperCreateRequest>();
+    create_request->path = "/hello";
+    create_request->data = "world";
+
+    auto entry1 = getZooKeeperLogEntry(session_id, create_request);
+    auto ret_leader = s2.raft_instance->append_entries({entry1});
+
+    EXPECT_TRUE(ret_leader->get_accepted()) << "failed to replicate create entry:" << ret_leader->get_result_code();
+    EXPECT_EQ(ret_leader->get_result_code(), nuraft::cmd_result_code::OK) << "failed to replicate create entry:" << ret_leader->get_result_code();
+
+    auto result = ret_leader.get();
+
+    auto responses = getZooKeeperResponses(result->get(), create_request);
+
+    EXPECT_EQ(responses.size(), 1);
+    EXPECT_EQ(responses[0].session_id, 34);
+    EXPECT_EQ(responses[0].response->getOpNum(), Coordination::OpNum::Create);
+    EXPECT_EQ(dynamic_cast<Coordination::ZooKeeperCreateResponse *>(responses[0].response.get())->path_created, "/hello");
+
+
+    while (s1.state_machine->getStorage().container.count("/hello") == 0)
+    {
+        std::cout << "Waiting s1 to apply entry\n";
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    }
+
+    while (s2.state_machine->getStorage().container.count("/hello") == 0)
+    {
+        std::cout << "Waiting s2 to apply entry\n";
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    }
+
+    while (s3.state_machine->getStorage().container.count("/hello") == 0)
+    {
+        std::cout << "Waiting s3 to apply entry\n";
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    }
+
+    EXPECT_EQ(s1.state_machine->getStorage().container["/hello"].data, "world");
+    EXPECT_EQ(s2.state_machine->getStorage().container["/hello"].data, "world");
+    EXPECT_EQ(s3.state_machine->getStorage().container["/hello"].data, "world");
+
+    s1.launcher.shutdown(5);
+    s2.launcher.shutdown(5);
+    s3.launcher.shutdown(5);
+}

From 18c944257af66d5a2a34cbd4964bd38e068faca8 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Wed, 20 Jan 2021 20:23:15 +0300
Subject: [PATCH 0124/1238] cleanup

---
 src/Processors/Transforms/WindowTransform.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h
index ce7723846c9..cd2172ab7fb 100644
--- a/src/Processors/Transforms/WindowTransform.h
+++ b/src/Processors/Transforms/WindowTransform.h
@@ -37,7 +37,7 @@ public:
             const Block & output_header_,
             const WindowDescription & window_description_,
             const std::vector<WindowFunctionDescription> &
-                window_function_descriptions);
+                functions);
 
     ~WindowTransform() override;
 
@@ -49,7 +49,7 @@ public:
     static Block transformHeader(Block header, const ExpressionActionsPtr & expression);
 
     /*
-     * (former) Implemetation of ISimpleTransform.
+     * (former) Implementation of ISimpleTransform.
      */
     void transform(Chunk & chunk) /*override*/;
 

From 2129dc13f6d7e2a7e1ca45bd4128f67976f3dfe4 Mon Sep 17 00:00:00 2001
From: Ivan Lezhankin <ilezhankin@yandex-team.ru>
Date: Wed, 20 Jan 2021 20:44:18 +0300
Subject: [PATCH 0125/1238] Fix style and build

---
 src/Common/Dwarf.cpp      | 35 +++++++++++++++++++++--------------
 src/Common/Dwarf.h        |  8 +++++---
 src/Common/StackTrace.cpp |  1 +
 3 files changed, 27 insertions(+), 17 deletions(-)

diff --git a/src/Common/Dwarf.cpp b/src/Common/Dwarf.cpp
index 14e6e1072b6..d0b3244dac2 100644
--- a/src/Common/Dwarf.cpp
+++ b/src/Common/Dwarf.cpp
@@ -743,7 +743,8 @@ bool Dwarf::findLocation(
     std::optional<std::string_view> main_file_name;
     std::optional<uint64_t> base_addr_cu;
 
-    forEachAttribute(cu, die, [&](const Attribute & attr) {
+    forEachAttribute(cu, die, [&](const Attribute & attr)
+    {
         switch (attr.spec.name)
         {
             case DW_AT_stmt_list:
@@ -875,14 +876,16 @@ bool Dwarf::findLocation(
 void Dwarf::findSubProgramDieForAddress(
     const CompilationUnit & cu, const Die & die, uint64_t address, std::optional<uint64_t> base_addr_cu, Die & subprogram) const
 {
-    forEachChild(cu, die, [&](const Die & child_die) {
+    forEachChild(cu, die, [&](const Die & child_die)
+    {
         if (child_die.abbr.tag == DW_TAG_subprogram)
         {
             std::optional<uint64_t> low_pc;
             std::optional<uint64_t> high_pc;
             std::optional<bool> is_high_pc_addr;
             std::optional<uint64_t> range_offset;
-            forEachAttribute(cu, child_die, [&](const Attribute & attr) {
+            forEachAttribute(cu, child_die, [&](const Attribute & attr)
+            {
                 switch (attr.spec.name)
                 {
                     case DW_AT_ranges:
@@ -942,7 +945,8 @@ void Dwarf::findInlinedSubroutineDieForAddress(
         return;
     }
 
-    forEachChild(cu, die, [&](const Die & child_die) {
+    forEachChild(cu, die, [&](const Die & child_die)
+    {
         // Between a DW_TAG_subprogram and and DW_TAG_inlined_subroutine we might
         // have arbitrary intermediary "nodes", including DW_TAG_common_block,
         // DW_TAG_lexical_block, DW_TAG_try_block, DW_TAG_catch_block and
@@ -966,7 +970,8 @@ void Dwarf::findInlinedSubroutineDieForAddress(
         std::optional<uint64_t> call_file;
         std::optional<uint64_t> call_line;
         std::optional<uint64_t> range_offset;
-        forEachAttribute(cu, child_die, [&](const Attribute & attr) {
+        forEachAttribute(cu, child_die, [&](const Attribute & attr)
+        {
             switch (attr.spec.name)
             {
                 case DW_AT_ranges:
@@ -1028,7 +1033,8 @@ void Dwarf::findInlinedSubroutineDieForAddress(
         location.file = line_vm.getFullFileName(*call_file);
         location.line = *call_line;
 
-        auto get_function_name = [&](const CompilationUnit & srcu, uint64_t die_offset) {
+        auto get_function_name = [&](const CompilationUnit & srcu, uint64_t die_offset)
+        {
             auto decl_die = getDieAtOffset(srcu, die_offset);
             // Jump to the actual function definition instead of declaration for name
             // and line info.
@@ -1037,7 +1043,8 @@ void Dwarf::findInlinedSubroutineDieForAddress(
             std::string_view name;
             // The file and line will be set in the next inline subroutine based on
             // its DW_AT_call_file and DW_AT_call_line.
-            forEachAttribute(srcu, def_die, [&](const Attribute & attr) {
+            forEachAttribute(srcu, def_die, [&](const Attribute & attr)
+            {
                 switch (attr.spec.name)
                 {
                     case DW_AT_linkage_name:
@@ -1146,14 +1153,14 @@ bool Dwarf::isAddrInRangeList(uint64_t address, std::optional<uint64_t> base_add
         return false;
     }
 
-    const bool is64BitAddr = addr_size == 8;
+    const bool is_64bit_addr = addr_size == 8;
     std::string_view sp = ranges_;
     sp.remove_prefix(offset);
-    const uint64_t max_addr = is64BitAddr ? std::numeric_limits<uint64_t>::max() : std::numeric_limits<uint32_t>::max();
+    const uint64_t max_addr = is_64bit_addr ? std::numeric_limits<uint64_t>::max() : std::numeric_limits<uint32_t>::max();
     while (!sp.empty())
     {
-        uint64_t begin = readOffset(sp, is64BitAddr);
-        uint64_t end = readOffset(sp, is64BitAddr);
+        uint64_t begin = readOffset(sp, is_64bit_addr);
+        uint64_t end = readOffset(sp, is_64bit_addr);
         // The range list entry is a base address selection entry.
         if (begin == max_addr)
         {
@@ -1191,10 +1198,10 @@ Dwarf::CompilationUnit Dwarf::findCompilationUnit(std::string_view info, uint64_
         chunk.remove_prefix(offset);
 
         auto initial_length = read<uint32_t>(chunk);
-        auto is64Bit = (initial_length == uint32_t(-1));
-        auto size = is64Bit ? read<uint64_t>(chunk) : initial_length;
+        auto is_64bit = (initial_length == uint32_t(-1));
+        auto size = is_64bit ? read<uint64_t>(chunk) : initial_length;
         SAFE_CHECK(size <= chunk.size(), "invalid chunk size");
-        size += is64Bit ? 12 : 4;
+        size += is_64bit ? 12 : 4;
 
         if (offset + size > targetOffset)
         {
diff --git a/src/Common/Dwarf.h b/src/Common/Dwarf.h
index 065ef6e3f5b..681d1f00362 100644
--- a/src/Common/Dwarf.h
+++ b/src/Common/Dwarf.h
@@ -21,6 +21,7 @@
 /** This file was edited for ClickHouse.
   */
 
+#include <memory>
 #include <optional>
 #include <string>
 #include <string_view>
@@ -115,7 +116,7 @@ public:
         std::string_view file_;
     };
 
-    // Indicates inline funtion `name` is called  at `line@file`.
+    // Indicates inline function `name` is called  at `line@file`.
     struct CallLocation
     {
         Path file = {};
@@ -393,7 +394,7 @@ private:
     DIEAbbreviation getAbbreviation(uint64_t code, uint64_t offset) const;
 
     /**
-     * Iterates over all attributes of a debugging info entry,  calling the given
+     * Iterates over all attributes of a debugging info entry, calling the given
      * callable for each. If all attributes are visited, then return the offset of
      * next DIE, or else iteration is stopped early and return size_t(-1) if any
      * of the calls return false.
@@ -419,7 +420,8 @@ private:
     std::optional<T> getAttribute(const CompilationUnit & cu, const Die & die, uint64_t attr_name) const
     {
         std::optional<T> result;
-        forEachAttribute(cu, die, [&](const Attribute & attr) {
+        forEachAttribute(cu, die, [&](const Attribute & attr)
+        {
             if (attr.spec.name == attr_name)
             {
                 result = std::get<T>(attr.attr_value);
diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp
index b1032786eca..e0cd534b057 100644
--- a/src/Common/StackTrace.cpp
+++ b/src/Common/StackTrace.cpp
@@ -377,6 +377,7 @@ static void toStringEveryLineImpl(
         out.str({});
     }
 #else
+    UNUSED(fatal);
     std::stringstream out;  // STYLE_CHECK_ALLOW_STD_STRING_STREAM
     out.exceptions(std::ios::failbit);
 

From 9a4ec13a9a2e237acbfb151b1966142666984282 Mon Sep 17 00:00:00 2001
From: Hasitha Kanchana <48449865+hasithaka@users.noreply.github.com>
Date: Wed, 20 Jan 2021 23:29:45 +0100
Subject: [PATCH 0126/1238] Update update.md

Add additional explanation for the ClickHouse version upgrade. It will help full when you have a specific
The title has to be changed.
i.e "ClickHouse Upgrade" not  "Clickhose update"
---
 docs/en/operations/update.md | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/docs/en/operations/update.md b/docs/en/operations/update.md
index edacf1ff973..04fbaf761c8 100644
--- a/docs/en/operations/update.md
+++ b/docs/en/operations/update.md
@@ -1,9 +1,9 @@
 ---
 toc_priority: 47
-toc_title: ClickHouse Update
+toc_title: ClickHouse Upgrade
 ---
 
-# ClickHouse Update {#clickhouse-update}
+# ClickHouse Upgrade {#clickhouse-upgrade}
 
 If ClickHouse was installed from `deb` packages, execute the following commands on the server:
 
@@ -16,3 +16,15 @@ $ sudo service clickhouse-server restart
 If you installed ClickHouse using something other than the recommended `deb` packages, use the appropriate update method.
 
 ClickHouse does not support a distributed update. The operation should be performed consecutively on each separate server. Do not update all the servers on a cluster simultaneously, or the cluster will be unavailable for some time.
+
+The upgrade of older version of ClickHouse to specific version:
+
+As an example: 
+
+```bash
+$ sudo apt-get update
+$ sudo apt-get install clickhouse-server=20.12.4.5 clickhouse-client=20.12.4.5 clickhouse-common-static=20.12.4.5
+$ sudo service clickhouse-server restart
+```
+
+Note: It's always recommended to backup all databases before initiating the upgrade process. Please make sure the new version is compatible with new changes so on.

From 0cbbb84f24236855391a69897871f43db5cc5f70 Mon Sep 17 00:00:00 2001
From: Ivan <5627721+abyss7@users.noreply.github.com>
Date: Thu, 21 Jan 2021 02:20:11 +0300
Subject: [PATCH 0127/1238] Add missing header

---
 src/Common/Dwarf.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/Common/Dwarf.h b/src/Common/Dwarf.h
index 681d1f00362..9ea940c3380 100644
--- a/src/Common/Dwarf.h
+++ b/src/Common/Dwarf.h
@@ -21,6 +21,7 @@
 /** This file was edited for ClickHouse.
   */
 
+#include <functional>
 #include <memory>
 #include <optional>
 #include <string>

From f7175819d57df8185e05fddd28435fb1abb4e56c Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Thu, 21 Jan 2021 14:07:55 +0300
Subject: [PATCH 0128/1238] Add storage simpliest serialization

---
 src/Common/ZooKeeper/ZooKeeperIO.cpp          |  13 ++
 src/Common/ZooKeeper/ZooKeeperIO.h            |   2 +
 src/Coordination/NuKeeperStateMachine.cpp     | 190 ++++++++++++++++++
 src/Coordination/NuKeeperStateMachine.h       |  63 ++++++
 src/Coordination/TestKeeperStorage.cpp        |   5 +-
 .../TestKeeperStorageSerializer.cpp           |  87 ++++++++
 .../TestKeeperStorageSerializer.h             |  17 ++
 src/Coordination/tests/gtest_for_build.cpp    |  18 +-
 8 files changed, 391 insertions(+), 4 deletions(-)
 create mode 100644 src/Coordination/NuKeeperStateMachine.cpp
 create mode 100644 src/Coordination/NuKeeperStateMachine.h
 create mode 100644 src/Coordination/TestKeeperStorageSerializer.cpp
 create mode 100644 src/Coordination/TestKeeperStorageSerializer.h

diff --git a/src/Common/ZooKeeper/ZooKeeperIO.cpp b/src/Common/ZooKeeper/ZooKeeperIO.cpp
index a0e4161f111..3f0905ea186 100644
--- a/src/Common/ZooKeeper/ZooKeeperIO.cpp
+++ b/src/Common/ZooKeeper/ZooKeeperIO.cpp
@@ -3,6 +3,13 @@
 namespace Coordination
 {
 
+
+void write(size_t x, WriteBuffer & out)
+{
+    x = __builtin_bswap64(x);
+    writeBinary(x, out);
+}
+
 void write(int64_t x, WriteBuffer & out)
 {
     x = __builtin_bswap64(x);
@@ -57,6 +64,12 @@ void write(const Error & x, WriteBuffer & out)
     write(static_cast<int32_t>(x), out);
 }
 
+void read(size_t & x, ReadBuffer & in)
+{
+    readBinary(x, in);
+    x = __builtin_bswap64(x);
+}
+
 void read(int64_t & x, ReadBuffer & in)
 {
     readBinary(x, in);
diff --git a/src/Common/ZooKeeper/ZooKeeperIO.h b/src/Common/ZooKeeper/ZooKeeperIO.h
index edeb995f27b..fd47e324664 100644
--- a/src/Common/ZooKeeper/ZooKeeperIO.h
+++ b/src/Common/ZooKeeper/ZooKeeperIO.h
@@ -13,6 +13,7 @@ namespace Coordination
 
 using namespace DB;
 
+void write(size_t x, WriteBuffer & out);
 void write(int64_t x, WriteBuffer & out);
 void write(int32_t x, WriteBuffer & out);
 void write(OpNum x, WriteBuffer & out);
@@ -37,6 +38,7 @@ void write(const std::vector<T> & arr, WriteBuffer & out)
         write(elem, out);
 }
 
+void read(size_t & x, ReadBuffer & in);
 void read(int64_t & x, ReadBuffer & in);
 void read(int32_t & x, ReadBuffer & in);
 void read(OpNum & x, ReadBuffer & in);
diff --git a/src/Coordination/NuKeeperStateMachine.cpp b/src/Coordination/NuKeeperStateMachine.cpp
new file mode 100644
index 00000000000..59830040e66
--- /dev/null
+++ b/src/Coordination/NuKeeperStateMachine.cpp
@@ -0,0 +1,190 @@
+#include <Coordination/NuKeeperStateMachine.h>
+#include <Coordination/ReadBufferFromNuraftBuffer.h>
+#include <Coordination/WriteBufferFromNuraftBuffer.h>
+#include <IO/ReadHelpers.h>
+#include <Common/ZooKeeper/ZooKeeperIO.h>
+
+namespace DB
+{
+
+zkutil::TestKeeperStorage::RequestForSession parseRequest(nuraft::buffer & data)
+{
+    ReadBufferFromNuraftBuffer buffer(data);
+    zkutil::TestKeeperStorage::RequestForSession request_for_session;
+    readIntBinary(request_for_session.session_id, buffer);
+
+    int32_t length;
+    Coordination::read(length, buffer);
+
+    int32_t xid;
+    Coordination::read(xid, buffer);
+
+    Coordination::OpNum opnum;
+    Coordination::read(opnum, buffer);
+
+    request_for_session.request = Coordination::ZooKeeperRequestFactory::instance().get(opnum);
+    request_for_session.request->xid = xid;
+    request_for_session.request->readImpl(buffer);
+    return request_for_session;
+}
+
+nuraft::ptr<nuraft::buffer> writeResponses(zkutil::TestKeeperStorage::ResponsesForSessions & responses)
+{
+    WriteBufferFromNuraftBuffer buffer;
+    for (const auto & response_and_session : responses)
+    {
+        writeIntBinary(response_and_session.session_id, buffer);
+        response_and_session.response->write(buffer);
+    }
+    return buffer.getBuffer();
+}
+
+
+NuKeeperStateMachine::NuKeeperStateMachine()
+    : last_committed_idx(0)
+    , log(&Poco::Logger::get("NuRaftStateMachine"))
+{
+    LOG_DEBUG(log, "Created nukeeper state machine");
+}
+
+nuraft::ptr<nuraft::buffer> NuKeeperStateMachine::commit(const size_t log_idx, nuraft::buffer & data)
+{
+    LOG_DEBUG(log, "Commiting logidx {}", log_idx);
+    auto request_for_session = parseRequest(data);
+    auto responses_with_sessions = storage.processRequest(request_for_session.request, request_for_session.session_id);
+
+    last_committed_idx = log_idx;
+    return writeResponses(responses_with_sessions);
+}
+
+bool NuKeeperStateMachine::apply_snapshot(nuraft::snapshot & s)
+{
+    LOG_DEBUG(log, "Applying snapshot {}", s.get_last_log_idx());
+    std::lock_guard<std::mutex> lock(snapshots_lock);
+    auto entry = snapshots.find(s.get_last_log_idx());
+    if (entry == snapshots.end())
+    {
+        return false;
+    }
+
+    /// TODO
+    return true;
+}
+
+nuraft::ptr<nuraft::snapshot> NuKeeperStateMachine::last_snapshot()
+{
+
+    LOG_DEBUG(log, "Trying to get last snapshot");
+   // Just return the latest snapshot.
+    std::lock_guard<std::mutex> lock(snapshots_lock);
+    auto entry = snapshots.rbegin();
+    if (entry == snapshots.rend())
+        return nullptr;
+
+    return entry->second;
+}
+
+void NuKeeperStateMachine::create_snapshot(
+    nuraft::snapshot & s,
+    nuraft::async_result<bool>::handler_type & when_done)
+{
+
+    LOG_DEBUG(log, "Creating snapshot {}", s.get_last_log_idx());
+    {
+        std::lock_guard<std::mutex> lock(snapshots_lock);
+        nuraft::ptr<nuraft::buffer> snp_buf = s.serialize();
+        nuraft::ptr<nuraft::snapshot> ss = nuraft::snapshot::deserialize(*snp_buf);
+        snapshots[s.get_last_log_idx()] = ss;
+        const int MAX_SNAPSHOTS = 3;
+        int num = snapshots.size();
+        auto entry = snapshots.begin();
+
+        for (int i = 0; i < num - MAX_SNAPSHOTS; ++i)
+        {
+            if (entry == snapshots.end())
+                break;
+            entry = snapshots.erase(entry);
+        }
+    }
+    nuraft::ptr<std::exception> except(nullptr);
+    bool ret = true;
+    when_done(ret, except);
+}
+
+void NuKeeperStateMachine::save_logical_snp_obj(
+    nuraft::snapshot & s,
+    size_t & obj_id,
+    nuraft::buffer & /*data*/,
+    bool /*is_first_obj*/,
+    bool /*is_last_obj*/)
+{
+    LOG_DEBUG(log, "Saving snapshot {} obj_id {}", s.get_last_log_idx(), obj_id);
+    if (obj_id == 0)
+    {
+        std::lock_guard<std::mutex> lock(snapshots_lock);
+        nuraft::ptr<nuraft::buffer> snp_buf = s.serialize();
+        nuraft::ptr<nuraft::snapshot> ss = nuraft::snapshot::deserialize(*snp_buf);
+        snapshots[s.get_last_log_idx()] = ss;
+        const int MAX_SNAPSHOTS = 3;
+        int num = snapshots.size();
+        auto entry = snapshots.begin();
+
+        for (int i = 0; i < num - MAX_SNAPSHOTS; ++i)
+        {
+            if (entry == snapshots.end())
+                break;
+            entry = snapshots.erase(entry);
+        }
+    }
+    else
+    {
+        std::lock_guard<std::mutex> lock(snapshots_lock);
+        auto entry = snapshots.find(s.get_last_log_idx());
+        assert(entry != snapshots.end());
+    }
+
+    obj_id++;
+}
+
+int NuKeeperStateMachine::read_logical_snp_obj(
+    nuraft::snapshot & s,
+    void* & /*user_snp_ctx*/,
+    ulong obj_id,
+    nuraft::ptr<nuraft::buffer> & data_out,
+    bool & is_last_obj)
+{
+
+    LOG_DEBUG(log, "Reading snapshot {} obj_id {}", s.get_last_log_idx(), obj_id);
+    {
+        std::lock_guard<std::mutex> ll(snapshots_lock);
+        auto entry = snapshots.find(s.get_last_log_idx());
+        if (entry == snapshots.end())
+        {
+            // Snapshot doesn't exist.
+            data_out = nullptr;
+            is_last_obj = true;
+            return 0;
+        }
+    }
+
+    if (obj_id == 0)
+    {
+        // Object ID == 0: first object, put dummy data.
+        data_out = nuraft::buffer::alloc(sizeof(size_t));
+        nuraft::buffer_serializer bs(data_out);
+        bs.put_i32(0);
+        is_last_obj = false;
+
+    }
+    else
+    {
+        // Object ID > 0: second object, put actual value.
+        data_out = nuraft::buffer::alloc(sizeof(size_t));
+        nuraft::buffer_serializer bs(data_out);
+        bs.put_u64(1);
+        is_last_obj = true;
+    }
+    return 0;
+}
+
+}
diff --git a/src/Coordination/NuKeeperStateMachine.h b/src/Coordination/NuKeeperStateMachine.h
new file mode 100644
index 00000000000..42b90116a9b
--- /dev/null
+++ b/src/Coordination/NuKeeperStateMachine.h
@@ -0,0 +1,63 @@
+#pragma once
+
+#include <Coordination/TestKeeperStorage.h>
+#include <libnuraft/nuraft.hxx>
+#include <common/logger_useful.h>
+
+namespace DB
+{
+
+class NuKeeperStateMachine : public nuraft::state_machine
+{
+public:
+    NuKeeperStateMachine();
+
+    nuraft::ptr<nuraft::buffer> pre_commit(const size_t /*log_idx*/, nuraft::buffer & /*data*/) override { return nullptr; }
+
+    nuraft::ptr<nuraft::buffer> commit(const size_t log_idx, nuraft::buffer & data) override;
+
+    void rollback(const size_t /*log_idx*/, nuraft::buffer & /*data*/) override {}
+
+    size_t last_commit_index() override { return last_committed_idx; }
+
+    bool apply_snapshot(nuraft::snapshot & s) override;
+
+    nuraft::ptr<nuraft::snapshot> last_snapshot() override;
+
+    void create_snapshot(
+        nuraft::snapshot & s,
+        nuraft::async_result<bool>::handler_type & when_done) override;
+
+    void save_logical_snp_obj(
+        nuraft::snapshot & s,
+        size_t & obj_id,
+        nuraft::buffer & data,
+        bool is_first_obj,
+        bool is_last_obj) override;
+
+    int read_logical_snp_obj(
+        nuraft::snapshot & s,
+        void* & user_snp_ctx,
+        ulong obj_id,
+        nuraft::ptr<nuraft::buffer> & data_out,
+        bool & is_last_obj) override;
+
+    zkutil::TestKeeperStorage & getStorage()
+    {
+        return storage;
+    }
+
+private:
+    zkutil::TestKeeperStorage storage;
+    // Mutex for `snapshots_`.
+    std::mutex snapshots_lock;
+
+    /// Fake snapshot storage
+    std::map<uint64_t, nuraft::ptr<nuraft::snapshot>> snapshots;
+
+    /// Last committed Raft log number.
+    std::atomic<size_t> last_committed_idx;
+    Poco::Logger * log;
+};
+
+}
diff --git a/src/Coordination/TestKeeperStorage.cpp b/src/Coordination/TestKeeperStorage.cpp
index b5bf9facbf1..31dc4116dc8 100644
--- a/src/Coordination/TestKeeperStorage.cpp
+++ b/src/Coordination/TestKeeperStorage.cpp
@@ -46,7 +46,7 @@ static TestKeeperStorage::ResponsesForSessions processWatchesImpl(const String &
     {
         std::shared_ptr<Coordination::ZooKeeperWatchResponse> watch_response = std::make_shared<Coordination::ZooKeeperWatchResponse>();
         watch_response->path = path;
-        watch_response->xid = -1;
+        watch_response->xid = Coordination::WATCH_XID;
         watch_response->zxid = -1;
         watch_response->type = event_type;
         watch_response->state = Coordination::State::CONNECTED;
@@ -62,7 +62,7 @@ static TestKeeperStorage::ResponsesForSessions processWatchesImpl(const String &
     {
         std::shared_ptr<Coordination::ZooKeeperWatchResponse> watch_list_response = std::make_shared<Coordination::ZooKeeperWatchResponse>();
         watch_list_response->path = parent_path;
-        watch_list_response->xid = -1;
+        watch_list_response->xid = Coordination::WATCH_XID;
         watch_list_response->zxid = -1;
         watch_list_response->type = Coordination::Event::CHILD;
         watch_list_response->state = Coordination::State::CONNECTED;
@@ -103,7 +103,6 @@ struct TestKeeperStorageHeartbeatRequest final : public TestKeeperStorageRequest
     }
 };
 
-
 struct TestKeeperStorageCreateRequest final : public TestKeeperStorageRequest
 {
     using TestKeeperStorageRequest::TestKeeperStorageRequest;
diff --git a/src/Coordination/TestKeeperStorageSerializer.cpp b/src/Coordination/TestKeeperStorageSerializer.cpp
new file mode 100644
index 00000000000..bf7015374be
--- /dev/null
+++ b/src/Coordination/TestKeeperStorageSerializer.cpp
@@ -0,0 +1,87 @@
+#include <Coordination/TestKeeperStorageSerializer.h>
+#include <IO/WriteHelpers.h>
+#include <IO/ReadHelpers.h>
+#include <Common/ZooKeeper/ZooKeeperIO.h>
+
+namespace DB
+{
+
+namespace
+{
+    void writeNode(const zkutil::TestKeeperStorage::Node & node, WriteBuffer & out)
+    {
+        Coordination::write(node.data, out);
+        Coordination::write(node.acls, out);
+        Coordination::write(node.is_ephemeral, out);
+        Coordination::write(node.is_sequental, out);
+        Coordination::write(node.stat, out);
+        Coordination::write(node.seq_num, out);
+    }
+
+    void readNode(zkutil::TestKeeperStorage::Node & node, ReadBuffer & in)
+    {
+        Coordination::read(node.data, in);
+        Coordination::read(node.acls, in);
+        Coordination::read(node.is_ephemeral, in);
+        Coordination::read(node.is_sequental, in);
+        Coordination::read(node.stat, in);
+        Coordination::read(node.seq_num, in);
+    }
+}
+
+void TestKeeperStorageSerializer::serialize(const zkutil::TestKeeperStorage & storage, WriteBuffer & out) const
+{
+    Coordination::write(storage.zxid, out);
+    Coordination::write(storage.session_id_counter, out);
+    Coordination::write(storage.container.size(), out);
+    for (const auto & [path, node] : storage.container)
+    {
+        Coordination::write(path, out);
+        writeNode(node, out);
+    }
+    Coordination::write(storage.ephemerals.size(), out);
+    for (const auto & [session_id, paths] : storage.ephemerals)
+    {
+        Coordination::write(session_id, out);
+        Coordination::write(paths.size(), out);
+        for (const auto & path : paths)
+            Coordination::write(path, out);
+    }
+}
+
+void TestKeeperStorageSerializer::deserialize(zkutil::TestKeeperStorage & storage, ReadBuffer & in) const
+{
+    int64_t session_id_counter, zxid;
+    Coordination::read(zxid, in);
+    Coordination::read(session_id_counter, in);
+    storage.zxid = zxid;
+    storage.session_id_counter = session_id_counter;
+
+    size_t container_size;
+    Coordination::read(container_size, in);
+    while (storage.container.size() < container_size)
+    {
+        std::string path;
+        Coordination::read(path, in);
+        zkutil::TestKeeperStorage::Node node;
+        readNode(node, in);
+        storage.container[path] = node;
+    }
+    size_t ephemerals_size;
+    Coordination::read(ephemerals_size, in);
+    while (storage.ephemerals.size() < ephemerals_size)
+    {
+        int64_t session_id;
+        size_t ephemerals_for_session;
+        Coordination::read(session_id, in);
+        Coordination::read(ephemerals_for_session, in);
+        while (storage.ephemerals[session_id].size() < ephemerals_for_session)
+        {
+            std::string ephemeral_path;
+            Coordination::read(ephemeral_path, in);
+            storage.ephemerals[session_id].emplace(ephemeral_path);
+        }
+    }
+}
+
+}
diff --git a/src/Coordination/TestKeeperStorageSerializer.h b/src/Coordination/TestKeeperStorageSerializer.h
new file mode 100644
index 00000000000..b4453574cfd
--- /dev/null
+++ b/src/Coordination/TestKeeperStorageSerializer.h
@@ -0,0 +1,17 @@
+#pragma once
+#include <Coordination/TestKeeperStorage.h>
+#include <IO/WriteBuffer.h>
+#include <IO/ReadBuffer.h>
+
+namespace DB
+{
+
+class TestKeeperStorageSerializer
+{
+public:
+    void serialize(const zkutil::TestKeeperStorage & storage, WriteBuffer & out) const;
+
+    void deserialize(zkutil::TestKeeperStorage & storage, ReadBuffer & in) const;
+};
+
+}
diff --git a/src/Coordination/tests/gtest_for_build.cpp b/src/Coordination/tests/gtest_for_build.cpp
index fa330903ae2..635ac88f737 100644
--- a/src/Coordination/tests/gtest_for_build.cpp
+++ b/src/Coordination/tests/gtest_for_build.cpp
@@ -351,7 +351,6 @@ TEST(CoordinationTest, TestNuKeeperRaft)
     EXPECT_EQ(responses[0].response->getOpNum(), Coordination::OpNum::Create);
     EXPECT_EQ(dynamic_cast<Coordination::ZooKeeperCreateResponse *>(responses[0].response.get())->path_created, "/hello");
 
-
     while (s1.state_machine->getStorage().container.count("/hello") == 0)
     {
         std::cout << "Waiting s1 to apply entry\n";
@@ -374,6 +373,23 @@ TEST(CoordinationTest, TestNuKeeperRaft)
     EXPECT_EQ(s2.state_machine->getStorage().container["/hello"].data, "world");
     EXPECT_EQ(s3.state_machine->getStorage().container["/hello"].data, "world");
 
+    std::shared_ptr<Coordination::ZooKeeperGetRequest> get_request = std::make_shared<Coordination::ZooKeeperGetRequest>();
+    get_request->path = "/hello";
+    auto entry2 = getZooKeeperLogEntry(session_id, get_request);
+    auto ret_leader_get = s2.raft_instance->append_entries({entry2});
+
+    EXPECT_TRUE(ret_leader_get->get_accepted()) << "failed to replicate create entry: " << ret_leader_get->get_result_code();
+    EXPECT_EQ(ret_leader_get->get_result_code(), nuraft::cmd_result_code::OK) << "failed to replicate create entry: " << ret_leader_get->get_result_code();
+
+    auto result_get = ret_leader_get.get();
+
+    auto get_responses = getZooKeeperResponses(result_get->get(), get_request);
+
+    EXPECT_EQ(get_responses.size(), 1);
+    EXPECT_EQ(get_responses[0].session_id, 34);
+    EXPECT_EQ(get_responses[0].response->getOpNum(), Coordination::OpNum::Get);
+    EXPECT_EQ(dynamic_cast<Coordination::ZooKeeperGetResponse *>(get_responses[0].response.get())->data, "world");
+
     s1.launcher.shutdown(5);
     s2.launcher.shutdown(5);
     s3.launcher.shutdown(5);

From d6b8dd75252aa40c1392241be2af563103c8ef68 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Thu, 21 Jan 2021 16:53:10 +0300
Subject: [PATCH 0129/1238] Dumb snapshoting

---
 src/Coordination/NuKeeperStateMachine.cpp     | 98 ++++++++++++-------
 src/Coordination/NuKeeperStateMachine.h       | 26 ++++-
 src/Coordination/TestKeeperStorage.h          | 13 +--
 .../TestKeeperStorageDispatcher.h             |  6 +-
 4 files changed, 93 insertions(+), 50 deletions(-)

diff --git a/src/Coordination/NuKeeperStateMachine.cpp b/src/Coordination/NuKeeperStateMachine.cpp
index 59830040e66..c0deb403f20 100644
--- a/src/Coordination/NuKeeperStateMachine.cpp
+++ b/src/Coordination/NuKeeperStateMachine.cpp
@@ -3,6 +3,7 @@
 #include <Coordination/WriteBufferFromNuraftBuffer.h>
 #include <IO/ReadHelpers.h>
 #include <Common/ZooKeeper/ZooKeeperIO.h>
+#include <Coordination/TestKeeperStorageSerializer.h>
 
 namespace DB
 {
@@ -51,23 +52,30 @@ nuraft::ptr<nuraft::buffer> NuKeeperStateMachine::commit(const size_t log_idx, n
 {
     LOG_DEBUG(log, "Commiting logidx {}", log_idx);
     auto request_for_session = parseRequest(data);
-    auto responses_with_sessions = storage.processRequest(request_for_session.request, request_for_session.session_id);
+    zkutil::TestKeeperStorage::ResponsesForSessions responses_for_sessions;
+    {
+        std::lock_guard lock(storage_lock);
+        responses_for_sessions = storage.processRequest(request_for_session.request, request_for_session.session_id);
+    }
 
     last_committed_idx = log_idx;
-    return writeResponses(responses_with_sessions);
+    return writeResponses(responses_for_sessions);
 }
 
 bool NuKeeperStateMachine::apply_snapshot(nuraft::snapshot & s)
 {
     LOG_DEBUG(log, "Applying snapshot {}", s.get_last_log_idx());
-    std::lock_guard<std::mutex> lock(snapshots_lock);
-    auto entry = snapshots.find(s.get_last_log_idx());
-    if (entry == snapshots.end())
+    StorageSnapshotPtr snapshot;
     {
-        return false;
+        std::lock_guard<std::mutex> lock(snapshots_lock);
+        auto entry = snapshots.find(s.get_last_log_idx());
+        if (entry == snapshots.end())
+            return false;
+        snapshot = entry->second;
     }
-
-    /// TODO
+    std::lock_guard lock(storage_lock);
+    storage = snapshot->storage;
+    last_committed_idx = s.get_last_log_idx();
     return true;
 }
 
@@ -81,7 +89,37 @@ nuraft::ptr<nuraft::snapshot> NuKeeperStateMachine::last_snapshot()
     if (entry == snapshots.rend())
         return nullptr;
 
-    return entry->second;
+    return entry->second->snapshot;
+}
+
+NuKeeperStateMachine::StorageSnapshotPtr NuKeeperStateMachine::createSnapshotInternal(nuraft::snapshot & s)
+{
+    nuraft::ptr<nuraft::buffer> snp_buf = s.serialize();
+    nuraft::ptr<nuraft::snapshot> ss = nuraft::snapshot::deserialize(*snp_buf);
+    std::lock_guard lock(storage_lock);
+    return std::make_shared<NuKeeperStateMachine::StorageSnapshot>(ss, storage);
+}
+
+NuKeeperStateMachine::StorageSnapshotPtr NuKeeperStateMachine::readSnapshot(nuraft::snapshot & s, nuraft::buffer & in) const
+{
+    nuraft::ptr<nuraft::buffer> snp_buf = s.serialize();
+    nuraft::ptr<nuraft::snapshot> ss = nuraft::snapshot::deserialize(*snp_buf);
+    TestKeeperStorageSerializer serializer;
+
+    ReadBufferFromNuraftBuffer reader(in);
+    zkutil::TestKeeperStorage new_storage;
+    serializer.deserialize(new_storage, reader);
+    return std::make_shared<StorageSnapshot>(ss, new_storage);
+}
+
+
+void NuKeeperStateMachine::writeSnapshot(const NuKeeperStateMachine::StorageSnapshotPtr & snapshot, nuraft::ptr<nuraft::buffer> & out) const
+{
+    TestKeeperStorageSerializer serializer;
+
+    WriteBufferFromNuraftBuffer writer;
+    serializer.serialize(snapshot->storage, writer);
+    out = writer.getBuffer();
 }
 
 void NuKeeperStateMachine::create_snapshot(
@@ -90,11 +128,10 @@ void NuKeeperStateMachine::create_snapshot(
 {
 
     LOG_DEBUG(log, "Creating snapshot {}", s.get_last_log_idx());
+    auto snapshot = createSnapshotInternal(s);
     {
         std::lock_guard<std::mutex> lock(snapshots_lock);
-        nuraft::ptr<nuraft::buffer> snp_buf = s.serialize();
-        nuraft::ptr<nuraft::snapshot> ss = nuraft::snapshot::deserialize(*snp_buf);
-        snapshots[s.get_last_log_idx()] = ss;
+        snapshots[s.get_last_log_idx()] = snapshot;
         const int MAX_SNAPSHOTS = 3;
         int num = snapshots.size();
         auto entry = snapshots.begin();
@@ -114,33 +151,22 @@ void NuKeeperStateMachine::create_snapshot(
 void NuKeeperStateMachine::save_logical_snp_obj(
     nuraft::snapshot & s,
     size_t & obj_id,
-    nuraft::buffer & /*data*/,
+    nuraft::buffer & data,
     bool /*is_first_obj*/,
     bool /*is_last_obj*/)
 {
     LOG_DEBUG(log, "Saving snapshot {} obj_id {}", s.get_last_log_idx(), obj_id);
     if (obj_id == 0)
     {
+        auto new_snapshot = createSnapshotInternal(s);
         std::lock_guard<std::mutex> lock(snapshots_lock);
-        nuraft::ptr<nuraft::buffer> snp_buf = s.serialize();
-        nuraft::ptr<nuraft::snapshot> ss = nuraft::snapshot::deserialize(*snp_buf);
-        snapshots[s.get_last_log_idx()] = ss;
-        const int MAX_SNAPSHOTS = 3;
-        int num = snapshots.size();
-        auto entry = snapshots.begin();
-
-        for (int i = 0; i < num - MAX_SNAPSHOTS; ++i)
-        {
-            if (entry == snapshots.end())
-                break;
-            entry = snapshots.erase(entry);
-        }
+        snapshots.try_emplace(s.get_last_log_idx(), std::move(new_snapshot));
     }
     else
     {
+        auto received_snapshot = readSnapshot(s, data);
         std::lock_guard<std::mutex> lock(snapshots_lock);
-        auto entry = snapshots.find(s.get_last_log_idx());
-        assert(entry != snapshots.end());
+        snapshots.try_emplace(s.get_last_log_idx(), std::move(received_snapshot));
     }
 
     obj_id++;
@@ -155,8 +181,9 @@ int NuKeeperStateMachine::read_logical_snp_obj(
 {
 
     LOG_DEBUG(log, "Reading snapshot {} obj_id {}", s.get_last_log_idx(), obj_id);
+    StorageSnapshotPtr required_snapshot;
     {
-        std::lock_guard<std::mutex> ll(snapshots_lock);
+        std::lock_guard<std::mutex> lock(snapshots_lock);
         auto entry = snapshots.find(s.get_last_log_idx());
         if (entry == snapshots.end())
         {
@@ -165,23 +192,18 @@ int NuKeeperStateMachine::read_logical_snp_obj(
             is_last_obj = true;
             return 0;
         }
+        required_snapshot = entry->second;
     }
 
     if (obj_id == 0)
     {
-        // Object ID == 0: first object, put dummy data.
-        data_out = nuraft::buffer::alloc(sizeof(size_t));
-        nuraft::buffer_serializer bs(data_out);
-        bs.put_i32(0);
+        auto new_snapshot = createSnapshotInternal(s);
+        writeSnapshot(new_snapshot, data_out);
         is_last_obj = false;
-
     }
     else
     {
-        // Object ID > 0: second object, put actual value.
-        data_out = nuraft::buffer::alloc(sizeof(size_t));
-        nuraft::buffer_serializer bs(data_out);
-        bs.put_u64(1);
+        writeSnapshot(required_snapshot, data_out);
         is_last_obj = true;
     }
     return 0;
diff --git a/src/Coordination/NuKeeperStateMachine.h b/src/Coordination/NuKeeperStateMachine.h
index 42b90116a9b..c8dd9f8e570 100644
--- a/src/Coordination/NuKeeperStateMachine.h
+++ b/src/Coordination/NuKeeperStateMachine.h
@@ -48,12 +48,34 @@ public:
     }
 
 private:
+    struct StorageSnapshot
+    {
+        StorageSnapshot(const nuraft::ptr<nuraft::snapshot> & s, const zkutil::TestKeeperStorage & storage_)
+            : snapshot(s)
+            , storage(storage_)
+        {}
+
+        nuraft::ptr<nuraft::snapshot> snapshot;
+        zkutil::TestKeeperStorage storage;
+    };
+
+    using StorageSnapshotPtr = std::shared_ptr<StorageSnapshot>;
+
+    StorageSnapshotPtr createSnapshotInternal(nuraft::snapshot & s);
+
+    StorageSnapshotPtr readSnapshot(nuraft::snapshot & s, nuraft::buffer & in) const;
+
+    void writeSnapshot(const StorageSnapshotPtr & snapshot, nuraft::ptr<nuraft::buffer> & out) const;
+
     zkutil::TestKeeperStorage storage;
-    // Mutex for `snapshots_`.
+    /// Mutex for snapshots
     std::mutex snapshots_lock;
 
+    /// Lock for storage
+    std::mutex storage_lock;
+
     /// Fake snapshot storage
-    std::map<uint64_t, nuraft::ptr<nuraft::snapshot>> snapshots;
+    std::map<uint64_t, StorageSnapshotPtr> snapshots;
 
     /// Last committed Raft log number.
     std::atomic<size_t> last_committed_idx;
diff --git a/src/Coordination/TestKeeperStorage.h b/src/Coordination/TestKeeperStorage.h
index 21b1ce16c32..0bdec50625e 100644
--- a/src/Coordination/TestKeeperStorage.h
+++ b/src/Coordination/TestKeeperStorage.h
@@ -19,7 +19,7 @@ using ResponseCallback = std::function<void(const Coordination::ZooKeeperRespons
 class TestKeeperStorage
 {
 public:
-    std::atomic<int64_t> session_id_counter{0};
+    int64_t session_id_counter{0};
 
     struct Node
     {
@@ -58,8 +58,8 @@ public:
     Ephemerals ephemerals;
     SessionAndWatcher sessions_and_watchers;
 
-    std::atomic<int64_t> zxid{0};
-    std::atomic<bool> finalized{false};
+    int64_t zxid{0};
+    bool finalized{false};
 
     Watches watches;
     Watches list_watches;   /// Watches for 'list' request (watches on children).
@@ -68,7 +68,7 @@ public:
 
     int64_t getZXID()
     {
-        return zxid.fetch_add(1);
+        return zxid++;
     }
 
 public:
@@ -76,11 +76,6 @@ public:
 
     ResponsesForSessions processRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id);
     ResponsesForSessions finalize(const RequestsForSessions & expired_requests);
-
-    int64_t getSessionID()
-    {
-        return session_id_counter.fetch_add(1);
-    }
 };
 
 }
diff --git a/src/Coordination/TestKeeperStorageDispatcher.h b/src/Coordination/TestKeeperStorageDispatcher.h
index f6a81d4a88e..e460ba41f0a 100644
--- a/src/Coordination/TestKeeperStorageDispatcher.h
+++ b/src/Coordination/TestKeeperStorageDispatcher.h
@@ -13,6 +13,8 @@ using ZooKeeperResponseCallback = std::function<void(const Coordination::ZooKeep
 class TestKeeperStorageDispatcher
 {
 private:
+
+    std::atomic<int64_t> session_id_counter{0};
     Poco::Timespan operation_timeout{0, Coordination::DEFAULT_OPERATION_TIMEOUT_MS * 1000};
 
     using clock = std::chrono::steady_clock;
@@ -48,10 +50,12 @@ public:
     ~TestKeeperStorageDispatcher();
 
     void putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id);
+
     int64_t getSessionID()
     {
-        return storage.getSessionID();
+        return session_id_counter.fetch_add(1);
     }
+
     void registerSession(int64_t session_id, ZooKeeperResponseCallback callback);
     /// Call if we don't need any responses for this session no more (session was expired)
     void finishSession(int64_t session_id);

From 61fe49194b933e5db1fc35050fa01a5d44b6b1b3 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Thu, 21 Jan 2021 17:34:34 +0300
Subject: [PATCH 0130/1238] First working snapshots

---
 src/Coordination/NuKeeperStateMachine.cpp     |  5 +-
 src/Coordination/TestKeeperStorage.h          |  4 ++
 .../TestKeeperStorageDispatcher.h             |  6 +-
 src/Coordination/tests/gtest_for_build.cpp    | 56 ++++++++++++++++++-
 4 files changed, 65 insertions(+), 6 deletions(-)

diff --git a/src/Coordination/NuKeeperStateMachine.cpp b/src/Coordination/NuKeeperStateMachine.cpp
index c0deb403f20..02f3016be32 100644
--- a/src/Coordination/NuKeeperStateMachine.cpp
+++ b/src/Coordination/NuKeeperStateMachine.cpp
@@ -126,7 +126,6 @@ void NuKeeperStateMachine::create_snapshot(
     nuraft::snapshot & s,
     nuraft::async_result<bool>::handler_type & when_done)
 {
-
     LOG_DEBUG(log, "Creating snapshot {}", s.get_last_log_idx());
     auto snapshot = createSnapshotInternal(s);
     {
@@ -156,6 +155,7 @@ void NuKeeperStateMachine::save_logical_snp_obj(
     bool /*is_last_obj*/)
 {
     LOG_DEBUG(log, "Saving snapshot {} obj_id {}", s.get_last_log_idx(), obj_id);
+
     if (obj_id == 0)
     {
         auto new_snapshot = createSnapshotInternal(s);
@@ -165,8 +165,9 @@ void NuKeeperStateMachine::save_logical_snp_obj(
     else
     {
         auto received_snapshot = readSnapshot(s, data);
+
         std::lock_guard<std::mutex> lock(snapshots_lock);
-        snapshots.try_emplace(s.get_last_log_idx(), std::move(received_snapshot));
+        snapshots[s.get_last_log_idx()] = std::move(received_snapshot);
     }
 
     obj_id++;
diff --git a/src/Coordination/TestKeeperStorage.h b/src/Coordination/TestKeeperStorage.h
index 0bdec50625e..76111490c78 100644
--- a/src/Coordination/TestKeeperStorage.h
+++ b/src/Coordination/TestKeeperStorage.h
@@ -74,6 +74,10 @@ public:
 public:
     TestKeeperStorage();
 
+    int64_t getSessionID()
+    {
+        return session_id_counter++;
+    }
     ResponsesForSessions processRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id);
     ResponsesForSessions finalize(const RequestsForSessions & expired_requests);
 };
diff --git a/src/Coordination/TestKeeperStorageDispatcher.h b/src/Coordination/TestKeeperStorageDispatcher.h
index e460ba41f0a..df4ac2cf99d 100644
--- a/src/Coordination/TestKeeperStorageDispatcher.h
+++ b/src/Coordination/TestKeeperStorageDispatcher.h
@@ -13,8 +13,6 @@ using ZooKeeperResponseCallback = std::function<void(const Coordination::ZooKeep
 class TestKeeperStorageDispatcher
 {
 private:
-
-    std::atomic<int64_t> session_id_counter{0};
     Poco::Timespan operation_timeout{0, Coordination::DEFAULT_OPERATION_TIMEOUT_MS * 1000};
 
     using clock = std::chrono::steady_clock;
@@ -39,6 +37,7 @@ private:
     ThreadFromGlobalPool processing_thread;
 
     TestKeeperStorage storage;
+    std::mutex session_id_mutex;
 
 private:
     void processingThread();
@@ -53,7 +52,8 @@ public:
 
     int64_t getSessionID()
     {
-        return session_id_counter.fetch_add(1);
+        std::lock_guard lock(session_id_mutex);
+        return storage.getSessionID();
     }
 
     void registerSession(int64_t session_id, ZooKeeperResponseCallback callback);
diff --git a/src/Coordination/tests/gtest_for_build.cpp b/src/Coordination/tests/gtest_for_build.cpp
index 635ac88f737..09c5db03514 100644
--- a/src/Coordination/tests/gtest_for_build.cpp
+++ b/src/Coordination/tests/gtest_for_build.cpp
@@ -2,11 +2,14 @@
 
 #include <Coordination/InMemoryLogStore.h>
 #include <Coordination/InMemoryStateManager.h>
+#include <Coordination/TestKeeperStorageSerializer.h>
 #include <Coordination/SummingStateMachine.h>
 #include <Coordination/NuKeeperStateMachine.h>
 #include <Coordination/LoggerWrapper.h>
 #include <Coordination/WriteBufferFromNuraftBuffer.h>
 #include <Coordination/ReadBufferFromNuraftBuffer.h>
+#include <IO/ReadBufferFromString.h>
+#include <IO/WriteBufferFromString.h>
 #include <Common/ZooKeeper/ZooKeeperCommon.h>
 #include <Common/ZooKeeper/ZooKeeperIO.h>
 #include <Common/Exception.h>
@@ -71,7 +74,7 @@ struct SimpliestRaftServer
         params.election_timeout_lower_bound_ = 200;
         params.election_timeout_upper_bound_ = 400;
         params.reserved_log_items_ = 5;
-        params.snapshot_distance_ = 5;
+        params.snapshot_distance_ = 1; /// forcefully send snapshots
         params.client_req_timeout_ = 3000;
         params.return_method_ = nuraft::raft_params::blocking;
 
@@ -298,6 +301,35 @@ zkutil::TestKeeperStorage::ResponsesForSessions getZooKeeperResponses(nuraft::pt
     return results;
 }
 
+TEST(CoordinationTest, TestStorageSerialization)
+{
+    zkutil::TestKeeperStorage storage;
+    storage.container["/hello"] = zkutil::TestKeeperStorage::Node{.data="world"};
+    storage.container["/hello/somepath"] =  zkutil::TestKeeperStorage::Node{.data="somedata"};
+    storage.session_id_counter = 5;
+    storage.zxid = 156;
+    storage.ephemerals[3] = {"/hello", "/"};
+    storage.ephemerals[1] = {"/hello/somepath"};
+
+    DB::WriteBufferFromOwnString buffer;
+    zkutil::TestKeeperStorageSerializer serializer;
+    serializer.serialize(storage, buffer);
+    std::string serialized = buffer.str();
+    EXPECT_NE(serialized.size(), 0);
+    DB::ReadBufferFromString read(serialized);
+    zkutil::TestKeeperStorage new_storage;
+    serializer.deserialize(new_storage, read);
+
+    EXPECT_EQ(new_storage.container.size(), 3);
+    EXPECT_EQ(new_storage.container["/hello"].data, "world");
+    EXPECT_EQ(new_storage.container["/hello/somepath"].data, "somedata");
+    EXPECT_EQ(new_storage.session_id_counter, 5);
+    EXPECT_EQ(new_storage.zxid, 156);
+    EXPECT_EQ(new_storage.ephemerals.size(), 2);
+    EXPECT_EQ(new_storage.ephemerals[3].size(), 2);
+    EXPECT_EQ(new_storage.ephemerals[1].size(), 1);
+}
+
 TEST(CoordinationTest, TestNuKeeperRaft)
 {
     NuKeeperRaftServer s1(1, "localhost", 44447);
@@ -390,7 +422,29 @@ TEST(CoordinationTest, TestNuKeeperRaft)
     EXPECT_EQ(get_responses[0].response->getOpNum(), Coordination::OpNum::Get);
     EXPECT_EQ(dynamic_cast<Coordination::ZooKeeperGetResponse *>(get_responses[0].response.get())->data, "world");
 
+
+    NuKeeperRaftServer s4(4, "localhost", 44450);
+    nuraft::srv_config fourth_config(4, "localhost:44450");
+    auto ret4 = s2.raft_instance->add_srv(fourth_config);
+    while (s4.raft_instance->get_leader() != 2)
+    {
+        std::cout << "Waiting s1 to join to s2 quorum\n";
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    }
+
+    /// Applied snapshot
+    EXPECT_EQ(s4.raft_instance->get_leader(), 2);
+
+    while (s4.state_machine->getStorage().container.count("/hello") == 0)
+    {
+        std::cout << "Waiting s4 to apply entry\n";
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    }
+
+    EXPECT_EQ(s4.state_machine->getStorage().container["/hello"].data, "world");
+
     s1.launcher.shutdown(5);
     s2.launcher.shutdown(5);
     s3.launcher.shutdown(5);
+    s4.launcher.shutdown(5);
 }

From 4aa11b3494417f43d939d53b02d8773c2cf2944c Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Thu, 21 Jan 2021 18:09:48 +0300
Subject: [PATCH 0131/1238] Remove zkutil namespace from TestKeeperStorage

---
 src/Coordination/NuKeeperStateMachine.cpp        | 10 +++++-----
 src/Coordination/NuKeeperStateMachine.h          |  8 ++++----
 src/Coordination/TestKeeperStorage.cpp           |  7 -------
 src/Coordination/TestKeeperStorage.h             |  2 +-
 src/Coordination/TestKeeperStorageDispatcher.cpp |  4 ----
 src/Coordination/TestKeeperStorageDispatcher.h   |  2 +-
 src/Coordination/TestKeeperStorageSerializer.cpp | 10 +++++-----
 src/Coordination/TestKeeperStorageSerializer.h   |  4 ++--
 src/Coordination/tests/gtest_for_build.cpp       | 16 ++++++++--------
 src/Coordination/ya.make                         |  0
 src/Interpreters/Context.cpp                     |  6 +++---
 src/Interpreters/Context.h                       |  4 ++--
 src/Server/TestKeeperTCPHandler.h                |  2 +-
 13 files changed, 32 insertions(+), 43 deletions(-)
 create mode 100644 src/Coordination/ya.make

diff --git a/src/Coordination/NuKeeperStateMachine.cpp b/src/Coordination/NuKeeperStateMachine.cpp
index 02f3016be32..abd7ca6b167 100644
--- a/src/Coordination/NuKeeperStateMachine.cpp
+++ b/src/Coordination/NuKeeperStateMachine.cpp
@@ -8,10 +8,10 @@
 namespace DB
 {
 
-zkutil::TestKeeperStorage::RequestForSession parseRequest(nuraft::buffer & data)
+TestKeeperStorage::RequestForSession parseRequest(nuraft::buffer & data)
 {
     ReadBufferFromNuraftBuffer buffer(data);
-    zkutil::TestKeeperStorage::RequestForSession request_for_session;
+    TestKeeperStorage::RequestForSession request_for_session;
     readIntBinary(request_for_session.session_id, buffer);
 
     int32_t length;
@@ -29,7 +29,7 @@ zkutil::TestKeeperStorage::RequestForSession parseRequest(nuraft::buffer & data)
     return request_for_session;
 }
 
-nuraft::ptr<nuraft::buffer> writeResponses(zkutil::TestKeeperStorage::ResponsesForSessions & responses)
+nuraft::ptr<nuraft::buffer> writeResponses(TestKeeperStorage::ResponsesForSessions & responses)
 {
     WriteBufferFromNuraftBuffer buffer;
     for (const auto & response_and_session : responses)
@@ -52,7 +52,7 @@ nuraft::ptr<nuraft::buffer> NuKeeperStateMachine::commit(const size_t log_idx, n
 {
     LOG_DEBUG(log, "Commiting logidx {}", log_idx);
     auto request_for_session = parseRequest(data);
-    zkutil::TestKeeperStorage::ResponsesForSessions responses_for_sessions;
+    TestKeeperStorage::ResponsesForSessions responses_for_sessions;
     {
         std::lock_guard lock(storage_lock);
         responses_for_sessions = storage.processRequest(request_for_session.request, request_for_session.session_id);
@@ -107,7 +107,7 @@ NuKeeperStateMachine::StorageSnapshotPtr NuKeeperStateMachine::readSnapshot(nura
     TestKeeperStorageSerializer serializer;
 
     ReadBufferFromNuraftBuffer reader(in);
-    zkutil::TestKeeperStorage new_storage;
+    TestKeeperStorage new_storage;
     serializer.deserialize(new_storage, reader);
     return std::make_shared<StorageSnapshot>(ss, new_storage);
 }
diff --git a/src/Coordination/NuKeeperStateMachine.h b/src/Coordination/NuKeeperStateMachine.h
index c8dd9f8e570..4e5e8406039 100644
--- a/src/Coordination/NuKeeperStateMachine.h
+++ b/src/Coordination/NuKeeperStateMachine.h
@@ -42,7 +42,7 @@ public:
         nuraft::ptr<nuraft::buffer> & data_out,
         bool & is_last_obj) override;
 
-    zkutil::TestKeeperStorage & getStorage()
+    TestKeeperStorage & getStorage()
     {
         return storage;
     }
@@ -50,13 +50,13 @@ public:
 private:
     struct StorageSnapshot
     {
-        StorageSnapshot(const nuraft::ptr<nuraft::snapshot> & s, const zkutil::TestKeeperStorage & storage_)
+        StorageSnapshot(const nuraft::ptr<nuraft::snapshot> & s, const TestKeeperStorage & storage_)
             : snapshot(s)
             , storage(storage_)
         {}
 
         nuraft::ptr<nuraft::snapshot> snapshot;
-        zkutil::TestKeeperStorage storage;
+        TestKeeperStorage storage;
     };
 
     using StorageSnapshotPtr = std::shared_ptr<StorageSnapshot>;
@@ -67,7 +67,7 @@ private:
 
     void writeSnapshot(const StorageSnapshotPtr & snapshot, nuraft::ptr<nuraft::buffer> & out) const;
 
-    zkutil::TestKeeperStorage storage;
+    TestKeeperStorage storage;
     /// Mutex for snapshots
     std::mutex snapshots_lock;
 
diff --git a/src/Coordination/TestKeeperStorage.cpp b/src/Coordination/TestKeeperStorage.cpp
index 31dc4116dc8..ef3ae1dfd16 100644
--- a/src/Coordination/TestKeeperStorage.cpp
+++ b/src/Coordination/TestKeeperStorage.cpp
@@ -17,13 +17,6 @@ namespace ErrorCodes
     extern const int BAD_ARGUMENTS;
 }
 
-}
-
-namespace zkutil
-{
-
-using namespace DB;
-
 static String parentPath(const String & path)
 {
     auto rslash_pos = path.rfind('/');
diff --git a/src/Coordination/TestKeeperStorage.h b/src/Coordination/TestKeeperStorage.h
index 76111490c78..cc2ac34e7aa 100644
--- a/src/Coordination/TestKeeperStorage.h
+++ b/src/Coordination/TestKeeperStorage.h
@@ -8,7 +8,7 @@
 #include <unordered_set>
 #include <vector>
 
-namespace zkutil
+namespace DB
 {
 
 using namespace DB;
diff --git a/src/Coordination/TestKeeperStorageDispatcher.cpp b/src/Coordination/TestKeeperStorageDispatcher.cpp
index 2f8fbbb8fb6..63cb5920f9b 100644
--- a/src/Coordination/TestKeeperStorageDispatcher.cpp
+++ b/src/Coordination/TestKeeperStorageDispatcher.cpp
@@ -11,10 +11,6 @@ namespace ErrorCodes
     extern const int TIMEOUT_EXCEEDED;
 }
 
-}
-namespace zkutil
-{
-
 void TestKeeperStorageDispatcher::processingThread()
 {
     setThreadName("TestKeeperSProc");
diff --git a/src/Coordination/TestKeeperStorageDispatcher.h b/src/Coordination/TestKeeperStorageDispatcher.h
index df4ac2cf99d..c1c739db87d 100644
--- a/src/Coordination/TestKeeperStorageDispatcher.h
+++ b/src/Coordination/TestKeeperStorageDispatcher.h
@@ -5,7 +5,7 @@
 #include <Coordination/TestKeeperStorage.h>
 #include <functional>
 
-namespace zkutil
+namespace DB
 {
 
 using ZooKeeperResponseCallback = std::function<void(const Coordination::ZooKeeperResponsePtr & response)>;
diff --git a/src/Coordination/TestKeeperStorageSerializer.cpp b/src/Coordination/TestKeeperStorageSerializer.cpp
index bf7015374be..cb3a2643f68 100644
--- a/src/Coordination/TestKeeperStorageSerializer.cpp
+++ b/src/Coordination/TestKeeperStorageSerializer.cpp
@@ -8,7 +8,7 @@ namespace DB
 
 namespace
 {
-    void writeNode(const zkutil::TestKeeperStorage::Node & node, WriteBuffer & out)
+    void writeNode(const TestKeeperStorage::Node & node, WriteBuffer & out)
     {
         Coordination::write(node.data, out);
         Coordination::write(node.acls, out);
@@ -18,7 +18,7 @@ namespace
         Coordination::write(node.seq_num, out);
     }
 
-    void readNode(zkutil::TestKeeperStorage::Node & node, ReadBuffer & in)
+    void readNode(TestKeeperStorage::Node & node, ReadBuffer & in)
     {
         Coordination::read(node.data, in);
         Coordination::read(node.acls, in);
@@ -29,7 +29,7 @@ namespace
     }
 }
 
-void TestKeeperStorageSerializer::serialize(const zkutil::TestKeeperStorage & storage, WriteBuffer & out) const
+void TestKeeperStorageSerializer::serialize(const TestKeeperStorage & storage, WriteBuffer & out) const
 {
     Coordination::write(storage.zxid, out);
     Coordination::write(storage.session_id_counter, out);
@@ -49,7 +49,7 @@ void TestKeeperStorageSerializer::serialize(const zkutil::TestKeeperStorage & st
     }
 }
 
-void TestKeeperStorageSerializer::deserialize(zkutil::TestKeeperStorage & storage, ReadBuffer & in) const
+void TestKeeperStorageSerializer::deserialize(TestKeeperStorage & storage, ReadBuffer & in) const
 {
     int64_t session_id_counter, zxid;
     Coordination::read(zxid, in);
@@ -63,7 +63,7 @@ void TestKeeperStorageSerializer::deserialize(zkutil::TestKeeperStorage & storag
     {
         std::string path;
         Coordination::read(path, in);
-        zkutil::TestKeeperStorage::Node node;
+        TestKeeperStorage::Node node;
         readNode(node, in);
         storage.container[path] = node;
     }
diff --git a/src/Coordination/TestKeeperStorageSerializer.h b/src/Coordination/TestKeeperStorageSerializer.h
index b4453574cfd..5a6a0cea0a5 100644
--- a/src/Coordination/TestKeeperStorageSerializer.h
+++ b/src/Coordination/TestKeeperStorageSerializer.h
@@ -9,9 +9,9 @@ namespace DB
 class TestKeeperStorageSerializer
 {
 public:
-    void serialize(const zkutil::TestKeeperStorage & storage, WriteBuffer & out) const;
+    void serialize(const TestKeeperStorage & storage, WriteBuffer & out) const;
 
-    void deserialize(zkutil::TestKeeperStorage & storage, ReadBuffer & in) const;
+    void deserialize(TestKeeperStorage & storage, ReadBuffer & in) const;
 };
 
 }
diff --git a/src/Coordination/tests/gtest_for_build.cpp b/src/Coordination/tests/gtest_for_build.cpp
index 09c5db03514..0c7ff8a579c 100644
--- a/src/Coordination/tests/gtest_for_build.cpp
+++ b/src/Coordination/tests/gtest_for_build.cpp
@@ -276,9 +276,9 @@ nuraft::ptr<nuraft::buffer> getZooKeeperLogEntry(int64_t session_id, const Coord
     return buf.getBuffer();
 }
 
-zkutil::TestKeeperStorage::ResponsesForSessions getZooKeeperResponses(nuraft::ptr<nuraft::buffer> & buffer, const Coordination::ZooKeeperRequestPtr & request)
+DB::TestKeeperStorage::ResponsesForSessions getZooKeeperResponses(nuraft::ptr<nuraft::buffer> & buffer, const Coordination::ZooKeeperRequestPtr & request)
 {
-    zkutil::TestKeeperStorage::ResponsesForSessions results;
+    DB::TestKeeperStorage::ResponsesForSessions results;
     DB::ReadBufferFromNuraftBuffer buf(buffer);
     while (!buf.eof())
     {
@@ -296,28 +296,28 @@ zkutil::TestKeeperStorage::ResponsesForSessions getZooKeeperResponses(nuraft::pt
         Coordination::read(err, buf);
         auto response = request->makeResponse();
         response->readImpl(buf);
-        results.push_back(zkutil::TestKeeperStorage::ResponseForSession{session_id, response});
+        results.push_back(DB::TestKeeperStorage::ResponseForSession{session_id, response});
     }
     return results;
 }
 
 TEST(CoordinationTest, TestStorageSerialization)
 {
-    zkutil::TestKeeperStorage storage;
-    storage.container["/hello"] = zkutil::TestKeeperStorage::Node{.data="world"};
-    storage.container["/hello/somepath"] =  zkutil::TestKeeperStorage::Node{.data="somedata"};
+    DB::TestKeeperStorage storage;
+    storage.container["/hello"] = DB::TestKeeperStorage::Node{.data="world"};
+    storage.container["/hello/somepath"] =  DB::TestKeeperStorage::Node{.data="somedata"};
     storage.session_id_counter = 5;
     storage.zxid = 156;
     storage.ephemerals[3] = {"/hello", "/"};
     storage.ephemerals[1] = {"/hello/somepath"};
 
     DB::WriteBufferFromOwnString buffer;
-    zkutil::TestKeeperStorageSerializer serializer;
+    DB::TestKeeperStorageSerializer serializer;
     serializer.serialize(storage, buffer);
     std::string serialized = buffer.str();
     EXPECT_NE(serialized.size(), 0);
     DB::ReadBufferFromString read(serialized);
-    zkutil::TestKeeperStorage new_storage;
+    DB::TestKeeperStorage new_storage;
     serializer.deserialize(new_storage, read);
 
     EXPECT_EQ(new_storage.container.size(), 3);
diff --git a/src/Coordination/ya.make b/src/Coordination/ya.make
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index ad6b09b2d88..959b96722e0 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -306,7 +306,7 @@ struct ContextShared
     ConfigurationPtr zookeeper_config;                      /// Stores zookeeper configs
 
     mutable std::mutex test_keeper_storage_dispatcher_mutex;
-    mutable std::shared_ptr<zkutil::TestKeeperStorageDispatcher> test_keeper_storage_dispatcher;
+    mutable std::shared_ptr<TestKeeperStorageDispatcher> test_keeper_storage_dispatcher;
     mutable std::mutex auxiliary_zookeepers_mutex;
     mutable std::map<String, zkutil::ZooKeeperPtr> auxiliary_zookeepers;    /// Map for auxiliary ZooKeeper clients.
     ConfigurationPtr auxiliary_zookeepers_config;           /// Stores auxiliary zookeepers configs
@@ -1531,11 +1531,11 @@ zkutil::ZooKeeperPtr Context::getZooKeeper() const
     return shared->zookeeper;
 }
 
-std::shared_ptr<zkutil::TestKeeperStorageDispatcher> & Context::getTestKeeperStorageDispatcher() const
+std::shared_ptr<TestKeeperStorageDispatcher> & Context::getTestKeeperStorageDispatcher() const
 {
     std::lock_guard lock(shared->test_keeper_storage_dispatcher_mutex);
     if (!shared->test_keeper_storage_dispatcher)
-        shared->test_keeper_storage_dispatcher = std::make_shared<zkutil::TestKeeperStorageDispatcher>();
+        shared->test_keeper_storage_dispatcher = std::make_shared<TestKeeperStorageDispatcher>();
 
     return shared->test_keeper_storage_dispatcher;
 }
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index 9c8d5252373..616d2d97de0 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -40,7 +40,6 @@ namespace Poco
 namespace zkutil
 {
     class ZooKeeper;
-    class TestKeeperStorageDispatcher;
 }
 
 
@@ -107,6 +106,7 @@ using StoragePolicyPtr = std::shared_ptr<const StoragePolicy>;
 using StoragePoliciesMap = std::map<String, StoragePolicyPtr>;
 class StoragePolicySelector;
 using StoragePolicySelectorPtr = std::shared_ptr<const StoragePolicySelector>;
+class TestKeeperStorageDispatcher;
 
 class IOutputFormat;
 using OutputFormatPtr = std::shared_ptr<IOutputFormat>;
@@ -513,7 +513,7 @@ public:
     std::shared_ptr<zkutil::ZooKeeper> getAuxiliaryZooKeeper(const String & name) const;
 
 
-    std::shared_ptr<zkutil::TestKeeperStorageDispatcher> & getTestKeeperStorageDispatcher() const;
+    std::shared_ptr<TestKeeperStorageDispatcher> & getTestKeeperStorageDispatcher() const;
 
     /// Set auxiliary zookeepers configuration at server starting or configuration reloading.
     void reloadAuxiliaryZooKeepersConfigIfChanged(const ConfigurationPtr & config);
diff --git a/src/Server/TestKeeperTCPHandler.h b/src/Server/TestKeeperTCPHandler.h
index 38f4db56c69..e7372e8dd82 100644
--- a/src/Server/TestKeeperTCPHandler.h
+++ b/src/Server/TestKeeperTCPHandler.h
@@ -28,7 +28,7 @@ private:
     IServer & server;
     Poco::Logger * log;
     Context global_context;
-    std::shared_ptr<zkutil::TestKeeperStorageDispatcher> test_keeper_storage_dispatcher;
+    std::shared_ptr<TestKeeperStorageDispatcher> test_keeper_storage_dispatcher;
     Poco::Timespan operation_timeout;
     Poco::Timespan session_timeout;
     int64_t session_id;

From 900580af026a10309ce3e3fe5789f4ea95468d7e Mon Sep 17 00:00:00 2001
From: Pavel Kruglov <avogar@sandbox-633380738>
Date: Thu, 21 Jan 2021 20:33:08 +0300
Subject: [PATCH 0132/1238] Add parallel select when there is one part with
 level>0 in select final

---
 .../MergeTree/MergeTreeDataSelectExecutor.cpp | 81 ++++++++++++++-----
 .../optimized_select_final_one_part.xml       | 20 +++++
 ...t_merge_across_partitions_select_final.sql | 22 +++--
 3 files changed, 98 insertions(+), 25 deletions(-)
 create mode 100644 tests/performance/optimized_select_final_one_part.xml

diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index 4e1f307137a..9ac4b623f9f 100644
--- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -1305,6 +1305,12 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
         data_settings->index_granularity,
         index_granularity_bytes);
 
+    const size_t min_marks_for_concurrent_read = roundRowsOrBytesToMarks(
+        settings.merge_tree_min_rows_for_concurrent_read,
+        settings.merge_tree_min_bytes_for_concurrent_read,
+        data_settings->index_granularity,
+        index_granularity_bytes);
+
     if (sum_marks > max_marks_to_use_cache)
         use_uncompressed_cache = false;
 
@@ -1347,25 +1353,60 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
         {
             Pipes pipes;
 
-            for (auto part_it = parts_to_merge_ranges[range_index]; part_it != parts_to_merge_ranges[range_index + 1]; ++part_it)
+            /// If do_not_merge_across_partitions_select_final is true and there is only one part in partition
+            /// with level > 0 then we won't postprocess this part and if num_streams > 1 we
+            /// can use parallel select on this part.
+            if (num_streams > 1 && settings.do_not_merge_across_partitions_select_final &&
+                std::distance(parts_to_merge_ranges[range_index], parts_to_merge_ranges[range_index + 1]) == 1 &&
+                parts_to_merge_ranges[range_index]->data_part->info.level > 0)
             {
-                auto source_processor = std::make_shared<MergeTreeSelectProcessor>(
+                MergeTreeReadPoolPtr pool = std::make_shared<MergeTreeReadPool>(
+                    num_streams,
+                    sum_marks,
+                    min_marks_for_concurrent_read,
+                    std::vector{*std::move(parts_to_merge_ranges[range_index])},
                     data,
                     metadata_snapshot,
-                    part_it->data_part,
-                    max_block_size,
-                    settings.preferred_block_size_bytes,
-                    settings.preferred_max_column_in_block_size_bytes,
-                    column_names,
-                    part_it->ranges,
-                    use_uncompressed_cache,
                     query_info.prewhere_info,
                     true,
-                    reader_settings,
-                    virt_columns,
-                    part_it->part_index_in_query);
+                    column_names,
+                    MergeTreeReadPool::BackoffSettings(settings),
+                    settings.preferred_block_size_bytes,
+                    false);
 
-                pipes.emplace_back(std::move(source_processor));
+                for (size_t i = 0; i < num_streams; ++i)
+                {
+                    auto source = std::make_shared<MergeTreeThreadSelectBlockInputProcessor>(
+                        i, pool, min_marks_for_concurrent_read, max_block_size,
+                        settings.preferred_block_size_bytes, settings.preferred_max_column_in_block_size_bytes,
+                        data, metadata_snapshot, use_uncompressed_cache,
+                        query_info.prewhere_info, reader_settings, virt_columns);
+
+                    pipes.emplace_back(std::move(source));
+                }
+            }
+            else
+            {
+                for (auto part_it = parts_to_merge_ranges[range_index]; part_it != parts_to_merge_ranges[range_index + 1]; ++part_it)
+                {
+                    auto source_processor = std::make_shared<MergeTreeSelectProcessor>(
+                        data,
+                        metadata_snapshot,
+                        part_it->data_part,
+                        max_block_size,
+                        settings.preferred_block_size_bytes,
+                        settings.preferred_max_column_in_block_size_bytes,
+                        column_names,
+                        part_it->ranges,
+                        use_uncompressed_cache,
+                        query_info.prewhere_info,
+                        true,
+                        reader_settings,
+                        virt_columns,
+                        part_it->part_index_in_query);
+
+                    pipes.emplace_back(std::move(source_processor));
+                }
             }
 
             if (pipes.empty())
@@ -1380,6 +1421,13 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
             plan = createPlanFromPipe(std::move(pipe), "with final");
         }
 
+        auto expression_step = std::make_unique<ExpressionStep>(
+            plan->getCurrentDataStream(),
+            metadata_snapshot->getSortingKey().expression->getActionsDAG().clone());
+
+        expression_step->setStepDescription("Calculate sorting key expression");
+        plan->addStep(std::move(expression_step));
+
         /// If do_not_merge_across_partitions_select_final is true and there is only one part in partition
         /// with level > 0 then we won't postprocess this part
         if (settings.do_not_merge_across_partitions_select_final &&
@@ -1390,13 +1438,6 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
             continue;
         }
 
-        auto expression_step = std::make_unique<ExpressionStep>(
-                plan->getCurrentDataStream(),
-                metadata_snapshot->getSortingKey().expression->getActionsDAG().clone());
-
-        expression_step->setStepDescription("Calculate sorting key expression");
-        plan->addStep(std::move(expression_step));
-
         Names sort_columns = metadata_snapshot->getSortingKeyColumns();
         SortDescription sort_description;
         size_t sort_columns_size = sort_columns.size();
diff --git a/tests/performance/optimized_select_final_one_part.xml b/tests/performance/optimized_select_final_one_part.xml
new file mode 100644
index 00000000000..6ba8b0a71b0
--- /dev/null
+++ b/tests/performance/optimized_select_final_one_part.xml
@@ -0,0 +1,20 @@
+<test max_ignored_relative_change="0.2">
+    <settings>
+        <do_not_merge_across_partitions_select_final>1</do_not_merge_across_partitions_select_final>
+    </settings>
+
+    <create_query>
+        CREATE TABLE optimized_select_final (t DateTime, x Int32, s String)
+        ENGINE = ReplacingMergeTree()
+        PARTITION BY toYYYYMM(t) ORDER BY x
+    </create_query>
+
+    <fill_query>INSERT INTO optimized_select_final SELECT toDate('2020-01-01'), number, 'string' FROM numbers(500000000)</fill_query>
+
+    <fill_query>OPTIMIZE TABLE optimized_select_final FINAL</fill_query>
+
+    <query>SELECT max(x) FROM optimized_select_final FINAL where s = 'string' FORMAT Null</query>
+
+    <drop_query>DROP TABLE IF EXISTS  optimized_select_final</drop_query>
+
+</test>
diff --git a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql
index d332946605d..c24990b598a 100644
--- a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql
+++ b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql
@@ -1,15 +1,27 @@
 DROP TABLE IF EXISTS select_final;
 
-CREATE TABLE select_final (t DateTime, x Int32) ENGINE = ReplacingMergeTree() PARTITION BY toYYYYMM(t) ORDER BY x; 
+CREATE TABLE select_final (t DateTime, x Int32, string String) ENGINE = ReplacingMergeTree() PARTITION BY toYYYYMM(t) ORDER BY (x, t); 
 
-INSERT INTO select_final SELECT toDate('2000-01-01'), number FROM numbers(2);
-INSERT INTO select_final SELECT toDate('2000-01-01'), number + 1 FROM numbers(2);
+INSERT INTO select_final SELECT toDate('2000-01-01'), number, '' FROM numbers(2);
+INSERT INTO select_final SELECT toDate('2000-01-01'), number + 1, '' FROM numbers(2);
 
-INSERT INTO select_final SELECT toDate('2020-01-01'), number FROM numbers(2);
-INSERT INTO select_final SELECT toDate('2020-01-01'), number + 1 FROM numbers(2);
+INSERT INTO select_final SELECT toDate('2020-01-01'), number, '' FROM numbers(2);
+INSERT INTO select_final SELECT toDate('2020-01-01'), number + 1, '' FROM numbers(2);
 
 
 SELECT * FROM select_final FINAL ORDER BY x SETTINGS do_not_merge_across_partitions_select_final = 1;
 
+TRUNCATE TABLE select_final;
+
+INSERT INTO select_final SELECT toDate('2000-01-01'), number, '' FROM numbers(2);
+INSERT INTO select_final SELECT toDate('2000-01-01'), number, 'updated' FROM numbers(2);
+
+OPTIMIZE TABLE select_final FINAL;
+
+INSERT INTO select_final SELECT toDate('2020-01-01'), number, '' FROM numbers(2);
+INSERT INTO select_final SELECT toDate('2020-01-01'), number, 'updated' FROM numbers(2);
+
+SELECT max(x) FROM select_final FINAL where string = 'updated' SETTINGS do_not_merge_across_partitions_select_final = 1;
+
 DROP TABLE select_final;
 

From 7706eb4f369617c6198ca1007fcd979ff3c73d39 Mon Sep 17 00:00:00 2001
From: Pavel Kruglov <avogar@sandbox-633380738>
Date: Thu, 21 Jan 2021 22:06:49 +0300
Subject: [PATCH 0133/1238] update test reference

---
 ...t_merge_across_partitions_select_final.reference | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.reference b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.reference
index 4c85a1d418a..facdf3dab26 100644
--- a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.reference
+++ b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.reference
@@ -1,6 +1,7 @@
-2000-01-01 00:00:00	0
-2020-01-01 00:00:00	0
-2000-01-01 00:00:00	1
-2020-01-01 00:00:00	1
-2000-01-01 00:00:00	2
-2020-01-01 00:00:00	2
+2000-01-01 00:00:00	0	
+2020-01-01 00:00:00	0	
+2000-01-01 00:00:00	1	
+2020-01-01 00:00:00	1	
+2000-01-01 00:00:00	2	
+2020-01-01 00:00:00	2	
+1

From c2e6d6cfe8007afb13dc77d474f6e31d063014af Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Thu, 21 Jan 2021 23:01:25 +0300
Subject: [PATCH 0134/1238] Starting nukeeper server

---
 src/Coordination/NuKeeperServer.cpp       | 13 +++++++
 src/Coordination/NuKeeperServer.h         | 43 +++++++++++++++++++++++
 src/Coordination/NuKeeperStateMachine.cpp |  1 -
 src/Coordination/TestKeeperStorage.h      |  1 +
 4 files changed, 57 insertions(+), 1 deletion(-)
 create mode 100644 src/Coordination/NuKeeperServer.cpp
 create mode 100644 src/Coordination/NuKeeperServer.h

diff --git a/src/Coordination/NuKeeperServer.cpp b/src/Coordination/NuKeeperServer.cpp
new file mode 100644
index 00000000000..162e521f1c8
--- /dev/null
+++ b/src/Coordination/NuKeeperServer.cpp
@@ -0,0 +1,13 @@
+#include <Coordination/NuKeeperServer.h>
+
+namespace DB
+{
+
+void NuKeeperServer::addServer(int server_id_, const std::string & server_uri)
+{
+    if (raft_instance->is_leader())
+    {
+        nuraft::srv_config first_config(server_id, server_uri);
+    }
+
+}
diff --git a/src/Coordination/NuKeeperServer.h b/src/Coordination/NuKeeperServer.h
new file mode 100644
index 00000000000..0dc536b1593
--- /dev/null
+++ b/src/Coordination/NuKeeperServer.h
@@ -0,0 +1,43 @@
+#pragma once
+
+#include <libnuraft/nuraft.hxx>
+#include <Coordination/InMemoryLogStore.h>
+#include <Coordination/InMemoryStateManager.h>
+#include <Coordination/NuKeeperStateMachine.h>
+#include <Coordination/TestKeeperStorage.h>
+
+namespace DB
+{
+
+class NuKeeperServer
+{
+private:
+    int server_id;
+
+    std::string hostname;
+
+    int port;
+
+    std::string endpoint;
+
+    nuraft::ptr<StateMachine> state_machine;
+
+    nuraft::ptr<nuraft::state_mgr> state_manager;
+
+    nuraft::raft_launcher launcher;
+
+    nuraft::ptr<nuraft::raft_server> raft_instance;
+
+public:
+    NuKeeperServer(int server_id, const std::string & hostname, int port);
+
+    void startup();
+
+    TestKeeperStorage::ResponsesForSessions putRequests(const TestKeeperStorage::RequestsForSessions & requests);
+
+    void addServer(int server_id_, const std::string & server_uri);
+
+    void shutdown();
+};
+
+}
diff --git a/src/Coordination/NuKeeperStateMachine.cpp b/src/Coordination/NuKeeperStateMachine.cpp
index abd7ca6b167..136ead44596 100644
--- a/src/Coordination/NuKeeperStateMachine.cpp
+++ b/src/Coordination/NuKeeperStateMachine.cpp
@@ -81,7 +81,6 @@ bool NuKeeperStateMachine::apply_snapshot(nuraft::snapshot & s)
 
 nuraft::ptr<nuraft::snapshot> NuKeeperStateMachine::last_snapshot()
 {
-
     LOG_DEBUG(log, "Trying to get last snapshot");
    // Just return the latest snapshot.
     std::lock_guard<std::mutex> lock(snapshots_lock);
diff --git a/src/Coordination/TestKeeperStorage.h b/src/Coordination/TestKeeperStorage.h
index cc2ac34e7aa..2c7c6bad4fa 100644
--- a/src/Coordination/TestKeeperStorage.h
+++ b/src/Coordination/TestKeeperStorage.h
@@ -78,6 +78,7 @@ public:
     {
         return session_id_counter++;
     }
+
     ResponsesForSessions processRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id);
     ResponsesForSessions finalize(const RequestsForSessions & expired_requests);
 };

From 8461e896451bb85772a7220ebfb15d3cd2ce2755 Mon Sep 17 00:00:00 2001
From: keenwolf <keenwolf@gmail.com>
Date: Fri, 22 Jan 2021 11:43:31 +0800
Subject: [PATCH 0135/1238] Remove getArgumentsThatAreAlwaysConstant, also add
 2 testcases

---
 src/Functions/FunctionFile.cpp                           | 9 ++++-----
 .../01658_read_file_to_stringcolumn.reference            | 2 ++
 .../0_stateless/01658_read_file_to_stringcolumn.sh       | 4 ++++
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp
index c493b2a2b88..afd24f4d575 100644
--- a/src/Functions/FunctionFile.cpp
+++ b/src/Functions/FunctionFile.cpp
@@ -45,7 +45,6 @@ namespace DB
         }
 
         bool useDefaultImplementationForConstants() const override { return true; }
-        ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
 
         ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
         {
@@ -101,14 +100,14 @@ namespace DB
         }
 
     private:
-        void checkReadIsAllowed(const std::string & user_files_path, const std::string & file_path) const
+        void checkReadIsAllowed(const std::string & user_files_absolute_path, const std::string & file_absolute_path) const
         {
             // If run in Local mode, no need for path checking.
             if (context.getApplicationType() != Context::ApplicationType::LOCAL)
-                if (file_path.find(user_files_path) != 0)
-                    throw Exception("File is not inside " + user_files_path, ErrorCodes::DATABASE_ACCESS_DENIED);
+                if (file_absolute_path.find(user_files_absolute_path) != 0)
+                    throw Exception("File is not inside " + user_files_absolute_path, ErrorCodes::DATABASE_ACCESS_DENIED);
 
-            Poco::File path_poco_file = Poco::File(file_path);
+            Poco::File path_poco_file = Poco::File(file_absolute_path);
             if (path_poco_file.exists() && path_poco_file.isDirectory())
                 throw Exception("File can't be a directory", ErrorCodes::INCORRECT_FILE_NAME);
         }
diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference
index 82bc7c9ca90..a22076de920 100644
--- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference
+++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference
@@ -8,6 +8,8 @@ ccccccccc	aaaaaaaaa	bbbbbbbbb
 :107
 :79
 :35
+:35
+:35
 699415
 aaaaaaaaa	bbbbbbbbb
 ccccccccc	aaaaaaaaa	bbbbbbbbb
diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
index 1696fc710ad..44810636a7c 100755
--- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
+++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
@@ -34,6 +34,10 @@ echo "clickhouse-client --query "'"select file('"'dir'), file('b.txt')"'";echo :
 # Test path out of the user_files directory. It's not allowed in client mode
 echo "clickhouse-client --query "'"select file('"'/tmp/c.txt'), file('b.txt')"'";echo :$?' | bash 2>/dev/null
 
+# Test relative path consists of ".." whose absolute path is out of the user_files directory.
+echo "clickhouse-client --query "'"select file('"'/var/lib/clickhouse/user_files/../../../../tmp/c.txt'), file('b.txt')"'";echo :$?' | bash 2>/dev/null
+echo "clickhouse-client --query "'"select file('"'../a.txt'), file('b.txt')"'";echo :$?' | bash 2>/dev/null
+
 
 
 ### 2nd TEST in LOCAL mode.

From b3c0baa96775422256fdecd91d6a04b2677dcbe1 Mon Sep 17 00:00:00 2001
From: keenwolf <keenwolf@gmail.com>
Date: Fri, 22 Jan 2021 15:29:39 +0800
Subject: [PATCH 0136/1238] fix mkdir with -p

---
 tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
index 44810636a7c..56049b299fb 100755
--- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
+++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
@@ -12,7 +12,7 @@ echo -n aaaaaaaaa > /var/lib/clickhouse/user_files/a.txt
 echo -n bbbbbbbbb > /var/lib/clickhouse/user_files/b.txt
 echo -n ccccccccc > /var/lib/clickhouse/user_files/c.txt
 echo -n ccccccccc > /tmp/c.txt
-mkdir /var/lib/clickhouse/user_files/dir
+mkdir -p /var/lib/clickhouse/user_files/dir
 
 ### 1st TEST in CLIENT mode.
 ${CLICKHOUSE_CLIENT} --query "drop table if exists data;"
@@ -45,7 +45,7 @@ echo "clickhouse-client --query "'"select file('"'../a.txt'), file('b.txt')"'";e
 echo -n aaaaaaaaa > a.txt
 echo -n bbbbbbbbb > b.txt
 echo -n ccccccccc > c.txt
-mkdir dir
+mkdir -p dir
 #Test for large files, with length : 699415
 c_count=$(wc -c ${CURDIR}/01518_nullable_aggregate_states2.reference | awk '{print $1}')
 echo $c_count

From c965e66a3baea696baeaa0c4ab92aaa4ef4543ab Mon Sep 17 00:00:00 2001
From: Ivan Lezhankin <ilezhankin@yandex-team.ru>
Date: Fri, 22 Jan 2021 15:01:54 +0300
Subject: [PATCH 0137/1238] Increase timeout for crash report

---
 tests/integration/test_send_crash_reports/test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/integration/test_send_crash_reports/test.py b/tests/integration/test_send_crash_reports/test.py
index a3c35ca1537..a9b141ebfd3 100644
--- a/tests/integration/test_send_crash_reports/test.py
+++ b/tests/integration/test_send_crash_reports/test.py
@@ -26,12 +26,12 @@ def started_node():
 def test_send_segfault(started_node, ):
     started_node.copy_file_to_container(os.path.join(SCRIPT_DIR, "fake_sentry_server.py"), "/fake_sentry_server.py")
     started_node.exec_in_container(["bash", "-c", "python3 /fake_sentry_server.py > /fake_sentry_server.log 2>&1"], detach=True, user="root")
-    time.sleep(0.5)
+    time.sleep(1)
     started_node.exec_in_container(["bash", "-c", "pkill -11 clickhouse"], user="root")
 
     result = None
     for attempt in range(1, 6):
-        time.sleep(0.25 * attempt)
+        time.sleep(attempt)
         result = started_node.exec_in_container(['cat', fake_sentry_server.RESULT_PATH], user='root')
         if result == 'OK':
             break

From 67f1dcd9d3fabad9b0698c08bf60597610dade8f Mon Sep 17 00:00:00 2001
From: keenwolf <keenwolf@gmail.com>
Date: Fri, 22 Jan 2021 20:37:34 +0800
Subject: [PATCH 0138/1238] adjust the testcases due to the CI test environment
 change

---
 .../01658_read_file_to_stringcolumn.sh           | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
index 56049b299fb..d66b245dc74 100755
--- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
+++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
@@ -20,23 +20,23 @@ ${CLICKHOUSE_CLIENT} --query "create table data (A String, B String) engine=Merg
 
 
 # Valid cases:
-${CLICKHOUSE_CLIENT} --query "select file('a.txt'), file('b.txt');";echo ":"$?
-${CLICKHOUSE_CLIENT} --query "insert into data select file('a.txt'), file('b.txt');";echo ":"$?
-${CLICKHOUSE_CLIENT} --query "insert into data select file('a.txt'), file('b.txt');";echo ":"$?
-${CLICKHOUSE_CLIENT} --query "select file('c.txt'), * from data";echo ":"$?
+${CLICKHOUSE_CLIENT} --query "select file('/var/lib/clickhouse/user_files/a.txt'), file('/var/lib/clickhouse/user_files/b.txt');";echo ":"$?
+${CLICKHOUSE_CLIENT} --query "insert into data select file('/var/lib/clickhouse/user_files/a.txt'), file('/var/lib/clickhouse/user_files/b.txt');";echo ":"$?
+${CLICKHOUSE_CLIENT} --query "insert into data select file('/var/lib/clickhouse/user_files/a.txt'), file('/var/lib/clickhouse/user_files/b.txt');";echo ":"$?
+${CLICKHOUSE_CLIENT} --query "select file('/var/lib/clickhouse/user_files/c.txt'), * from data";echo ":"$?
 
 
 # Invalid cases: (Here using sub-shell to catch exception avoiding the test quit)
 # Test non-exists file
-echo "clickhouse-client --query "'"select file('"'nonexist.txt'), file('b.txt')"'";echo :$?' | bash 2>/dev/null
+echo "clickhouse-client --query "'"select file('"'nonexist.txt'), file('/var/lib/clickhouse/user_files/b.txt')"'";echo :$?' | bash 2>/dev/null
 # Test isDir
-echo "clickhouse-client --query "'"select file('"'dir'), file('b.txt')"'";echo :$?' | bash 2>/dev/null
+echo "clickhouse-client --query "'"select file('"'/var/lib/clickhouse/user_files/dir'), file('/var/lib/clickhouse/user_files/b.txt')"'";echo :$?' | bash 2>/dev/null
 # Test path out of the user_files directory. It's not allowed in client mode
-echo "clickhouse-client --query "'"select file('"'/tmp/c.txt'), file('b.txt')"'";echo :$?' | bash 2>/dev/null
+echo "clickhouse-client --query "'"select file('"'/tmp/c.txt'), file('/var/lib/clickhouse/user_files/b.txt')"'";echo :$?' | bash 2>/dev/null
 
 # Test relative path consists of ".." whose absolute path is out of the user_files directory.
 echo "clickhouse-client --query "'"select file('"'/var/lib/clickhouse/user_files/../../../../tmp/c.txt'), file('b.txt')"'";echo :$?' | bash 2>/dev/null
-echo "clickhouse-client --query "'"select file('"'../a.txt'), file('b.txt')"'";echo :$?' | bash 2>/dev/null
+echo "clickhouse-client --query "'"select file('"'../../../../a.txt'), file('/var/lib/clickhouse/user_files/b.txt')"'";echo :$?' | bash 2>/dev/null
 
 
From 6d2b9ebbb27ac2e453f028c361f01459b046a196 Mon Sep 17 00:00:00 2001
From: Pavel Kruglov <avogar@sandbox-633380738>
Date: Fri, 22 Jan 2021 17:16:22 +0300
Subject: [PATCH 0139/1238] Update performance test

---
 tests/performance/optimized_select_final_one_part.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/performance/optimized_select_final_one_part.xml b/tests/performance/optimized_select_final_one_part.xml
index 6ba8b0a71b0..3724bc8f208 100644
--- a/tests/performance/optimized_select_final_one_part.xml
+++ b/tests/performance/optimized_select_final_one_part.xml
@@ -9,7 +9,7 @@
         PARTITION BY toYYYYMM(t) ORDER BY x
     </create_query>
 
-    <fill_query>INSERT INTO optimized_select_final SELECT toDate('2020-01-01'), number, 'string' FROM numbers(500000000)</fill_query>
+    <fill_query>INSERT INTO optimized_select_final SELECT toDate('2020-01-01'), number, 'string' FROM numbers(100000000)</fill_query>
 
     <fill_query>OPTIMIZE TABLE optimized_select_final FINAL</fill_query>
 

From c1e36cfe7063250d020c0d687ea77301e74c6516 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Fri, 22 Jan 2021 19:04:57 +0300
Subject: [PATCH 0140/1238] Something working

---
 programs/server/Server.cpp                    |   3 +
 src/Coordination/NuKeeperServer.cpp           | 158 +++++++++++++++++-
 src/Coordination/NuKeeperServer.h             |  29 +++-
 src/Coordination/NuKeeperStateMachine.cpp     |  29 +++-
 src/Coordination/TestKeeperStorage.cpp        |   1 +
 .../TestKeeperStorageDispatcher.cpp           |  27 +--
 .../TestKeeperStorageDispatcher.h             |  17 +-
 utils/zookeeper-test/main.cpp                 |   5 +
 8 files changed, 231 insertions(+), 38 deletions(-)

diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 94cd6854f78..df1513e6b65 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -830,6 +830,9 @@ int Server::main(const std::vector<std::string> & /*args*/)
         listen_try = true;
     }
 
+    /// Initialize test keeper raft
+    global_context->getTestKeeperStorageDispatcher();
+
     for (const auto & listen_host : listen_hosts)
     {
         /// TCP TestKeeper
diff --git a/src/Coordination/NuKeeperServer.cpp b/src/Coordination/NuKeeperServer.cpp
index 162e521f1c8..2aefc215451 100644
--- a/src/Coordination/NuKeeperServer.cpp
+++ b/src/Coordination/NuKeeperServer.cpp
@@ -1,13 +1,165 @@
 #include <Coordination/NuKeeperServer.h>
+#include <Coordination/LoggerWrapper.h>
+#include <Coordination/NuKeeperStateMachine.h>
+#include <Coordination/InMemoryStateManager.h>
+#include <Coordination/WriteBufferFromNuraftBuffer.h>
+#include <Coordination/ReadBufferFromNuraftBuffer.h>
+#include <IO/ReadHelpers.h>
+#include <IO/WriteHelpers.h>
+#include <chrono>
+#include <Common/ZooKeeper/ZooKeeperIO.h>
+#include <string>
 
 namespace DB
 {
 
-void NuKeeperServer::addServer(int server_id_, const std::string & server_uri)
+
+NuKeeperServer::NuKeeperServer(int server_id_, const std::string & hostname_, int port_)
+    : server_id(server_id_)
+    , hostname(hostname_)
+    , port(port_)
+    , endpoint(hostname + ":" + std::to_string(port))
+    , state_machine(nuraft::cs_new<NuKeeperStateMachine>())
+    , state_manager(nuraft::cs_new<InMemoryStateManager>(server_id, endpoint))
 {
-    if (raft_instance->is_leader())
+}
+
+NuraftError NuKeeperServer::addServer(int server_id_, const std::string & server_uri_)
+{
+    nuraft::srv_config config(server_id_, server_uri_);
+    auto ret1 = raft_instance->add_srv(config);
+    return NuraftError{ret1->get_result_code(), ret1->get_result_str()};
+}
+
+
+NuraftError NuKeeperServer::startup()
+{
+    nuraft::raft_params params;
+    params.heart_beat_interval_ = 100;
+    params.election_timeout_lower_bound_ = 200;
+    params.election_timeout_upper_bound_ = 400;
+    params.reserved_log_items_ = 5;
+    params.snapshot_distance_ = 5;
+    params.client_req_timeout_ = 3000;
+    params.return_method_ = nuraft::raft_params::blocking;
+
+    raft_instance = launcher.init(
+        state_machine, state_manager, nuraft::cs_new<LoggerWrapper>("RaftInstance"), port,
+        nuraft::asio_service::options{}, params);
+
+    if (!raft_instance)
+        return NuraftError{nuraft::cmd_result_code::TIMEOUT, "Cannot create RAFT instance"};
+
+    static constexpr auto MAX_RETRY = 30;
+    for (size_t i = 0; i < MAX_RETRY; ++i)
     {
-        nuraft::srv_config first_config(server_id, server_uri);
+        if (raft_instance->is_initialized())
+            return NuraftError{nuraft::cmd_result_code::OK, ""};
+
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));
     }
 
+    return NuraftError{nuraft::cmd_result_code::TIMEOUT, "Cannot start RAFT instance"};
+}
+
+NuraftError NuKeeperServer::shutdown()
+{
+    if (!launcher.shutdown(5))
+        return NuraftError{nuraft::cmd_result_code::TIMEOUT, "Temout waiting RAFT instance to shutdown"};
+    return NuraftError{nuraft::cmd_result_code::OK, ""};
+}
+
+namespace
+{
+
+nuraft::ptr<nuraft::buffer> getZooKeeperLogEntry(int64_t session_id, const Coordination::ZooKeeperRequestPtr & request)
+{
+    DB::WriteBufferFromNuraftBuffer buf;
+    DB::writeIntBinary(session_id, buf);
+    request->write(buf);
+    return buf.getBuffer();
+}
+
+}
+
+TestKeeperStorage::ResponsesForSessions NuKeeperServer::readZooKeeperResponses(nuraft::ptr<nuraft::buffer> & buffer)
+{
+    DB::TestKeeperStorage::ResponsesForSessions results;
+    DB::ReadBufferFromNuraftBuffer buf(buffer);
+
+    while (!buf.eof())
+    {
+        int64_t session_id;
+        DB::readIntBinary(session_id, buf);
+        int32_t length;
+        Coordination::XID xid;
+        int64_t zxid;
+        Coordination::Error err;
+
+        Coordination::read(length, buf);
+        Coordination::read(xid, buf);
+        Coordination::read(zxid, buf);
+        Coordination::read(err, buf);
+        Coordination::ZooKeeperResponsePtr response;
+
+        if (xid == Coordination::WATCH_XID)
+            response = std::make_shared<Coordination::ZooKeeperWatchResponse>();
+        else
+        {
+            response = ops_mapping[session_id][xid];
+            ops_mapping[session_id].erase(xid);
+            if (ops_mapping[session_id].empty())
+                ops_mapping.erase(session_id);
+        }
+
+        if (err == Coordination::Error::ZOK && (xid == Coordination::WATCH_XID || response->getOpNum() != Coordination::OpNum::Close))
+            response->readImpl(buf);
+
+        response->xid = xid;
+        response->zxid = zxid;
+        response->error = err;
+
+        results.push_back(DB::TestKeeperStorage::ResponseForSession{session_id, response});
+    }
+    return results;
+}
+
+TestKeeperStorage::ResponsesForSessions NuKeeperServer::putRequests(const TestKeeperStorage::RequestsForSessions & requests)
+{
+    std::vector<nuraft::ptr<nuraft::buffer>> entries;
+    for (auto & [session_id, request] : requests)
+    {
+        ops_mapping[session_id][request->xid] = request->makeResponse();
+        entries.push_back(getZooKeeperLogEntry(session_id, request));
+    }
+
+    auto result = raft_instance->append_entries(entries);
+    if (!result->get_accepted())
+        return {};
+
+    if (result->get_result_code() != nuraft::cmd_result_code::OK)
+        return {};
+
+    return readZooKeeperResponses(result->get());
+}
+
+
+int64_t NuKeeperServer::getSessionID()
+{
+    auto entry = nuraft::buffer::alloc(sizeof(size_t));
+    nuraft::buffer_serializer bs(entry);
+    bs.put_i64(0);
+
+    auto result = raft_instance->append_entries({entry});
+    if (!result->get_accepted())
+        return -1;
+
+    if (result->get_result_code() != nuraft::cmd_result_code::OK)
+        return -1;
+
+    auto resp = result->get();
+    nuraft::buffer_serializer bs_resp(resp);
+    return bs_resp.get_i64();
+}
+
 }
diff --git a/src/Coordination/NuKeeperServer.h b/src/Coordination/NuKeeperServer.h
index 0dc536b1593..c77a7a8be0a 100644
--- a/src/Coordination/NuKeeperServer.h
+++ b/src/Coordination/NuKeeperServer.h
@@ -5,10 +5,17 @@
 #include <Coordination/InMemoryStateManager.h>
 #include <Coordination/NuKeeperStateMachine.h>
 #include <Coordination/TestKeeperStorage.h>
+#include <unordered_map>
 
 namespace DB
 {
 
+struct NuraftError
+{
+    nuraft::cmd_result_code code;
+    std::string message;
+};
+
 class NuKeeperServer
 {
 private:
@@ -20,7 +27,7 @@ private:
 
     std::string endpoint;
 
-    nuraft::ptr<StateMachine> state_machine;
+    nuraft::ptr<NuKeeperStateMachine> state_machine;
 
     nuraft::ptr<nuraft::state_mgr> state_manager;
 
@@ -28,16 +35,26 @@ private:
 
     nuraft::ptr<nuraft::raft_server> raft_instance;
 
-public:
-    NuKeeperServer(int server_id, const std::string & hostname, int port);
+    using XIDToOp = std::unordered_map<Coordination::XID, Coordination::ZooKeeperResponsePtr>;
 
-    void startup();
+    using SessionIDOps = std::unordered_map<int64_t, XIDToOp>;
+
+    SessionIDOps ops_mapping;
+
+    TestKeeperStorage::ResponsesForSessions readZooKeeperResponses(nuraft::ptr<nuraft::buffer> & buffer);
+
+public:
+    NuKeeperServer(int server_id_, const std::string & hostname_, int port_);
+
+    NuraftError startup();
 
     TestKeeperStorage::ResponsesForSessions putRequests(const TestKeeperStorage::RequestsForSessions & requests);
 
-    void addServer(int server_id_, const std::string & server_uri);
+    int64_t getSessionID();
 
-    void shutdown();
+    NuraftError addServer(int server_id_, const std::string & server_uri);
+
+    NuraftError shutdown();
 };
 
 }
diff --git a/src/Coordination/NuKeeperStateMachine.cpp b/src/Coordination/NuKeeperStateMachine.cpp
index 136ead44596..79324c91cd3 100644
--- a/src/Coordination/NuKeeperStateMachine.cpp
+++ b/src/Coordination/NuKeeperStateMachine.cpp
@@ -51,15 +51,32 @@ NuKeeperStateMachine::NuKeeperStateMachine()
 nuraft::ptr<nuraft::buffer> NuKeeperStateMachine::commit(const size_t log_idx, nuraft::buffer & data)
 {
     LOG_DEBUG(log, "Commiting logidx {}", log_idx);
-    auto request_for_session = parseRequest(data);
-    TestKeeperStorage::ResponsesForSessions responses_for_sessions;
+    if (data.size() == sizeof(size_t))
     {
-        std::lock_guard lock(storage_lock);
-        responses_for_sessions = storage.processRequest(request_for_session.request, request_for_session.session_id);
+        LOG_DEBUG(log, "Session ID response {}", log_idx);
+        auto response = nuraft::buffer::alloc(sizeof(size_t));
+        nuraft::buffer_serializer bs(response);
+        {
+            std::lock_guard lock(storage_lock);
+            bs.put_i64(storage.getSessionID());
+        }
+        last_committed_idx = log_idx;
+        return response;
     }
+    else
+    {
+        auto request_for_session = parseRequest(data);
+        //LOG_DEBUG(log, "GOT REQUEST {}", Coordination::toString(request_for_session.request->getOpNum()));
+        TestKeeperStorage::ResponsesForSessions responses_for_sessions;
+        {
+            std::lock_guard lock(storage_lock);
+            responses_for_sessions = storage.processRequest(request_for_session.request, request_for_session.session_id);
+        }
+        //LOG_DEBUG(log, "TOTAL RESPONSES {} FIRST XID {}", responses_for_sessions.size(), responses_for_sessions[0].response->xid);
 
-    last_committed_idx = log_idx;
-    return writeResponses(responses_for_sessions);
+        last_committed_idx = log_idx;
+        return writeResponses(responses_for_sessions);
+    }
 }
 
 bool NuKeeperStateMachine::apply_snapshot(nuraft::snapshot & s)
diff --git a/src/Coordination/TestKeeperStorage.cpp b/src/Coordination/TestKeeperStorage.cpp
index ef3ae1dfd16..ef72f5d4eaa 100644
--- a/src/Coordination/TestKeeperStorage.cpp
+++ b/src/Coordination/TestKeeperStorage.cpp
@@ -519,6 +519,7 @@ TestKeeperStorage::ResponsesForSessions TestKeeperStorage::finalize(const Reques
 
     finalized = true;
 
+    /// TODO delete ephemerals
     ResponsesForSessions finalize_results;
     auto finish_watch = [] (const auto & watch_pair) -> ResponsesForSessions
     {
diff --git a/src/Coordination/TestKeeperStorageDispatcher.cpp b/src/Coordination/TestKeeperStorageDispatcher.cpp
index 63cb5920f9b..9cc40f6e5c3 100644
--- a/src/Coordination/TestKeeperStorageDispatcher.cpp
+++ b/src/Coordination/TestKeeperStorageDispatcher.cpp
@@ -18,16 +18,16 @@ void TestKeeperStorageDispatcher::processingThread()
     {
         while (!shutdown)
         {
-            RequestInfo info;
+            TestKeeperStorage::RequestForSession request;
 
             UInt64 max_wait = UInt64(operation_timeout.totalMilliseconds());
 
-            if (requests_queue.tryPop(info, max_wait))
+            if (requests_queue.tryPop(request, max_wait))
             {
                 if (shutdown)
                     break;
 
-                auto responses = storage.processRequest(info.request, info.session_id);
+                auto responses = server.putRequests({request});
                 for (const auto & response_for_session : responses)
                     setResponse(response_for_session.session_id, response_for_session.response);
             }
@@ -67,15 +67,17 @@ void TestKeeperStorageDispatcher::finalize()
             processing_thread.join();
     }
 
-    RequestInfo info;
-    TestKeeperStorage::RequestsForSessions expired_requests;
-    while (requests_queue.tryPop(info))
-        expired_requests.push_back(TestKeeperStorage::RequestForSession{info.session_id, info.request});
+    //TestKeeperStorage::RequestsForSessions expired_requests;
+    //TestKeeperStorage::RequestForSession request;
+    //while (requests_queue.tryPop(request))
+    //    expired_requests.push_back(TestKeeperStorage::RequestForSession{request});
 
-    auto expired_responses = storage.finalize(expired_requests);
+    //auto expired_responses = storage.finalize(expired_requests);
 
-    for (const auto & response_for_session : expired_responses)
-        setResponse(response_for_session.session_id, response_for_session.response);
+    //for (const auto & response_for_session : expired_responses)
+    //    setResponse(response_for_session.session_id, response_for_session.response);
+    /// TODO FIXME
+    server.shutdown();
 }
 
 void TestKeeperStorageDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id)
@@ -87,8 +89,7 @@ void TestKeeperStorageDispatcher::putRequest(const Coordination::ZooKeeperReques
             throw Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unknown session id {}", session_id);
     }
 
-    RequestInfo request_info;
-    request_info.time = clock::now();
+    TestKeeperStorage::RequestForSession request_info;
     request_info.request = request;
     request_info.session_id = session_id;
 
@@ -101,7 +102,9 @@ void TestKeeperStorageDispatcher::putRequest(const Coordination::ZooKeeperReques
 }
 
 TestKeeperStorageDispatcher::TestKeeperStorageDispatcher()
+    : server(1, "localhost", 44444)
 {
+    server.startup();
     processing_thread = ThreadFromGlobalPool([this] { processingThread(); });
 }
 
diff --git a/src/Coordination/TestKeeperStorageDispatcher.h b/src/Coordination/TestKeeperStorageDispatcher.h
index c1c739db87d..ef788a16369 100644
--- a/src/Coordination/TestKeeperStorageDispatcher.h
+++ b/src/Coordination/TestKeeperStorageDispatcher.h
@@ -2,8 +2,9 @@
 
 #include <Common/ThreadPool.h>
 #include <Common/ConcurrentBoundedQueue.h>
-#include <Coordination/TestKeeperStorage.h>
 #include <functional>
+#include <Coordination/NuKeeperServer.h>
+#include <Poco/Util/AbstractConfiguration.h>
 
 namespace DB
 {
@@ -17,16 +18,9 @@ private:
 
     using clock = std::chrono::steady_clock;
 
-    struct RequestInfo
-    {
-        Coordination::ZooKeeperRequestPtr request;
-        clock::time_point time;
-        int64_t session_id;
-    };
-
     std::mutex push_request_mutex;
 
-    using RequestsQueue = ConcurrentBoundedQueue<RequestInfo>;
+    using RequestsQueue = ConcurrentBoundedQueue<TestKeeperStorage::RequestForSession>;
     RequestsQueue requests_queue{1};
     std::atomic<bool> shutdown{false};
     using SessionToResponseCallback = std::unordered_map<int64_t, ZooKeeperResponseCallback>;
@@ -36,7 +30,7 @@ private:
 
     ThreadFromGlobalPool processing_thread;
 
-    TestKeeperStorage storage;
+    NuKeeperServer server;
     std::mutex session_id_mutex;
 
 private:
@@ -46,6 +40,7 @@ private:
 
 public:
     TestKeeperStorageDispatcher();
+
     ~TestKeeperStorageDispatcher();
 
     void putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id);
@@ -53,7 +48,7 @@ public:
     int64_t getSessionID()
     {
         std::lock_guard lock(session_id_mutex);
-        return storage.getSessionID();
+        return server.getSessionID();
     }
 
     void registerSession(int64_t session_id, ZooKeeperResponseCallback callback);
diff --git a/utils/zookeeper-test/main.cpp b/utils/zookeeper-test/main.cpp
index 8f8aac00866..bfd7df26726 100644
--- a/utils/zookeeper-test/main.cpp
+++ b/utils/zookeeper-test/main.cpp
@@ -127,18 +127,22 @@ void testCreateListWatchEvent(zkutil::ZooKeeper & zk)
 
 void testMultiRequest(zkutil::ZooKeeper & zk)
 {
+    std::cerr << "Testing multi request\n";
     Coordination::Requests requests;
     requests.push_back(zkutil::makeCreateRequest("/data/multirequest", "aaa", zkutil::CreateMode::Persistent));
     requests.push_back(zkutil::makeSetRequest("/data/multirequest", "bbb", -1));
     zk.multi(requests);
+    std::cerr << "Multi executed\n";
 
     try
     {
         requests.clear();
+        std::cerr << "Testing bad multi\n";
         requests.push_back(zkutil::makeCreateRequest("/data/multirequest", "qweqwe", zkutil::CreateMode::Persistent));
         requests.push_back(zkutil::makeSetRequest("/data/multirequest", "bbb", -1));
         requests.push_back(zkutil::makeSetRequest("/data/multirequest", "ccc", -1));
         zk.multi(requests);
+        std::cerr << "Bad multi executed\n";
         std::terminate();
     }
     catch (...)
@@ -147,6 +151,7 @@ void testMultiRequest(zkutil::ZooKeeper & zk)
     }
 
     checkEq(zk, "/data/multirequest", "bbb");
+    std::cerr << "Multi request finished\n";
 }
 
 std::mutex elements_mutex;

From f13a075797b861eae34232e249968aef6c627f05 Mon Sep 17 00:00:00 2001
From: Daria Mozhaeva <dmozhaeva@yandex-team.ru>
Date: Fri, 22 Jan 2021 19:40:58 +0300
Subject: [PATCH 0141/1238] Fixed notes.

---
 .../integrations/embedded-rocksdb.md          | 15 ++++--------
 .../integrations/embedded-rocksdb.md          | 14 ++++-------
 docs/ru/operations/settings/settings.md       | 24 +++++++++----------
 3 files changed, 21 insertions(+), 32 deletions(-)

diff --git a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md
index 79e0e040377..95602fa313a 100644
--- a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md
+++ b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md
@@ -1,4 +1,4 @@
----
+.---
 toc_priority: 6
 toc_title: EmbeddedRocksDB
 ---
@@ -7,8 +7,6 @@ toc_title: EmbeddedRocksDB
 
 This engine allows integrating ClickHouse with [rocksdb](http://rocksdb.org/).
 
-`EmbeddedRocksDB` lets you:
-
 ## Creating a Table {#table_engine-EmbeddedRocksDB-creating-a-table}
 
 ``` sql
@@ -23,6 +21,9 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
 Required parameters:
 
 -  `primary_key_name` – any column name in the column list.
+- `primary key` must be specified, it supports only one column in the primary key. The primary key will be serialized in binary as a `rocksdb key`.
+- columns other than the primary key will be serialized in binary as `rocksdb` value in corresponding order.
+- queries with key `equals` or `in` filtering will be optimized to multi keys lookup from `rocksdb`.
 
 Example:
 
@@ -36,10 +37,4 @@ CREATE TABLE test
 )
 ENGINE = EmbeddedRocksDB
 PRIMARY KEY key
-```
-
-## Description {#description}
-
-- `primary key` must be specified, it supports only one column in the primary key. The primary key will be serialized in binary as a rocksdb key.
-- columns other than the primary key will be serialized in binary as rocksdb value in corresponding order.
-- queries with key `equals` or `in` filtering will be optimized to multi keys lookup from rocksdb.
+```
\ No newline at end of file
diff --git a/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md b/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md
index 575fc279b74..cb59cc9b568 100644
--- a/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md
+++ b/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md
@@ -7,8 +7,6 @@ toc_title: EmbeddedRocksDB
 
 Этот движок позволяет интегрировать ClickHouse с [rocksdb](http://rocksdb.org/).
 
-`EmbeddedRocksDB` дает возможность:
-
 ## Создание таблицы {#table_engine-EmbeddedRocksDB-creating-a-table}
 
 ``` sql
@@ -23,7 +21,9 @@ PRIMARY KEY(primary_key_name);
 
 Обязательные параметры:
 
--  `primary_key_name` – любое имя столбца из списка столбцов.
+`primary_key_name` может быть любое имя столбца из списка столбцов.
+Указание первичного ключа `primary key` является обязательным. Он будет сериализован в двоичном формате как ключ `rocksdb`. Поддерживается только один столбец в первичном ключе.
+Столбцы, которые отличаются от первичного ключа, будут сериализованы в двоичном формате как значение `rockdb` в соответствующем порядке. Запросы с фильтрацией по ключу `equals` или `in` оптимизируются для поиска по нескольким ключам из `rocksdb`.
 
 Пример:
 
@@ -37,10 +37,4 @@ CREATE TABLE test
 )
 ENGINE = EmbeddedRocksDB
 PRIMARY KEY key;
-```
-
-## Описание {#description}
-
-- должен быть указан `primary key`, он поддерживает только один столбец в первичном ключе. Первичный ключ будет сериализован в двоичном формате как ключ rocksdb.
-- столбцы, отличные от первичного ключа, будут сериализованы в двоичном формате как значение rockdb в соответствующем порядке.
-- запросы с фильтрацией по ключу `equals` или `in` будут оптимизированы для поиска по нескольким ключам из rocksdb.
+```
\ No newline at end of file
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index b48ca668aa4..ace0ede7c4d 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -406,15 +406,15 @@ INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102	2;
 
 Возможные значения:
 
--   `'best_effort'` — включает расширенный парсинг.
+-   `best_effort` — включает расширенный парсинг.
 
-        ClickHouse может парсить базовый формат `YYYY-MM-DD HH:MM:SS` и все форматы [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601). Например, `'2018-06-08T01:02:03.000Z'`.
+        ClickHouse может парсить базовый формат `YYYY-MM-DD HH:MM:SS` и все форматы [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601). Например, `2018-06-08T01:02:03.000Z`.
 
--   `'basic'` — используется базовый парсер.
+-   `basic` — используется базовый парсер.
 
-        ClickHouse может парсить только базовый формат `YYYY-MM-DD HH:MM:SS`. Например, `'2019-08-20 10:18:56'`.
+        ClickHouse может парсить только базовый формат `YYYY-MM-DD HH:MM:SS`. Например, `2019-08-20 10:18:56`.
 
-Значение по умолчанию: `'basic'`.
+Значение по умолчанию: `basic`.
 
 См. также:
 
@@ -427,19 +427,19 @@ INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102	2;
 
 Возможные значения:
 
--   `'simple'` - простой выходной формат.
+-   `simple` - простой выходной формат.
 
-    Выходные дата и время Clickhouse в формате `YYYY-MM-DD hh:mm:ss`. Например, `'2019-08-20 10:18:56'`. Расчет выполняется в соответствии с часовым поясом типа данных (если он есть) или часовым поясом сервера.
+    Выходные дата и время Clickhouse в формате `YYYY-MM-DD hh:mm:ss`. Например, `2019-08-20 10:18:56`. Расчет выполняется в соответствии с часовым поясом типа данных (если он есть) или часовым поясом сервера.
 
--   `'iso'` - выходной формат ISO.
+-   `iso` - выходной формат ISO.
 
-    Выходные дата и время Clickhouse в формате [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ`. Например, `'2019-08-20T10:18:56Z'`. Обратите внимание, что выходные данные отображаются в формате UTC (`Z` означает UTC).
+    Выходные дата и время Clickhouse в формате [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ`. Например, `2019-08-20T10:18:56Z`. Обратите внимание, что выходные данные отображаются в формате UTC (`Z` означает UTC).
 
--   `'unix_timestamp'` - выходной формат Unix.
+-   `unix_timestamp` - выходной формат Unix.
 
-    Выходные дата и время в формате [Unix](https://en.wikipedia.org/wiki/Unix_time). Например `'1566285536'`.
+    Выходные дата и время в формате [Unix](https://en.wikipedia.org/wiki/Unix_time). Например `1566285536`.
 
-Значение по умолчанию: `'simple'`.
+Значение по умолчанию: `simple`.
 
 См. также:
 

From 7014729aad53ef88aa54ae69d43d28f99dc722db Mon Sep 17 00:00:00 2001
From: Daria Mozhaeva <dmozhaeva@yandex-team.ru>
Date: Fri, 22 Jan 2021 20:11:00 +0300
Subject: [PATCH 0142/1238] add punctuation.

---
 docs/en/operations/settings/settings.md | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 1ff2ea77fd0..3591ee200e5 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -443,19 +443,19 @@ Allows choosing different output formats of the text representation of date and
 
 Possible values:
 
--   `'simple'` - Simple output format.
+-   `simple` - Simple output format.
 
-    Clickhouse output date and time `YYYY-MM-DD hh:mm:ss` format. For example, `'2019-08-20 10:18:56'`. The calculation is performed according to the data type's time zone (if present) or server time zone.
+    Clickhouse output date and time `YYYY-MM-DD hh:mm:ss` format. For example, `2019-08-20 10:18:56`. The calculation is performed according to the data type's time zone (if present) or server time zone.
 
--   `'iso'` - ISO output format.
+-   `iso` - ISO output format.
 
-    Clickhouse output date and time in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ` format. For example, `'2019-08-20T10:18:56Z'`. Note that output is in UTC (`Z` means UTC).
+    Clickhouse output date and time in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ` format. For example, `2019-08-20T10:18:56Z`. Note that output is in UTC (`Z` means UTC).
 
--   `'unix_timestamp'` - Unix timestamp output format.
+-   `unix_timestamp` - Unix timestamp output format.
 
-    Clickhouse output date and time in [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time) format. For example `'1566285536'`.
+    Clickhouse output date and time in [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time) format. For example `1566285536`.
 
-Default value: `'simple'`.
+Default value: `simple`.
 
 See also:
 

From 8b03329f4d1589ad0e2ae7dd00d15246a6f95c14 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Fri, 22 Jan 2021 23:04:47 +0300
Subject: [PATCH 0143/1238] Some logging

---
 src/Coordination/NuKeeperServer.cpp              | 2 ++
 src/Coordination/WriteBufferFromNuraftBuffer.cpp | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/src/Coordination/NuKeeperServer.cpp b/src/Coordination/NuKeeperServer.cpp
index 2aefc215451..7fb7f25aef6 100644
--- a/src/Coordination/NuKeeperServer.cpp
+++ b/src/Coordination/NuKeeperServer.cpp
@@ -127,10 +127,12 @@ TestKeeperStorage::ResponsesForSessions NuKeeperServer::readZooKeeperResponses(n
 TestKeeperStorage::ResponsesForSessions NuKeeperServer::putRequests(const TestKeeperStorage::RequestsForSessions & requests)
 {
     std::vector<nuraft::ptr<nuraft::buffer>> entries;
+    LOG_DEBUG(&Poco::Logger::get("DEBUG"), "REQUESTS SIZE {}", requests.size());
     for (auto & [session_id, request] : requests)
     {
         ops_mapping[session_id][request->xid] = request->makeResponse();
         entries.push_back(getZooKeeperLogEntry(session_id, request));
+        LOG_DEBUG(&Poco::Logger::get("DEBUG"), "ENTRY SIZE {}", entries.back()->size());
     }
 
     auto result = raft_instance->append_entries(entries);
diff --git a/src/Coordination/WriteBufferFromNuraftBuffer.cpp b/src/Coordination/WriteBufferFromNuraftBuffer.cpp
index 7d0a1dbcbb1..2f451af6538 100644
--- a/src/Coordination/WriteBufferFromNuraftBuffer.cpp
+++ b/src/Coordination/WriteBufferFromNuraftBuffer.cpp
@@ -1,4 +1,5 @@
 #include <Coordination/WriteBufferFromNuraftBuffer.h>
+#include <common/logger_useful.h>
 
 namespace DB
 {
@@ -16,6 +17,7 @@ void WriteBufferFromNuraftBuffer::nextImpl()
     size_t old_size = buffer->size();
     /// pos may not be equal to vector.data() + old_size, because WriteBuffer::next() can be used to flush data
     size_t pos_offset = pos - reinterpret_cast<Position>(buffer->data_begin());
+    LOG_DEBUG(&Poco::Logger::get("DEBUG"), "BUFFER SIZE {}", old_size * size_multiplier);
     nuraft::ptr<nuraft::buffer> new_buffer = nuraft::buffer::alloc(old_size * size_multiplier);
     memcpy(new_buffer->data_begin(), buffer->data_begin(), buffer->size());
     buffer = new_buffer;

From f8f79d5788ad84e396e87b0830a9adfb772fc276 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Sat, 23 Jan 2021 02:03:07 +0300
Subject: [PATCH 0144/1238] tmp -- the tests pass by some miracle

---
 src/Parsers/ASTWindowDefinition.cpp           |   2 +-
 src/Processors/Transforms/WindowTransform.cpp | 707 ++++++++++++++----
 src/Processors/Transforms/WindowTransform.h   | 206 ++++-
 3 files changed, 773 insertions(+), 142 deletions(-)

diff --git a/src/Parsers/ASTWindowDefinition.cpp b/src/Parsers/ASTWindowDefinition.cpp
index c726629d31b..ef28b54b613 100644
--- a/src/Parsers/ASTWindowDefinition.cpp
+++ b/src/Parsers/ASTWindowDefinition.cpp
@@ -54,7 +54,7 @@ void ASTWindowDefinition::formatImpl(const FormatSettings & settings,
 
     if (!frame.is_default)
     {
-        const auto name = frame.type == WindowFrame::FrameType::Rows
+        const auto * name = frame.type == WindowFrame::FrameType::Rows
             ? "ROWS" : frame.type == WindowFrame::FrameType::Groups
                 ? "GROUPS" : "RANGE";
 
diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index aac7c336c84..1bbbfc3d021 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -32,8 +32,6 @@ WindowTransform::WindowTransform(const Block & input_header_,
 
         workspace.argument_column_indices.reserve(
             workspace.window_function.argument_names.size());
-        workspace.argument_columns.reserve(
-            workspace.window_function.argument_names.size());
         for (const auto & argument_name : workspace.window_function.argument_names)
         {
             workspace.argument_column_indices.push_back(
@@ -53,8 +51,13 @@ WindowTransform::WindowTransform(const Block & input_header_,
         partition_by_indices.push_back(
             input_header.getPositionByName(column.column_name));
     }
-    partition_start_columns.resize(partition_by_indices.size(), nullptr);
-    partition_start_row = 0;
+
+    order_by_indices.reserve(window_description.order_by.size());
+    for (const auto & column : window_description.order_by)
+    {
+        order_by_indices.push_back(
+            input_header.getPositionByName(column.column_name));
+    }
 }
 
 WindowTransform::~WindowTransform()
@@ -67,88 +70,491 @@ WindowTransform::~WindowTransform()
     }
 }
 
-void WindowTransform::transform(Chunk & chunk)
+void WindowTransform::advancePartitionEnd()
 {
-    const size_t num_rows = chunk.getNumRows();
-    auto columns = chunk.detachColumns();
-
-    for (auto & ws : workspaces)
+    if (partition_ended)
     {
-        ws.argument_columns.clear();
-        for (const auto column_index : ws.argument_column_indices)
-        {
-            // Aggregate functions can't work with constant columns, so we have to
-            // materialize them like the Aggregator does.
-            columns[column_index]
-                = std::move(columns[column_index])->convertToFullColumnIfConst();
-
-            ws.argument_columns.push_back(columns[column_index].get());
-        }
-
-        ws.result_column = ws.window_function.aggregate_function->getReturnType()
-            ->createColumn();
+        return;
     }
 
-    // We loop for all window functions for each row. Switching the loops might
-    // be more efficient, because we would run less code and access less data in
-    // the inner loop. If you change this, don't forget to fix the calculation of
-    // partition boundaries. Probably it has to be precalculated and stored as
-    // an array of offsets. An interesting optimization would be to pass it as
-    // an extra column from the previous sorting step -- that step might need to
-    // make similar comparison anyway, if it's sorting only by the PARTITION BY
-    // columns.
-    for (size_t row = 0; row < num_rows; row++)
+    const RowNumber end = blocksEnd();
+
+    // If we're at the total end of data, we must end the partition. This is the
+    // only place in calculations where we need special handling for end of data,
+    // other places will work as usual based on `partition_ended` = true, because
+    // end of data is logically the same as any other end of partition.
+    // We must check this first, because other calculations might not be valid
+    // when we're at the end of data.
+    // FIXME not true, we also handle it elsewhere
+    if (input_is_finished)
     {
-        // Check whether the new partition has started. We have to reset the
-        // aggregate functions when the new partition starts.
-        assert(partition_start_columns.size() == partition_by_indices.size());
-        bool new_partition = false;
-        if (partition_start_columns.empty())
+        partition_ended = true;
+        partition_end = end;
+        return;
+    }
+
+    // Try to advance the partition end pointer.
+    const size_t n = partition_by_indices.size();
+    if (n == 0)
+    {
+        fmt::print(stderr, "no partition by\n");
+        // No PARTITION BY. All input is one partition, which will end when the
+        // input ends.
+        partition_end = end;
+        return;
+    }
+
+    // The partition ends when the PARTITION BY columns change. We need an array
+    // of reference columns for comparison. We might have already dropped the
+    // blocks where the partition starts, but any row in the partition will do.
+    // We can't use group_start or frame_start, because we might have advanced
+    // them to be equal to the partition_end.
+    // Use the row previous to partition_end -- it should be valid.
+    // FIXME group_start is now valid;
+    //auto reference_row = partition_end;
+    //retreatRowNumber(partition_end);
+    auto reference_row = group_start;
+    // assert(reference_row < partition_end);
+    if (reference_row == partition_end)
+    {
+        // This is for the very first partition. Try to get rid of it.
+        advanceRowNumber(partition_end);
+    }
+    assert(reference_row < blocksEnd());
+    assert(reference_row.block >= first_block_number);
+    Columns reference_partition_by;
+    for (const auto i : partition_by_indices)
+    {
+        reference_partition_by.push_back(inputAt(reference_row)[i]);
+    }
+
+    fmt::print(stderr, "{} cols to compare, reference at {}\n", n, group_start);
+
+    for ( ; partition_end < end; advanceRowNumber(partition_end))
+    {
+        // Check for partition end.
+        size_t i = 0;
+        for ( ; i < n; i++)
         {
-            // No PARTITION BY at all, do nothing.
-        }
-        else if (partition_start_columns[0] == nullptr)
-        {
-            // This is the first partition.
-            new_partition = true;
-            partition_start_columns.clear();
-            for (const auto i : partition_by_indices)
+            const auto * c = inputAt(partition_end)[partition_by_indices[i]].get();
+            if (c->compareAt(partition_end.row,
+                    group_start.row, *reference_partition_by[i],
+                    1 /* nan_direction_hint */) != 0)
             {
-                partition_start_columns.push_back(columns[i]);
+                break;
             }
-            partition_start_row = row;
+        }
+
+        if (i < n)
+        {
+//            fmt::print(stderr, "col {} doesn't match at {}: ref {}, val {}\n",
+//                i, partition_end, inputAt(partition_end)[i]);
+            partition_ended = true;
+            return;
+        }
+    }
+
+    // Went until the end of data and didn't find the new partition.
+    assert(!partition_ended && partition_end == blocksEnd());
+}
+
+void WindowTransform::advanceGroupEnd()
+{
+    if (group_ended)
+    {
+        return;
+    }
+
+    switch (window_description.frame.type)
+    {
+        case WindowFrame::FrameType::Groups:
+            advanceGroupEndGroups();
+            break;
+        case WindowFrame::FrameType::Rows:
+            advanceGroupEndRows();
+            break;
+        case WindowFrame::FrameType::Range:
+            advanceGroupEndRange();
+            break;
+    }
+}
+
+void WindowTransform::advanceGroupEndRows()
+{
+    // ROWS mode, peer groups always contains only the current row.
+//    if (group_end == partition_end)
+//    {
+//        // We might be already at the partition_end, if we got to it at the
+//        // previous work() call, but didn't know the partition ended there (it
+//        // was non-final end of data), and in the next work() call (now) we
+//        // discovered that either:
+//        // 1) we won't get more input, or
+//        // 2) we got new data and the new partition really began at this point,
+//        //    which is the beginning of the block.
+//        // Assert these conditions and do nothing.
+//        assert(input_is_finished || partition_end.row == 0);
+//    }
+//    else
+//    {
+//        assert(group_end < partition_end);
+//        advanceRowNumber(group_end);
+//        group_ended = true;
+//    }
+
+    assert(group_ended == false);
+    // We cannot advance the groups if the group start is already beyond the
+    // end of partition.
+    if (group_start == partition_end)
+    {
+        // should it be an assertion?
+        return;
+    }
+
+    assert(group_start < partition_end);
+    group_end = group_start;
+    advanceRowNumber(group_end);
+    group_ended = true;
+}
+
+void WindowTransform::advanceGroupEndRange()
+{
+    assert(false);
+}
+
+void WindowTransform::advanceGroupEndGroups()
+{
+    const size_t n = order_by_indices.size();
+    if (n == 0)
+    {
+        // No ORDER BY, so all rows are the same group. The group will end
+        // with the partition.
+        group_end = partition_end;
+        group_ended = partition_ended;
+    }
+
+    Columns reference_order_by;
+    for (const auto i : order_by_indices)
+    {
+        reference_order_by.push_back(inputAt(group_start)[i]);
+    }
+
+    // `partition_end` is either end of partition or end of data.
+    for ( ; group_end < partition_end; advanceRowNumber(group_end))
+    {
+        // Check for group end.
+        size_t i = 0;
+        for ( ; i < n; i++)
+        {
+            const auto * c = inputAt(partition_end)[partition_by_indices[i]].get();
+            if (c->compareAt(group_end.row,
+                    group_start.row, *reference_order_by[i],
+                    1 /* nan_direction_hint */) != 0)
+            {
+                break;
+            }
+        }
+
+        if (i < n)
+        {
+            group_ended = true;
+            return;
+        }
+    }
+
+    assert(group_end == partition_end);
+    if (partition_ended)
+    {
+        // A corner case -- the ORDER BY columns were the same, but the group
+        // still ended because the partition has ended.
+        group_ended = true;
+    }
+}
+
+void WindowTransform::advanceFrameStart()
+{
+    // Frame start is always UNBOUNDED PRECEDING for now, so we don't have to
+    // move it. It is initialized when the new partition starts.
+}
+
+void WindowTransform::advanceFrameEnd()
+{
+    // This should be called when we know the boundaries of the group (probably
+    // not a fundamental requirement, but currently it's written this way).
+    assert(group_ended);
+
+    const auto frame_end_before = frame_end;
+
+    // Frame end is always the current group end, for now.
+    // In ROWS mode the group is going to contain only the current row.
+    frame_end = group_end;
+    frame_ended = true;
+
+    // Add the columns over which we advanced the frame to the aggregate function
+    // states.
+    std::vector<const IColumn *> argument_columns;
+    for (auto & ws : workspaces)
+    {
+        const auto & f = ws.window_function;
+        const auto * a = f.aggregate_function.get();
+        auto * buf = ws.aggregate_function_state.data();
+
+        // We use two explicit loops here instead of using advanceRowNumber(),
+        // because we want to cache the argument columns array per block. Later
+        // we also use batch add.
+        // Unfortunately this leads to tricky loop conditions, because the
+        // frame_end might be either a past-the-end block, or a valid block, in
+        // which case we also have to process its head.
+        // And we also have to remember to reset the row number when moving to
+        // the next block.
+
+        uint64_t past_the_end_block;
+        // Note that the past-the-end row is not in the past-the-end block, but
+        // in the block before it.
+        uint32_t past_the_end_row;
+
+        if (frame_end.block < first_block_number + blocks.size())
+        {
+            // The past-the-end row is in some valid block.
+            past_the_end_block = frame_end.block + 1;
+            past_the_end_row = frame_end.row;
         }
         else
         {
-            // Check whether the new partition started, by comparing all the
-            // PARTITION BY columns.
-            size_t first_inequal_column = 0;
-            for (; first_inequal_column < partition_start_columns.size();
-                  ++first_inequal_column)
-            {
-                const auto * current_column = columns[
-                    partition_by_indices[first_inequal_column]].get();
+            // The past-the-end row is at the total end of data.
+            past_the_end_block = first_block_number + blocks.size();
+            // It's in the previous block!
+            past_the_end_row = blocks.back().numRows();
+        }
+        for (auto r = frame_end_before;
+            r.block < past_the_end_block;
+            ++r.block, r.row = 0)
+        {
+            const auto & block = blocks[r.block - first_block_number];
 
-                if (current_column->compareAt(row, partition_start_row,
-                    *partition_start_columns[first_inequal_column],
-                    1 /* nan_direction_hint */) != 0)
-                {
-                    break;
-                }
+            argument_columns.clear();
+            for (const auto i : ws.argument_column_indices)
+            {
+                argument_columns.push_back(block.input_columns[i].get());
             }
 
-            if (first_inequal_column < partition_start_columns.size())
+            // We process all rows of intermediate blocks, and the head of the
+            // last block.
+            const auto end = ((r.block + 1) == past_the_end_block)
+                ? past_the_end_row
+                : block.numRows();
+            for ( ; r.row < end; ++r.row)
             {
-                // The new partition has started. Remember where.
-                new_partition = true;
-                partition_start_columns.clear();
-                for (const auto i : partition_by_indices)
-                {
-                    partition_start_columns.push_back(columns[i]);
-                }
-                partition_start_row = row;
+                a->add(buf,
+                    argument_columns.data(),
+                    r.row,
+                    arena.get());
             }
         }
+    }
+}
+
+void WindowTransform::writeOutGroup()
+{
+    fmt::print(stderr, "write out group [{}..{})\n",
+        group_start, group_end);
+
+    // Empty groups don't make sense.
+    assert(group_start < group_end);
+
+    std::vector<const IColumn *> argument_columns;
+    for (size_t wi = 0; wi < workspaces.size(); ++wi)
+    {
+        auto & ws = workspaces[wi];
+        const auto & f = ws.window_function;
+        const auto * a = f.aggregate_function.get();
+        auto * buf = ws.aggregate_function_state.data();
+
+        // Need to use a tricky loop to be able to batch per-block (but we don't
+        // do it yet...). See the comments to the similar loop in
+        // advanceFrameEnd() above.
+        uint64_t past_the_end_block;
+        uint32_t past_the_end_row;
+        if (frame_end.block < first_block_number + blocks.size())
+        {
+            past_the_end_block = frame_end.block + 1;
+            past_the_end_row = frame_end.row;
+        }
+        else
+        {
+            past_the_end_block = first_block_number + blocks.size();
+            past_the_end_row = blocks.back().numRows();
+        }
+        for ( auto r = group_start;
+            r.block < past_the_end_block;
+            ++r.block, r.row = 0)
+        {
+            const auto & block = blocks[r.block - first_block_number];
+
+            argument_columns.clear();
+            for (const auto ai : ws.argument_column_indices)
+            {
+                argument_columns.push_back(block.input_columns[ai].get());
+            }
+
+            // We process all rows of intermediate blocks, and the head of the
+            // last block.
+            const auto end = ((r.block + 1) == past_the_end_block)
+                ? past_the_end_row
+                : block.numRows();
+            for ( ; r.row < end; ++r.row)
+            {
+                // FIXME does it also allocate the result on the arena?
+                // We'll have to pass it out with blocks then...
+                a->insertResultInto(buf,
+                    *block.output_columns[wi],
+                    arena.get());
+            }
+        }
+    }
+
+    first_not_ready_row = group_end;
+}
+
+void WindowTransform::appendChunk(Chunk & chunk)
+{
+    fmt::print(stderr, "new chunk, {} rows, finished={}\n", chunk.getNumRows(),
+        input_is_finished);
+
+    // First, prepare the new input block and add it to the queue. We might not
+    // have it if it's end of data, though.
+    if (!input_is_finished)
+    {
+        blocks.push_back({});
+        auto & block = blocks.back();
+        block.input_columns = chunk.detachColumns();
+
+        for (auto & ws : workspaces)
+        {
+            // Aggregate functions can't work with constant columns, so we have to
+            // materialize them like the Aggregator does.
+            for (const auto column_index : ws.argument_column_indices)
+            {
+                block.input_columns[column_index]
+                    = std::move(block.input_columns[column_index])
+                        ->convertToFullColumnIfConst();
+            }
+
+            block.output_columns.push_back(ws.window_function.aggregate_function
+                ->getReturnType()->createColumn());
+        }
+    }
+
+    // Start the calculations. First, advance the partition end.
+    for (;;)
+    {
+        advancePartitionEnd();
+
+        // Either we ran out of data or we found the end of partition (maybe
+        // both, but this only happens at the total end of data).
+        assert(partition_ended || partition_end == blocksEnd());
+        if (partition_ended && partition_end == blocksEnd())
+        {
+            assert(input_is_finished);
+        }
+
+        fmt::print(stderr, "partition end '{}', {}\n", partition_end,
+            partition_ended);
+
+        // After that, advance the peer groups. We can advance peer groups until
+        // the end of partition or current end of data, which is precisely the
+        // description of `partition_end`.
+        while (group_end < partition_end)
+        {
+            group_start = group_end;
+            advanceGroupEnd();
+
+            fmt::print(stderr, "group end '{}'\n", group_end);
+
+            // If the group didn't end yet, wait.
+            if (!group_ended)
+            {
+                return;
+            }
+
+            // The group ended.
+            // Advance the frame start, updating the state of the aggregate
+            // functions.
+            advanceFrameStart();
+            // Advance the frame end, updating the state of the aggregate
+            // functions.
+            advanceFrameEnd();
+
+            if (!frame_ended)
+            {
+                return;
+            }
+
+            // Write out the aggregation results
+            writeOutGroup();
+
+            // Move to the next group.
+            // The frame will have to be recalculated.
+            frame_ended = false;
+
+            // Move to the next group. Don't advance group_start yet, it's
+            // convenient to use it as the PARTITION BY etalon.
+            group_ended = false;
+
+            if (group_end == partition_end)
+            {
+                break;
+            }
+            assert(group_end < partition_end);
+        }
+
+        if (!partition_ended)
+        {
+            // We haven't encountered the end of the partition yet, need more
+            // data.
+            assert(partition_end == blocksEnd());
+            break;
+        }
+
+        if (input_is_finished)
+        {
+            // why?
+            return;
+        }
+
+        // Start the next partition.
+        const auto new_partition_start = partition_end;
+        advanceRowNumber(partition_end);
+        partition_ended = false;
+        // We have to reset the frame when the new partition starts. This is not a
+        // generally correct way to do so, but we don't really support moving frame
+        // for now.
+        frame_start = new_partition_start;
+        frame_end = new_partition_start;
+        group_start = new_partition_start;
+        group_end = new_partition_start;
+        // The group pointers are already reset to the partition start, see the
+        // above loop.
+
+        fmt::print(stderr, "reinitialize agg data at start of {}\n",
+            new_partition_start);
+        // Reinitialize the aggregate function states because the new partition
+        // has started.
+        for (auto & ws : workspaces)
+        {
+            const auto & f = ws.window_function;
+            const auto * a = f.aggregate_function.get();
+            auto * buf = ws.aggregate_function_state.data();
+
+            a->destroy(buf);
+        }
+
+        // Release the arena we use for aggregate function states, so that it
+        // doesn't grow without limit. Not sure if it's actually correct, maybe
+        // it allocates the return values in the Arena as well...
+        if (arena)
+        {
+            arena = std::make_unique<Arena>();
+        }
 
         for (auto & ws : workspaces)
         {
@@ -156,86 +562,105 @@ void WindowTransform::transform(Chunk & chunk)
             const auto * a = f.aggregate_function.get();
             auto * buf = ws.aggregate_function_state.data();
 
-            if (new_partition)
-            {
-                // Reset the aggregate function states.
-                a->destroy(buf);
-                a->create(buf);
-            }
-
-            // Update the aggregate function state and save the result.
-            a->add(buf,
-                ws.argument_columns.data(),
-                row,
-                arena.get());
-
-            a->insertResultInto(buf,
-                *ws.result_column,
-                arena.get());
+            a->create(buf);
         }
     }
-
-    // We have to release the mutable reference to the result column before we
-    // return this block, or else extra copying may occur when the subsequent
-    // processors modify the block. Workspaces live longer than individual blocks.
-    for (auto & ws : workspaces)
-    {
-        columns.push_back(std::move(ws.result_column));
-    }
-
-    chunk.setColumns(std::move(columns), num_rows);
 }
 
 IProcessor::Status WindowTransform::prepare()
 {
-    /// Check can output.
+    fmt::print(stderr, "prepare, next output {}, not ready row {}, first block {}, hold {} blocks\n",
+        next_output_block_number, first_not_ready_row, first_block_number,
+        blocks.size());
+
     if (output.isFinished())
     {
+        // The consumer asked us not to continue (or we decided it ourselves),
+        // so we abort.
         input.close();
         return Status::Finished;
     }
 
-    if (!output.canPush())
-    {
-        input.setNotNeeded();
-        return Status::PortFull;
-    }
+//    // Technically the past-the-end next_output_block_number is also valid if
+//    // we haven't yet received the corresponding input block.
+//    assert(next_output_block_number < first_block_number + blocks.size()
+//        || blocks.empty());
 
-    /// Output if has data.
-    if (has_output)
-    {
-        output.pushData(std::move(output_data));
-        has_output = false;
+    assert(first_not_ready_row.block >= first_block_number);
+    // Might be past-the-end, so equality also valid.
+    assert(first_not_ready_row.block <= first_block_number + blocks.size());
+    assert(next_output_block_number >= first_block_number);
 
-        return Status::PortFull;
-    }
-
-    /// Check can input.
-    if (!has_input)
+    // Output the ready data prepared by work().
+    // We inspect the calculation state and create the output chunk right here,
+    // because this is pretty lightweight.
+    if (next_output_block_number < first_not_ready_row.block)
     {
-        if (input.isFinished())
+        if (output.canPush())
         {
-            output.finish();
-            return Status::Finished;
+            // Output the ready block.
+            fmt::print(stderr, "output block {}\n", next_output_block_number);
+            const auto i = next_output_block_number - first_block_number;
+            ++next_output_block_number;
+            auto & block = blocks[i];
+            auto columns = block.input_columns;
+            for (auto & res : block.output_columns)
+            {
+                columns.push_back(ColumnPtr(std::move(res)));
+            }
+            output_data.chunk.setColumns(columns, block.numRows());
+
+            output.pushData(std::move(output_data));
+        }
+        else
+        {
+            // Not sure what this branch means. The output port is full and we
+            // apply backoff pressure on the input?
+            input.setNotNeeded();
         }
 
-        input.setNeeded();
+        return Status::PortFull;
+    }
 
-        if (!input.hasData())
-            return Status::NeedData;
+    if (input_is_finished)
+    {
+        // The input data ended at the previous prepare() + work() cycle,
+        // and we don't have ready output data (checked above). We must be
+        // finished.
+        assert(next_output_block_number == first_block_number + blocks.size());
+        assert(first_not_ready_row == blocksEnd());
 
+        // FIXME do we really have to do this?
+        output.finish();
+
+        return Status::Finished;
+    }
+
+    // Consume input data if we have any ready.
+    if (!has_input && input.hasData())
+    {
         input_data = input.pullData(true /* set_not_needed */);
         has_input = true;
 
-        if (input_data.exception)
-        {
-            /// No more data needed. Exception will be thrown (or swallowed) later.
-            input.setNotNeeded();
-        }
+        // Now we have new input and can try to generate more output in work().
+        return Status::Ready;
     }
 
-    /// Now transform.
-    return Status::Ready;
+    // We 1) don't have any ready output (checked above),
+    // 2) don't have any more input (also checked above).
+    // Will we get any more input?
+    if (input.isFinished())
+    {
+        // We won't, time to finalize the calculation in work(). We should only
+        // do this once.
+        assert(!input_is_finished);
+        input_is_finished = true;
+        return Status::Ready;
+    }
+
+    // We have to wait for more input.
+    input.setNeeded();
+    return Status::NeedData;
 }
 
 void WindowTransform::work()
@@ -249,10 +674,12 @@ void WindowTransform::work()
         return;
     }
 
+    assert(has_input || input_is_finished);
+
     try
     {
-        transform(input_data.chunk);
-        output_data.chunk.swap(input_data.chunk);
+        has_input = false;
+        appendChunk(input_data.chunk);
     }
     catch (DB::Exception &)
     {
@@ -262,10 +689,32 @@ void WindowTransform::work()
         return;
     }
 
-    has_input = false;
+    // We don't really have to keep the entire partition, and it can be big, so
+    // we want to drop the starting blocks to save memory.
+    // We can drop the old blocks if we already returned them as output, and the
+    // frame and group are already past them. Note that the frame start can be
+    // further than group start for some frame specs, so we have to check both.
+    // Both pointers can also be at the end of partition, but we need at least
+    // one row before that, so that we can use it as an etalon for finding the
+    // partition boundaries, hence the "-1", and the weird std::max(1, ...)
+    // wrapper is to avoid unsigned overflow.
+    // FIXME the above "-1" is not needed anymore, I changed how we advance the
+    // group_start
+    const auto first_used_block = std::min(next_output_block_number,
+        std::max(1ul, std::min(frame_start.block, group_start.block)) - 1);
+    if (first_block_number < first_used_block)
+    {
+        fmt::print(stderr, "will drop blocks from {} to {}\n", first_block_number,
+            first_used_block);
 
-    if (output_data.chunk)
-        has_output = true;
+        blocks.erase(blocks.begin(),
+            blocks.begin() + first_used_block - first_block_number);
+        first_block_number = first_used_block;
+
+        assert(next_output_block_number >= first_block_number);
+        assert(frame_start.block >= first_block_number);
+        assert(group_start.block >= first_block_number);
+    }
 }
 
 
diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h
index cd2172ab7fb..8acece9fd17 100644
--- a/src/Processors/Transforms/WindowTransform.h
+++ b/src/Processors/Transforms/WindowTransform.h
@@ -5,6 +5,8 @@
 
 #include <Common/AlignedBuffer.h>
 
+#include <deque>
+
 namespace DB
 {
 
@@ -20,14 +22,76 @@ struct WindowFunctionWorkspace
     AlignedBuffer aggregate_function_state;
     std::vector<size_t> argument_column_indices;
 
+    /*
     // Argument and result columns. Be careful, they are per-chunk.
     std::vector<const IColumn *> argument_columns;
     MutableColumnPtr result_column;
+    */
+};
+
+struct WindowTransformBlock
+{
+    Columns input_columns;
+    MutableColumns output_columns;
+
+    // Even in case of `count() over ()` we should have a dummy input column.
+    // Not sure how reliable this is...
+    size_t numRows() const { return input_columns[0]->size(); }
+};
+
+/*
+// Use half the range of the unsigned int data type, to allow wraparound and
+// comparison. I.e. even when the counter overflows we can still tell that it is
+// greater than another counter, unless they are more than half the range apart.
+template <typename T>
+struct Wraparound
+{
+    T value;
+
+    // exclusive?
+    constexpr auto max_value = T(1) << (sizeof(T) * 8 - 1);
+
+    operator T() const { return value; }
+    operator T&() { return value; }
+    bool operator == (const T & other) { return other.value = value; }
+    Wraparound & operator ++ () { value++; return *this; }
+    bool operator < (const T & other) { return value % max_value < other.value % max_value; }
+    Wraparound & operator + (const T & other) { value = value + other.value; return *this; }
+};
+*/
+
+
+struct RowNumber
+{
+    uint64_t block = 0;
+    uint16_t row = 0;
+
+    bool operator < (const RowNumber & other) const
+    {
+        return block < other.block
+            || (block == other.block && row < other.row);
+    }
+
+    bool operator == (const RowNumber & other) const
+    {
+        return block == other.block && row == other.row;
+    }
 };
 
 /*
  * Computes several window functions that share the same window. The input must
  * be sorted correctly for this window (PARTITION BY, then ORDER BY).
+ * We need to track the following pointers:
+ * 1) start of partition -- rows that compare equal w/PARTITION BY.
+ * 2) current frame boundaries.
+ * 3) start of peer group -- rows that compare equal w/ORDER BY (empty ORDER BY
+ *    means all rows are equal).
+ * These row ranges are (almost) nested -- peer group is inside frame inside
+ * partition. The only exception is when the exclusion clause is specified that
+ * excludes current peer group, but we don't support it anyway.
+ * All pointers only move forward.
+ * The value of the function is the same for all rows of the peer group.
+ * (partition [frame {group} ] )
  */
 class WindowTransform : public IProcessor /* public ISimpleTransform */
 {
@@ -51,7 +115,7 @@ public:
     /*
      * (former) Implementation of ISimpleTransform.
      */
-    void transform(Chunk & chunk) /*override*/;
+    void appendChunk(Chunk & chunk) /*override*/;
 
     /*
      * Implementation of IProcessor;
@@ -59,6 +123,75 @@ public:
     Status prepare() override;
     void work() override;
 
+private:
+    void advancePartitionEnd();
+    void advanceGroupEnd();
+    void advanceGroupEndGroups();
+    void advanceGroupEndRows();
+    void advanceGroupEndRange();
+    void advanceFrameStart();
+    void advanceFrameEnd();
+    void writeOutGroup();
+
+    Columns & inputAt(const RowNumber & x)
+    {
+        assert(x.block >= first_block_number);
+        assert(x.block - first_block_number < blocks.size());
+        return blocks[x.block - first_block_number].input_columns;
+    }
+
+    const Columns & inputAt(const RowNumber & x) const
+    { return const_cast<WindowTransform *>(this)->inputAt(x); }
+
+    MutableColumns & outputAt(const RowNumber & x)
+    {
+        assert(x.block >= first_block_number);
+        assert(x.block - first_block_number < blocks.size());
+        return blocks[x.block - first_block_number].output_columns;
+    }
+
+    void advanceRowNumber(RowNumber & x) const
+    {
+        assert(x.block >= first_block_number);
+        assert(x.block - first_block_number < blocks.size());
+
+        const int block_rows = inputAt(x)[0]->size();
+        assert(x.row < block_rows);
+
+        x.row++;
+        if (x.row < block_rows)
+        {
+            return;
+        }
+
+        x.row = 0;
+        ++x.block;
+    }
+
+    void retreatRowNumber(RowNumber & x) const
+    {
+        if (x.row > 0)
+        {
+            --x.row;
+            return;
+        }
+
+        --x.block;
+        assert(x.block >= first_block_number);
+        assert(x.block < first_block_number + blocks.size());
+        assert(inputAt(x)[0]->size() > 0);
+        x.row = inputAt(x)[0]->size() - 1;
+
+#ifndef NDEBUG
+        auto xx = x;
+        advanceRowNumber(xx);
+        assert(xx == x);
+#endif
+    }
+
+    RowNumber blocksEnd() const
+    { return RowNumber{first_block_number + blocks.size(), 0}; }
+
 public:
     /*
      * Data (formerly) inherited from ISimpleTransform.
@@ -67,6 +200,7 @@ public:
     OutputPort & output;
 
     bool has_input = false;
+    bool input_is_finished = false;
     Port::Data input_data;
     bool has_output = false;
     Port::Data output_data;
@@ -80,21 +214,69 @@ public:
 
     // Indices of the PARTITION BY columns in block.
     std::vector<size_t> partition_by_indices;
+    // Indices of the ORDER BY columns in block;
+    std::vector<size_t> order_by_indices;
 
-    // The columns for PARTITION BY and the row in these columns where the
-    // current partition started. They might be in some of the previous blocks,
-    // so we have to keep the shared ownership of the columns. We don't keep the
-    // entire block to save memory, only the needed columns, in the same order
-    // as the partition_by_indices array.
-    // Can be empty if there is no PARTITION BY.
-    // Columns are nullptr when it is the first partition.
-    std::vector<ColumnPtr> partition_start_columns;
-    size_t partition_start_row = 0;
-
-    // Data for computing the window functions.
+    // Per-window-function scratch spaces.
     std::vector<WindowFunctionWorkspace> workspaces;
 
+    // FIXME Reset it when the partition changes. We only save the temporary
+    // states in it (probably?).
     std::unique_ptr<Arena> arena;
+
+    // A sliding window of blocks we currently need. We add the input blocks as
+    // they arrive, and discard the blocks we don't need anymore. The blocks
+    // have an always-incrementing index. The index of the first block is in
+    // `first_block_number`.
+    std::deque<WindowTransformBlock> blocks;
+    uint64_t first_block_number = 0;
+    // The next block we are going to pass to the consumer.
+    uint64_t next_output_block_number = 0;
+    // The first row for which we still haven't calculated the window functions.
+    // Used to determine which resulting blocks we can pass to the consumer.
+    RowNumber first_not_ready_row;
+
+    // We don't keep the pointer to start of partition, because we don't really
+    // need it, and we want to be able to drop the starting blocks to save memory.
+    // The `partition_end` is past-the-end, as usual. When partition_ended = false,
+    // it still haven't ended, and partition_end is the next row to check.
+    RowNumber partition_end;
+    bool partition_ended = false;
+
+    // Current peer group is [group_start, group_end) if group_ended,
+    // [group_start, ?) otherwise.
+    RowNumber group_start;
+    RowNumber group_end;
+    bool group_ended = false;
+
+    // After we have found the final boundaries of the frame, we can immediately
+    // output the result for the current group, w/o waiting for more data.
+    RowNumber frame_start;
+    RowNumber frame_end;
+    bool frame_ended = false;
 };
 
 }
+
+/// See https://fmt.dev/latest/api.html#formatting-user-defined-types
+template <>
+struct fmt::formatter<DB::RowNumber>
+{
+    constexpr auto parse(format_parse_context & ctx)
+    {
+        auto it = ctx.begin();
+        auto end = ctx.end();
+
+        /// Only support {}.
+        if (it != end && *it != '}')
+            throw format_error("invalid format");
+
+        return it;
+    }
+
+    template <typename FormatContext>
+    auto format(const DB::RowNumber & x, FormatContext & ctx)
+    {
+        return format_to(ctx.out(), "{}:{}", x.block, x.row);
+    }
+};

From 5bb3c8941313ab91324f5ee4ba1a7288287852ea Mon Sep 17 00:00:00 2001
From: Olga Revyakina <revolg@yandex-team.ru>
Date: Sat, 23 Jan 2021 07:38:49 +0300
Subject: [PATCH 0145/1238] CREATE QUOTA and ALTER QUOTA syntax updated.

---
 .../sql-reference/statements/alter/quota.md   | 23 +++++++++++++++---
 .../sql-reference/statements/create/quota.md  | 24 +++++++++++++++++--
 2 files changed, 42 insertions(+), 5 deletions(-)

diff --git a/docs/en/sql-reference/statements/alter/quota.md b/docs/en/sql-reference/statements/alter/quota.md
index 2923fd04c4b..08a36e8598c 100644
--- a/docs/en/sql-reference/statements/alter/quota.md
+++ b/docs/en/sql-reference/statements/alter/quota.md
@@ -5,16 +5,33 @@ toc_title: QUOTA
 
 # ALTER QUOTA {#alter-quota-statement}
 
-Changes quotas.
+Changes [quotas](../../../operations/access-rights.md#quotas-management).
 
 Syntax:
 
 ``` sql
 ALTER QUOTA [IF EXISTS] name [ON CLUSTER cluster_name]
     [RENAME TO new_name]
-    [KEYED BY {'none' | 'user name' | 'ip address' | 'client key' | 'client key or user name' | 'client key or ip address'}]
+    [KEYED BY {NONE | USER_NAME | IP_ADDRESS | CLIENT_KEY | CLIENT_KEY, USER_NAME | CLIENT_KEY, IP_ADDRESS} | NOT KEYED]
     [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY | WEEK | MONTH | QUARTER | YEAR}
-        {MAX { {QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = number } [,...] |
+        {MAX { {QUERIES | ERRORS | RESULT_ROWS | RESULT_BYTES | READ_ROWS | READ_BYTES | EXECUTION_TIME} = number } [,...] |
         NO LIMITS | TRACKING ONLY} [,...]]
     [TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
 ```
+Multiword key types may be written either with underscores (`CLIENT_KEY`), or with spaces and in simple quotes (`'client key'`). You may also use `'client key or user name'` instead of `CLIENT_KEY, USER_NAME`, and `'client key or ip address'` instead of `CLIENT_KEY, IP_ADDRESS`.
+
+Multiword resource types may be written either with underscores (`RESULT_ROWS`) or without them (`RESULT ROWS`). 
+
+**Examples**
+
+Limit the maximum number of queries for the current user with 123 queries in 15 months constraint:
+
+``` sql
+ALTER QUOTA IF EXISTS qA FOR INTERVAL 15 MONTH MAX QUERIES 123 TO CURRENT_USER;
+```
+
+For the default user limit the maximum execution time with half a second in 30 minutes, and limit the maximum number of queries with 321 and the maximum number of errors with 10 in 5 quaters:
+
+``` sql
+ALTER QUOTA IF EXISTS qB FOR INTERVAL 30 MINUTE MAX EXECUTION_TIME = 0.5, FOR INTERVAL 5 QUATER MAX QUERIES = 321, ERRORS = 10 TO default;
+```
diff --git a/docs/en/sql-reference/statements/create/quota.md b/docs/en/sql-reference/statements/create/quota.md
index 20537b98a46..8ae49e4e1a8 100644
--- a/docs/en/sql-reference/statements/create/quota.md
+++ b/docs/en/sql-reference/statements/create/quota.md
@@ -11,15 +11,18 @@ Syntax:
 
 ``` sql
 CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
-    [KEYED BY {'none' | 'user name' | 'ip address' | 'forwarded ip address' | 'client key' | 'client key or user name' | 'client key or ip address'}]
+    [KEYED BY {NONE | USER_NAME | IP_ADDRESS | CLIENT_KEY | CLIENT_KEY, USER_NAME | CLIENT_KEY, IP_ADDRESS} | NOT KEYED]
     [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY | WEEK | MONTH | QUARTER | YEAR}
-        {MAX { {QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = number } [,...] |
+        {MAX { {QUERIES | ERRORS | RESULT_ROWS | RESULT_BYTES | READ_ROWS | READ_BYTES | EXECUTION_TIME} = number } [,...] |
          NO LIMITS | TRACKING ONLY} [,...]]
     [TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
 ```
 
 `ON CLUSTER` clause allows creating quotas on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md).
 
+Multiword key types may be written either with underscores (`CLIENT_KEY`), or with spaces and in simple quotes (`'client key'`). You may also use `'client key or user name'` instead of `CLIENT_KEY, USER_NAME`, and `'client key or ip address'` instead of `CLIENT_KEY, IP_ADDRESS`.
+
+Multiword resource types may be written either with underscores (`RESULT_ROWS`) or without them (`RESULT ROWS`). 
 ## Example {#create-quota-example}
 
 Limit the maximum number of queries for the current user with 123 queries in 15 months constraint:
@@ -27,3 +30,20 @@ Limit the maximum number of queries for the current user with 123 queries in 15
 ``` sql
 CREATE QUOTA qA FOR INTERVAL 15 MONTH MAX QUERIES 123 TO CURRENT_USER
 ```
+Multiword key types may be written either with underscores (`CLIENT_KEY`), or with spaces and in simple quotes (`'client key'`). You can also use `'client key or user name'` instead of `CLIENT_KEY, USER_NAME`, and `'client key or ip address'` instead of `CLIENT_KEY, IP_ADDRESS`.
+
+Multiword resource types may be written either with underscores (`RESULT_ROWS`) or without them (`RESULT ROWS`). 
+
+## Examples {#create-quota-example}
+
+Limit the maximum number of queries for the current user with 123 queries in 15 months constraint:
+
+``` sql
+CREATE QUOTA qA FOR INTERVAL 15 MONTH MAX QUERIES 123 TO CURRENT_USER;
+```
+
+For the default user limit the maximum execution time with half a second in 30 minutes, and limit the maximum number of queries with 321 and the maximum number of errors with 10 in 5 quaters:
+
+``` sql
+CREATE QUOTA qB FOR INTERVAL 30 MINUTE MAX EXECUTION_TIME = 0.5, FOR INTERVAL 5 QUATER MAX QUERIES = 321, ERRORS = 10 TO default;
+```

From 42e5c3c41346142428ac03e8d5271e22166363f1 Mon Sep 17 00:00:00 2001
From: Olga Revyakina <revolg@yandex-team.ru>
Date: Sat, 23 Jan 2021 09:27:57 +0300
Subject: [PATCH 0146/1238] Translated to ru

---
 .../sql-reference/statements/alter/quota.md   | 27 +++++++++++++++----
 .../sql-reference/statements/create/quota.md  | 24 ++++++++++++-----
 2 files changed, 40 insertions(+), 11 deletions(-)

diff --git a/docs/ru/sql-reference/statements/alter/quota.md b/docs/ru/sql-reference/statements/alter/quota.md
index 707f56e7cd4..4f524db2a6d 100644
--- a/docs/ru/sql-reference/statements/alter/quota.md
+++ b/docs/ru/sql-reference/statements/alter/quota.md
@@ -5,18 +5,35 @@ toc_title: QUOTA
 
 # ALTER QUOTA {#alter-quota-statement}
 
-Изменяет квоту.
+Изменяет [квоту](../../../operations/access-rights.md#quotas-management).
 
-## Синтаксис {#alter-quota-syntax}
+Синтаксис:
 
 ``` sql
 ALTER QUOTA [IF EXISTS] name [ON CLUSTER cluster_name]
     [RENAME TO new_name]
-    [KEYED BY {'none' | 'user name' | 'ip address' | 'client key' | 'client key or user name' | 'client key or ip address'}]
+    [KEYED BY {NONE | USER_NAME | IP_ADDRESS | CLIENT_KEY | CLIENT_KEY, USER_NAME | CLIENT_KEY, IP_ADDRESS} | NOT KEYED]
     [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY | WEEK | MONTH | QUARTER | YEAR}
-        {MAX { {QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = number } [,...] |
+        {MAX { {QUERIES | ERRORS | RESULT_ROWS | RESULT_BYTES | READ_ROWS | READ_BYTES | EXECUTION_TIME} = number } [,...] |
         NO LIMITS | TRACKING ONLY} [,...]]
     [TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
 ```
+Поддерживаются два варианта написания составных типов ключей: с подчеркиванием (`CLIENT_KEY`) или через пробел и в одинарных кавычках (`'client key'`). Также можно использовать ключ `'client key or user name'` вместо `CLIENT_KEY, USER_NAME`, и ключ `'client key or ip address'` вместо `CLIENT_KEY, IP_ADDRESS`.
 
-[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/alter/quota/) <!--hide-->
\ No newline at end of file
+Поддерживаются также два варианта написания составных типов ресурсов: с подчеркиванием (`RESULT_ROWS`) или без подчеркивания, через пробел (`RESULT ROWS`). 
+
+**Примеры**
+
+Ограничить для текущего пользователя максимальное число запросов — не более 123 запросов за каждые 15 месяцев:
+
+``` sql
+ALTER QUOTA IF EXISTS qA FOR INTERVAL 15 MONTH MAX QUERIES 123 TO CURRENT_USER;
+```
+
+Ограничить по умолчанию максимальное время выполнения запроса — не более полсекунды за каждые 30 минут, а также максимальное число запросов — не более 321 и максимальное число ошибок — не более 10 за каждые 5 кварталов:
+
+``` sql
+ALTER QUOTA IF EXISTS qB FOR INTERVAL 30 MINUTE MAX EXECUTION_TIME = 0.5, FOR INTERVAL 5 QUATER MAX QUERIES = 321, ERRORS = 10 TO default;
+```
+
+[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/alter/quota/) <!--hide-->
diff --git a/docs/ru/sql-reference/statements/create/quota.md b/docs/ru/sql-reference/statements/create/quota.md
index fe18869bf2e..073c4eda85c 100644
--- a/docs/ru/sql-reference/statements/create/quota.md
+++ b/docs/ru/sql-reference/statements/create/quota.md
@@ -7,23 +7,35 @@ toc_title: "\u041a\u0432\u043e\u0442\u0430"
 
 Создает [квоту](../../../operations/access-rights.md#quotas-management), которая может быть присвоена пользователю или роли.
 
-### Синтаксис {#create-quota-syntax}
+Синтаксис:
 
 ``` sql
 CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
-    [KEYED BY {'none' | 'user name' | 'ip address' | 'client key' | 'client key or user name' | 'client key or ip address'}]
+    [KEYED BY {NONE | USER_NAME | IP_ADDRESS | CLIENT_KEY | CLIENT_KEY, USER_NAME | CLIENT_KEY, IP_ADDRESS} | NOT KEYED]
     [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY | WEEK | MONTH | QUARTER | YEAR}
-        {MAX { {QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = number } [,...] |
+        {MAX { {QUERIES | ERRORS | RESULT_ROWS | RESULT_BYTES | READ_ROWS | READ_BYTES | EXECUTION_TIME} = number } [,...] |
          NO LIMITS | TRACKING ONLY} [,...]]
     [TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
 ```
 
-### Пример {#create-quota-example}
+В секции `ON CLUSTER` можно указать кластеры, на которых создается квота, см. [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md).
 
-Ограничить максимальное количество запросов для текущего пользователя до 123 запросов каждые 15 месяцев:
+Поддерживаются два варианта написания составных типов ключей: с подчеркиванием (`CLIENT_KEY`) или через пробел и в одинарных кавычках (`'client key'`). Также можно использовать ключ `'client key or user name'` вместо `CLIENT_KEY, USER_NAME`, и ключ `'client key or ip address'` вместо `CLIENT_KEY, IP_ADDRESS`.
+
+Поддерживаются также два варианта написания составных типов ресурсов: с подчеркиванием (`RESULT_ROWS`) или без подчеркивания, через пробел (`RESULT ROWS`). 
+
+**Примеры**
+
+Ограничить максимальное количество запросов для текущего пользователя — не более 123 запросов за каждые 15 месяцев:
 
 ``` sql
-CREATE QUOTA qA FOR INTERVAL 15 MONTH MAX QUERIES 123 TO CURRENT_USER
+CREATE QUOTA qA FOR INTERVAL 15 MONTH MAX QUERIES 123 TO CURRENT_USER;
+```
+
+Ограничить по умолчанию максимальное время выполнения запроса — не более полсекунды за каждые 30 минут, а также максимальное число запросов — не более 321 и максимальное число ошибок — не более 10 за каждые 5 кварталов:
+
+``` sql
+CREATE QUOTA qB FOR INTERVAL 30 MINUTE MAX EXECUTION_TIME = 0.5, FOR INTERVAL 5 QUATER MAX QUERIES = 321, ERRORS = 10 TO default;
 ```
 
 [Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/create/quota) 

From 140bcc4dc3dcffd2f4b86d76ee5041e05fef83c3 Mon Sep 17 00:00:00 2001
From: keenwolf <keenwolf@gmail.com>
Date: Sat, 23 Jan 2021 16:45:05 +0800
Subject: [PATCH 0147/1238] Just to restart the CI test being suspended
 unexpectedly

---
 tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
index d66b245dc74..8d4f36a0503 100755
--- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
+++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
@@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
 
-# Data preparation
+# Data preparation.
 # When run with client mode on different machine to the server, the data-file creation maybe implemented in SQL. Now we just make it simple
 mkdir -p /var/lib/clickhouse/user_files/
 echo -n aaaaaaaaa > /var/lib/clickhouse/user_files/a.txt

From 154382925902d4d1d764b508bcedbeb477c026c7 Mon Sep 17 00:00:00 2001
From: keenwolf <keenwolf@gmail.com>
Date: Sat, 23 Jan 2021 16:53:43 +0800
Subject: [PATCH 0148/1238] Clean some comments

---
 src/Functions/FunctionFile.cpp | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp
index afd24f4d575..6b17454619a 100644
--- a/src/Functions/FunctionFile.cpp
+++ b/src/Functions/FunctionFile.cpp
@@ -58,7 +58,6 @@ namespace DB
                 auto & res_chars = res->getChars();
                 auto & res_offsets = res->getOffsets();
 
-                //File access permission check
                 const String user_files_path = context.getUserFilesPath();
                 String user_files_absolute_path = Poco::Path(user_files_path).makeAbsolute().makeDirectory().toString();
                 Poco::Path poco_filepath = Poco::Path(filename);
@@ -67,27 +66,11 @@ namespace DB
                 const String file_absolute_path = poco_filepath.absolute().toString();
                 checkReadIsAllowed(user_files_absolute_path, file_absolute_path);
 
-                //Method-1: Read file with ReadBuffer
                 ReadBufferFromFile in(file_absolute_path);
                 ssize_t file_len = Poco::File(file_absolute_path).getSize();
                 res_chars.resize_exact(file_len + 1);
                 char *res_buf = reinterpret_cast<char *>(&res_chars[0]);
                 in.readStrict(res_buf, file_len);
-
-                /*
-                //Method-2(Just for reference): Read directly into the String buf, which avoiding one copy from PageCache to ReadBuffer
-                int fd;
-                if (-1 == (fd = open(file_absolute_path.c_str(), O_RDONLY)))
-                     throwFromErrnoWithPath("Cannot open file " + std::string(file_absolute_path), std::string(file_absolute_path),
-                                           errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE);
-                if (file_len != pread(fd, res_buf, file_len, 0))
-                    throwFromErrnoWithPath("Read failed with " + std::string(file_absolute_path), std::string(file_absolute_path),
-                                           ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR);
-                if (0 != close(fd))
-                    throw Exception("Cannot close file " + std::string(file_absolute_path), ErrorCodes::CANNOT_CLOSE_FILE);
-                fd = -1;
-                */
-
                 res_offsets.push_back(file_len + 1);
                 res_buf[file_len] = '\0';
 

From c56750c9ceb19abd14bc7961fc0bf4ec0bd4b992 Mon Sep 17 00:00:00 2001
From: keenwolf <keenwolf@gmail.com>
Date: Sat, 23 Jan 2021 21:43:27 +0800
Subject: [PATCH 0149/1238] Remove ErrorCodes unused

---
 src/Functions/FunctionFile.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp
index 6b17454619a..e4327862982 100644
--- a/src/Functions/FunctionFile.cpp
+++ b/src/Functions/FunctionFile.cpp
@@ -15,10 +15,6 @@ namespace DB
     {
         extern const int ILLEGAL_COLUMN;
         extern const int NOT_IMPLEMENTED;
-        extern const int FILE_DOESNT_EXIST;
-        extern const int CANNOT_OPEN_FILE;
-        extern const int CANNOT_CLOSE_FILE;
-        extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR;
         extern const int INCORRECT_FILE_NAME;
         extern const int DATABASE_ACCESS_DENIED;
     }

From 6d23dd2590e21ac3b07688bc2185450279a15988 Mon Sep 17 00:00:00 2001
From: keenwolf <keenwolf@gmail.com>
Date: Sat, 23 Jan 2021 23:57:08 +0800
Subject: [PATCH 0150/1238] fix test: to get user_files_path from config

---
 .../01658_read_file_to_stringcolumn.sh        | 38 ++++++++++---------
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
index 8d4f36a0503..aeaf08cb4d8 100755
--- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
+++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
@@ -7,12 +7,14 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 
 # Data preparation.
 # When run with client mode on different machine to the server, the data-file creation maybe implemented in SQL. Now we just make it simple
-mkdir -p /var/lib/clickhouse/user_files/
-echo -n aaaaaaaaa > /var/lib/clickhouse/user_files/a.txt
-echo -n bbbbbbbbb > /var/lib/clickhouse/user_files/b.txt
-echo -n ccccccccc > /var/lib/clickhouse/user_files/c.txt
+#user_files_path=$(clickhouse-client --query "select data_path from system.databases where name='default'" | sed -En 's/data\/default/user_files/p')
+user_files_path=$(grep user_files_path ${CLICKHOUSE_CONFIG} | awk '{match($0,"<user_files_path>(.*)</user_files_path>",path); print path[1]}')
+mkdir -p ${user_files_path}/
+echo -n aaaaaaaaa > ${user_files_path}/a.txt
+echo -n bbbbbbbbb > ${user_files_path}/b.txt
+echo -n ccccccccc > ${user_files_path}/c.txt
 echo -n ccccccccc > /tmp/c.txt
-mkdir -p /var/lib/clickhouse/user_files/dir
+mkdir -p ${user_files_path}/dir
 
 ### 1st TEST in CLIENT mode.
 ${CLICKHOUSE_CLIENT} --query "drop table if exists data;"
@@ -20,23 +22,23 @@ ${CLICKHOUSE_CLIENT} --query "create table data (A String, B String) engine=Merg
 
 
 # Valid cases:
-${CLICKHOUSE_CLIENT} --query "select file('/var/lib/clickhouse/user_files/a.txt'), file('/var/lib/clickhouse/user_files/b.txt');";echo ":"$?
-${CLICKHOUSE_CLIENT} --query "insert into data select file('/var/lib/clickhouse/user_files/a.txt'), file('/var/lib/clickhouse/user_files/b.txt');";echo ":"$?
-${CLICKHOUSE_CLIENT} --query "insert into data select file('/var/lib/clickhouse/user_files/a.txt'), file('/var/lib/clickhouse/user_files/b.txt');";echo ":"$?
-${CLICKHOUSE_CLIENT} --query "select file('/var/lib/clickhouse/user_files/c.txt'), * from data";echo ":"$?
+${CLICKHOUSE_CLIENT} --query "select file('${user_files_path}/a.txt'), file('${user_files_path}/b.txt');";echo ":"$?
+${CLICKHOUSE_CLIENT} --query "insert into data select file('${user_files_path}/a.txt'), file('${user_files_path}/b.txt');";echo ":"$?
+${CLICKHOUSE_CLIENT} --query "insert into data select file('${user_files_path}/a.txt'), file('${user_files_path}/b.txt');";echo ":"$?
+${CLICKHOUSE_CLIENT} --query "select file('${user_files_path}/c.txt'), * from data";echo ":"$?
 
 
 # Invalid cases: (Here using sub-shell to catch exception avoiding the test quit)
 # Test non-exists file
-echo "clickhouse-client --query "'"select file('"'nonexist.txt'), file('/var/lib/clickhouse/user_files/b.txt')"'";echo :$?' | bash 2>/dev/null
+echo "clickhouse-client --query "'"select file('"'nonexist.txt'), file('${user_files_path}/b.txt')"'";echo :$?' | bash 2>/dev/null
 # Test isDir
-echo "clickhouse-client --query "'"select file('"'/var/lib/clickhouse/user_files/dir'), file('/var/lib/clickhouse/user_files/b.txt')"'";echo :$?' | bash 2>/dev/null
+echo "clickhouse-client --query "'"select file('"'${user_files_path}/dir'), file('${user_files_path}/b.txt')"'";echo :$?' | bash 2>/dev/null
 # Test path out of the user_files directory. It's not allowed in client mode
-echo "clickhouse-client --query "'"select file('"'/tmp/c.txt'), file('/var/lib/clickhouse/user_files/b.txt')"'";echo :$?' | bash 2>/dev/null
+echo "clickhouse-client --query "'"select file('"'/tmp/c.txt'), file('${user_files_path}/b.txt')"'";echo :$?' | bash 2>/dev/null
 
 # Test relative path consists of ".." whose absolute path is out of the user_files directory.
-echo "clickhouse-client --query "'"select file('"'/var/lib/clickhouse/user_files/../../../../tmp/c.txt'), file('b.txt')"'";echo :$?' | bash 2>/dev/null
-echo "clickhouse-client --query "'"select file('"'../../../../a.txt'), file('/var/lib/clickhouse/user_files/b.txt')"'";echo :$?' | bash 2>/dev/null
+echo "clickhouse-client --query "'"select file('"'${user_files_path}/../../../../tmp/c.txt'), file('b.txt')"'";echo :$?' | bash 2>/dev/null
+echo "clickhouse-client --query "'"select file('"'../../../../a.txt'), file('${user_files_path}/b.txt')"'";echo :$?' | bash 2>/dev/null
 
 
@@ -74,8 +76,8 @@ echo "clickhouse-local --query "'"select file('"'dir'), file('b.txt')"'";echo :$
 
 # Restore
 rm -rf a.txt b.txt c.txt dir
-rm -rf /var/lib/clickhouse/user_files/a.txt
-rm -rf /var/lib/clickhouse/user_files/b.txt
-rm -rf /var/lib/clickhouse/user_files/c.txt
+rm -rf ${user_files_path}/a.txt
+rm -rf ${user_files_path}/b.txt
+rm -rf ${user_files_path}/c.txt
 rm -rf /tmp/c.txt
-rm -rf /var/lib/clickhouse/user_files/dir
+rm -rf ${user_files_path}/dir

From 39379bcd5c7478995abe1e990fedfd73b094c462 Mon Sep 17 00:00:00 2001
From: Pavel Kruglov <avogar@sandbox-633380738>
Date: Sat, 23 Jan 2021 19:55:29 +0300
Subject: [PATCH 0151/1238] Update performance test

---
 tests/performance/optimized_select_final_one_part.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/performance/optimized_select_final_one_part.xml b/tests/performance/optimized_select_final_one_part.xml
index 3724bc8f208..c7d505c89cb 100644
--- a/tests/performance/optimized_select_final_one_part.xml
+++ b/tests/performance/optimized_select_final_one_part.xml
@@ -13,7 +13,7 @@
 
     <fill_query>OPTIMIZE TABLE optimized_select_final FINAL</fill_query>
 
-    <query>SELECT max(x) FROM optimized_select_final FINAL where s = 'string' FORMAT Null</query>
+    <query>SELECT * FROM optimized_select_final FINAL where s = 'string' FORMAT Null</query>
 
     <drop_query>DROP TABLE IF EXISTS  optimized_select_final</drop_query>
 

From 135426d3cbaa0e6a72547c9ea92d296658f3d774 Mon Sep 17 00:00:00 2001
From: Olga Revyakina <revolg@yandex-team.ru>
Date: Sat, 23 Jan 2021 21:16:59 +0300
Subject: [PATCH 0152/1238] Syntax updated, examples added.

---
 .../mergetree-family/mergetree.md             | 54 +++++++++++++++++--
 .../mergetree-family/mergetree.md             | 54 +++++++++++++++++--
 2 files changed, 99 insertions(+), 9 deletions(-)

diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md
index 084d05ec0a0..75fc42b6fc6 100644
--- a/docs/en/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md
@@ -45,7 +45,10 @@ ORDER BY expr
 [PARTITION BY expr]
 [PRIMARY KEY expr]
 [SAMPLE BY expr]
-[TTL expr [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'], ...]
+[TTL expr 
+    [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx' [, ...] ]
+    [WHERE conditions] 
+    [GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ] ] 
 [SETTINGS name=value, ...]
 ```
 
@@ -455,7 +458,10 @@ ALTER TABLE example_table
 Table can have an expression for removal of expired rows, and multiple expressions for automatic move of parts between [disks or volumes](#table_engine-mergetree-multiple-volumes). When rows in the table expire, ClickHouse deletes all corresponding rows. For parts moving feature, all rows of a part must satisfy the movement expression criteria.
 
 ``` sql
-TTL expr [DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'], ...
+TTL expr 
+    [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'][, DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'] ...
+    [WHERE conditions] 
+    [GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ]   
 ```
 
 Type of TTL rule may follow each TTL expression. It affects an action which is to be done once the expression is satisfied (reaches current time):
@@ -464,9 +470,17 @@ Type of TTL rule may follow each TTL expression. It affects an action which is t
 -   `TO DISK 'aaa'` - move part to the disk `aaa`;
 -   `TO VOLUME 'bbb'` - move part to the disk `bbb`.
 
-Examples:
+With `WHERE` clause you may specify which of the expired rows to delete or move.
 
-Creating a table with TTL
+With `GROUP BY` clause you may [aggregate](../../../sql-reference/aggregate-functions/index.md) expired rows. `GROUP BY` key expression must be a prefix of the table primary key. 
+
+If a column is part of primary key, but not present in `GROUP BY` key expression, in result rows it contains aggregated value across grouped rows. 
+
+If a column is not present neither in primary key, nor in `SET` clause, in result row it contains any occasional value from grouped rows.
+
+**Examples**
+
+Creating a table with TTL:
 
 ``` sql
 CREATE TABLE example_table
@@ -482,13 +496,43 @@ TTL d + INTERVAL 1 MONTH [DELETE],
     d + INTERVAL 2 WEEK TO DISK 'bbb';
 ```
 
-Altering TTL of the table
+Altering TTL of the table:
 
 ``` sql
 ALTER TABLE example_table
     MODIFY TTL d + INTERVAL 1 DAY;
 ```
 
+Creating a table, where the rows are expired after one month. The expired rows where dates are Mondays are deleted:
+
+``` sql
+CREATE TABLE table_with_where
+(
+    d DateTime, 
+    a Int
+)
+ENGINE = MergeTree
+PARTITION BY toYYYYMM(d)
+ORDER BY d
+TTL d + INTERVAL 1 MONTH DELETE WHERE toDayOfWeek(d) = 1;
+```
+
+Creating a table, where expired rows are aggregated. In result rows `x` contains the maximum value accross the grouped rows, `y` — the minimum value, and `d` — any occasional value from grouped rows.
+
+``` sql
+CREATE TABLE table_for_aggregation
+(
+    d DateTime, 
+    k1 Int, 
+    k2 Int, 
+    x Int, 
+    y Int
+)
+ENGINE = MergeTree
+ORDER BY k1, k2
+TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y);
+```
+
 **Removing Data**
 
 Data with an expired TTL is removed when ClickHouse merges data parts.
diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md
index 9b2a5eafca3..e21d4bc47e2 100644
--- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md
@@ -37,7 +37,10 @@ ORDER BY expr
 [PARTITION BY expr]
 [PRIMARY KEY expr]
 [SAMPLE BY expr]
-[TTL expr [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'], ...]
+[TTL expr 
+    [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx' [, ...] ]
+    [WHERE conditions] 
+    [GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ] ] 
 [SETTINGS name=value, ...]
 ```
 
@@ -443,7 +446,10 @@ ALTER TABLE example_table
 Для таблицы можно задать одно выражение для устаревания данных, а также несколько выражений, по срабатывании которых данные переместятся на [некоторый диск или том](#table_engine-mergetree-multiple-volumes). Когда некоторые данные в таблице устаревают, ClickHouse удаляет все соответствующие строки.
 
 ``` sql
-TTL expr [DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'], ...
+TTL expr 
+    [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'][, DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'] ...
+    [WHERE conditions] 
+    [GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ] 
 ```
 
 За каждым TTL выражением может следовать тип действия, которое выполняется после достижения времени, соответствующего результату TTL выражения:
@@ -452,7 +458,17 @@ TTL expr [DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'], ...
 -   `TO DISK 'aaa'` - переместить данные на диск `aaa`;
 -   `TO VOLUME 'bbb'` - переместить данные на том `bbb`.
 
-Примеры:
+В секции `WHERE` можно задать условие удаления или перемещения устаревших строк.
+
+В секции `GROUP BY` можно [агрегировать](../../../sql-reference/aggregate-functions/index.md) данные из устаревших строк. Колонки, по которым агрегируются данные в `GROUP BY`, должны являться префиксом первичного ключа таблицы. 
+
+Если колонка является частью первичного ключа, но не фигурирует в списке полей в `GROUP BY`, в результирующих строках она будет содержать агрегированные данные по сгруппированным строкам. 
+
+Если колонка не является частью первичного ключа и не указана в секции `SET`, в результирующих строках она будет содержать случайное значение, взятое из одной из сгруппированных строк.
+
+**Примеры**
+
+Создание таблицы с TTL: 
 
 ``` sql
 CREATE TABLE example_table
@@ -468,13 +484,43 @@ TTL d + INTERVAL 1 MONTH [DELETE],
     d + INTERVAL 2 WEEK TO DISK 'bbb';
 ```
 
-Изменение TTL
+Изменение TTL:
 
 ``` sql
 ALTER TABLE example_table
     MODIFY TTL d + INTERVAL 1 DAY;
 ```
 
+Создание таблицы, в которой строки устаревают через месяц. Устаревшие строки удаляются, если дата выпадает на понедельник:
+
+``` sql
+CREATE TABLE table_with_where
+(
+    d DateTime, 
+    a Int
+)
+ENGINE = MergeTree
+PARTITION BY toYYYYMM(d)
+ORDER BY d
+TTL d + INTERVAL 1 MONTH DELETE WHERE toDayOfWeek(d) = 1;
+```
+
+Создание таблицы, где устаревшие строки агрегируются. В результирующих строках колонка `x` содержит максимальное значение по сгруппированным строкам, `y` — минимальное значение, а `d` — случайное значение из одной из сгуппированных строк.
+
+``` sql
+CREATE TABLE table_for_aggregation
+(
+    d DateTime, 
+    k1 Int, 
+    k2 Int, 
+    x Int, 
+    y Int
+)
+ENGINE = MergeTree
+ORDER BY k1, k2
+TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y);
+```
+
 **Удаление данных**
 
 Данные с истекшим TTL удаляются, когда ClickHouse мёржит куски данных.

From a671ebf3e9e1f58616e9cdba49dda949ac9fe7d6 Mon Sep 17 00:00:00 2001
From: keenwolf <keenwolf@gmail.com>
Date: Mon, 25 Jan 2021 11:21:09 +0800
Subject: [PATCH 0153/1238] skip the client test for being unable to get the
 correct user_files_path

---
 .../01658_read_file_to_stringcolumn.reference        | 12 ------------
 .../0_stateless/01658_read_file_to_stringcolumn.sh   |  9 ++++++---
 2 files changed, 6 insertions(+), 15 deletions(-)

diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference
index a22076de920..eb5f1795f18 100644
--- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference
+++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference
@@ -1,15 +1,3 @@
-aaaaaaaaa	bbbbbbbbb
-:0
-:0
-:0
-ccccccccc	aaaaaaaaa	bbbbbbbbb
-ccccccccc	aaaaaaaaa	bbbbbbbbb
-:0
-:107
-:79
-:35
-:35
-:35
 699415
 aaaaaaaaa	bbbbbbbbb
 ccccccccc	aaaaaaaaa	bbbbbbbbb
diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
index aeaf08cb4d8..cc8ed3f7294 100755
--- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
+++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
@@ -7,8 +7,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 
 # Data preparation.
 # When run with client mode on different machine to the server, the data-file creation maybe implemented in SQL. Now we just make it simple
-#user_files_path=$(clickhouse-client --query "select data_path from system.databases where name='default'" | sed -En 's/data\/default/user_files/p')
-user_files_path=$(grep user_files_path ${CLICKHOUSE_CONFIG} | awk '{match($0,"<user_files_path>(.*)</user_files_path>",path); print path[1]}')
+user_files_path=$(clickhouse-client --query "select data_path from system.databases where name='default'" | sed -En 's/data\/default/user_files/p')
+#user_files_path=$(grep user_files_path ${CLICKHOUSE_CONFIG} | awk '{match($0,"<user_files_path>(.*)</user_files_path>",path); print path[1]}')
 mkdir -p ${user_files_path}/
 echo -n aaaaaaaaa > ${user_files_path}/a.txt
 echo -n bbbbbbbbb > ${user_files_path}/b.txt
@@ -16,6 +16,9 @@ echo -n ccccccccc > ${user_files_path}/c.txt
 echo -n ccccccccc > /tmp/c.txt
 mkdir -p ${user_files_path}/dir
 
+# Skip the client test part, for being unable to get the correct user_files_path
+if false; then
+
 ### 1st TEST in CLIENT mode.
 ${CLICKHOUSE_CLIENT} --query "drop table if exists data;"
 ${CLICKHOUSE_CLIENT} --query "create table data (A String, B String) engine=MergeTree() order by A;"
@@ -40,7 +43,7 @@ echo "clickhouse-client --query "'"select file('"'/tmp/c.txt'), file('${user_fil
 echo "clickhouse-client --query "'"select file('"'${user_files_path}/../../../../tmp/c.txt'), file('b.txt')"'";echo :$?' | bash 2>/dev/null
 echo "clickhouse-client --query "'"select file('"'../../../../a.txt'), file('${user_files_path}/b.txt')"'";echo :$?' | bash 2>/dev/null
 
-
+fi
 
 ### 2nd TEST in LOCAL mode.
 

From 7ff04d7532a378315ca91334d8e98630ccef29a0 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Mon, 25 Jan 2021 13:19:02 +0300
Subject: [PATCH 0154/1238] Some fixes

---
 src/Coordination/LoggerWrapper.h                 |  6 ++++--
 src/Coordination/NuKeeperServer.cpp              |  4 +---
 src/Coordination/NuKeeperStateMachine.cpp        |  4 ++--
 src/Coordination/WriteBufferFromNuraftBuffer.cpp | 15 +++++++++------
 4 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/src/Coordination/LoggerWrapper.h b/src/Coordination/LoggerWrapper.h
index 37de7806e9d..5895457441a 100644
--- a/src/Coordination/LoggerWrapper.h
+++ b/src/Coordination/LoggerWrapper.h
@@ -11,7 +11,9 @@ class LoggerWrapper : public nuraft::logger
 public:
     LoggerWrapper(const std::string & name)
         : log(&Poco::Logger::get(name))
-    {}
+    {
+        set_level(4);
+    }
 
     void put_details(
         int level,
@@ -25,7 +27,7 @@ public:
 
     void set_level(int level) override
     {
-        level = std::max(6, std::min(1, level));
+        level = std::min(6, std::max(1, level));
         log->setLevel(level);
     }
 
diff --git a/src/Coordination/NuKeeperServer.cpp b/src/Coordination/NuKeeperServer.cpp
index 7fb7f25aef6..16f69585af7 100644
--- a/src/Coordination/NuKeeperServer.cpp
+++ b/src/Coordination/NuKeeperServer.cpp
@@ -39,7 +39,7 @@ NuraftError NuKeeperServer::startup()
     params.election_timeout_lower_bound_ = 200;
     params.election_timeout_upper_bound_ = 400;
     params.reserved_log_items_ = 5;
-    params.snapshot_distance_ = 5;
+    params.snapshot_distance_ = 50;
     params.client_req_timeout_ = 3000;
     params.return_method_ = nuraft::raft_params::blocking;
 
@@ -127,12 +127,10 @@ TestKeeperStorage::ResponsesForSessions NuKeeperServer::readZooKeeperResponses(n
 TestKeeperStorage::ResponsesForSessions NuKeeperServer::putRequests(const TestKeeperStorage::RequestsForSessions & requests)
 {
     std::vector<nuraft::ptr<nuraft::buffer>> entries;
-    LOG_DEBUG(&Poco::Logger::get("DEBUG"), "REQUESTS SIZE {}", requests.size());
     for (auto & [session_id, request] : requests)
     {
         ops_mapping[session_id][request->xid] = request->makeResponse();
         entries.push_back(getZooKeeperLogEntry(session_id, request));
-        LOG_DEBUG(&Poco::Logger::get("DEBUG"), "ENTRY SIZE {}", entries.back()->size());
     }
 
     auto result = raft_instance->append_entries(entries);
diff --git a/src/Coordination/NuKeeperStateMachine.cpp b/src/Coordination/NuKeeperStateMachine.cpp
index 79324c91cd3..69088d09472 100644
--- a/src/Coordination/NuKeeperStateMachine.cpp
+++ b/src/Coordination/NuKeeperStateMachine.cpp
@@ -50,7 +50,7 @@ NuKeeperStateMachine::NuKeeperStateMachine()
 
 nuraft::ptr<nuraft::buffer> NuKeeperStateMachine::commit(const size_t log_idx, nuraft::buffer & data)
 {
-    LOG_DEBUG(log, "Commiting logidx {}", log_idx);
+    //LOG_DEBUG(log, "Commiting logidx {}", log_idx);
     if (data.size() == sizeof(size_t))
     {
         LOG_DEBUG(log, "Session ID response {}", log_idx);
@@ -72,9 +72,9 @@ nuraft::ptr<nuraft::buffer> NuKeeperStateMachine::commit(const size_t log_idx, n
             std::lock_guard lock(storage_lock);
             responses_for_sessions = storage.processRequest(request_for_session.request, request_for_session.session_id);
         }
-        //LOG_DEBUG(log, "TOTAL RESPONSES {} FIRST XID {}", responses_for_sessions.size(), responses_for_sessions[0].response->xid);
 
         last_committed_idx = log_idx;
+        //LOG_DEBUG(log, "TOTAL RESPONSES {} FIRST XID {} FOR LOG IDX {}", responses_for_sessions.size(), responses_for_sessions[0].response->xid, log_idx);
         return writeResponses(responses_for_sessions);
     }
 }
diff --git a/src/Coordination/WriteBufferFromNuraftBuffer.cpp b/src/Coordination/WriteBufferFromNuraftBuffer.cpp
index 2f451af6538..1a16b7cef24 100644
--- a/src/Coordination/WriteBufferFromNuraftBuffer.cpp
+++ b/src/Coordination/WriteBufferFromNuraftBuffer.cpp
@@ -14,15 +14,18 @@ void WriteBufferFromNuraftBuffer::nextImpl()
     if (is_finished)
         throw Exception("WriteBufferFromNuraftBuffer is finished", ErrorCodes::CANNOT_WRITE_AFTER_END_OF_BUFFER);
 
-    size_t old_size = buffer->size();
     /// pos may not be equal to vector.data() + old_size, because WriteBuffer::next() can be used to flush data
     size_t pos_offset = pos - reinterpret_cast<Position>(buffer->data_begin());
-    LOG_DEBUG(&Poco::Logger::get("DEBUG"), "BUFFER SIZE {}", old_size * size_multiplier);
-    nuraft::ptr<nuraft::buffer> new_buffer = nuraft::buffer::alloc(old_size * size_multiplier);
-    memcpy(new_buffer->data_begin(), buffer->data_begin(), buffer->size());
-    buffer = new_buffer;
+    size_t old_size = buffer->size();
+    if (pos_offset == old_size)
+    {
+        nuraft::ptr<nuraft::buffer> new_buffer = nuraft::buffer::alloc(old_size * size_multiplier);
+        memcpy(new_buffer->data_begin(), buffer->data_begin(), buffer->size());
+        buffer = new_buffer;
+    }
     internal_buffer = Buffer(reinterpret_cast<Position>(buffer->data_begin() + pos_offset), reinterpret_cast<Position>(buffer->data_begin() + buffer->size()));
     working_buffer = internal_buffer;
+
 }
 
 WriteBufferFromNuraftBuffer::WriteBufferFromNuraftBuffer()
@@ -38,7 +41,7 @@ void WriteBufferFromNuraftBuffer::finalize()
         return;
 
     is_finished = true;
-    size_t real_size = position() - reinterpret_cast<Position>(buffer->data_begin());
+    size_t real_size = pos - reinterpret_cast<Position>(buffer->data_begin());
     nuraft::ptr<nuraft::buffer> new_buffer = nuraft::buffer::alloc(real_size);
     memcpy(new_buffer->data_begin(), buffer->data_begin(), real_size);
     buffer = new_buffer;

From dea4b5009bb716e53f8b1b84548ad5e0497574c6 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Mon, 25 Jan 2021 15:29:12 +0300
Subject: [PATCH 0155/1238] Some server initialization

---
 programs/server/Server.cpp                    |  4 +-
 src/Common/ErrorCodes.cpp                     |  1 +
 src/Coordination/InMemoryLogStore.cpp         |  8 +--
 src/Coordination/NuKeeperServer.cpp           | 40 +++++++-----
 src/Coordination/NuKeeperServer.h             | 12 +---
 src/Coordination/NuKeeperStateMachine.cpp     |  4 --
 .../TestKeeperStorageDispatcher.cpp           | 61 ++++++++++++++-----
 .../TestKeeperStorageDispatcher.h             | 10 +--
 src/Interpreters/Context.cpp                  | 17 +++++-
 src/Interpreters/Context.h                    |  1 +
 src/Server/TestKeeperTCPHandler.cpp           |  4 +-
 tests/config/config.d/test_keeper_port.xml    |  8 +++
 12 files changed, 114 insertions(+), 56 deletions(-)

diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 66a9b700e89..ddd72e97dde 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -842,8 +842,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
         listen_try = true;
     }
 
-    /// Initialize test keeper raft
-    global_context->getTestKeeperStorageDispatcher();
+    /// Initialize test keeper RAFT. Do nothing if no test_keeper_server in config.
+    global_context->initializeTestKeeperStorageDispatcher();
 
     for (const auto & listen_host : listen_hosts)
     {
diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp
index a2cd65137c0..1c398a52666 100644
--- a/src/Common/ErrorCodes.cpp
+++ b/src/Common/ErrorCodes.cpp
@@ -533,6 +533,7 @@
     M(564, INTERSERVER_SCHEME_DOESNT_MATCH) \
     M(565, TOO_MANY_PARTITIONS) \
     M(566, CANNOT_RMDIR) \
+    M(567, RAFT_ERROR) \
     \
     M(999, KEEPER_EXCEPTION) \
     M(1000, POCO_EXCEPTION) \
diff --git a/src/Coordination/InMemoryLogStore.cpp b/src/Coordination/InMemoryLogStore.cpp
index b9e2e502fc7..101458891e7 100644
--- a/src/Coordination/InMemoryLogStore.cpp
+++ b/src/Coordination/InMemoryLogStore.cpp
@@ -34,7 +34,7 @@ size_t InMemoryLogStore::next_slot() const
 
 nuraft::ptr<nuraft::log_entry> InMemoryLogStore::last_entry() const
 {
-    ulong next_idx = next_slot();
+    size_t next_idx = next_slot();
     std::lock_guard<std::mutex> lock(logs_lock);
     auto entry = logs.find(next_idx - 1);
     if (entry == logs.end())
@@ -105,7 +105,7 @@ nuraft::ptr<nuraft::log_entry> InMemoryLogStore::entry_at(size_t index)
 
 size_t InMemoryLogStore::term_at(size_t index)
 {
-    ulong term = 0;
+    size_t term = 0;
     {
         std::lock_guard<std::mutex> l(logs_lock);
         auto entry = logs.find(index);
@@ -121,7 +121,7 @@ nuraft::ptr<nuraft::buffer> InMemoryLogStore::pack(size_t index, Int32 cnt)
     std::vector<nuraft::ptr<nuraft::buffer>> returned_logs;
 
     size_t size_total = 0;
-    for (ulong ii = index; ii < index + cnt; ++ii)
+    for (size_t ii = index; ii < index + cnt; ++ii)
     {
         ptr<log_entry> le = nullptr;
         {
@@ -180,7 +180,7 @@ void InMemoryLogStore::apply_pack(size_t index, nuraft::buffer & pack)
 bool InMemoryLogStore::compact(size_t last_log_index)
 {
     std::lock_guard<std::mutex> l(logs_lock);
-    for (ulong ii = start_idx; ii <= last_log_index; ++ii)
+    for (size_t ii = start_idx; ii <= last_log_index; ++ii)
     {
         auto entry = logs.find(ii);
         if (entry != logs.end())
diff --git a/src/Coordination/NuKeeperServer.cpp b/src/Coordination/NuKeeperServer.cpp
index 16f69585af7..c79cdd64014 100644
--- a/src/Coordination/NuKeeperServer.cpp
+++ b/src/Coordination/NuKeeperServer.cpp
@@ -13,6 +13,11 @@
 namespace DB
 {
 
+namespace ErrorCodes
+{
+    extern const int TIMEOUT_EXCEEDED;
+    extern const int RAFT_ERROR;
+}
 
 NuKeeperServer::NuKeeperServer(int server_id_, const std::string & hostname_, int port_)
     : server_id(server_id_)
@@ -24,22 +29,22 @@ NuKeeperServer::NuKeeperServer(int server_id_, const std::string & hostname_, in
 {
 }
 
-NuraftError NuKeeperServer::addServer(int server_id_, const std::string & server_uri_)
+bool NuKeeperServer::addServer(int server_id_, const std::string & server_uri_)
 {
     nuraft::srv_config config(server_id_, server_uri_);
     auto ret1 = raft_instance->add_srv(config);
-    return NuraftError{ret1->get_result_code(), ret1->get_result_str()};
+    return ret1->get_result_code() == nuraft::cmd_result_code::OK;
 }
 
 
-NuraftError NuKeeperServer::startup()
+void NuKeeperServer::startup()
 {
     nuraft::raft_params params;
     params.heart_beat_interval_ = 100;
     params.election_timeout_lower_bound_ = 200;
     params.election_timeout_upper_bound_ = 400;
-    params.reserved_log_items_ = 5;
-    params.snapshot_distance_ = 50;
+    params.reserved_log_items_ = 5000;
+    params.snapshot_distance_ = 5000;
     params.client_req_timeout_ = 3000;
     params.return_method_ = nuraft::raft_params::blocking;
 
@@ -48,25 +53,26 @@ NuraftError NuKeeperServer::startup()
         nuraft::asio_service::options{}, params);
 
     if (!raft_instance)
-        return NuraftError{nuraft::cmd_result_code::TIMEOUT, "Cannot create RAFT instance"};
+        throw Exception(ErrorCodes::RAFT_ERROR, "Cannot allocate RAFT instance");
 
     static constexpr auto MAX_RETRY = 30;
     for (size_t i = 0; i < MAX_RETRY; ++i)
     {
         if (raft_instance->is_initialized())
-            return NuraftError{nuraft::cmd_result_code::OK, ""};
+            return;
 
         std::this_thread::sleep_for(std::chrono::milliseconds(100));
     }
 
-    return NuraftError{nuraft::cmd_result_code::TIMEOUT, "Cannot start RAFT instance"};
+    throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Cannot start RAFT server within startup timeout");
 }
 
-NuraftError NuKeeperServer::shutdown()
+TestKeeperStorage::ResponsesForSessions NuKeeperServer::shutdown(const TestKeeperStorage::RequestsForSessions & expired_requests)
 {
+    auto responses = putRequests(expired_requests);
     if (!launcher.shutdown(5))
-        return NuraftError{nuraft::cmd_result_code::TIMEOUT, "Temout waiting RAFT instance to shutdown"};
-    return NuraftError{nuraft::cmd_result_code::OK, ""};
+        LOG_WARNING(&Poco::Logger::get("NuKeeperServer"), "Failed to shutdown RAFT server in {} seconds", 5);
+    return responses;
 }
 
 namespace
@@ -96,6 +102,7 @@ TestKeeperStorage::ResponsesForSessions NuKeeperServer::readZooKeeperResponses(n
         int64_t zxid;
         Coordination::Error err;
 
+        /// FIXME (alesap) We don't need to parse responses here
         Coordination::read(length, buf);
         Coordination::read(xid, buf);
         Coordination::read(zxid, buf);
@@ -135,10 +142,10 @@ TestKeeperStorage::ResponsesForSessions NuKeeperServer::putRequests(const TestKe
 
     auto result = raft_instance->append_entries(entries);
     if (!result->get_accepted())
-        return {};
+        throw Exception(ErrorCodes::RAFT_ERROR, "Cannot send requests to RAFT, mostly because we are not leader");
 
     if (result->get_result_code() != nuraft::cmd_result_code::OK)
-        return {};
+        throw Exception(ErrorCodes::RAFT_ERROR, "Requests failed");
 
     return readZooKeeperResponses(result->get());
 }
@@ -146,16 +153,17 @@ TestKeeperStorage::ResponsesForSessions NuKeeperServer::putRequests(const TestKe
 
 int64_t NuKeeperServer::getSessionID()
 {
-    auto entry = nuraft::buffer::alloc(sizeof(size_t));
+    auto entry = nuraft::buffer::alloc(sizeof(int64_t));
+    /// Just special session request
     nuraft::buffer_serializer bs(entry);
     bs.put_i64(0);
 
     auto result = raft_instance->append_entries({entry});
     if (!result->get_accepted())
-        return -1;
+        throw Exception(ErrorCodes::RAFT_ERROR, "Cannot send session_id request to RAFT");
 
     if (result->get_result_code() != nuraft::cmd_result_code::OK)
-        return -1;
+        throw Exception(ErrorCodes::RAFT_ERROR, "session_id request failed to RAFT");
 
     auto resp = result->get();
     nuraft::buffer_serializer bs_resp(resp);
diff --git a/src/Coordination/NuKeeperServer.h b/src/Coordination/NuKeeperServer.h
index c77a7a8be0a..6f2ca72eae5 100644
--- a/src/Coordination/NuKeeperServer.h
+++ b/src/Coordination/NuKeeperServer.h
@@ -10,12 +10,6 @@
 namespace DB
 {
 
-struct NuraftError
-{
-    nuraft::cmd_result_code code;
-    std::string message;
-};
-
 class NuKeeperServer
 {
 private:
@@ -46,15 +40,15 @@ private:
 public:
     NuKeeperServer(int server_id_, const std::string & hostname_, int port_);
 
-    NuraftError startup();
+    void startup();
 
     TestKeeperStorage::ResponsesForSessions putRequests(const TestKeeperStorage::RequestsForSessions & requests);
 
     int64_t getSessionID();
 
-    NuraftError addServer(int server_id_, const std::string & server_uri);
+    bool addServer(int server_id_, const std::string & server_uri);
 
-    NuraftError shutdown();
+    TestKeeperStorage::ResponsesForSessions shutdown(const TestKeeperStorage::RequestsForSessions & expired_requests);
 };
 
 }
diff --git a/src/Coordination/NuKeeperStateMachine.cpp b/src/Coordination/NuKeeperStateMachine.cpp
index 69088d09472..13c0f92e604 100644
--- a/src/Coordination/NuKeeperStateMachine.cpp
+++ b/src/Coordination/NuKeeperStateMachine.cpp
@@ -50,7 +50,6 @@ NuKeeperStateMachine::NuKeeperStateMachine()
 
 nuraft::ptr<nuraft::buffer> NuKeeperStateMachine::commit(const size_t log_idx, nuraft::buffer & data)
 {
-    //LOG_DEBUG(log, "Commiting logidx {}", log_idx);
     if (data.size() == sizeof(size_t))
     {
         LOG_DEBUG(log, "Session ID response {}", log_idx);
@@ -66,7 +65,6 @@ nuraft::ptr<nuraft::buffer> NuKeeperStateMachine::commit(const size_t log_idx, n
     else
     {
         auto request_for_session = parseRequest(data);
-        //LOG_DEBUG(log, "GOT REQUEST {}", Coordination::toString(request_for_session.request->getOpNum()));
         TestKeeperStorage::ResponsesForSessions responses_for_sessions;
         {
             std::lock_guard lock(storage_lock);
@@ -74,7 +72,6 @@ nuraft::ptr<nuraft::buffer> NuKeeperStateMachine::commit(const size_t log_idx, n
         }
 
         last_committed_idx = log_idx;
-        //LOG_DEBUG(log, "TOTAL RESPONSES {} FIRST XID {} FOR LOG IDX {}", responses_for_sessions.size(), responses_for_sessions[0].response->xid, log_idx);
         return writeResponses(responses_for_sessions);
     }
 }
@@ -98,7 +95,6 @@ bool NuKeeperStateMachine::apply_snapshot(nuraft::snapshot & s)
 
 nuraft::ptr<nuraft::snapshot> NuKeeperStateMachine::last_snapshot()
 {
-    LOG_DEBUG(log, "Trying to get last snapshot");
    // Just return the latest snapshot.
     std::lock_guard<std::mutex> lock(snapshots_lock);
     auto entry = snapshots.rbegin();
diff --git a/src/Coordination/TestKeeperStorageDispatcher.cpp b/src/Coordination/TestKeeperStorageDispatcher.cpp
index 9cc40f6e5c3..120e3b2aae6 100644
--- a/src/Coordination/TestKeeperStorageDispatcher.cpp
+++ b/src/Coordination/TestKeeperStorageDispatcher.cpp
@@ -27,7 +27,7 @@ void TestKeeperStorageDispatcher::processingThread()
                 if (shutdown)
                     break;
 
-                auto responses = server.putRequests({request});
+                auto responses = server->putRequests({request});
                 for (const auto & response_for_session : responses)
                     setResponse(response_for_session.session_id, response_for_session.response);
             }
@@ -67,26 +67,27 @@ void TestKeeperStorageDispatcher::finalize()
             processing_thread.join();
     }
 
-    //TestKeeperStorage::RequestsForSessions expired_requests;
-    //TestKeeperStorage::RequestForSession request;
-    //while (requests_queue.tryPop(request))
-    //    expired_requests.push_back(TestKeeperStorage::RequestForSession{request});
+    if (server)
+    {
+        TestKeeperStorage::RequestsForSessions expired_requests;
+        TestKeeperStorage::RequestForSession request;
+        while (requests_queue.tryPop(request))
+            expired_requests.push_back(TestKeeperStorage::RequestForSession{request});
 
-    //auto expired_responses = storage.finalize(expired_requests);
+        auto expired_responses = server->shutdown(expired_requests);
 
-    //for (const auto & response_for_session : expired_responses)
-    //    setResponse(response_for_session.session_id, response_for_session.response);
-    /// TODO FIXME
-    server.shutdown();
+        for (const auto & response_for_session : expired_responses)
+            setResponse(response_for_session.session_id, response_for_session.response);
+    }
 }
 
-void TestKeeperStorageDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id)
+bool TestKeeperStorageDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id)
 {
 
     {
         std::lock_guard lock(session_to_response_callback_mutex);
         if (session_to_response_callback.count(session_id) == 0)
-            throw Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unknown session id {}", session_id);
+            return false;
     }
 
     TestKeeperStorage::RequestForSession request_info;
@@ -99,13 +100,43 @@ void TestKeeperStorageDispatcher::putRequest(const Coordination::ZooKeeperReques
         requests_queue.push(std::move(request_info));
     else if (!requests_queue.tryPush(std::move(request_info), operation_timeout.totalMilliseconds()))
         throw Exception("Cannot push request to queue within operation timeout", ErrorCodes::TIMEOUT_EXCEEDED);
+    return true;
 }
 
-TestKeeperStorageDispatcher::TestKeeperStorageDispatcher()
-    : server(1, "localhost", 44444)
+
+void TestKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfiguration & config)
 {
-    server.startup();
+    int myid = config.getInt("test_keeper_server.server_id");
+    std::string myhostname;
+    int myport;
+
+    Poco::Util::AbstractConfiguration::Keys keys;
+    config.keys("test_keeper_server.raft_configuration", keys);
+
+    std::vector<std::tuple<int, std::string, int>> server_configs;
+    for (const auto & server_key : keys)
+    {
+        int server_id = config.getInt("test_keeper_server.raft_configuration." + server_key + ".id");
+        std::string hostname = config.getString("test_keeper_server.raft_configuration." + server_key + ".hostname");
+        int port = config.getInt("test_keeper_server.raft_configuration." + server_key + ".port");
+        if (server_id == myid)
+        {
+            myhostname = hostname;
+            myport = port;
+        }
+        else
+        {
+            server_configs.emplace_back(server_id, hostname, port);
+        }
+    }
+
+    server = std::make_unique<NuKeeperServer>(myid, myhostname, myport);
+    server->startup();
+    for (const auto & [id, hostname, port] : server_configs)
+        server->addServer(id, hostname + ":" + std::to_string(port));
+
     processing_thread = ThreadFromGlobalPool([this] { processingThread(); });
+
 }
 
 TestKeeperStorageDispatcher::~TestKeeperStorageDispatcher()
diff --git a/src/Coordination/TestKeeperStorageDispatcher.h b/src/Coordination/TestKeeperStorageDispatcher.h
index ef788a16369..aa220beecf2 100644
--- a/src/Coordination/TestKeeperStorageDispatcher.h
+++ b/src/Coordination/TestKeeperStorageDispatcher.h
@@ -30,7 +30,7 @@ private:
 
     ThreadFromGlobalPool processing_thread;
 
-    NuKeeperServer server;
+    std::unique_ptr<NuKeeperServer> server;
     std::mutex session_id_mutex;
 
 private:
@@ -39,16 +39,18 @@ private:
     void setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response);
 
 public:
-    TestKeeperStorageDispatcher();
+    TestKeeperStorageDispatcher() = default;
+
+    void initialize(const Poco::Util::AbstractConfiguration & config);
 
     ~TestKeeperStorageDispatcher();
 
-    void putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id);
+    bool putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id);
 
     int64_t getSessionID()
     {
         std::lock_guard lock(session_id_mutex);
-        return server.getSessionID();
+        return server->getSessionID();
     }
 
     void registerSession(int64_t session_id, ZooKeeperResponseCallback callback);
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 5f49a85843c..ee5be5f6edb 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -1568,11 +1568,26 @@ zkutil::ZooKeeperPtr Context::getZooKeeper() const
     return shared->zookeeper;
 }
 
+void Context::initializeTestKeeperStorageDispatcher() const
+{
+    std::lock_guard lock(shared->test_keeper_storage_dispatcher_mutex);
+
+    if (shared->test_keeper_storage_dispatcher)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to initialize TestKeeper multiple times");
+
+    auto & config = getConfigRef();
+    if (config.has("test_keeper_server"))
+    {
+        shared->test_keeper_storage_dispatcher = std::make_shared<TestKeeperStorageDispatcher>();
+        shared->test_keeper_storage_dispatcher->initialize(config);
+    }
+}
+
 std::shared_ptr<TestKeeperStorageDispatcher> & Context::getTestKeeperStorageDispatcher() const
 {
     std::lock_guard lock(shared->test_keeper_storage_dispatcher_mutex);
     if (!shared->test_keeper_storage_dispatcher)
-        shared->test_keeper_storage_dispatcher = std::make_shared<TestKeeperStorageDispatcher>();
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "TestKeeper must be initialized before requests");
 
     return shared->test_keeper_storage_dispatcher;
 }
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index 5f3f6b25256..537ddcc0ec8 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -574,6 +574,7 @@ public:
     std::shared_ptr<zkutil::ZooKeeper> getAuxiliaryZooKeeper(const String & name) const;
 
 
+    void initializeTestKeeperStorageDispatcher() const;
     std::shared_ptr<TestKeeperStorageDispatcher> & getTestKeeperStorageDispatcher() const;
 
     /// Set auxiliary zookeepers configuration at server starting or configuration reloading.
diff --git a/src/Server/TestKeeperTCPHandler.cpp b/src/Server/TestKeeperTCPHandler.cpp
index 97999c2b1c1..3e88d543112 100644
--- a/src/Server/TestKeeperTCPHandler.cpp
+++ b/src/Server/TestKeeperTCPHandler.cpp
@@ -30,6 +30,7 @@ namespace ErrorCodes
     extern const int SYSTEM_ERROR;
     extern const int LOGICAL_ERROR;
     extern const int UNEXPECTED_PACKET_FROM_CLIENT;
+    extern const int TIMEOUT_EXCEEDED;
 }
 
 struct PollResult
@@ -423,7 +424,8 @@ std::pair<Coordination::OpNum, Coordination::XID> TestKeeperTCPHandler::receiveR
     request->xid = xid;
     request->readImpl(*in);
 
-    test_keeper_storage_dispatcher->putRequest(request, session_id);
+    if (!test_keeper_storage_dispatcher->putRequest(request, session_id))
+        throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Sesssion {} already disconnected", session_id);
     return std::make_pair(opnum, xid);
 }
 
diff --git a/tests/config/config.d/test_keeper_port.xml b/tests/config/config.d/test_keeper_port.xml
index 79e993b41f7..fff60d749f6 100644
--- a/tests/config/config.d/test_keeper_port.xml
+++ b/tests/config/config.d/test_keeper_port.xml
@@ -3,5 +3,13 @@
         <tcp_port>9181</tcp_port>
         <operation_timeout_ms>10000</operation_timeout_ms>
         <session_timeout_ms>30000</session_timeout_ms>
+        <server_id>1</server_id>
+        <raft_configuration>
+            <server>
+                <id>1</id>
+                <hostname>localhost</hostname>
+                <port>44444</port>
+            </server>
+        </raft_configuration>
     </test_keeper_server>
 </yandex>

From 97b9dba460529d254a8416a80ae82f80bda302ac Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Mon, 25 Jan 2021 17:10:18 +0300
Subject: [PATCH 0156/1238] Multinode config

---
 programs/server/Server.cpp                    |  7 +++-
 src/Coordination/NuKeeperServer.cpp           | 11 ++++--
 src/Coordination/NuKeeperServer.h             |  6 ++-
 .../TestKeeperStorageDispatcher.cpp           | 16 +++++---
 .../configs/enable_test_keeper.xml            |  8 ++++
 .../test_testkeeper_multinode/__init__.py     |  1 +
 .../configs/enable_test_keeper1.xml           | 28 +++++++++++++
 .../configs/enable_test_keeper2.xml           | 28 +++++++++++++
 .../configs/enable_test_keeper3.xml           | 28 +++++++++++++
 .../configs/log_conf.xml                      | 12 ++++++
 .../configs/use_test_keeper.xml               |  8 ++++
 .../test_testkeeper_multinode/test.py         | 39 +++++++++++++++++++
 12 files changed, 179 insertions(+), 13 deletions(-)
 create mode 100644 tests/integration/test_testkeeper_multinode/__init__.py
 create mode 100644 tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
 create mode 100644 tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
 create mode 100644 tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
 create mode 100644 tests/integration/test_testkeeper_multinode/configs/log_conf.xml
 create mode 100644 tests/integration/test_testkeeper_multinode/configs/use_test_keeper.xml
 create mode 100644 tests/integration/test_testkeeper_multinode/test.py

diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index ddd72e97dde..04919e8504c 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -842,8 +842,11 @@ int Server::main(const std::vector<std::string> & /*args*/)
         listen_try = true;
     }
 
-    /// Initialize test keeper RAFT. Do nothing if no test_keeper_server in config.
-    global_context->initializeTestKeeperStorageDispatcher();
+    if (config().has("test_keeper_server"))
+    {
+        /// Initialize test keeper RAFT. Do nothing if no test_keeper_server in config.
+        global_context->initializeTestKeeperStorageDispatcher();
+    }
 
     for (const auto & listen_host : listen_hosts)
     {
diff --git a/src/Coordination/NuKeeperServer.cpp b/src/Coordination/NuKeeperServer.cpp
index c79cdd64014..a3786342e05 100644
--- a/src/Coordination/NuKeeperServer.cpp
+++ b/src/Coordination/NuKeeperServer.cpp
@@ -19,19 +19,20 @@ namespace ErrorCodes
     extern const int RAFT_ERROR;
 }
 
-NuKeeperServer::NuKeeperServer(int server_id_, const std::string & hostname_, int port_)
+NuKeeperServer::NuKeeperServer(int server_id_, const std::string & hostname_, int port_, bool can_become_leader_)
     : server_id(server_id_)
     , hostname(hostname_)
     , port(port_)
     , endpoint(hostname + ":" + std::to_string(port))
+    , can_become_leader(can_become_leader_)
     , state_machine(nuraft::cs_new<NuKeeperStateMachine>())
     , state_manager(nuraft::cs_new<InMemoryStateManager>(server_id, endpoint))
 {
 }
 
-bool NuKeeperServer::addServer(int server_id_, const std::string & server_uri_)
+bool NuKeeperServer::addServer(int server_id_, const std::string & server_uri_, bool can_become_leader_)
 {
-    nuraft::srv_config config(server_id_, server_uri_);
+    nuraft::srv_config config(server_id_, 0, server_uri_, "", /*FIXME follower=*/ !can_become_leader_);
     auto ret1 = raft_instance->add_srv(config);
     return ret1->get_result_code() == nuraft::cmd_result_code::OK;
 }
@@ -69,7 +70,9 @@ void NuKeeperServer::startup()
 
 TestKeeperStorage::ResponsesForSessions NuKeeperServer::shutdown(const TestKeeperStorage::RequestsForSessions & expired_requests)
 {
-    auto responses = putRequests(expired_requests);
+    TestKeeperStorage::ResponsesForSessions responses;
+    if (can_become_leader)
+        responses = putRequests(expired_requests);
     if (!launcher.shutdown(5))
         LOG_WARNING(&Poco::Logger::get("NuKeeperServer"), "Failed to shutdown RAFT server in {} seconds", 5);
     return responses;
diff --git a/src/Coordination/NuKeeperServer.h b/src/Coordination/NuKeeperServer.h
index 6f2ca72eae5..4c10614cd5c 100644
--- a/src/Coordination/NuKeeperServer.h
+++ b/src/Coordination/NuKeeperServer.h
@@ -21,6 +21,8 @@ private:
 
     std::string endpoint;
 
+    bool can_become_leader;
+
     nuraft::ptr<NuKeeperStateMachine> state_machine;
 
     nuraft::ptr<nuraft::state_mgr> state_manager;
@@ -38,7 +40,7 @@ private:
     TestKeeperStorage::ResponsesForSessions readZooKeeperResponses(nuraft::ptr<nuraft::buffer> & buffer);
 
 public:
-    NuKeeperServer(int server_id_, const std::string & hostname_, int port_);
+    NuKeeperServer(int server_id_, const std::string & hostname_, int port_, bool can_become_leader_);
 
     void startup();
 
@@ -46,7 +48,7 @@ public:
 
     int64_t getSessionID();
 
-    bool addServer(int server_id_, const std::string & server_uri);
+    bool addServer(int server_id_, const std::string & server_uri, bool can_become_leader_);
 
     TestKeeperStorage::ResponsesForSessions shutdown(const TestKeeperStorage::RequestsForSessions & expired_requests);
 };
diff --git a/src/Coordination/TestKeeperStorageDispatcher.cpp b/src/Coordination/TestKeeperStorageDispatcher.cpp
index 120e3b2aae6..7c78ca0e79f 100644
--- a/src/Coordination/TestKeeperStorageDispatcher.cpp
+++ b/src/Coordination/TestKeeperStorageDispatcher.cpp
@@ -112,28 +112,34 @@ void TestKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfigura
 
     Poco::Util::AbstractConfiguration::Keys keys;
     config.keys("test_keeper_server.raft_configuration", keys);
+    bool my_can_become_leader = true;
 
-    std::vector<std::tuple<int, std::string, int>> server_configs;
+    std::vector<std::tuple<int, std::string, int, bool>> server_configs;
     for (const auto & server_key : keys)
     {
         int server_id = config.getInt("test_keeper_server.raft_configuration." + server_key + ".id");
         std::string hostname = config.getString("test_keeper_server.raft_configuration." + server_key + ".hostname");
         int port = config.getInt("test_keeper_server.raft_configuration." + server_key + ".port");
+        bool can_become_leader = config.getBool("test_keeper_server.raft_configuration." + server_key + ".can_become_leader", true);
         if (server_id == myid)
         {
             myhostname = hostname;
             myport = port;
+            my_can_become_leader = can_become_leader;
         }
         else
         {
-            server_configs.emplace_back(server_id, hostname, port);
+            server_configs.emplace_back(server_id, hostname, port, can_become_leader);
         }
     }
 
-    server = std::make_unique<NuKeeperServer>(myid, myhostname, myport);
+    server = std::make_unique<NuKeeperServer>(myid, myhostname, myport, my_can_become_leader);
     server->startup();
-    for (const auto & [id, hostname, port] : server_configs)
-        server->addServer(id, hostname + ":" + std::to_string(port));
+    if (my_can_become_leader)
+    {
+        for (const auto & [id, hostname, port, can_become_leader] : server_configs)
+            server->addServer(id, hostname + ":" + std::to_string(port), can_become_leader);
+    }
 
     processing_thread = ThreadFromGlobalPool([this] { processingThread(); });
 
diff --git a/tests/integration/test_testkeeper_back_to_back/configs/enable_test_keeper.xml b/tests/integration/test_testkeeper_back_to_back/configs/enable_test_keeper.xml
index 79e993b41f7..fff60d749f6 100644
--- a/tests/integration/test_testkeeper_back_to_back/configs/enable_test_keeper.xml
+++ b/tests/integration/test_testkeeper_back_to_back/configs/enable_test_keeper.xml
@@ -3,5 +3,13 @@
         <tcp_port>9181</tcp_port>
         <operation_timeout_ms>10000</operation_timeout_ms>
         <session_timeout_ms>30000</session_timeout_ms>
+        <server_id>1</server_id>
+        <raft_configuration>
+            <server>
+                <id>1</id>
+                <hostname>localhost</hostname>
+                <port>44444</port>
+            </server>
+        </raft_configuration>
     </test_keeper_server>
 </yandex>
diff --git a/tests/integration/test_testkeeper_multinode/__init__.py b/tests/integration/test_testkeeper_multinode/__init__.py
new file mode 100644
index 00000000000..e5a0d9b4834
--- /dev/null
+++ b/tests/integration/test_testkeeper_multinode/__init__.py
@@ -0,0 +1 @@
+#!/usr/bin/env python3
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
new file mode 100644
index 00000000000..486942aec71
--- /dev/null
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
@@ -0,0 +1,28 @@
+<yandex>
+    <test_keeper_server>
+        <tcp_port>9181</tcp_port>
+        <operation_timeout_ms>10000</operation_timeout_ms>
+        <session_timeout_ms>30000</session_timeout_ms>
+        <server_id>1</server_id>
+        <raft_configuration>
+            <server>
+                <id>1</id>
+                <hostname>node1</hostname>
+                <port>44444</port>
+                <can_become_leader>true</can_become_leader>
+            </server>
+            <server>
+                <id>2</id>
+                <hostname>node2</hostname>
+                <port>44444</port>
+                <can_become_leader>false</can_become_leader>
+            </server>
+            <server>
+                <id>3</id>
+                <hostname>node3</hostname>
+                <port>44444</port>
+                <can_become_leader>false</can_become_leader>
+            </server>
+        </raft_configuration>
+    </test_keeper_server>
+</yandex>
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
new file mode 100644
index 00000000000..94873883943
--- /dev/null
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
@@ -0,0 +1,28 @@
+<yandex>
+    <test_keeper_server>
+        <tcp_port>9181</tcp_port>
+        <operation_timeout_ms>10000</operation_timeout_ms>
+        <session_timeout_ms>30000</session_timeout_ms>
+        <server_id>2</server_id>
+        <raft_configuration>
+            <server>
+                <id>1</id>
+                <hostname>node1</hostname>
+                <port>44444</port>
+                <can_become_leader>true</can_become_leader>
+            </server>
+            <server>
+                <id>2</id>
+                <hostname>node2</hostname>
+                <port>44444</port>
+                <can_become_leader>false</can_become_leader>
+            </server>
+            <server>
+                <id>3</id>
+                <hostname>node3</hostname>
+                <port>44444</port>
+                <can_become_leader>false</can_become_leader>
+            </server>
+        </raft_configuration>
+    </test_keeper_server>
+</yandex>
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
new file mode 100644
index 00000000000..0219a0e5763
--- /dev/null
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
@@ -0,0 +1,28 @@
+<yandex>
+    <test_keeper_server>
+        <tcp_port>9181</tcp_port>
+        <operation_timeout_ms>10000</operation_timeout_ms>
+        <session_timeout_ms>30000</session_timeout_ms>
+        <server_id>3</server_id>
+        <raft_configuration>
+            <server>
+                <id>1</id>
+                <hostname>node1</hostname>
+                <port>44444</port>
+                <can_become_leader>true</can_become_leader>
+            </server>
+            <server>
+                <id>2</id>
+                <hostname>node2</hostname>
+                <port>44444</port>
+                <can_become_leader>false</can_become_leader>
+            </server>
+            <server>
+                <id>3</id>
+                <hostname>node3</hostname>
+                <port>44444</port>
+                <can_become_leader>false</can_become_leader>
+            </server>
+        </raft_configuration>
+    </test_keeper_server>
+</yandex>
diff --git a/tests/integration/test_testkeeper_multinode/configs/log_conf.xml b/tests/integration/test_testkeeper_multinode/configs/log_conf.xml
new file mode 100644
index 00000000000..318a6bca95d
--- /dev/null
+++ b/tests/integration/test_testkeeper_multinode/configs/log_conf.xml
@@ -0,0 +1,12 @@
+<yandex>
+    <shutdown_wait_unfinished>3</shutdown_wait_unfinished>
+    <logger>
+        <level>trace</level>
+        <log>/var/log/clickhouse-server/log.log</log>
+        <errorlog>/var/log/clickhouse-server/log.err.log</errorlog>
+        <size>1000M</size>
+        <count>10</count>
+        <stderr>/var/log/clickhouse-server/stderr.log</stderr>
+        <stdout>/var/log/clickhouse-server/stdout.log</stdout>
+    </logger>
+</yandex>
diff --git a/tests/integration/test_testkeeper_multinode/configs/use_test_keeper.xml b/tests/integration/test_testkeeper_multinode/configs/use_test_keeper.xml
new file mode 100644
index 00000000000..20d731b8553
--- /dev/null
+++ b/tests/integration/test_testkeeper_multinode/configs/use_test_keeper.xml
@@ -0,0 +1,8 @@
+<yandex>
+    <zookeeper>
+        <node index="1">
+            <host>node1</host>
+            <port>9181</port>
+        </node>
+    </zookeeper>
+</yandex>
diff --git a/tests/integration/test_testkeeper_multinode/test.py b/tests/integration/test_testkeeper_multinode/test.py
new file mode 100644
index 00000000000..d76e72ee92e
--- /dev/null
+++ b/tests/integration/test_testkeeper_multinode/test.py
@@ -0,0 +1,39 @@
+import pytest
+from helpers.cluster import ClickHouseCluster
+import random
+import string
+import os
+import time
+from multiprocessing.dummy import Pool
+
+cluster = ClickHouseCluster(__file__)
+node1 = cluster.add_instance('node1', main_configs=['configs/enable_test_keeper1.xml', 'configs/log_conf.xml', 'configs/use_test_keeper.xml'])
+node2 = cluster.add_instance('node2', main_configs=['configs/enable_test_keeper2.xml', 'configs/log_conf.xml', 'configs/use_test_keeper.xml'])
+node3 = cluster.add_instance('node3', main_configs=['configs/enable_test_keeper3.xml', 'configs/log_conf.xml', 'configs/use_test_keeper.xml'])
+
+from kazoo.client import KazooClient
+
+@pytest.fixture(scope="module")
+def started_cluster():
+    try:
+        cluster.start()
+
+        yield cluster
+
+    finally:
+        cluster.shutdown()
+
+
+def test_simple_replicated_table(started_cluster):
+
+    for i, node in enumerate([node1, node2, node3]):
+        node.query("CREATE TABLE t (value UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/t', '{}') ORDER BY tuple()".format(i + 1))
+
+    node2.query("INSERT INTO t SELECT number FROM numbers(10)")
+
+    node1.query("SYSTEM SYNC REPLICA t", timeout=10)
+    node3.query("SYSTEM SYNC REPLICA t", timeout=10)
+
+    assert node1.query("SELECT COUNT() FROM t") == "10\n"
+    assert node2.query("SELECT COUNT() FROM t") == "10\n"
+    assert node3.query("SELECT COUNT() FROM t") == "10\n"

From 39c99edfe5db0a65537eab183d1eb676f035f313 Mon Sep 17 00:00:00 2001
From: Daria Mozhaeva <dmozhaeva@yandex-team.ru>
Date: Mon, 25 Jan 2021 17:13:29 +0300
Subject: [PATCH 0157/1238] edit eng ver

---
 docs/en/engines/table-engines/integrations/embedded-rocksdb.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md
index 95602fa313a..b1d21cc5f00 100644
--- a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md
+++ b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md
@@ -1,4 +1,4 @@
-.---
+---
 toc_priority: 6
 toc_title: EmbeddedRocksDB
 ---

From 1576800289f1fbb5d222b4192d625c670d93ebe1 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Mon, 25 Jan 2021 17:36:06 +0300
Subject: [PATCH 0158/1238] Remove races

---
 src/Coordination/tests/gtest_for_build.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/Coordination/tests/gtest_for_build.cpp b/src/Coordination/tests/gtest_for_build.cpp
index 0c7ff8a579c..d74eaafba27 100644
--- a/src/Coordination/tests/gtest_for_build.cpp
+++ b/src/Coordination/tests/gtest_for_build.cpp
@@ -330,6 +330,11 @@ TEST(CoordinationTest, TestStorageSerialization)
     EXPECT_EQ(new_storage.ephemerals[1].size(), 1);
 }
 
+/// Code with obvious races, but I don't want to make it
+/// more complex to avoid races.
+#if defined(__has_feature)
+#  if ! __has_feature(thread_sanitizer)
+
 TEST(CoordinationTest, TestNuKeeperRaft)
 {
     NuKeeperRaftServer s1(1, "localhost", 44447);
@@ -448,3 +453,6 @@ TEST(CoordinationTest, TestNuKeeperRaft)
     s3.launcher.shutdown(5);
     s4.launcher.shutdown(5);
 }
+
+#  endif
+#endif

From d7e805ad99565a1f19d02f9d43ca7c2f2ca0f07f Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Mon, 25 Jan 2021 17:47:03 +0300
Subject: [PATCH 0159/1238] Comment

---
 src/Coordination/SummingStateMachine.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/Coordination/SummingStateMachine.h b/src/Coordination/SummingStateMachine.h
index df343378408..20d6258eb0b 100644
--- a/src/Coordination/SummingStateMachine.h
+++ b/src/Coordination/SummingStateMachine.h
@@ -9,6 +9,7 @@
 namespace DB
 {
 
+/// Example trivial state machine.
 class SummingStateMachine : public nuraft::state_machine
 {
 public:

From 43a2aae3686718ed6d09be6d5659b9492d53755e Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Mon, 25 Jan 2021 17:59:10 +0300
Subject: [PATCH 0160/1238] Add non working cmake

---
 src/Coordination/ya.make | 25 +++++++++++++++++++++++++
 src/ya.make              |  1 +
 2 files changed, 26 insertions(+)

diff --git a/src/Coordination/ya.make b/src/Coordination/ya.make
index e69de29bb2d..de2be9df7ac 100644
--- a/src/Coordination/ya.make
+++ b/src/Coordination/ya.make
@@ -0,0 +1,25 @@
+# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it.
+OWNER(g:clickhouse)
+
+LIBRARY()
+
+PEERDIR(
+    clickhouse/src/Common
+    contrib/libs/NuRaft
+)
+
+
+SRCS(
+    InMemoryLogStore.cpp
+    InMemoryStateManager.cpp
+    NuKeeperServer.cpp
+    NuKeeperStateMachine.cpp
+    SummingStateMachine.cpp
+    TestKeeperStorage.cpp
+    TestKeeperStorageDispatcher.cpp
+    TestKeeperStorageSerializer.cpp
+    WriteBufferFromNuraftBuffer.cpp
+
+)
+
+END()
diff --git a/src/ya.make b/src/ya.make
index c3e6b41b9b9..5361c8a5695 100644
--- a/src/ya.make
+++ b/src/ya.make
@@ -9,6 +9,7 @@ PEERDIR(
     clickhouse/src/Columns
     clickhouse/src/Common
     clickhouse/src/Compression
+    clickhouse/src/Coordination
     clickhouse/src/Core
     clickhouse/src/Databases
     clickhouse/src/DataStreams

From 574454c2702c720edafd2eac4d5bb4930233f3b1 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Mon, 25 Jan 2021 18:07:09 +0300
Subject: [PATCH 0161/1238] cleanup

---
 src/Processors/Transforms/WindowTransform.cpp | 100 ++++++------------
 src/Processors/Transforms/WindowTransform.h   |  26 +----
 2 files changed, 32 insertions(+), 94 deletions(-)

diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index 1bbbfc3d021..c893af42ec9 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -97,7 +97,7 @@ void WindowTransform::advancePartitionEnd()
     const size_t n = partition_by_indices.size();
     if (n == 0)
     {
-        fmt::print(stderr, "no partition by\n");
+//        fmt::print(stderr, "no partition by\n");
         // No PARTITION BY. All input is one partition, which will end when the
         // input ends.
         partition_end = end;
@@ -107,17 +107,14 @@ void WindowTransform::advancePartitionEnd()
     // The partition ends when the PARTITION BY columns change. We need an array
     // of reference columns for comparison. We might have already dropped the
     // blocks where the partition starts, but any row in the partition will do.
-    // We can't use group_start or frame_start, because we might have advanced
-    // them to be equal to the partition_end.
-    // Use the row previous to partition_end -- it should be valid.
-    // FIXME group_start is now valid;
-    //auto reference_row = partition_end;
-    //retreatRowNumber(partition_end);
+    // Use group_start -- it's always in the valid region, because it points to
+    // the start of the current group, which we haven't fully processed yet, and
+    // hence cannot drop.
     auto reference_row = group_start;
-    // assert(reference_row < partition_end);
     if (reference_row == partition_end)
     {
-        // This is for the very first partition. Try to get rid of it.
+        // This is for the very first partition and its first row. Try to get
+        // rid of this logic.
         advanceRowNumber(partition_end);
     }
     assert(reference_row < blocksEnd());
@@ -128,13 +125,13 @@ void WindowTransform::advancePartitionEnd()
         reference_partition_by.push_back(inputAt(reference_row)[i]);
     }
 
-    fmt::print(stderr, "{} cols to compare, reference at {}\n", n, group_start);
+//    fmt::print(stderr, "{} cols to compare, reference at {}\n", n, group_start);
 
-    for ( ; partition_end < end; advanceRowNumber(partition_end))
+    for (; partition_end < end; advanceRowNumber(partition_end))
     {
         // Check for partition end.
         size_t i = 0;
-        for ( ; i < n; i++)
+        for (; i < n; i++)
         {
             const auto * c = inputAt(partition_end)[partition_by_indices[i]].get();
             if (c->compareAt(partition_end.row,
@@ -174,7 +171,7 @@ void WindowTransform::advanceGroupEnd()
             advanceGroupEndRows();
             break;
         case WindowFrame::FrameType::Range:
-            advanceGroupEndRange();
+            assert(false);
             break;
     }
 }
@@ -182,45 +179,14 @@ void WindowTransform::advanceGroupEnd()
 void WindowTransform::advanceGroupEndRows()
 {
     // ROWS mode, peer groups always contains only the current row.
-//    if (group_end == partition_end)
-//    {
-//        // We might be already at the partition_end, if we got to it at the
-//        // previous work() call, but didn't know the partition ended there (it
-//        // was non-final end of data), and in the next work() call (now) we
-//        // discovered that either:
-//        // 1) we won't get more input, or
-//        // 2) we got new data and the new partition really began at this point,
-//        //    which is the beginning of the block.
-//        // Assert these conditions and do nothing.
-//        assert(input_is_finished || partition_end.row == 0);
-//    }
-//    else
-//    {
-//        assert(group_end < partition_end);
-//        advanceRowNumber(group_end);
-//        group_ended = true;
-//    }
-
-    assert(group_ended == false);
     // We cannot advance the groups if the group start is already beyond the
     // end of partition.
-    if (group_start == partition_end)
-    {
-        // should it be an assertion?
-        return;
-    }
-
     assert(group_start < partition_end);
     group_end = group_start;
     advanceRowNumber(group_end);
     group_ended = true;
 }
 
-void WindowTransform::advanceGroupEndRange()
-{
-    assert(false);
-}
-
 void WindowTransform::advanceGroupEndGroups()
 {
     const size_t n = order_by_indices.size();
@@ -239,11 +205,11 @@ void WindowTransform::advanceGroupEndGroups()
     }
 
     // `partition_end` is either end of partition or end of data.
-    for ( ; group_end < partition_end; advanceRowNumber(group_end))
+    for (; group_end < partition_end; advanceRowNumber(group_end))
     {
         // Check for group end.
         size_t i = 0;
-        for ( ; i < n; i++)
+        for (; i < n; i++)
         {
             const auto * c = inputAt(partition_end)[partition_by_indices[i]].get();
             if (c->compareAt(group_end.row,
@@ -342,7 +308,7 @@ void WindowTransform::advanceFrameEnd()
             const auto end = ((r.block + 1) == past_the_end_block)
                 ? past_the_end_row
                 : block.numRows();
-            for ( ; r.row < end; ++r.row)
+            for (; r.row < end; ++r.row)
             {
                 a->add(buf,
                     argument_columns.data(),
@@ -355,8 +321,8 @@ void WindowTransform::advanceFrameEnd()
 
 void WindowTransform::writeOutGroup()
 {
-    fmt::print(stderr, "write out group [{}..{})\n",
-        group_start, group_end);
+//    fmt::print(stderr, "write out group [{}..{})\n",
+//        group_start, group_end);
 
     // Empty groups don't make sense.
     assert(group_start < group_end);
@@ -401,7 +367,7 @@ void WindowTransform::writeOutGroup()
             const auto end = ((r.block + 1) == past_the_end_block)
                 ? past_the_end_row
                 : block.numRows();
-            for ( ; r.row < end; ++r.row)
+            for (; r.row < end; ++r.row)
             {
                 // FIXME does it also allocate the result on the arena?
                 // We'll have to pass it out with blocks then...
@@ -417,8 +383,8 @@ void WindowTransform::writeOutGroup()
 
 void WindowTransform::appendChunk(Chunk & chunk)
 {
-    fmt::print(stderr, "new chunk, {} rows, finished={}\n", chunk.getNumRows(),
-        input_is_finished);
+//    fmt::print(stderr, "new chunk, {} rows, finished={}\n", chunk.getNumRows(),
+//        input_is_finished);
 
     // First, prepare the new input block and add it to the queue. We might not
     // have it if it's end of data, though.
@@ -457,8 +423,8 @@ void WindowTransform::appendChunk(Chunk & chunk)
             assert(input_is_finished);
         }
 
-        fmt::print(stderr, "partition end '{}', {}\n", partition_end,
-            partition_ended);
+//        fmt::print(stderr, "partition end '{}', {}\n", partition_end,
+//            partition_ended);
 
         // After that, advance the peer groups. We can advance peer groups until
         // the end of partition or current end of data, which is precisely the
@@ -468,7 +434,7 @@ void WindowTransform::appendChunk(Chunk & chunk)
             group_start = group_end;
             advanceGroupEnd();
 
-            fmt::print(stderr, "group end '{}'\n", group_end);
+//            fmt::print(stderr, "group end '{}'\n", group_end);
 
             // If the group didn't end yet, wait.
             if (!group_ended)
@@ -535,8 +501,8 @@ void WindowTransform::appendChunk(Chunk & chunk)
         // The group pointers are already reset to the partition start, see the
         // above loop.
 
-        fmt::print(stderr, "reinitialize agg data at start of {}\n",
-            new_partition_start);
+//        fmt::print(stderr, "reinitialize agg data at start of {}\n",
+//            new_partition_start);
         // Reinitialize the aggregate function states because the new partition
         // has started.
         for (auto & ws : workspaces)
@@ -569,9 +535,9 @@ void WindowTransform::appendChunk(Chunk & chunk)
 
 IProcessor::Status WindowTransform::prepare()
 {
-    fmt::print(stderr, "prepare, next output {}, not ready row {}, first block {}, hold {} blocks\n",
-        next_output_block_number, first_not_ready_row, first_block_number,
-        blocks.size());
+//    fmt::print(stderr, "prepare, next output {}, not ready row {}, first block {}, hold {} blocks\n",
+//        next_output_block_number, first_not_ready_row, first_block_number,
+//        blocks.size());
 
     if (output.isFinished())
     {
@@ -599,7 +565,7 @@ IProcessor::Status WindowTransform::prepare()
         if (output.canPush())
         {
             // Output the ready block.
-            fmt::print(stderr, "output block {}\n", next_output_block_number);
+//            fmt::print(stderr, "output block {}\n", next_output_block_number);
             const auto i = next_output_block_number - first_block_number;
             ++next_output_block_number;
             auto & block = blocks[i];
@@ -694,18 +660,12 @@ void WindowTransform::work()
     // We can drop the old blocks if we already returned them as output, and the
     // frame and group are already past them. Note that the frame start can be
     // further than group start for some frame specs, so we have to check both.
-    // Both pointers can also be at the end of partition, but we need at least
-    // one row before that, so that we can use it as an etalon for finding the
-    // partition boundaries, hence the "-1", and the weird std::max(1, ...)
-    // wrapper is to avoid unsigned overflow.
-    // FIXME the above "-1" is not needed anymore, I changed how we advance the
-    // group_start
     const auto first_used_block = std::min(next_output_block_number,
-        std::max(1ul, std::min(frame_start.block, group_start.block)) - 1);
+            std::min(frame_start.block, group_start.block));
     if (first_block_number < first_used_block)
     {
-        fmt::print(stderr, "will drop blocks from {} to {}\n", first_block_number,
-            first_used_block);
+//        fmt::print(stderr, "will drop blocks from {} to {}\n", first_block_number,
+//            first_used_block);
 
         blocks.erase(blocks.begin(),
             blocks.begin() + first_used_block - first_block_number);
diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h
index 8acece9fd17..d81914fe6f8 100644
--- a/src/Processors/Transforms/WindowTransform.h
+++ b/src/Processors/Transforms/WindowTransform.h
@@ -39,32 +39,10 @@ struct WindowTransformBlock
     size_t numRows() const { return input_columns[0]->size(); }
 };
 
-/*
-// Use half the range of the unsigned int data type, to allow wraparound and
-// comparison. I.e. even when the counter overflows we can still tell that it is
-// greater than another counter, unless they are more than half the range apart.
-template <typename T>
-struct Wraparound
-{
-    T value;
-
-    // exclusive?
-    constexpr auto max_value = T(1) << (sizeof(T) * 8 - 1);
-
-    operator T() const { return value; }
-    operator T&() { return value; }
-    bool operator == (const T & other) { return other.value = value; }
-    Wraparound & operator ++ () { value++; return *this; }
-    bool operator < (const T & other) { return value % max_value < other.value % max_value; }
-    Wraparound & operator + (const T & other) { value = value + other.value; return *this; }
-};
-*/
-
-
 struct RowNumber
 {
     uint64_t block = 0;
-    uint16_t row = 0;
+    uint64_t row = 0;
 
     bool operator < (const RowNumber & other) const
     {
@@ -155,7 +133,7 @@ private:
         assert(x.block >= first_block_number);
         assert(x.block - first_block_number < blocks.size());
 
-        const int block_rows = inputAt(x)[0]->size();
+        const auto block_rows = inputAt(x)[0]->size();
         assert(x.row < block_rows);
 
         x.row++;

From eccd9a29de5498998d957697531ae37db8b8a39f Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Mon, 25 Jan 2021 18:32:59 +0300
Subject: [PATCH 0162/1238] Build NuRaft even in fast test

---
 docker/test/fasttest/run.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh
index 7211ce31a87..cf4a5031f8b 100755
--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@@ -163,6 +163,7 @@ function clone_submodules
             contrib/xz
             contrib/dragonbox
             contrib/fast_float
+            contrib/NuRaft
         )
 
         git submodule sync

From 46ca832aa1a75cb9d20f631169501cc4cf0f0b13 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Mon, 25 Jan 2021 18:53:13 +0300
Subject: [PATCH 0163/1238] Enable nuraft in fast test

---
 docker/test/fasttest/run.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh
index cf4a5031f8b..b1ebd97a78c 100755
--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@@ -183,6 +183,7 @@ function run_cmake
         "-DENABLE_EMBEDDED_COMPILER=0"
         "-DENABLE_THINLTO=0"
         "-DUSE_UNWIND=1"
+        "-DENABLE_NURAFT=1"
     )
 
     # TODO remove this? we don't use ccache anyway. An option would be to download it

From 045935151f37e628f44b17ad0048d60e98827d9c Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Mon, 25 Jan 2021 19:09:22 +0300
Subject: [PATCH 0164/1238] Bump


From 3146a1a9542b16d3e56730ca6aa289d23fd70689 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Mon, 25 Jan 2021 21:59:23 +0300
Subject: [PATCH 0165/1238] fix

---
 docker/test/stress/stress                       |  7 +++++--
 src/Interpreters/DDLTask.cpp                    |  2 +-
 src/Interpreters/DDLWorker.cpp                  | 17 +++++++++++++----
 src/Interpreters/InterpreterCreateQuery.cpp     |  2 +-
 .../test_materialize_mysql_database/test.py     |  2 +-
 5 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/docker/test/stress/stress b/docker/test/stress/stress
index 458f78fcdb4..c530f605da7 100755
--- a/docker/test/stress/stress
+++ b/docker/test/stress/stress
@@ -22,12 +22,15 @@ def get_options(i):
     if 0 < i:
         options += " --order=random"
 
-    if i % 2 == 1:
+    if i % 3 == 1:
         options += " --db-engine=Ordinary"
 
+    if i % 3 == 2:
+        options += ''' --db-engine="Replicated('/test/db/test_{}', 's1', 'r1')"'''.format(i)
+
     # If database name is not specified, new database is created for each functional test.
     # Run some threads with one database for all tests.
-    if i % 3 == 1:
+    if i % 2 == 1:
         options += " --database=test_{}".format(i)
 
     if i == 13:
diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp
index 3d9297880c1..fd2de014581 100644
--- a/src/Interpreters/DDLTask.cpp
+++ b/src/Interpreters/DDLTask.cpp
@@ -140,7 +140,7 @@ bool DDLTask::findCurrentHostID(const Context & global_context, Poco::Logger * l
 
 void DDLTask::setClusterInfo(const Context & context, Poco::Logger * log)
 {
-    auto query_on_cluster = dynamic_cast<ASTQueryWithOnCluster *>(query.get());
+    auto * query_on_cluster = dynamic_cast<ASTQueryWithOnCluster *>(query.get());
     if (!query_on_cluster)
         throw Exception("Received unknown DDL query", ErrorCodes::UNKNOWN_TYPE_OF_QUERY);
 
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 91a5309bb5d..fc72e4d8366 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -201,11 +201,7 @@ void DDLWorker::shutdown()
     stop_flag = true;
     queue_updated_event->set();
     cleanup_event->set();
-}
 
-DDLWorker::~DDLWorker()
-{
-    shutdown();
     worker_pool.reset();
     if (main_thread.joinable())
         main_thread.join();
@@ -213,6 +209,11 @@ DDLWorker::~DDLWorker()
         cleanup_thread.join();
 }
 
+DDLWorker::~DDLWorker()
+{
+    shutdown();
+}
+
 
 ZooKeeperPtr DDLWorker::tryGetZooKeeper() const
 {
@@ -490,9 +491,14 @@ void DDLWorker::processTask(DDLTaskBase & task)
             }
 
             if (task.execute_on_leader)
+            {
                 tryExecuteQueryOnLeaderReplica(task, storage, rewritten_query, task.entry_path, zookeeper);
+            }
             else
+            {
+                storage.reset();
                 tryExecuteQuery(rewritten_query, task);
+            }
         }
         catch (const Coordination::Exception &)
         {
@@ -892,6 +898,7 @@ void DDLWorker::initializeMainThread()
         {
             tryLogCurrentException(log, "Cannot initialize DDL queue.");
             reset_state(false);
+            sleepForSeconds(5);
         }
     }
     while (!initialized && !stop_flag);
@@ -949,11 +956,13 @@ void DDLWorker::runMainThread()
                 LOG_ERROR(log, "Unexpected ZooKeeper error: {}", getCurrentExceptionMessage(true));
                 reset_state();
             }
+            sleepForSeconds(5);
         }
         catch (...)
         {
             tryLogCurrentException(log, "Unexpected error, will try to restart main thread:");
             reset_state();
+            sleepForSeconds(5);
         }
     }
 }
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index 40789fc1a8a..b66af77930c 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -718,7 +718,7 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data
     const auto * kind = create.is_dictionary ? "Dictionary" : "Table";
     const auto * kind_upper = create.is_dictionary ? "DICTIONARY" : "TABLE";
 
-    if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind == ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
+    if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind == ClientInfo::QueryKind::REPLICATED_LOG_QUERY && !internal)
     {
         if (create.uuid == UUIDHelpers::Nil)
             throw Exception("Table UUID is not specified in DDL log", ErrorCodes::LOGICAL_ERROR);
diff --git a/tests/integration/test_materialize_mysql_database/test.py b/tests/integration/test_materialize_mysql_database/test.py
index dbd6e894987..3cdc527d33d 100644
--- a/tests/integration/test_materialize_mysql_database/test.py
+++ b/tests/integration/test_materialize_mysql_database/test.py
@@ -14,7 +14,7 @@ DOCKER_COMPOSE_PATH = get_docker_compose_path()
 
 cluster = ClickHouseCluster(__file__)
 
-node_db_ordinary = cluster.add_instance('node1', user_configs=["configs/users.xml"], with_mysql=False, stay_alive=True)
+node_db_ordinary = cluster.add_instance('node1', user_configs=["configs/users.xml"], with_mysql=False, stay_alive=True, with_zookeeper=True)    #FIXME
 node_db_atomic = cluster.add_instance('node2', user_configs=["configs/users_db_atomic.xml"], with_mysql=False, stay_alive=True)
 
 
From 9ee5c1535ef282889f4a6c361fcb27c66dc95f08 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Mon, 25 Jan 2021 23:29:04 +0300
Subject: [PATCH 0166/1238] Allow to disable checksums on read

---
 src/Core/Settings.h                                    | 1 +
 src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp | 3 ++-
 src/Storages/MergeTree/MergeTreeIOSettings.h           | 2 ++
 src/Storages/MergeTree/MergeTreeReaderCompact.cpp      | 6 ++++++
 src/Storages/MergeTree/MergeTreeReaderStream.cpp       | 3 +++
 5 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index cc32417af09..11c10b6f5c6 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -139,6 +139,7 @@ class IColumn;
     \
     M(UInt64, min_bytes_to_use_direct_io, 0, "The minimum number of bytes for reading the data with O_DIRECT option during SELECT queries execution. 0 - disabled.", 0) \
     M(UInt64, min_bytes_to_use_mmap_io, 0, "The minimum number of bytes for reading the data with mmap option during SELECT queries execution. 0 - disabled.", 0) \
+    M(Bool, checksum_on_read, true, "Validate checksums on reading. It is enabled by default and should be always enabled in production. Please do not expect any benefits in disabling this setting. It may only be used for experiments and benchmarks. The setting only applicable for tables of MergeTree family. Checksums are always validated for other table engines and when receiving data over network.", 0) \
     \
     M(Bool, force_index_by_date, 0, "Throw an exception if there is a partition key in a table, and it is not used.", 0) \
     M(Bool, force_primary_key, 0, "Throw an exception if there is primary key in a table, and it is not used.", 0) \
diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index 6b2e3c5a8a4..c414e735c0a 100644
--- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -547,7 +547,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
         .min_bytes_to_use_direct_io = settings.min_bytes_to_use_direct_io,
         .min_bytes_to_use_mmap_io = settings.min_bytes_to_use_mmap_io,
         .max_read_buffer_size = settings.max_read_buffer_size,
-        .save_marks_in_cache = true
+        .checksum_on_read = settings.checksum_on_read,
+        .save_marks_in_cache = true,
     };
 
     /// PREWHERE
diff --git a/src/Storages/MergeTree/MergeTreeIOSettings.h b/src/Storages/MergeTree/MergeTreeIOSettings.h
index d82aa7dd7c2..f2469494792 100644
--- a/src/Storages/MergeTree/MergeTreeIOSettings.h
+++ b/src/Storages/MergeTree/MergeTreeIOSettings.h
@@ -16,6 +16,8 @@ struct MergeTreeReaderSettings
     bool save_marks_in_cache = false;
     /// Convert old-style nested (single arrays with same prefix, `n.a`, `n.b`...) to subcolumns of data type Nested.
     bool convert_nested_to_subcolumns = false;
+    /// Validate checksums on reading (should be always enabled in production).
+    bool checksum_on_read = true;
 };
 
 struct MergeTreeWriterSettings
diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp
index 635c59cf19a..67268e8afd8 100644
--- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp
+++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp
@@ -92,6 +92,9 @@ MergeTreeReaderCompact::MergeTreeReaderCompact(
             if (profile_callback_)
                 buffer->setProfileCallback(profile_callback_, clock_type_);
 
+            if (!settings.checksum_on_read)
+                buffer->disableChecksumming();
+
             cached_buffer = std::move(buffer);
             data_buffer = cached_buffer.get();
         }
@@ -106,6 +109,9 @@ MergeTreeReaderCompact::MergeTreeReaderCompact(
             if (profile_callback_)
                 buffer->setProfileCallback(profile_callback_, clock_type_);
 
+            if (!settings.checksum_on_read)
+                buffer->disableChecksumming();
+
             non_cached_buffer = std::move(buffer);
             data_buffer = non_cached_buffer.get();
         }
diff --git a/src/Storages/MergeTree/MergeTreeReaderStream.cpp b/src/Storages/MergeTree/MergeTreeReaderStream.cpp
index 1754fb201eb..08cb49445f0 100644
--- a/src/Storages/MergeTree/MergeTreeReaderStream.cpp
+++ b/src/Storages/MergeTree/MergeTreeReaderStream.cpp
@@ -96,6 +96,9 @@ MergeTreeReaderStream::MergeTreeReaderStream(
         if (profile_callback)
             buffer->setProfileCallback(profile_callback, clock_type);
 
+        if (!settings.checksum_on_read)
+            buffer->disableChecksumming();
+
         cached_buffer = std::move(buffer);
         data_buffer = cached_buffer.get();
     }

From 8dfa9330287981eea28f57ad168c999e46954ba7 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Mon, 25 Jan 2021 23:48:10 +0300
Subject: [PATCH 0167/1238] Amend

---
 src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index c414e735c0a..b44e7197c12 100644
--- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -547,8 +547,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
         .min_bytes_to_use_direct_io = settings.min_bytes_to_use_direct_io,
         .min_bytes_to_use_mmap_io = settings.min_bytes_to_use_mmap_io,
         .max_read_buffer_size = settings.max_read_buffer_size,
-        .checksum_on_read = settings.checksum_on_read,
         .save_marks_in_cache = true,
+        .checksum_on_read = settings.checksum_on_read,
     };
 
     /// PREWHERE

From c4b9c700c516132471586bff36fcac6f63d5de10 Mon Sep 17 00:00:00 2001
From: Olga Revyakina <revolg@yandex-team.ru>
Date: Tue, 26 Jan 2021 02:09:17 +0300
Subject: [PATCH 0168/1238] Map type and map function.

Data type description template also added.
---
 .../template-data-type.md                     | 29 +++++++++
 docs/en/sql-reference/data-types/map.md       | 56 ++++++++++++++++
 .../functions/tuple-map-functions.md          | 64 ++++++++++++++++++-
 3 files changed, 148 insertions(+), 1 deletion(-)
 create mode 100644 docs/_description_templates/template-data-type.md
 create mode 100644 docs/en/sql-reference/data-types/map.md

diff --git a/docs/_description_templates/template-data-type.md b/docs/_description_templates/template-data-type.md
new file mode 100644
index 00000000000..edb6586ee7d
--- /dev/null
+++ b/docs/_description_templates/template-data-type.md
@@ -0,0 +1,29 @@
+---
+toc_priority: 
+toc_title: 
+---
+
+# data_type_name {#data_type-name}
+
+Description.
+
+**Parameters** (Optional)
+
+-   `x` — Description. [Type name](relative/path/to/type/dscr.md#type).
+-   `y` — Description. [Type name](relative/path/to/type/dscr.md#type).
+
+**Examples**
+
+```sql
+
+```
+
+## Additional Info {#additional-info} (Optional)
+
+The name of an additional section can be any, for example, **Usage**.
+
+**See Also** (Optional)
+
+-   [link](#)
+
+[Original article](https://clickhouse.tech/docs/en/data_types/<data-type-name>/) <!--hide-->
diff --git a/docs/en/sql-reference/data-types/map.md b/docs/en/sql-reference/data-types/map.md
new file mode 100644
index 00000000000..5f1300896e8
--- /dev/null
+++ b/docs/en/sql-reference/data-types/map.md
@@ -0,0 +1,56 @@
+---
+toc_priority: 65
+toc_title: Map(key, value)
+---
+
+# Map(key, value) {#data_type-map}
+
+`Map(key, value)` data type stores `key:value` pairs in structures like JSON. 
+
+**Parameters** 
+-   `key` — Key part of the pair. [String](../../sql-reference/data-types/string.md) or [Integer](../../sql-reference/data-types/int-uint.md).
+-   `value` — Value part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) or [Array](../../sql-reference/data-types/array.md).
+
+!!! warning "Warning"
+    Currently `Map` data type is an experimental feature. To work with it you must set `allow_experimental_map_type = 1`.
+
+To get the value from an `a Map('key', 'value')` column, use `a['key']` syntax.
+
+**Example**
+
+Query:
+
+``` sql
+CREATE TABLE table_map (a Map(String, UInt64)) ENGINE=Memory;
+INSERT INTO table_map VALUES ({'key1':1, 'key2':100}), ({'key1':2,'key2':200}), ({'key1':3,'key2':300});
+SELECT a['key2'] FROM table_map;
+```
+Result:
+
+```text
+┌─arrayElement(a, 'key2')─┐
+│                     100 │
+│                     200 │
+│                     300 │
+└─────────────────────────┘
+```
+
+## Map() and Tuple() Types {#map-and-tuple}
+
+You can cast `Tuple()` as `Map()`:
+
+``` sql
+SELECT CAST(([1, 2, 3], ['Ready', 'Steady', 'Go']), 'Map(UInt8, String)') AS map;
+```
+
+``` text
+┌─map───────────────────────────┐
+│ {1:'Ready',2:'Steady',3:'Go'} │
+└───────────────────────────────┘
+```
+
+**See Also**
+
+-   [map()](../../sql-reference/functions/tuple-map-functions.md#function-map) function
+
+[Original article](https://clickhouse.tech/docs/en/data_types/map/) <!--hide-->
diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md
index a46c36395b8..3de570e6dcc 100644
--- a/docs/en/sql-reference/functions/tuple-map-functions.md
+++ b/docs/en/sql-reference/functions/tuple-map-functions.md
@@ -5,6 +5,68 @@ toc_title: Working with maps
 
 # Functions for maps {#functions-for-working-with-tuple-maps}
 
+## map {#function-map}
+
+Arranges `key:value` pairs into a JSON data structure.
+
+**Syntax** 
+
+``` sql
+map(key1, value1[, key2, value2, ...])
+```
+
+**Parameters** 
+
+-   `key` — Key part of the pair. [String](../../sql-reference/data-types/string.md) or [Integer](../../sql-reference/data-types/int-uint.md).
+-   `value` — Value part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) or [Array](../../sql-reference/data-types/array.md).
+
+**Returned value**
+
+-   JSON with `key:value` pairs.
+
+Type: [Map(key, value)](../../sql-reference/data-types/map.md).
+
+**Examples**
+
+Query:
+
+``` sql
+SELECT map('key1', number, 'key2', number * 2) FROM numbers(3);
+```
+
+Result:
+
+``` text
+┌─map('key1', number, 'key2', multiply(number, 2))─┐
+│ {'key1':0,'key2':0}                              │
+│ {'key1':1,'key2':2}                              │
+│ {'key1':2,'key2':4}                              │
+└──────────────────────────────────────────────────┘
+```
+
+Query:
+
+``` sql
+CREATE TABLE table_map (a Map(String, UInt64)) ENGINE = MergeTree() ORDER BY a;
+INSERT INTO table_map SELECT map('key1', number, 'key2', number * 2) FROM numbers(3);
+SELECT a['key2'] FROM table_map;
+```
+
+Result:
+
+``` text
+┌─arrayElement(a, 'key2')─┐
+│                       0 │
+│                       2 │
+│                       4 │
+└─────────────────────────┘
+```
+
+**See Also** 
+
+-   [Map(key, value)](../../sql-reference/data-types/map.md) data type
+
+
 ## mapAdd {#function-mapadd}
 
 Collect all the keys and sum corresponding values.
@@ -112,4 +174,4 @@ Result:
 └──────────────────────────────┴───────────────────────────────────┘
 ```
 
-[Original article](https://clickhouse.tech/docs/en/query_language/functions/tuple-map-functions/) <!--hide-->
+[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/tuple-map-functions/) <!--hide-->

From 9152a7a4be08bef001b3e44bb29bcd09dfc93b1f Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Sat, 23 Jan 2021 10:10:49 -0800
Subject: [PATCH 0169/1238] init - make toIPv6 parse IPv4 addresses

---
 src/Functions/FunctionsCoding.h | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/Functions/FunctionsCoding.h b/src/Functions/FunctionsCoding.h
index 7756f37d418..8757e9568fb 100644
--- a/src/Functions/FunctionsCoding.h
+++ b/src/Functions/FunctionsCoding.h
@@ -298,10 +298,19 @@ public:
                  out_offset += IPV6_BINARY_LENGTH, ++i)
             {
                 /// In case of failure, the function fills vec_res with zero bytes.
-                parseIPv6(reinterpret_cast<const char *>(&vec_src[src_offset]), reinterpret_cast<unsigned char *>(&vec_res[out_offset]));
+                String result;
+
+                if (DB::parseIPv4(reinterpret_cast<const char *>(&vec_src[src_offset]), reinterpret_cast<unsigned char *>(&result)))
+                {
+                    result = std::string("::ffff:") + std::string(vec_src.raw_data());
+                }
+                else
+                {
+                    result = std::string(vec_res.raw_data());
+                }
+                parseIPv6(reinterpret_cast<const char *>(&result), reinterpret_cast<unsigned char *>(&vec_res[out_offset]));
                 src_offset = offsets_src[i];
             }
-
             return col_res;
         }
         else

From 562d01e8d81b8855d318d5de874032dc04f07557 Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Sun, 24 Jan 2021 22:01:51 -0800
Subject: [PATCH 0170/1238] fix code and rebase

---
 src/Functions/FunctionsCoding.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/Functions/FunctionsCoding.h b/src/Functions/FunctionsCoding.h
index 8757e9568fb..2e72518a63a 100644
--- a/src/Functions/FunctionsCoding.h
+++ b/src/Functions/FunctionsCoding.h
@@ -299,16 +299,17 @@ public:
             {
                 /// In case of failure, the function fills vec_res with zero bytes.
                 String result;
-
+                auto src = reinterpret_cast<const char *>(&vec_src[src_offset]);
+                auto res = reinterpret_cast<unsigned char *>(&vec_res[out_offset]);
                 if (DB::parseIPv4(reinterpret_cast<const char *>(&vec_src[src_offset]), reinterpret_cast<unsigned char *>(&result)))
                 {
-                    result = std::string("::ffff:") + std::string(vec_src.raw_data());
+                    auto ipv4_src = std::string("::ffff:") + std::string(src);
+                    parseIPv6(ipv4_src.c_str(), res);
                 }
                 else
                 {
-                    result = std::string(vec_res.raw_data());
+                    parseIPv6(src, res);
                 }
-                parseIPv6(reinterpret_cast<const char *>(&result), reinterpret_cast<unsigned char *>(&vec_res[out_offset]));
                 src_offset = offsets_src[i];
             }
             return col_res;

From 7abedaeaa024c3a906774c4b4c4949dba087b4ac Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Sun, 24 Jan 2021 23:08:30 -0800
Subject: [PATCH 0171/1238] simplify logic flow

---
 src/Functions/FunctionsCoding.h | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/Functions/FunctionsCoding.h b/src/Functions/FunctionsCoding.h
index 2e72518a63a..e55822cb6b8 100644
--- a/src/Functions/FunctionsCoding.h
+++ b/src/Functions/FunctionsCoding.h
@@ -297,21 +297,21 @@ public:
                  out_offset < vec_res.size();
                  out_offset += IPV6_BINARY_LENGTH, ++i)
             {
+                auto src_string = std::string(reinterpret_cast<const char *>(&vec_src[src_offset]));
+                auto out = reinterpret_cast<unsigned char *>(&vec_res[out_offset]);
+                auto subnet_prefix = std::string("::ffff:");
+
+                /// If the source IP address is parsable as an IPv4 address, then transform it into a valid IPv6 address.
+                /// Keeping it simple by just prefixing `::ffff:` to the IPv4 address to represent it as a valid IPv6 address.
+                if (DB::parseIPv4(src_string.c_str(), out))
+                {
+                    src_string = subnet_prefix + src_string;
+                }
                 /// In case of failure, the function fills vec_res with zero bytes.
-                String result;
-                auto src = reinterpret_cast<const char *>(&vec_src[src_offset]);
-                auto res = reinterpret_cast<unsigned char *>(&vec_res[out_offset]);
-                if (DB::parseIPv4(reinterpret_cast<const char *>(&vec_src[src_offset]), reinterpret_cast<unsigned char *>(&result)))
-                {
-                    auto ipv4_src = std::string("::ffff:") + std::string(src);
-                    parseIPv6(ipv4_src.c_str(), res);
-                }
-                else
-                {
-                    parseIPv6(src, res);
-                }
+                parseIPv6(src_string.c_str(), out);
                 src_offset = offsets_src[i];
             }
+
             return col_res;
         }
         else

From a102c783287b3e49faeddc80ee9f5669dd8913a3 Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Sun, 24 Jan 2021 23:23:30 -0800
Subject: [PATCH 0172/1238] tests for toIPv6 and IPv6StringToNum

---
 tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference | 2 ++
 tests/queries/0_stateless/00725_ipv4_ipv6_domains.sql       | 4 ++++
 2 files changed, 6 insertions(+)

diff --git a/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference b/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference
index 5060b5253fe..12b309316aa 100644
--- a/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference
+++ b/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference
@@ -49,3 +49,5 @@ FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF is ipv6 string: 	1
 ::ffff:127.0.0.1 is ipv6 string:                        	1
 ::ffff:8.8.8.8 is ipv6 string:                          	1
 2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D is ipv6 string: 	1
+::ffff:127.0.0.1
+::ffff:127.0.0.1
diff --git a/tests/queries/0_stateless/00725_ipv4_ipv6_domains.sql b/tests/queries/0_stateless/00725_ipv4_ipv6_domains.sql
index 099dc20762e..c070dcfe835 100644
--- a/tests/queries/0_stateless/00725_ipv4_ipv6_domains.sql
+++ b/tests/queries/0_stateless/00725_ipv4_ipv6_domains.sql
@@ -84,3 +84,7 @@ SELECT '::ffff:127.0.0.1 is ipv6 string:                        ', isIPv6String(
 SELECT '::ffff:8.8.8.8 is ipv6 string:                          ', isIPv6String('::ffff:8.8.8.8');
 SELECT '2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D is ipv6 string: ', isIPv6String('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D');
 
+-- IPV6 functions parse IPv4 addresses.
+
+SELECT toIPv6('127.0.0.1');
+SELECT cutIPv6(IPv6StringToNum('127.0.0.1'), 0, 0);

From fc8e22075e6b400a07d25bf165b554960d51f515 Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Mon, 25 Jan 2021 15:55:56 -0800
Subject: [PATCH 0173/1238] refactor code to not use strings

---
 src/Functions/FunctionsCoding.h | 36 ++++++++++++++++++++++-----------
 1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/src/Functions/FunctionsCoding.h b/src/Functions/FunctionsCoding.h
index e55822cb6b8..f980c920e29 100644
--- a/src/Functions/FunctionsCoding.h
+++ b/src/Functions/FunctionsCoding.h
@@ -263,6 +263,14 @@ public:
     static constexpr auto name = "IPv6StringToNum";
     static FunctionPtr create(const Context &) { return std::make_shared<FunctionIPv6StringToNum>(); }
 
+    static inline UInt32 parseIPv4(const char * pos)
+    {
+        UInt32 result = 0;
+        DB::parseIPv4(pos, reinterpret_cast<unsigned char *>(&result));
+
+        return result;
+    }
+
     String getName() const override { return name; }
 
     size_t getNumberOfArguments() const override { return 1; }
@@ -270,8 +278,8 @@ public:
     DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
     {
         if (!isString(arguments[0]))
-            throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(),
-            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+            throw Exception(
+                "Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
 
         return std::make_shared<DataTypeFixedString>(IPV6_BINARY_LENGTH);
     }
@@ -293,22 +301,26 @@ public:
             const ColumnString::Offsets & offsets_src = col_in->getOffsets();
             size_t src_offset = 0;
 
-            for (size_t out_offset = 0, i = 0;
-                 out_offset < vec_res.size();
-                 out_offset += IPV6_BINARY_LENGTH, ++i)
+            char subnet_prefix[] = "::ffff:";
+            char src_ipv4_buf[sizeof(subnet_prefix) + IPV4_MAX_TEXT_LENGTH + 1];
+            strcpy(src_ipv4_buf, subnet_prefix);
+
+            for (size_t out_offset = 0, i = 0; out_offset < vec_res.size(); out_offset += IPV6_BINARY_LENGTH, ++i)
             {
-                auto src_string = std::string(reinterpret_cast<const char *>(&vec_src[src_offset]));
-                auto out = reinterpret_cast<unsigned char *>(&vec_res[out_offset]);
-                auto subnet_prefix = std::string("::ffff:");
+                /// For both cases below: In case of failure, the function parseIPv6 fills vec_res with zero bytes.
 
                 /// If the source IP address is parsable as an IPv4 address, then transform it into a valid IPv6 address.
                 /// Keeping it simple by just prefixing `::ffff:` to the IPv4 address to represent it as a valid IPv6 address.
-                if (DB::parseIPv4(src_string.c_str(), out))
+                if (parseIPv4(reinterpret_cast<const char *>(&vec_src[src_offset])))
                 {
-                    src_string = subnet_prefix + src_string;
+                    std::strcat(src_ipv4_buf, reinterpret_cast<const char *>(&vec_src[src_offset]));
+                    parseIPv6(reinterpret_cast<const char *>(&src_ipv4_buf), reinterpret_cast<unsigned char *>(&vec_res[out_offset]));
+                }
+                else
+                {
+                    parseIPv6(
+                        reinterpret_cast<const char *>(&vec_src[src_offset]), reinterpret_cast<unsigned char *>(&vec_res[out_offset]));
                 }
-                /// In case of failure, the function fills vec_res with zero bytes.
-                parseIPv6(src_string.c_str(), out);
                 src_offset = offsets_src[i];
             }
 

From db8975c38f45be36c2d96ef4d086ddefa77c7cff Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Mon, 25 Jan 2021 17:38:39 -0800
Subject: [PATCH 0174/1238] remove redundant strcpy call

---
 src/Functions/FunctionsCoding.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/Functions/FunctionsCoding.h b/src/Functions/FunctionsCoding.h
index f980c920e29..38940be1c58 100644
--- a/src/Functions/FunctionsCoding.h
+++ b/src/Functions/FunctionsCoding.h
@@ -301,9 +301,7 @@ public:
             const ColumnString::Offsets & offsets_src = col_in->getOffsets();
             size_t src_offset = 0;
 
-            char subnet_prefix[] = "::ffff:";
-            char src_ipv4_buf[sizeof(subnet_prefix) + IPV4_MAX_TEXT_LENGTH + 1];
-            strcpy(src_ipv4_buf, subnet_prefix);
+            char src_ipv4_buf[sizeof("::ffff:") + IPV4_MAX_TEXT_LENGTH + 1] = "::ffff:";
 
             for (size_t out_offset = 0, i = 0; out_offset < vec_res.size(); out_offset += IPV6_BINARY_LENGTH, ++i)
             {

From 31027dbaf74061813ebe45a166932084b9ddae10 Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Mon, 25 Jan 2021 19:04:08 -0800
Subject: [PATCH 0175/1238] update docs

---
 .../functions/ip-address-functions.md         | 23 ++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/functions/ip-address-functions.md b/docs/en/sql-reference/functions/ip-address-functions.md
index faf551601ac..1361eb65a56 100644
--- a/docs/en/sql-reference/functions/ip-address-functions.md
+++ b/docs/en/sql-reference/functions/ip-address-functions.md
@@ -115,9 +115,20 @@ LIMIT 10
 
 ## IPv6StringToNum(s) {#ipv6stringtonums}
 
-The reverse function of IPv6NumToString. If the IPv6 address has an invalid format, it returns a string of null bytes.
+The reverse function of IPv6NumToString. If the IPv6 address has an invalid format, it returns a string of null bytes. 
+If the IP address is a valid IPv4 address then the IPv6 equivalent of the IPv4 address is returned.
 HEX can be uppercase or lowercase.
 
+``` sql
+SELECT cutIPv6(IPv6StringToNum('127.0.0.1'), 0, 0);
+```
+
+``` text
+┌─cutIPv6(IPv6StringToNum('127.0.0.1'), 0, 0)─┐
+│ ::ffff:127.0.0.1                            │
+└─────────────────────────────────────────────┘
+```
+
 ## IPv4ToIPv6(x) {#ipv4toipv6x}
 
 Takes a `UInt32` number. Interprets it as an IPv4 address in [big endian](https://en.wikipedia.org/wiki/Endianness). Returns a `FixedString(16)` value containing the IPv6 address in binary format. Examples:
@@ -214,6 +225,7 @@ SELECT
 ## toIPv6(string) {#toipv6string}
 
 An alias to `IPv6StringToNum()` that takes a string form of IPv6 address and returns value of [IPv6](../../sql-reference/data-types/domains/ipv6.md) type, which is binary equal to value returned by `IPv6StringToNum()`.
+If the IP address is a valid IPv4 address then the IPv6 equivalent of the IPv4 address is returned.
 
 ``` sql
 WITH
@@ -243,6 +255,15 @@ SELECT
 └───────────────────────────────────┴──────────────────────────────────┘
 ```
 
+``` sql
+SELECT toIPv6('127.0.0.1')
+```
+
+``` text
+┌─toIPv6('127.0.0.1')─┐
+│ ::ffff:127.0.0.1    │
+└─────────────────────┘
+```
 
 ## isIPv4String
 

From 66fe97d8bdaf271396a3bc9dfab493587c8a7183 Mon Sep 17 00:00:00 2001
From: Amos Bird <amosbird@gmail.com>
Date: Mon, 25 Jan 2021 13:01:39 +0800
Subject: [PATCH 0176/1238] Per MergeTree table query limit

---
 src/Processors/Pipe.cpp                       |  2 +
 src/Processors/Pipe.h                         |  3 +
 src/Processors/QueryPlan/QueryIdHolder.cpp    | 15 +++++
 src/Processors/QueryPlan/QueryIdHolder.h      | 21 +++++++
 src/Storages/MergeTree/MergeTreeData.cpp      | 21 +++++++
 src/Storages/MergeTree/MergeTreeData.h        | 10 +++
 .../MergeTree/MergeTreeDataSelectExecutor.cpp | 48 ++++++++++----
 .../MergeTree/MergeTreeDataSelectExecutor.h   |  9 ++-
 src/Storages/MergeTree/MergeTreeSettings.h    |  2 +
 ...01666_merge_tree_max_query_limit.reference | 14 +++++
 .../01666_merge_tree_max_query_limit.sh       | 63 +++++++++++++++++++
 11 files changed, 193 insertions(+), 15 deletions(-)
 create mode 100644 src/Processors/QueryPlan/QueryIdHolder.cpp
 create mode 100644 src/Processors/QueryPlan/QueryIdHolder.h
 create mode 100644 tests/queries/0_stateless/01666_merge_tree_max_query_limit.reference
 create mode 100755 tests/queries/0_stateless/01666_merge_tree_max_query_limit.sh

diff --git a/src/Processors/Pipe.cpp b/src/Processors/Pipe.cpp
index e8943790e68..129bebf452a 100644
--- a/src/Processors/Pipe.cpp
+++ b/src/Processors/Pipe.cpp
@@ -105,6 +105,8 @@ Pipe::Holder & Pipe::Holder::operator=(Holder && rhs)
     for (auto & plan : rhs.query_plans)
         query_plans.emplace_back(std::move(plan));
 
+    query_id_holder = std::move(rhs.query_id_holder);
+
     return *this;
 }
 
diff --git a/src/Processors/Pipe.h b/src/Processors/Pipe.h
index 2d64de3e664..f21f4761977 100644
--- a/src/Processors/Pipe.h
+++ b/src/Processors/Pipe.h
@@ -1,6 +1,7 @@
 #pragma once
 #include <Processors/IProcessor.h>
 #include <Processors/Sources/SourceWithProgress.h>
+#include <Processors/QueryPlan/QueryIdHolder.h>
 #include <Processors/QueryPlan/QueryPlan.h>
 
 namespace DB
@@ -108,6 +109,7 @@ public:
     /// This methods are from QueryPipeline. Needed to make conversion from pipeline to pipe possible.
     void addInterpreterContext(std::shared_ptr<Context> context) { holder.interpreter_context.emplace_back(std::move(context)); }
     void addStorageHolder(StoragePtr storage) { holder.storage_holders.emplace_back(std::move(storage)); }
+    void addQueryIdHolder(std::shared_ptr<QueryIdHolder> query_id_holder) { holder.query_id_holder = std::move(query_id_holder); }
     /// For queries with nested interpreters (i.e. StorageDistributed)
     void addQueryPlan(std::unique_ptr<QueryPlan> plan) { holder.query_plans.emplace_back(std::move(plan)); }
 
@@ -128,6 +130,7 @@ private:
         std::vector<StoragePtr> storage_holders;
         std::vector<TableLockHolder> table_locks;
         std::vector<std::unique_ptr<QueryPlan>> query_plans;
+        std::shared_ptr<QueryIdHolder> query_id_holder;
     };
 
     Holder holder;
diff --git a/src/Processors/QueryPlan/QueryIdHolder.cpp b/src/Processors/QueryPlan/QueryIdHolder.cpp
new file mode 100644
index 00000000000..87f6f892cd1
--- /dev/null
+++ b/src/Processors/QueryPlan/QueryIdHolder.cpp
@@ -0,0 +1,15 @@
+#include <Processors/QueryPlan/QueryIdHolder.h>
+#include <Storages/MergeTree/MergeTreeData.h>
+
+namespace DB
+{
+QueryIdHolder::QueryIdHolder(const String & query_id_, const MergeTreeData & data_) : query_id(query_id_), data(data_)
+{
+}
+
+QueryIdHolder::~QueryIdHolder()
+{
+    data.removeQueryId(query_id);
+}
+
+}
diff --git a/src/Processors/QueryPlan/QueryIdHolder.h b/src/Processors/QueryPlan/QueryIdHolder.h
new file mode 100644
index 00000000000..ed8f9ec1d6b
--- /dev/null
+++ b/src/Processors/QueryPlan/QueryIdHolder.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include <string>
+
+namespace DB
+{
+class MergeTreeData;
+
+/// Holds the current query id and do something meaningful in destructor.
+/// Currently it's used for cleaning query id in the MergeTreeData query set.
+struct QueryIdHolder
+{
+    QueryIdHolder(const std::string & query_id_, const MergeTreeData & data_);
+
+    ~QueryIdHolder();
+
+    std::string query_id;
+    const MergeTreeData & data;
+};
+
+}
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 276ac10aeaf..701e05430fb 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -114,6 +114,7 @@ namespace ErrorCodes
     extern const int NOT_ENOUGH_SPACE;
     extern const int ALTER_OF_COLUMN_IS_FORBIDDEN;
     extern const int SUPPORT_IS_DISABLED;
+    extern const int TOO_MANY_SIMULTANEOUS_QUERIES;
 }
 
 
@@ -3988,4 +3989,24 @@ void MergeTreeData::setDataVolume(size_t bytes, size_t rows, size_t parts)
     total_active_size_rows.store(rows, std::memory_order_release);
     total_active_size_parts.store(parts, std::memory_order_release);
 }
+
+void MergeTreeData::insertQueryIdOrThrow(const String & query_id, size_t max_queries) const
+{
+    std::lock_guard lock(query_id_set_mutex);
+    if (query_id_set.find(query_id) != query_id_set.end())
+        return;
+    if (query_id_set.size() >= max_queries)
+        throw Exception(
+            ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES, "Too many simultaneous queries for table {}. Maximum is: {}", log_name, max_queries);
+    query_id_set.insert(query_id);
+}
+
+void MergeTreeData::removeQueryId(const String & query_id) const
+{
+    std::lock_guard lock(query_id_set_mutex);
+    if (query_id_set.find(query_id) == query_id_set.end())
+        LOG_WARNING(log, "We have query_id removed but it's not recorded. This is a bug");
+    else
+        query_id_set.erase(query_id);
+}
 }
diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h
index 9d021815888..425dcbfb316 100644
--- a/src/Storages/MergeTree/MergeTreeData.h
+++ b/src/Storages/MergeTree/MergeTreeData.h
@@ -702,6 +702,12 @@ public:
     /// section from config.xml.
     CompressionCodecPtr getCompressionCodecForPart(size_t part_size_compressed, const IMergeTreeDataPart::TTLInfos & ttl_infos, time_t current_time) const;
 
+    /// Record current query id where querying the table. Throw if there are already `max_queries` queries accessing the same table.
+    void insertQueryIdOrThrow(const String & query_id, size_t max_queries) const;
+
+    /// Remove current query id after query finished.
+    void removeQueryId(const String & query_id) const;
+
     /// Limiting parallel sends per one table, used in DataPartsExchange
     std::atomic_uint current_table_sends {0};
 
@@ -958,6 +964,10 @@ private:
     std::atomic<size_t> total_active_size_bytes = 0;
     std::atomic<size_t> total_active_size_rows = 0;
     std::atomic<size_t> total_active_size_parts = 0;
+
+    // Record all query ids which access the table. It's guarded by `query_id_set_mutex` and is always mutable.
+    mutable std::set<String> query_id_set;
+    mutable std::mutex query_id_set_mutex;
 };
 
 }
diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index 6b2e3c5a8a4..2dc88f08b30 100644
--- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -33,6 +33,7 @@
 #include <Processors/QueryPlan/MergingSortedStep.h>
 #include <Processors/QueryPlan/UnionStep.h>
 #include <Processors/QueryPlan/MergingFinal.h>
+#include <Processors/QueryPlan/ReadNothingStep.h>
 
 #include <DataTypes/DataTypeDate.h>
 #include <DataTypes/DataTypeEnum.h>
@@ -707,8 +708,9 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
     if (parts_with_ranges.empty())
         return std::make_unique<QueryPlan>();
 
+    const auto data_settings = data.getSettings();
     auto max_partitions_to_read
-        = settings.max_partitions_to_read.changed ? settings.max_partitions_to_read : data.getSettings()->max_partitions_to_read;
+        = settings.max_partitions_to_read.changed ? settings.max_partitions_to_read : data_settings->max_partitions_to_read;
     if (max_partitions_to_read > 0)
     {
         std::set<String> partitions;
@@ -722,6 +724,18 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
                 max_partitions_to_read);
     }
 
+    String query_id;
+    if (data_settings->max_concurrent_queries > 0)
+    {
+        if (data_settings->min_marks_to_honor_max_concurrent_queries > 0
+            && sum_marks >= data_settings->min_marks_to_honor_max_concurrent_queries)
+        {
+            query_id = context.getCurrentQueryId();
+            if (!query_id.empty())
+                data.insertQueryIdOrThrow(query_id, data_settings->max_concurrent_queries);
+        }
+    }
+
     ProfileEvents::increment(ProfileEvents::SelectedParts, parts_with_ranges.size());
     ProfileEvents::increment(ProfileEvents::SelectedRanges, sum_ranges);
     ProfileEvents::increment(ProfileEvents::SelectedMarks, sum_marks);
@@ -758,7 +772,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
             virt_column_names,
             settings,
             reader_settings,
-            result_projection);
+            result_projection,
+            query_id);
     }
     else if ((settings.optimize_read_in_order || settings.optimize_aggregation_in_order) && query_info.input_order_info)
     {
@@ -781,7 +796,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
             virt_column_names,
             settings,
             reader_settings,
-            result_projection);
+            result_projection,
+            query_id);
     }
     else
     {
@@ -795,7 +811,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
             query_info,
             virt_column_names,
             settings,
-            reader_settings);
+            reader_settings,
+            query_id);
     }
 
     if (!plan)
@@ -895,7 +912,7 @@ size_t minMarksForConcurrentRead(
 
 }
 
-static QueryPlanPtr createPlanFromPipe(Pipe pipe, const std::string & description = "")
+static QueryPlanPtr createPlanFromPipe(Pipe pipe, const String & query_id, const MergeTreeData & data, const std::string & description = "")
 {
     auto plan = std::make_unique<QueryPlan>();
 
@@ -903,6 +920,10 @@ static QueryPlanPtr createPlanFromPipe(Pipe pipe, const std::string & descriptio
     if (!description.empty())
         storage_name += ' ' + description;
 
+    // Attach QueryIdHolder if needed
+    if (!query_id.empty())
+        pipe.addQueryIdHolder(std::make_shared<QueryIdHolder>(query_id, data));
+
     auto step = std::make_unique<ReadFromStorageStep>(std::move(pipe), storage_name);
     plan->addStep(std::move(step));
     return plan;
@@ -918,7 +939,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams(
     const SelectQueryInfo & query_info,
     const Names & virt_columns,
     const Settings & settings,
-    const MergeTreeReaderSettings & reader_settings) const
+    const MergeTreeReaderSettings & reader_settings,
+    const String & query_id) const
 {
     /// Count marks for each part.
     std::vector<size_t> sum_marks_in_parts(parts.size());
@@ -1003,7 +1025,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams(
             res.emplace_back(std::move(source));
         }
 
-        return createPlanFromPipe(Pipe::unitePipes(std::move(res)));
+        return createPlanFromPipe(Pipe::unitePipes(std::move(res)), query_id, data);
     }
     else
     {
@@ -1027,7 +1049,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams(
         if (pipe.numOutputPorts() > 1)
             pipe.addTransform(std::make_shared<ConcatProcessor>(pipe.getHeader(), pipe.numOutputPorts()));
 
-        return createPlanFromPipe(std::move(pipe));
+        return createPlanFromPipe(std::move(pipe), query_id, data);
     }
 }
 
@@ -1051,7 +1073,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder(
     const Names & virt_columns,
     const Settings & settings,
     const MergeTreeReaderSettings & reader_settings,
-    ActionsDAGPtr & out_projection) const
+    ActionsDAGPtr & out_projection,
+    const String & query_id) const
 {
     size_t sum_marks = 0;
     const InputOrderInfoPtr & input_order_info = query_info.input_order_info;
@@ -1242,7 +1265,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder(
             }
         }
 
-        auto plan = createPlanFromPipe(Pipe::unitePipes(std::move(pipes)), " with order");
+        auto plan = createPlanFromPipe(Pipe::unitePipes(std::move(pipes)), query_id, data, " with order");
 
         if (input_order_info->direction != 1)
         {
@@ -1310,7 +1333,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
     const Names & virt_columns,
     const Settings & settings,
     const MergeTreeReaderSettings & reader_settings,
-    ActionsDAGPtr & out_projection) const
+    ActionsDAGPtr & out_projection,
+    const String & query_id) const
 {
     const auto data_settings = data.getSettings();
     size_t sum_marks = 0;
@@ -1406,7 +1430,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
             if (!out_projection)
                 out_projection = createProjection(pipe.getHeader());
 
-            plan = createPlanFromPipe(std::move(pipe), "with final");
+            plan = createPlanFromPipe(std::move(pipe), query_id, data, "with final");
         }
 
         /// If do_not_merge_across_partitions_select_final is true and there is only one part in partition
diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h
index af4e3717749..c3b3020ebf5 100644
--- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h
+++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h
@@ -58,7 +58,8 @@ private:
         const SelectQueryInfo & query_info,
         const Names & virt_columns,
         const Settings & settings,
-        const MergeTreeReaderSettings & reader_settings) const;
+        const MergeTreeReaderSettings & reader_settings,
+        const String & query_id) const;
 
     /// out_projection - save projection only with columns, requested to read
     QueryPlanPtr spreadMarkRangesAmongStreamsWithOrder(
@@ -73,7 +74,8 @@ private:
         const Names & virt_columns,
         const Settings & settings,
         const MergeTreeReaderSettings & reader_settings,
-        ActionsDAGPtr & out_projection) const;
+        ActionsDAGPtr & out_projection,
+        const String & query_id) const;
 
     QueryPlanPtr spreadMarkRangesAmongStreamsFinal(
         RangesInDataParts && parts,
@@ -86,7 +88,8 @@ private:
         const Names & virt_columns,
         const Settings & settings,
         const MergeTreeReaderSettings & reader_settings,
-        ActionsDAGPtr & out_projection) const;
+        ActionsDAGPtr & out_projection,
+        const String & query_id) const;
 
     /// Get the approximate value (bottom estimate - only by full marks) of the number of rows falling under the index.
     size_t getApproximateTotalRowsToRead(
diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h
index 7f23a1a42ab..713bfffde05 100644
--- a/src/Storages/MergeTree/MergeTreeSettings.h
+++ b/src/Storages/MergeTree/MergeTreeSettings.h
@@ -111,6 +111,8 @@ struct Settings;
     M(Bool, remove_empty_parts, true, "Remove empty parts after they were pruned by TTL, mutation, or collapsing merge algorithm", 0) \
     M(Bool, assign_part_uuids, false, "Generate UUIDs for parts. Before enabling check that all replicas support new format.", 0) \
     M(Int64, max_partitions_to_read, -1, "Limit the max number of partitions that can be accessed in one query. <= 0 means unlimited. This setting is the default that can be overridden by the query-level setting with the same name.", 0) \
+    M(UInt64, max_concurrent_queries, 0, "Max number of concurrently executed queries related to the MergeTree table (0 - disabled). Queries will still be limited by other max_concurrent_queries settings.", 0) \
+    M(UInt64, min_marks_to_honor_max_concurrent_queries, 0, "Minimal number of marks to honor the MergeTree-level's max_concurrent_queries (0 - disabled). Queries will still be limited by other max_concurrent_queries settings.", 0) \
     \
     /** Obsolete settings. Kept for backward compatibility only. */ \
     M(UInt64, min_relative_delay_to_yield_leadership, 120, "Obsolete setting, does nothing.", 0) \
diff --git a/tests/queries/0_stateless/01666_merge_tree_max_query_limit.reference b/tests/queries/0_stateless/01666_merge_tree_max_query_limit.reference
new file mode 100644
index 00000000000..25880a7d740
--- /dev/null
+++ b/tests/queries/0_stateless/01666_merge_tree_max_query_limit.reference
@@ -0,0 +1,14 @@
+Spin up a long running query
+Check if another query with some marks to read is throttled
+yes
+Check if another query with less marks to read is passed
+0	100
+Modify min_marks_to_honor_max_concurrent_queries to 1
+Check if another query with less marks to read is throttled
+yes
+Modify max_concurrent_queries to 2
+Check if another query is passed
+0	100
+Modify max_concurrent_queries back to 1
+Check if another query with less marks to read is throttled
+yes
diff --git a/tests/queries/0_stateless/01666_merge_tree_max_query_limit.sh b/tests/queries/0_stateless/01666_merge_tree_max_query_limit.sh
new file mode 100755
index 00000000000..0bf37673e91
--- /dev/null
+++ b/tests/queries/0_stateless/01666_merge_tree_max_query_limit.sh
@@ -0,0 +1,63 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+${CLICKHOUSE_CLIENT} --multiline --multiquery --query "
+drop table if exists simple;
+
+create table simple (i int, j int) engine = MergeTree order by i
+settings index_granularity = 1, max_concurrent_queries = 1, min_marks_to_honor_max_concurrent_queries = 2;
+
+insert into simple select number, number + 100 from numbers(10);
+"
+
+echo "Spin up a long running query"
+${CLICKHOUSE_CLIENT} --query "select sleepEachRow(1) from simple settings max_block_size = 1 format Null" --query_id "long_running_query" &
+sleep 3
+
+# query which reads marks >= min_marks_to_honor_max_concurrent_queries is throttled
+echo "Check if another query with some marks to read is throttled"
+${CLICKHOUSE_CLIENT} --query "select * from simple" 2> /dev/null;
+CODE=$?
+[ "$CODE" -ne "202" ] && echo "Expected error code: 202 but got: $CODE" && exit 1;
+echo "yes"
+
+# query which reads marks less than min_marks_to_honor_max_concurrent_queries is allowed
+echo "Check if another query with less marks to read is passed"
+${CLICKHOUSE_CLIENT} --query "select * from simple where i = 0"
+
+# We can modify the settings to take effect for future queries
+echo "Modify min_marks_to_honor_max_concurrent_queries to 1"
+${CLICKHOUSE_CLIENT} --query "alter table simple modify setting min_marks_to_honor_max_concurrent_queries = 1"
+
+# Now smaller queries are also throttled
+echo "Check if another query with less marks to read is throttled"
+${CLICKHOUSE_CLIENT} --query "select * from simple where i = 0" 2> /dev/null;
+CODE=$?
+[ "$CODE" -ne "202" ] && echo "Expected error code: 202 but got: $CODE" && exit 1;
+echo "yes"
+
+echo "Modify max_concurrent_queries to 2"
+${CLICKHOUSE_CLIENT} --query "alter table simple modify setting max_concurrent_queries = 2"
+
+# Now more queries are accepted
+echo "Check if another query is passed"
+${CLICKHOUSE_CLIENT} --query "select * from simple where i = 0"
+
+echo "Modify max_concurrent_queries back to 1"
+${CLICKHOUSE_CLIENT} --query "alter table simple modify setting max_concurrent_queries = 1"
+
+# Now queries are throttled again
+echo "Check if another query with less marks to read is throttled"
+${CLICKHOUSE_CLIENT} --query "select * from simple where i = 0" 2> /dev/null;
+CODE=$?
+[ "$CODE" -ne "202" ] && echo "Expected error code: 202 but got: $CODE" && exit 1;
+echo "yes"
+
+wait
+
+${CLICKHOUSE_CLIENT} --multiline --multiquery --query "
+drop table simple
+"

From 10cec45e53ebf4774ee299d339cf12fe91a17770 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Tue, 26 Jan 2021 10:47:04 +0300
Subject: [PATCH 0177/1238] Fix obvious deadlock

---
 src/Coordination/NuKeeperServer.cpp           | 21 +++++++++++---
 .../TestKeeperStorageDispatcher.cpp           | 28 +++++++++----------
 2 files changed, 30 insertions(+), 19 deletions(-)

diff --git a/src/Coordination/NuKeeperServer.cpp b/src/Coordination/NuKeeperServer.cpp
index a3786342e05..c7f9012f287 100644
--- a/src/Coordination/NuKeeperServer.cpp
+++ b/src/Coordination/NuKeeperServer.cpp
@@ -46,7 +46,7 @@ void NuKeeperServer::startup()
     params.election_timeout_upper_bound_ = 400;
     params.reserved_log_items_ = 5000;
     params.snapshot_distance_ = 5000;
-    params.client_req_timeout_ = 3000;
+    params.client_req_timeout_ = 10000;
     params.return_method_ = nuraft::raft_params::blocking;
 
     raft_instance = launcher.init(
@@ -145,10 +145,23 @@ TestKeeperStorage::ResponsesForSessions NuKeeperServer::putRequests(const TestKe
 
     auto result = raft_instance->append_entries(entries);
     if (!result->get_accepted())
-        throw Exception(ErrorCodes::RAFT_ERROR, "Cannot send requests to RAFT, mostly because we are not leader");
+        throw Exception(ErrorCodes::RAFT_ERROR, "Cannot send requests to RAFT, mostly because we are not leader, code {}, message: '{}'", result->get_result_code(), result->get_result_str());
 
-    if (result->get_result_code() != nuraft::cmd_result_code::OK)
-        throw Exception(ErrorCodes::RAFT_ERROR, "Requests failed");
+    if (result->get_result_code() == nuraft::cmd_result_code::TIMEOUT)
+    {
+        TestKeeperStorage::ResponsesForSessions responses;
+        for (const auto & [session_id, request] : requests)
+        {
+            auto response = request->makeResponse();
+            response->xid = request->xid;
+            response->zxid = 0; /// FIXME what we can do with it?
+            response->error = Coordination::Error::ZOPERATIONTIMEOUT;
+            responses.push_back(DB::TestKeeperStorage::ResponseForSession{session_id, response});
+        }
+        return responses;
+    }
+    else if (result->get_result_code() != nuraft::cmd_result_code::OK)
+        throw Exception(ErrorCodes::RAFT_ERROR, "Requests result failed with code {} and message: '{}'", result->get_result_code(), result->get_result_str());
 
     return readZooKeeperResponses(result->get());
 }
diff --git a/src/Coordination/TestKeeperStorageDispatcher.cpp b/src/Coordination/TestKeeperStorageDispatcher.cpp
index 7c78ca0e79f..3aef5213adc 100644
--- a/src/Coordination/TestKeeperStorageDispatcher.cpp
+++ b/src/Coordination/TestKeeperStorageDispatcher.cpp
@@ -14,30 +14,28 @@ namespace ErrorCodes
 void TestKeeperStorageDispatcher::processingThread()
 {
     setThreadName("TestKeeperSProc");
-    try
+    while (!shutdown)
     {
-        while (!shutdown)
+        TestKeeperStorage::RequestForSession request;
+
+        UInt64 max_wait = UInt64(operation_timeout.totalMilliseconds());
+
+        if (requests_queue.tryPop(request, max_wait))
         {
-            TestKeeperStorage::RequestForSession request;
-
-            UInt64 max_wait = UInt64(operation_timeout.totalMilliseconds());
-
-            if (requests_queue.tryPop(request, max_wait))
+            if (shutdown)
+                break;
+            try
             {
-                if (shutdown)
-                    break;
-
                 auto responses = server->putRequests({request});
                 for (const auto & response_for_session : responses)
                     setResponse(response_for_session.session_id, response_for_session.response);
             }
+            catch (...)
+            {
+                tryLogCurrentException(__PRETTY_FUNCTION__);
+            }
         }
     }
-    catch (...)
-    {
-        tryLogCurrentException(__PRETTY_FUNCTION__);
-        finalize();
-    }
 }
 
 void TestKeeperStorageDispatcher::setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response)

From ddeb008bbb6ee7209fd8c862fb1dd00672001ef7 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Tue, 26 Jan 2021 10:52:34 +0300
Subject: [PATCH 0178/1238] Replace ulong with size_t

---
 src/Coordination/SummingStateMachine.cpp | 2 +-
 src/Coordination/SummingStateMachine.h   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Coordination/SummingStateMachine.cpp b/src/Coordination/SummingStateMachine.cpp
index bf2a5bb818f..59649850123 100644
--- a/src/Coordination/SummingStateMachine.cpp
+++ b/src/Coordination/SummingStateMachine.cpp
@@ -110,7 +110,7 @@ void SummingStateMachine::save_logical_snp_obj(
 int SummingStateMachine::read_logical_snp_obj(
     nuraft::snapshot & s,
     void* & /*user_snp_ctx*/,
-    ulong obj_id,
+    size_t obj_id,
     nuraft::ptr<nuraft::buffer> & data_out,
     bool & is_last_obj)
 {
diff --git a/src/Coordination/SummingStateMachine.h b/src/Coordination/SummingStateMachine.h
index 20d6258eb0b..9aca02c6bdc 100644
--- a/src/Coordination/SummingStateMachine.h
+++ b/src/Coordination/SummingStateMachine.h
@@ -41,7 +41,7 @@ public:
     int read_logical_snp_obj(
         nuraft::snapshot & s,
         void* & user_snp_ctx,
-        ulong obj_id,
+        size_t obj_id,
         nuraft::ptr<nuraft::buffer> & data_out,
         bool & is_last_obj) override;
 

From 71dca6dc006f1042156ec4b6799da9e4dbc52e06 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Tue, 26 Jan 2021 11:17:19 +0300
Subject: [PATCH 0179/1238] Tidy fixes

---
 src/Coordination/LoggerWrapper.h                | 17 ++++++++++-------
 src/Coordination/NuKeeperServer.cpp             |  2 +-
 src/Coordination/NuKeeperStateMachine.cpp       |  7 ++++---
 src/Coordination/NuKeeperStateMachine.h         |  4 ++--
 src/Coordination/SummingStateMachine.cpp        |  3 ++-
 .../TestKeeperStorageSerializer.cpp             |  4 ++--
 src/Coordination/TestKeeperStorageSerializer.h  |  4 ++--
 src/Coordination/tests/gtest_for_build.cpp      |  4 ++--
 src/Interpreters/Context.cpp                    |  2 +-
 9 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/src/Coordination/LoggerWrapper.h b/src/Coordination/LoggerWrapper.h
index 5895457441a..00d4c6544a5 100644
--- a/src/Coordination/LoggerWrapper.h
+++ b/src/Coordination/LoggerWrapper.h
@@ -11,33 +11,36 @@ class LoggerWrapper : public nuraft::logger
 public:
     LoggerWrapper(const std::string & name)
         : log(&Poco::Logger::get(name))
+        , level(4)
     {
-        set_level(4);
+        log->setLevel(level);
     }
 
     void put_details(
-        int level,
+        int level_,
         const char * /* source_file */,
         const char * /* func_name */,
         size_t /* line_number */,
         const std::string & msg) override
     {
-        LOG_IMPL(log, static_cast<DB::LogsLevel>(level), static_cast<Poco::Message::Priority>(level), msg);
+        LOG_IMPL(log, static_cast<DB::LogsLevel>(level_), static_cast<Poco::Message::Priority>(level_), msg);
     }
 
-    void set_level(int level) override
+    void set_level(int level_) override
     {
-        level = std::min(6, std::max(1, level));
-        log->setLevel(level);
+        level_ = std::min(6, std::max(1, level_));
+        log->setLevel(level_);
+        level = level_;
     }
 
     int get_level() override
     {
-        return log->getLevel();
+        return level;
     }
 
 private:
     Poco::Logger * log;
+    std::atomic<int> level;
 };
 
 }
diff --git a/src/Coordination/NuKeeperServer.cpp b/src/Coordination/NuKeeperServer.cpp
index c7f9012f287..5b5aeb206c4 100644
--- a/src/Coordination/NuKeeperServer.cpp
+++ b/src/Coordination/NuKeeperServer.cpp
@@ -137,7 +137,7 @@ TestKeeperStorage::ResponsesForSessions NuKeeperServer::readZooKeeperResponses(n
 TestKeeperStorage::ResponsesForSessions NuKeeperServer::putRequests(const TestKeeperStorage::RequestsForSessions & requests)
 {
     std::vector<nuraft::ptr<nuraft::buffer>> entries;
-    for (auto & [session_id, request] : requests)
+    for (const auto & [session_id, request] : requests)
     {
         ops_mapping[session_id][request->xid] = request->makeResponse();
         entries.push_back(getZooKeeperLogEntry(session_id, request));
diff --git a/src/Coordination/NuKeeperStateMachine.cpp b/src/Coordination/NuKeeperStateMachine.cpp
index 13c0f92e604..52c82f44784 100644
--- a/src/Coordination/NuKeeperStateMachine.cpp
+++ b/src/Coordination/NuKeeperStateMachine.cpp
@@ -8,6 +8,8 @@
 namespace DB
 {
 
+static constexpr int MAX_SNAPSHOTS = 3;
+
 TestKeeperStorage::RequestForSession parseRequest(nuraft::buffer & data)
 {
     ReadBufferFromNuraftBuffer buffer(data);
@@ -112,7 +114,7 @@ NuKeeperStateMachine::StorageSnapshotPtr NuKeeperStateMachine::createSnapshotInt
     return std::make_shared<NuKeeperStateMachine::StorageSnapshot>(ss, storage);
 }
 
-NuKeeperStateMachine::StorageSnapshotPtr NuKeeperStateMachine::readSnapshot(nuraft::snapshot & s, nuraft::buffer & in) const
+NuKeeperStateMachine::StorageSnapshotPtr NuKeeperStateMachine::readSnapshot(nuraft::snapshot & s, nuraft::buffer & in)
 {
     nuraft::ptr<nuraft::buffer> snp_buf = s.serialize();
     nuraft::ptr<nuraft::snapshot> ss = nuraft::snapshot::deserialize(*snp_buf);
@@ -125,7 +127,7 @@ NuKeeperStateMachine::StorageSnapshotPtr NuKeeperStateMachine::readSnapshot(nura
 }
 
 
-void NuKeeperStateMachine::writeSnapshot(const NuKeeperStateMachine::StorageSnapshotPtr & snapshot, nuraft::ptr<nuraft::buffer> & out) const
+void NuKeeperStateMachine::writeSnapshot(const NuKeeperStateMachine::StorageSnapshotPtr & snapshot, nuraft::ptr<nuraft::buffer> & out)
 {
     TestKeeperStorageSerializer serializer;
 
@@ -143,7 +145,6 @@ void NuKeeperStateMachine::create_snapshot(
     {
         std::lock_guard<std::mutex> lock(snapshots_lock);
         snapshots[s.get_last_log_idx()] = snapshot;
-        const int MAX_SNAPSHOTS = 3;
         int num = snapshots.size();
         auto entry = snapshots.begin();
 
diff --git a/src/Coordination/NuKeeperStateMachine.h b/src/Coordination/NuKeeperStateMachine.h
index 4e5e8406039..a120e3f1cf6 100644
--- a/src/Coordination/NuKeeperStateMachine.h
+++ b/src/Coordination/NuKeeperStateMachine.h
@@ -63,9 +63,9 @@ private:
 
     StorageSnapshotPtr createSnapshotInternal(nuraft::snapshot & s);
 
-    StorageSnapshotPtr readSnapshot(nuraft::snapshot & s, nuraft::buffer & in) const;
+    static StorageSnapshotPtr readSnapshot(nuraft::snapshot & s, nuraft::buffer & in);
 
-    void writeSnapshot(const StorageSnapshotPtr & snapshot, nuraft::ptr<nuraft::buffer> & out) const;
+    static void writeSnapshot(const StorageSnapshotPtr & snapshot, nuraft::ptr<nuraft::buffer> & out);
 
     TestKeeperStorage storage;
     /// Mutex for snapshots
diff --git a/src/Coordination/SummingStateMachine.cpp b/src/Coordination/SummingStateMachine.cpp
index 59649850123..f9a3f4f9de2 100644
--- a/src/Coordination/SummingStateMachine.cpp
+++ b/src/Coordination/SummingStateMachine.cpp
@@ -4,6 +4,8 @@
 namespace DB
 {
 
+static constexpr int MAX_SNAPSHOTS = 3;
+
 static int64_t deserializeValue(nuraft::buffer & buffer)
 {
     nuraft::buffer_serializer bs(buffer);
@@ -68,7 +70,6 @@ void SummingStateMachine::createSnapshotInternal(nuraft::snapshot & s)
     snapshots[s.get_last_log_idx()] = ctx;
 
     // Maintain last 3 snapshots only.
-    const int MAX_SNAPSHOTS = 3;
     int num = snapshots.size();
     auto entry = snapshots.begin();
 
diff --git a/src/Coordination/TestKeeperStorageSerializer.cpp b/src/Coordination/TestKeeperStorageSerializer.cpp
index cb3a2643f68..f6116d29104 100644
--- a/src/Coordination/TestKeeperStorageSerializer.cpp
+++ b/src/Coordination/TestKeeperStorageSerializer.cpp
@@ -29,7 +29,7 @@ namespace
     }
 }
 
-void TestKeeperStorageSerializer::serialize(const TestKeeperStorage & storage, WriteBuffer & out) const
+void TestKeeperStorageSerializer::serialize(const TestKeeperStorage & storage, WriteBuffer & out)
 {
     Coordination::write(storage.zxid, out);
     Coordination::write(storage.session_id_counter, out);
@@ -49,7 +49,7 @@ void TestKeeperStorageSerializer::serialize(const TestKeeperStorage & storage, W
     }
 }
 
-void TestKeeperStorageSerializer::deserialize(TestKeeperStorage & storage, ReadBuffer & in) const
+void TestKeeperStorageSerializer::deserialize(TestKeeperStorage & storage, ReadBuffer & in)
 {
     int64_t session_id_counter, zxid;
     Coordination::read(zxid, in);
diff --git a/src/Coordination/TestKeeperStorageSerializer.h b/src/Coordination/TestKeeperStorageSerializer.h
index 5a6a0cea0a5..a3909c24694 100644
--- a/src/Coordination/TestKeeperStorageSerializer.h
+++ b/src/Coordination/TestKeeperStorageSerializer.h
@@ -9,9 +9,9 @@ namespace DB
 class TestKeeperStorageSerializer
 {
 public:
-    void serialize(const TestKeeperStorage & storage, WriteBuffer & out) const;
+    static void serialize(const TestKeeperStorage & storage, WriteBuffer & out);
 
-    void deserialize(TestKeeperStorage & storage, ReadBuffer & in) const;
+    static void deserialize(TestKeeperStorage & storage, ReadBuffer & in);
 };
 
 }
diff --git a/src/Coordination/tests/gtest_for_build.cpp b/src/Coordination/tests/gtest_for_build.cpp
index d74eaafba27..b0fcec7e10d 100644
--- a/src/Coordination/tests/gtest_for_build.cpp
+++ b/src/Coordination/tests/gtest_for_build.cpp
@@ -379,7 +379,7 @@ TEST(CoordinationTest, TestNuKeeperRaft)
     EXPECT_TRUE(ret_leader->get_accepted()) << "failed to replicate create entry:" << ret_leader->get_result_code();
     EXPECT_EQ(ret_leader->get_result_code(), nuraft::cmd_result_code::OK) << "failed to replicate create entry:" << ret_leader->get_result_code();
 
-    auto result = ret_leader.get();
+    auto * result = ret_leader.get();
 
     auto responses = getZooKeeperResponses(result->get(), create_request);
 
@@ -418,7 +418,7 @@ TEST(CoordinationTest, TestNuKeeperRaft)
     EXPECT_TRUE(ret_leader_get->get_accepted()) << "failed to replicate create entry: " << ret_leader_get->get_result_code();
     EXPECT_EQ(ret_leader_get->get_result_code(), nuraft::cmd_result_code::OK) << "failed to replicate create entry: " << ret_leader_get->get_result_code();
 
-    auto result_get = ret_leader_get.get();
+    auto * result_get = ret_leader_get.get();
 
     auto get_responses = getZooKeeperResponses(result_get->get(), get_request);
 
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index ee5be5f6edb..0b381cf3fae 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -1575,7 +1575,7 @@ void Context::initializeTestKeeperStorageDispatcher() const
     if (shared->test_keeper_storage_dispatcher)
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to initialize TestKeeper multiple times");
 
-    auto & config = getConfigRef();
+    const auto & config = getConfigRef();
     if (config.has("test_keeper_server"))
     {
         shared->test_keeper_storage_dispatcher = std::make_shared<TestKeeperStorageDispatcher>();

From 61d006cbab6609c2cbde732546d05ee98980f3c2 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Tue, 26 Jan 2021 11:18:00 +0300
Subject: [PATCH 0180/1238] Fix typo

---
 src/Server/TestKeeperTCPHandler.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Server/TestKeeperTCPHandler.cpp b/src/Server/TestKeeperTCPHandler.cpp
index 3e88d543112..81eaee3382c 100644
--- a/src/Server/TestKeeperTCPHandler.cpp
+++ b/src/Server/TestKeeperTCPHandler.cpp
@@ -425,7 +425,7 @@ std::pair<Coordination::OpNum, Coordination::XID> TestKeeperTCPHandler::receiveR
     request->readImpl(*in);
 
     if (!test_keeper_storage_dispatcher->putRequest(request, session_id))
-        throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Sesssion {} already disconnected", session_id);
+        throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Session {} already disconnected", session_id);
     return std::make_pair(opnum, xid);
 }
 

From a65430fcee7f4e0f25bd91a3f554f78963e63bf8 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Tue, 26 Jan 2021 11:33:16 +0300
Subject: [PATCH 0181/1238] Trying to fix fast test

---
 contrib/nuraft-cmake/CMakeLists.txt | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/contrib/nuraft-cmake/CMakeLists.txt b/contrib/nuraft-cmake/CMakeLists.txt
index e5bb7f7d11b..83137fe73bf 100644
--- a/contrib/nuraft-cmake/CMakeLists.txt
+++ b/contrib/nuraft-cmake/CMakeLists.txt
@@ -30,7 +30,12 @@ set(SRCS
 
 add_library(nuraft ${SRCS})
 
-target_compile_definitions(nuraft PRIVATE USE_BOOST_ASIO=1 BOOST_ASIO_STANDALONE=1)
+
+if (NOT OPENSSL_SSL_LIBRARY OR NOT OPENSSL_CRYPTO_LIBRARY)
+    target_compile_definitions(nuraft PRIVATE USE_BOOST_ASIO=1 BOOST_ASIO_STANDALONE=1 SSL_LIBRARY_NOT_FOUND=1)
+else()
+    target_compile_definitions(nuraft PRIVATE USE_BOOST_ASIO=1 BOOST_ASIO_STANDALONE=1)
+endif()
 
 target_include_directories (nuraft SYSTEM PRIVATE ${LIBRARY_DIR}/include/libnuraft)
 # for some reason include "asio.h" directly without "boost/" prefix.

From 45192a2ef2ec24a3dd2d7c34a68685e4378d0f21 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Tue, 26 Jan 2021 11:46:05 +0300
Subject: [PATCH 0182/1238] Fix epoll events in boost asio for msan

---
 contrib/boost | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/boost b/contrib/boost
index 8e259cd2a6b..b2368f43f37 160000
--- a/contrib/boost
+++ b/contrib/boost
@@ -1 +1 @@
-Subproject commit 8e259cd2a6b60d75dd17e73432f11bb7b9351bb1
+Subproject commit b2368f43f37c4a592b17b1e9a474b93749c47319

From 29a2ef3089c2ada0398341f7e080a0b0dd5b63ec Mon Sep 17 00:00:00 2001
From: kreuzerkrieg <kreuzerkrieg@gmail.com>
Date: Sat, 23 Jan 2021 17:20:15 +0200
Subject: [PATCH 0183/1238] Add IStoragePolicy interface

---
 src/Disks/IStoragePolicy.h                    | 62 +++++++++++++++++++
 src/Disks/StoragePolicy.cpp                   | 24 +++++--
 src/Disks/StoragePolicy.h                     | 43 +++++++------
 src/Interpreters/Aggregator.cpp               |  1 -
 src/Interpreters/Context.h                    |  4 +-
 src/Interpreters/SortedBlocksWriter.cpp       |  2 +-
 .../Transforms/MergeSortingTransform.cpp      |  2 +-
 src/Server/HTTPHandler.cpp                    |  2 +-
 src/Storages/IStorage.h                       |  4 +-
 .../MergeTree/MergeTreeDataMergerMutator.cpp  |  1 -
 src/Storages/StorageDistributed.cpp           |  1 -
 src/Storages/StorageMergeTree.cpp             |  1 -
 src/Storages/StorageReplicatedMergeTree.cpp   |  1 -
 src/Storages/System/StorageSystemTables.cpp   |  2 +-
 14 files changed, 109 insertions(+), 41 deletions(-)
 create mode 100644 src/Disks/IStoragePolicy.h

diff --git a/src/Disks/IStoragePolicy.h b/src/Disks/IStoragePolicy.h
new file mode 100644
index 00000000000..a41ea87c328
--- /dev/null
+++ b/src/Disks/IStoragePolicy.h
@@ -0,0 +1,62 @@
+#pragma once
+#include <memory>
+#include <vector>
+#include <common/types.h>
+
+namespace DB
+{
+class IStoragePolicy;
+using StoragePolicyPtr = std::shared_ptr<const IStoragePolicy>;
+class IVolume;
+using VolumePtr = std::shared_ptr<IVolume>;
+using Volumes = std::vector<VolumePtr>;
+class IDisk;
+using DiskPtr = std::shared_ptr<IDisk>;
+using Disks = std::vector<DiskPtr>;
+class IReservation;
+using ReservationPtr = std::unique_ptr<IReservation>;
+using Reservations = std::vector<ReservationPtr>;
+
+using String = std::string;
+
+class IStoragePolicy
+{
+public:
+    virtual ~IStoragePolicy() = default;
+    virtual const String & getName() const = 0;
+    virtual const Volumes & getVolumes() const = 0;
+    /// Returns number [0., 1.] -- fraction of free space on disk
+    /// which should be kept with help of background moves
+    virtual double getMoveFactor() const = 0;
+    virtual bool isDefaultPolicy() const = 0;
+    /// Returns disks ordered by volumes priority
+    virtual Disks getDisks() const = 0;
+    /// Returns any disk
+    /// Used when it's not important, for example for
+    /// mutations files
+    virtual DiskPtr getAnyDisk() const = 0;
+    virtual DiskPtr getDiskByName(const String & disk_name) const = 0;
+    /// Get free space from most free disk
+    virtual UInt64 getMaxUnreservedFreeSpace() const = 0;
+    /// Reserves space on any volume with index > min_volume_index or returns nullptr
+    virtual ReservationPtr reserve(UInt64 bytes, size_t min_volume_index) const = 0;
+    /// Returns valid reservation or nullptr
+    virtual ReservationPtr reserve(UInt64 bytes) const = 0;
+    /// Reserves space on any volume or throws
+    virtual ReservationPtr reserveAndCheck(UInt64 bytes) const = 0;
+    /// Reserves 0 bytes on disk with max available space
+    /// Do not use this function when it is possible to predict size.
+    virtual ReservationPtr makeEmptyReservationOnLargestDisk() const = 0;
+    /// Get volume by index.
+    virtual VolumePtr getVolume(size_t index) const = 0;
+    virtual VolumePtr getVolumeByName(const String & volume_name) const = 0;
+    /// Checks if storage policy can be replaced by another one.
+    virtual void checkCompatibleWith(const StoragePolicyPtr & new_storage_policy) const = 0;
+    /// Find volume index, which contains disk
+    virtual size_t getVolumeIndexByDisk(const DiskPtr & disk_ptr) const = 0;
+    /// Check if we have any volume with stopped merges
+    virtual bool hasAnyVolumeWithDisabledMerges() const = 0;
+    virtual bool containsVolume(const String & volume_name) const = 0;
+};
+
+}
diff --git a/src/Disks/StoragePolicy.cpp b/src/Disks/StoragePolicy.cpp
index e3a937cae55..a1345879c83 100644
--- a/src/Disks/StoragePolicy.cpp
+++ b/src/Disks/StoragePolicy.cpp
@@ -93,17 +93,17 @@ StoragePolicy::StoragePolicy(String name_, Volumes volumes_, double move_factor_
 }
 
 
-StoragePolicy::StoragePolicy(const StoragePolicy & storage_policy,
+StoragePolicy::StoragePolicy(StoragePolicyPtr storage_policy,
         const Poco::Util::AbstractConfiguration & config,
         const String & config_prefix,
         DiskSelectorPtr disks)
-    : StoragePolicy(storage_policy.getName(), config, config_prefix, disks)
+    : StoragePolicy(storage_policy->getName(), config, config_prefix, disks)
 {
     for (auto & volume : volumes)
     {
-        if (storage_policy.volume_index_by_volume_name.count(volume->getName()) > 0)
+        if (storage_policy->containsVolume(volume->getName()))
         {
-            auto old_volume = storage_policy.getVolumeByName(volume->getName());
+            auto old_volume = storage_policy->getVolumeByName(volume->getName());
             try
             {
                 auto new_volume = updateVolumeFromConfig(old_volume, config, config_prefix + ".volumes." + volume->getName(), disks);
@@ -112,7 +112,7 @@ StoragePolicy::StoragePolicy(const StoragePolicy & storage_policy,
             catch (Exception & e)
             {
                 /// Default policies are allowed to be missed in configuration.
-                if (e.code() != ErrorCodes::NO_ELEMENTS_IN_CONFIG || storage_policy.getName() != DEFAULT_STORAGE_POLICY_NAME)
+                if (e.code() != ErrorCodes::NO_ELEMENTS_IN_CONFIG || storage_policy->getName() != DEFAULT_STORAGE_POLICY_NAME)
                     throw;
 
                 Poco::Util::AbstractConfiguration::Keys keys;
@@ -331,6 +331,11 @@ bool StoragePolicy::hasAnyVolumeWithDisabledMerges() const
     return false;
 }
 
+bool StoragePolicy::containsVolume(const String & volume_name) const
+{
+    return volume_index_by_volume_name.contains(volume_name);
+}
+
 StoragePolicySelector::StoragePolicySelector(
     const Poco::Util::AbstractConfiguration & config,
     const String & config_prefix,
@@ -345,6 +350,13 @@ StoragePolicySelector::StoragePolicySelector(
             throw Exception(
                 "Storage policy name can contain only alphanumeric and '_' (" + backQuote(name) + ")", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
 
+        /*
+         * A customization point for StoragePolicy, here one can add his own policy, for example, based on policy's name
+         * if (name == "MyCustomPolicy")
+         *      policies.emplace(name, std::make_shared<CustomPolicy>(name, config, config_prefix + "." + name, disks));
+         *  else
+         */
+
         policies.emplace(name, std::make_shared<StoragePolicy>(name, config, config_prefix + "." + name, disks));
         LOG_INFO(&Poco::Logger::get("StoragePolicySelector"), "Storage policy {} loaded", backQuote(name));
     }
@@ -374,7 +386,7 @@ StoragePolicySelectorPtr StoragePolicySelector::updateFromConfig(const Poco::Uti
     /// Second pass, load.
     for (const auto & [name, policy] : policies)
     {
-        result->policies[name] = std::make_shared<StoragePolicy>(*policy, config, config_prefix + "." + name, disks);
+        result->policies[name] = std::make_shared<StoragePolicy>(policy, config, config_prefix + "." + name, disks);
     }
 
     return result;
diff --git a/src/Disks/StoragePolicy.h b/src/Disks/StoragePolicy.h
index 9135c27d1c0..6676ab19043 100644
--- a/src/Disks/StoragePolicy.h
+++ b/src/Disks/StoragePolicy.h
@@ -1,5 +1,6 @@
 #pragma once
 
+#include <Disks/IStoragePolicy.h>
 #include <Disks/DiskSelector.h>
 #include <Disks/IDisk.h>
 #include <Disks/IVolume.h>
@@ -23,14 +24,11 @@
 namespace DB
 {
 
-class StoragePolicy;
-using StoragePolicyPtr = std::shared_ptr<const StoragePolicy>;
-
 /**
  * Contains all information about volumes configuration for Storage.
  * Can determine appropriate Volume and Disk for each reservation.
  */
-class StoragePolicy
+class StoragePolicy : public IStoragePolicy
 {
 public:
     StoragePolicy(String name_, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, DiskSelectorPtr disks);
@@ -38,62 +36,63 @@ public:
     StoragePolicy(String name_, Volumes volumes_, double move_factor_);
 
     StoragePolicy(
-        const StoragePolicy & storage_policy,
+        StoragePolicyPtr storage_policy,
         const Poco::Util::AbstractConfiguration & config,
         const String & config_prefix,
         DiskSelectorPtr disks
     );
 
-    bool isDefaultPolicy() const;
+    bool isDefaultPolicy() const override;
 
     /// Returns disks ordered by volumes priority
-    Disks getDisks() const;
+    Disks getDisks() const override;
 
     /// Returns any disk
     /// Used when it's not important, for example for
     /// mutations files
-    DiskPtr getAnyDisk() const;
+    DiskPtr getAnyDisk() const override;
 
-    DiskPtr getDiskByName(const String & disk_name) const;
+    DiskPtr getDiskByName(const String & disk_name) const override;
 
     /// Get free space from most free disk
-    UInt64 getMaxUnreservedFreeSpace() const;
+    UInt64 getMaxUnreservedFreeSpace() const override;
 
-    const String & getName() const { return name; }
+    const String & getName() const override{ return name; }
 
     /// Returns valid reservation or nullptr
-    ReservationPtr reserve(UInt64 bytes) const;
+    ReservationPtr reserve(UInt64 bytes) const override;
 
     /// Reserves space on any volume or throws
-    ReservationPtr reserveAndCheck(UInt64 bytes) const;
+    ReservationPtr reserveAndCheck(UInt64 bytes) const override;
 
     /// Reserves space on any volume with index > min_volume_index or returns nullptr
-    ReservationPtr reserve(UInt64 bytes, size_t min_volume_index) const;
+    ReservationPtr reserve(UInt64 bytes, size_t min_volume_index) const override;
 
     /// Find volume index, which contains disk
-    size_t getVolumeIndexByDisk(const DiskPtr & disk_ptr) const;
+    size_t getVolumeIndexByDisk(const DiskPtr & disk_ptr) const override;
 
     /// Reserves 0 bytes on disk with max available space
     /// Do not use this function when it is possible to predict size.
-    ReservationPtr makeEmptyReservationOnLargestDisk() const;
+    ReservationPtr makeEmptyReservationOnLargestDisk() const override;
 
-    const Volumes & getVolumes() const { return volumes; }
+    const Volumes & getVolumes() const  override{ return volumes; }
 
     /// Returns number [0., 1.] -- fraction of free space on disk
     /// which should be kept with help of background moves
-    double getMoveFactor() const { return move_factor; }
+    double getMoveFactor() const  override{ return move_factor; }
 
     /// Get volume by index.
-    VolumePtr getVolume(size_t index) const;
+    VolumePtr getVolume(size_t index) const override;
 
-    VolumePtr getVolumeByName(const String & volume_name) const;
+    VolumePtr getVolumeByName(const String & volume_name) const override;
 
     /// Checks if storage policy can be replaced by another one.
-    void checkCompatibleWith(const StoragePolicyPtr & new_storage_policy) const;
+    void checkCompatibleWith(const StoragePolicyPtr & new_storage_policy) const override;
 
     /// Check if we have any volume with stopped merges
-    bool hasAnyVolumeWithDisabledMerges() const;
+    bool hasAnyVolumeWithDisabledMerges() const override;
 
+    bool containsVolume(const String & volume_name) const override;
 private:
     Volumes volumes;
     const String name;
diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp
index d83fef72882..8040091256c 100644
--- a/src/Interpreters/Aggregator.cpp
+++ b/src/Interpreters/Aggregator.cpp
@@ -19,7 +19,6 @@
 #include <Common/assert_cast.h>
 #include <AggregateFunctions/AggregateFunctionArray.h>
 #include <AggregateFunctions/AggregateFunctionState.h>
-#include <Disks/StoragePolicy.h>
 #include <IO/Operators.h>
 
 
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index 8e15d0a4fed..e460fc732f9 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -102,8 +102,8 @@ using DiskPtr = std::shared_ptr<IDisk>;
 class DiskSelector;
 using DiskSelectorPtr = std::shared_ptr<const DiskSelector>;
 using DisksMap = std::map<String, DiskPtr>;
-class StoragePolicy;
-using StoragePolicyPtr = std::shared_ptr<const StoragePolicy>;
+class IStoragePolicy;
+using StoragePolicyPtr = std::shared_ptr<const IStoragePolicy>;
 using StoragePoliciesMap = std::map<String, StoragePolicyPtr>;
 class StoragePolicySelector;
 using StoragePolicySelectorPtr = std::shared_ptr<const StoragePolicySelector>;
diff --git a/src/Interpreters/SortedBlocksWriter.cpp b/src/Interpreters/SortedBlocksWriter.cpp
index 0dba09bc80f..f28bd53bd94 100644
--- a/src/Interpreters/SortedBlocksWriter.cpp
+++ b/src/Interpreters/SortedBlocksWriter.cpp
@@ -3,7 +3,7 @@
 #include <DataStreams/MergingSortedBlockInputStream.h>
 #include <DataStreams/OneBlockInputStream.h>
 #include <DataStreams/TemporaryFileStream.h>
-#include <Disks/StoragePolicy.h>
+#include <Disks/IVolume.h>
 
 namespace DB
 {
diff --git a/src/Processors/Transforms/MergeSortingTransform.cpp b/src/Processors/Transforms/MergeSortingTransform.cpp
index ce6d0ad1f6c..1806693db3a 100644
--- a/src/Processors/Transforms/MergeSortingTransform.cpp
+++ b/src/Processors/Transforms/MergeSortingTransform.cpp
@@ -8,7 +8,7 @@
 #include <Compression/CompressedWriteBuffer.h>
 #include <DataStreams/NativeBlockInputStream.h>
 #include <DataStreams/NativeBlockOutputStream.h>
-#include <Disks/StoragePolicy.h>
+#include <Disks/IVolume.h>
 
 
 namespace ProfileEvents
diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp
index 5006a817b5b..e161b5752ae 100644
--- a/src/Server/HTTPHandler.cpp
+++ b/src/Server/HTTPHandler.cpp
@@ -19,7 +19,7 @@
 #include <common/getFQDNOrHostName.h>
 #include <Common/setThreadName.h>
 #include <Common/SettingsChanges.h>
-#include <Disks/StoragePolicy.h>
+#include <Disks/IVolume.h>
 #include <Compression/CompressedReadBuffer.h>
 #include <Compression/CompressedWriteBuffer.h>
 #include <IO/ReadBufferFromIStream.h>
diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h
index 031b960fac1..1c0149ac261 100644
--- a/src/Storages/IStorage.h
+++ b/src/Storages/IStorage.h
@@ -50,8 +50,8 @@ class Pipe;
 class QueryPlan;
 using QueryPlanPtr = std::unique_ptr<QueryPlan>;
 
-class StoragePolicy;
-using StoragePolicyPtr = std::shared_ptr<const StoragePolicy>;
+class IStoragePolicy;
+using StoragePolicyPtr = std::shared_ptr<const IStoragePolicy>;
 
 struct StreamLocalLimits;
 class EnabledQuota;
diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
index f999aa67bbe..791c53633e9 100644
--- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
@@ -3,7 +3,6 @@
 #include <Storages/MergeTree/MergeTreeSequentialSource.h>
 #include <Storages/MergeTree/MergedBlockOutputStream.h>
 #include <Storages/MergeTree/MergedColumnOnlyOutputStream.h>
-#include <Disks/StoragePolicy.h>
 #include <Storages/MergeTree/SimpleMergeSelector.h>
 #include <Storages/MergeTree/AllMergeSelector.h>
 #include <Storages/MergeTree/TTLMergeSelector.h>
diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp
index afd7d6b876e..5227cd8a33e 100644
--- a/src/Storages/StorageDistributed.cpp
+++ b/src/Storages/StorageDistributed.cpp
@@ -1,7 +1,6 @@
 #include <Storages/StorageDistributed.h>
 
 #include <Databases/IDatabase.h>
-#include <Disks/StoragePolicy.h>
 #include <Disks/IDisk.h>
 
 #include <DataTypes/DataTypeFactory.h>
diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp
index 070e6eb0483..83596b5b19d 100644
--- a/src/Storages/StorageMergeTree.cpp
+++ b/src/Storages/StorageMergeTree.cpp
@@ -22,7 +22,6 @@
 #include <Storages/MergeTree/MergeTreeBlockOutputStream.h>
 #include <Storages/MergeTree/MergeTreeDataPartInMemory.h>
 #include <Storages/MergeTree/PartitionPruner.h>
-#include <Disks/StoragePolicy.h>
 #include <Storages/MergeTree/MergeList.h>
 #include <Storages/MergeTree/checkDataPart.h>
 #include <Processors/Pipe.h>
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index 70e90e9706a..2244b5c3ae1 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -27,7 +27,6 @@
 #include <Storages/MergeTree/ReplicatedMergeTreePartHeader.h>
 #include <Storages/VirtualColumnUtils.h>
 
-#include <Disks/StoragePolicy.h>
 
 #include <Databases/IDatabase.h>
 
diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp
index 363a2a20828..132ed234323 100644
--- a/src/Storages/System/StorageSystemTables.cpp
+++ b/src/Storages/System/StorageSystemTables.cpp
@@ -15,7 +15,7 @@
 #include <Common/StringUtils/StringUtils.h>
 #include <DataTypes/DataTypesNumber.h>
 #include <DataTypes/DataTypeArray.h>
-#include <Disks/StoragePolicy.h>
+#include <Disks/IStoragePolicy.h>
 #include <Processors/Sources/SourceWithProgress.h>
 #include <Processors/Pipe.h>
 #include <DataTypes/DataTypeUUID.h>

From e8a320cfd0d449f9a1118c751c94b913ba257407 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Tue, 26 Jan 2021 14:10:44 +0300
Subject: [PATCH 0184/1238] Fix more warnings

---
 src/Coordination/InMemoryLogStore.h      | 2 +-
 src/Coordination/SummingStateMachine.cpp | 1 +
 src/Coordination/TestKeeperStorage.h     | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/Coordination/InMemoryLogStore.h b/src/Coordination/InMemoryLogStore.h
index e9c41b50cf6..37f76f056ba 100644
--- a/src/Coordination/InMemoryLogStore.h
+++ b/src/Coordination/InMemoryLogStore.h
@@ -39,7 +39,7 @@ public:
     bool flush() override { return true; }
 
 private:
-    std::map<ulong, nuraft::ptr<nuraft::log_entry>> logs;
+    std::map<size_t, nuraft::ptr<nuraft::log_entry>> logs;
     mutable std::mutex logs_lock;
     std::atomic<size_t> start_idx;
 };
diff --git a/src/Coordination/SummingStateMachine.cpp b/src/Coordination/SummingStateMachine.cpp
index f9a3f4f9de2..0cb7a7da6c3 100644
--- a/src/Coordination/SummingStateMachine.cpp
+++ b/src/Coordination/SummingStateMachine.cpp
@@ -1,5 +1,6 @@
 #include <Coordination/SummingStateMachine.h>
 #include <iostream>
+#include <cstring>
 
 namespace DB
 {
diff --git a/src/Coordination/TestKeeperStorage.h b/src/Coordination/TestKeeperStorage.h
index 2c7c6bad4fa..6f70ff1c584 100644
--- a/src/Coordination/TestKeeperStorage.h
+++ b/src/Coordination/TestKeeperStorage.h
@@ -24,7 +24,7 @@ public:
     struct Node
     {
         String data;
-        Coordination::ACLs acls;
+        Coordination::ACLs acls{};
         bool is_ephemeral = false;
         bool is_sequental = false;
         Coordination::Stat stat{};

From 578f36e4f3c84173e322c35470a5e1cc24dd0348 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Tue, 26 Jan 2021 16:58:49 +0300
Subject: [PATCH 0185/1238] Update test.Fix ya.make.

---
 src/Processors/ya.make                               |  1 +
 .../01666_merge_tree_max_query_limit.reference       |  2 ++
 .../0_stateless/01666_merge_tree_max_query_limit.sh  | 12 +++++++++---
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/Processors/ya.make b/src/Processors/ya.make
index 2eb27be8899..d779cb320e6 100644
--- a/src/Processors/ya.make
+++ b/src/Processors/ya.make
@@ -117,6 +117,7 @@ SRCS(
     QueryPlan/MergingSortedStep.cpp
     QueryPlan/OffsetStep.cpp
     QueryPlan/PartialSortingStep.cpp
+    QueryPlan/QueryIdHolder.cpp
     QueryPlan/QueryPlan.cpp
     QueryPlan/ReadFromPreparedSource.cpp
     QueryPlan/ReadNothingStep.cpp
diff --git a/tests/queries/0_stateless/01666_merge_tree_max_query_limit.reference b/tests/queries/0_stateless/01666_merge_tree_max_query_limit.reference
index 25880a7d740..9011a5d1204 100644
--- a/tests/queries/0_stateless/01666_merge_tree_max_query_limit.reference
+++ b/tests/queries/0_stateless/01666_merge_tree_max_query_limit.reference
@@ -12,3 +12,5 @@ Check if another query is passed
 Modify max_concurrent_queries back to 1
 Check if another query with less marks to read is throttled
 yes
+was cancelled
+finished	long_running_query	default	select sleepEachRow(0.01) from simple settings max_block_size = 1 format Null
diff --git a/tests/queries/0_stateless/01666_merge_tree_max_query_limit.sh b/tests/queries/0_stateless/01666_merge_tree_max_query_limit.sh
index 0bf37673e91..27716aa8b28 100755
--- a/tests/queries/0_stateless/01666_merge_tree_max_query_limit.sh
+++ b/tests/queries/0_stateless/01666_merge_tree_max_query_limit.sh
@@ -4,18 +4,23 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
 
+function wait_for_query_to_start()
+{
+    while [[ $($CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "SELECT sum(read_rows) FROM system.processes WHERE query_id = '$1'") == 0 ]]; do sleep 0.1; done
+}
+
 ${CLICKHOUSE_CLIENT} --multiline --multiquery --query "
 drop table if exists simple;
 
 create table simple (i int, j int) engine = MergeTree order by i
 settings index_granularity = 1, max_concurrent_queries = 1, min_marks_to_honor_max_concurrent_queries = 2;
 
-insert into simple select number, number + 100 from numbers(10);
+insert into simple select number, number + 100 from numbers(1000);
 "
 
 echo "Spin up a long running query"
-${CLICKHOUSE_CLIENT} --query "select sleepEachRow(1) from simple settings max_block_size = 1 format Null" --query_id "long_running_query" &
-sleep 3
+${CLICKHOUSE_CLIENT} --query "select sleepEachRow(0.01) from simple settings max_block_size = 1 format Null" --query_id "long_running_query" 2>&1 | grep -o 'was cancelled' | head -1 &
+wait_for_query_to_start 'long_running_query'
 
 # query which reads marks >= min_marks_to_honor_max_concurrent_queries is throttled
 echo "Check if another query with some marks to read is throttled"
@@ -56,6 +61,7 @@ CODE=$?
 [ "$CODE" -ne "202" ] && echo "Expected error code: 202 but got: $CODE" && exit 1;
 echo "yes"
 
+${CLICKHOUSE_CLIENT} --query "KILL QUERY WHERE query_id = 'long_running_query' SYNC"
 wait
 
 ${CLICKHOUSE_CLIENT} --multiline --multiquery --query "

From 817eb100a186e1244f51247d7b83956152c6c8da Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Tue, 26 Jan 2021 17:08:31 +0300
Subject: [PATCH 0186/1238] Better shutdown

---
 src/Coordination/NuKeeperServer.cpp           | 12 +++-
 .../TestKeeperStorageDispatcher.cpp           | 65 ++++++++++---------
 .../TestKeeperStorageDispatcher.h             |  6 +-
 src/Interpreters/Context.cpp                  |  2 +-
 4 files changed, 48 insertions(+), 37 deletions(-)

diff --git a/src/Coordination/NuKeeperServer.cpp b/src/Coordination/NuKeeperServer.cpp
index 5b5aeb206c4..6d70eff1121 100644
--- a/src/Coordination/NuKeeperServer.cpp
+++ b/src/Coordination/NuKeeperServer.cpp
@@ -72,7 +72,17 @@ TestKeeperStorage::ResponsesForSessions NuKeeperServer::shutdown(const TestKeepe
 {
     TestKeeperStorage::ResponsesForSessions responses;
     if (can_become_leader)
-        responses = putRequests(expired_requests);
+    {
+        try
+        {
+            responses = putRequests(expired_requests);
+        }
+        catch (...)
+        {
+            tryLogCurrentException(__PRETTY_FUNCTION__);
+        }
+    }
+
     if (!launcher.shutdown(5))
         LOG_WARNING(&Poco::Logger::get("NuKeeperServer"), "Failed to shutdown RAFT server in {} seconds", 5);
     return responses;
diff --git a/src/Coordination/TestKeeperStorageDispatcher.cpp b/src/Coordination/TestKeeperStorageDispatcher.cpp
index 3aef5213adc..7ce81df0bfd 100644
--- a/src/Coordination/TestKeeperStorageDispatcher.cpp
+++ b/src/Coordination/TestKeeperStorageDispatcher.cpp
@@ -14,7 +14,7 @@ namespace ErrorCodes
 void TestKeeperStorageDispatcher::processingThread()
 {
     setThreadName("TestKeeperSProc");
-    while (!shutdown)
+    while (!shutdown_called)
     {
         TestKeeperStorage::RequestForSession request;
 
@@ -22,8 +22,9 @@ void TestKeeperStorageDispatcher::processingThread()
 
         if (requests_queue.tryPop(request, max_wait))
         {
-            if (shutdown)
+            if (shutdown_called)
                 break;
+
             try
             {
                 auto responses = server->putRequests({request});
@@ -51,34 +52,6 @@ void TestKeeperStorageDispatcher::setResponse(int64_t session_id, const Coordina
         session_to_response_callback.erase(session_writer);
 }
 
-void TestKeeperStorageDispatcher::finalize()
-{
-    {
-        std::lock_guard lock(push_request_mutex);
-
-        if (shutdown)
-            return;
-
-        shutdown = true;
-
-        if (processing_thread.joinable())
-            processing_thread.join();
-    }
-
-    if (server)
-    {
-        TestKeeperStorage::RequestsForSessions expired_requests;
-        TestKeeperStorage::RequestForSession request;
-        while (requests_queue.tryPop(request))
-            expired_requests.push_back(TestKeeperStorage::RequestForSession{request});
-
-        auto expired_responses = server->shutdown(expired_requests);
-
-        for (const auto & response_for_session : expired_responses)
-            setResponse(response_for_session.session_id, response_for_session.response);
-    }
-}
-
 bool TestKeeperStorageDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id)
 {
 
@@ -143,11 +116,34 @@ void TestKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfigura
 
 }
 
-TestKeeperStorageDispatcher::~TestKeeperStorageDispatcher()
+void TestKeeperStorageDispatcher::shutdown()
 {
     try
     {
-        finalize();
+        {
+            std::lock_guard lock(push_request_mutex);
+
+            if (shutdown_called)
+                return;
+
+            shutdown_called = true;
+
+            if (processing_thread.joinable())
+                processing_thread.join();
+        }
+
+        if (server)
+        {
+            TestKeeperStorage::RequestsForSessions expired_requests;
+            TestKeeperStorage::RequestForSession request;
+            while (requests_queue.tryPop(request))
+                expired_requests.push_back(TestKeeperStorage::RequestForSession{request});
+
+            auto expired_responses = server->shutdown(expired_requests);
+
+            for (const auto & response_for_session : expired_responses)
+                setResponse(response_for_session.session_id, response_for_session.response);
+        }
     }
     catch (...)
     {
@@ -155,6 +151,11 @@ TestKeeperStorageDispatcher::~TestKeeperStorageDispatcher()
     }
 }
 
+TestKeeperStorageDispatcher::~TestKeeperStorageDispatcher()
+{
+    shutdown();
+}
+
 void TestKeeperStorageDispatcher::registerSession(int64_t session_id, ZooKeeperResponseCallback callback)
 {
     std::lock_guard lock(session_to_response_callback_mutex);
diff --git a/src/Coordination/TestKeeperStorageDispatcher.h b/src/Coordination/TestKeeperStorageDispatcher.h
index aa220beecf2..5107f2f9cba 100644
--- a/src/Coordination/TestKeeperStorageDispatcher.h
+++ b/src/Coordination/TestKeeperStorageDispatcher.h
@@ -16,13 +16,12 @@ class TestKeeperStorageDispatcher
 private:
     Poco::Timespan operation_timeout{0, Coordination::DEFAULT_OPERATION_TIMEOUT_MS * 1000};
 
-    using clock = std::chrono::steady_clock;
 
     std::mutex push_request_mutex;
 
     using RequestsQueue = ConcurrentBoundedQueue<TestKeeperStorage::RequestForSession>;
     RequestsQueue requests_queue{1};
-    std::atomic<bool> shutdown{false};
+    std::atomic<bool> shutdown_called{false};
     using SessionToResponseCallback = std::unordered_map<int64_t, ZooKeeperResponseCallback>;
 
     std::mutex session_to_response_callback_mutex;
@@ -35,7 +34,6 @@ private:
 
 private:
     void processingThread();
-    void finalize();
     void setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response);
 
 public:
@@ -43,6 +41,8 @@ public:
 
     void initialize(const Poco::Util::AbstractConfiguration & config);
 
+    void shutdown();
+
     ~TestKeeperStorageDispatcher();
 
     bool putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id);
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 0b381cf3fae..033f4b54a64 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -447,7 +447,7 @@ struct ContextShared
         /// Stop zookeeper connection
         zookeeper.reset();
         /// Stop test_keeper storage
-        test_keeper_storage_dispatcher.reset();
+        test_keeper_storage_dispatcher->shutdown();
     }
 
     bool hasTraceCollector() const

From 3935d51b14813e6ad2563eaf72b1a17b7f15f7b4 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Tue, 26 Jan 2021 17:23:10 +0300
Subject: [PATCH 0187/1238] Fix segfault

---
 src/Interpreters/Context.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 033f4b54a64..4c396bd29f4 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -447,7 +447,8 @@ struct ContextShared
         /// Stop zookeeper connection
         zookeeper.reset();
         /// Stop test_keeper storage
-        test_keeper_storage_dispatcher->shutdown();
+        if (test_keeper_storage_dispatcher)
+            test_keeper_storage_dispatcher->shutdown();
     }
 
     bool hasTraceCollector() const

From 403e74d941de3083cc8f4335cea3ccfb0613b879 Mon Sep 17 00:00:00 2001
From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com>
Date: Tue, 26 Jan 2021 18:12:08 +0300
Subject: [PATCH 0188/1238] Add more debuginfo for test_concurrent_ttl_merges
 test

---
 tests/integration/helpers/test_tools.py       |  3 +++
 .../test_concurrent_ttl_merges/test.py        | 25 ++++++++++---------
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/tests/integration/helpers/test_tools.py b/tests/integration/helpers/test_tools.py
index 75ae8f67f7a..bbab12e55d4 100644
--- a/tests/integration/helpers/test_tools.py
+++ b/tests/integration/helpers/test_tools.py
@@ -38,6 +38,9 @@ class TSV:
     def __str__(self):
         return '\n'.join(self.lines)
 
+    def __len__(self):
+        return len(self.lines)
+
     @staticmethod
     def toMat(contents):
         return [line.split("\t") for line in contents.split("\n") if line.strip()]
diff --git a/tests/integration/test_concurrent_ttl_merges/test.py b/tests/integration/test_concurrent_ttl_merges/test.py
index 65bc3828b38..68913329e6b 100644
--- a/tests/integration/test_concurrent_ttl_merges/test.py
+++ b/tests/integration/test_concurrent_ttl_merges/test.py
@@ -2,7 +2,7 @@ import time
 
 import pytest
 from helpers.cluster import ClickHouseCluster
-from helpers.test_tools import assert_eq_with_retry
+from helpers.test_tools import assert_eq_with_retry, TSV
 
 cluster = ClickHouseCluster(__file__)
 node1 = cluster.add_instance('node1', main_configs=['configs/fast_background_pool.xml', 'configs/log_conf.xml'], with_zookeeper=True)
@@ -28,12 +28,13 @@ def count_ttl_merges_in_queue(node, table):
     return int(result.strip())
 
 
-def count_ttl_merges_in_background_pool(node, table):
-    result = node.query(
-        "SELECT count() FROM system.merges WHERE merge_type = 'TTL_DELETE' and table = '{}'".format(table))
-    if not result:
-        return 0
-    return int(result.strip())
+def count_ttl_merges_in_background_pool(node, table, level):
+    result = TSV(node.query(
+        "SELECT * FROM system.merges WHERE merge_type = 'TTL_DELETE' and table = '{}'".format(table)))
+    count = len(result)
+    if count >= level:
+        print("count_ttl_merges_in_background_pool: merges more than warn level:\n{}".format(result))
+    return count
 
 
 def count_regular_merges_in_background_pool(node, table):
@@ -67,7 +68,7 @@ def test_no_ttl_merges_in_busy_pool(started_cluster):
 
     while count_running_mutations(node1, "test_ttl") < 6:
         print("Mutations count", count_running_mutations(node1, "test_ttl"))
-        assert count_ttl_merges_in_background_pool(node1, "test_ttl") == 0
+        assert count_ttl_merges_in_background_pool(node1, "test_ttl", 1) == 0
         time.sleep(0.5)
 
     node1.query("SYSTEM START TTL MERGES")
@@ -100,7 +101,7 @@ def test_limited_ttl_merges_in_empty_pool(started_cluster):
 
     merges_with_ttl_count = set({})
     while True:
-        merges_with_ttl_count.add(count_ttl_merges_in_background_pool(node1, "test_ttl_v2"))
+        merges_with_ttl_count.add(count_ttl_merges_in_background_pool(node1, "test_ttl_v2", 3))
         time.sleep(0.01)
         if node1.query("SELECT COUNT() FROM test_ttl_v2") == "0\n":
             break
@@ -124,7 +125,7 @@ def test_limited_ttl_merges_in_empty_pool_replicated(started_cluster):
     merges_with_ttl_count = set({})
     entries_with_ttl_count = set({})
     while True:
-        merges_with_ttl_count.add(count_ttl_merges_in_background_pool(node1, "replicated_ttl"))
+        merges_with_ttl_count.add(count_ttl_merges_in_background_pool(node1, "replicated_ttl", 3))
         entries_with_ttl_count.add(count_ttl_merges_in_queue(node1, "replicated_ttl"))
         time.sleep(0.01)
         if node1.query("SELECT COUNT() FROM replicated_ttl") == "0\n":
@@ -159,8 +160,8 @@ def test_limited_ttl_merges_two_replicas(started_cluster):
     merges_with_ttl_count_node1 = set({})
     merges_with_ttl_count_node2 = set({})
     while True:
-        merges_with_ttl_count_node1.add(count_ttl_merges_in_background_pool(node1, "replicated_ttl_2"))
-        merges_with_ttl_count_node2.add(count_ttl_merges_in_background_pool(node2, "replicated_ttl_2"))
+        merges_with_ttl_count_node1.add(count_ttl_merges_in_background_pool(node1, "replicated_ttl_2"), 3)
+        merges_with_ttl_count_node2.add(count_ttl_merges_in_background_pool(node2, "replicated_ttl_2"), 3)
         if node1.query("SELECT COUNT() FROM replicated_ttl_2") == "0\n" and node2.query(
                 "SELECT COUNT() FROM replicated_ttl_2") == "0\n":
             break

From b8a2a29f94c946a0e6ef1f1ee0c7135c8040419b Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Tue, 26 Jan 2021 18:31:09 +0300
Subject: [PATCH 0189/1238] cleanup

---
 src/Processors/Transforms/WindowTransform.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index c893af42ec9..eb5b0627d11 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -350,7 +350,7 @@ void WindowTransform::writeOutGroup()
             past_the_end_block = first_block_number + blocks.size();
             past_the_end_row = blocks.back().numRows();
         }
-        for ( auto r = group_start;
+        for (auto r = group_start;
             r.block < past_the_end_block;
             ++r.block, r.row = 0)
         {

From 76adc85c7562af482c66d7740bb5ca2c8f87312f Mon Sep 17 00:00:00 2001
From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com>
Date: Tue, 26 Jan 2021 19:33:37 +0300
Subject: [PATCH 0190/1238] fix

---
 tests/integration/test_concurrent_ttl_merges/test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/integration/test_concurrent_ttl_merges/test.py b/tests/integration/test_concurrent_ttl_merges/test.py
index 68913329e6b..ba5ed9f0758 100644
--- a/tests/integration/test_concurrent_ttl_merges/test.py
+++ b/tests/integration/test_concurrent_ttl_merges/test.py
@@ -160,8 +160,8 @@ def test_limited_ttl_merges_two_replicas(started_cluster):
     merges_with_ttl_count_node1 = set({})
     merges_with_ttl_count_node2 = set({})
     while True:
-        merges_with_ttl_count_node1.add(count_ttl_merges_in_background_pool(node1, "replicated_ttl_2"), 3)
-        merges_with_ttl_count_node2.add(count_ttl_merges_in_background_pool(node2, "replicated_ttl_2"), 3)
+        merges_with_ttl_count_node1.add(count_ttl_merges_in_background_pool(node1, "replicated_ttl_2", 3))
+        merges_with_ttl_count_node2.add(count_ttl_merges_in_background_pool(node2, "replicated_ttl_2", 3))
         if node1.query("SELECT COUNT() FROM replicated_ttl_2") == "0\n" and node2.query(
                 "SELECT COUNT() FROM replicated_ttl_2") == "0\n":
             break

From f20d5e3b419b1efc77e3a3a1b7aa46f86ac4c201 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Tue, 26 Jan 2021 20:51:25 +0300
Subject: [PATCH 0191/1238] fix

---
 src/Databases/DatabaseAtomic.cpp              | 13 +++--
 src/Databases/DatabaseReplicated.h            |  2 +-
 src/Interpreters/Context.cpp                  |  3 +-
 src/Interpreters/Context.h                    |  1 +
 src/Interpreters/DDLTask.h                    |  3 +-
 src/Interpreters/DDLWorker.cpp                | 53 ++++++++-----------
 src/Interpreters/InterpreterRenameQuery.cpp   |  7 +++
 src/Interpreters/executeDDLQueryOnCluster.cpp |  7 +--
 src/Parsers/ASTAlterQuery.cpp                 | 14 ++++-
 src/Parsers/ASTAlterQuery.h                   |  4 ++
 src/Storages/StorageMaterializedView.cpp      |  6 ++-
 tests/clickhouse-test                         | 16 ++++--
 12 files changed, 78 insertions(+), 51 deletions(-)

diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp
index 1da23b9beef..8b75f439152 100644
--- a/src/Databases/DatabaseAtomic.cpp
+++ b/src/Databases/DatabaseAtomic.cpp
@@ -115,8 +115,8 @@ void DatabaseAtomic::dropTable(const Context & context, const String & table_nam
         std::unique_lock lock(mutex);
         table = getTableUnlocked(table_name, lock);
         table_metadata_path_drop = DatabaseCatalog::instance().getPathForDroppedMetadata(table->getStorageID());
-
-        if (auto txn = context.getMetadataTransaction())
+        auto txn = context.getMetadataTransaction();
+        if (txn && !context.isInternalSubquery())
             txn->commit();      /// Commit point (a sort of) for Replicated database
 
         /// NOTE: replica will be lost if server crashes before the following rename
@@ -241,7 +241,8 @@ void DatabaseAtomic::renameTable(const Context & context, const String & table_n
     }
 
     /// Table renaming actually begins here
-    if (auto txn = context.getMetadataTransaction())
+    auto txn = context.getMetadataTransaction();
+    if (txn && !context.isInternalSubquery())
         txn->commit();     /// Commit point (a sort of) for Replicated database
 
     /// NOTE: replica will be lost if server crashes before the following rename
@@ -301,7 +302,8 @@ void DatabaseAtomic::commitCreateTable(const ASTCreateQuery & query, const Stora
         DatabaseCatalog::instance().addUUIDMapping(query.uuid);
         locked_uuid = true;
 
-        if (auto txn = query_context.getMetadataTransaction())
+        auto txn = query_context.getMetadataTransaction();
+        if (txn && !query_context.isInternalSubquery())
             txn->commit();     /// Commit point (a sort of) for Replicated database
 
         /// NOTE: replica will be lost if server crashes before the following renameNoReplace(...)
@@ -335,7 +337,8 @@ void DatabaseAtomic::commitAlterTable(const StorageID & table_id, const String &
     if (table_id.uuid != actual_table_id.uuid)
         throw Exception("Cannot alter table because it was renamed", ErrorCodes::CANNOT_ASSIGN_ALTER);
 
-    if (auto txn = query_context.getMetadataTransaction())
+    auto txn = query_context.getMetadataTransaction();
+    if (txn && !query_context.isInternalSubquery())
         txn->commit();      /// Commit point (a sort of) for Replicated database
 
     /// NOTE: replica will be lost if server crashes before the following rename
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index 8085c234af4..586f381c962 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -64,7 +64,7 @@ public:
 
     void shutdown() override;
 
-    void loadStoredObjects(Context & context, bool has_force_restore_data_flag, bool force_attach = false) override;
+    void loadStoredObjects(Context & context, bool has_force_restore_data_flag, bool force_attach) override;
 
     String getFullReplicaName() const { return shard_name + '|' + replica_name; }
 
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 3d102553f5a..6895439b855 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -2522,8 +2522,7 @@ void Context::initMetadataTransaction(MetadataTransactionPtr txn)
 
 MetadataTransactionPtr Context::getMetadataTransaction() const
 {
-    //FIXME
-    //assert(query_context == this);
+    assert(!metadata_transaction || hasQueryContext());
     return metadata_transaction;
 }
 
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index dcb581b98c6..37ed01d4dbc 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -536,6 +536,7 @@ public:
     const Context & getQueryContext() const;
     Context & getQueryContext();
     bool hasQueryContext() const { return query_context != nullptr; }
+    bool isInternalSubquery() const { return hasQueryContext() && query_context != this; }
 
     const Context & getSessionContext() const;
     Context & getSessionContext();
diff --git a/src/Interpreters/DDLTask.h b/src/Interpreters/DDLTask.h
index 7501c01aa8f..a12676ab8a3 100644
--- a/src/Interpreters/DDLTask.h
+++ b/src/Interpreters/DDLTask.h
@@ -85,9 +85,10 @@ struct DDLTaskBase
     ExecutionStatus execution_status;
     bool was_executed = false;
 
+    std::atomic_bool completely_processed = false;
+
     DDLTaskBase(const String & name, const String & path) : entry_name(name), entry_path(path) {}
     DDLTaskBase(const DDLTaskBase &) = delete;
-    DDLTaskBase(DDLTaskBase &&) = default;
     virtual ~DDLTaskBase() = default;
 
     void parseQueryFromEntry(const Context & context);
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index fc72e4d8366..cb38c733582 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -341,9 +341,10 @@ void DDLWorker::scheduleTasks()
         auto & min_task = *std::min_element(current_tasks.begin(), current_tasks.end());
         begin_node = std::upper_bound(queue_nodes.begin(), queue_nodes.end(), min_task->entry_name);
         current_tasks.clear();
-        //FIXME better way of maintaning current tasks list and min_task name;
     }
 
+    assert(current_tasks.empty());
+
     for (auto it = begin_node; it != queue_nodes.end() && !stop_flag; ++it)
     {
         String entry_name = *it;
@@ -378,12 +379,8 @@ void DDLWorker::scheduleTasks()
 
 DDLTaskBase & DDLWorker::saveTask(DDLTaskPtr && task)
 {
-    //assert(current_tasks.size() <= pool_size + 1);
-    //if (current_tasks.size() == pool_size)
-    //{
-    //    assert(current_tasks.front()->ops.empty()); //FIXME
-    //    current_tasks.pop_front();
-    //}
+    std::remove_if(current_tasks.begin(), current_tasks.end(), [](const DDLTaskPtr & t) { return t->completely_processed.load(); });
+    assert(current_tasks.size() <= pool_size);
     current_tasks.emplace_back(std::move(task));
     return *current_tasks.back();
 }
@@ -555,6 +552,8 @@ void DDLWorker::processTask(DDLTaskBase & task)
         active_node->reset();
         task.ops.clear();
     }
+
+    task.completely_processed = true;
 }
 
 
@@ -572,6 +571,9 @@ bool DDLWorker::taskShouldBeExecutedOnLeader(const ASTPtr ast_ddl, const Storage
         // Setting alters should be executed on all replicas
         if (alter->isSettingsAlter())
             return false;
+
+        if (alter->isFreezeAlter())
+            return false;
     }
 
     return storage->supportsReplication();
@@ -856,28 +858,20 @@ String DDLWorker::enqueueQuery(DDLLogEntry & entry)
 
 void DDLWorker::initializeMainThread()
 {
-    auto reset_state = [&](bool reset_pool = true)
-    {
-        initialized = false;
-        /// It will wait for all threads in pool to finish and will not rethrow exceptions (if any).
-        /// We create new thread pool to forget previous exceptions.
-        if (reset_pool)
-            worker_pool = std::make_unique<ThreadPool>(pool_size);
-        /// Clear other in-memory state, like server just started.
-        current_tasks.clear();
-        max_id = 0;
-    };
-
+    assert(!initialized);
+    assert(max_id == 0);
+    assert(current_tasks.empty());
     setThreadName("DDLWorker");
     LOG_DEBUG(log, "Started DDLWorker thread");
 
-    do
+    while (!stop_flag)
     {
         try
         {
             auto zookeeper = getAndSetZooKeeper();
             zookeeper->createAncestors(fs::path(queue_dir) / "");
             initialized = true;
+            return;
         }
         catch (const Coordination::Exception & e)
         {
@@ -885,33 +879,29 @@ void DDLWorker::initializeMainThread()
             {
                 /// A logical error.
                 LOG_ERROR(log, "ZooKeeper error: {}. Failed to start DDLWorker.",getCurrentExceptionMessage(true));
-                reset_state(false);
                 assert(false);  /// Catch such failures in tests with debug build
             }
 
             tryLogCurrentException(__PRETTY_FUNCTION__);
-
-            /// Avoid busy loop when ZooKeeper is not available.
-            sleepForSeconds(5);
         }
         catch (...)
         {
             tryLogCurrentException(log, "Cannot initialize DDL queue.");
-            reset_state(false);
-            sleepForSeconds(5);
         }
+
+        /// Avoid busy loop when ZooKeeper is not available.
+        sleepForSeconds(5);
     }
-    while (!initialized && !stop_flag);
 }
 
 void DDLWorker::runMainThread()
 {
-    auto reset_state = [&](bool reset_pool = true)
+    auto reset_state = [&]()
     {
         initialized = false;
         /// It will wait for all threads in pool to finish and will not rethrow exceptions (if any).
         /// We create new thread pool to forget previous exceptions.
-        if (reset_pool)
+        if (1 < pool_size)
             worker_pool = std::make_unique<ThreadPool>(pool_size);
         /// Clear other in-memory state, like server just started.
         current_tasks.clear();
@@ -944,6 +934,7 @@ void DDLWorker::runMainThread()
             if (Coordination::isHardwareError(e.code))
             {
                 initialized = false;
+                LOG_INFO(log, "Lost ZooKeeper connection, will try to connect again: {}", getCurrentExceptionMessage(true));
             }
             else if (e.code == Coordination::Error::ZNONODE)
             {
@@ -953,10 +944,10 @@ void DDLWorker::runMainThread()
             }
             else
             {
-                LOG_ERROR(log, "Unexpected ZooKeeper error: {}", getCurrentExceptionMessage(true));
+                LOG_ERROR(log, "Unexpected ZooKeeper error, will try to restart main thread: {}", getCurrentExceptionMessage(true));
                 reset_state();
             }
-            sleepForSeconds(5);
+            sleepForSeconds(1);
         }
         catch (...)
         {
diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp
index 72398103d62..a6075643a96 100644
--- a/src/Interpreters/InterpreterRenameQuery.cpp
+++ b/src/Interpreters/InterpreterRenameQuery.cpp
@@ -13,6 +13,10 @@
 namespace DB
 {
 
+namespace ErrorCodes
+{
+    extern const int NOT_IMPLEMENTED;
+}
 
 InterpreterRenameQuery::InterpreterRenameQuery(const ASTPtr & query_ptr_, Context & context_)
     : query_ptr(query_ptr_), context(context_)
@@ -78,6 +82,9 @@ BlockIO InterpreterRenameQuery::executeToTables(const ASTRenameQuery & rename, c
         DatabasePtr database = database_catalog.getDatabase(elem.from_database_name);
         if (typeid_cast<DatabaseReplicated *>(database.get()) && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
         {
+            if (1 < descriptions.size())
+                throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Database {} is Replicated, "
+                                "it does not support renaming of multiple tables in single query.", elem.from_database_name);
             return typeid_cast<DatabaseReplicated *>(database.get())->propose(query_ptr);
         }
         else
diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp
index cf801caed04..fb155e82926 100644
--- a/src/Interpreters/executeDDLQueryOnCluster.cpp
+++ b/src/Interpreters/executeDDLQueryOnCluster.cpp
@@ -31,12 +31,13 @@ namespace ErrorCodes
 
 bool isSupportedAlterType(int type)
 {
+    assert(type != ASTAlterCommand::NO_TYPE);
     static const std::unordered_set<int> unsupported_alter_types{
+        /// It's dangerous, because it may duplicate data if executed on multiple replicas
         ASTAlterCommand::ATTACH_PARTITION,
-        ASTAlterCommand::REPLACE_PARTITION,
+        /// Usually followed by ATTACH PARTITION
         ASTAlterCommand::FETCH_PARTITION,
-        ASTAlterCommand::FREEZE_PARTITION,
-        ASTAlterCommand::FREEZE_ALL,
+        /// Logical error
         ASTAlterCommand::NO_TYPE,
     };
 
diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp
index 8a44dcc7c3b..f24b26d5b54 100644
--- a/src/Parsers/ASTAlterQuery.cpp
+++ b/src/Parsers/ASTAlterQuery.cpp
@@ -344,7 +344,7 @@ void ASTAlterCommand::formatImpl(
         throw Exception("Unexpected type of ALTER", ErrorCodes::UNEXPECTED_AST_STRUCTURE);
 }
 
-bool ASTAlterQuery::isSettingsAlter() const
+bool ASTAlterQuery::isOneCommandTypeOnly(const ASTAlterCommand::Type & type) const
 {
     if (command_list)
     {
@@ -353,7 +353,7 @@ bool ASTAlterQuery::isSettingsAlter() const
         for (const auto & child : command_list->children)
         {
             const auto & command = child->as<const ASTAlterCommand &>();
-            if (command.type != ASTAlterCommand::MODIFY_SETTING)
+            if (command.type != type)
                 return false;
         }
         return true;
@@ -361,6 +361,16 @@ bool ASTAlterQuery::isSettingsAlter() const
     return false;
 }
 
+bool ASTAlterQuery::isSettingsAlter() const
+{
+    return isOneCommandTypeOnly(ASTAlterCommand::MODIFY_SETTING);
+}
+
+bool ASTAlterQuery::isFreezeAlter() const
+{
+    return isOneCommandTypeOnly(ASTAlterCommand::FREEZE_PARTITION) || isOneCommandTypeOnly(ASTAlterCommand::FREEZE_ALL);
+}
+
 /** Get the text that identifies this element. */
 String ASTAlterQuery::getID(char delim) const
 {
diff --git a/src/Parsers/ASTAlterQuery.h b/src/Parsers/ASTAlterQuery.h
index f53a987905e..4cc01aa889e 100644
--- a/src/Parsers/ASTAlterQuery.h
+++ b/src/Parsers/ASTAlterQuery.h
@@ -189,6 +189,8 @@ public:
 
     bool isSettingsAlter() const;
 
+    bool isFreezeAlter() const;
+
     String getID(char) const override;
 
     ASTPtr clone() const override;
@@ -200,6 +202,8 @@ public:
 
 protected:
     void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
+
+    bool isOneCommandTypeOnly(const ASTAlterCommand::Type & type) const;
 };
 
 }
diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp
index af00b37b1d5..29aea3e6150 100644
--- a/src/Storages/StorageMaterializedView.cpp
+++ b/src/Storages/StorageMaterializedView.cpp
@@ -89,6 +89,7 @@ StorageMaterializedView::StorageMaterializedView(
     else
     {
         /// We will create a query to create an internal table.
+        auto create_context = Context(local_context);
         auto manual_create_query = std::make_shared<ASTCreateQuery>();
         manual_create_query->database = getStorageID().database_name;
         manual_create_query->table = generateInnerTableName(getStorageID());
@@ -99,7 +100,7 @@ StorageMaterializedView::StorageMaterializedView(
         manual_create_query->set(manual_create_query->columns_list, new_columns_list);
         manual_create_query->set(manual_create_query->storage, query.storage->ptr());
 
-        InterpreterCreateQuery create_interpreter(manual_create_query, local_context);
+        InterpreterCreateQuery create_interpreter(manual_create_query, create_context);
         create_interpreter.setInternal(true);
         create_interpreter.execute();
 
@@ -205,7 +206,8 @@ static void executeDropQuery(ASTDropQuery::Kind kind, Context & global_context,
         drop_query->no_delay = no_delay;
         drop_query->if_exists = true;
         ASTPtr ast_drop_query = drop_query;
-        InterpreterDropQuery drop_interpreter(ast_drop_query, global_context);
+        auto drop_context = Context(global_context);
+        InterpreterDropQuery drop_interpreter(ast_drop_query, drop_context);
         drop_interpreter.execute();
     }
 }
diff --git a/tests/clickhouse-test b/tests/clickhouse-test
index d5c6019d28f..13e7b4be001 100755
--- a/tests/clickhouse-test
+++ b/tests/clickhouse-test
@@ -162,7 +162,12 @@ def run_single_test(args, ext, server_logs_level, client_options, case_file, std
     while (datetime.now() - start_time).total_seconds() < args.timeout and proc.poll() is None:
         sleep(0.01)
 
-    if not args.database:
+    need_drop_database = not args.database
+    if need_drop_database and args.no_drop_if_fail:
+        maybe_passed = (proc.returncode == 0) and (proc.stderr is None) and (proc.stdout is None or 'Exception' not in proc.stdout)
+        need_drop_database = not maybe_passed
+
+    if need_drop_database:
         clickhouse_proc_create = Popen(shlex.split(args.client), stdin=PIPE, stdout=PIPE, stderr=PIPE, universal_newlines=True)
         seconds_left = max(args.timeout - (datetime.now() - start_time).total_seconds(), 10)
         try:
@@ -181,9 +186,10 @@ def run_single_test(args, ext, server_logs_level, client_options, case_file, std
 
     total_time = (datetime.now() - start_time).total_seconds()
 
-    # Normalize randomized database names in stdout, stderr files.
-    os.system("LC_ALL=C sed -i -e 's/{test_db}/default/g' {file}".format(test_db=database, file=stdout_file))
-    os.system("LC_ALL=C sed -i -e 's/{test_db}/default/g' {file}".format(test_db=database, file=stderr_file))
+    if not args.show_db_name:
+        # Normalize randomized database names in stdout, stderr files.
+        os.system("LC_ALL=C sed -i -e 's/{test_db}/default/g' {file}".format(test_db=database, file=stdout_file))
+        os.system("LC_ALL=C sed -i -e 's/{test_db}/default/g' {file}".format(test_db=database, file=stderr_file))
 
     stdout = open(stdout_file, 'rb').read() if os.path.exists(stdout_file) else b''
     stdout = str(stdout, errors='replace', encoding='utf-8')
@@ -884,6 +890,8 @@ if __name__ == '__main__':
     parser.add_argument('--hung-check', action='store_true', default=False)
     parser.add_argument('--force-color', action='store_true', default=False)
     parser.add_argument('--database', help='Database for tests (random name test_XXXXXX by default)')
+    parser.add_argument('--no-drop-if-fail', action='store_true', help='Do not drop database for test if test has failed')
+    parser.add_argument('--show-db-name', action='store_true', help='Do not replace random database name with "default"')
     parser.add_argument('--parallel', default='1/1', help='One parallel test run number/total')
     parser.add_argument('-j', '--jobs', default=1, nargs='?', type=int, help='Run all tests in parallel')
     parser.add_argument('-U', '--unified', default=3, type=int, help='output NUM lines of unified context')

From 10a8831d8b721a2fabefb1b3300947e583a354d0 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Tue, 26 Jan 2021 20:59:38 +0300
Subject: [PATCH 0192/1238] partition by -- single loop

---
 src/Processors/Transforms/WindowTransform.cpp | 81 ++++++++++---------
 src/Processors/Transforms/WindowTransform.h   |  6 ++
 2 files changed, 49 insertions(+), 38 deletions(-)

diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index eb5b0627d11..00181e3577f 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -79,6 +79,8 @@ void WindowTransform::advancePartitionEnd()
 
     const RowNumber end = blocksEnd();
 
+    fmt::print(stderr, "end {}, partition_end {}\n", end, partition_end);
+
     // If we're at the total end of data, we must end the partition. This is the
     // only place in calculations where we need special handling for end of data,
     // other places will work as usual based on `partition_ended` = true, because
@@ -93,49 +95,48 @@ void WindowTransform::advancePartitionEnd()
         return;
     }
 
+    // If we got to the end of the block already, just stop.
+    if (partition_end == end)
+    {
+        return;
+    }
+
+    // We process one block at a time, but we can process each block many times,
+    // if it contains multiple partitions. The `partition_end` is a
+    // past-the-end pointer, so it must be already in the "next" block we haven't
+    // processed yet. This is also the last block we have.
+    // The exception to this rule is end of data, for which we checked above.
+    assert(end.block == partition_end.block + 1);
+
     // Try to advance the partition end pointer.
     const size_t n = partition_by_indices.size();
     if (n == 0)
     {
-//        fmt::print(stderr, "no partition by\n");
         // No PARTITION BY. All input is one partition, which will end when the
         // input ends.
         partition_end = end;
         return;
     }
 
-    // The partition ends when the PARTITION BY columns change. We need an array
-    // of reference columns for comparison. We might have already dropped the
-    // blocks where the partition starts, but any row in the partition will do.
-    // Use group_start -- it's always in the valid region, because it points to
-    // the start of the current group, which we haven't fully processed yet, and
-    // hence cannot drop.
-    auto reference_row = group_start;
-    if (reference_row == partition_end)
+    // Check for partition end.
+    // The partition ends when the PARTITION BY columns change. We need
+    // some reference columns for comparison. We might have already
+    // dropped the blocks where the partition starts, but any row in the
+    // partition will do. Use group_start -- it's always in the valid
+    // region, because it points to the start of the current group,
+    // which we haven't fully processed yet, and therefore cannot drop.
+    // It might be the same as the partition_end if it's the first group of the
+    // first partition, so we compare it to itself, but it still works correctly.
+    const auto block_rows = blockRowsNumber(partition_end);
+    for (; partition_end.row < block_rows; ++partition_end.row)
     {
-        // This is for the very first partition and its first row. Try to get
-        // rid of this logic.
-        advanceRowNumber(partition_end);
-    }
-    assert(reference_row < blocksEnd());
-    assert(reference_row.block >= first_block_number);
-    Columns reference_partition_by;
-    for (const auto i : partition_by_indices)
-    {
-        reference_partition_by.push_back(inputAt(reference_row)[i]);
-    }
-
-//    fmt::print(stderr, "{} cols to compare, reference at {}\n", n, group_start);
-
-    for (; partition_end < end; advanceRowNumber(partition_end))
-    {
-        // Check for partition end.
         size_t i = 0;
         for (; i < n; i++)
         {
+            const auto * ref = inputAt(group_start)[partition_by_indices[i]].get();
             const auto * c = inputAt(partition_end)[partition_by_indices[i]].get();
             if (c->compareAt(partition_end.row,
-                    group_start.row, *reference_partition_by[i],
+                    group_start.row, *ref,
                     1 /* nan_direction_hint */) != 0)
             {
                 break;
@@ -144,13 +145,17 @@ void WindowTransform::advancePartitionEnd()
 
         if (i < n)
         {
-//            fmt::print(stderr, "col {} doesn't match at {}: ref {}, val {}\n",
-//                i, partition_end, inputAt(partition_end)[i]);
             partition_ended = true;
             return;
         }
     }
 
+    if (partition_end.row == block_rows)
+    {
+        ++partition_end.block;
+        partition_end.row = 0;
+    }
+
     // Went until the end of data and didn't find the new partition.
     assert(!partition_ended && partition_end == blocksEnd());
 }
@@ -198,12 +203,6 @@ void WindowTransform::advanceGroupEndGroups()
         group_ended = partition_ended;
     }
 
-    Columns reference_order_by;
-    for (const auto i : order_by_indices)
-    {
-        reference_order_by.push_back(inputAt(group_start)[i]);
-    }
-
     // `partition_end` is either end of partition or end of data.
     for (; group_end < partition_end; advanceRowNumber(group_end))
     {
@@ -211,9 +210,9 @@ void WindowTransform::advanceGroupEndGroups()
         size_t i = 0;
         for (; i < n; i++)
         {
-            const auto * c = inputAt(partition_end)[partition_by_indices[i]].get();
-            if (c->compareAt(group_end.row,
-                    group_start.row, *reference_order_by[i],
+            const auto * ref = inputAt(group_start)[order_by_indices[i]].get();
+            const auto * c = inputAt(group_end)[order_by_indices[i]].get();
+            if (c->compareAt(group_end.row, group_start.row, *ref,
                     1 /* nan_direction_hint */) != 0)
             {
                 break;
@@ -381,6 +380,10 @@ void WindowTransform::writeOutGroup()
     first_not_ready_row = group_end;
 }
 
+void WindowTransform::initPerBlockCaches()
+{
+}
+
 void WindowTransform::appendChunk(Chunk & chunk)
 {
 //    fmt::print(stderr, "new chunk, {} rows, finished={}\n", chunk.getNumRows(),
@@ -410,6 +413,8 @@ void WindowTransform::appendChunk(Chunk & chunk)
         }
     }
 
+    initPerBlockCaches();
+
     // Start the calculations. First, advance the partition end.
     for (;;)
     {
diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h
index d81914fe6f8..49efb19ae69 100644
--- a/src/Processors/Transforms/WindowTransform.h
+++ b/src/Processors/Transforms/WindowTransform.h
@@ -110,6 +110,7 @@ private:
     void advanceFrameStart();
     void advanceFrameEnd();
     void writeOutGroup();
+    void initPerBlockCaches();
 
     Columns & inputAt(const RowNumber & x)
     {
@@ -121,6 +122,11 @@ private:
     const Columns & inputAt(const RowNumber & x) const
     { return const_cast<WindowTransform *>(this)->inputAt(x); }
 
+    size_t blockRowsNumber(const RowNumber & x) const
+    {
+        return inputAt(x)[0]->size();
+    }
+
     MutableColumns & outputAt(const RowNumber & x)
     {
         assert(x.block >= first_block_number);

From b6657855ee3b6ab467aed20d7a8175955cd56b8e Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Tue, 26 Jan 2021 11:06:07 -0800
Subject: [PATCH 0193/1238] Docs - fixes the description of window param

---
 .../sql-reference/aggregate-functions/parametric-functions.md   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/aggregate-functions/parametric-functions.md b/docs/en/sql-reference/aggregate-functions/parametric-functions.md
index 3b02e145ff4..4b3bf12aa8c 100644
--- a/docs/en/sql-reference/aggregate-functions/parametric-functions.md
+++ b/docs/en/sql-reference/aggregate-functions/parametric-functions.md
@@ -241,7 +241,7 @@ windowFunnel(window, [mode])(timestamp, cond1, cond2, ..., condN)
 
 **Parameters**
 
--   `window` — Length of the sliding window in seconds.
+-   `window` — Length of the sliding window. The unit of `window` depends on the timestamp itself and varies. Determined using the expression `timestamp of cond2 <= timestamp of cond1 + window`.
 -   `mode` - It is an optional argument.
     -   `'strict'` - When the `'strict'` is set, the windowFunnel() applies conditions only for the unique values.
 -   `timestamp` — Name of the column containing the timestamp. Data types supported: [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md#data_type-datetime) and other unsigned integer types (note that even though timestamp supports the `UInt64` type, it’s value can’t exceed the Int64 maximum, which is 2^63 - 1).

From f4448ef01e468018d64feabd4fcbcd5041d6528a Mon Sep 17 00:00:00 2001
From: Daria Mozhaeva <dmozhaeva@yandex-team.ru>
Date: Tue, 26 Jan 2021 22:24:06 +0300
Subject: [PATCH 0194/1238] links

---
 .../table-engines/integrations/embedded-rocksdb.md |  4 +++-
 docs/en/operations/settings/settings.md            |  2 +-
 .../table-engines/integrations/embedded-rocksdb.md | 14 +++++++++-----
 docs/ru/operations/settings/settings.md            |  2 +-
 4 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md
index b1d21cc5f00..6e864751cc3 100644
--- a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md
+++ b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md
@@ -37,4 +37,6 @@ CREATE TABLE test
 )
 ENGINE = EmbeddedRocksDB
 PRIMARY KEY key
-```
\ No newline at end of file
+```
+
+[Original article](https://clickhouse.tech/docs/en/operations/table_engines/embedded-rocksdb/) <!--hide-->
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 03c3f4397ac..27a566dad44 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -428,7 +428,7 @@ Possible values:
 
 -   `'basic'` — Use basic parser.
 
-    ClickHouse can parse only the basic `YYYY-MM-DD HH:MM:SS` or `YYYY-MM-DD` format. For example, `'2019-08-20 10:18:56'` or `2019-08-20`.
+    ClickHouse can parse only the basic `YYYY-MM-DD HH:MM:SS` or `YYYY-MM-DD` format. For example, `2019-08-20 10:18:56` or `2019-08-20`.
 
 Default value: `'basic'`.
 
diff --git a/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md b/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md
index cb59cc9b568..9b68bcfc770 100644
--- a/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md
+++ b/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md
@@ -3,7 +3,7 @@ toc_priority: 6
 toc_title: EmbeddedRocksDB
 ---
 
-# EmbeddedRocksDB Engine {#EmbeddedRocksDB-engine}
+# Движок EmbeddedRocksDB {#EmbeddedRocksDB-engine}
 
 Этот движок позволяет интегрировать ClickHouse с [rocksdb](http://rocksdb.org/).
 
@@ -21,9 +21,11 @@ PRIMARY KEY(primary_key_name);
 
 Обязательные параметры:
 
-`primary_key_name` может быть любое имя столбца из списка столбцов.
-Указание первичного ключа `primary key` является обязательным. Он будет сериализован в двоичном формате как ключ `rocksdb`. Поддерживается только один столбец в первичном ключе.
-Столбцы, которые отличаются от первичного ключа, будут сериализованы в двоичном формате как значение `rockdb` в соответствующем порядке. Запросы с фильтрацией по ключу `equals` или `in` оптимизируются для поиска по нескольким ключам из `rocksdb`.
+- `primary_key_name` может быть любое имя столбца из списка столбцов.
+- Указание первичного ключа `primary key` является обязательным. Он будет сериализован в двоичном формате как ключ `rocksdb`. 
+- Поддерживается только один столбец в первичном ключе.
+- Столбцы, которые отличаются от первичного ключа, будут сериализованы в двоичном формате как значение `rockdb` в соответствующем порядке. 
+- Запросы с фильтрацией по ключу `equals` или `in` оптимизируются для поиска по нескольким ключам из `rocksdb`.
 
 Пример:
 
@@ -37,4 +39,6 @@ CREATE TABLE test
 )
 ENGINE = EmbeddedRocksDB
 PRIMARY KEY key;
-```
\ No newline at end of file
+```
+
+[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/table_engines/embedded-rocksdb/) <!--hide-->
\ No newline at end of file
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index 2b4164f49ea..662a597ba0d 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -412,7 +412,7 @@ ClickHouse может парсить базовый формат `YYYY-MM-DD HH:
 
 -   `basic` — используется базовый парсер.
 
-ClickHouse может парсить только базовый формат `YYYY-MM-DD HH:MM:SS` или `YYYY-MM-DD`. Например, `'2019-08-20 10:18:56'` или `2019-08-20`.
+ClickHouse может парсить только базовый формат `YYYY-MM-DD HH:MM:SS` или `YYYY-MM-DD`. Например, `2019-08-20 10:18:56` или `2019-08-20`.
 
 Значение по умолчанию: `basic`.
 

From 924e5c6ad3571d4fcde5a66e3648cbe75e24bc15 Mon Sep 17 00:00:00 2001
From: Olga Revyakina <revolg@yandex-team.ru>
Date: Wed, 27 Jan 2021 00:20:49 +0300
Subject: [PATCH 0195/1238] Old syntax deleted

---
 docs/en/sql-reference/statements/alter/quota.md |  5 +----
 .../en/sql-reference/statements/create/quota.md | 17 +----------------
 docs/ru/sql-reference/statements/alter/quota.md |  5 +----
 .../ru/sql-reference/statements/create/quota.md |  6 +-----
 4 files changed, 4 insertions(+), 29 deletions(-)

diff --git a/docs/en/sql-reference/statements/alter/quota.md b/docs/en/sql-reference/statements/alter/quota.md
index 08a36e8598c..18083e4a523 100644
--- a/docs/en/sql-reference/statements/alter/quota.md
+++ b/docs/en/sql-reference/statements/alter/quota.md
@@ -12,15 +12,12 @@ Syntax:
 ``` sql
 ALTER QUOTA [IF EXISTS] name [ON CLUSTER cluster_name]
     [RENAME TO new_name]
-    [KEYED BY {NONE | USER_NAME | IP_ADDRESS | CLIENT_KEY | CLIENT_KEY, USER_NAME | CLIENT_KEY, IP_ADDRESS} | NOT KEYED]
+    [KEYED BY {USER_NAME | IP_ADDRESS | CLIENT_KEY | CLIENT_KEY, USER_NAME | CLIENT_KEY, IP_ADDRESS} | NOT KEYED]
     [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY | WEEK | MONTH | QUARTER | YEAR}
         {MAX { {QUERIES | ERRORS | RESULT_ROWS | RESULT_BYTES | READ_ROWS | READ_BYTES | EXECUTION_TIME} = number } [,...] |
         NO LIMITS | TRACKING ONLY} [,...]]
     [TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
 ```
-Multiword key types may be written either with underscores (`CLIENT_KEY`), or with spaces and in simple quotes (`'client key'`). You may also use `'client key or user name'` instead of `CLIENT_KEY, USER_NAME`, and `'client key or ip address'` instead of `CLIENT_KEY, IP_ADDRESS`.
-
-Multiword resource types may be written either with underscores (`RESULT_ROWS`) or without them (`RESULT ROWS`). 
 
 **Examples**
 
diff --git a/docs/en/sql-reference/statements/create/quota.md b/docs/en/sql-reference/statements/create/quota.md
index 8ae49e4e1a8..d284dfa0ded 100644
--- a/docs/en/sql-reference/statements/create/quota.md
+++ b/docs/en/sql-reference/statements/create/quota.md
@@ -11,7 +11,7 @@ Syntax:
 
 ``` sql
 CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
-    [KEYED BY {NONE | USER_NAME | IP_ADDRESS | CLIENT_KEY | CLIENT_KEY, USER_NAME | CLIENT_KEY, IP_ADDRESS} | NOT KEYED]
+    [KEYED BY {USER_NAME | IP_ADDRESS | CLIENT_KEY | CLIENT_KEY, USER_NAME | CLIENT_KEY, IP_ADDRESS} | NOT KEYED]
     [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY | WEEK | MONTH | QUARTER | YEAR}
         {MAX { {QUERIES | ERRORS | RESULT_ROWS | RESULT_BYTES | READ_ROWS | READ_BYTES | EXECUTION_TIME} = number } [,...] |
          NO LIMITS | TRACKING ONLY} [,...]]
@@ -19,21 +19,6 @@ CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
 ```
 
 `ON CLUSTER` clause allows creating quotas on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md).
-
-Multiword key types may be written either with underscores (`CLIENT_KEY`), or with spaces and in simple quotes (`'client key'`). You may also use `'client key or user name'` instead of `CLIENT_KEY, USER_NAME`, and `'client key or ip address'` instead of `CLIENT_KEY, IP_ADDRESS`.
-
-Multiword resource types may be written either with underscores (`RESULT_ROWS`) or without them (`RESULT ROWS`). 
-## Example {#create-quota-example}
-
-Limit the maximum number of queries for the current user with 123 queries in 15 months constraint:
-
-``` sql
-CREATE QUOTA qA FOR INTERVAL 15 MONTH MAX QUERIES 123 TO CURRENT_USER
-```
-Multiword key types may be written either with underscores (`CLIENT_KEY`), or with spaces and in simple quotes (`'client key'`). You can also use `'client key or user name'` instead of `CLIENT_KEY, USER_NAME`, and `'client key or ip address'` instead of `CLIENT_KEY, IP_ADDRESS`.
-
-Multiword resource types may be written either with underscores (`RESULT_ROWS`) or without them (`RESULT ROWS`). 
-
 ## Examples {#create-quota-example}
 
 Limit the maximum number of queries for the current user with 123 queries in 15 months constraint:
diff --git a/docs/ru/sql-reference/statements/alter/quota.md b/docs/ru/sql-reference/statements/alter/quota.md
index 4f524db2a6d..1abb6336321 100644
--- a/docs/ru/sql-reference/statements/alter/quota.md
+++ b/docs/ru/sql-reference/statements/alter/quota.md
@@ -12,15 +12,12 @@ toc_title: QUOTA
 ``` sql
 ALTER QUOTA [IF EXISTS] name [ON CLUSTER cluster_name]
     [RENAME TO new_name]
-    [KEYED BY {NONE | USER_NAME | IP_ADDRESS | CLIENT_KEY | CLIENT_KEY, USER_NAME | CLIENT_KEY, IP_ADDRESS} | NOT KEYED]
+    [KEYED BY {USER_NAME | IP_ADDRESS | CLIENT_KEY | CLIENT_KEY, USER_NAME | CLIENT_KEY, IP_ADDRESS} | NOT KEYED]
     [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY | WEEK | MONTH | QUARTER | YEAR}
         {MAX { {QUERIES | ERRORS | RESULT_ROWS | RESULT_BYTES | READ_ROWS | READ_BYTES | EXECUTION_TIME} = number } [,...] |
         NO LIMITS | TRACKING ONLY} [,...]]
     [TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
 ```
-Поддерживаются два варианта написания составных типов ключей: с подчеркиванием (`CLIENT_KEY`) или через пробел и в одинарных кавычках (`'client key'`). Также можно использовать ключ `'client key or user name'` вместо `CLIENT_KEY, USER_NAME`, и ключ `'client key or ip address'` вместо `CLIENT_KEY, IP_ADDRESS`.
-
-Поддерживаются также два варианта написания составных типов ресурсов: с подчеркиванием (`RESULT_ROWS`) или без подчеркивания, через пробел (`RESULT ROWS`). 
 
 **Примеры**
 
diff --git a/docs/ru/sql-reference/statements/create/quota.md b/docs/ru/sql-reference/statements/create/quota.md
index 073c4eda85c..8ae3cc45ee1 100644
--- a/docs/ru/sql-reference/statements/create/quota.md
+++ b/docs/ru/sql-reference/statements/create/quota.md
@@ -11,7 +11,7 @@ toc_title: "\u041a\u0432\u043e\u0442\u0430"
 
 ``` sql
 CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
-    [KEYED BY {NONE | USER_NAME | IP_ADDRESS | CLIENT_KEY | CLIENT_KEY, USER_NAME | CLIENT_KEY, IP_ADDRESS} | NOT KEYED]
+    [KEYED BY {USER_NAME | IP_ADDRESS | CLIENT_KEY | CLIENT_KEY, USER_NAME | CLIENT_KEY, IP_ADDRESS} | NOT KEYED]
     [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY | WEEK | MONTH | QUARTER | YEAR}
         {MAX { {QUERIES | ERRORS | RESULT_ROWS | RESULT_BYTES | READ_ROWS | READ_BYTES | EXECUTION_TIME} = number } [,...] |
          NO LIMITS | TRACKING ONLY} [,...]]
@@ -20,10 +20,6 @@ CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
 
 В секции `ON CLUSTER` можно указать кластеры, на которых создается квота, см. [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md).
 
-Поддерживаются два варианта написания составных типов ключей: с подчеркиванием (`CLIENT_KEY`) или через пробел и в одинарных кавычках (`'client key'`). Также можно использовать ключ `'client key or user name'` вместо `CLIENT_KEY, USER_NAME`, и ключ `'client key or ip address'` вместо `CLIENT_KEY, IP_ADDRESS`.
-
-Поддерживаются также два варианта написания составных типов ресурсов: с подчеркиванием (`RESULT_ROWS`) или без подчеркивания, через пробел (`RESULT ROWS`). 
-
 **Примеры**
 
 Ограничить максимальное количество запросов для текущего пользователя — не более 123 запросов за каждые 15 месяцев:

From a767eb5b51079ac210e54a1029060ed29b1cae0d Mon Sep 17 00:00:00 2001
From: Olga Revyakina <revolg@yandex-team.ru>
Date: Wed, 27 Jan 2021 00:25:34 +0300
Subject: [PATCH 0196/1238] Syntax (headings) unified

---
 docs/en/sql-reference/statements/create/quota.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/statements/create/quota.md b/docs/en/sql-reference/statements/create/quota.md
index d284dfa0ded..8f3b89790e4 100644
--- a/docs/en/sql-reference/statements/create/quota.md
+++ b/docs/en/sql-reference/statements/create/quota.md
@@ -19,7 +19,8 @@ CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
 ```
 
 `ON CLUSTER` clause allows creating quotas on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md).
-## Examples {#create-quota-example}
+
+**Examples** 
 
 Limit the maximum number of queries for the current user with 123 queries in 15 months constraint:
 

From 83cfdde6d9e04d0f804e7328ec0e73ef56721564 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Wed, 27 Jan 2021 03:08:15 +0300
Subject: [PATCH 0197/1238] RANGE frame works in some cases

---
 src/Interpreters/AggregateDescription.h       |   8 +
 src/Interpreters/ExpressionAnalyzer.cpp       |   3 +-
 src/Parsers/ASTWindowDefinition.cpp           |   7 +-
 src/Parsers/ExpressionElementParsers.cpp      |  60 ++--
 src/Processors/Transforms/WindowTransform.cpp | 212 ++++++++------
 src/Processors/Transforms/WindowTransform.h   |  41 ++-
 .../01591_window_functions.reference          | 269 ++++++++++++++++--
 .../0_stateless/01591_window_functions.sql    |  46 +++
 8 files changed, 504 insertions(+), 142 deletions(-)

diff --git a/src/Interpreters/AggregateDescription.h b/src/Interpreters/AggregateDescription.h
index 89d1cdf4cb4..7f286b9c763 100644
--- a/src/Interpreters/AggregateDescription.h
+++ b/src/Interpreters/AggregateDescription.h
@@ -57,6 +57,14 @@ struct WindowFrame
 
      * OffsetType end_offset = Current;
      */
+
+
+    bool operator == (const WindowFrame & other) const
+    {
+        // We don't compare is_default because it's not a real property of the
+        // frame, and only influences how we display it.
+        return other.type == type;
+    }
 };
 
 struct WindowDescription
diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp
index f38ef1c6a1d..d22edda471f 100644
--- a/src/Interpreters/ExpressionAnalyzer.cpp
+++ b/src/Interpreters/ExpressionAnalyzer.cpp
@@ -516,7 +516,8 @@ void makeWindowDescriptionFromAST(WindowDescription & desc, const IAST * ast)
     desc.full_sort_description.insert(desc.full_sort_description.end(),
         desc.order_by.begin(), desc.order_by.end());
 
-    if (definition.frame.type != WindowFrame::FrameType::Rows)
+    if (definition.frame.type != WindowFrame::FrameType::Rows
+        && definition.frame.type != WindowFrame::FrameType::Range)
     {
         std::string name = definition.frame.type == WindowFrame::FrameType::Rows
             ? "ROWS"
diff --git a/src/Parsers/ASTWindowDefinition.cpp b/src/Parsers/ASTWindowDefinition.cpp
index ef28b54b613..af2c49d4e4c 100644
--- a/src/Parsers/ASTWindowDefinition.cpp
+++ b/src/Parsers/ASTWindowDefinition.cpp
@@ -52,13 +52,18 @@ void ASTWindowDefinition::formatImpl(const FormatSettings & settings,
         order_by->formatImpl(settings, state, format_frame);
     }
 
+    if ((partition_by || order_by) && !frame.is_default)
+    {
+        settings.ostr << " ";
+    }
+
     if (!frame.is_default)
     {
         const auto * name = frame.type == WindowFrame::FrameType::Rows
             ? "ROWS" : frame.type == WindowFrame::FrameType::Groups
                 ? "GROUPS" : "RANGE";
 
-        settings.ostr << name << " BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW";
+        settings.ostr << name << " UNBOUNDED PRECEDING";
     }
 }
 
diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp
index a80f4561eb0..11369e3495f 100644
--- a/src/Parsers/ExpressionElementParsers.cpp
+++ b/src/Parsers/ExpressionElementParsers.cpp
@@ -535,29 +535,51 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p
     ParserKeyword keyword_and("AND");
     ParserKeyword keyword_current_row("CURRENT ROW");
 
-    if (!keyword_between.ignore(pos, expected))
+    // There are two variants of grammar for the frame:
+    // 1) ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+    // 2) ROWS UNBOUNDED PRECEDING
+    // When the frame end is not specified (2), it defaults to CURRENT ROW.
+    if (keyword_between.ignore(pos, expected))
     {
-        return false;
+        // 1) ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+        if (!keyword_unbounded.ignore(pos, expected))
+        {
+            return false;
+        }
+
+        if (!keyword_preceding.ignore(pos, expected))
+        {
+            return false;
+        }
+
+        if (!keyword_and.ignore(pos, expected))
+        {
+            return false;
+        }
+
+        if (!keyword_current_row.ignore(pos, expected))
+        {
+            return false;
+        }
+
+    }
+    else
+    {
+        // 2) ROWS UNBOUNDED PRECEDING
+        if (!keyword_unbounded.ignore(pos, expected))
+        {
+            return false;
+        }
+
+        if (!keyword_preceding.ignore(pos, expected))
+        {
+            return false;
+        }
     }
 
-    if (!keyword_unbounded.ignore(pos, expected))
+    if (node->frame != WindowFrame{})
     {
-        return false;
-    }
-
-    if (!keyword_preceding.ignore(pos, expected))
-    {
-        return false;
-    }
-
-    if (!keyword_and.ignore(pos, expected))
-    {
-        return false;
-    }
-
-    if (!keyword_current_row.ignore(pos, expected))
-    {
-        return false;
+        node->frame.is_default = false;
     }
 
     return true;
diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index 00181e3577f..a60265cb3ca 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -79,7 +79,7 @@ void WindowTransform::advancePartitionEnd()
 
     const RowNumber end = blocksEnd();
 
-    fmt::print(stderr, "end {}, partition_end {}\n", end, partition_end);
+//    fmt::print(stderr, "end {}, partition_end {}\n", end, partition_end);
 
     // If we're at the total end of data, we must end the partition. This is the
     // only place in calculations where we need special handling for end of data,
@@ -95,7 +95,8 @@ void WindowTransform::advancePartitionEnd()
         return;
     }
 
-    // If we got to the end of the block already, just stop.
+    // If we got to the end of the block already, but expect more data, wait for
+    // it.
     if (partition_end == end)
     {
         return;
@@ -122,18 +123,17 @@ void WindowTransform::advancePartitionEnd()
     // The partition ends when the PARTITION BY columns change. We need
     // some reference columns for comparison. We might have already
     // dropped the blocks where the partition starts, but any row in the
-    // partition will do. Use group_start -- it's always in the valid
-    // region, because it points to the start of the current group,
-    // which we haven't fully processed yet, and therefore cannot drop.
-    // It might be the same as the partition_end if it's the first group of the
+    // partition will do. We use a special partition_etalon pointer for this.
+    // It might be the same as the partition_end if we're at the first row of the
     // first partition, so we compare it to itself, but it still works correctly.
+    const auto block_number = partition_end.block;
     const auto block_rows = blockRowsNumber(partition_end);
     for (; partition_end.row < block_rows; ++partition_end.row)
     {
         size_t i = 0;
         for (; i < n; i++)
         {
-            const auto * ref = inputAt(group_start)[partition_by_indices[i]].get();
+            const auto * ref = inputAt(partition_etalon)[partition_by_indices[i]].get();
             const auto * c = inputAt(partition_end)[partition_by_indices[i]].get();
             if (c->compareAt(partition_end.row,
                     group_start.row, *ref,
@@ -150,14 +150,19 @@ void WindowTransform::advancePartitionEnd()
         }
     }
 
-    if (partition_end.row == block_rows)
-    {
-        ++partition_end.block;
-        partition_end.row = 0;
-    }
+    // Went until the end of block, go to the next.
+    assert(partition_end.row == block_rows);
+    ++partition_end.block;
+    partition_end.row = 0;
 
     // Went until the end of data and didn't find the new partition.
     assert(!partition_ended && partition_end == blocksEnd());
+
+    // Advance the partition etalon so that we can drop the old blocks.
+    // We can use the last valid row of the block as the partition etalon.
+    // Shouldn't have empty blocks here (what would it mean?).
+    assert(block_rows > 0);
+    partition_etalon = RowNumber{block_number, block_rows - 1};
 }
 
 void WindowTransform::advanceGroupEnd()
@@ -169,19 +174,17 @@ void WindowTransform::advanceGroupEnd()
 
     switch (window_description.frame.type)
     {
+        case WindowFrame::FrameType::Range:
         case WindowFrame::FrameType::Groups:
-            advanceGroupEndGroups();
+            advanceGroupEndOrderBy();
             break;
         case WindowFrame::FrameType::Rows:
-            advanceGroupEndRows();
-            break;
-        case WindowFrame::FrameType::Range:
-            assert(false);
+            advanceGroupEndTrivial();
             break;
     }
 }
 
-void WindowTransform::advanceGroupEndRows()
+void WindowTransform::advanceGroupEndTrivial()
 {
     // ROWS mode, peer groups always contains only the current row.
     // We cannot advance the groups if the group start is already beyond the
@@ -192,7 +195,7 @@ void WindowTransform::advanceGroupEndRows()
     group_ended = true;
 }
 
-void WindowTransform::advanceGroupEndGroups()
+void WindowTransform::advanceGroupEndOrderBy()
 {
     const size_t n = order_by_indices.size();
     if (n == 0)
@@ -252,7 +255,7 @@ void WindowTransform::advanceFrameEnd()
     // Frame end is always the current group end, for now.
     // In ROWS mode the group is going to contain only the current row.
     frame_end = group_end;
-    frame_ended = true;
+    frame_ended = group_ended;
 
     // Add the columns over which we advanced the frame to the aggregate function
     // states.
@@ -275,7 +278,7 @@ void WindowTransform::advanceFrameEnd()
         uint64_t past_the_end_block;
         // Note that the past-the-end row is not in the past-the-end block, but
         // in the block before it.
-        uint32_t past_the_end_row;
+        uint64_t past_the_end_row;
 
         if (frame_end.block < first_block_number + blocks.size())
         {
@@ -326,7 +329,6 @@ void WindowTransform::writeOutGroup()
     // Empty groups don't make sense.
     assert(group_start < group_end);
 
-    std::vector<const IColumn *> argument_columns;
     for (size_t wi = 0; wi < workspaces.size(); ++wi)
     {
         auto & ws = workspaces[wi];
@@ -334,56 +336,95 @@ void WindowTransform::writeOutGroup()
         const auto * a = f.aggregate_function.get();
         auto * buf = ws.aggregate_function_state.data();
 
-        // Need to use a tricky loop to be able to batch per-block (but we don't
-        // do it yet...). See the comments to the similar loop in
-        // advanceFrameEnd() above.
+        // We'll calculate the value once for the first row in the group, and
+        // insert its copy for each other row in the group.
+        IColumn * reference_column = outputAt(group_start)[wi].get();
+        const size_t reference_row = group_start.row;
+        // FIXME does it also allocate the result on the arena?
+        // We'll have to pass it out with blocks then...
+        a->insertResultInto(buf, *reference_column, arena.get());
+        // The row we just added to the end of the column must correspond to the
+        // first row of the group.
+        assert(reference_column->size() == reference_row + 1);
+
+//        fmt::print(stderr, "calculated value of function {} is '{}'\n",
+//            wi, toString((*reference_column)[reference_row]));
+
+        // Now duplicate the calculated value into all other rows.
+        auto first_row_to_copy_to = group_start;
+        advanceRowNumber(first_row_to_copy_to);
+
+
+        // We use two explicit loops here instead of using advanceRowNumber(),
+        // because we want to batch the inserts per-block.
+        // Unfortunately this leads to tricky loop conditions, because the
+        // frame_end might be either a past-the-end block, or a valid block, in
+        // which case we also have to process its head. We have to avoid stepping
+        // into the past-the-end block because it might not be valid.
+        // Moreover, the past-the-end row is not in the past-the-end block, but
+        // in the block before it.
+        // And we also have to remember to reset the row number when moving to
+        // the next block.
         uint64_t past_the_end_block;
-        uint32_t past_the_end_row;
-        if (frame_end.block < first_block_number + blocks.size())
+        uint64_t past_the_end_row;
+        if (group_end.row == 0)
         {
-            past_the_end_block = frame_end.block + 1;
-            past_the_end_row = frame_end.row;
+            // group_end might not be valid.
+            past_the_end_block = group_end.block;
+
+            // Otherwise a group would end at the start of data, this is not
+            // possible.
+            assert(group_end.block > 0);
+
+            const size_t first_valid_block = group_end.block - 1;
+            assert(first_valid_block >= first_block_number);
+
+            past_the_end_row = blocks[first_valid_block - first_block_number]
+                .input_columns[0]->size();
         }
         else
         {
-            past_the_end_block = first_block_number + blocks.size();
-            past_the_end_row = blocks.back().numRows();
+            past_the_end_block = group_end.block + 1;
+            past_the_end_row = group_end.row;
         }
-        for (auto r = group_start;
-            r.block < past_the_end_block;
-            ++r.block, r.row = 0)
+
+        for (auto block_index = first_row_to_copy_to.block;
+            block_index < past_the_end_block;
+            ++block_index)
         {
-            const auto & block = blocks[r.block - first_block_number];
+            const auto & block = blocks[block_index - first_block_number];
 
-            argument_columns.clear();
-            for (const auto ai : ws.argument_column_indices)
+            // We process tail of the first block, all rows of intermediate
+            // blocks, and the head of the last block.
+            const auto block_first_row
+                = (block_index == first_row_to_copy_to.block)
+                    ? first_row_to_copy_to.row : 0;
+            const auto block_last_row = ((block_index + 1) == past_the_end_block)
+                ? past_the_end_row : block.numRows();
+
+//            fmt::print(stderr,
+//                "group rest [{}, {}), pteb {}, pter {}, cur {}, fr {}, lr {}\n",
+//                group_start, group_end, past_the_end_block, group_end.row,
+//                block_index, block_first_row, block_last_row);
+            // The number of the elements left to insert may be zero, but we must
+            // notice it on the first block. Other blocks shouldn't be empty,
+            // because we don't generally have empty block, and advanceRowNumber()
+            // doesn't generate past-the-end row numbers, so we wouldn't get into
+            // a block we don't want to process.
+            if (block_first_row == block_last_row)
             {
-                argument_columns.push_back(block.input_columns[ai].get());
+                assert(block_index == first_row_to_copy_to.block);
+                break;
             }
 
-            // We process all rows of intermediate blocks, and the head of the
-            // last block.
-            const auto end = ((r.block + 1) == past_the_end_block)
-                ? past_the_end_row
-                : block.numRows();
-            for (; r.row < end; ++r.row)
-            {
-                // FIXME does it also allocate the result on the arena?
-                // We'll have to pass it out with blocks then...
-                a->insertResultInto(buf,
-                    *block.output_columns[wi],
-                    arena.get());
-            }
+            block.output_columns[wi]->insertManyFrom(*reference_column,
+                reference_row, block_last_row - block_first_row);
         }
     }
 
     first_not_ready_row = group_end;
 }
 
-void WindowTransform::initPerBlockCaches()
-{
-}
-
 void WindowTransform::appendChunk(Chunk & chunk)
 {
 //    fmt::print(stderr, "new chunk, {} rows, finished={}\n", chunk.getNumRows(),
@@ -413,12 +454,12 @@ void WindowTransform::appendChunk(Chunk & chunk)
         }
     }
 
-    initPerBlockCaches();
-
     // Start the calculations. First, advance the partition end.
     for (;;)
     {
         advancePartitionEnd();
+//        fmt::print(stderr, "partition [?, {}), {}, old etalon {}\n", partition_end,
+//            partition_ended, partition_etalon);
 
         // Either we ran out of data or we found the end of partition (maybe
         // both, but this only happens at the total end of data).
@@ -428,22 +469,21 @@ void WindowTransform::appendChunk(Chunk & chunk)
             assert(input_is_finished);
         }
 
-//        fmt::print(stderr, "partition end '{}', {}\n", partition_end,
-//            partition_ended);
-
         // After that, advance the peer groups. We can advance peer groups until
         // the end of partition or current end of data, which is precisely the
         // description of `partition_end`.
-        while (group_end < partition_end)
+        while (group_start < partition_end)
         {
-            group_start = group_end;
             advanceGroupEnd();
 
-//            fmt::print(stderr, "group end '{}'\n", group_end);
+//            fmt::print(stderr, "group [{}, {}), {}\n", group_start, group_end,
+//                group_ended);
 
-            // If the group didn't end yet, wait.
             if (!group_ended)
             {
+                // Wait for more input data to find the end of group.
+                assert(!input_is_finished);
+                assert(!partition_ended);
                 return;
             }
 
@@ -457,6 +497,9 @@ void WindowTransform::appendChunk(Chunk & chunk)
 
             if (!frame_ended)
             {
+                // Wait for more input data to find the end of frame.
+                assert(!input_is_finished);
+                assert(!partition_ended);
                 return;
             }
 
@@ -467,35 +510,33 @@ void WindowTransform::appendChunk(Chunk & chunk)
             // The frame will have to be recalculated.
             frame_ended = false;
 
-            // Move to the next group. Don't advance group_start yet, it's
-            // convenient to use it as the PARTITION BY etalon.
+            // Move to the next group.
             group_ended = false;
-
-            if (group_end == partition_end)
-            {
-                break;
-            }
-            assert(group_end < partition_end);
-        }
-
-        if (!partition_ended)
-        {
-            // We haven't encountered the end of the partition yet, need more
-            // data.
-            assert(partition_end == blocksEnd());
-            break;
+            group_start = group_end;
         }
 
         if (input_is_finished)
         {
-            // why?
+            // We finalized the last partition in the above loop, and don't have
+            // to do anything else.
             return;
         }
 
+        if (!partition_ended)
+        {
+            // Wait for more input data to find the end of partition.
+            // Assert that we processed all the data we currently have, and that
+            // we are going to receive more data.
+            assert(partition_end == blocksEnd());
+            assert(!input_is_finished);
+            break;
+        }
+
         // Start the next partition.
         const auto new_partition_start = partition_end;
         advanceRowNumber(partition_end);
         partition_ended = false;
+        partition_etalon = new_partition_start;
         // We have to reset the frame when the new partition starts. This is not a
         // generally correct way to do so, but we don't really support moving frame
         // for now.
@@ -663,10 +704,13 @@ void WindowTransform::work()
     // We don't really have to keep the entire partition, and it can be big, so
     // we want to drop the starting blocks to save memory.
     // We can drop the old blocks if we already returned them as output, and the
-    // frame and group are already past them. Note that the frame start can be
-    // further than group start for some frame specs, so we have to check both.
+    // frame, group and the partition etalon are already past them. Note that the
+    // frame start can be further than group start for some frame specs (e.g.
+    // EXCLUDE CURRENT ROW), so we have to check both.
     const auto first_used_block = std::min(next_output_block_number,
-            std::min(frame_start.block, group_start.block));
+        std::min(frame_start.block,
+            std::min(group_start.block,
+                partition_etalon.block)));
     if (first_block_number < first_used_block)
     {
 //        fmt::print(stderr, "will drop blocks from {} to {}\n", first_block_number,
diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h
index 49efb19ae69..d18c9c727d2 100644
--- a/src/Processors/Transforms/WindowTransform.h
+++ b/src/Processors/Transforms/WindowTransform.h
@@ -15,7 +15,7 @@ using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
 
 class Arena;
 
-// Runtime data for computing one window function
+// Runtime data for computing one window function.
 struct WindowFunctionWorkspace
 {
     WindowFunctionDescription window_function;
@@ -58,18 +58,20 @@ struct RowNumber
 
 /*
  * Computes several window functions that share the same window. The input must
- * be sorted correctly for this window (PARTITION BY, then ORDER BY).
+ * be sorted by PARTITION BY (in any order), then by ORDER BY.
  * We need to track the following pointers:
- * 1) start of partition -- rows that compare equal w/PARTITION BY.
- * 2) current frame boundaries.
- * 3) start of peer group -- rows that compare equal w/ORDER BY (empty ORDER BY
- *    means all rows are equal).
- * These row ranges are (almost) nested -- peer group is inside frame inside
- * partition. The only exception is when the exclusion clause is specified that
- * excludes current peer group, but we don't support it anyway.
- * All pointers only move forward.
- * The value of the function is the same for all rows of the peer group.
- * (partition [frame {group} ] )
+ * 1) boundaries of partition -- rows that compare equal w/PARTITION BY.
+ * 2) boundaries of peer group -- rows that compare equal w/ORDER BY (empty
+ *    ORDER BY means all rows are peers).
+ * 3) boundaries of the frame.
+ * Both the peer group and the frame are inside the partition, but can have any
+ * position relative to each other.
+ * All pointers only move forward. For partition and group boundaries, this is
+ * ensured by the order of input data. This property also trivially holds for
+ * the ROWS and GROUPS frames. For the RANGE frame, the proof requires the
+ * additional fact that the ranges are specified in terms of (the single)
+ * ORDER BY column.
+ * The value of the window function is the same for all rows of the peer group.
  */
 class WindowTransform : public IProcessor /* public ISimpleTransform */
 {
@@ -104,13 +106,12 @@ public:
 private:
     void advancePartitionEnd();
     void advanceGroupEnd();
-    void advanceGroupEndGroups();
-    void advanceGroupEndRows();
+    void advanceGroupEndOrderBy();
+    void advanceGroupEndTrivial();
     void advanceGroupEndRange();
     void advanceFrameStart();
     void advanceFrameEnd();
     void writeOutGroup();
-    void initPerBlockCaches();
 
     Columns & inputAt(const RowNumber & x)
     {
@@ -224,6 +225,9 @@ public:
     // need it, and we want to be able to drop the starting blocks to save memory.
     // The `partition_end` is past-the-end, as usual. When partition_ended = false,
     // it still haven't ended, and partition_end is the next row to check.
+    // We still need to keep some not-too-far-away row in the partition, to use
+    // it as an etalon for PARTITION BY comparison.
+    RowNumber partition_etalon;
     RowNumber partition_end;
     bool partition_ended = false;
 
@@ -233,6 +237,13 @@ public:
     RowNumber group_end;
     bool group_ended = false;
 
+    // The frame is [frame_start, frame_end) if frame_ended, and unknown
+    // otherwise. Note that when we move to the next peer group, both the
+    // frame_start and the frame_end may jump forward by an unknown amount of
+    // blocks, e.g. if we use a RANGE frame. This means that sometimes we don't
+    // know neither frame_end nor frame_start.
+    // We update the states of the window functions as we track the frame
+    // boundaries.
     // After we have found the final boundaries of the frame, we can immediately
     // output the result for the current group, w/o waiting for more data.
     RowNumber frame_start;
diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference
index 3b4405ff865..56413261415 100644
--- a/tests/queries/0_stateless/01591_window_functions.reference
+++ b/tests/queries/0_stateless/01591_window_functions.reference
@@ -19,8 +19,8 @@ select number, max(number) over (partition by intDiv(number, 3) order by number
 1	2
 0	2
 5	5
-4	5
-3	5
+4	4
+3	4
 8	8
 7	8
 6	8
@@ -120,33 +120,33 @@ select number, max(number) over (partition by intDiv(number, 3) order by number
 0	2	1
 1	2	2
 2	2	3
-3	5	4
-4	5	5
+3	4	4
+4	4	5
 5	5	1
-6	8	2
-7	8	3
-8	8	4
-9	11	5
-10	11	1
+6	8	1
+7	8	2
+8	8	3
+9	10	4
+10	10	1
 11	11	2
 12	14	3
 13	14	4
 14	14	5
-15	17	1
-16	17	2
-17	17	3
-18	20	4
-19	20	5
+15	16	1
+16	16	1
+17	17	2
+18	20	3
+19	20	4
 20	20	1
-21	23	2
-22	23	3
+21	22	2
+22	22	3
 23	23	4
 24	26	5
 25	26	1
-26	26	2
-27	29	3
-28	29	4
-29	29	5
+26	26	1
+27	28	2
+28	28	3
+29	29	4
 30	30	1
 -- two functions over the same window
 -- an explain test would also be helpful, but it's too immature now and I don't
@@ -155,8 +155,8 @@ select number, max(number) over (partition by intDiv(number, 3) order by number
 0	2	3
 1	2	2
 2	2	1
-3	5	3
-4	5	2
+3	4	2
+4	4	1
 5	5	1
 6	6	1
 -- check that we can work with constant columns
@@ -230,3 +230,228 @@ from numbers(3);
 0
 1
 3
+--select
+--    sum(number)
+--        over (order by number groups between unbounded preceding and current row)
+--from numbers(3);
+
+-- RANGE frame
+-- Try some mutually prime sizes of partition, group and block, for the number
+-- of rows that is their least common multiple so that we see all the interesting
+-- corner cases.
+select number, intDiv(number, 3) p, mod(number, 2) o, count(number) over w as c
+from numbers(30)
+window w as (partition by p order by o range unbounded preceding)
+order by number
+settings max_block_size = 5
+;
+0	0	0	2
+1	0	1	3
+2	0	0	2
+3	1	1	3
+4	1	0	1
+5	1	1	3
+6	2	0	2
+7	2	1	3
+8	2	0	2
+9	3	1	3
+10	3	0	1
+11	3	1	3
+12	4	0	2
+13	4	1	3
+14	4	0	2
+15	5	1	3
+16	5	0	1
+17	5	1	3
+18	6	0	2
+19	6	1	3
+20	6	0	2
+21	7	1	3
+22	7	0	1
+23	7	1	3
+24	8	0	2
+25	8	1	3
+26	8	0	2
+27	9	1	3
+28	9	0	1
+29	9	1	3
+select number, intDiv(number, 5) p, mod(number, 3) o, count(number) over w as c
+from numbers(30)
+window w as (partition by p order by o range unbounded preceding)
+order by number
+settings max_block_size = 2
+;
+0	0	0	2
+1	0	1	4
+2	0	2	5
+3	0	0	2
+4	0	1	4
+5	1	2	5
+6	1	0	2
+7	1	1	3
+8	1	2	5
+9	1	0	2
+10	2	1	3
+11	2	2	5
+12	2	0	1
+13	2	1	3
+14	2	2	5
+15	3	0	2
+16	3	1	4
+17	3	2	5
+18	3	0	2
+19	3	1	4
+20	4	2	5
+21	4	0	2
+22	4	1	3
+23	4	2	5
+24	4	0	2
+25	5	1	3
+26	5	2	5
+27	5	0	1
+28	5	1	3
+29	5	2	5
+select number, intDiv(number, 5) p, mod(number, 2) o, count(number) over w as c
+from numbers(30)
+window w as (partition by p order by o range unbounded preceding)
+order by number
+settings max_block_size = 3
+;
+0	0	0	3
+1	0	1	5
+2	0	0	3
+3	0	1	5
+4	0	0	3
+5	1	1	5
+6	1	0	2
+7	1	1	5
+8	1	0	2
+9	1	1	5
+10	2	0	3
+11	2	1	5
+12	2	0	3
+13	2	1	5
+14	2	0	3
+15	3	1	5
+16	3	0	2
+17	3	1	5
+18	3	0	2
+19	3	1	5
+20	4	0	3
+21	4	1	5
+22	4	0	3
+23	4	1	5
+24	4	0	3
+25	5	1	5
+26	5	0	2
+27	5	1	5
+28	5	0	2
+29	5	1	5
+select number, intDiv(number, 3) p, mod(number, 5) o, count(number) over w as c
+from numbers(30)
+window w as (partition by p order by o range unbounded preceding)
+order by number
+settings max_block_size = 2
+;
+0	0	0	1
+1	0	1	2
+2	0	2	3
+3	1	3	2
+4	1	4	3
+5	1	0	1
+6	2	1	1
+7	2	2	2
+8	2	3	3
+9	3	4	3
+10	3	0	1
+11	3	1	2
+12	4	2	1
+13	4	3	2
+14	4	4	3
+15	5	0	1
+16	5	1	2
+17	5	2	3
+18	6	3	2
+19	6	4	3
+20	6	0	1
+21	7	1	1
+22	7	2	2
+23	7	3	3
+24	8	4	3
+25	8	0	1
+26	8	1	2
+27	9	2	1
+28	9	3	2
+29	9	4	3
+select number, intDiv(number, 2) p, mod(number, 5) o, count(number) over w as c
+from numbers(30)
+window w as (partition by p order by o range unbounded preceding)
+order by number
+settings max_block_size = 3
+;
+0	0	0	1
+1	0	1	2
+2	1	2	1
+3	1	3	2
+4	2	4	2
+5	2	0	1
+6	3	1	1
+7	3	2	2
+8	4	3	1
+9	4	4	2
+10	5	0	1
+11	5	1	2
+12	6	2	1
+13	6	3	2
+14	7	4	2
+15	7	0	1
+16	8	1	1
+17	8	2	2
+18	9	3	1
+19	9	4	2
+20	10	0	1
+21	10	1	2
+22	11	2	1
+23	11	3	2
+24	12	4	2
+25	12	0	1
+26	13	1	1
+27	13	2	2
+28	14	3	1
+29	14	4	2
+select number, intDiv(number, 2) p, mod(number, 3) o, count(number) over w as c
+from numbers(30)
+window w as (partition by p order by o range unbounded preceding)
+order by number
+settings max_block_size = 5
+;
+0	0	0	1
+1	0	1	2
+2	1	2	2
+3	1	0	1
+4	2	1	1
+5	2	2	2
+6	3	0	1
+7	3	1	2
+8	4	2	2
+9	4	0	1
+10	5	1	1
+11	5	2	2
+12	6	0	1
+13	6	1	2
+14	7	2	2
+15	7	0	1
+16	8	1	1
+17	8	2	2
+18	9	0	1
+19	9	1	2
+20	10	2	2
+21	10	0	1
+22	11	1	1
+23	11	2	2
+24	12	0	1
+25	12	1	2
+26	13	2	2
+27	13	0	1
+28	14	1	1
+29	14	2	2
diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql
index c942befa658..2c0a978d07b 100644
--- a/tests/queries/0_stateless/01591_window_functions.sql
+++ b/tests/queries/0_stateless/01591_window_functions.sql
@@ -104,3 +104,49 @@ from numbers(3);
 --    sum(number)
 --        over (order by number groups between unbounded preceding and current row)
 --from numbers(3);
+
+-- RANGE frame
+-- Try some mutually prime sizes of partition, group and block, for the number
+-- of rows that is their least common multiple so that we see all the interesting
+-- corner cases.
+select number, intDiv(number, 3) p, mod(number, 2) o, count(number) over w as c
+from numbers(30)
+window w as (partition by p order by o range unbounded preceding)
+order by number
+settings max_block_size = 5
+;
+
+select number, intDiv(number, 5) p, mod(number, 3) o, count(number) over w as c
+from numbers(30)
+window w as (partition by p order by o range unbounded preceding)
+order by number
+settings max_block_size = 2
+;
+
+select number, intDiv(number, 5) p, mod(number, 2) o, count(number) over w as c
+from numbers(30)
+window w as (partition by p order by o range unbounded preceding)
+order by number
+settings max_block_size = 3
+;
+
+select number, intDiv(number, 3) p, mod(number, 5) o, count(number) over w as c
+from numbers(30)
+window w as (partition by p order by o range unbounded preceding)
+order by number
+settings max_block_size = 2
+;
+
+select number, intDiv(number, 2) p, mod(number, 5) o, count(number) over w as c
+from numbers(30)
+window w as (partition by p order by o range unbounded preceding)
+order by number
+settings max_block_size = 3
+;
+
+select number, intDiv(number, 2) p, mod(number, 3) o, count(number) over w as c
+from numbers(30)
+window w as (partition by p order by o range unbounded preceding)
+order by number
+settings max_block_size = 5
+;

From 0d69249c56cae94c85de7c48f62d3e730c116638 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Wed, 27 Jan 2021 03:27:40 +0300
Subject: [PATCH 0198/1238] typo

---
 src/Processors/Transforms/WindowTransform.cpp |  7 +--
 .../01591_window_functions.reference          | 44 +++++++++----------
 2 files changed, 26 insertions(+), 25 deletions(-)

diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index a60265cb3ca..bd2a26a907c 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -136,7 +136,7 @@ void WindowTransform::advancePartitionEnd()
             const auto * ref = inputAt(partition_etalon)[partition_by_indices[i]].get();
             const auto * c = inputAt(partition_end)[partition_by_indices[i]].get();
             if (c->compareAt(partition_end.row,
-                    group_start.row, *ref,
+                    partition_etalon.row, *ref,
                     1 /* nan_direction_hint */) != 0)
             {
                 break;
@@ -457,9 +457,10 @@ void WindowTransform::appendChunk(Chunk & chunk)
     // Start the calculations. First, advance the partition end.
     for (;;)
     {
+//        const auto old_etalon = partition_etalon;
         advancePartitionEnd();
-//        fmt::print(stderr, "partition [?, {}), {}, old etalon {}\n", partition_end,
-//            partition_ended, partition_etalon);
+//        fmt::print(stderr, "partition [?, {}), {}, etalon old {} new {}\n",
+//            partition_end, partition_ended, old_etalon, partition_etalon);
 
         // Either we ran out of data or we found the end of partition (maybe
         // both, but this only happens at the total end of data).
diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference
index 56413261415..8d6adad5e3d 100644
--- a/tests/queries/0_stateless/01591_window_functions.reference
+++ b/tests/queries/0_stateless/01591_window_functions.reference
@@ -19,8 +19,8 @@ select number, max(number) over (partition by intDiv(number, 3) order by number
 1	2
 0	2
 5	5
-4	4
-3	4
+4	5
+3	5
 8	8
 7	8
 6	8
@@ -120,33 +120,33 @@ select number, max(number) over (partition by intDiv(number, 3) order by number
 0	2	1
 1	2	2
 2	2	3
-3	4	4
-4	4	5
+3	5	4
+4	5	5
 5	5	1
-6	8	1
-7	8	2
-8	8	3
-9	10	4
-10	10	1
+6	8	2
+7	8	3
+8	8	4
+9	11	5
+10	11	1
 11	11	2
 12	14	3
 13	14	4
 14	14	5
-15	16	1
-16	16	1
-17	17	2
-18	20	3
-19	20	4
+15	17	1
+16	17	2
+17	17	3
+18	20	4
+19	20	5
 20	20	1
-21	22	2
-22	22	3
+21	23	2
+22	23	3
 23	23	4
 24	26	5
 25	26	1
-26	26	1
-27	28	2
-28	28	3
-29	29	4
+26	26	2
+27	29	3
+28	29	4
+29	29	5
 30	30	1
 -- two functions over the same window
 -- an explain test would also be helpful, but it's too immature now and I don't
@@ -155,8 +155,8 @@ select number, max(number) over (partition by intDiv(number, 3) order by number
 0	2	3
 1	2	2
 2	2	1
-3	4	2
-4	4	1
+3	5	3
+4	5	2
 5	5	1
 6	6	1
 -- check that we can work with constant columns

From 48b4d98b2136c5c7f25b0d96e15c04be9c6204ad Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Wed, 27 Jan 2021 04:48:41 +0300
Subject: [PATCH 0199/1238] Amend

---
 src/Storages/MergeTree/MergeTreeReadPool.h       | 3 ++-
 src/Storages/MergeTree/MergeTreeReaderStream.cpp | 3 +++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/Storages/MergeTree/MergeTreeReadPool.h b/src/Storages/MergeTree/MergeTreeReadPool.h
index aa6811661e6..366e9a2381a 100644
--- a/src/Storages/MergeTree/MergeTreeReadPool.h
+++ b/src/Storages/MergeTree/MergeTreeReadPool.h
@@ -71,7 +71,8 @@ private:
 public:
     MergeTreeReadPool(
         const size_t threads_, const size_t sum_marks_, const size_t min_marks_for_concurrent_read_,
-        RangesInDataParts && parts_, const MergeTreeData & data_, const StorageMetadataPtr & metadata_snapshot_, const PrewhereInfoPtr & prewhere_info_,
+        RangesInDataParts && parts_, const MergeTreeData & data_, const StorageMetadataPtr & metadata_snapshot_,
+        const PrewhereInfoPtr & prewhere_info_,
         const bool check_columns_, const Names & column_names_,
         const BackoffSettings & backoff_settings_, size_t preferred_block_size_bytes_,
         const bool do_not_steal_tasks_ = false);
diff --git a/src/Storages/MergeTree/MergeTreeReaderStream.cpp b/src/Storages/MergeTree/MergeTreeReaderStream.cpp
index 08cb49445f0..fd251497d7c 100644
--- a/src/Storages/MergeTree/MergeTreeReaderStream.cpp
+++ b/src/Storages/MergeTree/MergeTreeReaderStream.cpp
@@ -112,6 +112,9 @@ MergeTreeReaderStream::MergeTreeReaderStream(
         if (profile_callback)
             buffer->setProfileCallback(profile_callback, clock_type);
 
+        if (!settings.checksum_on_read)
+            buffer->disableChecksumming();
+
         non_cached_buffer = std::move(buffer);
         data_buffer = non_cached_buffer.get();
     }

From 9d086f445da620ef59587ded0e142d979016c7aa Mon Sep 17 00:00:00 2001
From: guoleiyi <guoleiyi@guoleiyi-mb0.tencent.com>
Date: Wed, 27 Jan 2021 10:53:10 +0800
Subject: [PATCH 0200/1238] Should fail ddl query as soon as possible if table
 is shutdown

---
 src/Interpreters/DDLWorker.cpp              | 7 +++++++
 src/Storages/StorageReplicatedMergeTree.cpp | 1 +
 src/Storages/StorageReplicatedMergeTree.h   | 1 +
 3 files changed, 9 insertions(+)

diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index b1d9f872daa..e519b375c90 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -867,6 +867,13 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
         StorageReplicatedMergeTree::Status status;
         replicated_storage->getStatus(status);
 
+        // Should return as soon as possible if the table is shutdown by drop or other command.
+        if (status.is_partial_shutdown) {
+            LOG_WARNING(log, "Table is shutdown, task {} will not be executed.", task.entry_name);
+            task.execution_status = ExecutionStatus(ErrorCodes::UNFINISHED, "Cannot execute replicated DDL query, table is shutdown");
+            return false;
+        }
+
         /// Any replica which is leader tries to take lock
         if (status.is_leader && lock->tryLock())
         {
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index 70e90e9706a..f6e830f1570 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -4791,6 +4791,7 @@ void StorageReplicatedMergeTree::getStatus(Status & res, bool with_zk_fields)
     res.can_become_leader = storage_settings_ptr->replicated_can_become_leader;
     res.is_readonly = is_readonly;
     res.is_session_expired = !zookeeper || zookeeper->expired();
+    res.is_partial_shutdown = partial_shutdown_called;
 
     res.queue = queue.getStatus();
     res.absolute_delay = getAbsoluteDelay(); /// NOTE: may be slightly inconsistent with queue status.
diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h
index cf36cf82fc9..549f81c10e5 100644
--- a/src/Storages/StorageReplicatedMergeTree.h
+++ b/src/Storages/StorageReplicatedMergeTree.h
@@ -159,6 +159,7 @@ public:
         bool can_become_leader;
         bool is_readonly;
         bool is_session_expired;
+        bool is_partial_shutdown;
         ReplicatedMergeTreeQueue::Status queue;
         UInt32 parts_to_check;
         String zookeeper_path;

From 4a17f5c73ac23a1c3fbe2353d7dcf6a8f94723ee Mon Sep 17 00:00:00 2001
From: hexiaoting <hewenting_ict@163.com>
Date: Wed, 27 Jan 2021 11:24:17 +0800
Subject: [PATCH 0201/1238] Move condistions from JOIN ON to WHERE

---
 src/Interpreters/CollectJoinOnKeysVisitor.cpp | 54 ++++++++++++++-----
 src/Interpreters/CollectJoinOnKeysVisitor.h   |  5 +-
 src/Interpreters/TreeRewriter.cpp             | 25 +++++++--
 .../00878_join_unexpected_results.reference   |  2 +
 .../00878_join_unexpected_results.sql         |  8 +--
 ...conditions_from_join_on_to_where.reference | 47 ++++++++++++++++
 ..._move_conditions_from_join_on_to_where.sql | 27 ++++++++++
 7 files changed, 148 insertions(+), 20 deletions(-)
 create mode 100644 tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference
 create mode 100644 tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql

diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.cpp b/src/Interpreters/CollectJoinOnKeysVisitor.cpp
index 3b3fdaa65cb..a17f68fbf75 100644
--- a/src/Interpreters/CollectJoinOnKeysVisitor.cpp
+++ b/src/Interpreters/CollectJoinOnKeysVisitor.cpp
@@ -78,14 +78,48 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as
     {
         ASTPtr left = func.arguments->children.at(0);
         ASTPtr right = func.arguments->children.at(1);
-        auto table_numbers = getTableNumbers(ast, left, right, data);
-        data.addJoinKeys(left, right, table_numbers);
+        auto table_numbers = getTableNumbers(left, right, data);
+        if (table_numbers.first != 0)
+        {
+            data.addJoinKeys(left, right, table_numbers);
+            if (!data.new_on_expression)
+                data.new_on_expression = ast->clone();
+            else
+                data.new_on_expression = makeASTFunction("and", data.new_on_expression, ast->clone());
+        }
+        else
+        {
+            if (!data.new_where_conditions)
+                data.new_where_conditions = ast->clone();
+            else
+                data.new_where_conditions = makeASTFunction("and", data.new_where_conditions, ast->clone());
+
+            data.move_to_where = true;
+        }
+
     }
     else if (inequality != ASOF::Inequality::None)
     {
         if (!data.is_asof)
-            throw Exception("JOIN ON inequalities are not supported. Unexpected '" + queryToString(ast) + "'",
-                            ErrorCodes::NOT_IMPLEMENTED);
+        {
+            ASTPtr left = func.arguments->children.at(0);
+            ASTPtr right = func.arguments->children.at(1);
+            auto table_numbers = getTableNumbers(left, right, data);
+            if (table_numbers.first != 0)
+            {
+                throw Exception("JOIN ON inequalities are not supported. Unexpected '" + queryToString(ast) + "'",
+                    ErrorCodes::NOT_IMPLEMENTED);
+            }
+            else
+            {
+                if (!data.new_where_conditions)
+                    data.new_where_conditions = ast->clone();
+                else
+                    data.new_where_conditions = makeASTFunction("and", data.new_where_conditions, ast->clone());
+
+                data.move_to_where = true;
+            }
+        }
 
         if (data.asof_left_key || data.asof_right_key)
             throw Exception("ASOF JOIN expects exactly one inequality in ON section. Unexpected '" + queryToString(ast) + "'",
@@ -93,7 +127,7 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as
 
         ASTPtr left = func.arguments->children.at(0);
         ASTPtr right = func.arguments->children.at(1);
-        auto table_numbers = getTableNumbers(ast, left, right, data);
+        auto table_numbers = getTableNumbers(left, right, data);
 
         data.addAsofJoinKeys(left, right, table_numbers, inequality);
     }
@@ -118,7 +152,7 @@ void CollectJoinOnKeysMatcher::getIdentifiers(const ASTPtr & ast, std::vector<co
         getIdentifiers(child, out);
 }
 
-std::pair<size_t, size_t> CollectJoinOnKeysMatcher::getTableNumbers(const ASTPtr & expr, const ASTPtr & left_ast, const ASTPtr & right_ast,
+std::pair<size_t, size_t> CollectJoinOnKeysMatcher::getTableNumbers(const ASTPtr & left_ast, const ASTPtr & right_ast,
                                                                     Data & data)
 {
     std::vector<const ASTIdentifier *> left_identifiers;
@@ -128,10 +162,7 @@ std::pair<size_t, size_t> CollectJoinOnKeysMatcher::getTableNumbers(const ASTPtr
     getIdentifiers(right_ast, right_identifiers);
 
     if (left_identifiers.empty() || right_identifiers.empty())
-    {
-        throw Exception("Not equi-join ON expression: " + queryToString(expr) + ". No columns in one of equality side.",
-                        ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
-    }
+        return std::make_pair(0, 0);
 
     size_t left_idents_table = getTableForIdentifiers(left_identifiers, data);
     size_t right_idents_table = getTableForIdentifiers(right_identifiers, data);
@@ -141,8 +172,7 @@ std::pair<size_t, size_t> CollectJoinOnKeysMatcher::getTableNumbers(const ASTPtr
         auto left_name = queryToString(*left_identifiers[0]);
         auto right_name = queryToString(*right_identifiers[0]);
 
-        throw Exception("In expression " + queryToString(expr) + " columns " + left_name + " and " + right_name
-            + " are from the same table but from different arguments of equal function", ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
+        return std::make_pair(0, 0);
     }
 
     return std::make_pair(left_idents_table, right_idents_table);
diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.h b/src/Interpreters/CollectJoinOnKeysVisitor.h
index 54e008a114e..2c2d731a4d7 100644
--- a/src/Interpreters/CollectJoinOnKeysVisitor.h
+++ b/src/Interpreters/CollectJoinOnKeysVisitor.h
@@ -32,6 +32,9 @@ public:
         const bool is_asof{false};
         ASTPtr asof_left_key{};
         ASTPtr asof_right_key{};
+        ASTPtr new_on_expression{};
+        ASTPtr new_where_conditions{};
+        bool move_to_where{false};
         bool has_some{false};
 
         void addJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, const std::pair<size_t, size_t> & table_no);
@@ -57,7 +60,7 @@ private:
     static void visit(const ASTFunction & func, const ASTPtr & ast, Data & data);
 
     static void getIdentifiers(const ASTPtr & ast, std::vector<const ASTIdentifier *> & out);
-    static std::pair<size_t, size_t> getTableNumbers(const ASTPtr & expr, const ASTPtr & left_ast, const ASTPtr & right_ast, Data & data);
+    static std::pair<size_t, size_t> getTableNumbers(const ASTPtr & left_ast, const ASTPtr & right_ast, Data & data);
     static const ASTIdentifier * unrollAliases(const ASTIdentifier * identifier, const Aliases & aliases);
     static size_t getTableForIdentifiers(std::vector<const ASTIdentifier *> & identifiers, const Data & data);
 };
diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp
index eaf46b717fc..7a4eac6eae3 100644
--- a/src/Interpreters/TreeRewriter.cpp
+++ b/src/Interpreters/TreeRewriter.cpp
@@ -400,13 +400,13 @@ void setJoinStrictness(ASTSelectQuery & select_query, JoinStrictness join_defaul
 
 /// Find the columns that are obtained by JOIN.
 void collectJoinedColumns(TableJoin & analyzed_join, const ASTSelectQuery & select_query,
-                          const TablesWithColumns & tables, const Aliases & aliases)
+                          const TablesWithColumns & tables, const Aliases & aliases, ASTPtr & new_where_conditions)
 {
     const ASTTablesInSelectQueryElement * node = select_query.join();
     if (!node)
         return;
 
-    const auto & table_join = node->table_join->as<ASTTableJoin &>();
+    auto & table_join = node->table_join->as<ASTTableJoin &>();
 
     if (table_join.using_expression_list)
     {
@@ -425,9 +425,24 @@ void collectJoinedColumns(TableJoin & analyzed_join, const ASTSelectQuery & sele
                             ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
         if (is_asof)
             data.asofToJoinKeys();
+        else if (data.move_to_where)
+        {
+            table_join.on_expression = (data.new_on_expression)->clone();
+            new_where_conditions = data.new_where_conditions;
+        }
     }
 }
 
+/// Move joined key related to only one table to WHERE clause
+void moveJoinedKeyToWhere(ASTSelectQuery * select_query, ASTPtr & new_where_conditions)
+{
+    if (select_query->where())
+        select_query->setExpression(ASTSelectQuery::Expression::WHERE,
+            makeASTFunction("and", new_where_conditions->clone(), select_query->where()->clone()));
+    else
+        select_query->setExpression(ASTSelectQuery::Expression::WHERE, new_where_conditions->clone());
+}
+
 
 std::vector<const ASTFunction *> getAggregates(ASTPtr & query, const ASTSelectQuery & select_query)
 {
@@ -807,7 +822,11 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
 
     setJoinStrictness(*select_query, settings.join_default_strictness, settings.any_join_distinct_right_table_keys,
                         result.analyzed_join->table_join);
-    collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_columns, result.aliases);
+
+    ASTPtr new_where_condition;
+    collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_columns, result.aliases, new_where_condition);
+    if (new_where_condition)
+        moveJoinedKeyToWhere(select_query, new_where_condition);
 
     /// rewrite filters for select query, must go after getArrayJoinedColumns
     if (settings.optimize_respect_aliases && result.metadata_snapshot)
diff --git a/tests/queries/0_stateless/00878_join_unexpected_results.reference b/tests/queries/0_stateless/00878_join_unexpected_results.reference
index a389cb47a96..aaf586c2767 100644
--- a/tests/queries/0_stateless/00878_join_unexpected_results.reference
+++ b/tests/queries/0_stateless/00878_join_unexpected_results.reference
@@ -23,6 +23,7 @@ join_use_nulls = 1
 -
 \N	\N
 -
+2	2	\N	\N
 -
 1	1	1	1
 2	2	\N	\N
@@ -49,6 +50,7 @@ join_use_nulls = 0
 -
 -
 -
+2	2	0	0
 -
 1	1	1	1
 2	2	0	0
diff --git a/tests/queries/0_stateless/00878_join_unexpected_results.sql b/tests/queries/0_stateless/00878_join_unexpected_results.sql
index 0aef5208b26..6f6cd6e6479 100644
--- a/tests/queries/0_stateless/00878_join_unexpected_results.sql
+++ b/tests/queries/0_stateless/00878_join_unexpected_results.sql
@@ -30,11 +30,11 @@ select * from t left outer join s on (t.a=s.a and t.b=s.b) where s.a is null;
 select '-';
 select s.* from t left outer join s on (t.a=s.a and t.b=s.b) where s.a is null;
 select '-';
-select t.*, s.* from t left join s on (s.a=t.a and t.b=s.b and t.a=toInt64(2)) order by t.a; -- {serverError 403 }
+select t.*, s.* from t left join s on (s.a=t.a and t.b=s.b and t.a=toInt64(2)) order by t.a;
 select '-';
 select t.*, s.* from t left join s on (s.a=t.a) order by t.a;
 select '-';
-select t.*, s.* from t left join s on (t.b=toInt64(2) and s.a=t.a) where s.b=2; -- {serverError 403 }
+select t.*, s.* from t left join s on (t.b=toInt64(2) and s.a=t.a) where s.b=2;
 
 select 'join_use_nulls = 0';
 set join_use_nulls = 0;
@@ -58,11 +58,11 @@ select '-';
 select '-';
 -- select s.* from t left outer join s on (t.a=s.a and t.b=s.b) where s.a is null; -- TODO
 select '-';
-select t.*, s.* from t left join s on (s.a=t.a and t.b=s.b and t.a=toInt64(2)) order by t.a; -- {serverError 403 }
+select t.*, s.* from t left join s on (s.a=t.a and t.b=s.b and t.a=toInt64(2)) order by t.a;
 select '-';
 select t.*, s.* from t left join s on (s.a=t.a) order by t.a;
 select '-';
-select t.*, s.* from t left join s on (t.b=toInt64(2) and s.a=t.a) where s.b=2; -- {serverError 403 }
+select t.*, s.* from t left join s on (t.b=toInt64(2) and s.a=t.a) where s.b=2;
 
 drop table t;
 drop table s;
diff --git a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference
new file mode 100644
index 00000000000..cf5d26b657a
--- /dev/null
+++ b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference
@@ -0,0 +1,47 @@
+---------Q1----------
+2	2	2	20
+SELECT
+    a,
+    b,
+    table2.a,
+    table2.b
+FROM table1
+ALL INNER JOIN 
+(
+    SELECT
+        a,
+        b
+    FROM table2
+) AS table2 ON a = table2.a
+WHERE table2.b = toUInt32(20)
+---------Q2----------
+2	2	2	20
+SELECT
+    a,
+    b,
+    table2.a,
+    table2.b
+FROM table1
+ALL INNER JOIN 
+(
+    SELECT
+        a,
+        b
+    FROM table2
+) AS table2 ON a = table2.a
+WHERE (table2.a < table2.b) AND (table2.b = toUInt32(20))
+---------Q3----------
+---------Q4----------
+6	40
+SELECT
+    a,
+    table2.b
+FROM table1
+ALL INNER JOIN 
+(
+    SELECT
+        a,
+        b
+    FROM table2
+) AS table2 ON a = toUInt32(10 - table2.a)
+WHERE (b = 6) AND (table2.b > 20)
diff --git a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql
new file mode 100644
index 00000000000..7ba2a3b5c25
--- /dev/null
+++ b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql
@@ -0,0 +1,27 @@
+DROP TABLE IF EXISTS table1;
+DROP TABLE IF EXISTS table2;
+
+CREATE TABLE table1 (a UInt32, b UInt32) ENGINE = Memory;
+CREATE TABLE table2 (a UInt32, b UInt32) ENGINE = Memory;
+
+INSERT INTO table1 SELECT number, number FROM numbers(10);
+INSERT INTO table2 SELECT number * 2, number * 20 FROM numbers(6);
+
+SELECT '---------Q1----------';
+SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.b = toUInt32(20));
+EXPLAIN SYNTAX SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.b = toUInt32(20));
+
+SELECT '---------Q2----------';
+SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.a < table2.b) AND (table2.b = toUInt32(20));
+EXPLAIN SYNTAX SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.a < table2.b) AND (table2.b = toUInt32(20));
+
+SELECT '---------Q3----------';
+SELECT * FROM table1 JOIN table2 ON (table1.a = toUInt32(table2.a + 5)) AND (table2.a < table1.b) AND (table2.b > toUInt32(20)); -- { serverError 48 }
+
+SELECT '---------Q4----------';
+SELECT table1.a, table2.b FROM table1 INNER JOIN table2 ON (table1.a = toUInt32(10 - table2.a)) AND (table1.b = 6) AND (table2.b > 20);
+EXPLAIN SYNTAX SELECT table1.a, table2.b FROM table1 INNER JOIN table2 ON (table1.a = toUInt32(10 - table2.a)) AND (table1.b = 6) AND (table2.b > 20);
+
+
+DROP TABLE table1;
+DROP TABLE table2;

From 9fa3e09bb142cfaf76a352deae12341bab1223bb Mon Sep 17 00:00:00 2001
From: hexiaoting <hewenting_ict@163.com>
Date: Wed, 27 Jan 2021 11:36:15 +0800
Subject: [PATCH 0202/1238] Add more test cases

---
 ...ove_conditions_from_join_on_to_where.reference | 15 +++++++++++++++
 ...1653_move_conditions_from_join_on_to_where.sql |  6 ++++++
 2 files changed, 21 insertions(+)

diff --git a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference
index cf5d26b657a..a58aa254891 100644
--- a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference
+++ b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference
@@ -45,3 +45,18 @@ ALL INNER JOIN
     FROM table2
 ) AS table2 ON a = toUInt32(10 - table2.a)
 WHERE (b = 6) AND (table2.b > 20)
+---------Q5----------
+SELECT
+    a,
+    table2.b
+FROM table1
+ALL INNER JOIN 
+(
+    SELECT
+        a,
+        b
+    FROM table2
+    WHERE 0
+) AS table2 ON a = table2.a
+WHERE 0
+---------Q6----------
diff --git a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql
index 7ba2a3b5c25..5b861ecfe82 100644
--- a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql
+++ b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql
@@ -22,6 +22,12 @@ SELECT '---------Q4----------';
 SELECT table1.a, table2.b FROM table1 INNER JOIN table2 ON (table1.a = toUInt32(10 - table2.a)) AND (table1.b = 6) AND (table2.b > 20);
 EXPLAIN SYNTAX SELECT table1.a, table2.b FROM table1 INNER JOIN table2 ON (table1.a = toUInt32(10 - table2.a)) AND (table1.b = 6) AND (table2.b > 20);
 
+SELECT '---------Q5----------';
+SELECT table1.a, table2.b FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table1.b = 6) AND (table2.b > 20) AND (10 < 6);
+EXPLAIN SYNTAX SELECT table1.a, table2.b FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table1.b = 6) AND (table2.b > 20) AND (10 < 6);
+
+SELECT '---------Q6----------';
+SELECT table1.a, table2.b FROM table1 JOIN table2 ON (table1.b = 6) AND (table2.b > 20); -- { serverError 403 } 
 
 DROP TABLE table1;
 DROP TABLE table2;

From d707055ed609d6c2cbb63b2075b64cac4e4937c4 Mon Sep 17 00:00:00 2001
From: benbiti <wangshouben@hotmail.com>
Date: Wed, 27 Jan 2021 13:56:20 +0800
Subject: [PATCH 0203/1238] update FINAL with max_final_threads setting to cn

---
 docs/zh/operations/settings/settings.md         | 11 +++++++++++
 docs/zh/sql-reference/statements/select/from.md |  6 ++++--
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/docs/zh/operations/settings/settings.md b/docs/zh/operations/settings/settings.md
index f834ab74f5a..64625c19c6a 100644
--- a/docs/zh/operations/settings/settings.md
+++ b/docs/zh/operations/settings/settings.md
@@ -1310,3 +1310,14 @@ SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1;
 **另请参阅**
 
 -   [IN 运算符中的 NULL 处理](../../sql-reference/operators/in.md#in-null-processing)
+
+## max_final_threads {#max-final-threads}
+
+设置使用[FINAL](../../sql-reference/statements/select/from.md#select-from-final) 限定符的`SELECT`查询, 在数据读取阶段的最大并发线程数。
+
+可能的值:
+
+-   正整数。
+-   0 or 1 — 禁用。 此时`SELECT` 查询单线程执行。
+
+默认值: `16`。
diff --git a/docs/zh/sql-reference/statements/select/from.md b/docs/zh/sql-reference/statements/select/from.md
index a8b49febab5..71b7cd319eb 100644
--- a/docs/zh/sql-reference/statements/select/from.md
+++ b/docs/zh/sql-reference/statements/select/from.md
@@ -25,11 +25,13 @@ toc_title: FROM
 -   [Replicated](../../../engines/table-engines/mergetree-family/replication.md) 版本 `MergeTree` 引擎
 -   [View](../../../engines/table-engines/special/view.md), [Buffer](../../../engines/table-engines/special/buffer.md), [Distributed](../../../engines/table-engines/special/distributed.md)，和 [MaterializedView](../../../engines/table-engines/special/materializedview.md) 在其他引擎上运行的引擎，只要是它们底层是 `MergeTree`-引擎表即可。
 
+现在使用 `FINAL` 修饰符 的 `SELECT` 查询启用了并发执行, 这会快一点。但是仍然存在缺陷 (见下)。  [max_final_threads](../../../operations/settings/settings.md#max-final-threads) 设置使用的最大线程数限制。
+
 ### 缺点 {#drawbacks}
 
-使用的查询 `FINAL` 执行速度不如类似的查询那么快，因为:
+使用的查询 `FINAL` 执行速度比类似的查询慢一点，因为:
 
--   查询在单个线程中执行，并在查询执行期间合并数据。
+-   在查询执行期间合并数据。
 -   查询与 `FINAL` 除了读取查询中指定的列之外，还读取主键列。
 
 **在大多数情况下，避免使用 `FINAL`.** 常见的方法是使用假设后台进程的不同查询 `MergeTree` 引擎还没有发生，并通过应用聚合（例如，丢弃重复项）来处理它。 {## TODO: examples ##}

From 6693f77c322bbadcf7479350df258037376456d3 Mon Sep 17 00:00:00 2001
From: yiguolei <676222867@qq.com>
Date: Wed, 27 Jan 2021 13:56:36 +0800
Subject: [PATCH 0204/1238] fix code style

---
 src/Interpreters/DDLWorker.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index e519b375c90..d3ebed228c7 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -868,7 +868,8 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
         replicated_storage->getStatus(status);
 
         // Should return as soon as possible if the table is shutdown by drop or other command.
-        if (status.is_partial_shutdown) {
+        if (status.is_partial_shutdown)
+        {
             LOG_WARNING(log, "Table is shutdown, task {} will not be executed.", task.entry_name);
             task.execution_status = ExecutionStatus(ErrorCodes::UNFINISHED, "Cannot execute replicated DDL query, table is shutdown");
             return false;

From d21d838604abcedc38dfa779f2fe89e33944cbf8 Mon Sep 17 00:00:00 2001
From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com>
Date: Wed, 27 Jan 2021 12:50:11 +0300
Subject: [PATCH 0205/1238] No more dicttoxml with excessive logs

---
 docker/test/integration/runner/Dockerfile | 2 +-
 tests/integration/helpers/cluster.py      | 7 +++----
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile
index 9b51891ccf5..56abf1122b2 100644
--- a/docker/test/integration/runner/Dockerfile
+++ b/docker/test/integration/runner/Dockerfile
@@ -62,7 +62,7 @@ RUN python3 -m pip install \
     avro \
     cassandra-driver \
     confluent-kafka \
-    dicttoxml \
+    dict2xml \
     docker \
     docker-compose==1.22.0 \
     grpcio \
diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py
index 4d8f3c68025..ee4ea8c94d5 100644
--- a/tests/integration/helpers/cluster.py
+++ b/tests/integration/helpers/cluster.py
@@ -20,9 +20,8 @@ import psycopg2
 import pymongo
 import pymysql
 import requests
-import xml.dom.minidom
+from dict2xml import dict2xml
 from confluent_kafka.avro.cached_schema_registry_client import CachedSchemaRegistryClient
-from dicttoxml import dicttoxml
 from kazoo.client import KazooClient
 from kazoo.exceptions import KazooException
 from minio import Minio
@@ -1192,8 +1191,8 @@ class ClickHouseInstance:
 
     @staticmethod
     def dict_to_xml(dictionary):
-        xml_str = dicttoxml(dictionary, custom_root="yandex", attr_type=False)
-        return xml.dom.minidom.parseString(xml_str).toprettyxml()
+        xml_str = dict2xml(dictionary, wrap="yandex", indent="  ", newlines=True)
+        return xml_str
 
     @property
     def odbc_drivers(self):

From 01c8b9e1b1c0284ce44ce61af494cc3dba1858a4 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Wed, 27 Jan 2021 13:07:18 +0300
Subject: [PATCH 0206/1238] Fix rare bug when some replicated operations (like
 mutation) cannot process some parts after data corruption

---
 .../ReplicatedMergeTreePartCheckThread.cpp    | 90 ++++++++++++-------
 .../ReplicatedMergeTreePartCheckThread.h      | 19 +++-
 .../MergeTree/ReplicatedMergeTreeQueue.cpp    | 17 +++-
 src/Storages/StorageReplicatedMergeTree.cpp   | 15 ++++
 src/Storages/StorageReplicatedMergeTree.h     |  3 +
 .../test_broken_part_during_merge/__init__.py |  1 +
 .../test_broken_part_during_merge/test.py     | 61 +++++++++++++
 7 files changed, 172 insertions(+), 34 deletions(-)
 create mode 100644 tests/integration/test_broken_part_during_merge/__init__.py
 create mode 100644 tests/integration/test_broken_part_during_merge/test.py

diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
index 22cb5ed6e9c..2fecf2b4e41 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
@@ -74,20 +74,9 @@ size_t ReplicatedMergeTreePartCheckThread::size() const
 }
 
 
-void ReplicatedMergeTreePartCheckThread::searchForMissingPart(const String & part_name)
+ReplicatedMergeTreePartCheckThread::MissingPartSearchResult ReplicatedMergeTreePartCheckThread::searchForMissingPartOnOtherReplicas(const String & part_name)
 {
     auto zookeeper = storage.getZooKeeper();
-    String part_path = storage.replica_path + "/parts/" + part_name;
-
-    /// If the part is in ZooKeeper, remove it from there and add the task to download it to the queue.
-    if (zookeeper->exists(part_path))
-    {
-        LOG_WARNING(log, "Part {} exists in ZooKeeper but not locally. Removing from ZooKeeper and queueing a fetch.", part_name);
-        ProfileEvents::increment(ProfileEvents::ReplicatedPartChecksFailed);
-
-        storage.removePartAndEnqueueFetch(part_name);
-        return;
-    }
 
     /// If the part is not in ZooKeeper, we'll check if it's at least somewhere.
     auto part_info = MergeTreePartInfo::fromPartName(part_name, storage.format_version);
@@ -115,7 +104,7 @@ void ReplicatedMergeTreePartCheckThread::searchForMissingPart(const String & par
         *   and don't delete the queue entry when in doubt.
         */
 
-    LOG_WARNING(log, "Checking if anyone has a part covering {}.", part_name);
+    LOG_WARNING(log, "Checking if anyone has a part {} or covering part.", part_name);
 
     bool found_part_with_the_same_min_block = false;
     bool found_part_with_the_same_max_block = false;
@@ -123,15 +112,27 @@ void ReplicatedMergeTreePartCheckThread::searchForMissingPart(const String & par
     Strings replicas = zookeeper->getChildren(storage.zookeeper_path + "/replicas");
     for (const String & replica : replicas)
     {
-        Strings parts = zookeeper->getChildren(storage.zookeeper_path + "/replicas/" + replica + "/parts");
+        String replica_path = storage.zookeeper_path + "/replicas/" + replica;
+
+        Strings parts = zookeeper->getChildren(replica_path + "/parts");
         for (const String & part_on_replica : parts)
         {
             auto part_on_replica_info = MergeTreePartInfo::fromPartName(part_on_replica, storage.format_version);
 
+            if (part_info == part_on_replica_info)
+            {
+                /// Found missing part at ourself. If we are here than something wrong with this part, so skipping.
+                if (replica_path == storage.replica_path)
+                    continue;
+
+                LOG_WARNING(log, "Found the missing part {} at {} on {}", part_name, part_on_replica, replica);
+                return MissingPartSearchResult::FoundAndNeedFetch;
+            }
+
             if (part_on_replica_info.contains(part_info))
             {
                 LOG_WARNING(log, "Found part {} on {} that covers the missing part {}", part_on_replica, replica, part_name);
-                return;
+                return MissingPartSearchResult::FoundAndDontNeedFetch;
             }
 
             if (part_info.contains(part_on_replica_info))
@@ -144,7 +145,7 @@ void ReplicatedMergeTreePartCheckThread::searchForMissingPart(const String & par
                 if (found_part_with_the_same_min_block && found_part_with_the_same_max_block)
                 {
                     LOG_WARNING(log, "Found parts with the same min block and with the same max block as the missing part {}. Hoping that it will eventually appear as a result of a merge.", part_name);
-                    return;
+                    return MissingPartSearchResult::FoundAndDontNeedFetch;
                 }
             }
         }
@@ -160,21 +161,48 @@ void ReplicatedMergeTreePartCheckThread::searchForMissingPart(const String & par
         not_found_msg = "smaller parts with either the same min block or the same max block.";
     LOG_ERROR(log, "No replica has part covering {} and a merge is impossible: we didn't find {}", part_name, not_found_msg);
 
-    ProfileEvents::increment(ProfileEvents::ReplicatedPartChecksFailed);
+    return MissingPartSearchResult::LostForever;
+}
 
-    /// Is it in the replication queue? If there is - delete, because the task can not be processed.
-    if (!storage.queue.remove(zookeeper, part_name))
+void ReplicatedMergeTreePartCheckThread::searchForMissingPartAndFetchIfPossible(const String & part_name)
+{
+    auto zookeeper = storage.getZooKeeper();
+    String part_path = storage.replica_path + "/parts/" + part_name;
+
+    auto missing_part_search_result = searchForMissingPartOnOtherReplicas(part_name);
+    /// If the part is in ZooKeeper, remove it from there and add the task to download it to the queue.
+    if (zookeeper->exists(part_path))
     {
-        /// The part was not in our queue. Why did it happen?
-        LOG_ERROR(log, "Missing part {} is not in our queue.", part_name);
-        return;
+        /// If part found on some other replica
+        if (missing_part_search_result == MissingPartSearchResult::FoundAndNeedFetch)
+        {
+            LOG_WARNING(log, "Part {} exists in ZooKeeper but not locally and found on other replica. Removing from ZooKeeper and queueing a fetch.", part_name);
+            storage.removePartAndEnqueueFetch(part_name);
+        }
+        else /// If we have covering part on other replica or part is lost forever we don't need to fetch anything
+        {
+            LOG_WARNING(log, "Part {} exists in ZooKeeper but not locally and not found on other replica. Removing it from ZooKeeper.", part_name);
+            storage.removePartFromZooKeeper(part_name);
+        }
     }
 
-    /** This situation is possible if on all the replicas where the part was, it deteriorated.
-        * For example, a replica that has just written it has power turned off and the data has not been written from cache to disk.
-        */
-    LOG_ERROR(log, "Part {} is lost forever.", part_name);
-    ProfileEvents::increment(ProfileEvents::ReplicatedDataLoss);
+    ProfileEvents::increment(ProfileEvents::ReplicatedPartChecksFailed);
+
+    if (missing_part_search_result == MissingPartSearchResult::LostForever)
+    {
+        /// Is it in the replication queue? If there is - delete, because the task can not be processed.
+        if (!storage.queue.remove(zookeeper, part_name))
+        {
+            /// The part was not in our queue. Why did it happen?
+            LOG_ERROR(log, "Missing part {} is not in our queue.", part_name);
+        }
+
+        /** This situation is possible if on all the replicas where the part was, it deteriorated.
+            * For example, a replica that has just written it has power turned off and the data has not been written from cache to disk.
+            */
+        LOG_ERROR(log, "Part {} is lost forever.", part_name);
+        ProfileEvents::increment(ProfileEvents::ReplicatedDataLoss);
+    }
 }
 
 
@@ -193,7 +221,7 @@ CheckResult ReplicatedMergeTreePartCheckThread::checkPart(const String & part_na
     /// We do not have this or a covering part.
     if (!part)
     {
-        searchForMissingPart(part_name);
+        searchForMissingPartAndFetchIfPossible(part_name);
         return {part_name, false, "Part is missing, will search for it"};
     }
     /// We have this part, and it's active. We will check whether we need this part and whether it has the right data.
@@ -254,11 +282,11 @@ CheckResult ReplicatedMergeTreePartCheckThread::checkPart(const String & part_na
 
                 tryLogCurrentException(log, __PRETTY_FUNCTION__);
 
-                String message = "Part " + part_name + " looks broken. Removing it and queueing a fetch.";
+                String message = "Part " + part_name + " looks broken. Removing it and will try to fetch.";
                 LOG_ERROR(log, message);
-                ProfileEvents::increment(ProfileEvents::ReplicatedPartChecksFailed);
 
-                storage.removePartAndEnqueueFetch(part_name);
+                /// Part is broken, let's try to find it and fetch.
+                searchForMissingPartAndFetchIfPossible(part_name);
 
                 /// Delete part locally.
                 storage.forgetPartAndMoveToDetached(part, "broken");
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h
index 4239d7a8051..d43a9a02237 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h
+++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h
@@ -73,7 +73,24 @@ public:
 private:
     void run();
 
-    void searchForMissingPart(const String & part_name);
+    /// Search for missing part and queue fetch if possible. Otherwise
+    /// remove part from zookeeper and queue.
+    void searchForMissingPartAndFetchIfPossible(const String & part_name);
+
+    enum MissingPartSearchResult
+    {
+        /// We found this part on other replica, let's fetch it.
+        FoundAndNeedFetch,
+        /// We found covering part or source part with same min and max block number
+        /// don't need to fetch because we should do it during normal queue processing.
+        FoundAndDontNeedFetch,
+        /// Covering part not found anywhere and exact part_name doesn't found on other
+        /// replicas.
+        LostForever,
+    };
+
+    /// Search for missing part on other replicas or covering part on all replicas (including our replica).
+    MissingPartSearchResult searchForMissingPartOnOtherReplicas(const String & part_name);
 
     StorageReplicatedMergeTree & storage;
     String log_name;
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp
index ab1254acc5f..26a916d2356 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp
@@ -420,13 +420,26 @@ bool ReplicatedMergeTreeQueue::remove(zkutil::ZooKeeperPtr zookeeper, const Stri
     {
         std::unique_lock lock(state_mutex);
 
-        virtual_parts.remove(part_name);
+        bool removed = virtual_parts.remove(part_name);
 
         for (Queue::iterator it = queue.begin(); it != queue.end();)
         {
             if ((*it)->new_part_name == part_name)
             {
                 found = *it;
+                if (removed)
+                {
+                    /// Preserve invariant `virtual_parts` = `current_parts` + `queue`.
+                    /// We remove new_part from virtual parts and add all source parts
+                    /// which present in current_parts.
+                    for (const auto & source_part : found->source_parts)
+                    {
+                        auto part_in_current_parts = current_parts.getContainingPart(source_part);
+                        if (part_in_current_parts == source_part)
+                            virtual_parts.add(source_part);
+                    }
+                }
+
                 updateStateOnQueueEntryRemoval(
                     found, /* is_successful = */ false,
                     min_unprocessed_insert_time_changed, max_processed_insert_time_changed, lock);
@@ -1010,7 +1023,7 @@ bool ReplicatedMergeTreeQueue::isNotCoveredByFuturePartsImpl(const String & log_
     /// NOTE The above is redundant, but left for a more convenient message in the log.
     auto result_part = MergeTreePartInfo::fromPartName(new_part_name, format_version);
 
-    /// It can slow down when the size of `future_parts` is large. But it can not be large, since `BackgroundProcessingPool` is limited.
+    /// It can slow down when the size of `future_parts` is large. But it can not be large, since background pool is limited.
     for (const auto & future_part_elem : future_parts)
     {
         auto future_part = MergeTreePartInfo::fromPartName(future_part_elem.first, format_version);
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index 70e90e9706a..8e2fd5db6d6 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -3008,6 +3008,21 @@ void StorageReplicatedMergeTree::removePartFromZooKeeper(const String & part_nam
     ops.emplace_back(zkutil::makeRemoveRequest(part_path, -1));
 }
 
+void StorageReplicatedMergeTree::removePartFromZooKeeper(const String & part_name)
+{
+    auto zookeeper = getZooKeeper();
+    String part_path = replica_path + "/parts/" + part_name;
+    Coordination::Stat stat;
+
+    /// Part doesn't exist, nothing to remove
+    if (!zookeeper->exists(part_path, &stat))
+        return;
+
+    Coordination::Requests ops;
+
+    removePartFromZooKeeper(part_name, ops, stat.numChildren > 0);
+    zookeeper->multi(ops);
+}
 
 void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_name)
 {
diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h
index cf36cf82fc9..6db05294b63 100644
--- a/src/Storages/StorageReplicatedMergeTree.h
+++ b/src/Storages/StorageReplicatedMergeTree.h
@@ -381,6 +381,9 @@ private:
     /// Set has_children to true for "old-style" parts (those with /columns and /checksums child znodes).
     void removePartFromZooKeeper(const String & part_name, Coordination::Requests & ops, bool has_children);
 
+    /// Just removes part from ZooKeeper using previous method
+    void removePartFromZooKeeper(const String & part_name);
+
     /// Quickly removes big set of parts from ZooKeeper (using async multi queries)
     void removePartsFromZooKeeper(zkutil::ZooKeeperPtr & zookeeper, const Strings & part_names,
                                   NameSet * parts_should_be_retried = nullptr);
diff --git a/tests/integration/test_broken_part_during_merge/__init__.py b/tests/integration/test_broken_part_during_merge/__init__.py
new file mode 100644
index 00000000000..e5a0d9b4834
--- /dev/null
+++ b/tests/integration/test_broken_part_during_merge/__init__.py
@@ -0,0 +1 @@
+#!/usr/bin/env python3
diff --git a/tests/integration/test_broken_part_during_merge/test.py b/tests/integration/test_broken_part_during_merge/test.py
new file mode 100644
index 00000000000..33719166f4a
--- /dev/null
+++ b/tests/integration/test_broken_part_during_merge/test.py
@@ -0,0 +1,61 @@
+import pytest
+
+from helpers.cluster import ClickHouseCluster
+from multiprocessing.dummy import Pool
+from helpers.network import PartitionManager
+import time
+
+cluster = ClickHouseCluster(__file__)
+
+node1 = cluster.add_instance('node1', with_zookeeper=True)
+
+
+@pytest.fixture(scope="module")
+def started_cluster():
+    try:
+        cluster.start()
+
+        node1.query('''
+            CREATE TABLE replicated_mt(date Date, id UInt32, value Int32)
+            ENGINE = ReplicatedMergeTree('/clickhouse/tables/replicated_mt', '{replica}') ORDER BY id;
+                '''.format(replica=node1.name))
+
+        yield cluster
+
+    finally:
+        cluster.shutdown()
+
+def corrupt_data_part_on_disk(node, table, part_name):
+    part_path = node.query(
+        "SELECT path FROM system.parts WHERE table = '{}' and name = '{}'".format(table, part_name)).strip()
+    node.exec_in_container(['bash', '-c',
+                            'cd {p} && ls *.bin | head -n 1 | xargs -I{{}} sh -c \'echo "1" >> $1\' -- {{}}'.format(
+                                p=part_path)], privileged=True)
+
+
+def test_merge_and_part_corruption(started_cluster):
+    node1.query("SYSTEM STOP REPLICATION QUEUES replicated_mt")
+    for i in range(4):
+        node1.query("INSERT INTO replicated_mt SELECT toDate('2019-10-01'), number, number * number FROM numbers ({f}, 100000)".format(f=i*100000))
+
+    assert node1.query("SELECT COUNT() FROM system.parts WHERE table='replicated_mt' AND active=1") == "4\n"
+
+    # Need to corrupt "border part" (left or right). If we will corrupt something in the middle
+    # clickhouse will not consider merge as broken, because we have parts with the same min and max
+    # block numbers.
+    corrupt_data_part_on_disk(node1, 'replicated_mt', 'all_3_3_0')
+
+    with Pool(1) as p:
+        def optimize_with_delay(x):
+            node1.query("OPTIMIZE TABLE replicated_mt FINAL", timeout=30)
+
+        # corrupt part after merge already assigned, but not started
+        res_opt = p.apply_async(optimize_with_delay, (1,))
+        node1.query("CHECK TABLE replicated_mt", settings={"check_query_single_value_result": 0})
+        # start merge
+        node1.query("SYSTEM START REPLICATION QUEUES replicated_mt")
+        res_opt.get()
+
+        # will hung if checked bug not fixed
+        node1.query("ALTER TABLE replicated_mt UPDATE value = 7 WHERE 1", settings={"mutations_sync": 2}, timeout=30)
+        assert node1.query("SELECT sum(value) FROM replicated_mt") == "2100000\n"

From 6fc39b10d30be05d183f77ea7013901cbc425a5d Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Wed, 27 Jan 2021 13:11:48 +0300
Subject: [PATCH 0207/1238] Spelling

---
 src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
index 2fecf2b4e41..f08b94d21df 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
@@ -121,7 +121,7 @@ ReplicatedMergeTreePartCheckThread::MissingPartSearchResult ReplicatedMergeTreeP
 
             if (part_info == part_on_replica_info)
             {
-                /// Found missing part at ourself. If we are here than something wrong with this part, so skipping.
+                /// Found missing part at ourself. If we are here then something wrong with this part, so skipping.
                 if (replica_path == storage.replica_path)
                     continue;
 

From 99a0401c82f651689ba47aad44410e9c1e9d7943 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <KochetovNicolai@users.noreply.github.com>
Date: Wed, 27 Jan 2021 14:36:55 +0300
Subject: [PATCH 0208/1238] Update 01666_merge_tree_max_query_limit.sh

---
 tests/queries/0_stateless/01666_merge_tree_max_query_limit.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/01666_merge_tree_max_query_limit.sh b/tests/queries/0_stateless/01666_merge_tree_max_query_limit.sh
index 27716aa8b28..e32a83c9560 100755
--- a/tests/queries/0_stateless/01666_merge_tree_max_query_limit.sh
+++ b/tests/queries/0_stateless/01666_merge_tree_max_query_limit.sh
@@ -19,7 +19,7 @@ insert into simple select number, number + 100 from numbers(1000);
 "
 
 echo "Spin up a long running query"
-${CLICKHOUSE_CLIENT} --query "select sleepEachRow(0.01) from simple settings max_block_size = 1 format Null" --query_id "long_running_query" 2>&1 | grep -o 'was cancelled' | head -1 &
+${CLICKHOUSE_CLIENT} --query "select sleepEachRow(0.01) from simple settings max_block_size = 1 format Null" --query_id "long_running_query" > /dev/null 2>&1 &
 wait_for_query_to_start 'long_running_query'
 
 # query which reads marks >= min_marks_to_honor_max_concurrent_queries is throttled

From fc614d03c14cd4fab3c5ada845543ef1534cf5ee Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <KochetovNicolai@users.noreply.github.com>
Date: Wed, 27 Jan 2021 14:37:13 +0300
Subject: [PATCH 0209/1238] Update 01666_merge_tree_max_query_limit.reference

---
 .../0_stateless/01666_merge_tree_max_query_limit.reference       | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/queries/0_stateless/01666_merge_tree_max_query_limit.reference b/tests/queries/0_stateless/01666_merge_tree_max_query_limit.reference
index 9011a5d1204..a08a20dc95d 100644
--- a/tests/queries/0_stateless/01666_merge_tree_max_query_limit.reference
+++ b/tests/queries/0_stateless/01666_merge_tree_max_query_limit.reference
@@ -12,5 +12,4 @@ Check if another query is passed
 Modify max_concurrent_queries back to 1
 Check if another query with less marks to read is throttled
 yes
-was cancelled
 finished	long_running_query	default	select sleepEachRow(0.01) from simple settings max_block_size = 1 format Null

From 5622e6daa6bf27e651a46e4482ffc9decd924ede Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Wed, 27 Jan 2021 14:56:12 +0300
Subject: [PATCH 0210/1238] Fix rare max_number_of_merges_with_ttl_in_pool
 limit overrun for non-replicated MergeTree

---
 src/Storages/MergeTree/MergeList.h          | 21 ++++++++++++++-------
 src/Storages/MergeTree/MergeTreeData.cpp    |  2 +-
 src/Storages/StorageMergeTree.cpp           |  4 ++++
 src/Storages/StorageReplicatedMergeTree.cpp |  5 +++++
 4 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/src/Storages/MergeTree/MergeList.h b/src/Storages/MergeTree/MergeList.h
index 65e873ed102..6b2af414835 100644
--- a/src/Storages/MergeTree/MergeList.h
+++ b/src/Storages/MergeTree/MergeList.h
@@ -116,12 +116,6 @@ public:
         : Parent(CurrentMetrics::Merge)
     {}
 
-    void onEntryCreate(const Parent::Entry & entry) override
-    {
-        if (isTTLMergeType(entry->merge_type))
-            ++merges_with_ttl_counter;
-    }
-
     void onEntryDestroy(const Parent::Entry & entry) override
     {
         if (isTTLMergeType(entry->merge_type))
@@ -140,7 +134,20 @@ public:
         }
     }
 
-    size_t getExecutingMergesWithTTLCount() const
+    /// Merge consists of two parts: assignment and execution. We add merge to
+    /// merge list on execution, but checking merge list during merge
+    /// assignment. This lead to the logical race condition (we can assign more
+    /// merges with TTL than allowed). So we "book" merge with ttl during
+    /// assignment, and remove from list after merge execution.
+    ///
+    /// NOTE: Not important for replicated merge tree, we check count of merges twice:
+    /// in assignment and in queue before execution.
+    void bookMergeWithTTL()
+    {
+        ++merges_with_ttl_counter;
+    }
+
+    size_t getMergesWithTTLCount() const
     {
         return merges_with_ttl_counter;
     }
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 276ac10aeaf..d1a2f85eceb 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -3955,7 +3955,7 @@ NamesAndTypesList MergeTreeData::getVirtuals() const
 
 size_t MergeTreeData::getTotalMergesWithTTLInMergeList() const
 {
-    return global_context.getMergeList().getExecutingMergesWithTTLCount();
+    return global_context.getMergeList().getMergesWithTTLCount();
 }
 
 void MergeTreeData::addPartContributionToDataVolume(const DataPartPtr & part)
diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp
index 070e6eb0483..9fca8c49e81 100644
--- a/src/Storages/StorageMergeTree.cpp
+++ b/src/Storages/StorageMergeTree.cpp
@@ -732,6 +732,10 @@ std::shared_ptr<StorageMergeTree::MergeMutateSelectedEntry> StorageMergeTree::se
         return {};
     }
 
+    /// Account TTL merge here to avoid exceeding the max_number_of_merges_with_ttl_in_pool limit
+    if (isTTLMergeType(future_part.merge_type))
+        global_context.getMergeList().bookMergeWithTTL();
+
     merging_tagger = std::make_unique<CurrentlyMergingPartsTagger>(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace(future_part.parts), *this, metadata_snapshot, false);
     return std::make_shared<MergeMutateSelectedEntry>(future_part, std::move(merging_tagger), MutationCommands{});
 }
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index 70e90e9706a..4eb7d7ebccd 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -1490,7 +1490,12 @@ bool StorageReplicatedMergeTree::tryExecuteMerge(const LogEntry & entry)
     future_merged_part.updatePath(*this, reserved_space);
     future_merged_part.merge_type = entry.merge_type;
 
+    /// Account TTL merge
+    if (isTTLMergeType(future_merged_part.merge_type))
+        global_context.getMergeList().bookMergeWithTTL();
+
     auto table_id = getStorageID();
+    /// Add merge to list
     MergeList::EntryPtr merge_entry = global_context.getMergeList().insert(table_id.database_name, table_id.table_name, future_merged_part);
 
     Transaction transaction(*this);

From 241d3ec8c275029cbe150746745377b3af1ef703 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Wed, 27 Jan 2021 15:40:16 +0300
Subject: [PATCH 0211/1238] Merge with master

---
 .../ZooKeeper/TestKeeperStorageDispatcher.cpp | 139 ------------------
 1 file changed, 139 deletions(-)
 delete mode 100644 src/Common/ZooKeeper/TestKeeperStorageDispatcher.cpp

diff --git a/src/Common/ZooKeeper/TestKeeperStorageDispatcher.cpp b/src/Common/ZooKeeper/TestKeeperStorageDispatcher.cpp
deleted file mode 100644
index 434a6a2e747..00000000000
--- a/src/Common/ZooKeeper/TestKeeperStorageDispatcher.cpp
+++ /dev/null
@@ -1,139 +0,0 @@
-#include <Common/ZooKeeper/TestKeeperStorageDispatcher.h>
-#include <Common/setThreadName.h>
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
-
-    extern const int LOGICAL_ERROR;
-    extern const int TIMEOUT_EXCEEDED;
-}
-
-}
-namespace zkutil
-{
-
-void TestKeeperStorageDispatcher::processingThread()
-{
-    setThreadName("TestKeeperSProc");
-
-    while (!shutdown)
-    {
-        RequestInfo info;
-
-        UInt64 max_wait = UInt64(operation_timeout.totalMilliseconds());
-
-        if (requests_queue.tryPop(info, max_wait))
-        {
-            if (shutdown)
-                break;
-
-            try
-            {
-                auto responses = storage.processRequest(info.request, info.session_id);
-                for (const auto & response_for_session : responses)
-                    setResponse(response_for_session.session_id, response_for_session.response);
-            }
-            catch (...)
-            {
-                tryLogCurrentException(__PRETTY_FUNCTION__);
-            }
-        }
-    }
-}
-
-void TestKeeperStorageDispatcher::setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response)
-{
-    std::lock_guard lock(session_to_response_callback_mutex);
-    auto session_writer = session_to_response_callback.find(session_id);
-    if (session_writer == session_to_response_callback.end())
-        return;
-
-    session_writer->second(response);
-    /// Session closed, no more writes
-    if (response->xid != Coordination::WATCH_XID && response->getOpNum() == Coordination::OpNum::Close)
-        session_to_response_callback.erase(session_writer);
-}
-
-void TestKeeperStorageDispatcher::finalize()
-{
-    {
-        std::lock_guard lock(push_request_mutex);
-
-        if (shutdown)
-            return;
-
-        shutdown = true;
-
-        if (processing_thread.joinable())
-            processing_thread.join();
-    }
-
-    RequestInfo info;
-    TestKeeperStorage::RequestsForSessions expired_requests;
-    while (requests_queue.tryPop(info))
-        expired_requests.push_back(TestKeeperStorage::RequestForSession{info.session_id, info.request});
-
-    auto expired_responses = storage.finalize(expired_requests);
-
-    for (const auto & response_for_session : expired_responses)
-        setResponse(response_for_session.session_id, response_for_session.response);
-}
-
-void TestKeeperStorageDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id)
-{
-
-    {
-        std::lock_guard lock(session_to_response_callback_mutex);
-        if (session_to_response_callback.count(session_id) == 0)
-            throw Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unknown session id {}", session_id);
-    }
-
-    RequestInfo request_info;
-    request_info.time = clock::now();
-    request_info.request = request;
-    request_info.session_id = session_id;
-
-    std::lock_guard lock(push_request_mutex);
-    /// Put close requests without timeouts
-    if (request->getOpNum() == Coordination::OpNum::Close)
-        requests_queue.push(std::move(request_info));
-    else if (!requests_queue.tryPush(std::move(request_info), operation_timeout.totalMilliseconds()))
-        throw Exception("Cannot push request to queue within operation timeout", ErrorCodes::TIMEOUT_EXCEEDED);
-}
-
-TestKeeperStorageDispatcher::TestKeeperStorageDispatcher()
-{
-    processing_thread = ThreadFromGlobalPool([this] { processingThread(); });
-}
-
-TestKeeperStorageDispatcher::~TestKeeperStorageDispatcher()
-{
-    try
-    {
-        finalize();
-    }
-    catch (...)
-    {
-        tryLogCurrentException(__PRETTY_FUNCTION__);
-    }
-}
-
-void TestKeeperStorageDispatcher::registerSession(int64_t session_id, ZooKeeperResponseCallback callback)
-{
-    std::lock_guard lock(session_to_response_callback_mutex);
-    if (!session_to_response_callback.try_emplace(session_id, callback).second)
-        throw Exception(DB::ErrorCodes::LOGICAL_ERROR, "Session with id {} already registered in dispatcher", session_id);
-}
-
-void TestKeeperStorageDispatcher::finishSession(int64_t session_id)
-{
-    std::lock_guard lock(session_to_response_callback_mutex);
-    auto session_it = session_to_response_callback.find(session_id);
-    if (session_it != session_to_response_callback.end())
-        session_to_response_callback.erase(session_it);
-}
-
-}

From 7a2f6cd5b979d1d1f9fc80a873f0bc6393ad0a96 Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Sat, 19 Dec 2020 00:43:08 +0300
Subject: [PATCH 0212/1238] Dictionaries refactoring to new interface

---
 src/Dictionaries/CacheDictionary.h            |   2 +
 src/Dictionaries/ComplexKeyCacheDictionary.h  |   2 +
 src/Dictionaries/ComplexKeyDirectDictionary.h |   2 +
 src/Dictionaries/ComplexKeyHashedDictionary.h |   2 +
 src/Dictionaries/DictionaryBlockInputStream.h | 342 +-----
 src/Dictionaries/DictionaryStructure.cpp      |  32 +-
 src/Dictionaries/DictionaryStructure.h        |  66 ++
 src/Dictionaries/DirectDictionary.h           |   2 +
 src/Dictionaries/FlatDictionary.cpp           | 514 ++++-----
 src/Dictionaries/FlatDictionary.h             |  76 +-
 src/Dictionaries/HashedDictionary.h           |   2 +
 src/Dictionaries/IPAddressDictionary.h        |   2 +
 src/Dictionaries/PolygonDictionary.h          |   2 +
 src/Dictionaries/RangeHashedDictionary.h      |   2 +
 src/Dictionaries/SSDCacheDictionary.h         |   2 +
 .../SSDComplexKeyCacheDictionary.h            |   2 +
 src/Functions/FunctionsExternalDictionaries.h | 986 ++----------------
 17 files changed, 411 insertions(+), 1627 deletions(-)

diff --git a/src/Dictionaries/CacheDictionary.h b/src/Dictionaries/CacheDictionary.h
index b9bd0b7623b..f2f364af9da 100644
--- a/src/Dictionaries/CacheDictionary.h
+++ b/src/Dictionaries/CacheDictionary.h
@@ -119,6 +119,8 @@ public:
 
     std::exception_ptr getLastException() const override;
 
+    static constexpr DictionaryGetByType get_by_type = DictionaryGetByType::unsupported;
+
     template <typename T>
     using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
 
diff --git a/src/Dictionaries/ComplexKeyCacheDictionary.h b/src/Dictionaries/ComplexKeyCacheDictionary.h
index 2663fee266d..36a1457570c 100644
--- a/src/Dictionaries/ComplexKeyCacheDictionary.h
+++ b/src/Dictionaries/ComplexKeyCacheDictionary.h
@@ -89,6 +89,8 @@ public:
         return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
     }
 
+    static constexpr DictionaryGetByType get_by_type = DictionaryGetByType::unsupported;
+
     template <typename T>
     using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
 
diff --git a/src/Dictionaries/ComplexKeyDirectDictionary.h b/src/Dictionaries/ComplexKeyDirectDictionary.h
index dc602be103f..670f2ac0a85 100644
--- a/src/Dictionaries/ComplexKeyDirectDictionary.h
+++ b/src/Dictionaries/ComplexKeyDirectDictionary.h
@@ -60,6 +60,8 @@ public:
         return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
     }
 
+    static constexpr DictionaryGetByType get_by_type = DictionaryGetByType::unsupported;
+
     template <typename T>
     using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
 
diff --git a/src/Dictionaries/ComplexKeyHashedDictionary.h b/src/Dictionaries/ComplexKeyHashedDictionary.h
index baf6628eebd..82677458298 100644
--- a/src/Dictionaries/ComplexKeyHashedDictionary.h
+++ b/src/Dictionaries/ComplexKeyHashedDictionary.h
@@ -60,6 +60,8 @@ public:
         return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
     }
 
+    static constexpr DictionaryGetByType get_by_type = DictionaryGetByType::unsupported;
+
     template <typename T>
     using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
 
diff --git a/src/Dictionaries/DictionaryBlockInputStream.h b/src/Dictionaries/DictionaryBlockInputStream.h
index c683ef0e9cc..96d5fac966f 100644
--- a/src/Dictionaries/DictionaryBlockInputStream.h
+++ b/src/Dictionaries/DictionaryBlockInputStream.h
@@ -60,111 +60,9 @@ protected:
     Block getBlock(size_t start, size_t size) const override;
 
 private:
-    // pointer types to getXXX functions
-    // for single key dictionaries
-    template <typename Type>
-    using DictionaryGetter = void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &, PaddedPODArray<Type> &) const;
-
-    template <typename Type>
-    using DictionaryDecimalGetter
-        = void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &, DecimalPaddedPODArray<Type> &) const;
-
-    using DictionaryStringGetter = void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &, ColumnString *) const;
-
-    // for complex complex key dictionaries
-    template <typename Type>
-    using GetterByKey = void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, PaddedPODArray<Type> & out) const;
-
-    template <typename Type>
-    using DecimalGetterByKey
-        = void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, DecimalPaddedPODArray<Type> & out) const;
-
-    using StringGetterByKey = void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, ColumnString * out) const;
-
-    // call getXXX
-    // for single key dictionaries
-    template <typename Type, typename Container>
-    void callGetter(
-        DictionaryGetter<Type> getter,
-        const PaddedPODArray<Key> & ids_to_fill,
-        const Columns & keys,
-        const DataTypes & data_types,
-        Container & container,
-        const DictionaryAttribute & attribute,
-        const DictionaryType & dictionary) const;
-
-    template <typename Type, typename Container>
-    void callGetter(
-        DictionaryDecimalGetter<Type> getter,
-        const PaddedPODArray<Key> & ids_to_fill,
-        const Columns & keys,
-        const DataTypes & data_types,
-        Container & container,
-        const DictionaryAttribute & attribute,
-        const DictionaryType & dictionary) const;
-
-    template <typename Container>
-    void callGetter(
-        DictionaryStringGetter getter,
-        const PaddedPODArray<Key> & ids_to_fill,
-        const Columns & keys,
-        const DataTypes & data_types,
-        Container & container,
-        const DictionaryAttribute & attribute,
-        const DictionaryType & dictionary) const;
-
-    // for complex complex key dictionaries
-    template <typename Type, typename Container>
-    void callGetter(
-        GetterByKey<Type> getter,
-        const PaddedPODArray<Key> & ids_to_fill,
-        const Columns & keys,
-        const DataTypes & data_types,
-        Container & container,
-        const DictionaryAttribute & attribute,
-        const DictionaryType & dictionary) const;
-
-    template <typename Type, typename Container>
-    void callGetter(
-        DecimalGetterByKey<Type> getter,
-        const PaddedPODArray<Key> & ids_to_fill,
-        const Columns & keys,
-        const DataTypes & data_types,
-        Container & container,
-        const DictionaryAttribute & attribute,
-        const DictionaryType & dictionary) const;
-
-    template <typename Container>
-    void callGetter(
-        StringGetterByKey getter,
-        const PaddedPODArray<Key> & ids_to_fill,
-        const Columns & keys,
-        const DataTypes & data_types,
-        Container & container,
-        const DictionaryAttribute & attribute,
-        const DictionaryType & dictionary) const;
-
-    template <template <typename> class Getter, template <typename> class DecimalGetter, typename StringGetter>
     Block
     fillBlock(const PaddedPODArray<Key> & ids_to_fill, const Columns & keys, const DataTypes & types, ColumnsWithTypeAndName && view) const;
 
-
-    template <typename AttributeType, typename Getter>
-    ColumnPtr getColumnFromAttribute(
-        Getter getter,
-        const PaddedPODArray<Key> & ids_to_fill,
-        const Columns & keys,
-        const DataTypes & data_types,
-        const DictionaryAttribute & attribute,
-        const DictionaryType & dictionary) const;
-    template <typename Getter>
-    ColumnPtr getColumnFromStringAttribute(
-        Getter getter,
-        const PaddedPODArray<Key> & ids_to_fill,
-        const Columns & keys,
-        const DataTypes & data_types,
-        const DictionaryAttribute & attribute,
-        const DictionaryType & dictionary) const;
     ColumnPtr getColumnFromIds(const PaddedPODArray<Key> & ids_to_fill) const;
 
     void fillKeyColumns(
@@ -178,12 +76,6 @@ private:
     Names column_names;
     PaddedPODArray<Key> ids;
     ColumnsWithTypeAndName key_columns;
-    Poco::Logger * logger;
-
-    using FillBlockFunction = Block (DictionaryBlockInputStream<DictionaryType, Key>::*)(
-        const PaddedPODArray<Key> & ids_to_fill, const Columns & keys, const DataTypes & types, ColumnsWithTypeAndName && view) const;
-
-    FillBlockFunction fill_block_function;
 
     Columns data_columns;
     GetColumnsFunction get_key_columns_function;
@@ -207,9 +99,6 @@ DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
     , dictionary(std::static_pointer_cast<const DictionaryType>(dictionary_))
     , column_names(column_names_)
     , ids(std::move(ids_))
-    , logger(&Poco::Logger::get("DictionaryBlockInputStream"))
-    , fill_block_function(
-          &DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<DictionaryGetter, DictionaryDecimalGetter, DictionaryStringGetter>)
     , key_type(DictionaryKeyType::Id)
 {
 }
@@ -223,12 +112,10 @@ DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
     : DictionaryBlockInputStreamBase(keys.size(), max_block_size_)
     , dictionary(std::static_pointer_cast<const DictionaryType>(dictionary_))
     , column_names(column_names_)
-    , logger(&Poco::Logger::get("DictionaryBlockInputStream"))
-    , fill_block_function(&DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<GetterByKey, DecimalGetterByKey, StringGetterByKey>)
     , key_type(DictionaryKeyType::ComplexKey)
 {
-    const DictionaryStructure & dictionaty_structure = dictionary->getStructure();
-    fillKeyColumns(keys, 0, keys.size(), dictionaty_structure, key_columns);
+    const DictionaryStructure & dictionary_structure = dictionary->getStructure();
+    fillKeyColumns(keys, 0, keys.size(), dictionary_structure, key_columns);
 }
 
 template <typename DictionaryType, typename Key>
@@ -242,8 +129,6 @@ DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
     : DictionaryBlockInputStreamBase(data_columns_.front()->size(), max_block_size_)
     , dictionary(std::static_pointer_cast<const DictionaryType>(dictionary_))
     , column_names(column_names_)
-    , logger(&Poco::Logger::get("DictionaryBlockInputStream"))
-    , fill_block_function(&DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<GetterByKey, DecimalGetterByKey, StringGetterByKey>)
     , data_columns(data_columns_)
     , get_key_columns_function(get_key_columns_function_)
     , get_view_columns_function(get_view_columns_function_)
@@ -268,13 +153,13 @@ Block DictionaryBlockInputStream<DictionaryType, Key>::getBlock(size_t start, si
                 columns.emplace_back(column);
                 view_columns.emplace_back(column, key_column.type, key_column.name);
             }
-            return (this->*fill_block_function)({}, columns, {}, std::move(view_columns));
+            return fillBlock({}, columns, {}, std::move(view_columns));
         }
 
         case DictionaryKeyType::Id:
         {
             PaddedPODArray<Key> ids_to_fill(ids.begin() + start, ids.begin() + start + length);
-            return (this->*fill_block_function)(ids_to_fill, {}, {}, {});
+            return fillBlock(ids_to_fill, {}, {}, {});
         }
 
         case DictionaryKeyType::Callback:
@@ -294,104 +179,18 @@ Block DictionaryBlockInputStream<DictionaryType, Key>::getBlock(size_t start, si
                 columns.push_back(key_column.column);
                 types.push_back(key_column.type);
             }
-            return (this->*fill_block_function)({}, columns, types, std::move(view_with_type_and_name));
+            return fillBlock({}, columns, types, std::move(view_with_type_and_name));
         }
     }
 
     throw Exception("Unexpected DictionaryKeyType.", ErrorCodes::LOGICAL_ERROR);
 }
 
-
 template <typename DictionaryType, typename Key>
-template <typename Type, typename Container>
-void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
-    DictionaryGetter<Type> getter,
-    const PaddedPODArray<Key> & ids_to_fill,
-    const Columns & /*keys*/,
-    const DataTypes & /*data_types*/,
-    Container & container,
-    const DictionaryAttribute & attribute,
-    const DictionaryType & dict) const
-{
-    (dict.*getter)(attribute.name, ids_to_fill, container);
-}
-
-template <typename DictionaryType, typename Key>
-template <typename Type, typename Container>
-void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
-    DictionaryDecimalGetter<Type> getter,
-    const PaddedPODArray<Key> & ids_to_fill,
-    const Columns & /*keys*/,
-    const DataTypes & /*data_types*/,
-    Container & container,
-    const DictionaryAttribute & attribute,
-    const DictionaryType & dict) const
-{
-    (dict.*getter)(attribute.name, ids_to_fill, container);
-}
-
-template <typename DictionaryType, typename Key>
-template <typename Container>
-void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
-    DictionaryStringGetter getter,
-    const PaddedPODArray<Key> & ids_to_fill,
-    const Columns & /*keys*/,
-    const DataTypes & /*data_types*/,
-    Container & container,
-    const DictionaryAttribute & attribute,
-    const DictionaryType & dict) const
-{
-    (dict.*getter)(attribute.name, ids_to_fill, container);
-}
-
-template <typename DictionaryType, typename Key>
-template <typename Type, typename Container>
-void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
-    GetterByKey<Type> getter,
-    const PaddedPODArray<Key> & /*ids_to_fill*/,
-    const Columns & keys,
-    const DataTypes & data_types,
-    Container & container,
-    const DictionaryAttribute & attribute,
-    const DictionaryType & dict) const
-{
-    (dict.*getter)(attribute.name, keys, data_types, container);
-}
-
-template <typename DictionaryType, typename Key>
-template <typename Type, typename Container>
-void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
-    DecimalGetterByKey<Type> getter,
-    const PaddedPODArray<Key> & /*ids_to_fill*/,
-    const Columns & keys,
-    const DataTypes & data_types,
-    Container & container,
-    const DictionaryAttribute & attribute,
-    const DictionaryType & dict) const
-{
-    (dict.*getter)(attribute.name, keys, data_types, container);
-}
-
-template <typename DictionaryType, typename Key>
-template <typename Container>
-void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
-    StringGetterByKey getter,
-    const PaddedPODArray<Key> & /*ids_to_fill*/,
-    const Columns & keys,
-    const DataTypes & data_types,
-    Container & container,
-    const DictionaryAttribute & attribute,
-    const DictionaryType & dict) const
-{
-    (dict.*getter)(attribute.name, keys, data_types, container);
-}
-
-
-template <typename DictionaryType, typename Key>
-template <template <typename> class Getter, template <typename> class DecimalGetter, typename StringGetter>
 Block DictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
     const PaddedPODArray<Key> & ids_to_fill, const Columns & keys, const DataTypes & types, ColumnsWithTypeAndName && view) const
 {
+    constexpr auto dictionary_get_by_type = DictionaryType::get_by_type;
     std::unordered_set<std::string> names(column_names.begin(), column_names.end());
 
     DataTypes data_types = types;
@@ -408,9 +207,13 @@ Block DictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
             block_columns.push_back(column);
 
     const DictionaryStructure & structure = dictionary->getStructure();
+    ColumnPtr ids_column;
 
     if (structure.id && names.find(structure.id->name) != names.end())
-        block_columns.emplace_back(getColumnFromIds(ids_to_fill), std::make_shared<DataTypeUInt64>(), structure.id->name);
+    {
+        ids_column = getColumnFromIds(ids_to_fill);
+        block_columns.emplace_back(ids_column, std::make_shared<DataTypeUInt64>(), structure.id->name);
+    }
 
     for (const auto idx : ext::range(0, structure.attributes.size()))
     {
@@ -418,124 +221,27 @@ Block DictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
         if (names.find(attribute.name) != names.end())
         {
             ColumnPtr column;
-#define GET_COLUMN_FORM_ATTRIBUTE(TYPE) \
-    column = getColumnFromAttribute<TYPE, Getter<TYPE>>(&DictionaryType::get##TYPE, ids_to_fill, keys, data_types, attribute, *dictionary)
-            switch (attribute.underlying_type)
+
+            if constexpr (dictionary_get_by_type == DictionaryGetByType::getByIdentifiers)
             {
-                case AttributeUnderlyingType::utUInt8:
-                    GET_COLUMN_FORM_ATTRIBUTE(UInt8);
-                    break;
-                case AttributeUnderlyingType::utUInt16:
-                    GET_COLUMN_FORM_ATTRIBUTE(UInt16);
-                    break;
-                case AttributeUnderlyingType::utUInt32:
-                    GET_COLUMN_FORM_ATTRIBUTE(UInt32);
-                    break;
-                case AttributeUnderlyingType::utUInt64:
-                    GET_COLUMN_FORM_ATTRIBUTE(UInt64);
-                    break;
-                case AttributeUnderlyingType::utUInt128:
-                    GET_COLUMN_FORM_ATTRIBUTE(UInt128);
-                    break;
-                case AttributeUnderlyingType::utInt8:
-                    GET_COLUMN_FORM_ATTRIBUTE(Int8);
-                    break;
-                case AttributeUnderlyingType::utInt16:
-                    GET_COLUMN_FORM_ATTRIBUTE(Int16);
-                    break;
-                case AttributeUnderlyingType::utInt32:
-                    GET_COLUMN_FORM_ATTRIBUTE(Int32);
-                    break;
-                case AttributeUnderlyingType::utInt64:
-                    GET_COLUMN_FORM_ATTRIBUTE(Int64);
-                    break;
-                case AttributeUnderlyingType::utFloat32:
-                    GET_COLUMN_FORM_ATTRIBUTE(Float32);
-                    break;
-                case AttributeUnderlyingType::utFloat64:
-                    GET_COLUMN_FORM_ATTRIBUTE(Float64);
-                    break;
-                case AttributeUnderlyingType::utDecimal32:
-                {
-                    column = getColumnFromAttribute<Decimal32, DecimalGetter<Decimal32>>(
-                        &DictionaryType::getDecimal32, ids_to_fill, keys, data_types, attribute, *dictionary);
-                    break;
-                }
-                case AttributeUnderlyingType::utDecimal64:
-                {
-                    column = getColumnFromAttribute<Decimal64, DecimalGetter<Decimal64>>(
-                        &DictionaryType::getDecimal64, ids_to_fill, keys, data_types, attribute, *dictionary);
-                    break;
-                }
-                case AttributeUnderlyingType::utDecimal128:
-                {
-                    column = getColumnFromAttribute<Decimal128, DecimalGetter<Decimal128>>(
-                        &DictionaryType::getDecimal128, ids_to_fill, keys, data_types, attribute, *dictionary);
-                    break;
-                }
-                case AttributeUnderlyingType::utString:
-                {
-                    column = getColumnFromStringAttribute<StringGetter>(
-                        &DictionaryType::getString, ids_to_fill, keys, data_types, attribute, *dictionary);
-                    break;
-                }
+                column = dictionary->get(attribute.name, attribute.type, ids_column, nullptr /* default_untyped */);
             }
-#undef GET_COLUMN_FORM_ATTRIBUTE
+            else if constexpr (dictionary_get_by_type == DictionaryGetByType::getByComplexKeys)
+            {
+                column = nullptr;
+            }
+            else
+            {
+                column = nullptr;
+            }
+
             block_columns.emplace_back(column, attribute.type, attribute.name);
         }
     }
+
     return Block(block_columns);
 }
 
-
-template <typename DictionaryType, typename Key>
-template <typename AttributeType, typename Getter>
-ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromAttribute(
-    Getter getter,
-    const PaddedPODArray<Key> & ids_to_fill,
-    const Columns & keys,
-    const DataTypes & data_types,
-    const DictionaryAttribute & attribute,
-    const DictionaryType & dict) const
-{
-    if constexpr (IsDecimalNumber<AttributeType>)
-    {
-        auto size = ids_to_fill.size();
-        if (!keys.empty())
-            size = keys.front()->size();
-        auto column = ColumnDecimal<AttributeType>::create(size, 0); /// NOTE: There's wrong scale here, but it's unused.
-        callGetter(getter, ids_to_fill, keys, data_types, column->getData(), attribute, dict);
-        return column;
-    }
-    else
-    {
-        auto size = ids_to_fill.size();
-        if (!keys.empty())
-            size = keys.front()->size();
-        auto column_vector = ColumnVector<AttributeType>::create(size);
-        callGetter(getter, ids_to_fill, keys, data_types, column_vector->getData(), attribute, dict);
-        return column_vector;
-    }
-}
-
-
-template <typename DictionaryType, typename Key>
-template <typename Getter>
-ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromStringAttribute(
-    Getter getter,
-    const PaddedPODArray<Key> & ids_to_fill,
-    const Columns & keys,
-    const DataTypes & data_types,
-    const DictionaryAttribute & attribute,
-    const DictionaryType & dict) const
-{
-    auto column_string = ColumnString::create();
-    auto ptr = column_string.get();
-    callGetter(getter, ids_to_fill, keys, data_types, ptr, attribute, dict);
-    return column_string;
-}
-
-
 template <typename DictionaryType, typename Key>
 ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromIds(const PaddedPODArray<Key> & ids_to_fill) const
 {
diff --git a/src/Dictionaries/DictionaryStructure.cpp b/src/Dictionaries/DictionaryStructure.cpp
index 07b746d8365..68be50daa2f 100644
--- a/src/Dictionaries/DictionaryStructure.cpp
+++ b/src/Dictionaries/DictionaryStructure.cpp
@@ -2,6 +2,8 @@
 #include <Columns/IColumn.h>
 #include <DataTypes/DataTypeFactory.h>
 #include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypeArray.h>
+#include <Functions/FunctionHelpers.h>
 #include <Formats/FormatSettings.h>
 #include <IO/WriteHelpers.h>
 #include <IO/Operators.h>
@@ -12,6 +14,7 @@
 #include <unordered_set>
 #include <ext/range.h>
 
+#include <iostream>
 
 namespace DB
 {
@@ -83,11 +86,6 @@ AttributeUnderlyingType getAttributeUnderlyingType(const std::string & type)
             return AttributeUnderlyingType::utDecimal128;
     }
 
-    // Temporary hack to allow arrays in keys, since they are never retrieved for polygon dictionaries.
-    // TODO: This should be fixed by fully supporting arrays in dictionaries.
-    if (type.find("Array") == 0)
-        return AttributeUnderlyingType::utString;
-
     throw Exception{"Unknown type " + type, ErrorCodes::UNKNOWN_TYPE};
 }
 
@@ -319,8 +317,26 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
             continue;
 
         const auto type_string = config.getString(prefix + "type");
-        const auto type = DataTypeFactory::instance().get(type_string);
-        const auto underlying_type = getAttributeUnderlyingType(type_string);
+        const auto initial_type = DataTypeFactory::instance().get(type_string);
+        auto type = initial_type;
+        bool is_array = false;
+        bool is_nullable = false;
+
+        const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(type.get());
+        if (array_type)
+        {
+            is_array = true;
+            type = array_type->getNestedType();
+        }
+        
+        if (type->isNullable())
+        {
+            is_nullable = true;
+            type = removeNullable(type);
+        }
+
+        /// TODO: Fix for decimal
+        const auto underlying_type = getAttributeUnderlyingType(type->getName());
 
         const auto expression = config.getString(prefix + "expression", "");
         if (!expression.empty())
@@ -366,7 +382,7 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
         has_hierarchy = has_hierarchy || hierarchical;
 
         res_attributes.emplace_back(
-            DictionaryAttribute{name, underlying_type, type, expression, null_value, hierarchical, injective, is_object_id});
+            DictionaryAttribute{name, underlying_type, initial_type, expression, null_value, hierarchical, injective, is_object_id, is_nullable, is_array});
     }
 
     return res_attributes;
diff --git a/src/Dictionaries/DictionaryStructure.h b/src/Dictionaries/DictionaryStructure.h
index 08885bc4d1b..5656815e375 100644
--- a/src/Dictionaries/DictionaryStructure.h
+++ b/src/Dictionaries/DictionaryStructure.h
@@ -42,6 +42,12 @@ std::string toString(const AttributeUnderlyingType type);
 /// Min and max lifetimes for a dictionary or it's entry
 using DictionaryLifetime = ExternalLoadableLifetime;
 
+enum class DictionaryGetByType {
+    getByIdentifiers,
+    getByComplexKeys,
+    getByRange,
+    unsupported /* will be removed after migration to new interface */
+};
 
 /** Holds the description of a single dictionary attribute:
 *    - name, used for lookup into dictionary and source;
@@ -62,8 +68,68 @@ struct DictionaryAttribute final
     const bool hierarchical;
     const bool injective;
     const bool is_object_id;
+    const bool is_nullable;
+    const bool is_array;
 };
 
+template <typename Type>
+struct DictionaryAttributeType
+{
+    using AttributeType = Type;
+};
+
+template <typename F>
+void callOnDictionaryAttributeType(AttributeUnderlyingType type, F&& func)
+{
+    switch (type)
+    {
+        case AttributeUnderlyingType::utUInt8:
+            func(DictionaryAttributeType<UInt8>());
+            break;
+        case AttributeUnderlyingType::utUInt16:
+            func(DictionaryAttributeType<UInt16>());
+            break;
+        case AttributeUnderlyingType::utUInt32:
+            func(DictionaryAttributeType<UInt32>());
+            break;
+        case AttributeUnderlyingType::utUInt64:
+            func(DictionaryAttributeType<UInt64>());
+            break;
+        case AttributeUnderlyingType::utUInt128:
+            func(DictionaryAttributeType<UInt128>());
+            break;
+        case AttributeUnderlyingType::utInt8:
+            func(DictionaryAttributeType<Int8>());
+            break;
+        case AttributeUnderlyingType::utInt16:
+            func(DictionaryAttributeType<Int16>());
+            break;
+        case AttributeUnderlyingType::utInt32:
+            func(DictionaryAttributeType<Int32>());
+            break;
+        case AttributeUnderlyingType::utInt64:
+            func(DictionaryAttributeType<Int64>());
+            break;
+        case AttributeUnderlyingType::utFloat32:
+            func(DictionaryAttributeType<Float32>());
+            break;
+        case AttributeUnderlyingType::utFloat64:
+            func(DictionaryAttributeType<Float64>());
+            break;
+        case AttributeUnderlyingType::utString:
+            func(DictionaryAttributeType<String>());
+            break;
+        case AttributeUnderlyingType::utDecimal32:
+            func(DictionaryAttributeType<Decimal32>());
+            break;
+        case AttributeUnderlyingType::utDecimal64:
+            func(DictionaryAttributeType<Decimal64>());
+            break;
+        case AttributeUnderlyingType::utDecimal128:
+            func(DictionaryAttributeType<Decimal128>());
+            break;
+    }
+};
 
 struct DictionarySpecialAttribute final
 {
diff --git a/src/Dictionaries/DirectDictionary.h b/src/Dictionaries/DirectDictionary.h
index 18ef5224a8a..31e2994febb 100644
--- a/src/Dictionaries/DirectDictionary.h
+++ b/src/Dictionaries/DirectDictionary.h
@@ -65,6 +65,8 @@ public:
     void isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const override;
     void isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
 
+    static constexpr DictionaryGetByType get_by_type = DictionaryGetByType::unsupported; 
+
     template <typename T>
     using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
 
diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp
index 47ffdaeb5bd..aa03179491e 100644
--- a/src/Dictionaries/FlatDictionary.cpp
+++ b/src/Dictionaries/FlatDictionary.cpp
@@ -1,5 +1,7 @@
 #include "FlatDictionary.h"
 #include <IO/WriteHelpers.h>
+#include <Columns/ColumnsNumber.h>
+#include <Functions/FunctionHelpers.h>
 #include "DictionaryBlockInputStream.h"
 #include "DictionaryFactory.h"
 #include <Core/Defines.h>
@@ -14,6 +16,7 @@ namespace ErrorCodes
     extern const int BAD_ARGUMENTS;
     extern const int DICTIONARY_IS_EMPTY;
     extern const int UNSUPPORTED_METHOD;
+    extern const int ILLEGAL_COLUMN;
 }
 
 static const auto initial_array_size = 1024;
@@ -102,183 +105,184 @@ void FlatDictionary::isInConstantVector(const Key child_id, const PaddedPODArray
     isInImpl(child_id, ancestor_ids, out);
 }
 
-
-#define DECLARE(TYPE) \
-    void FlatDictionary::get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const \
-    { \
-        const auto & attribute = getAttribute(attribute_name); \
-        checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
-\
-        const auto null_value = std::get<TYPE>(attribute.null_values); \
-\
-        getItemsImpl<TYPE, TYPE>( \
-            attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return null_value; }); \
-    }
-DECLARE(UInt8)
-DECLARE(UInt16)
-DECLARE(UInt32)
-DECLARE(UInt64)
-DECLARE(UInt128)
-DECLARE(Int8)
-DECLARE(Int16)
-DECLARE(Int32)
-DECLARE(Int64)
-DECLARE(Float32)
-DECLARE(Float64)
-DECLARE(Decimal32)
-DECLARE(Decimal64)
-DECLARE(Decimal128)
-#undef DECLARE
-
-void FlatDictionary::getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const
+ColumnPtr FlatDictionary::get(const std::string& attribute_name, const DataTypePtr &, const ColumnPtr id_column, const ColumnPtr default_untyped) const
 {
-    const auto & attribute = getAttribute(attribute_name);
-    checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
+    ColumnPtr result;
 
-    const auto & null_value = std::get<StringRef>(attribute.null_values);
+    const auto *id_col = checkAndGetColumn<ColumnUInt64>(id_column.get());
+    const auto *id_col_const = checkAndGetColumnConst<ColumnUInt64>(id_column.get()); 
 
-    getItemsImpl<StringRef, StringRef>(
-        attribute,
-        ids,
-        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
-        [&](const size_t) { return null_value; });
-}
-
-#define DECLARE(TYPE) \
-    void FlatDictionary::get##TYPE( \
-        const std::string & attribute_name, \
-        const PaddedPODArray<Key> & ids, \
-        const PaddedPODArray<TYPE> & def, \
-        ResultArrayType<TYPE> & out) const \
-    { \
-        const auto & attribute = getAttribute(attribute_name); \
-        checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
-\
-        getItemsImpl<TYPE, TYPE>( \
-            attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t row) { return def[row]; }); \
+    if (id_col == nullptr && id_col_const == nullptr)
+    {
+        throw Exception{"Identifiers column must be UInt64", ErrorCodes::ILLEGAL_COLUMN};
     }
-DECLARE(UInt8)
-DECLARE(UInt16)
-DECLARE(UInt32)
-DECLARE(UInt64)
-DECLARE(UInt128)
-DECLARE(Int8)
-DECLARE(Int16)
-DECLARE(Int32)
-DECLARE(Int64)
-DECLARE(Float32)
-DECLARE(Float64)
-DECLARE(Decimal32)
-DECLARE(Decimal64)
-DECLARE(Decimal128)
-#undef DECLARE
 
-void FlatDictionary::getString(
-    const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
-{
-    const auto & attribute = getAttribute(attribute_name);
-    checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
-
-    getItemsImpl<StringRef, StringRef>(
-        attribute,
-        ids,
-        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
-        [&](const size_t row) { return def->getDataAt(row); });
-}
-
-#define DECLARE(TYPE) \
-    void FlatDictionary::get##TYPE( \
-        const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const \
-    { \
-        const auto & attribute = getAttribute(attribute_name); \
-        checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
-\
-        getItemsImpl<TYPE, TYPE>( \
-            attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
+    PaddedPODArray<UInt64> ids_const(1);
+    
+    if (id_col_const != nullptr) {
+        ids_const[0] = id_col_const->getValue<UInt64>();     
     }
-DECLARE(UInt8)
-DECLARE(UInt16)
-DECLARE(UInt32)
-DECLARE(UInt64)
-DECLARE(UInt128)
-DECLARE(Int8)
-DECLARE(Int16)
-DECLARE(Int32)
-DECLARE(Int64)
-DECLARE(Float32)
-DECLARE(Float64)
-DECLARE(Decimal32)
-DECLARE(Decimal64)
-DECLARE(Decimal128)
-#undef DECLARE
 
-void FlatDictionary::getString(
-    const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
-{
+    const auto& ids = id_col != nullptr ? id_col->getData() : ids_const;
+    bool is_const = id_col_const != nullptr;
+
     const auto & attribute = getAttribute(attribute_name);
-    checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
 
-    FlatDictionary::getItemsImpl<StringRef, StringRef>(
-        attribute,
-        ids,
-        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
-        [&](const size_t) { return StringRef{def}; });
+    /// TODO: Check that attribute type is same as result type
+
+    auto type_call = [&](const auto &dictionary_attribute_type)
+    {
+        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+        using AttributeType = typename Type::AttributeType;
+
+        auto size = ids.size();
+
+        if constexpr (std::is_same_v<AttributeType, String>)
+        {
+            auto column_string = ColumnString::create();
+            auto out = column_string.get();
+
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ColumnString>(*default_untyped))
+                {
+                    getItemsImpl<StringRef, StringRef>(
+                        attribute,
+                        ids,
+                        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
+                        [&](const size_t row) { return default_col->getDataAt(row); });
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<String>();
+
+                    getItemsImpl<StringRef, StringRef>(
+                        attribute,
+                        ids,
+                        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
+                        [&](const size_t) { return def; });
+                }
+            }
+            else
+            {
+                const auto & null_value = std::get<StringRef>(attribute.null_values);
+
+                getItemsImpl<StringRef, StringRef>(
+                    attribute,
+                    ids,
+                    [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
+                    [&](const size_t) { return null_value; });
+            }
+
+            result = std::move(column_string);
+        }
+        else if constexpr (IsNumber<AttributeType>)
+        {
+            auto column = ColumnVector<AttributeType>::create(size);
+            auto& out = column->getData();
+
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ColumnVector<AttributeType>>(*default_untyped))
+                {
+                    getItemsImpl<AttributeType, AttributeType>(
+                        attribute,
+                        ids,
+                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t row) { return default_col->getData()[row]; }
+                    );
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ColumnVector<AttributeType>>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<AttributeType>();
+
+                    getItemsImpl<AttributeType, AttributeType>(
+                        attribute,
+                        ids,
+                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t) { return def; }
+                    );
+                }
+            }
+            else
+            {
+                const auto null_value = std::get<AttributeType>(attribute.null_values);
+
+                getItemsImpl<AttributeType, AttributeType>(
+                    attribute,
+                    ids,
+                    [&](const size_t row, const auto value) { return out[row] = value; },
+                    [&](const size_t) { return null_value; });
+            }
+
+            result = std::move(column);
+        }
+        else if constexpr (IsDecimalNumber<AttributeType>)
+        {
+            // auto scale = getDecimalScale(*attribute.type);
+            auto column = ColumnDecimal<AttributeType>::create(size, 0);
+            auto& out = column->getData();
+
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ColumnDecimal<AttributeType>>(*default_untyped))
+                {
+                    getItemsImpl<AttributeType, AttributeType>(
+                        attribute,
+                        ids,
+                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t row) { return default_col->getData()[row]; }
+                    );
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ColumnDecimal<AttributeType>>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<AttributeType>();
+
+                    getItemsImpl<AttributeType, AttributeType>(
+                        attribute,
+                        ids,
+                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t) { return def; }
+                    );
+                }
+            }
+            else
+            {
+                const auto null_value = std::get<AttributeType>(attribute.null_values);
+
+                getItemsImpl<AttributeType, AttributeType>(
+                    attribute,
+                    ids,
+                    [&](const size_t row, const auto value) { return out[row] = value; },
+                    [&](const size_t) { return null_value; }
+                );
+            }
+
+            result = std::move(column);
+        }
+    };
+
+    callOnDictionaryAttributeType(attribute.type, type_call);
+    
+    if (is_const)
+    {
+        result = ColumnConst::create(std::move(result), 1);
+    }
+
+    return result;
 }
 
-
 void FlatDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
 {
-    const auto & attribute = attributes.front();
+    const auto ids_count = ext::size(ids);
 
-    switch (attribute.type)
+    for (const auto i : ext::range(0, ids_count))
     {
-        case AttributeUnderlyingType::utUInt8:
-            has<UInt8>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utUInt16:
-            has<UInt16>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utUInt32:
-            has<UInt32>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utUInt64:
-            has<UInt64>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utUInt128:
-            has<UInt128>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utInt8:
-            has<Int8>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utInt16:
-            has<Int16>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utInt32:
-            has<Int32>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utInt64:
-            has<Int64>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utFloat32:
-            has<Float32>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utFloat64:
-            has<Float64>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utString:
-            has<String>(attribute, ids, out);
-            break;
-
-        case AttributeUnderlyingType::utDecimal32:
-            has<Decimal32>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utDecimal64:
-            has<Decimal64>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utDecimal128:
-            has<Decimal128>(attribute, ids, out);
-            break;
+        const auto id = ids[i];
+        out[i] = id < loaded_ids.size() && loaded_ids[id];
     }
+
+    query_count.fetch_add(ids_count, std::memory_order_relaxed);
 }
 
 
@@ -416,6 +420,14 @@ void FlatDictionary::addAttributeSize(const Attribute & attribute)
     bucket_count = array_ref.capacity();
 }
 
+template <>
+void FlatDictionary::addAttributeSize<String>(const Attribute & attribute)
+{
+    const auto & array_ref = std::get<ContainerType<StringRef>>(attribute.arrays);
+    bytes_allocated += sizeof(PaddedPODArray<StringRef>) + array_ref.allocated_bytes();
+    bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
+    bucket_count = array_ref.capacity();
+}
 
 void FlatDictionary::calculateBytesAllocated()
 {
@@ -423,60 +435,15 @@ void FlatDictionary::calculateBytesAllocated()
 
     for (const auto & attribute : attributes)
     {
-        switch (attribute.type)
+        auto type_call = [&](const auto & dictionary_attribute_type)
         {
-            case AttributeUnderlyingType::utUInt8:
-                addAttributeSize<UInt8>(attribute);
-                break;
-            case AttributeUnderlyingType::utUInt16:
-                addAttributeSize<UInt16>(attribute);
-                break;
-            case AttributeUnderlyingType::utUInt32:
-                addAttributeSize<UInt32>(attribute);
-                break;
-            case AttributeUnderlyingType::utUInt64:
-                addAttributeSize<UInt64>(attribute);
-                break;
-            case AttributeUnderlyingType::utUInt128:
-                addAttributeSize<UInt128>(attribute);
-                break;
-            case AttributeUnderlyingType::utInt8:
-                addAttributeSize<Int8>(attribute);
-                break;
-            case AttributeUnderlyingType::utInt16:
-                addAttributeSize<Int16>(attribute);
-                break;
-            case AttributeUnderlyingType::utInt32:
-                addAttributeSize<Int32>(attribute);
-                break;
-            case AttributeUnderlyingType::utInt64:
-                addAttributeSize<Int64>(attribute);
-                break;
-            case AttributeUnderlyingType::utFloat32:
-                addAttributeSize<Float32>(attribute);
-                break;
-            case AttributeUnderlyingType::utFloat64:
-                addAttributeSize<Float64>(attribute);
-                break;
+            using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+            using AttributeType = typename Type::AttributeType;
 
-            case AttributeUnderlyingType::utDecimal32:
-                addAttributeSize<Decimal32>(attribute);
-                break;
-            case AttributeUnderlyingType::utDecimal64:
-                addAttributeSize<Decimal64>(attribute);
-                break;
-            case AttributeUnderlyingType::utDecimal128:
-                addAttributeSize<Decimal128>(attribute);
-                break;
+            addAttributeSize<AttributeType>(attribute);
+        };
 
-            case AttributeUnderlyingType::utString:
-            {
-                addAttributeSize<StringRef>(attribute);
-                bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
-
-                break;
-            }
-        }
+        callOnDictionaryAttributeType(attribute.type, type_call);
     }
 }
 
@@ -502,57 +469,16 @@ void FlatDictionary::createAttributeImpl<String>(Attribute & attribute, const Fi
 
 FlatDictionary::Attribute FlatDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
 {
-    Attribute attr{type, {}, {}, {}};
+    Attribute attr{type, {}, {}, {}, {}};
 
-    switch (type)
+    auto type_call = [&](const auto &dictionary_attribute_type)
     {
-        case AttributeUnderlyingType::utUInt8:
-            createAttributeImpl<UInt8>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utUInt16:
-            createAttributeImpl<UInt16>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utUInt32:
-            createAttributeImpl<UInt32>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utUInt64:
-            createAttributeImpl<UInt64>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utUInt128:
-            createAttributeImpl<UInt128>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utInt8:
-            createAttributeImpl<Int8>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utInt16:
-            createAttributeImpl<Int16>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utInt32:
-            createAttributeImpl<Int32>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utInt64:
-            createAttributeImpl<Int64>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utFloat32:
-            createAttributeImpl<Float32>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utFloat64:
-            createAttributeImpl<Float64>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utString:
-            createAttributeImpl<String>(attr, null_value);
-            break;
+        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+        using AttributType = typename Type::AttributeType;
+        createAttributeImpl<AttributType>(attr, null_value);
+    };
 
-        case AttributeUnderlyingType::utDecimal32:
-            createAttributeImpl<Decimal32>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utDecimal64:
-            createAttributeImpl<Decimal64>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utDecimal128:
-            createAttributeImpl<Decimal128>(attr, null_value);
-            break;
-    }
+    callOnDictionaryAttributeType(type, type_call);
 
     return attr;
 }
@@ -610,55 +536,14 @@ void FlatDictionary::setAttributeValueImpl<String>(Attribute & attribute, const
 
 void FlatDictionary::setAttributeValue(Attribute & attribute, const Key id, const Field & value)
 {
-    switch (attribute.type)
+    auto type_call = [&](const auto &dictionary_attribute_type)
     {
-        case AttributeUnderlyingType::utUInt8:
-            setAttributeValueImpl<UInt8>(attribute, id, value.get<UInt64>());
-            break;
-        case AttributeUnderlyingType::utUInt16:
-            setAttributeValueImpl<UInt16>(attribute, id, value.get<UInt64>());
-            break;
-        case AttributeUnderlyingType::utUInt32:
-            setAttributeValueImpl<UInt32>(attribute, id, value.get<UInt64>());
-            break;
-        case AttributeUnderlyingType::utUInt64:
-            setAttributeValueImpl<UInt64>(attribute, id, value.get<UInt64>());
-            break;
-        case AttributeUnderlyingType::utUInt128:
-            setAttributeValueImpl<UInt128>(attribute, id, value.get<UInt128>());
-            break;
-        case AttributeUnderlyingType::utInt8:
-            setAttributeValueImpl<Int8>(attribute, id, value.get<Int64>());
-            break;
-        case AttributeUnderlyingType::utInt16:
-            setAttributeValueImpl<Int16>(attribute, id, value.get<Int64>());
-            break;
-        case AttributeUnderlyingType::utInt32:
-            setAttributeValueImpl<Int32>(attribute, id, value.get<Int64>());
-            break;
-        case AttributeUnderlyingType::utInt64:
-            setAttributeValueImpl<Int64>(attribute, id, value.get<Int64>());
-            break;
-        case AttributeUnderlyingType::utFloat32:
-            setAttributeValueImpl<Float32>(attribute, id, value.get<Float64>());
-            break;
-        case AttributeUnderlyingType::utFloat64:
-            setAttributeValueImpl<Float64>(attribute, id, value.get<Float64>());
-            break;
-        case AttributeUnderlyingType::utString:
-            setAttributeValueImpl<String>(attribute, id, value.get<String>());
-            break;
+        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+        using AttributeType = typename Type::AttributeType;
+        setAttributeValueImpl<AttributeType>(attribute, id, value.get<AttributeType>());
+    };
 
-        case AttributeUnderlyingType::utDecimal32:
-            setAttributeValueImpl<Decimal32>(attribute, id, value.get<Decimal32>());
-            break;
-        case AttributeUnderlyingType::utDecimal64:
-            setAttributeValueImpl<Decimal64>(attribute, id, value.get<Decimal64>());
-            break;
-        case AttributeUnderlyingType::utDecimal128:
-            setAttributeValueImpl<Decimal128>(attribute, id, value.get<Decimal128>());
-            break;
-    }
+    callOnDictionaryAttributeType(attribute.type, type_call);
 }
 
 
@@ -672,21 +557,6 @@ const FlatDictionary::Attribute & FlatDictionary::getAttribute(const std::string
 }
 
 
-template <typename T>
-void FlatDictionary::has(const Attribute &, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
-{
-    const auto ids_count = ext::size(ids);
-
-    for (const auto i : ext::range(0, ids_count))
-    {
-        const auto id = ids[i];
-        out[i] = id < loaded_ids.size() && loaded_ids[id];
-    }
-
-    query_count.fetch_add(ids_count, std::memory_order_relaxed);
-}
-
-
 PaddedPODArray<FlatDictionary::Key> FlatDictionary::getIds() const
 {
     const auto ids_count = ext::size(loaded_ids);
diff --git a/src/Dictionaries/FlatDictionary.h b/src/Dictionaries/FlatDictionary.h
index 2f51c1f5c1b..6a6fafe5903 100644
--- a/src/Dictionaries/FlatDictionary.h
+++ b/src/Dictionaries/FlatDictionary.h
@@ -3,8 +3,11 @@
 #include <atomic>
 #include <variant>
 #include <vector>
+#include <optional>
 #include <Columns/ColumnDecimal.h>
 #include <Columns/ColumnString.h>
+#include <Columns/ColumnArray.h>
+#include <DataTypes/IDataType.h>
 #include <Common/Arena.h>
 #include <Core/Block.h>
 #include <ext/range.h>
@@ -69,71 +72,9 @@ public:
     template <typename T>
     using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
 
-#define DECLARE(TYPE) \
-    void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const;
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
+    static constexpr DictionaryGetByType get_by_type = DictionaryGetByType::getByIdentifiers;
 
-    void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const;
-
-#define DECLARE(TYPE) \
-    void get##TYPE( \
-        const std::string & attribute_name, \
-        const PaddedPODArray<Key> & ids, \
-        const PaddedPODArray<TYPE> & def, \
-        ResultArrayType<TYPE> & out) const;
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
-
-    void
-    getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out)
-        const;
-
-#define DECLARE(TYPE) \
-    void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const;
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
-
-    void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const;
+    ColumnPtr get(const std::string& attribute_name, const DataTypePtr & result_type, const ColumnPtr id_column, const ColumnPtr default_untyped) const;
 
     void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
 
@@ -146,6 +87,8 @@ private:
     struct Attribute final
     {
         AttributeUnderlyingType type;
+        // bool is_array;
+
         std::variant<
             UInt8,
             UInt16,
@@ -180,6 +123,8 @@ private:
             ContainerType<Float64>,
             ContainerType<StringRef>>
             arrays;
+
+        std::optional<ContainerType<size_t>> array_offsets;
         std::unique_ptr<Arena> string_arena;
     };
 
@@ -212,9 +157,6 @@ private:
 
     const Attribute & getAttribute(const std::string & attribute_name) const;
 
-    template <typename T>
-    void has(const Attribute & attribute, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const;
-
     template <typename ChildType, typename AncestorType>
     void isInImpl(const ChildType & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const;
 
diff --git a/src/Dictionaries/HashedDictionary.h b/src/Dictionaries/HashedDictionary.h
index b9c4ab138b8..497d18f93e0 100644
--- a/src/Dictionaries/HashedDictionary.h
+++ b/src/Dictionaries/HashedDictionary.h
@@ -66,6 +66,8 @@ public:
 
     void toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const override;
 
+    static constexpr DictionaryGetByType get_by_type = DictionaryGetByType::unsupported;  
+
     template <typename T>
     using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
 
diff --git a/src/Dictionaries/IPAddressDictionary.h b/src/Dictionaries/IPAddressDictionary.h
index 2009141ebcc..3040579134c 100644
--- a/src/Dictionaries/IPAddressDictionary.h
+++ b/src/Dictionaries/IPAddressDictionary.h
@@ -61,6 +61,8 @@ public:
         return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
     }
 
+    static constexpr DictionaryGetByType get_by_type = DictionaryGetByType::unsupported;
+
     template <typename T>
     using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
 
diff --git a/src/Dictionaries/PolygonDictionary.h b/src/Dictionaries/PolygonDictionary.h
index 75114cff435..f22052b59b5 100644
--- a/src/Dictionaries/PolygonDictionary.h
+++ b/src/Dictionaries/PolygonDictionary.h
@@ -80,6 +80,8 @@ public:
 
     BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
 
+    static constexpr DictionaryGetByType get_by_type = DictionaryGetByType::unsupported;
+
     template <typename T>
     using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
 
diff --git a/src/Dictionaries/RangeHashedDictionary.h b/src/Dictionaries/RangeHashedDictionary.h
index 46ae0390b6a..22037fa4718 100644
--- a/src/Dictionaries/RangeHashedDictionary.h
+++ b/src/Dictionaries/RangeHashedDictionary.h
@@ -54,6 +54,8 @@ public:
 
     typedef Int64 RangeStorageType;
 
+    static constexpr DictionaryGetByType get_by_type = DictionaryGetByType::unsupported;
+
     template <typename T>
     using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
 
diff --git a/src/Dictionaries/SSDCacheDictionary.h b/src/Dictionaries/SSDCacheDictionary.h
index 59df778e1f2..0fa344adc6c 100644
--- a/src/Dictionaries/SSDCacheDictionary.h
+++ b/src/Dictionaries/SSDCacheDictionary.h
@@ -354,6 +354,8 @@ public:
     template <typename T>
     using ResultArrayType = SSDCacheStorage::ResultArrayType<T>;
 
+    static constexpr DictionaryGetByType get_by_type = DictionaryGetByType::unsupported;
+
 #define DECLARE(TYPE) \
     void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const;
     DECLARE(UInt8)
diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.h b/src/Dictionaries/SSDComplexKeyCacheDictionary.h
index 4758d62f1df..b7ef53af054 100644
--- a/src/Dictionaries/SSDComplexKeyCacheDictionary.h
+++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.h
@@ -569,6 +569,8 @@ public:
 
     std::exception_ptr getLastException() const override { return storage.getLastException(); }
 
+    static constexpr DictionaryGetByType get_by_type = DictionaryGetByType::unsupported;
+
     template <typename T>
     using ResultArrayType = SSDComplexKeyCacheStorage::ResultArrayType<T>;
 
diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h
index 2cbcca734e4..d77d432bbf8 100644
--- a/src/Functions/FunctionsExternalDictionaries.h
+++ b/src/Functions/FunctionsExternalDictionaries.h
@@ -259,532 +259,26 @@ private:
 template <typename T>
 static const PaddedPODArray<T> & getColumnDataAsPaddedPODArray(const IColumn & column, PaddedPODArray<T> & backup_storage);
 
-
-class FunctionDictGetString final : public IFunction
+enum class DictionaryGetFunctionType
 {
-public:
-    static constexpr auto name = "dictGetString";
-
-    static FunctionPtr create(const Context & context)
-    {
-        return std::make_shared<FunctionDictGetString>(context);
-    }
-
-    explicit FunctionDictGetString(const Context & context_) : helper(context_) {}
-
-    String getName() const override { return name; }
-
-private:
-    bool isVariadic() const override { return true; }
-    size_t getNumberOfArguments() const override { return 0; }
-
-    bool useDefaultImplementationForConstants() const final { return true; }
-    ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 1}; }
-
-    bool isInjective(const ColumnsWithTypeAndName & sample_columns) const override
-    {
-        return helper.isDictGetFunctionInjective(sample_columns);
-    }
-
-    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
-    {
-        if (arguments.size() != 3 && arguments.size() != 4)
-            throw Exception{"Number of arguments for function " + getName() + " doesn't match: passed "
-                + toString(arguments.size()) + ", should be 3 or 4.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
-
-        if (!isString(arguments[0]))
-        {
-            throw Exception{"Illegal type " + arguments[0]->getName() + " of first argument of function " + getName()
-                + ", expected a string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
-        }
-
-        if (!isString(arguments[1]))
-        {
-            throw Exception{"Illegal type " + arguments[1]->getName() + " of second argument of function " + getName()
-                + ", expected a string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
-        }
-
-        if (!WhichDataType(arguments[2]).isUInt64() &&
-            !isTuple(arguments[2]))
-        {
-            throw Exception{"Illegal type " + arguments[2]->getName() + " of third argument of function " + getName()
-                + ", must be UInt64 or tuple(...).", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
-        }
-
-        /// This is for the case of range dictionaries_loader.
-        if (arguments.size() == 4 && !arguments[3]->isValueRepresentedByInteger())
-        {
-            throw Exception{"Illegal type " + arguments[3]->getName() +
-                            " of fourth argument of function " + getName() +
-                            " must be convertible to Int64.", ErrorCodes::ILLEGAL_COLUMN};
-        }
-
-        return std::make_shared<DataTypeString>();
-    }
-
-    bool isDeterministic() const override { return false; }
-
-    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
-    {
-        if (input_rows_count == 0)
-            return result_type->createColumn();
-
-        auto dict = helper.getDictionary(arguments[0]);
-        ColumnPtr res;
-
-        if (!((res = executeDispatch<FlatDictionary>(arguments, dict))
-            || (res = executeDispatch<HashedDictionary>(arguments, dict))
-            || (res = executeDispatch<DirectDictionary>(arguments, dict))
-            || (res = executeDispatch<CacheDictionary>(arguments, dict))
-#if defined(OS_LINUX) || defined(__FreeBSD__)
-            || (res = executeDispatch<SSDCacheDictionary>(arguments, dict))
-#endif
-            || (res = executeDispatchComplex<ComplexKeyHashedDictionary>(arguments, dict))
-            || (res = executeDispatchComplex<ComplexKeyDirectDictionary>(arguments, dict))
-            || (res = executeDispatchComplex<ComplexKeyCacheDictionary>(arguments, dict))
-#if defined(OS_LINUX) || defined(__FreeBSD__)
-            || (res = executeDispatchComplex<SSDComplexKeyCacheDictionary>(arguments, dict))
-#endif
-#if !defined(ARCADIA_BUILD)
-            || (res = executeDispatchComplex<IPAddressDictionary>(arguments, dict))
-#endif
-            || (res = executeDispatchComplex<PolygonDictionarySimple>(arguments, dict))
-            || (res = executeDispatchComplex<PolygonDictionaryIndexEach>(arguments, dict))
-            || (res = executeDispatchComplex<PolygonDictionaryIndexCell>(arguments, dict))
-            || (res = executeDispatchRange<RangeHashedDictionary>(arguments, dict))))
-            throw Exception{"Unsupported dictionary type " + dict->getTypeName(), ErrorCodes::UNKNOWN_TYPE};
-
-        return res;
-    }
-
-    template <typename DictionaryType>
-    ColumnPtr executeDispatch(
-            const ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
-    {
-        const auto * dict = typeid_cast<const DictionaryType *>(dict_ptr.get());
-        if (!dict)
-            return nullptr;
-
-        if (arguments.size() != 3)
-            throw Exception{"Function " + getName() + " for dictionary of type " + dict->getTypeName() +
-                " requires exactly 3 arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
-
-        const auto * attr_name_col = checkAndGetColumnConst<ColumnString>(arguments[1].column.get());
-        if (!attr_name_col)
-            throw Exception{"Second argument of function " + getName() + " must be a constant string", ErrorCodes::ILLEGAL_COLUMN};
-
-        String attr_name = attr_name_col->getValue<String>();
-
-        const auto * id_col_untyped = arguments[2].column.get();
-        if (const auto * id_col = checkAndGetColumn<ColumnUInt64>(id_col_untyped))
-        {
-            auto out = ColumnString::create();
-            dict->getString(attr_name, id_col->getData(), out.get());
-            return out;
-        }
-        else
-            throw Exception{"Third argument of function " + getName() + " must be UInt64", ErrorCodes::ILLEGAL_COLUMN};
-    }
-
-    template <typename DictionaryType>
-    ColumnPtr executeDispatchComplex(
-            const ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
-    {
-        const auto * dict = typeid_cast<const DictionaryType *>(dict_ptr.get());
-        if (!dict)
-            return nullptr;
-
-        if (arguments.size() != 3)
-            throw Exception{"Function " + getName() + " for dictionary of type " + dict->getTypeName() +
-                " requires exactly 3 arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
-
-        const auto * attr_name_col = checkAndGetColumnConst<ColumnString>(arguments[1].column.get());
-        if (!attr_name_col)
-            throw Exception{"Second argument of function " + getName() + " must be a constant string", ErrorCodes::ILLEGAL_COLUMN};
-
-        String attr_name = attr_name_col->getValue<String>();
-
-        const ColumnWithTypeAndName & key_col_with_type = arguments[2];
-        /// Functions in external dictionaries_loader only support full-value (not constant) columns with keys.
-        ColumnPtr key_col = key_col_with_type.column->convertToFullColumnIfConst();
-
-        if (checkColumn<ColumnTuple>(key_col.get()))
-        {
-            const auto & key_columns = assert_cast<const ColumnTuple &>(*key_col).getColumnsCopy();
-            const auto & key_types = static_cast<const DataTypeTuple &>(*key_col_with_type.type).getElements();
-
-            auto out = ColumnString::create();
-            dict->getString(attr_name, key_columns, key_types, out.get());
-            return out;
-        }
-        else
-            throw Exception{"Third argument of function " + getName() + " must be " + dict->getKeyDescription(), ErrorCodes::TYPE_MISMATCH};
-    }
-
-    template <typename DictionaryType>
-    ColumnPtr executeDispatchRange(
-            const ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
-    {
-        const auto * dict = typeid_cast<const DictionaryType *>(dict_ptr.get());
-        if (!dict)
-            return nullptr;
-
-        if (arguments.size() != 4)
-            throw Exception{"Function " + getName() + " for dictionary of type " + dict->getTypeName() +
-                " requires exactly 4 arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
-
-        const auto * attr_name_col = checkAndGetColumnConst<ColumnString>(arguments[1].column.get());
-        if (!attr_name_col)
-            throw Exception{"Second argument of function " + getName() + " must be a constant string", ErrorCodes::ILLEGAL_COLUMN};
-
-        String attr_name = attr_name_col->getValue<String>();
-
-        const auto & id_col_untyped = arguments[2].column;
-        const auto & range_col_untyped = arguments[3].column;
-
-        PaddedPODArray<UInt64> id_col_values_storage;
-        PaddedPODArray<Int64> range_col_values_storage;
-        const auto & id_col_values = getColumnDataAsPaddedPODArray(*id_col_untyped, id_col_values_storage);
-        const auto & range_col_values = getColumnDataAsPaddedPODArray(*range_col_untyped, range_col_values_storage);
-
-        auto out = ColumnString::create();
-        dict->getString(attr_name, id_col_values, range_col_values, out.get());
-        return out;
-    }
-
-    mutable FunctionDictHelper helper;
+    withoutDefault,
+    withDefault
 };
 
-
-class FunctionDictGetStringOrDefault final : public IFunction
-{
-public:
-    static constexpr auto name = "dictGetStringOrDefault";
-
-    static FunctionPtr create(const Context & context)
-    {
-        return std::make_shared<FunctionDictGetStringOrDefault>(context);
-    }
-
-    explicit FunctionDictGetStringOrDefault(const Context & context_) : helper(context_) {}
-
-    String getName() const override { return name; }
-
-private:
-    size_t getNumberOfArguments() const override { return 4; }
-
-    bool useDefaultImplementationForConstants() const final { return true; }
-    ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 1}; }
-
-    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
-    {
-        if (!isString(arguments[0]))
-            throw Exception{"Illegal type " + arguments[0]->getName() + " of first argument of function " + getName() +
-                ", expected a string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
-
-        if (!isString(arguments[1]))
-            throw Exception{"Illegal type " + arguments[1]->getName() + " of second argument of function " + getName() +
-                ", expected a string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
-
-        if (!WhichDataType(arguments[2]).isUInt64() &&
-            !isTuple(arguments[2]))
-        {
-            throw Exception{"Illegal type " + arguments[2]->getName() + " of third argument of function " + getName()
-                + ", must be UInt64 or tuple(...).", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
-        }
-
-        if (!isString(arguments[3]))
-            throw Exception{"Illegal type " + arguments[3]->getName() + " of fourth argument of function " + getName() +
-                ", must be String.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
-
-        return std::make_shared<DataTypeString>();
-    }
-
-    bool isDeterministic() const override { return false; }
-
-    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
-    {
-        if (input_rows_count == 0)
-            return result_type->createColumn();
-
-        auto dict = helper.getDictionary(arguments[0]);
-
-        ColumnPtr res;
-        if (!((res = executeDispatch<FlatDictionary>(arguments, dict))
-            || (res = executeDispatch<HashedDictionary>(arguments, dict))
-            || (res = executeDispatch<DirectDictionary>(arguments, dict))
-            || (res = executeDispatch<CacheDictionary>(arguments, dict))
-#if defined(OS_LINUX) || defined(__FreeBSD__)
-            || (res = executeDispatch<SSDCacheDictionary>(arguments, dict))
-#endif
-            || (res = executeDispatchComplex<ComplexKeyHashedDictionary>(arguments, dict))
-            || (res = executeDispatchComplex<ComplexKeyDirectDictionary>(arguments, dict))
-            || (res = executeDispatchComplex<ComplexKeyCacheDictionary>(arguments, dict))
-#if defined(OS_LINUX) || defined(__FreeBSD__)
-            || (res = executeDispatchComplex<SSDComplexKeyCacheDictionary>(arguments, dict))
-#endif
-#if !defined(ARCADIA_BUILD)
-            || (res = executeDispatchComplex<IPAddressDictionary>(arguments, dict))
-#endif
-            || (res = executeDispatchComplex<PolygonDictionarySimple>(arguments, dict))
-            || (res = executeDispatchComplex<PolygonDictionaryIndexEach>(arguments, dict))
-            || (res = executeDispatchComplex<PolygonDictionaryIndexCell>(arguments, dict))))
-            throw Exception{"Unsupported dictionary type " + dict->getTypeName(), ErrorCodes::UNKNOWN_TYPE};
-
-        return res;
-    }
-
-    template <typename DictionaryType>
-    ColumnPtr executeDispatch(const ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
-    {
-        const auto * dict = typeid_cast<const DictionaryType *>(dict_ptr.get());
-        if (!dict)
-            return nullptr;
-
-        const auto * attr_name_col = checkAndGetColumnConst<ColumnString>(arguments[1].column.get());
-        if (!attr_name_col)
-            throw Exception{"Second argument of function " + getName() + " must be a constant string", ErrorCodes::ILLEGAL_COLUMN};
-
-        String attr_name = attr_name_col->getValue<String>();
-
-        const auto * id_col_untyped = arguments[2].column.get();
-        if (const auto * id_col = checkAndGetColumn<ColumnUInt64>(id_col_untyped))
-            return executeDispatch(arguments, dict, attr_name, id_col);
-        else if (const auto * id_col_const = checkAndGetColumnConst<ColumnVector<UInt64>>(id_col_untyped))
-            return executeDispatch(arguments, dict, attr_name, id_col_const);
-        else
-            throw Exception{"Third argument of function " + getName() + " must be UInt64", ErrorCodes::ILLEGAL_COLUMN};
-    }
-
-    template <typename DictionaryType>
-    ColumnPtr executeDispatch(
-            const ColumnsWithTypeAndName & arguments, const DictionaryType * dict,
-            const std::string & attr_name, const ColumnUInt64 * id_col) const
-    {
-        const auto * default_col_untyped = arguments[3].column.get();
-
-        if (const auto * default_col = checkAndGetColumn<ColumnString>(default_col_untyped))
-        {
-            /// vector ids, vector defaults
-            auto out = ColumnString::create();
-            const auto & ids = id_col->getData();
-            dict->getString(attr_name, ids, default_col, out.get());
-            return out;
-        }
-        else if (const auto * default_col_const = checkAndGetColumnConstStringOrFixedString(default_col_untyped))
-        {
-            /// vector ids, const defaults
-            auto out = ColumnString::create();
-            const auto & ids = id_col->getData();
-            String def = default_col_const->getValue<String>();
-            dict->getString(attr_name, ids, def, out.get());
-            return out;
-        }
-        else
-            throw Exception{"Fourth argument of function " + getName() + " must be String", ErrorCodes::ILLEGAL_COLUMN};
-    }
-
-    template <typename DictionaryType>
-    ColumnPtr executeDispatch(
-            const ColumnsWithTypeAndName & arguments, const DictionaryType * dict,
-            const std::string & attr_name, const ColumnConst * id_col) const
-    {
-        const auto * default_col_untyped = arguments[3].column.get();
-
-        if (const auto * default_col = checkAndGetColumn<ColumnString>(default_col_untyped))
-        {
-            /// const ids, vector defaults
-            const PaddedPODArray<UInt64> ids(1, id_col->getValue<UInt64>());
-            PaddedPODArray<UInt8> flags(1);
-            dict->has(ids, flags);
-            if (flags.front())
-            {
-                auto out = ColumnString::create();
-                dict->getString(attr_name, ids, String(), out.get());
-                return DataTypeString().createColumnConst(id_col->size(), out->getDataAt(0).toString());
-            }
-            else
-                return arguments[3].column; // reuse the default column
-        }
-        else if (const auto * default_col_const = checkAndGetColumnConstStringOrFixedString(default_col_untyped))
-        {
-            /// const ids, const defaults
-            const PaddedPODArray<UInt64> ids(1, id_col->getValue<UInt64>());
-            auto out = ColumnString::create();
-            String def = default_col_const->getValue<String>();
-            dict->getString(attr_name, ids, def, out.get());
-            return DataTypeString().createColumnConst(id_col->size(), out->getDataAt(0).toString());
-        }
-        else
-            throw Exception{"Fourth argument of function " + getName() + " must be String", ErrorCodes::ILLEGAL_COLUMN};
-    }
-
-    template <typename DictionaryType>
-    ColumnPtr executeDispatchComplex(
-            const ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
-    {
-        const auto * dict = typeid_cast<const DictionaryType *>(dict_ptr.get());
-        if (!dict)
-            return nullptr;
-
-        const auto * attr_name_col = checkAndGetColumnConst<ColumnString>(arguments[1].column.get());
-        if (!attr_name_col)
-            throw Exception{"Second argument of function " + getName() + " must be a constant string", ErrorCodes::ILLEGAL_COLUMN};
-
-        String attr_name = attr_name_col->getValue<String>();
-
-        const ColumnWithTypeAndName & key_col_with_type = arguments[2];
-        /// Functions in external dictionaries_loader only support full-value (not constant) columns with keys.
-        ColumnPtr key_col = key_col_with_type.column->convertToFullColumnIfConst();
-
-        const auto & key_columns = typeid_cast<const ColumnTuple &>(*key_col).getColumnsCopy();
-        const auto & key_types = static_cast<const DataTypeTuple &>(*key_col_with_type.type).getElements();
-
-        auto out = ColumnString::create();
-
-        const auto * default_col_untyped = arguments[3].column.get();
-        if (const auto * default_col = checkAndGetColumn<ColumnString>(default_col_untyped))
-        {
-            dict->getString(attr_name, key_columns, key_types, default_col, out.get());
-        }
-        else if (const auto * default_col_const = checkAndGetColumnConstStringOrFixedString(default_col_untyped))
-        {
-            String def = default_col_const->getValue<String>();
-            dict->getString(attr_name, key_columns, key_types, def, out.get());
-        }
-        else
-            throw Exception{"Fourth argument of function " + getName() + " must be String", ErrorCodes::ILLEGAL_COLUMN};
-
-        return out;
-    }
-
-    mutable FunctionDictHelper helper;
-};
-
-
-template <typename DataType> struct DictGetTraits;
-#define DECLARE_DICT_GET_TRAITS(TYPE, DATA_TYPE) \
-template <> struct DictGetTraits<DATA_TYPE>\
-{\
-    template <typename DictionaryType>\
-    static void get(\
-        const DictionaryType * dict, const std::string & name, const PaddedPODArray<UInt64> & ids,\
-        PaddedPODArray<TYPE> & out)\
-    {\
-        dict->get##TYPE(name, ids, out);\
-    }\
-    template <typename DictionaryType>\
-    static void get(\
-        const DictionaryType * dict, const std::string & name, const Columns & key_columns,\
-        const DataTypes & key_types, PaddedPODArray<TYPE> & out)\
-    {\
-        dict->get##TYPE(name, key_columns, key_types, out);\
-    }\
-    template <typename DictionaryType>\
-    static void get(\
-        const DictionaryType * dict, const std::string & name, const PaddedPODArray<UInt64> & ids,\
-        const PaddedPODArray<Int64> & dates, PaddedPODArray<TYPE> & out)\
-    {\
-        dict->get##TYPE(name, ids, dates, out);\
-    }\
-    template <typename DictionaryType, typename DefaultsType>\
-    static void getOrDefault(\
-        const DictionaryType * dict, const std::string & name, const PaddedPODArray<UInt64> & ids,\
-        const DefaultsType & def, PaddedPODArray<TYPE> & out)\
-    {\
-        dict->get##TYPE(name, ids, def, out);\
-    }\
-    template <typename DictionaryType, typename DefaultsType>\
-    static void getOrDefault(\
-        const DictionaryType * dict, const std::string & name, const Columns & key_columns,\
-        const DataTypes & key_types, const DefaultsType & def, PaddedPODArray<TYPE> & out)\
-    {\
-        dict->get##TYPE(name, key_columns, key_types, def, out);\
-    }\
-};
-DECLARE_DICT_GET_TRAITS(UInt8, DataTypeUInt8)
-DECLARE_DICT_GET_TRAITS(UInt16, DataTypeUInt16)
-DECLARE_DICT_GET_TRAITS(UInt32, DataTypeUInt32)
-DECLARE_DICT_GET_TRAITS(UInt64, DataTypeUInt64)
-DECLARE_DICT_GET_TRAITS(Int8, DataTypeInt8)
-DECLARE_DICT_GET_TRAITS(Int16, DataTypeInt16)
-DECLARE_DICT_GET_TRAITS(Int32, DataTypeInt32)
-DECLARE_DICT_GET_TRAITS(Int64, DataTypeInt64)
-DECLARE_DICT_GET_TRAITS(Float32, DataTypeFloat32)
-DECLARE_DICT_GET_TRAITS(Float64, DataTypeFloat64)
-DECLARE_DICT_GET_TRAITS(UInt16, DataTypeDate)
-DECLARE_DICT_GET_TRAITS(UInt32, DataTypeDateTime)
-DECLARE_DICT_GET_TRAITS(UInt128, DataTypeUUID)
-#undef DECLARE_DICT_GET_TRAITS
-
-template <typename T> struct DictGetTraits<DataTypeDecimal<T>>
-{
-    static constexpr bool is_dec32 = std::is_same_v<T, Decimal32>;
-    static constexpr bool is_dec64 = std::is_same_v<T, Decimal64>;
-    static constexpr bool is_dec128 = std::is_same_v<T, Decimal128>;
-
-    template <typename DictionaryType>
-    static void get(const DictionaryType * dict, const std::string & name, const PaddedPODArray<UInt64> & ids,
-                    DecimalPaddedPODArray<T> & out)
-    {
-        if constexpr (is_dec32) dict->getDecimal32(name, ids, out);
-        if constexpr (is_dec64) dict->getDecimal64(name, ids, out);
-        if constexpr (is_dec128) dict->getDecimal128(name, ids, out);
-    }
-
-    template <typename DictionaryType>
-    static void get(const DictionaryType * dict, const std::string & name, const Columns & key_columns, const DataTypes & key_types,
-                    DecimalPaddedPODArray<T> & out)
-    {
-        if constexpr (is_dec32) dict->getDecimal32(name, key_columns, key_types, out);
-        if constexpr (is_dec64) dict->getDecimal64(name, key_columns, key_types, out);
-        if constexpr (is_dec128) dict->getDecimal128(name, key_columns, key_types, out);
-    }
-
-    template <typename DictionaryType>
-    static void get(const DictionaryType * dict, const std::string & name, const PaddedPODArray<UInt64> & ids,
-                    const PaddedPODArray<Int64> & dates, DecimalPaddedPODArray<T> & out)
-    {
-        if constexpr (is_dec32) dict->getDecimal32(name, ids, dates, out);
-        if constexpr (is_dec64) dict->getDecimal64(name, ids, dates, out);
-        if constexpr (is_dec128) dict->getDecimal128(name, ids, dates, out);
-    }
-
-    template <typename DictionaryType, typename DefaultsType>
-    static void getOrDefault(const DictionaryType * dict, const std::string & name, const PaddedPODArray<UInt64> & ids,
-                    const DefaultsType & def, DecimalPaddedPODArray<T> & out)
-    {
-        if constexpr (is_dec32) dict->getDecimal32(name, ids, def, out);
-        if constexpr (is_dec64) dict->getDecimal64(name, ids, def, out);
-        if constexpr (is_dec128) dict->getDecimal128(name, ids, def, out);
-    }
-
-    template <typename DictionaryType, typename DefaultsType>
-    static void getOrDefault(const DictionaryType * dict, const std::string & name, const Columns & key_columns,
-                    const DataTypes & key_types, const DefaultsType & def, DecimalPaddedPODArray<T> & out)
-    {
-        if constexpr (is_dec32) dict->getDecimal32(name, key_columns, key_types, def, out);
-        if constexpr (is_dec64) dict->getDecimal64(name, key_columns, key_types, def, out);
-        if constexpr (is_dec128) dict->getDecimal128(name, key_columns, key_types, def, out);
-    }
-};
-
-
-template <typename DataType, typename Name>
-class FunctionDictGet final : public IFunction
+template <typename DataType, typename Name, DictionaryGetFunctionType dictionary_get_function_type>
+class FunctionDictGetImpl final : public IFunction
 {
     using Type = typename DataType::FieldType;
-    using ColVec = std::conditional_t<IsDecimalNumber<Type>, ColumnDecimal<Type>, ColumnVector<Type>>;
 
 public:
     static constexpr auto name = Name::name;
 
     static FunctionPtr create(const Context & context, UInt32 dec_scale = 0)
     {
-        return std::make_shared<FunctionDictGet>(context, dec_scale);
+        return std::make_shared<FunctionDictGetImpl>(context, dec_scale);
     }
 
-    explicit FunctionDictGet(const Context & context_, UInt32 dec_scale = 0)
+    explicit FunctionDictGetImpl(const Context & context_, UInt32 dec_scale = 0)
         : helper(context_)
         , decimal_scale(dec_scale)
     {}
@@ -792,22 +286,21 @@ public:
     String getName() const override { return name; }
 
 private:
-    bool isVariadic() const override { return true; }
-    size_t getNumberOfArguments() const override { return 0; }
+    size_t getNumberOfArguments() const override {
+        /// TODO: Check if ranged dictionary is working
+        return dictionary_get_function_type == DictionaryGetFunctionType::withoutDefault ? 0 : 4; 
+    }
+
+    bool isVariadic() const override { return dictionary_get_function_type == DictionaryGetFunctionType::withoutDefault; }
 
     bool useDefaultImplementationForConstants() const final { return true; }
-    ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 1}; }
 
-    bool isInjective(const ColumnsWithTypeAndName & sample_columns) const override
-    {
-        return helper.isDictGetFunctionInjective(sample_columns);
-    }
+    bool isDeterministic() const override { return false; }
+
+    ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 1}; }
 
     DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
     {
-        if (arguments.size() != 3 && arguments.size() != 4)
-            throw Exception{"Function " + getName() + " takes 3 or 4 arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
-
         if (!isString(arguments[0]))
             throw Exception{"Illegal type " + arguments[0]->getName() + " of first argument of function " + getName()
                 + ", expected a string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
@@ -821,14 +314,21 @@ private:
             throw Exception{"Illegal type " + arguments[2]->getName() + " of third argument of function " + getName()
                 + ", must be UInt64 or tuple(...).", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
 
-        if (arguments.size() == 4)
+        if constexpr (dictionary_get_function_type == DictionaryGetFunctionType::withDefault)
         {
-            const auto * range_argument = arguments[3].get();
-            if (!(range_argument->isValueRepresentedByInteger() &&
-                   range_argument->getSizeOfValueInMemory() <= sizeof(Int64)))
-                throw Exception{"Illegal type " + range_argument->getName() + " of fourth argument of function " + getName()
-                    + ", must be convertible to " + TypeName<Int64>::get() + ".",
-                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
+            if (!checkAndGetDataType<DataType>(arguments[3].get()))
+                throw Exception{"Illegal type " + arguments[3]->getName() + " of fourth argument of function " + getName()
+                    + ", must be " + TypeName<Type>::get() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
+        }
+        else
+        {
+            /// This is for the case of range dictionaries_loader.
+            if (arguments.size() == 4 && !arguments[3]->isValueRepresentedByInteger())
+            {
+                throw Exception{"Illegal type " + arguments[3]->getName() +
+                                " of fourth argument of function " + getName() +
+                                " must be convertible to Int64.", ErrorCodes::ILLEGAL_COLUMN};
+            }
         }
 
         if constexpr (IsDataTypeDecimal<DataType>)
@@ -837,174 +337,88 @@ private:
             return std::make_shared<DataType>();
     }
 
-    bool isDeterministic() const override { return false; }
-
     ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
     {
         if (input_rows_count == 0)
             return result_type->createColumn();
 
         auto dict = helper.getDictionary(arguments[0]);
-
         ColumnPtr res;
-        if (!((res = executeDispatch<FlatDictionary>(arguments, dict))
-            || (res = executeDispatch<HashedDictionary>(arguments, dict))
-            || (res = executeDispatch<DirectDictionary>(arguments, dict))
-            || (res = executeDispatch<CacheDictionary>(arguments, dict))
+
+        if (!((res = executeDispatch<FlatDictionary>(arguments, result_type, dict))
+            || (res = executeDispatch<HashedDictionary>(arguments, result_type, dict))
+            || (res = executeDispatch<DirectDictionary>(arguments, result_type, dict))
+            || (res = executeDispatch<CacheDictionary>(arguments, result_type, dict))
 #if defined(OS_LINUX) || defined(__FreeBSD__)
-            || (res = executeDispatch<SSDCacheDictionary>(arguments, dict))
+            || (res = executeDispatch<SSDCacheDictionary>(arguments, result_type, dict))
 #endif
-            || (res = executeDispatchComplex<ComplexKeyHashedDictionary>(arguments, dict))
-            || (res = executeDispatchComplex<ComplexKeyDirectDictionary>(arguments, dict))
-            || (res = executeDispatchComplex<ComplexKeyCacheDictionary>(arguments, dict))
+            || (res = executeDispatch<ComplexKeyHashedDictionary>(arguments, result_type, dict))
+            || (res = executeDispatch<ComplexKeyDirectDictionary>(arguments, result_type, dict))
+            || (res = executeDispatch<ComplexKeyCacheDictionary>(arguments, result_type, dict))
 #if defined(OS_LINUX) || defined(__FreeBSD__)
-            || (res = executeDispatchComplex<SSDComplexKeyCacheDictionary>(arguments, dict))
+            || (res = executeDispatch<SSDComplexKeyCacheDictionary>(arguments, result_type, dict))
 #endif
 #if !defined(ARCADIA_BUILD)
-            || (res = executeDispatchComplex<IPAddressDictionary>(arguments, dict))
+            || (res = executeDispatch<IPAddressDictionary>(arguments, result_type, dict))
 #endif
-            || (res = executeDispatchComplex<PolygonDictionarySimple>(arguments, dict))
-            || (res = executeDispatchComplex<PolygonDictionaryIndexEach>(arguments, dict))
-            || (res = executeDispatchComplex<PolygonDictionaryIndexCell>(arguments, dict))
-            || (res = executeDispatchRange<RangeHashedDictionary>(arguments, dict))))
+            || (res = executeDispatch<PolygonDictionarySimple>(arguments, result_type, dict))
+            || (res = executeDispatch<PolygonDictionaryIndexEach>(arguments, result_type, dict))
+            || (res = executeDispatch<PolygonDictionaryIndexCell>(arguments, result_type, dict))
+            || (res = executeDispatch<RangeHashedDictionary>(arguments, result_type, dict))))
             throw Exception{"Unsupported dictionary type " + dict->getTypeName(), ErrorCodes::UNKNOWN_TYPE};
 
         return res;
     }
 
     template <typename DictionaryType>
-    ColumnPtr executeDispatch(const ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
+    ColumnPtr executeDispatch(
+        const ColumnsWithTypeAndName & arguments,
+        const DataTypePtr & result_type,
+        const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
     {
-        const auto * dict = typeid_cast<const DictionaryType *>(dict_ptr.get());
-        if (!dict)
+        const auto * dictionary = typeid_cast<const DictionaryType *>(dict_ptr.get());
+        if (!dictionary)
             return nullptr;
 
-        if (arguments.size() != 3)
-            throw Exception{"Function " + getName() + " for dictionary of type " + dict->getTypeName() +
-                " requires exactly 3 arguments.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
-
         const auto * attr_name_col = checkAndGetColumnConst<ColumnString>(arguments[1].column.get());
         if (!attr_name_col)
             throw Exception{"Second argument of function " + getName() + " must be a constant string", ErrorCodes::ILLEGAL_COLUMN};
 
         String attr_name = attr_name_col->getValue<String>();
 
-        const auto * id_col_untyped = arguments[2].column.get();
-        if (const auto * id_col = checkAndGetColumn<ColumnUInt64>(id_col_untyped))
-        {
-            typename ColVec::MutablePtr out;
-            if constexpr (IsDataTypeDecimal<DataType>)
-                out = ColVec::create(id_col->size(), decimal_scale);
-            else
-                out = ColVec::create(id_col->size());
-            const auto & ids = id_col->getData();
-            auto & data = out->getData();
-            DictGetTraits<DataType>::get(dict, attr_name, ids, data);
-            return out;
-        }
-        else if (const auto * id_col_const = checkAndGetColumnConst<ColumnVector<UInt64>>(id_col_untyped))
-        {
-            const PaddedPODArray<UInt64> ids(1, id_col_const->getValue<UInt64>());
+        const auto id_col_untyped = arguments[2].column;
 
-            if constexpr (IsDataTypeDecimal<DataType>)
+        constexpr auto dictionary_get_by_type = DictionaryType::get_by_type;
+
+        if constexpr (dictionary_get_by_type == DictionaryGetByType::getByIdentifiers)
+        {
+            if (dictionary_get_function_type == DictionaryGetFunctionType::withDefault)
             {
-                DecimalPaddedPODArray<Type> data(1, decimal_scale);
-                DictGetTraits<DataType>::get(dict, attr_name, ids, data);
-                return DataType(DataType::maxPrecision(), decimal_scale).createColumnConst(
-                        id_col_const->size(), toField(data.front(), decimal_scale));
+                const auto default_col_untyped = arguments[3].column;
+                return dictionary->get(attr_name, result_type, id_col_untyped, default_col_untyped);
             }
             else
             {
-                PaddedPODArray<Type> data(1);
-                DictGetTraits<DataType>::get(dict, attr_name, ids, data);
-                return DataTypeNumber<Type>().createColumnConst(id_col_const->size(), toField(data.front()));
+                return dictionary->get(attr_name, result_type, id_col_untyped, nullptr);
             }
         }
-        else
-            throw Exception{"Third argument of function " + getName() + " must be UInt64", ErrorCodes::ILLEGAL_COLUMN};
-    }
-
-    template <typename DictionaryType>
-    ColumnPtr executeDispatchComplex(
-            const ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
-    {
-        const auto * dict = typeid_cast<const DictionaryType *>(dict_ptr.get());
-        if (!dict)
-            return nullptr;
-
-        if (arguments.size() != 3)
-            throw Exception{"Function " + getName() + " for dictionary of type " + dict->getTypeName() +
-                " requires exactly 3 arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
-
-        const auto * attr_name_col = checkAndGetColumnConst<ColumnString>(arguments[1].column.get());
-        if (!attr_name_col)
-            throw Exception{"Second argument of function " + getName() + " must be a constant string", ErrorCodes::ILLEGAL_COLUMN};
-
-        String attr_name = attr_name_col->getValue<String>();
-
-        const ColumnWithTypeAndName & key_col_with_type = arguments[2];
-
-        /// Functions in external dictionaries_loader only support full-value (not constant) columns with keys.
-        ColumnPtr key_col = key_col_with_type.column->convertToFullColumnIfConst();
-
-        if (checkColumn<ColumnTuple>(key_col.get()))
+        else if constexpr (dictionary_get_by_type == DictionaryGetByType::getByComplexKeys)
         {
-            const auto & key_columns = assert_cast<const ColumnTuple &>(*key_col).getColumnsCopy();
-            const auto & key_types = static_cast<const DataTypeTuple &>(*key_col_with_type.type).getElements();
-
-            typename ColVec::MutablePtr out;
-            if constexpr (IsDataTypeDecimal<DataType>)
-                out = ColVec::create(key_columns.front()->size(), decimal_scale);
-            else
-                out = ColVec::create(key_columns.front()->size());
-            auto & data = out->getData();
-            DictGetTraits<DataType>::get(dict, attr_name, key_columns, key_types, data);
-            return out;
+            return nullptr;
         }
         else
-            throw Exception{"Third argument of function " + getName() + " must be " + dict->getKeyDescription(), ErrorCodes::TYPE_MISMATCH};
-    }
-
-    template <typename DictionaryType>
-    ColumnPtr executeDispatchRange(
-            const ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
-    {
-        const auto * dict = typeid_cast<const DictionaryType *>(dict_ptr.get());
-        if (!dict)
+        {
             return nullptr;
-
-        if (arguments.size() != 4)
-            throw Exception{"Function " + getName() + " for dictionary of type " + dict->getTypeName() +
-                " requires exactly 4 arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
-
-        const auto * attr_name_col = checkAndGetColumnConst<ColumnString>(arguments[1].column.get());
-        if (!attr_name_col)
-            throw Exception{"Second argument of function " + getName() + " must be a constant string", ErrorCodes::ILLEGAL_COLUMN};
-
-        String attr_name = attr_name_col->getValue<String>();
-
-        const auto & id_col_untyped = arguments[2].column;
-        const auto & range_col_untyped = arguments[3].column;
-
-        PaddedPODArray<UInt64> id_col_values_storage;
-        PaddedPODArray<Int64> range_col_values_storage;
-        const auto & id_col_values = getColumnDataAsPaddedPODArray(*id_col_untyped, id_col_values_storage);
-        const auto & range_col_values = getColumnDataAsPaddedPODArray(*range_col_untyped, range_col_values_storage);
-
-        typename ColVec::MutablePtr out;
-        if constexpr (IsDataTypeDecimal<DataType>)
-            out = ColVec::create(id_col_untyped->size(), decimal_scale);
-        else
-            out = ColVec::create(id_col_untyped->size());
-        auto & data = out->getData();
-        DictGetTraits<DataType>::get(dict, attr_name, id_col_values, range_col_values, data);
-        return out;
+        }
     }
 
     mutable FunctionDictHelper helper;
     UInt32 decimal_scale;
 };
 
+template<typename DataType, typename Name>
+using FunctionDictGet = FunctionDictGetImpl<DataType, Name, DictionaryGetFunctionType::withoutDefault>;
+
 struct NameDictGetUInt8 { static constexpr auto name = "dictGetUInt8"; };
 struct NameDictGetUInt16 { static constexpr auto name = "dictGetUInt16"; };
 struct NameDictGetUInt32 { static constexpr auto name = "dictGetUInt32"; };
@@ -1021,6 +435,7 @@ struct NameDictGetUUID { static constexpr auto name = "dictGetUUID"; };
 struct NameDictGetDecimal32 { static constexpr auto name = "dictGetDecimal32"; };
 struct NameDictGetDecimal64 { static constexpr auto name = "dictGetDecimal64"; };
 struct NameDictGetDecimal128 { static constexpr auto name = "dictGetDecimal128"; };
+struct NameDictGetString { static constexpr auto name = "dictGetString"; };
 
 using FunctionDictGetUInt8 = FunctionDictGet<DataTypeUInt8, NameDictGetUInt8>;
 using FunctionDictGetUInt16 = FunctionDictGet<DataTypeUInt16, NameDictGetUInt16>;
@@ -1038,266 +453,10 @@ using FunctionDictGetUUID = FunctionDictGet<DataTypeUUID, NameDictGetUUID>;
 using FunctionDictGetDecimal32 = FunctionDictGet<DataTypeDecimal<Decimal32>, NameDictGetDecimal32>;
 using FunctionDictGetDecimal64 = FunctionDictGet<DataTypeDecimal<Decimal64>, NameDictGetDecimal64>;
 using FunctionDictGetDecimal128 = FunctionDictGet<DataTypeDecimal<Decimal128>, NameDictGetDecimal128>;
+using FunctionDictGetString = FunctionDictGet<DataTypeString, NameDictGetString>;
 
-
-template <typename DataType, typename Name>
-class FunctionDictGetOrDefault final : public IFunction
-{
-    using Type = typename DataType::FieldType;
-    using ColVec = std::conditional_t<IsDecimalNumber<Type>, ColumnDecimal<Type>, ColumnVector<Type>>;
-
-public:
-    static constexpr auto name = Name::name;
-
-    static FunctionPtr create(const Context & context, UInt32 dec_scale = 0)
-    {
-        return std::make_shared<FunctionDictGetOrDefault>(context, dec_scale);
-    }
-
-    explicit FunctionDictGetOrDefault(const Context & context_, UInt32 dec_scale = 0)
-        : helper(context_)
-        , decimal_scale(dec_scale)
-    {}
-
-    String getName() const override { return name; }
-
-private:
-    size_t getNumberOfArguments() const override { return 4; }
-
-    bool useDefaultImplementationForConstants() const final { return true; }
-    ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 1}; }
-
-    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
-    {
-        if (!isString(arguments[0]))
-            throw Exception{"Illegal type " + arguments[0]->getName() + " of first argument of function " + getName()
-                + ", expected a string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
-
-        if (!isString(arguments[1]))
-            throw Exception{"Illegal type " + arguments[1]->getName() + " of second argument of function " + getName()
-                + ", expected a string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
-
-        if (!WhichDataType(arguments[2]).isUInt64() &&
-            !isTuple(arguments[2]))
-            throw Exception{"Illegal type " + arguments[2]->getName() + " of third argument of function " + getName()
-                + ", must be UInt64 or tuple(...).", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
-
-        if (!checkAndGetDataType<DataType>(arguments[3].get()))
-            throw Exception{"Illegal type " + arguments[3]->getName() + " of fourth argument of function " + getName()
-                + ", must be " + TypeName<Type>::get() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
-
-        if constexpr (IsDataTypeDecimal<DataType>)
-            return std::make_shared<DataType>(DataType::maxPrecision(), decimal_scale);
-        else
-            return std::make_shared<DataType>();
-    }
-
-    bool isDeterministic() const override { return false; }
-
-    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
-    {
-        if (input_rows_count == 0)
-            return result_type->createColumn();
-
-        auto dict = helper.getDictionary(arguments[0]);
-        ColumnPtr res;
-
-        if (!((res = executeDispatch<FlatDictionary>(arguments, dict))
-            || (res = executeDispatch<HashedDictionary>(arguments, dict))
-            || (res = executeDispatch<DirectDictionary>(arguments, dict))
-            || (res = executeDispatch<CacheDictionary>(arguments, dict))
-#if defined(OS_LINUX) || defined(__FreeBSD__)
-            || (res = executeDispatch<SSDCacheDictionary>(arguments, dict))
-#endif
-            || (res = executeDispatchComplex<ComplexKeyHashedDictionary>(arguments, dict))
-            || (res = executeDispatchComplex<ComplexKeyDirectDictionary>(arguments, dict))
-            || (res = executeDispatchComplex<ComplexKeyCacheDictionary>(arguments, dict))
-#if defined(OS_LINUX) || defined(__FreeBSD__)
-            || (res = executeDispatchComplex<SSDComplexKeyCacheDictionary>(arguments, dict))
-#endif
-#if !defined(ARCADIA_BUILD)
-            || (res = executeDispatchComplex<IPAddressDictionary>(arguments, dict))
-#endif
-            || (res = executeDispatchComplex<PolygonDictionarySimple>(arguments, dict))
-            || (res = executeDispatchComplex<PolygonDictionaryIndexEach>(arguments, dict))
-            || (res = executeDispatchComplex<PolygonDictionaryIndexCell>(arguments, dict))))
-            throw Exception{"Unsupported dictionary type " + dict->getTypeName(), ErrorCodes::UNKNOWN_TYPE};
-
-        return res;
-    }
-
-    template <typename DictionaryType>
-    ColumnPtr executeDispatch(const ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
-    {
-        const auto * dict = typeid_cast<const DictionaryType *>(dict_ptr.get());
-        if (!dict)
-            return nullptr;
-
-        const auto * attr_name_col = checkAndGetColumnConst<ColumnString>(arguments[1].column.get());
-        if (!attr_name_col)
-            throw Exception{"Second argument of function " + getName() + " must be a constant string", ErrorCodes::ILLEGAL_COLUMN};
-
-        String attr_name = attr_name_col->getValue<String>();
-
-        const auto * id_col_untyped = arguments[2].column.get();
-        if (const auto * id_col = checkAndGetColumn<ColumnUInt64>(id_col_untyped))
-            return executeDispatch(arguments, dict, attr_name, id_col);
-        else if (const auto * id_col_const = checkAndGetColumnConst<ColumnVector<UInt64>>(id_col_untyped))
-            return executeDispatch(arguments, dict, attr_name, id_col_const);
-        else
-            throw Exception{"Third argument of function " + getName() + " must be UInt64", ErrorCodes::ILLEGAL_COLUMN};
-    }
-
-    template <typename DictionaryType>
-    ColumnPtr executeDispatch(
-            const ColumnsWithTypeAndName & arguments, const DictionaryType * dict,
-            const std::string & attr_name, const ColumnUInt64 * id_col) const
-    {
-        const auto * default_col_untyped = arguments[3].column.get();
-
-        if (const auto default_col = checkAndGetColumn<ColVec>(default_col_untyped))
-        {
-            /// vector ids, vector defaults
-            typename ColVec::MutablePtr out;
-            if constexpr (IsDataTypeDecimal<DataType>)
-                out = ColVec::create(id_col->size(), decimal_scale);
-            else
-                out = ColVec::create(id_col->size());
-            const auto & ids = id_col->getData();
-            auto & data = out->getData();
-            const auto & defs = default_col->getData();
-            DictGetTraits<DataType>::getOrDefault(dict, attr_name, ids, defs, data);
-            return out;
-        }
-        else if (const auto default_col_const = checkAndGetColumnConst<ColVec>(default_col_untyped))
-        {
-            /// vector ids, const defaults
-            typename ColVec::MutablePtr out;
-            if constexpr (IsDataTypeDecimal<DataType>)
-                out = ColVec::create(id_col->size(), decimal_scale);
-            else
-                out = ColVec::create(id_col->size());
-            const auto & ids = id_col->getData();
-            auto & data = out->getData();
-            const auto def = default_col_const->template getValue<Type>();
-            DictGetTraits<DataType>::getOrDefault(dict, attr_name, ids, def, data);
-            return out;
-        }
-        else
-            throw Exception{"Fourth argument of function " + getName() + " must be " + TypeName<Type>::get(), ErrorCodes::ILLEGAL_COLUMN};
-    }
-
-    template <typename DictionaryType>
-    ColumnPtr executeDispatch(
-            const ColumnsWithTypeAndName & arguments, const DictionaryType * dict,
-            const std::string & attr_name, const ColumnConst * id_col) const
-    {
-        const auto * default_col_untyped = arguments[3].column.get();
-
-        if (const auto default_col = checkAndGetColumn<ColVec>(default_col_untyped))
-        {
-            /// const ids, vector defaults
-            const PaddedPODArray<UInt64> ids(1, id_col->getValue<UInt64>());
-            PaddedPODArray<UInt8> flags(1);
-            dict->has(ids, flags);
-            if (flags.front())
-            {
-                if constexpr (IsDataTypeDecimal<DataType>)
-                {
-                    DecimalPaddedPODArray<Type> data(1, decimal_scale);
-                    DictGetTraits<DataType>::getOrDefault(dict, attr_name, ids, Type(), data);
-                    return DataType(DataType::maxPrecision(), decimal_scale).createColumnConst(
-                            id_col->size(), toField(data.front(), decimal_scale));
-                }
-                else
-                {
-                    PaddedPODArray<Type> data(1);
-                    DictGetTraits<DataType>::getOrDefault(dict, attr_name, ids, Type(), data);
-                    return DataType().createColumnConst(id_col->size(), toField(data.front()));
-                }
-            }
-            else
-                return arguments[3].column; // reuse the default column
-        }
-        else if (const auto default_col_const = checkAndGetColumnConst<ColVec>(default_col_untyped))
-        {
-            /// const ids, const defaults
-            const PaddedPODArray<UInt64> ids(1, id_col->getValue<UInt64>());
-
-            if constexpr (IsDataTypeDecimal<DataType>)
-            {
-                DecimalPaddedPODArray<Type> data(1, decimal_scale);
-                const auto & def = default_col_const->template getValue<Type>();
-                DictGetTraits<DataType>::getOrDefault(dict, attr_name, ids, def, data);
-                return DataType(DataType::maxPrecision(), decimal_scale).createColumnConst(
-                        id_col->size(), toField(data.front(), decimal_scale));
-            }
-            else
-            {
-                PaddedPODArray<Type> data(1);
-                const auto & def = default_col_const->template getValue<Type>();
-                DictGetTraits<DataType>::getOrDefault(dict, attr_name, ids, def, data);
-                return DataType().createColumnConst(id_col->size(), toField(data.front()));
-            }
-        }
-        else
-            throw Exception{"Fourth argument of function " + getName() + " must be " + TypeName<Type>::get(), ErrorCodes::ILLEGAL_COLUMN};
-    }
-
-    template <typename DictionaryType>
-    ColumnPtr executeDispatchComplex(
-            const ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
-    {
-        const auto * dict = typeid_cast<const DictionaryType *>(dict_ptr.get());
-        if (!dict)
-            return nullptr;
-
-        const auto * attr_name_col = checkAndGetColumnConst<ColumnString>(arguments[1].column.get());
-        if (!attr_name_col)
-            throw Exception{"Second argument of function " + getName() + " must be a constant string", ErrorCodes::ILLEGAL_COLUMN};
-
-        String attr_name = attr_name_col->getValue<String>();
-
-        const ColumnWithTypeAndName & key_col_with_type = arguments[2];
-
-        /// Functions in external dictionaries_loader only support full-value (not constant) columns with keys.
-        ColumnPtr key_col = key_col_with_type.column->convertToFullColumnIfConst();
-
-        const auto & key_columns = typeid_cast<const ColumnTuple &>(*key_col).getColumnsCopy();
-        const auto & key_types = static_cast<const DataTypeTuple &>(*key_col_with_type.type).getElements();
-
-        /// @todo detect when all key columns are constant
-        const auto rows = key_col->size();
-        typename ColVec::MutablePtr out;
-        if constexpr (IsDataTypeDecimal<DataType>)
-            out = ColVec::create(rows, decimal_scale);
-        else
-            out = ColVec::create(rows);
-        auto & data = out->getData();
-
-        const auto * default_col_untyped = arguments[3].column.get();
-        if (const auto default_col = checkAndGetColumn<ColVec>(default_col_untyped))
-        {
-            /// const defaults
-            const auto & defs = default_col->getData();
-
-            DictGetTraits<DataType>::getOrDefault(dict, attr_name, key_columns, key_types, defs, data);
-        }
-        else if (const auto default_col_const = checkAndGetColumnConst<ColVec>(default_col_untyped))
-        {
-            const auto def = default_col_const->template getValue<Type>();
-
-            DictGetTraits<DataType>::getOrDefault(dict, attr_name, key_columns, key_types, def, data);
-        }
-        else
-            throw Exception{"Fourth argument of function " + getName() + " must be " + TypeName<Type>::get(), ErrorCodes::ILLEGAL_COLUMN};
-
-        return out;
-    }
-
-    mutable FunctionDictHelper helper;
-    UInt32 decimal_scale;
-};
+template<typename DataType, typename Name>
+using FunctionDictGetOrDefault = FunctionDictGetImpl<DataType, Name, DictionaryGetFunctionType::withDefault>;
 
 struct NameDictGetUInt8OrDefault { static constexpr auto name = "dictGetUInt8OrDefault"; };
 struct NameDictGetUInt16OrDefault { static constexpr auto name = "dictGetUInt16OrDefault"; };
@@ -1315,6 +474,7 @@ struct NameDictGetUUIDOrDefault { static constexpr auto name = "dictGetUUIDOrDef
 struct NameDictGetDecimal32OrDefault { static constexpr auto name = "dictGetDecimal32OrDefault"; };
 struct NameDictGetDecimal64OrDefault { static constexpr auto name = "dictGetDecimal64OrDefault"; };
 struct NameDictGetDecimal128OrDefault { static constexpr auto name = "dictGetDecimal128OrDefault"; };
+struct NameDictGetStringOrDefault { static constexpr auto name = "dictGetStringOrDefault"; };
 
 using FunctionDictGetUInt8OrDefault = FunctionDictGetOrDefault<DataTypeUInt8, NameDictGetUInt8OrDefault>;
 using FunctionDictGetUInt16OrDefault = FunctionDictGetOrDefault<DataTypeUInt16, NameDictGetUInt16OrDefault>;
@@ -1332,8 +492,10 @@ using FunctionDictGetUUIDOrDefault = FunctionDictGetOrDefault<DataTypeUUID, Name
 using FunctionDictGetDecimal32OrDefault = FunctionDictGetOrDefault<DataTypeDecimal<Decimal32>, NameDictGetDecimal32OrDefault>;
 using FunctionDictGetDecimal64OrDefault = FunctionDictGetOrDefault<DataTypeDecimal<Decimal64>, NameDictGetDecimal64OrDefault>;
 using FunctionDictGetDecimal128OrDefault = FunctionDictGetOrDefault<DataTypeDecimal<Decimal128>, NameDictGetDecimal128OrDefault>;
+using FunctionDictGetStringOrDefault = FunctionDictGetOrDefault<DataTypeString, NameDictGetStringOrDefault>;
 
 
+/// TODO: Use new API
 /// This variant of function derives the result type automatically.
 class FunctionDictGetNoType final : public IFunction
 {

From d16a572eeecfa383fc736a49900c7c63fb7015d6 Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Sat, 19 Dec 2020 16:24:51 +0300
Subject: [PATCH 0213/1238] Updated IDictionaryBase interface

---
 src/Dictionaries/CacheDictionary.cpp          |   2 +-
 src/Dictionaries/CacheDictionary.h            |   4 +-
 .../ComplexKeyCacheDictionary.cpp             |   2 +-
 src/Dictionaries/ComplexKeyCacheDictionary.h  |   2 -
 src/Dictionaries/ComplexKeyDirectDictionary.h |   2 -
 .../ComplexKeyHashedDictionary.cpp            |   2 +-
 src/Dictionaries/ComplexKeyHashedDictionary.h |   2 -
 src/Dictionaries/DictionaryBlockInputStream.h |  56 ++++----
 src/Dictionaries/DirectDictionary.h           |   4 +-
 src/Dictionaries/FlatDictionary.cpp           |  64 ++++++---
 src/Dictionaries/FlatDictionary.h             |  15 +-
 src/Dictionaries/HashedDictionary.cpp         |   2 +-
 src/Dictionaries/HashedDictionary.h           |   4 +-
 src/Dictionaries/IDictionary.h                |  35 ++++-
 src/Dictionaries/IPAddressDictionary.cpp      |   2 +-
 src/Dictionaries/IPAddressDictionary.h        |   2 -
 src/Dictionaries/PolygonDictionary.h          |   2 -
 src/Dictionaries/RangeHashedDictionary.h      |   2 -
 src/Dictionaries/SSDCacheDictionary.cpp       |   2 +-
 src/Dictionaries/SSDCacheDictionary.h         |   4 +-
 .../SSDComplexKeyCacheDictionary.h            |   2 -
 src/Functions/FunctionsExternalDictionaries.h | 129 +++---------------
 22 files changed, 147 insertions(+), 194 deletions(-)

diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp
index 4beb2caa1f1..1a0d7e28fea 100644
--- a/src/Dictionaries/CacheDictionary.cpp
+++ b/src/Dictionaries/CacheDictionary.cpp
@@ -707,7 +707,7 @@ PaddedPODArray<CacheDictionary::Key> CacheDictionary::getCachedIds() const
 
 BlockInputStreamPtr CacheDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
 {
-    using BlockInputStreamType = DictionaryBlockInputStream<CacheDictionary, Key>;
+    using BlockInputStreamType = DictionaryBlockInputStream<Key>;
     return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, getCachedIds(), column_names);
 }
 
diff --git a/src/Dictionaries/CacheDictionary.h b/src/Dictionaries/CacheDictionary.h
index f2f364af9da..d242c6bc3eb 100644
--- a/src/Dictionaries/CacheDictionary.h
+++ b/src/Dictionaries/CacheDictionary.h
@@ -119,8 +119,6 @@ public:
 
     std::exception_ptr getLastException() const override;
 
-    static constexpr DictionaryGetByType get_by_type = DictionaryGetByType::unsupported;
-
     template <typename T>
     using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
 
@@ -190,7 +188,7 @@ public:
 
     void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const;
 
-    void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
+    void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const;
 
     BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
 
diff --git a/src/Dictionaries/ComplexKeyCacheDictionary.cpp b/src/Dictionaries/ComplexKeyCacheDictionary.cpp
index 0c517699272..fefa2a44aaf 100644
--- a/src/Dictionaries/ComplexKeyCacheDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyCacheDictionary.cpp
@@ -388,7 +388,7 @@ BlockInputStreamPtr ComplexKeyCacheDictionary::getBlockInputStream(const Names &
                 keys.push_back(cells[idx].key);
     }
 
-    using BlockInputStreamType = DictionaryBlockInputStream<ComplexKeyCacheDictionary, UInt64>;
+    using BlockInputStreamType = DictionaryBlockInputStream<UInt64>;
     return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, keys, column_names);
 }
 
diff --git a/src/Dictionaries/ComplexKeyCacheDictionary.h b/src/Dictionaries/ComplexKeyCacheDictionary.h
index 36a1457570c..2663fee266d 100644
--- a/src/Dictionaries/ComplexKeyCacheDictionary.h
+++ b/src/Dictionaries/ComplexKeyCacheDictionary.h
@@ -89,8 +89,6 @@ public:
         return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
     }
 
-    static constexpr DictionaryGetByType get_by_type = DictionaryGetByType::unsupported;
-
     template <typename T>
     using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
 
diff --git a/src/Dictionaries/ComplexKeyDirectDictionary.h b/src/Dictionaries/ComplexKeyDirectDictionary.h
index 670f2ac0a85..dc602be103f 100644
--- a/src/Dictionaries/ComplexKeyDirectDictionary.h
+++ b/src/Dictionaries/ComplexKeyDirectDictionary.h
@@ -60,8 +60,6 @@ public:
         return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
     }
 
-    static constexpr DictionaryGetByType get_by_type = DictionaryGetByType::unsupported;
-
     template <typename T>
     using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
 
diff --git a/src/Dictionaries/ComplexKeyHashedDictionary.cpp b/src/Dictionaries/ComplexKeyHashedDictionary.cpp
index 676196fabd2..e5a9aa13f08 100644
--- a/src/Dictionaries/ComplexKeyHashedDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyHashedDictionary.cpp
@@ -735,7 +735,7 @@ std::vector<StringRef> ComplexKeyHashedDictionary::getKeys(const Attribute & att
 
 BlockInputStreamPtr ComplexKeyHashedDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
 {
-    using BlockInputStreamType = DictionaryBlockInputStream<ComplexKeyHashedDictionary, UInt64>;
+    using BlockInputStreamType = DictionaryBlockInputStream<UInt64>;
     return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, getKeys(), column_names);
 }
 
diff --git a/src/Dictionaries/ComplexKeyHashedDictionary.h b/src/Dictionaries/ComplexKeyHashedDictionary.h
index 82677458298..baf6628eebd 100644
--- a/src/Dictionaries/ComplexKeyHashedDictionary.h
+++ b/src/Dictionaries/ComplexKeyHashedDictionary.h
@@ -60,8 +60,6 @@ public:
         return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
     }
 
-    static constexpr DictionaryGetByType get_by_type = DictionaryGetByType::unsupported;
-
     template <typename T>
     using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
 
diff --git a/src/Dictionaries/DictionaryBlockInputStream.h b/src/Dictionaries/DictionaryBlockInputStream.h
index 96d5fac966f..53b513b5d33 100644
--- a/src/Dictionaries/DictionaryBlockInputStream.h
+++ b/src/Dictionaries/DictionaryBlockInputStream.h
@@ -25,12 +25,10 @@ namespace ErrorCodes
 /* BlockInputStream implementation for external dictionaries
  * read() returns blocks consisting of the in-memory contents of the dictionaries
  */
-template <typename DictionaryType, typename Key>
+template <typename Key>
 class DictionaryBlockInputStream : public DictionaryBlockInputStreamBase
 {
 public:
-    using DictionaryPtr = std::shared_ptr<DictionaryType const>;
-
     DictionaryBlockInputStream(
         std::shared_ptr<const IDictionaryBase> dictionary, UInt64 max_block_size, PaddedPODArray<Key> && ids, const Names & column_names);
 
@@ -72,7 +70,7 @@ private:
         const DictionaryStructure & dictionary_structure,
         ColumnsWithTypeAndName & columns) const;
 
-    DictionaryPtr dictionary;
+    std::shared_ptr<const IDictionaryBase> dictionary;
     Names column_names;
     PaddedPODArray<Key> ids;
     ColumnsWithTypeAndName key_columns;
@@ -92,25 +90,25 @@ private:
 };
 
 
-template <typename DictionaryType, typename Key>
-DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
+template <typename Key>
+DictionaryBlockInputStream<Key>::DictionaryBlockInputStream(
     std::shared_ptr<const IDictionaryBase> dictionary_, UInt64 max_block_size_, PaddedPODArray<Key> && ids_, const Names & column_names_)
     : DictionaryBlockInputStreamBase(ids_.size(), max_block_size_)
-    , dictionary(std::static_pointer_cast<const DictionaryType>(dictionary_))
+    , dictionary(dictionary_)
     , column_names(column_names_)
     , ids(std::move(ids_))
     , key_type(DictionaryKeyType::Id)
 {
 }
 
-template <typename DictionaryType, typename Key>
-DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
+template <typename Key>
+DictionaryBlockInputStream<Key>::DictionaryBlockInputStream(
     std::shared_ptr<const IDictionaryBase> dictionary_,
     UInt64 max_block_size_,
     const std::vector<StringRef> & keys,
     const Names & column_names_)
     : DictionaryBlockInputStreamBase(keys.size(), max_block_size_)
-    , dictionary(std::static_pointer_cast<const DictionaryType>(dictionary_))
+    , dictionary(dictionary_)
     , column_names(column_names_)
     , key_type(DictionaryKeyType::ComplexKey)
 {
@@ -118,8 +116,8 @@ DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
     fillKeyColumns(keys, 0, keys.size(), dictionary_structure, key_columns);
 }
 
-template <typename DictionaryType, typename Key>
-DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
+template <typename Key>
+DictionaryBlockInputStream<Key>::DictionaryBlockInputStream(
     std::shared_ptr<const IDictionaryBase> dictionary_,
     UInt64 max_block_size_,
     const Columns & data_columns_,
@@ -127,19 +125,20 @@ DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
     GetColumnsFunction && get_key_columns_function_,
     GetColumnsFunction && get_view_columns_function_)
     : DictionaryBlockInputStreamBase(data_columns_.front()->size(), max_block_size_)
-    , dictionary(std::static_pointer_cast<const DictionaryType>(dictionary_))
+    , dictionary(dictionary_)
     , column_names(column_names_)
     , data_columns(data_columns_)
-    , get_key_columns_function(get_key_columns_function_)
-    , get_view_columns_function(get_view_columns_function_)
+    , get_key_columns_function(std::move(get_key_columns_function_))
+    , get_view_columns_function(std::move(get_view_columns_function_))
     , key_type(DictionaryKeyType::Callback)
 {
 }
 
 
-template <typename DictionaryType, typename Key>
-Block DictionaryBlockInputStream<DictionaryType, Key>::getBlock(size_t start, size_t length) const
+template <typename Key>
+Block DictionaryBlockInputStream<Key>::getBlock(size_t start, size_t length) const
 {
+    /// TODO: Rewrite
     switch (key_type)
     {
         case DictionaryKeyType::ComplexKey:
@@ -186,11 +185,10 @@ Block DictionaryBlockInputStream<DictionaryType, Key>::getBlock(size_t start, si
     throw Exception("Unexpected DictionaryKeyType.", ErrorCodes::LOGICAL_ERROR);
 }
 
-template <typename DictionaryType, typename Key>
-Block DictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
+template <typename Key>
+Block DictionaryBlockInputStream<Key>::fillBlock(
     const PaddedPODArray<Key> & ids_to_fill, const Columns & keys, const DataTypes & types, ColumnsWithTypeAndName && view) const
 {
-    constexpr auto dictionary_get_by_type = DictionaryType::get_by_type;
     std::unordered_set<std::string> names(column_names.begin(), column_names.end());
 
     DataTypes data_types = types;
@@ -215,6 +213,9 @@ Block DictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
         block_columns.emplace_back(ids_column, std::make_shared<DataTypeUInt64>(), structure.id->name);
     }
 
+    /// TODO: This can be optimized
+    auto dictionary_identifier_type = dictionary->getIdentifierType();
+
     for (const auto idx : ext::range(0, structure.attributes.size()))
     {
         const DictionaryAttribute & attribute = structure.attributes[idx];
@@ -222,11 +223,12 @@ Block DictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
         {
             ColumnPtr column;
 
-            if constexpr (dictionary_get_by_type == DictionaryGetByType::getByIdentifiers)
+            if (dictionary_identifier_type == DictionaryIdentifierType::simple)
             {
-                column = dictionary->get(attribute.name, attribute.type, ids_column, nullptr /* default_untyped */);
+                column = dictionary->getColumn(
+                    attribute.name, attribute.type, {ids_column}, {std::make_shared<DataTypeUInt64>()}, nullptr /* default_untyped*/);
             }
-            else if constexpr (dictionary_get_by_type == DictionaryGetByType::getByComplexKeys)
+            else if (dictionary_identifier_type == DictionaryIdentifierType::complex)
             {
                 column = nullptr;
             }
@@ -242,8 +244,8 @@ Block DictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
     return Block(block_columns);
 }
 
-template <typename DictionaryType, typename Key>
-ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromIds(const PaddedPODArray<Key> & ids_to_fill) const
+template <typename Key>
+ColumnPtr DictionaryBlockInputStream<Key>::getColumnFromIds(const PaddedPODArray<Key> & ids_to_fill) const
 {
     auto column_vector = ColumnVector<UInt64>::create();
     column_vector->getData().reserve(ids_to_fill.size());
@@ -253,8 +255,8 @@ ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromIds(cons
 }
 
 
-template <typename DictionaryType, typename Key>
-void DictionaryBlockInputStream<DictionaryType, Key>::fillKeyColumns(
+template <typename Key>
+void DictionaryBlockInputStream<Key>::fillKeyColumns(
     const std::vector<StringRef> & keys,
     size_t start,
     size_t size,
diff --git a/src/Dictionaries/DirectDictionary.h b/src/Dictionaries/DirectDictionary.h
index 31e2994febb..8b59fe16c69 100644
--- a/src/Dictionaries/DirectDictionary.h
+++ b/src/Dictionaries/DirectDictionary.h
@@ -65,8 +65,6 @@ public:
     void isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const override;
     void isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
 
-    static constexpr DictionaryGetByType get_by_type = DictionaryGetByType::unsupported; 
-
     template <typename T>
     using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
 
@@ -136,7 +134,7 @@ public:
 
     void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const;
 
-    void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
+    void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const;
 
     BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
 
diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp
index aa03179491e..fde46be7656 100644
--- a/src/Dictionaries/FlatDictionary.cpp
+++ b/src/Dictionaries/FlatDictionary.cpp
@@ -105,26 +105,20 @@ void FlatDictionary::isInConstantVector(const Key child_id, const PaddedPODArray
     isInImpl(child_id, ancestor_ids, out);
 }
 
-ColumnPtr FlatDictionary::get(const std::string& attribute_name, const DataTypePtr &, const ColumnPtr id_column, const ColumnPtr default_untyped) const
+ColumnPtr FlatDictionary::getColumn(
+        const std::string& attribute_name,
+        const DataTypePtr &,
+        const Columns & key_columns,
+        const DataTypes &,
+        const ColumnPtr default_untyped) const
 {
+    assert(!key_columns.empty());
     ColumnPtr result;
 
-    const auto *id_col = checkAndGetColumn<ColumnUInt64>(id_column.get());
-    const auto *id_col_const = checkAndGetColumnConst<ColumnUInt64>(id_column.get()); 
+    PaddedPODArray<Key> backup_storage;
 
-    if (id_col == nullptr && id_col_const == nullptr)
-    {
-        throw Exception{"Identifiers column must be UInt64", ErrorCodes::ILLEGAL_COLUMN};
-    }
-
-    PaddedPODArray<UInt64> ids_const(1);
-    
-    if (id_col_const != nullptr) {
-        ids_const[0] = id_col_const->getValue<UInt64>();     
-    }
-
-    const auto& ids = id_col != nullptr ? id_col->getData() : ids_const;
-    bool is_const = id_col_const != nullptr;
+    const auto& ids = getColumnDataAsIdendifiers(*key_columns.front(), backup_storage);
+    bool is_const = isColumnConst(*key_columns.front());
 
     const auto & attribute = getAttribute(attribute_name);
 
@@ -272,8 +266,17 @@ ColumnPtr FlatDictionary::get(const std::string& attribute_name, const DataTypeP
     return result;
 }
 
-void FlatDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
+
+ColumnUInt8::Ptr FlatDictionary::has(const Columns & key_columns, const DataTypes &) const
 {
+    assert(!key_columns.empty());
+
+    PaddedPODArray<Key> backup_storage;
+    const auto& ids = getColumnDataAsIdendifiers(*key_columns.front(), backup_storage);
+
+    auto result = ColumnUInt8::create(ext::size(ids));
+    auto& out = result->getData();
+
     const auto ids_count = ext::size(ids);
 
     for (const auto i : ext::range(0, ids_count))
@@ -283,8 +286,9 @@ void FlatDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8>
     }
 
     query_count.fetch_add(ids_count, std::memory_order_relaxed);
-}
 
+    return result;
+}
 
 void FlatDictionary::createAttributes()
 {
@@ -556,6 +560,28 @@ const FlatDictionary::Attribute & FlatDictionary::getAttribute(const std::string
     return attributes[it->second];
 }
 
+const PaddedPODArray<FlatDictionary::Key> & FlatDictionary::getColumnDataAsIdendifiers(const IColumn & column, PaddedPODArray<Key> & backup_storage) const
+{
+
+    if (const auto *id_col = checkAndGetColumn<ColumnUInt64>(&column))
+    {
+        return id_col->getData();
+    }
+    else if (const auto *id_col_const = checkAndGetColumnConst<ColumnUInt64>(&column))
+    {
+        const auto full_column = id_col_const->convertToFullColumnIfConst();
+        const auto size = full_column->size();
+        backup_storage.resize(size);
+        for (size_t i = 0; i < size; ++i)
+            backup_storage[i] = full_column->getUInt(i);
+
+        return backup_storage;
+    }
+    else 
+        throw Exception{"Identifier column must be UInt64", ErrorCodes::ILLEGAL_COLUMN};
+
+    return backup_storage;
+}
 
 PaddedPODArray<FlatDictionary::Key> FlatDictionary::getIds() const
 {
@@ -570,7 +596,7 @@ PaddedPODArray<FlatDictionary::Key> FlatDictionary::getIds() const
 
 BlockInputStreamPtr FlatDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
 {
-    using BlockInputStreamType = DictionaryBlockInputStream<FlatDictionary, Key>;
+    using BlockInputStreamType = DictionaryBlockInputStream<Key>;
     return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, getIds(), column_names);
 }
 
diff --git a/src/Dictionaries/FlatDictionary.h b/src/Dictionaries/FlatDictionary.h
index 6a6fafe5903..af211f2edda 100644
--- a/src/Dictionaries/FlatDictionary.h
+++ b/src/Dictionaries/FlatDictionary.h
@@ -69,14 +69,17 @@ public:
     void isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const override;
     void isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
 
-    template <typename T>
-    using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
+    DictionaryIdentifierType getIdentifierType() const override { return DictionaryIdentifierType::simple; }
 
-    static constexpr DictionaryGetByType get_by_type = DictionaryGetByType::getByIdentifiers;
+    ColumnPtr getColumn(
+        const std::string& attribute_name,
+        const DataTypePtr & result_type,
+        const Columns & key_columns,
+        const DataTypes & key_types,
+        const ColumnPtr default_untyped) const override;
 
-    ColumnPtr get(const std::string& attribute_name, const DataTypePtr & result_type, const ColumnPtr id_column, const ColumnPtr default_untyped) const;
 
-    void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
+    ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override;
 
     BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
 
@@ -160,6 +163,8 @@ private:
     template <typename ChildType, typename AncestorType>
     void isInImpl(const ChildType & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const;
 
+    const PaddedPODArray<Key> & getColumnDataAsIdendifiers(const IColumn & column, PaddedPODArray<Key> & backup_storage) const;
+
     PaddedPODArray<Key> getIds() const;
 
     const DictionaryStructure dict_struct;
diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp
index 1439036bf8d..34fa227e546 100644
--- a/src/Dictionaries/HashedDictionary.cpp
+++ b/src/Dictionaries/HashedDictionary.cpp
@@ -876,7 +876,7 @@ PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds() const
 
 BlockInputStreamPtr HashedDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
 {
-    using BlockInputStreamType = DictionaryBlockInputStream<HashedDictionary, Key>;
+    using BlockInputStreamType = DictionaryBlockInputStream<Key>;
     return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, getIds(), column_names);
 }
 
diff --git a/src/Dictionaries/HashedDictionary.h b/src/Dictionaries/HashedDictionary.h
index 497d18f93e0..b3d1c16ed26 100644
--- a/src/Dictionaries/HashedDictionary.h
+++ b/src/Dictionaries/HashedDictionary.h
@@ -66,8 +66,6 @@ public:
 
     void toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const override;
 
-    static constexpr DictionaryGetByType get_by_type = DictionaryGetByType::unsupported;  
-
     template <typename T>
     using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
 
@@ -138,7 +136,7 @@ public:
 
     void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const;
 
-    void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
+    void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const;
 
     void isInVectorVector(
         const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
diff --git a/src/Dictionaries/IDictionary.h b/src/Dictionaries/IDictionary.h
index 6bc8d32295a..ce613a26020 100644
--- a/src/Dictionaries/IDictionary.h
+++ b/src/Dictionaries/IDictionary.h
@@ -10,6 +10,8 @@
 #include <common/StringRef.h>
 #include "IDictionarySource.h"
 #include <Dictionaries/DictionaryStructure.h>
+#include <DataTypes/IDataType.h>
+#include <Columns/ColumnsNumber.h>
 
 #include <chrono>
 #include <memory>
@@ -29,6 +31,13 @@ using DictionaryPtr = std::unique_ptr<IDictionaryBase>;
 struct DictionaryStructure;
 class ColumnString;
 
+enum class DictionaryIdentifierType
+{
+    simple,
+    complex,
+    range
+};
+
 struct IDictionaryBase : public IExternalLoadable
 {
     using Key = UInt64;
@@ -85,6 +94,29 @@ struct IDictionaryBase : public IExternalLoadable
 
     virtual bool isInjective(const std::string & attribute_name) const = 0;
 
+    virtual DictionaryIdentifierType getIdentifierType() const /* = 0; */
+    {
+        throw Exception(ErrorCodes::NOT_IMPLEMENTED,
+                        "Get identifier type not supported", getDictionaryID().getNameForLogs());
+    }
+
+    virtual ColumnPtr getColumn(
+        const std::string & attribute_name [[maybe_unused]],
+        const DataTypePtr & result_type [[maybe_unused]],
+        const Columns & key_columns [[maybe_unused]],
+        const DataTypes & key_types [[maybe_unused]],
+        const ColumnPtr default_untyped [[maybe_unused]]) const /* = 0; */
+    {
+        throw Exception(ErrorCodes::NOT_IMPLEMENTED,
+                        "Get column not supported", getDictionaryID().getNameForLogs());
+    }
+
+    virtual ColumnUInt8::Ptr has(const Columns & key_columns [[maybe_unused]], const DataTypes & key_types [[maybe_unused]]) const /* = 0; */
+    {
+         throw Exception(ErrorCodes::NOT_IMPLEMENTED,
+                        "Has not supported", getDictionaryID().getNameForLogs());
+    }
+
     virtual BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const = 0;
 
     bool supportUpdates() const override { return true; }
@@ -115,7 +147,6 @@ protected:
     const String full_name;
 };
 
-
 struct IDictionary : IDictionaryBase
 {
     IDictionary(const StorageID & dict_id_) : IDictionaryBase(dict_id_) {}
@@ -124,8 +155,6 @@ struct IDictionary : IDictionaryBase
 
     virtual void toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const = 0;
 
-    virtual void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const = 0;
-
     /// Methods for hierarchy.
 
     virtual void isInVectorVector(
diff --git a/src/Dictionaries/IPAddressDictionary.cpp b/src/Dictionaries/IPAddressDictionary.cpp
index d2bbf6ec2fa..f8af4686ad8 100644
--- a/src/Dictionaries/IPAddressDictionary.cpp
+++ b/src/Dictionaries/IPAddressDictionary.cpp
@@ -1045,7 +1045,7 @@ static auto keyViewGetter()
 
 BlockInputStreamPtr IPAddressDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
 {
-    using BlockInputStreamType = DictionaryBlockInputStream<IPAddressDictionary, UInt64>;
+    using BlockInputStreamType = DictionaryBlockInputStream<UInt64>;
 
 
     const bool is_ipv4 = std::get_if<IPv4Container>(&ip_column) != nullptr;
diff --git a/src/Dictionaries/IPAddressDictionary.h b/src/Dictionaries/IPAddressDictionary.h
index 3040579134c..2009141ebcc 100644
--- a/src/Dictionaries/IPAddressDictionary.h
+++ b/src/Dictionaries/IPAddressDictionary.h
@@ -61,8 +61,6 @@ public:
         return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
     }
 
-    static constexpr DictionaryGetByType get_by_type = DictionaryGetByType::unsupported;
-
     template <typename T>
     using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
 
diff --git a/src/Dictionaries/PolygonDictionary.h b/src/Dictionaries/PolygonDictionary.h
index f22052b59b5..75114cff435 100644
--- a/src/Dictionaries/PolygonDictionary.h
+++ b/src/Dictionaries/PolygonDictionary.h
@@ -80,8 +80,6 @@ public:
 
     BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
 
-    static constexpr DictionaryGetByType get_by_type = DictionaryGetByType::unsupported;
-
     template <typename T>
     using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
 
diff --git a/src/Dictionaries/RangeHashedDictionary.h b/src/Dictionaries/RangeHashedDictionary.h
index 22037fa4718..46ae0390b6a 100644
--- a/src/Dictionaries/RangeHashedDictionary.h
+++ b/src/Dictionaries/RangeHashedDictionary.h
@@ -54,8 +54,6 @@ public:
 
     typedef Int64 RangeStorageType;
 
-    static constexpr DictionaryGetByType get_by_type = DictionaryGetByType::unsupported;
-
     template <typename T>
     using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
 
diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp
index 1cf5946c95c..030f95da2ab 100644
--- a/src/Dictionaries/SSDCacheDictionary.cpp
+++ b/src/Dictionaries/SSDCacheDictionary.cpp
@@ -1575,7 +1575,7 @@ void SSDCacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UIn
 
 BlockInputStreamPtr SSDCacheDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
 {
-    using BlockInputStreamType = DictionaryBlockInputStream<SSDCacheDictionary, Key>;
+    using BlockInputStreamType = DictionaryBlockInputStream<Key>;
     return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, storage.getCachedIds(), column_names);
 }
 
diff --git a/src/Dictionaries/SSDCacheDictionary.h b/src/Dictionaries/SSDCacheDictionary.h
index 0fa344adc6c..8219a52bac9 100644
--- a/src/Dictionaries/SSDCacheDictionary.h
+++ b/src/Dictionaries/SSDCacheDictionary.h
@@ -354,8 +354,6 @@ public:
     template <typename T>
     using ResultArrayType = SSDCacheStorage::ResultArrayType<T>;
 
-    static constexpr DictionaryGetByType get_by_type = DictionaryGetByType::unsupported;
-
 #define DECLARE(TYPE) \
     void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const;
     DECLARE(UInt8)
@@ -422,7 +420,7 @@ public:
 
     void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * out) const;
 
-    void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
+    void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const;
 
     BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
 
diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.h b/src/Dictionaries/SSDComplexKeyCacheDictionary.h
index b7ef53af054..4758d62f1df 100644
--- a/src/Dictionaries/SSDComplexKeyCacheDictionary.h
+++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.h
@@ -569,8 +569,6 @@ public:
 
     std::exception_ptr getLastException() const override { return storage.getLastException(); }
 
-    static constexpr DictionaryGetByType get_by_type = DictionaryGetByType::unsupported;
-
     template <typename T>
     using ResultArrayType = SSDComplexKeyCacheStorage::ResultArrayType<T>;
 
diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h
index d77d432bbf8..609e14dd38e 100644
--- a/src/Functions/FunctionsExternalDictionaries.h
+++ b/src/Functions/FunctionsExternalDictionaries.h
@@ -175,78 +175,24 @@ private:
         if (input_rows_count == 0)
             return result_type->createColumn();
 
-        auto dict = helper.getDictionary(arguments[0]);
-        ColumnPtr res;
+        auto dictionary = helper.getDictionary(arguments[0]);
+        auto dictionary_identifier_type = dictionary->getIdentifierType();
 
-        if (!((res = executeDispatchSimple<FlatDictionary>(arguments, dict))
-            || (res = executeDispatchSimple<DirectDictionary>(arguments, dict))
-            || (res = executeDispatchSimple<HashedDictionary>(arguments, dict))
-            || (res = executeDispatchSimple<CacheDictionary>(arguments, dict))
-#if defined(OS_LINUX) || defined(__FreeBSD__)
-            || (res = executeDispatchSimple<SSDCacheDictionary>(arguments, dict))
-#endif
-            || (res = executeDispatchComplex<ComplexKeyHashedDictionary>(arguments, dict))
-            || (res = executeDispatchComplex<ComplexKeyDirectDictionary>(arguments, dict))
-            || (res = executeDispatchComplex<ComplexKeyCacheDictionary>(arguments, dict))
-#if defined(OS_LINUX) || defined(__FreeBSD__)
-            || (res = executeDispatchComplex<SSDComplexKeyCacheDictionary>(arguments, dict))
-#endif
-#if !defined(ARCADIA_BUILD)
-            || (res = executeDispatchComplex<IPAddressDictionary>(arguments, dict))
-#endif
-            || (res = executeDispatchComplex<PolygonDictionarySimple>(arguments, dict))
-            || (res = executeDispatchComplex<PolygonDictionaryIndexEach>(arguments, dict))
-            || (res = executeDispatchComplex<PolygonDictionaryIndexCell>(arguments, dict))))
-            throw Exception{"Unsupported dictionary type " + dict->getTypeName(), ErrorCodes::UNKNOWN_TYPE};
+        const auto id_col_untyped = arguments[1].column;
 
-        return res;
-    }
-
-    template <typename DictionaryType>
-    ColumnPtr executeDispatchSimple(
-            const ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
-    {
-        const auto * dict = typeid_cast<const DictionaryType *>(dict_ptr.get());
-        if (!dict)
-            return nullptr;
-
-        const auto * id_col_untyped = arguments[1].column.get();
-        if (const auto * id_col = checkAndGetColumn<ColumnUInt64>(id_col_untyped))
+        if (dictionary_identifier_type == DictionaryIdentifierType::simple)
         {
-            const auto & ids = id_col->getData();
-
-            auto out = ColumnUInt8::create(ext::size(ids));
-            dict->has(ids, out->getData());
-            return out;
+            return dictionary->has({ id_col_untyped }, { std::make_shared<DataTypeUInt64>() });
+        }
+        else if (dictionary_identifier_type == DictionaryIdentifierType::complex)
+        {
+            /// TODO: Check if column is tuple and pass
+            return nullptr;
         }
         else
-            throw Exception{"Second argument of function " + getName() + " must be UInt64", ErrorCodes::ILLEGAL_COLUMN};
-
-        return nullptr;
-    }
-
-    template <typename DictionaryType>
-    ColumnPtr executeDispatchComplex(
-            const ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
-    {
-        const auto * dict = typeid_cast<const DictionaryType *>(dict_ptr.get());
-        if (!dict)
-            return nullptr;
-
-        const ColumnWithTypeAndName & key_col_with_type = arguments[1];
-        const ColumnPtr & key_col = key_col_with_type.column;
-
-        if (checkColumn<ColumnTuple>(key_col.get()))
         {
-            const auto & key_columns = assert_cast<const ColumnTuple &>(*key_col).getColumnsCopy();
-            const auto & key_types = static_cast<const DataTypeTuple &>(*key_col_with_type.type).getElements();
-
-            auto out = ColumnUInt8::create(key_col_with_type.column->size());
-            dict->has(key_columns, key_types, out->getData());
-            return out;
+            return nullptr;
         }
-        else
-            throw Exception{"Second argument of function " + getName() + " must be " + dict->getKeyDescription(), ErrorCodes::TYPE_MISMATCH};
     }
 
     mutable FunctionDictHelper helper;
@@ -342,44 +288,9 @@ private:
         if (input_rows_count == 0)
             return result_type->createColumn();
 
-        auto dict = helper.getDictionary(arguments[0]);
+        auto dictionary = helper.getDictionary(arguments[0]);
         ColumnPtr res;
 
-        if (!((res = executeDispatch<FlatDictionary>(arguments, result_type, dict))
-            || (res = executeDispatch<HashedDictionary>(arguments, result_type, dict))
-            || (res = executeDispatch<DirectDictionary>(arguments, result_type, dict))
-            || (res = executeDispatch<CacheDictionary>(arguments, result_type, dict))
-#if defined(OS_LINUX) || defined(__FreeBSD__)
-            || (res = executeDispatch<SSDCacheDictionary>(arguments, result_type, dict))
-#endif
-            || (res = executeDispatch<ComplexKeyHashedDictionary>(arguments, result_type, dict))
-            || (res = executeDispatch<ComplexKeyDirectDictionary>(arguments, result_type, dict))
-            || (res = executeDispatch<ComplexKeyCacheDictionary>(arguments, result_type, dict))
-#if defined(OS_LINUX) || defined(__FreeBSD__)
-            || (res = executeDispatch<SSDComplexKeyCacheDictionary>(arguments, result_type, dict))
-#endif
-#if !defined(ARCADIA_BUILD)
-            || (res = executeDispatch<IPAddressDictionary>(arguments, result_type, dict))
-#endif
-            || (res = executeDispatch<PolygonDictionarySimple>(arguments, result_type, dict))
-            || (res = executeDispatch<PolygonDictionaryIndexEach>(arguments, result_type, dict))
-            || (res = executeDispatch<PolygonDictionaryIndexCell>(arguments, result_type, dict))
-            || (res = executeDispatch<RangeHashedDictionary>(arguments, result_type, dict))))
-            throw Exception{"Unsupported dictionary type " + dict->getTypeName(), ErrorCodes::UNKNOWN_TYPE};
-
-        return res;
-    }
-
-    template <typename DictionaryType>
-    ColumnPtr executeDispatch(
-        const ColumnsWithTypeAndName & arguments,
-        const DataTypePtr & result_type,
-        const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
-    {
-        const auto * dictionary = typeid_cast<const DictionaryType *>(dict_ptr.get());
-        if (!dictionary)
-            return nullptr;
-
         const auto * attr_name_col = checkAndGetColumnConst<ColumnString>(arguments[1].column.get());
         if (!attr_name_col)
             throw Exception{"Second argument of function " + getName() + " must be a constant string", ErrorCodes::ILLEGAL_COLUMN};
@@ -388,28 +299,30 @@ private:
 
         const auto id_col_untyped = arguments[2].column;
 
-        constexpr auto dictionary_get_by_type = DictionaryType::get_by_type;
+        auto dictionary_identifier_type = dictionary->getIdentifierType();
 
-        if constexpr (dictionary_get_by_type == DictionaryGetByType::getByIdentifiers)
+        if (dictionary_identifier_type == DictionaryIdentifierType::simple)
         {
             if (dictionary_get_function_type == DictionaryGetFunctionType::withDefault)
             {
                 const auto default_col_untyped = arguments[3].column;
-                return dictionary->get(attr_name, result_type, id_col_untyped, default_col_untyped);
+                res = dictionary->getColumn(attr_name, result_type, { id_col_untyped }, { std::make_shared<DataTypeUInt64>() }, default_col_untyped);
             }
             else
             {
-                return dictionary->get(attr_name, result_type, id_col_untyped, nullptr);
+                res = dictionary->getColumn(attr_name, result_type, { id_col_untyped }, { std::make_shared<DataTypeUInt64>() }, nullptr);
             }
         }
-        else if constexpr (dictionary_get_by_type == DictionaryGetByType::getByComplexKeys)
+        else if (dictionary_identifier_type == DictionaryIdentifierType::complex)
         {
-            return nullptr;
+            res = nullptr;
         }
         else
         {
-            return nullptr;
+            res = nullptr;
         }
+
+        return res;
     }
 
     mutable FunctionDictHelper helper;

From f24a8eadb26360ef024b3db8ec6052c68881a59a Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Sat, 19 Dec 2020 17:27:39 +0300
Subject: [PATCH 0214/1238] Updated ComplexKeyHashed dictionary to new
 interface

---
 .../ComplexKeyHashedDictionary.cpp            | 543 +++++++-----------
 src/Dictionaries/ComplexKeyHashedDictionary.h |  89 +--
 src/Dictionaries/DictionaryBlockInputStream.h |   3 +-
 src/Dictionaries/FlatDictionary.cpp           |  65 ++-
 src/Functions/FunctionsExternalDictionaries.h |  28 +-
 5 files changed, 290 insertions(+), 438 deletions(-)

diff --git a/src/Dictionaries/ComplexKeyHashedDictionary.cpp b/src/Dictionaries/ComplexKeyHashedDictionary.cpp
index e5a9aa13f08..6a734a2f7ab 100644
--- a/src/Dictionaries/ComplexKeyHashedDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyHashedDictionary.cpp
@@ -1,6 +1,8 @@
 #include "ComplexKeyHashedDictionary.h"
 #include <ext/map.h>
 #include <ext/range.h>
+#include <Columns/ColumnsNumber.h>
+#include <Functions/FunctionHelpers.h>
 #include "DictionaryBlockInputStream.h"
 #include "DictionaryFactory.h"
 
@@ -32,216 +34,186 @@ ComplexKeyHashedDictionary::ComplexKeyHashedDictionary(
     calculateBytesAllocated();
 }
 
-#define DECLARE(TYPE) \
-    void ComplexKeyHashedDictionary::get##TYPE( \
-        const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const \
-    { \
-        dict_struct.validateKeyTypes(key_types); \
-\
-        const auto & attribute = getAttribute(attribute_name); \
-        checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
-\
-        const auto null_value = std::get<TYPE>(attribute.null_values); \
-\
-        getItemsImpl<TYPE, TYPE>( \
-            attribute, \
-            key_columns, \
-            [&](const size_t row, const auto value) { out[row] = value; }, \
-            [&](const size_t) { return null_value; }); \
-    }
-DECLARE(UInt8)
-DECLARE(UInt16)
-DECLARE(UInt32)
-DECLARE(UInt64)
-DECLARE(UInt128)
-DECLARE(Int8)
-DECLARE(Int16)
-DECLARE(Int32)
-DECLARE(Int64)
-DECLARE(Float32)
-DECLARE(Float64)
-DECLARE(Decimal32)
-DECLARE(Decimal64)
-DECLARE(Decimal128)
-#undef DECLARE
-
-void ComplexKeyHashedDictionary::getString(
-    const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
-{
-    dict_struct.validateKeyTypes(key_types);
-
-    const auto & attribute = getAttribute(attribute_name);
-    checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
-
-    const auto & null_value = StringRef{std::get<String>(attribute.null_values)};
-
-    getItemsImpl<StringRef, StringRef>(
-        attribute,
-        key_columns,
-        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
-        [&](const size_t) { return null_value; });
-}
-
-#define DECLARE(TYPE) \
-    void ComplexKeyHashedDictionary::get##TYPE( \
-        const std::string & attribute_name, \
-        const Columns & key_columns, \
-        const DataTypes & key_types, \
-        const PaddedPODArray<TYPE> & def, \
-        ResultArrayType<TYPE> & out) const \
-    { \
-        dict_struct.validateKeyTypes(key_types); \
-\
-        const auto & attribute = getAttribute(attribute_name); \
-        checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
-\
-        getItemsImpl<TYPE, TYPE>( \
-            attribute, \
-            key_columns, \
-            [&](const size_t row, const auto value) { out[row] = value; }, \
-            [&](const size_t row) { return def[row]; }); \
-    }
-DECLARE(UInt8)
-DECLARE(UInt16)
-DECLARE(UInt32)
-DECLARE(UInt64)
-DECLARE(UInt128)
-DECLARE(Int8)
-DECLARE(Int16)
-DECLARE(Int32)
-DECLARE(Int64)
-DECLARE(Float32)
-DECLARE(Float64)
-DECLARE(Decimal32)
-DECLARE(Decimal64)
-DECLARE(Decimal128)
-#undef DECLARE
-
-void ComplexKeyHashedDictionary::getString(
+ColumnPtr ComplexKeyHashedDictionary::getColumn(
     const std::string & attribute_name,
+    const DataTypePtr &,
     const Columns & key_columns,
     const DataTypes & key_types,
-    const ColumnString * const def,
-    ColumnString * const out) const
+    const ColumnPtr default_untyped) const
 {
     dict_struct.validateKeyTypes(key_types);
 
+    ColumnPtr result;
+
     const auto & attribute = getAttribute(attribute_name);
-    checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
 
-    getItemsImpl<StringRef, StringRef>(
-        attribute,
-        key_columns,
-        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
-        [&](const size_t row) { return def->getDataAt(row); });
+    /// TODO: Check that attribute type is same as result type
+
+    auto size = key_columns.front()->size();
+
+    auto type_call = [&](const auto &dictionary_attribute_type)
+    {
+        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+        using AttributeType = typename Type::AttributeType;
+
+        if constexpr (std::is_same_v<AttributeType, String>)
+        {
+            auto column_string = ColumnString::create();
+            auto out = column_string.get();
+
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ColumnString>(*default_untyped))
+                {
+                    getItemsImpl<StringRef, StringRef>(
+                        attribute,
+                        key_columns,
+                        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
+                        [&](const size_t row) { return default_col->getDataAt(row); });
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<String>();
+
+                    getItemsImpl<StringRef, StringRef>(
+                        attribute,
+                        key_columns,
+                        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
+                        [&](const size_t) { return def; });
+                }
+            }
+            else
+            {
+                const auto & null_value = std::get<StringRef>(attribute.null_values);
+
+                getItemsImpl<StringRef, StringRef>(
+                    attribute,
+                    key_columns,
+                    [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
+                    [&](const size_t) { return null_value; });
+            }
+
+            result = std::move(column_string);
+        }
+        else if constexpr (IsNumber<AttributeType>)
+        {
+            auto column = ColumnVector<AttributeType>::create(size);
+            auto& out = column->getData();
+
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ColumnVector<AttributeType>>(*default_untyped))
+                {
+                    getItemsImpl<AttributeType, AttributeType>(
+                        attribute,
+                        key_columns,
+                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t row) { return default_col->getData()[row]; }
+                    );
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ColumnVector<AttributeType>>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<AttributeType>();
+
+                    getItemsImpl<AttributeType, AttributeType>(
+                        attribute,
+                        key_columns,
+                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t) { return def; }
+                    );
+                }
+            }
+            else
+            {
+                const auto null_value = std::get<AttributeType>(attribute.null_values);
+
+                getItemsImpl<AttributeType, AttributeType>(
+                    attribute,
+                    key_columns,
+                    [&](const size_t row, const auto value) { return out[row] = value; },
+                    [&](const size_t) { return null_value; });
+            }
+
+            result = std::move(column);
+        }
+        else if constexpr (IsDecimalNumber<AttributeType>)
+        {
+            // auto scale = getDecimalScale(*attribute.type);
+            auto column = ColumnDecimal<AttributeType>::create(size, 0);
+            auto& out = column->getData();
+
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ColumnDecimal<AttributeType>>(*default_untyped))
+                {
+                    getItemsImpl<AttributeType, AttributeType>(
+                        attribute,
+                        key_columns,
+                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t row) { return default_col->getData()[row]; }
+                    );
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ColumnDecimal<AttributeType>>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<AttributeType>();
+
+                    getItemsImpl<AttributeType, AttributeType>(
+                        attribute,
+                        key_columns,
+                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t) { return def; }
+                    );
+                }
+            }
+            else
+            {
+                const auto null_value = std::get<AttributeType>(attribute.null_values);
+
+                getItemsImpl<AttributeType, AttributeType>(
+                    attribute,
+                    key_columns,
+                    [&](const size_t row, const auto value) { return out[row] = value; },
+                    [&](const size_t) { return null_value; }
+                );
+            }
+
+            result = std::move(column);
+        }
+    };
+
+    callOnDictionaryAttributeType(attribute.type, type_call);
+   
+    return result;
 }
 
-#define DECLARE(TYPE) \
-    void ComplexKeyHashedDictionary::get##TYPE( \
-        const std::string & attribute_name, \
-        const Columns & key_columns, \
-        const DataTypes & key_types, \
-        const TYPE def, \
-        ResultArrayType<TYPE> & out) const \
-    { \
-        dict_struct.validateKeyTypes(key_types); \
-\
-        const auto & attribute = getAttribute(attribute_name); \
-        checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
-\
-        getItemsImpl<TYPE, TYPE>( \
-            attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
-    }
-DECLARE(UInt8)
-DECLARE(UInt16)
-DECLARE(UInt32)
-DECLARE(UInt64)
-DECLARE(UInt128)
-DECLARE(Int8)
-DECLARE(Int16)
-DECLARE(Int32)
-DECLARE(Int64)
-DECLARE(Float32)
-DECLARE(Float64)
-DECLARE(Decimal32)
-DECLARE(Decimal64)
-DECLARE(Decimal128)
-#undef DECLARE
-
-void ComplexKeyHashedDictionary::getString(
-    const std::string & attribute_name,
-    const Columns & key_columns,
-    const DataTypes & key_types,
-    const String & def,
-    ColumnString * const out) const
+ColumnUInt8::Ptr ComplexKeyHashedDictionary::has(const Columns & key_columns, const DataTypes & key_types) const
 {
     dict_struct.validateKeyTypes(key_types);
 
-    const auto & attribute = getAttribute(attribute_name);
-    checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
-
-    getItemsImpl<StringRef, StringRef>(
-        attribute,
-        key_columns,
-        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
-        [&](const size_t) { return StringRef{def}; });
-}
-
-void ComplexKeyHashedDictionary::has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const
-{
-    dict_struct.validateKeyTypes(key_types);
+    auto size = key_columns.front()->size();
+    auto result = ColumnUInt8::create(size);
+    auto& out = result->getData();
 
     const auto & attribute = attributes.front();
 
-    switch (attribute.type)
+    auto type_call = [&](const auto & dictionary_attribute_type)
     {
-        case AttributeUnderlyingType::utUInt8:
-            has<UInt8>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utUInt16:
-            has<UInt16>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utUInt32:
-            has<UInt32>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utUInt64:
-            has<UInt64>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utUInt128:
-            has<UInt128>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utInt8:
-            has<Int8>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utInt16:
-            has<Int16>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utInt32:
-            has<Int32>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utInt64:
-            has<Int64>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utFloat32:
-            has<Float32>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utFloat64:
-            has<Float64>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utString:
-            has<StringRef>(attribute, key_columns, out);
-            break;
+        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+        using AttributeType = typename Type::AttributeType;
 
-        case AttributeUnderlyingType::utDecimal32:
-            has<Decimal32>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utDecimal64:
-            has<Decimal64>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utDecimal128:
-            has<Decimal128>(attribute, key_columns, out);
-            break;
-    }
+        if constexpr (std::is_same_v<AttributeType, String>)
+        {
+            has<StringRef>(attribute, key_columns, out);
+        }
+        else
+        {
+            has<AttributeType>(attribute, key_columns, out);
+        }
+    };
+
+    callOnDictionaryAttributeType(attribute.type, type_call);
+
+    return result;
 }
 
 void ComplexKeyHashedDictionary::createAttributes()
@@ -407,66 +379,30 @@ void ComplexKeyHashedDictionary::addAttributeSize(const Attribute & attribute)
     bucket_count = map_ref.getBufferSizeInCells();
 }
 
+template <>
+void ComplexKeyHashedDictionary::addAttributeSize<String>(const Attribute & attribute)
+{
+    const auto & map_ref = std::get<ContainerType<StringRef>>(attribute.maps);
+    bytes_allocated += sizeof(ContainerType<StringRef>) + map_ref.getBufferSizeInBytes();
+    bucket_count = map_ref.getBufferSizeInCells();
+    bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
+}
+
 void ComplexKeyHashedDictionary::calculateBytesAllocated()
 {
     bytes_allocated += attributes.size() * sizeof(attributes.front());
 
     for (const auto & attribute : attributes)
     {
-        switch (attribute.type)
+        auto type_call = [&](const auto & dictionary_attribute_type)
         {
-            case AttributeUnderlyingType::utUInt8:
-                addAttributeSize<UInt8>(attribute);
-                break;
-            case AttributeUnderlyingType::utUInt16:
-                addAttributeSize<UInt16>(attribute);
-                break;
-            case AttributeUnderlyingType::utUInt32:
-                addAttributeSize<UInt32>(attribute);
-                break;
-            case AttributeUnderlyingType::utUInt64:
-                addAttributeSize<UInt64>(attribute);
-                break;
-            case AttributeUnderlyingType::utUInt128:
-                addAttributeSize<UInt128>(attribute);
-                break;
-            case AttributeUnderlyingType::utInt8:
-                addAttributeSize<Int8>(attribute);
-                break;
-            case AttributeUnderlyingType::utInt16:
-                addAttributeSize<Int16>(attribute);
-                break;
-            case AttributeUnderlyingType::utInt32:
-                addAttributeSize<Int32>(attribute);
-                break;
-            case AttributeUnderlyingType::utInt64:
-                addAttributeSize<Int64>(attribute);
-                break;
-            case AttributeUnderlyingType::utFloat32:
-                addAttributeSize<Float32>(attribute);
-                break;
-            case AttributeUnderlyingType::utFloat64:
-                addAttributeSize<Float64>(attribute);
-                break;
+            using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+            using AttributeType = typename Type::AttributeType;
 
-            case AttributeUnderlyingType::utDecimal32:
-                addAttributeSize<Decimal32>(attribute);
-                break;
-            case AttributeUnderlyingType::utDecimal64:
-                addAttributeSize<Decimal64>(attribute);
-                break;
-            case AttributeUnderlyingType::utDecimal128:
-                addAttributeSize<Decimal128>(attribute);
-                break;
+            addAttributeSize<AttributeType>(attribute);
+        };
 
-            case AttributeUnderlyingType::utString:
-            {
-                addAttributeSize<StringRef>(attribute);
-                bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
-
-                break;
-            }
-        }
+        callOnDictionaryAttributeType(attribute.type, type_call);
     }
 
     bytes_allocated += keys_pool.size();
@@ -479,65 +415,29 @@ void ComplexKeyHashedDictionary::createAttributeImpl(Attribute & attribute, cons
     attribute.maps.emplace<ContainerType<T>>();
 }
 
+template <>
+void ComplexKeyHashedDictionary::createAttributeImpl<String>(Attribute & attribute, const Field & null_value)
+{
+    attribute.string_arena = std::make_unique<Arena>();
+    const String & string = null_value.get<String>();
+    const char * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
+    attribute.null_values.emplace<StringRef>(string_in_arena, string.size());
+    attribute.maps.emplace<ContainerType<StringRef>>();
+}
+
 ComplexKeyHashedDictionary::Attribute
 ComplexKeyHashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
 {
     Attribute attr{type, {}, {}, {}};
 
-    switch (type)
+    auto type_call = [&](const auto &dictionary_attribute_type)
     {
-        case AttributeUnderlyingType::utUInt8:
-            createAttributeImpl<UInt8>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utUInt16:
-            createAttributeImpl<UInt16>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utUInt32:
-            createAttributeImpl<UInt32>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utUInt64:
-            createAttributeImpl<UInt64>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utUInt128:
-            createAttributeImpl<UInt128>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utInt8:
-            createAttributeImpl<Int8>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utInt16:
-            createAttributeImpl<Int16>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utInt32:
-            createAttributeImpl<Int32>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utInt64:
-            createAttributeImpl<Int64>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utFloat32:
-            createAttributeImpl<Float32>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utFloat64:
-            createAttributeImpl<Float64>(attr, null_value);
-            break;
+        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+        using AttributType = typename Type::AttributeType;
+        createAttributeImpl<AttributType>(attr, null_value);
+    };
 
-        case AttributeUnderlyingType::utDecimal32:
-            createAttributeImpl<Decimal32>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utDecimal64:
-            createAttributeImpl<Decimal64>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utDecimal128:
-            createAttributeImpl<Decimal128>(attr, null_value);
-            break;
-
-        case AttributeUnderlyingType::utString:
-        {
-            attr.null_values = null_value.get<String>();
-            attr.maps.emplace<ContainerType<StringRef>>();
-            attr.string_arena = std::make_unique<Arena>();
-            break;
-        }
-    }
+    callOnDictionaryAttributeType(type, type_call);
 
     return attr;
 }
@@ -684,41 +584,26 @@ std::vector<StringRef> ComplexKeyHashedDictionary::getKeys() const
 {
     const Attribute & attribute = attributes.front();
 
-    switch (attribute.type)
-    {
-        case AttributeUnderlyingType::utUInt8:
-            return getKeys<UInt8>(attribute);
-        case AttributeUnderlyingType::utUInt16:
-            return getKeys<UInt16>(attribute);
-        case AttributeUnderlyingType::utUInt32:
-            return getKeys<UInt32>(attribute);
-        case AttributeUnderlyingType::utUInt64:
-            return getKeys<UInt64>(attribute);
-        case AttributeUnderlyingType::utUInt128:
-            return getKeys<UInt128>(attribute);
-        case AttributeUnderlyingType::utInt8:
-            return getKeys<Int8>(attribute);
-        case AttributeUnderlyingType::utInt16:
-            return getKeys<Int16>(attribute);
-        case AttributeUnderlyingType::utInt32:
-            return getKeys<Int32>(attribute);
-        case AttributeUnderlyingType::utInt64:
-            return getKeys<Int64>(attribute);
-        case AttributeUnderlyingType::utFloat32:
-            return getKeys<Float32>(attribute);
-        case AttributeUnderlyingType::utFloat64:
-            return getKeys<Float64>(attribute);
-        case AttributeUnderlyingType::utString:
-            return getKeys<StringRef>(attribute);
+    std::vector<StringRef> result;
 
-        case AttributeUnderlyingType::utDecimal32:
-            return getKeys<Decimal32>(attribute);
-        case AttributeUnderlyingType::utDecimal64:
-            return getKeys<Decimal64>(attribute);
-        case AttributeUnderlyingType::utDecimal128:
-            return getKeys<Decimal128>(attribute);
-    }
-    return {};
+    auto type_call = [&](const auto & dictionary_attribute_type)
+    {
+        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+        using AttributeType = typename Type::AttributeType;
+
+        if constexpr (std::is_same_v<AttributeType, String>)
+        {
+            result = getKeys<StringRef>(attribute);
+        }
+        else
+        {
+            result = getKeys<AttributeType>(attribute);
+        }
+    };
+
+    callOnDictionaryAttributeType(attribute.type, type_call);
+
+    return result;
 }
 
 template <typename T>
diff --git a/src/Dictionaries/ComplexKeyHashedDictionary.h b/src/Dictionaries/ComplexKeyHashedDictionary.h
index baf6628eebd..01a222d8629 100644
--- a/src/Dictionaries/ComplexKeyHashedDictionary.h
+++ b/src/Dictionaries/ComplexKeyHashedDictionary.h
@@ -60,91 +60,16 @@ public:
         return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
     }
 
-    template <typename T>
-    using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
+    DictionaryIdentifierType getIdentifierType() const override { return DictionaryIdentifierType::complex; }
 
-#define DECLARE(TYPE) \
-    void get##TYPE( \
-        const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const;
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
-
-    void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const;
-
-#define DECLARE(TYPE) \
-    void get##TYPE( \
-        const std::string & attribute_name, \
-        const Columns & key_columns, \
-        const DataTypes & key_types, \
-        const PaddedPODArray<TYPE> & def, \
-        ResultArrayType<TYPE> & out) const;
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
-
-    void getString(
-        const std::string & attribute_name,
+    ColumnPtr getColumn(
+        const std::string& attribute_name,
+        const DataTypePtr & result_type,
         const Columns & key_columns,
         const DataTypes & key_types,
-        const ColumnString * const def,
-        ColumnString * const out) const;
+        const ColumnPtr default_untyped) const override;
 
-#define DECLARE(TYPE) \
-    void get##TYPE( \
-        const std::string & attribute_name, \
-        const Columns & key_columns, \
-        const DataTypes & key_types, \
-        const TYPE def, \
-        ResultArrayType<TYPE> & out) const;
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
-
-    void getString(
-        const std::string & attribute_name,
-        const Columns & key_columns,
-        const DataTypes & key_types,
-        const String & def,
-        ColumnString * const out) const;
-
-    void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
+    ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override;
 
     BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
 
@@ -170,7 +95,7 @@ private:
             Decimal128,
             Float32,
             Float64,
-            String>
+            StringRef>
             null_values;
         std::variant<
             ContainerType<UInt8>,
diff --git a/src/Dictionaries/DictionaryBlockInputStream.h b/src/Dictionaries/DictionaryBlockInputStream.h
index 53b513b5d33..915ab7601ed 100644
--- a/src/Dictionaries/DictionaryBlockInputStream.h
+++ b/src/Dictionaries/DictionaryBlockInputStream.h
@@ -230,7 +230,8 @@ Block DictionaryBlockInputStream<Key>::fillBlock(
             }
             else if (dictionary_identifier_type == DictionaryIdentifierType::complex)
             {
-                column = nullptr;
+                column = dictionary->getColumn(
+                    attribute.name, attribute.type, keys, data_types, nullptr /* default_untyped*/);
             }
             else
             {
diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp
index fde46be7656..137255b370e 100644
--- a/src/Dictionaries/FlatDictionary.cpp
+++ b/src/Dictionaries/FlatDictionary.cpp
@@ -109,16 +109,15 @@ ColumnPtr FlatDictionary::getColumn(
         const std::string& attribute_name,
         const DataTypePtr &,
         const Columns & key_columns,
-        const DataTypes &,
+        const DataTypes & key_types,
         const ColumnPtr default_untyped) const
 {
-    assert(!key_columns.empty());
+    dict_struct.validateKeyTypes(key_types);
     ColumnPtr result;
 
     PaddedPODArray<Key> backup_storage;
 
     const auto& ids = getColumnDataAsIdendifiers(*key_columns.front(), backup_storage);
-    bool is_const = isColumnConst(*key_columns.front());
 
     const auto & attribute = getAttribute(attribute_name);
 
@@ -257,12 +256,7 @@ ColumnPtr FlatDictionary::getColumn(
     };
 
     callOnDictionaryAttributeType(attribute.type, type_call);
-    
-    if (is_const)
-    {
-        result = ColumnConst::create(std::move(result), 1);
-    }
-
+   
     return result;
 }
 
@@ -540,14 +534,55 @@ void FlatDictionary::setAttributeValueImpl<String>(Attribute & attribute, const
 
 void FlatDictionary::setAttributeValue(Attribute & attribute, const Key id, const Field & value)
 {
-    auto type_call = [&](const auto &dictionary_attribute_type)
+    switch (attribute.type)
     {
-        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
-        using AttributeType = typename Type::AttributeType;
-        setAttributeValueImpl<AttributeType>(attribute, id, value.get<AttributeType>());
-    };
+        case AttributeUnderlyingType::utUInt8:
+            setAttributeValueImpl<UInt8>(attribute, id, value.get<UInt64>());
+            break;
+        case AttributeUnderlyingType::utUInt16:
+            setAttributeValueImpl<UInt16>(attribute, id, value.get<UInt64>());
+            break;
+        case AttributeUnderlyingType::utUInt32:
+            setAttributeValueImpl<UInt32>(attribute, id, value.get<UInt64>());
+            break;
+        case AttributeUnderlyingType::utUInt64:
+            setAttributeValueImpl<UInt64>(attribute, id, value.get<UInt64>());
+            break;
+        case AttributeUnderlyingType::utUInt128:
+            setAttributeValueImpl<UInt128>(attribute, id, value.get<UInt128>());
+            break;
+        case AttributeUnderlyingType::utInt8:
+            setAttributeValueImpl<Int8>(attribute, id, value.get<Int64>());
+            break;
+        case AttributeUnderlyingType::utInt16:
+            setAttributeValueImpl<Int16>(attribute, id, value.get<Int64>());
+            break;
+        case AttributeUnderlyingType::utInt32:
+            setAttributeValueImpl<Int32>(attribute, id, value.get<Int64>());
+            break;
+        case AttributeUnderlyingType::utInt64:
+            setAttributeValueImpl<Int64>(attribute, id, value.get<Int64>());
+            break;
+        case AttributeUnderlyingType::utFloat32:
+            setAttributeValueImpl<Float32>(attribute, id, value.get<Float64>());
+            break;
+        case AttributeUnderlyingType::utFloat64:
+            setAttributeValueImpl<Float64>(attribute, id, value.get<Float64>());
+            break;
+        case AttributeUnderlyingType::utString:
+            setAttributeValueImpl<String>(attribute, id, value.get<String>());
+            break;
 
-    callOnDictionaryAttributeType(attribute.type, type_call);
+        case AttributeUnderlyingType::utDecimal32:
+            setAttributeValueImpl<Decimal32>(attribute, id, value.get<Decimal32>());
+            break;
+        case AttributeUnderlyingType::utDecimal64:
+            setAttributeValueImpl<Decimal64>(attribute, id, value.get<Decimal64>());
+            break;
+        case AttributeUnderlyingType::utDecimal128:
+            setAttributeValueImpl<Decimal128>(attribute, id, value.get<Decimal128>());
+            break;
+    }
 }
 
 
diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h
index 609e14dd38e..06c8cd5d650 100644
--- a/src/Functions/FunctionsExternalDictionaries.h
+++ b/src/Functions/FunctionsExternalDictionaries.h
@@ -297,25 +297,31 @@ private:
 
         String attr_name = attr_name_col->getValue<String>();
 
-        const auto id_col_untyped = arguments[2].column;
+        const ColumnWithTypeAndName & key_col_with_type = arguments[2];
+        const auto key_column = key_col_with_type.column;
 
         auto dictionary_identifier_type = dictionary->getIdentifierType();
+        
+        ColumnPtr default_col = nullptr;
+
+        if (dictionary_get_function_type == DictionaryGetFunctionType::withDefault)
+        {
+            default_col = arguments[3].column;
+        }
 
         if (dictionary_identifier_type == DictionaryIdentifierType::simple)
         {
-            if (dictionary_get_function_type == DictionaryGetFunctionType::withDefault)
-            {
-                const auto default_col_untyped = arguments[3].column;
-                res = dictionary->getColumn(attr_name, result_type, { id_col_untyped }, { std::make_shared<DataTypeUInt64>() }, default_col_untyped);
-            }
-            else
-            {
-                res = dictionary->getColumn(attr_name, result_type, { id_col_untyped }, { std::make_shared<DataTypeUInt64>() }, nullptr);
-            }
+            res = dictionary->getColumn(attr_name, result_type, { key_column }, { std::make_shared<DataTypeUInt64>() }, default_col);
         }
         else if (dictionary_identifier_type == DictionaryIdentifierType::complex)
         {
-            res = nullptr;
+            /// Functions in external dictionaries_loader only support full-value (not constant) columns with keys.
+            ColumnPtr key_column_full = key_col_with_type.column->convertToFullColumnIfConst();
+
+            const auto & key_columns = typeid_cast<const ColumnTuple &>(*key_column_full).getColumnsCopy();
+            const auto & key_types = static_cast<const DataTypeTuple &>(*key_col_with_type.type).getElements();
+
+            res = dictionary->getColumn(attr_name, result_type, key_columns, key_types, default_col);
         }
         else
         {

From 8bd98ae432acdb812fb65f19b7defdb229b0432d Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Sat, 19 Dec 2020 19:47:58 +0300
Subject: [PATCH 0215/1238] Remove previous interface enum

---
 src/Dictionaries/DictionaryStructure.cpp | 4 ++--
 src/Dictionaries/DictionaryStructure.h   | 7 -------
 2 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/src/Dictionaries/DictionaryStructure.cpp b/src/Dictionaries/DictionaryStructure.cpp
index 68be50daa2f..b2340cf39a0 100644
--- a/src/Dictionaries/DictionaryStructure.cpp
+++ b/src/Dictionaries/DictionaryStructure.cpp
@@ -14,8 +14,6 @@
 #include <unordered_set>
 #include <ext/range.h>
 
-#include <iostream>
-
 namespace DB
 {
 namespace ErrorCodes
@@ -316,6 +314,8 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
         if ((range_min && name == range_min->name) || (range_max && name == range_max->name))
             continue;
 
+
+        /// TODO: Rewrite
         const auto type_string = config.getString(prefix + "type");
         const auto initial_type = DataTypeFactory::instance().get(type_string);
         auto type = initial_type;
diff --git a/src/Dictionaries/DictionaryStructure.h b/src/Dictionaries/DictionaryStructure.h
index 5656815e375..db5c3fa0103 100644
--- a/src/Dictionaries/DictionaryStructure.h
+++ b/src/Dictionaries/DictionaryStructure.h
@@ -42,13 +42,6 @@ std::string toString(const AttributeUnderlyingType type);
 /// Min and max lifetimes for a dictionary or it's entry
 using DictionaryLifetime = ExternalLoadableLifetime;
 
-enum class DictionaryGetByType {
-    getByIdentifiers,
-    getByComplexKeys,
-    getByRange,
-    unsupported /* will be removed after migration to new interface */
-};
-
 /** Holds the description of a single dictionary attribute:
 *    - name, used for lookup into dictionary and source;
 *    - type, used in conjunction with DataTypeFactory and getAttributeUnderlyingTypeByname;

From d92d843e206d6913be9b80827dda515eae89a644 Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Sat, 19 Dec 2020 20:04:34 +0300
Subject: [PATCH 0216/1238] Fixed compile issue

---
 src/Dictionaries/FlatDictionary.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp
index 137255b370e..0fa5e88ab3b 100644
--- a/src/Dictionaries/FlatDictionary.cpp
+++ b/src/Dictionaries/FlatDictionary.cpp
@@ -467,7 +467,7 @@ void FlatDictionary::createAttributeImpl<String>(Attribute & attribute, const Fi
 
 FlatDictionary::Attribute FlatDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
 {
-    Attribute attr{type, {}, {}, {}, {}};
+    Attribute attr{type, {}, {}, {}};
 
     auto type_call = [&](const auto &dictionary_attribute_type)
     {

From d61e8c083bcf4fa0b50ce2f7e9b0ef05bbb1d8b3 Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Sun, 20 Dec 2020 23:11:28 +0300
Subject: [PATCH 0217/1238] Updated RangeHashedDictionary to new interface

---
 .../ComplexKeyHashedDictionary.cpp            |   7 +-
 src/Dictionaries/FlatDictionary.cpp           |  35 +-
 src/Dictionaries/FlatDictionary.h             |   5 -
 src/Dictionaries/IDictionary.h                |  31 ++
 .../RangeDictionaryBlockInputStream.h         | 116 +----
 src/Dictionaries/RangeHashedDictionary.cpp    | 439 ++++++++++--------
 src/Dictionaries/RangeHashedDictionary.h      |  57 +--
 .../FunctionsExternalDictionaries.cpp         |   4 +-
 src/Functions/FunctionsExternalDictionaries.h | 282 ++++-------
 9 files changed, 406 insertions(+), 570 deletions(-)

diff --git a/src/Dictionaries/ComplexKeyHashedDictionary.cpp b/src/Dictionaries/ComplexKeyHashedDictionary.cpp
index 6a734a2f7ab..23d28bfe6ec 100644
--- a/src/Dictionaries/ComplexKeyHashedDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyHashedDictionary.cpp
@@ -48,7 +48,8 @@ ColumnPtr ComplexKeyHashedDictionary::getColumn(
     const auto & attribute = getAttribute(attribute_name);
 
     /// TODO: Check that attribute type is same as result type
-
+    /// TODO: Check if const will work as expected
+    
     auto size = key_columns.front()->size();
 
     auto type_call = [&](const auto &dictionary_attribute_type)
@@ -433,8 +434,8 @@ ComplexKeyHashedDictionary::createAttributeWithType(const AttributeUnderlyingTyp
     auto type_call = [&](const auto &dictionary_attribute_type)
     {
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
-        using AttributType = typename Type::AttributeType;
-        createAttributeImpl<AttributType>(attr, null_value);
+        using AttributeType = typename Type::AttributeType;
+        createAttributeImpl<AttributeType>(attr, null_value);
     };
 
     callOnDictionaryAttributeType(type, type_call);
diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp
index 0fa5e88ab3b..58e54931dee 100644
--- a/src/Dictionaries/FlatDictionary.cpp
+++ b/src/Dictionaries/FlatDictionary.cpp
@@ -116,12 +116,12 @@ ColumnPtr FlatDictionary::getColumn(
     ColumnPtr result;
 
     PaddedPODArray<Key> backup_storage;
-
-    const auto& ids = getColumnDataAsIdendifiers(*key_columns.front(), backup_storage);
-
+    const auto& ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
+    
     const auto & attribute = getAttribute(attribute_name);
 
     /// TODO: Check that attribute type is same as result type
+    /// TODO: Check if const will work as expected
 
     auto type_call = [&](const auto &dictionary_attribute_type)
     {
@@ -266,7 +266,7 @@ ColumnUInt8::Ptr FlatDictionary::has(const Columns & key_columns, const DataType
     assert(!key_columns.empty());
 
     PaddedPODArray<Key> backup_storage;
-    const auto& ids = getColumnDataAsIdendifiers(*key_columns.front(), backup_storage);
+    const auto& ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
 
     auto result = ColumnUInt8::create(ext::size(ids));
     auto& out = result->getData();
@@ -472,8 +472,8 @@ FlatDictionary::Attribute FlatDictionary::createAttributeWithType(const Attribut
     auto type_call = [&](const auto &dictionary_attribute_type)
     {
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
-        using AttributType = typename Type::AttributeType;
-        createAttributeImpl<AttributType>(attr, null_value);
+        using AttributeType = typename Type::AttributeType;
+        createAttributeImpl<AttributeType>(attr, null_value);
     };
 
     callOnDictionaryAttributeType(type, type_call);
@@ -595,29 +595,6 @@ const FlatDictionary::Attribute & FlatDictionary::getAttribute(const std::string
     return attributes[it->second];
 }
 
-const PaddedPODArray<FlatDictionary::Key> & FlatDictionary::getColumnDataAsIdendifiers(const IColumn & column, PaddedPODArray<Key> & backup_storage) const
-{
-
-    if (const auto *id_col = checkAndGetColumn<ColumnUInt64>(&column))
-    {
-        return id_col->getData();
-    }
-    else if (const auto *id_col_const = checkAndGetColumnConst<ColumnUInt64>(&column))
-    {
-        const auto full_column = id_col_const->convertToFullColumnIfConst();
-        const auto size = full_column->size();
-        backup_storage.resize(size);
-        for (size_t i = 0; i < size; ++i)
-            backup_storage[i] = full_column->getUInt(i);
-
-        return backup_storage;
-    }
-    else 
-        throw Exception{"Identifier column must be UInt64", ErrorCodes::ILLEGAL_COLUMN};
-
-    return backup_storage;
-}
-
 PaddedPODArray<FlatDictionary::Key> FlatDictionary::getIds() const
 {
     const auto ids_count = ext::size(loaded_ids);
diff --git a/src/Dictionaries/FlatDictionary.h b/src/Dictionaries/FlatDictionary.h
index af211f2edda..72da13b4cd4 100644
--- a/src/Dictionaries/FlatDictionary.h
+++ b/src/Dictionaries/FlatDictionary.h
@@ -78,7 +78,6 @@ public:
         const DataTypes & key_types,
         const ColumnPtr default_untyped) const override;
 
-
     ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override;
 
     BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
@@ -90,7 +89,6 @@ private:
     struct Attribute final
     {
         AttributeUnderlyingType type;
-        // bool is_array;
 
         std::variant<
             UInt8,
@@ -127,7 +125,6 @@ private:
             ContainerType<StringRef>>
             arrays;
 
-        std::optional<ContainerType<size_t>> array_offsets;
         std::unique_ptr<Arena> string_arena;
     };
 
@@ -163,8 +160,6 @@ private:
     template <typename ChildType, typename AncestorType>
     void isInImpl(const ChildType & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const;
 
-    const PaddedPODArray<Key> & getColumnDataAsIdendifiers(const IColumn & column, PaddedPODArray<Key> & backup_storage) const;
-
     PaddedPODArray<Key> getIds() const;
 
     const DictionaryStructure dict_struct;
diff --git a/src/Dictionaries/IDictionary.h b/src/Dictionaries/IDictionary.h
index ce613a26020..c619e85f523 100644
--- a/src/Dictionaries/IDictionary.h
+++ b/src/Dictionaries/IDictionary.h
@@ -155,6 +155,7 @@ struct IDictionary : IDictionaryBase
 
     virtual void toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const = 0;
 
+    /// TODO: Rewrite
     /// Methods for hierarchy.
 
     virtual void isInVectorVector(
@@ -196,4 +197,34 @@ inline void checkAttributeType(const IDictionaryBase * dictionary, const std::st
                         attribute_name, toString(attribute_type), toString(to)};
 }
 
+template <typename T>
+static const PaddedPODArray<T> &
+getColumnDataAsPaddedPODArray(const IDictionaryBase * dictionary, const ColumnPtr column, PaddedPODArray<T> & backup_storage)
+{
+    bool is_const_column = isColumnConst(*column);
+    auto full_column = column->convertToFullColumnIfConst();
+    auto vector_col = checkAndGetColumn<ColumnVector<T>>(full_column.get());
+
+    if (!vector_col)
+    {
+        throw Exception{
+            ErrorCodes::TYPE_MISMATCH,
+            "{}: type mismatch: column has wrong type expected {}",
+            dictionary->getDictionaryID().getNameForLogs(),
+            "" /* TODO: Type name*/};
+    }
+
+    if (is_const_column)
+    {
+        // With type conversion and const columns we need to use backup storage here
+        auto & data = vector_col->getData();
+        backup_storage.assign(data);
+
+        return backup_storage;
+    }
+    else
+    {
+        return vector_col->getData();
+    }
+}
 }
diff --git a/src/Dictionaries/RangeDictionaryBlockInputStream.h b/src/Dictionaries/RangeDictionaryBlockInputStream.h
index a2353051e5d..aeb16f389b4 100644
--- a/src/Dictionaries/RangeDictionaryBlockInputStream.h
+++ b/src/Dictionaries/RangeDictionaryBlockInputStream.h
@@ -37,26 +37,6 @@ protected:
     Block getBlock(size_t start, size_t length) const override;
 
 private:
-    template <typename Type>
-    using DictionaryGetter = void (DictionaryType::*)(
-        const std::string &, const PaddedPODArray<Key> &, const PaddedPODArray<Int64> &, PaddedPODArray<Type> &) const;
-
-    template <typename Type>
-    using DictionaryDecimalGetter = void (DictionaryType::*)(
-        const std::string &, const PaddedPODArray<Key> &, const PaddedPODArray<Int64> &, DecimalPaddedPODArray<Type> &) const;
-
-    template <typename AttributeType, typename Getter>
-    ColumnPtr getColumnFromAttribute(
-        Getter getter,
-        const PaddedPODArray<Key> & ids_to_fill,
-        const PaddedPODArray<Int64> & dates,
-        const DictionaryAttribute & attribute,
-        const DictionaryType & concrete_dictionary) const;
-    ColumnPtr getColumnFromAttributeString(
-        const PaddedPODArray<Key> & ids_to_fill,
-        const PaddedPODArray<Int64> & dates,
-        const DictionaryAttribute & attribute,
-        const DictionaryType & concrete_dictionary) const;
     template <typename T>
     ColumnPtr getColumnFromPODArray(const PaddedPODArray<T> & array) const;
 
@@ -122,41 +102,6 @@ Block RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getBlock(
     return fillBlock(block_ids, block_start_dates, block_end_dates);
 }
 
-template <typename DictionaryType, typename RangeType, typename Key>
-template <typename AttributeType, typename Getter>
-ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getColumnFromAttribute(
-    Getter getter,
-    const PaddedPODArray<Key> & ids_to_fill,
-    const PaddedPODArray<Int64> & dates,
-    const DictionaryAttribute & attribute,
-    const DictionaryType & concrete_dictionary) const
-{
-    if constexpr (IsDecimalNumber<AttributeType>)
-    {
-        auto column = ColumnDecimal<AttributeType>::create(ids_to_fill.size(), 0); /// NOTE: There's wrong scale here, but it's unused.
-        (concrete_dictionary.*getter)(attribute.name, ids_to_fill, dates, column->getData());
-        return column;
-    }
-    else
-    {
-        auto column_vector = ColumnVector<AttributeType>::create(ids_to_fill.size());
-        (concrete_dictionary.*getter)(attribute.name, ids_to_fill, dates, column_vector->getData());
-        return column_vector;
-    }
-}
-
-template <typename DictionaryType, typename RangeType, typename Key>
-ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getColumnFromAttributeString(
-    const PaddedPODArray<Key> & ids_to_fill,
-    const PaddedPODArray<Int64> & dates,
-    const DictionaryAttribute & attribute,
-    const DictionaryType & concrete_dictionary) const
-{
-    auto column_string = ColumnString::create();
-    concrete_dictionary.getString(attribute.name, ids_to_fill, dates, column_string.get());
-    return column_string;
-}
-
 template <typename DictionaryType, typename RangeType, typename Key>
 template <typename T>
 ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getColumnFromPODArray(const PaddedPODArray<T> & array) const
@@ -168,7 +113,6 @@ ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getCo
     return column_vector;
 }
 
-
 template <typename DictionaryType, typename RangeType, typename Key>
 template <typename DictionarySpecialAttributeType, typename T>
 void RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::addSpecialColumn(
@@ -216,68 +160,24 @@ Block RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::fillBlock
     std::unordered_set<std::string> names(column_names.begin(), column_names.end());
 
     addSpecialColumn(structure.id, std::make_shared<DataTypeUInt64>(), "ID", names, ids_to_fill, columns);
+    auto ids_column = columns.back().column;
     addSpecialColumn(structure.range_min, structure.range_max->type, "Range Start", names, block_start_dates, columns);
     addSpecialColumn(structure.range_max, structure.range_max->type, "Range End", names, block_end_dates, columns);
 
     auto date_key = makeDateKey(block_start_dates, block_end_dates);
+    auto date_column = getColumnFromPODArray(date_key);
 
     for (const auto idx : ext::range(0, structure.attributes.size()))
     {
         const DictionaryAttribute & attribute = structure.attributes[idx];
         if (names.find(attribute.name) != names.end())
         {
-            ColumnPtr column;
-#define GET_COLUMN_FORM_ATTRIBUTE(TYPE) \
-    column = getColumnFromAttribute<TYPE>(&DictionaryType::get##TYPE, ids_to_fill, date_key, attribute, *dictionary)
-            switch (attribute.underlying_type)
-            {
-                case AttributeUnderlyingType::utUInt8:
-                    GET_COLUMN_FORM_ATTRIBUTE(UInt8);
-                    break;
-                case AttributeUnderlyingType::utUInt16:
-                    GET_COLUMN_FORM_ATTRIBUTE(UInt16);
-                    break;
-                case AttributeUnderlyingType::utUInt32:
-                    GET_COLUMN_FORM_ATTRIBUTE(UInt32);
-                    break;
-                case AttributeUnderlyingType::utUInt64:
-                    GET_COLUMN_FORM_ATTRIBUTE(UInt64);
-                    break;
-                case AttributeUnderlyingType::utUInt128:
-                    GET_COLUMN_FORM_ATTRIBUTE(UInt128);
-                    break;
-                case AttributeUnderlyingType::utInt8:
-                    GET_COLUMN_FORM_ATTRIBUTE(Int8);
-                    break;
-                case AttributeUnderlyingType::utInt16:
-                    GET_COLUMN_FORM_ATTRIBUTE(Int16);
-                    break;
-                case AttributeUnderlyingType::utInt32:
-                    GET_COLUMN_FORM_ATTRIBUTE(Int32);
-                    break;
-                case AttributeUnderlyingType::utInt64:
-                    GET_COLUMN_FORM_ATTRIBUTE(Int64);
-                    break;
-                case AttributeUnderlyingType::utFloat32:
-                    GET_COLUMN_FORM_ATTRIBUTE(Float32);
-                    break;
-                case AttributeUnderlyingType::utFloat64:
-                    GET_COLUMN_FORM_ATTRIBUTE(Float64);
-                    break;
-                case AttributeUnderlyingType::utDecimal32:
-                    GET_COLUMN_FORM_ATTRIBUTE(Decimal32);
-                    break;
-                case AttributeUnderlyingType::utDecimal64:
-                    GET_COLUMN_FORM_ATTRIBUTE(Decimal64);
-                    break;
-                case AttributeUnderlyingType::utDecimal128:
-                    GET_COLUMN_FORM_ATTRIBUTE(Decimal128);
-                    break;
-                case AttributeUnderlyingType::utString:
-                    column = getColumnFromAttributeString(ids_to_fill, date_key, attribute, *dictionary);
-                    break;
-            }
-#undef GET_COLUMN_FORM_ATTRIBUTE
+            ColumnPtr column = dictionary->getColumn(
+                attribute.name,
+                attribute.type,
+                {ids_column, date_column},
+                {std::make_shared<DataTypeUInt64>(), structure.range_max->type},
+                nullptr);
             columns.emplace_back(column, attribute.type, attribute.name);
         }
     }
diff --git a/src/Dictionaries/RangeHashedDictionary.cpp b/src/Dictionaries/RangeHashedDictionary.cpp
index eeed581c6f4..57299da43b8 100644
--- a/src/Dictionaries/RangeHashedDictionary.cpp
+++ b/src/Dictionaries/RangeHashedDictionary.cpp
@@ -50,6 +50,7 @@ namespace ErrorCodes
     extern const int DICTIONARY_IS_EMPTY;
     extern const int TYPE_MISMATCH;
     extern const int UNSUPPORTED_METHOD;
+    extern const int NOT_IMPLEMENTED;
 }
 
 bool RangeHashedDictionary::Range::isCorrectDate(const RangeStorageType & date)
@@ -85,66 +86,163 @@ RangeHashedDictionary::RangeHashedDictionary(
     calculateBytesAllocated();
 }
 
-
-#define DECLARE_MULTIPLE_GETTER(TYPE) \
-    void RangeHashedDictionary::get##TYPE( \
-        const std::string & attribute_name, \
-        const PaddedPODArray<Key> & ids, \
-        const PaddedPODArray<RangeStorageType> & dates, \
-        ResultArrayType<TYPE> & out) const \
-    { \
-        const auto & attribute = getAttributeWithType(attribute_name, AttributeUnderlyingType::ut##TYPE); \
-        getItems<TYPE>(attribute, ids, dates, out); \
-    }
-DECLARE_MULTIPLE_GETTER(UInt8)
-DECLARE_MULTIPLE_GETTER(UInt16)
-DECLARE_MULTIPLE_GETTER(UInt32)
-DECLARE_MULTIPLE_GETTER(UInt64)
-DECLARE_MULTIPLE_GETTER(UInt128)
-DECLARE_MULTIPLE_GETTER(Int8)
-DECLARE_MULTIPLE_GETTER(Int16)
-DECLARE_MULTIPLE_GETTER(Int32)
-DECLARE_MULTIPLE_GETTER(Int64)
-DECLARE_MULTIPLE_GETTER(Float32)
-DECLARE_MULTIPLE_GETTER(Float64)
-DECLARE_MULTIPLE_GETTER(Decimal32)
-DECLARE_MULTIPLE_GETTER(Decimal64)
-DECLARE_MULTIPLE_GETTER(Decimal128)
-#undef DECLARE_MULTIPLE_GETTER
-
-void RangeHashedDictionary::getString(
+ColumnPtr RangeHashedDictionary::getColumn(
     const std::string & attribute_name,
-    const PaddedPODArray<Key> & ids,
-    const PaddedPODArray<RangeStorageType> & dates,
-    ColumnString * out) const
+    const DataTypePtr &,
+    const Columns & key_columns,
+    const DataTypes &,
+    const ColumnPtr default_untyped) const
 {
-    const auto & attribute = getAttributeWithType(attribute_name, AttributeUnderlyingType::utString);
-    const auto & attr = *std::get<Ptr<StringRef>>(attribute.maps);
-    const auto & null_value = std::get<String>(attribute.null_values);
+    /// TODO: Validate input types
 
-    for (const auto i : ext::range(0, ids.size()))
+    ColumnPtr result;
+
+    const auto & attribute = getAttribute(attribute_name);
+
+    /// TODO: Check that attribute type is same as result type
+
+    auto size = key_columns.front()->size();
+
+    auto type_call = [&](const auto &dictionary_attribute_type)
     {
-        const auto * it = attr.find(ids[i]);
-        if (it)
+        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+        using AttributeType = typename Type::AttributeType;
+
+        if constexpr (std::is_same_v<AttributeType, String>)
         {
-            const auto date = dates[i];
-            const auto & ranges_and_values = it->getMapped();
-            const auto val_it
-                = std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), [date](const Value<StringRef> & v)
-                  {
-                      return v.range.contains(date);
-                  });
+            auto column_string = ColumnString::create();
+            auto out = column_string.get();
 
-            const auto string_ref = val_it != std::end(ranges_and_values) ? val_it->value : StringRef{null_value};
-            out->insertData(string_ref.data, string_ref.size);
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ColumnString>(*default_untyped))
+                {
+                    getItemsImpl<StringRef, StringRef>(
+                        attribute,
+                        key_columns,
+                        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
+                        [&](const size_t row) { return default_col->getDataAt(row); });
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<String>();
+
+                    getItemsImpl<StringRef, StringRef>(
+                        attribute,
+                        key_columns,
+                        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
+                        [&](const size_t) { return def; });
+                }
+            }
+            else
+            {
+                const auto & null_value = std::get<StringRef>(attribute.null_values);
+
+                getItemsImpl<StringRef, StringRef>(
+                    attribute,
+                    key_columns,
+                    [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
+                    [&](const size_t) { return null_value; });
+            }
+
+            result = std::move(column_string);
         }
-        else
-            out->insertData(null_value.data(), null_value.size());
-    }
+        else if constexpr (IsNumber<AttributeType>)
+        {
+            auto column = ColumnVector<AttributeType>::create(size);
+            auto& out = column->getData();
 
-    query_count.fetch_add(ids.size(), std::memory_order_relaxed);
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ColumnVector<AttributeType>>(*default_untyped))
+                {
+                    getItemsImpl<AttributeType, AttributeType>(
+                        attribute,
+                        key_columns,
+                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t row) { return default_col->getData()[row]; }
+                    );
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ColumnVector<AttributeType>>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<AttributeType>();
+
+                    getItemsImpl<AttributeType, AttributeType>(
+                        attribute,
+                        key_columns,
+                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t) { return def; }
+                    );
+                }
+            }
+            else
+            {
+                const auto null_value = std::get<AttributeType>(attribute.null_values);
+
+                getItemsImpl<AttributeType, AttributeType>(
+                    attribute,
+                    key_columns,
+                    [&](const size_t row, const auto value) { return out[row] = value; },
+                    [&](const size_t) { return null_value; });
+            }
+
+            result = std::move(column);
+        }
+        else if constexpr (IsDecimalNumber<AttributeType>)
+        {
+            // auto scale = getDecimalScale(*attribute.type);
+            auto column = ColumnDecimal<AttributeType>::create(size, 0);
+            auto& out = column->getData();
+
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ColumnDecimal<AttributeType>>(*default_untyped))
+                {
+                    getItemsImpl<AttributeType, AttributeType>(
+                        attribute,
+                        key_columns,
+                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t row) { return default_col->getData()[row]; }
+                    );
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ColumnDecimal<AttributeType>>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<AttributeType>();
+
+                    getItemsImpl<AttributeType, AttributeType>(
+                        attribute,
+                        key_columns,
+                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t) { return def; }
+                    );
+                }
+            }
+            else
+            {
+                const auto null_value = std::get<AttributeType>(attribute.null_values);
+
+                getItemsImpl<AttributeType, AttributeType>(
+                    attribute,
+                    key_columns,
+                    [&](const size_t row, const auto value) { return out[row] = value; },
+                    [&](const size_t) { return null_value; }
+                );
+            }
+
+            result = std::move(column);
+        }
+    };
+
+    callOnDictionaryAttributeType(attribute.type, type_call);
+   
+    return result;
 }
 
+ColumnUInt8::Ptr RangeHashedDictionary::has(const Columns &, const DataTypes &) const
+{
+    throw Exception(ErrorCodes::NOT_IMPLEMENTED,
+        "Has not supported", getDictionaryID().getNameForLogs());
+}
 
 void RangeHashedDictionary::createAttributes()
 {
@@ -220,66 +318,84 @@ void RangeHashedDictionary::addAttributeSize(const Attribute & attribute)
     bucket_count = map_ref->getBufferSizeInCells();
 }
 
+template <>
+void RangeHashedDictionary::addAttributeSize<String>(const Attribute & attribute)
+{
+    const auto & map_ref = std::get<Ptr<StringRef>>(attribute.maps);
+    bytes_allocated += sizeof(Collection<StringRef>) + map_ref->getBufferSizeInBytes();
+    bucket_count = map_ref->getBufferSizeInCells();
+    bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
+}
+
 void RangeHashedDictionary::calculateBytesAllocated()
 {
     bytes_allocated += attributes.size() * sizeof(attributes.front());
 
     for (const auto & attribute : attributes)
     {
-        switch (attribute.type)
+        auto type_call = [&](const auto & dictionary_attribute_type) 
         {
-            case AttributeUnderlyingType::utUInt8:
-                addAttributeSize<UInt8>(attribute);
-                break;
-            case AttributeUnderlyingType::utUInt16:
-                addAttributeSize<UInt16>(attribute);
-                break;
-            case AttributeUnderlyingType::utUInt32:
-                addAttributeSize<UInt32>(attribute);
-                break;
-            case AttributeUnderlyingType::utUInt64:
-                addAttributeSize<UInt64>(attribute);
-                break;
-            case AttributeUnderlyingType::utUInt128:
-                addAttributeSize<UInt128>(attribute);
-                break;
-            case AttributeUnderlyingType::utInt8:
-                addAttributeSize<Int8>(attribute);
-                break;
-            case AttributeUnderlyingType::utInt16:
-                addAttributeSize<Int16>(attribute);
-                break;
-            case AttributeUnderlyingType::utInt32:
-                addAttributeSize<Int32>(attribute);
-                break;
-            case AttributeUnderlyingType::utInt64:
-                addAttributeSize<Int64>(attribute);
-                break;
-            case AttributeUnderlyingType::utFloat32:
-                addAttributeSize<Float32>(attribute);
-                break;
-            case AttributeUnderlyingType::utFloat64:
-                addAttributeSize<Float64>(attribute);
-                break;
+            using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+            using AttributeType = typename Type::AttributeType;
+            addAttributeSize<AttributeType>(attribute);
+        };
 
-            case AttributeUnderlyingType::utDecimal32:
-                addAttributeSize<Decimal32>(attribute);
-                break;
-            case AttributeUnderlyingType::utDecimal64:
-                addAttributeSize<Decimal64>(attribute);
-                break;
-            case AttributeUnderlyingType::utDecimal128:
-                addAttributeSize<Decimal128>(attribute);
-                break;
+        callOnDictionaryAttributeType(attribute.type, type_call);
 
-            case AttributeUnderlyingType::utString:
-            {
-                addAttributeSize<StringRef>(attribute);
-                bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
+        // switch (attribute.type)
+        // {
+        //     case AttributeUnderlyingType::utUInt8:
+        //         addAttributeSize<UInt8>(attribute);
+        //         break;
+        //     case AttributeUnderlyingType::utUInt16:
+        //         addAttributeSize<UInt16>(attribute);
+        //         break;
+        //     case AttributeUnderlyingType::utUInt32:
+        //         addAttributeSize<UInt32>(attribute);
+        //         break;
+        //     case AttributeUnderlyingType::utUInt64:
+        //         addAttributeSize<UInt64>(attribute);
+        //         break;
+        //     case AttributeUnderlyingType::utUInt128:
+        //         addAttributeSize<UInt128>(attribute);
+        //         break;
+        //     case AttributeUnderlyingType::utInt8:
+        //         addAttributeSize<Int8>(attribute);
+        //         break;
+        //     case AttributeUnderlyingType::utInt16:
+        //         addAttributeSize<Int16>(attribute);
+        //         break;
+        //     case AttributeUnderlyingType::utInt32:
+        //         addAttributeSize<Int32>(attribute);
+        //         break;
+        //     case AttributeUnderlyingType::utInt64:
+        //         addAttributeSize<Int64>(attribute);
+        //         break;
+        //     case AttributeUnderlyingType::utFloat32:
+        //         addAttributeSize<Float32>(attribute);
+        //         break;
+        //     case AttributeUnderlyingType::utFloat64:
+        //         addAttributeSize<Float64>(attribute);
+        //         break;
 
-                break;
-            }
-        }
+        //     case AttributeUnderlyingType::utDecimal32:
+        //         addAttributeSize<Decimal32>(attribute);
+        //         break;
+        //     case AttributeUnderlyingType::utDecimal64:
+        //         addAttributeSize<Decimal64>(attribute);
+        //         break;
+        //     case AttributeUnderlyingType::utDecimal128:
+        //         addAttributeSize<Decimal128>(attribute);
+        //         break;
+
+        //     case AttributeUnderlyingType::utString:
+        //     {
+        //         addAttributeSize<StringRef>(attribute);
+        //         bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
+
+        //         break;
+        //     }
+        // }
     }
 }
 
@@ -290,113 +406,54 @@ void RangeHashedDictionary::createAttributeImpl(Attribute & attribute, const Fie
     attribute.maps = std::make_unique<Collection<T>>();
 }
 
+template <>
+void RangeHashedDictionary::createAttributeImpl<String>(Attribute & attribute, const Field & null_value)
+{
+    attribute.string_arena = std::make_unique<Arena>();
+    const String & string = null_value.get<String>();
+    const char * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
+    attribute.null_values.emplace<StringRef>(string_in_arena, string.size());
+    attribute.maps = std::make_unique<Collection<StringRef>>();
+}
+
 RangeHashedDictionary::Attribute
 RangeHashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
 {
     Attribute attr{type, {}, {}, {}};
 
-    switch (type)
+    auto type_call = [&](const auto &dictionary_attribute_type)
     {
-        case AttributeUnderlyingType::utUInt8:
-            createAttributeImpl<UInt8>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utUInt16:
-            createAttributeImpl<UInt16>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utUInt32:
-            createAttributeImpl<UInt32>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utUInt64:
-            createAttributeImpl<UInt64>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utUInt128:
-            createAttributeImpl<UInt128>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utInt8:
-            createAttributeImpl<Int8>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utInt16:
-            createAttributeImpl<Int16>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utInt32:
-            createAttributeImpl<Int32>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utInt64:
-            createAttributeImpl<Int64>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utFloat32:
-            createAttributeImpl<Float32>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utFloat64:
-            createAttributeImpl<Float64>(attr, null_value);
-            break;
+        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+        using AttributeType = typename Type::AttributeType;
+        createAttributeImpl<AttributeType>(attr, null_value);
+    };
 
-        case AttributeUnderlyingType::utDecimal32:
-            createAttributeImpl<Decimal32>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utDecimal64:
-            createAttributeImpl<Decimal64>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utDecimal128:
-            createAttributeImpl<Decimal128>(attr, null_value);
-            break;
-
-        case AttributeUnderlyingType::utString:
-        {
-            attr.null_values = null_value.get<String>();
-            attr.maps = std::make_unique<Collection<StringRef>>();
-            attr.string_arena = std::make_unique<Arena>();
-            break;
-        }
-    }
+    callOnDictionaryAttributeType(type, type_call);
 
     return attr;
 }
 
-
-template <typename OutputType>
-void RangeHashedDictionary::getItems(
-    const Attribute & attribute,
-    const PaddedPODArray<Key> & ids,
-    const PaddedPODArray<RangeStorageType> & dates,
-    PaddedPODArray<OutputType> & out) const
-{
-    if (false) {} // NOLINT
-#define DISPATCH(TYPE) else if (attribute.type == AttributeUnderlyingType::ut##TYPE) getItemsImpl<TYPE, OutputType>(attribute, ids, dates, out);
-    DISPATCH(UInt8)
-    DISPATCH(UInt16)
-    DISPATCH(UInt32)
-    DISPATCH(UInt64)
-    DISPATCH(UInt128)
-    DISPATCH(Int8)
-    DISPATCH(Int16)
-    DISPATCH(Int32)
-    DISPATCH(Int64)
-    DISPATCH(Float32)
-    DISPATCH(Float64)
-    DISPATCH(Decimal32)
-    DISPATCH(Decimal64)
-    DISPATCH(Decimal128)
-#undef DISPATCH
-    else throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR);
-}
-
-template <typename AttributeType, typename OutputType>
+template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
 void RangeHashedDictionary::getItemsImpl(
     const Attribute & attribute,
-    const PaddedPODArray<Key> & ids,
-    const PaddedPODArray<RangeStorageType> & dates,
-    PaddedPODArray<OutputType> & out) const
+    const Columns & key_columns,
+    ValueSetter && set_value,
+    DefaultGetter && get_default) const
 {
-    const auto & attr = *std::get<Ptr<AttributeType>>(attribute.maps);
-    const auto null_value = std::get<AttributeType>(attribute.null_values);
+    PaddedPODArray<Key> key_backup_storage;
+    PaddedPODArray<RangeStorageType> range_backup_storage;
 
-    for (const auto i : ext::range(0, ids.size()))
+    const PaddedPODArray<Key> & ids = getColumnDataAsPaddedPODArray(this, key_columns[0], key_backup_storage);
+    const PaddedPODArray<RangeStorageType> & dates = getColumnDataAsPaddedPODArray(this, key_columns[1], range_backup_storage);
+
+    const auto & attr = *std::get<Ptr<AttributeType>>(attribute.maps);
+
+    for (const auto row : ext::range(0, ids.size()))
     {
-        const auto it = attr.find(ids[i]);
+        const auto it = attr.find(ids[row]);
         if (it)
         {
-            const auto date = dates[i];
+            const auto date = dates[row];
             const auto & ranges_and_values = it->getMapped();
             const auto val_it
                 = std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), [date](const Value<AttributeType> & v)
@@ -404,11 +461,11 @@ void RangeHashedDictionary::getItemsImpl(
                       return v.range.contains(date);
                   });
 
-            out[i] = static_cast<OutputType>(val_it != std::end(ranges_and_values) ? val_it->value : null_value); // NOLINT
+            set_value(row, static_cast<OutputType>(val_it != std::end(ranges_and_values) ? val_it->value : get_default(row))); // NOLINT
         }
         else
         {
-            out[i] = static_cast<OutputType>(null_value); // NOLINT
+            set_value(row, get_default(row));
         }
     }
 
diff --git a/src/Dictionaries/RangeHashedDictionary.h b/src/Dictionaries/RangeHashedDictionary.h
index 46ae0390b6a..5588bdb3ced 100644
--- a/src/Dictionaries/RangeHashedDictionary.h
+++ b/src/Dictionaries/RangeHashedDictionary.h
@@ -52,38 +52,18 @@ public:
         return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
     }
 
-    typedef Int64 RangeStorageType;
+    DictionaryIdentifierType getIdentifierType() const override { return DictionaryIdentifierType::range; }
 
-    template <typename T>
-    using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
+    ColumnPtr getColumn(
+        const std::string& attribute_name,
+        const DataTypePtr & result_type,
+        const Columns & key_columns,
+        const DataTypes & key_types,
+        const ColumnPtr default_untyped) const override;
 
-#define DECLARE_MULTIPLE_GETTER(TYPE) \
-    void get##TYPE( \
-        const std::string & attribute_name, \
-        const PaddedPODArray<Key> & ids, \
-        const PaddedPODArray<RangeStorageType> & dates, \
-        ResultArrayType<TYPE> & out) const;
-    DECLARE_MULTIPLE_GETTER(UInt8)
-    DECLARE_MULTIPLE_GETTER(UInt16)
-    DECLARE_MULTIPLE_GETTER(UInt32)
-    DECLARE_MULTIPLE_GETTER(UInt64)
-    DECLARE_MULTIPLE_GETTER(UInt128)
-    DECLARE_MULTIPLE_GETTER(Int8)
-    DECLARE_MULTIPLE_GETTER(Int16)
-    DECLARE_MULTIPLE_GETTER(Int32)
-    DECLARE_MULTIPLE_GETTER(Int64)
-    DECLARE_MULTIPLE_GETTER(Float32)
-    DECLARE_MULTIPLE_GETTER(Float64)
-    DECLARE_MULTIPLE_GETTER(Decimal32)
-    DECLARE_MULTIPLE_GETTER(Decimal64)
-    DECLARE_MULTIPLE_GETTER(Decimal128)
-#undef DECLARE_MULTIPLE_GETTER
+    ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override;
 
-    void getString(
-        const std::string & attribute_name,
-        const PaddedPODArray<Key> & ids,
-        const PaddedPODArray<RangeStorageType> & dates,
-        ColumnString * out) const;
+    using RangeStorageType = Int64;
 
     BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
 
@@ -130,7 +110,7 @@ private:
             Decimal128,
             Float32,
             Float64,
-            String>
+            StringRef>
             null_values;
         std::variant<
             Ptr<UInt8>,
@@ -166,21 +146,12 @@ private:
 
     Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
 
-
-    template <typename OutputType>
-    void getItems(
-        const Attribute & attribute,
-        const PaddedPODArray<Key> & ids,
-        const PaddedPODArray<RangeStorageType> & dates,
-        PaddedPODArray<OutputType> & out) const;
-
-    template <typename AttributeType, typename OutputType>
+    template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
     void getItemsImpl(
         const Attribute & attribute,
-        const PaddedPODArray<Key> & ids,
-        const PaddedPODArray<RangeStorageType> & dates,
-        PaddedPODArray<OutputType> & out) const;
-
+        const Columns & key_columns,
+        ValueSetter && set_value,
+        DefaultGetter && get_default) const;
 
     template <typename T>
     void setAttributeValueImpl(Attribute & attribute, const Key id, const Range & range, const T value);
diff --git a/src/Functions/FunctionsExternalDictionaries.cpp b/src/Functions/FunctionsExternalDictionaries.cpp
index 3d536630d7a..dbdbaf0e22a 100644
--- a/src/Functions/FunctionsExternalDictionaries.cpp
+++ b/src/Functions/FunctionsExternalDictionaries.cpp
@@ -38,8 +38,8 @@ void registerFunctionsExternalDictionaries(FunctionFactory & factory)
     factory.registerFunction<FunctionDictGetDateTimeOrDefault>();
     factory.registerFunction<FunctionDictGetUUIDOrDefault>();
     factory.registerFunction<FunctionDictGetStringOrDefault>();
-    factory.registerFunction<FunctionDictGetNoType>();
-    factory.registerFunction<FunctionDictGetNoTypeOrDefault>();
+    factory.registerFunction<FunctionDictGetNoType<DictionaryGetFunctionType::withoutDefault>>();
+    factory.registerFunction<FunctionDictGetNoType<DictionaryGetFunctionType::withDefault>>();
 }
 
 }
diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h
index 06c8cd5d650..5b20a4dfc08 100644
--- a/src/Functions/FunctionsExternalDictionaries.h
+++ b/src/Functions/FunctionsExternalDictionaries.h
@@ -104,8 +104,8 @@ public:
         if (!sample_columns)
             return false;
 
-        if (sample_columns.columns() != 3 && sample_columns.columns() != 4)
-            throw Exception{"Function dictGet... takes 3 or 4 arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
+        if (sample_columns.columns() < 3)
+            throw Exception{"Wrong arguments count", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
 
         const auto * dict_name_col = checkAndGetColumnConst<ColumnString>(sample_columns.getByPosition(0).column.get());
         if (!dict_name_col)
@@ -121,7 +121,6 @@ public:
 private:
     const Context & context;
     const ExternalDictionariesLoader & external_loader;
-    mutable std::shared_ptr<const IDictionaryBase> dictionary;
     /// Access cannot be not granted, since in this case checkAccess() will throw and access_checked will not be updated.
     std::atomic<bool> access_checked = false;
 };
@@ -178,19 +177,26 @@ private:
         auto dictionary = helper.getDictionary(arguments[0]);
         auto dictionary_identifier_type = dictionary->getIdentifierType();
 
-        const auto id_col_untyped = arguments[1].column;
+        const ColumnWithTypeAndName & key_column_with_type = arguments[1];
+        const auto key_column = key_column_with_type.column;
 
         if (dictionary_identifier_type == DictionaryIdentifierType::simple)
         {
-            return dictionary->has({ id_col_untyped }, { std::make_shared<DataTypeUInt64>() });
+            return dictionary->has({key_column}, {std::make_shared<DataTypeUInt64>()});
         }
         else if (dictionary_identifier_type == DictionaryIdentifierType::complex)
         {
-            /// TODO: Check if column is tuple and pass
-            return nullptr;
+            /// Functions in external dictionaries_loader only support full-value (not constant) columns with keys.
+            ColumnPtr key_column_full = key_column_with_type.column->convertToFullColumnIfConst();
+
+            const auto & key_columns = typeid_cast<const ColumnTuple &>(*key_column_full).getColumnsCopy();
+            const auto & key_types = static_cast<const DataTypeTuple &>(*key_column_with_type.type).getElements();
+
+            return dictionary->has(key_columns, key_types);
         }
         else
         {
+            /// TODO: Add support for range
             return nullptr;
         }
     }
@@ -198,13 +204,6 @@ private:
     mutable FunctionDictHelper helper;
 };
 
-
-/** For ColumnVector. Either returns a reference to internal data,
-  *  or convert it to T type, stores the result in backup_storage and returns a reference to it.
-  */
-template <typename T>
-static const PaddedPODArray<T> & getColumnDataAsPaddedPODArray(const IColumn & column, PaddedPODArray<T> & backup_storage);
-
 enum class DictionaryGetFunctionType
 {
     withoutDefault,
@@ -232,12 +231,9 @@ public:
     String getName() const override { return name; }
 
 private:
-    size_t getNumberOfArguments() const override {
-        /// TODO: Check if ranged dictionary is working
-        return dictionary_get_function_type == DictionaryGetFunctionType::withoutDefault ? 0 : 4; 
-    }
+    size_t getNumberOfArguments() const override { return 0; }
 
-    bool isVariadic() const override { return dictionary_get_function_type == DictionaryGetFunctionType::withoutDefault; }
+    bool isVariadic() const override { return true; }
 
     bool useDefaultImplementationForConstants() const final { return true; }
 
@@ -245,38 +241,8 @@ private:
 
     ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 1}; }
 
-    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
+    DataTypePtr getReturnTypeImpl(const DataTypes &) const override
     {
-        if (!isString(arguments[0]))
-            throw Exception{"Illegal type " + arguments[0]->getName() + " of first argument of function " + getName()
-                + ", expected a string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
-
-        if (!isString(arguments[1]))
-            throw Exception{"Illegal type " + arguments[1]->getName() + " of second argument of function " + getName()
-                + ", expected a string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
-
-        if (!WhichDataType(arguments[2]).isUInt64() &&
-            !isTuple(arguments[2]))
-            throw Exception{"Illegal type " + arguments[2]->getName() + " of third argument of function " + getName()
-                + ", must be UInt64 or tuple(...).", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
-
-        if constexpr (dictionary_get_function_type == DictionaryGetFunctionType::withDefault)
-        {
-            if (!checkAndGetDataType<DataType>(arguments[3].get()))
-                throw Exception{"Illegal type " + arguments[3]->getName() + " of fourth argument of function " + getName()
-                    + ", must be " + TypeName<Type>::get() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
-        }
-        else
-        {
-            /// This is for the case of range dictionaries_loader.
-            if (arguments.size() == 4 && !arguments[3]->isValueRepresentedByInteger())
-            {
-                throw Exception{"Illegal type " + arguments[3]->getName() +
-                                " of fourth argument of function " + getName() +
-                                " must be convertible to Int64.", ErrorCodes::ILLEGAL_COLUMN};
-            }
-        }
-
         if constexpr (IsDataTypeDecimal<DataType>)
             return std::make_shared<DataType>(DataType::maxPrecision(), decimal_scale);
         else
@@ -285,11 +251,21 @@ private:
 
     ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
     {
+        if (arguments.size() < 3)
+            throw Exception{"Wrong argument count for function " + getName(), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
+
         if (input_rows_count == 0)
             return result_type->createColumn();
 
-        auto dictionary = helper.getDictionary(arguments[0]);
-        ColumnPtr res;
+        const auto * dictionary_name_col = checkAndGetColumnConst<ColumnString>(arguments[0].column.get());
+        if (!dictionary_name_col)
+            throw Exception{"First argument of function " + getName() + " must be a constant string", ErrorCodes::ILLEGAL_COLUMN};
+
+        String dictionary_name = dictionary_name_col->getValue<String>();
+
+        auto dictionary = helper.getDictionary(dictionary_name);
+        if (!dictionary)
+            throw Exception("First argument of function " + getName() + " does not name a dictionary", ErrorCodes::ILLEGAL_COLUMN);
 
         const auto * attr_name_col = checkAndGetColumnConst<ColumnString>(arguments[1].column.get());
         if (!attr_name_col)
@@ -297,21 +273,57 @@ private:
 
         String attr_name = attr_name_col->getValue<String>();
 
-        const ColumnWithTypeAndName & key_col_with_type = arguments[2];
-        const auto key_column = key_col_with_type.column;
+        /// TODO: Use accurateCast if argument is integer
+        if (!WhichDataType(arguments[2].type).isUInt64() && !isTuple(arguments[2].type))
+            throw Exception{
+                "Illegal type " + arguments[2].type->getName() + " of third argument of function " + getName()
+                    + ", must be UInt64 or tuple(...).",
+                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
 
         auto dictionary_identifier_type = dictionary->getIdentifierType();
-        
+
+        size_t current_arguments_index = 3;
+
+        /// TODO: Add more information to error messages
+
+        ColumnPtr range_col = nullptr;
+        DataTypePtr range_col_type = nullptr;
+
+        if (dictionary_identifier_type == DictionaryIdentifierType::range)
+        {
+            if (current_arguments_index >= arguments.size())
+                throw Exception{"Wrong argument count for function " + getName(), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
+
+            range_col = arguments[current_arguments_index].column;
+            range_col_type = arguments[current_arguments_index].type;
+
+            if (!(range_col_type->isValueRepresentedByInteger() && range_col_type->getSizeOfValueInMemory() <= sizeof(Int64)))
+                throw Exception{
+                    "Illegal type " + range_col_type->getName() + " of fourth argument of function " + getName()
+                        + " must be convertible to Int64.",
+                    ErrorCodes::ILLEGAL_COLUMN};
+
+            ++current_arguments_index;
+        }
+
         ColumnPtr default_col = nullptr;
 
         if (dictionary_get_function_type == DictionaryGetFunctionType::withDefault)
         {
-            default_col = arguments[3].column;
+            if (current_arguments_index >= arguments.size())
+                throw Exception{"Wrong argument count for function test " + getName(), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
+
+            default_col = arguments[current_arguments_index].column;
         }
 
+        ColumnPtr res;
+
+        const ColumnWithTypeAndName & key_col_with_type = arguments[2];
+        const auto key_column = key_col_with_type.column;
+
         if (dictionary_identifier_type == DictionaryIdentifierType::simple)
         {
-            res = dictionary->getColumn(attr_name, result_type, { key_column }, { std::make_shared<DataTypeUInt64>() }, default_col);
+            res = dictionary->getColumn(attr_name, result_type, {key_column}, {std::make_shared<DataTypeUInt64>()}, default_col);
         }
         else if (dictionary_identifier_type == DictionaryIdentifierType::complex)
         {
@@ -323,10 +335,13 @@ private:
 
             res = dictionary->getColumn(attr_name, result_type, key_columns, key_types, default_col);
         }
-        else
+        else if (dictionary_identifier_type == DictionaryIdentifierType::range)
         {
-            res = nullptr;
+            res = dictionary->getColumn(
+                attr_name, result_type, {key_column, range_col}, {std::make_shared<DataTypeUInt64>(), range_col_type}, default_col);
         }
+        else
+            throw Exception{"Unknown dictionary identifier type", ErrorCodes::BAD_ARGUMENTS};
 
         return res;
     }
@@ -416,10 +431,11 @@ using FunctionDictGetStringOrDefault = FunctionDictGetOrDefault<DataTypeString,
 
 /// TODO: Use new API
 /// This variant of function derives the result type automatically.
+template <DictionaryGetFunctionType dictionary_get_function_type>
 class FunctionDictGetNoType final : public IFunction
 {
 public:
-    static constexpr auto name = "dictGet";
+    static constexpr auto name = dictionary_get_function_type == DictionaryGetFunctionType::withDefault ? "dictGetOrDefault" : "dictGet";
 
     static FunctionPtr create(const Context & context)
     {
@@ -437,6 +453,8 @@ private:
     bool useDefaultImplementationForConstants() const final { return true; }
     ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 1}; }
 
+    bool isDeterministic() const override { return false; }
+
     bool isInjective(const ColumnsWithTypeAndName & sample_columns) const override
     {
         return helper.isDictGetFunctionInjective(sample_columns);
@@ -444,8 +462,8 @@ private:
 
     DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
     {
-        if (arguments.size() != 3 && arguments.size() != 4)
-            throw Exception{"Function " + getName() + " takes 3 or 4 arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
+        if (arguments.size() < 3)
+            throw Exception{"Wrong argument count for function " + getName(), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
 
         String dict_name;
         if (const auto * name_col = checkAndGetColumnConst<ColumnString>(arguments[0].column.get()))
@@ -465,29 +483,20 @@ private:
             throw Exception{"Illegal type " + arguments[1].type->getName() + " of second argument of function " + getName()
                 + ", expected a const string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
 
-        if (!WhichDataType(arguments[2].type).isUInt64() &&
-            !isTuple(arguments[2].type))
-            throw Exception{"Illegal type " + arguments[2].type->getName() + " of third argument of function " + getName()
-                + ", must be UInt64 or tuple(...).", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
-
-        if (arguments.size() == 4)
-        {
-            const auto * range_argument = arguments[3].type.get();
-            if (!(range_argument->isValueRepresentedByInteger() &&
-                   range_argument->getSizeOfValueInMemory() <= sizeof(Int64)))
-                throw Exception{"Illegal type " + range_argument->getName() + " of fourth argument of function " + getName()
-                    + ", must be convertible to " + TypeName<Int64>::get() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
-        }
-
         auto dict = helper.getDictionary(dict_name);
         const DictionaryStructure & structure = dict->getStructure();
 
-        for (const auto idx : ext::range(0, structure.attributes.size()))
+        for (const auto& attribute : structure.attributes)
         {
-            const DictionaryAttribute & attribute = structure.attributes[idx];
-            if (attribute.name == attr_name)
+            if (attribute.name != attr_name)
+            {
+                continue;
+            }
+
+            WhichDataType dt = attribute.type;
+
+            if constexpr (dictionary_get_function_type == DictionaryGetFunctionType::withoutDefault)
             {
-                WhichDataType dt = attribute.type;
                 switch (dt.idx)
                 {
                     case TypeIndex::String:
@@ -545,90 +554,9 @@ private:
                     default:
                         throw Exception("Unknown dictGet type", ErrorCodes::UNKNOWN_TYPE);
                 }
-                return attribute.type;
             }
-        }
-        throw Exception{"No such attribute '" + attr_name + "'", ErrorCodes::BAD_ARGUMENTS};
-    }
-
-    bool isDeterministic() const override { return false; }
-
-    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
-    {
-        return impl->executeImpl(arguments, result_type, input_rows_count);
-    }
-
-    const Context & context;
-    mutable FunctionDictHelper helper;
-    mutable FunctionPtr impl; // underlying function used by dictGet function without explicit type info
-};
-
-
-class FunctionDictGetNoTypeOrDefault final : public IFunction
-{
-public:
-    static constexpr auto name = "dictGetOrDefault";
-
-    static FunctionPtr create(const Context & context)
-    {
-        return std::make_shared<FunctionDictGetNoTypeOrDefault>(context);
-    }
-
-    explicit FunctionDictGetNoTypeOrDefault(const Context & context_) : context(context_), helper(context_) {}
-
-    String getName() const override { return name; }
-
-private:
-    size_t getNumberOfArguments() const override { return 4; }
-
-    bool useDefaultImplementationForConstants() const final { return true; }
-    ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 1}; }
-
-    bool isInjective(const ColumnsWithTypeAndName & sample_columns) const override
-    {
-        return helper.isDictGetFunctionInjective(sample_columns);
-    }
-
-    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
-    {
-        String dict_name;
-        if (const auto * name_col = checkAndGetColumnConst<ColumnString>(arguments[0].column.get()))
-        {
-            dict_name = name_col->getValue<String>();
-        }
-        else
-            throw Exception{"Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName()
-                + ", expected a const string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
-
-        String attr_name;
-        if (const auto * name_col = checkAndGetColumnConst<ColumnString>(arguments[1].column.get()))
-        {
-            attr_name = name_col->getValue<String>();
-        }
-        else
-            throw Exception{"Illegal type " + arguments[1].type->getName() + " of second argument of function " + getName()
-                + ", expected a const string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
-
-        if (!WhichDataType(arguments[2].type).isUInt64() &&
-            !isTuple(arguments[2].type))
-            throw Exception{"Illegal type " + arguments[2].type->getName() + " of third argument of function " + getName()
-                + ", must be UInt64 or tuple(...).", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
-
-        auto dict = helper.getDictionary(dict_name);
-        const DictionaryStructure & structure = dict->getStructure();
-
-        for (const auto idx : ext::range(0, structure.attributes.size()))
-        {
-            const DictionaryAttribute & attribute = structure.attributes[idx];
-            if (attribute.name == attr_name)
+            else
             {
-                auto arg_type = arguments[3].type;
-                WhichDataType dt = attribute.type;
-
-                if ((arg_type->getTypeId() != dt.idx) || (dt.isStringOrFixedString() && !isString(arg_type)))
-                    throw Exception{"Illegal type " + arg_type->getName() + " of fourth argument of function " + getName() +
-                        ", must be " + getTypeName(dt.idx) + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
-
                 switch (dt.idx)
                 {
                     case TypeIndex::String:
@@ -685,15 +613,14 @@ private:
                     default:
                         throw Exception("Unknown dictGetOrDefault type", ErrorCodes::UNKNOWN_TYPE);
                 }
-
-                return attribute.type;
             }
+
+            return attribute.type;
         }
+
         throw Exception{"No such attribute '" + attr_name + "'", ErrorCodes::BAD_ARGUMENTS};
     }
 
-    bool isDeterministic() const override { return false; }
-
     ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
     {
         return impl->executeImpl(arguments, result_type, input_rows_count);
@@ -1004,27 +931,4 @@ private:
     mutable FunctionDictHelper helper;
 };
 
-
-template <typename T>
-static const PaddedPODArray<T> & getColumnDataAsPaddedPODArray(const IColumn & column, PaddedPODArray<T> & backup_storage)
-{
-    if (!isColumnConst(column))
-    {
-        if (const auto vector_col = checkAndGetColumn<ColumnVector<T>>(&column))
-        {
-            return vector_col->getData();
-        }
-    }
-
-    const auto full_column = column.convertToFullColumnIfConst();
-
-    // With type conversion and const columns we need to use backup storage here
-    const auto size = full_column->size();
-    backup_storage.resize(size);
-    for (size_t i = 0; i < size; ++i)
-        backup_storage[i] = full_column->getUInt(i);
-
-    return backup_storage;
-}
-
 }

From cc767d4f2e354fd7ea2a5f4783102f86bb1b3d22 Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Mon, 21 Dec 2020 17:39:15 +0300
Subject: [PATCH 0218/1238] Updated HashedDictionary to new interface

---
 src/Dictionaries/FlatDictionary.cpp   |   5 +-
 src/Dictionaries/HashedDictionary.cpp | 586 ++++++++++----------------
 src/Dictionaries/HashedDictionary.h   |  79 +---
 3 files changed, 243 insertions(+), 427 deletions(-)

diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp
index 58e54931dee..790c0100d24 100644
--- a/src/Dictionaries/FlatDictionary.cpp
+++ b/src/Dictionaries/FlatDictionary.cpp
@@ -109,10 +109,9 @@ ColumnPtr FlatDictionary::getColumn(
         const std::string& attribute_name,
         const DataTypePtr &,
         const Columns & key_columns,
-        const DataTypes & key_types,
+        const DataTypes &,
         const ColumnPtr default_untyped) const
 {
-    dict_struct.validateKeyTypes(key_types);
     ColumnPtr result;
 
     PaddedPODArray<Key> backup_storage;
@@ -263,8 +262,6 @@ ColumnPtr FlatDictionary::getColumn(
 
 ColumnUInt8::Ptr FlatDictionary::has(const Columns & key_columns, const DataTypes &) const
 {
-    assert(!key_columns.empty());
-
     PaddedPODArray<Key> backup_storage;
     const auto& ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
 
diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp
index 34fa227e546..3034d585f88 100644
--- a/src/Dictionaries/HashedDictionary.cpp
+++ b/src/Dictionaries/HashedDictionary.cpp
@@ -4,7 +4,8 @@
 #include "DictionaryFactory.h"
 #include "ClickHouseDictionarySource.h"
 #include <Core/Defines.h>
-
+#include <Functions/FunctionHelpers.h>
+#include <Columns/ColumnsNumber.h>
 
 namespace
 {
@@ -125,183 +126,185 @@ void HashedDictionary::isInConstantVector(const Key child_id, const PaddedPODArr
     isInImpl(child_id, ancestor_ids, out);
 }
 
-
-#define DECLARE(TYPE) \
-    void HashedDictionary::get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) \
-        const \
-    { \
-        const auto & attribute = getAttribute(attribute_name); \
-        checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
-\
-        const auto null_value = std::get<TYPE>(attribute.null_values); \
-\
-        getItemsImpl<TYPE, TYPE>( \
-            attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return null_value; }); \
-    }
-DECLARE(UInt8)
-DECLARE(UInt16)
-DECLARE(UInt32)
-DECLARE(UInt64)
-DECLARE(UInt128)
-DECLARE(Int8)
-DECLARE(Int16)
-DECLARE(Int32)
-DECLARE(Int64)
-DECLARE(Float32)
-DECLARE(Float64)
-DECLARE(Decimal32)
-DECLARE(Decimal64)
-DECLARE(Decimal128)
-#undef DECLARE
-
-void HashedDictionary::getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const
+ColumnPtr HashedDictionary::getColumn(
+    const std::string & attribute_name,
+    const DataTypePtr &,
+    const Columns & key_columns,
+    const DataTypes &,
+    const ColumnPtr default_untyped) const
 {
+    // dict_struct.validateKeyTypes(key_types);
+    ColumnPtr result;
+
+    PaddedPODArray<Key> backup_storage;
+    const auto& ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
+    
     const auto & attribute = getAttribute(attribute_name);
-    checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
 
-    const auto & null_value = StringRef{std::get<String>(attribute.null_values)};
+    /// TODO: Check that attribute type is same as result type
+    /// TODO: Check if const will work as expected
 
-    getItemsImpl<StringRef, StringRef>(
-        attribute,
-        ids,
-        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
-        [&](const size_t) { return null_value; });
+    auto type_call = [&](const auto &dictionary_attribute_type)
+    {
+        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+        using AttributeType = typename Type::AttributeType;
+
+        auto size = ids.size();
+
+        if constexpr (std::is_same_v<AttributeType, String>)
+        {
+            auto column_string = ColumnString::create();
+            auto out = column_string.get();
+
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ColumnString>(*default_untyped))
+                {
+                    getItemsImpl<StringRef, StringRef>(
+                        attribute,
+                        ids,
+                        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
+                        [&](const size_t row) { return default_col->getDataAt(row); });
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<String>();
+
+                    getItemsImpl<StringRef, StringRef>(
+                        attribute,
+                        ids,
+                        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
+                        [&](const size_t) { return def; });
+                }
+            }
+            else
+            {
+                const auto & null_value = std::get<StringRef>(attribute.null_values);
+
+                getItemsImpl<StringRef, StringRef>(
+                    attribute,
+                    ids,
+                    [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
+                    [&](const size_t) { return null_value; });
+            }
+
+            result = std::move(column_string);
+        }
+        else if constexpr (IsNumber<AttributeType>)
+        {
+            auto column = ColumnVector<AttributeType>::create(size);
+            auto& out = column->getData();
+
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ColumnVector<AttributeType>>(*default_untyped))
+                {
+                    getItemsImpl<AttributeType, AttributeType>(
+                        attribute,
+                        ids,
+                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t row) { return default_col->getData()[row]; }
+                    );
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ColumnVector<AttributeType>>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<AttributeType>();
+
+                    getItemsImpl<AttributeType, AttributeType>(
+                        attribute,
+                        ids,
+                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t) { return def; }
+                    );
+                }
+            }
+            else
+            {
+                const auto null_value = std::get<AttributeType>(attribute.null_values);
+
+                getItemsImpl<AttributeType, AttributeType>(
+                    attribute,
+                    ids,
+                    [&](const size_t row, const auto value) { return out[row] = value; },
+                    [&](const size_t) { return null_value; });
+            }
+
+            result = std::move(column);
+        }
+        else if constexpr (IsDecimalNumber<AttributeType>)
+        {
+            // auto scale = getDecimalScale(*attribute.type);
+            auto column = ColumnDecimal<AttributeType>::create(size, 0);
+            auto& out = column->getData();
+
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ColumnDecimal<AttributeType>>(*default_untyped))
+                {
+                    getItemsImpl<AttributeType, AttributeType>(
+                        attribute,
+                        ids,
+                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t row) { return default_col->getData()[row]; }
+                    );
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ColumnDecimal<AttributeType>>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<AttributeType>();
+
+                    getItemsImpl<AttributeType, AttributeType>(
+                        attribute,
+                        ids,
+                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t) { return def; }
+                    );
+                }
+            }
+            else
+            {
+                const auto null_value = std::get<AttributeType>(attribute.null_values);
+
+                getItemsImpl<AttributeType, AttributeType>(
+                    attribute,
+                    ids,
+                    [&](const size_t row, const auto value) { return out[row] = value; },
+                    [&](const size_t) { return null_value; }
+                );
+            }
+
+            result = std::move(column);
+        }
+    };
+
+    callOnDictionaryAttributeType(attribute.type, type_call);
+   
+    return result;
 }
 
-#define DECLARE(TYPE) \
-    void HashedDictionary::get##TYPE( \
-        const std::string & attribute_name, \
-        const PaddedPODArray<Key> & ids, \
-        const PaddedPODArray<TYPE> & def, \
-        ResultArrayType<TYPE> & out) const \
-    { \
-        const auto & attribute = getAttribute(attribute_name); \
-        checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
-\
-        getItemsImpl<TYPE, TYPE>( \
-            attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t row) { return def[row]; }); \
-    }
-DECLARE(UInt8)
-DECLARE(UInt16)
-DECLARE(UInt32)
-DECLARE(UInt64)
-DECLARE(UInt128)
-DECLARE(Int8)
-DECLARE(Int16)
-DECLARE(Int32)
-DECLARE(Int64)
-DECLARE(Float32)
-DECLARE(Float64)
-DECLARE(Decimal32)
-DECLARE(Decimal64)
-DECLARE(Decimal128)
-#undef DECLARE
-
-void HashedDictionary::getString(
-    const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
+ColumnUInt8::Ptr HashedDictionary::has(const Columns & key_columns, const DataTypes &) const
 {
-    const auto & attribute = getAttribute(attribute_name);
-    checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
+    PaddedPODArray<Key> backup_storage;
+    const auto& ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
 
-    getItemsImpl<StringRef, StringRef>(
-        attribute,
-        ids,
-        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
-        [&](const size_t row) { return def->getDataAt(row); });
-}
+    size_t ids_count = ext::size(ids);
 
-#define DECLARE(TYPE) \
-    void HashedDictionary::get##TYPE( \
-        const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE & def, ResultArrayType<TYPE> & out) const \
-    { \
-        const auto & attribute = getAttribute(attribute_name); \
-        checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
-\
-        getItemsImpl<TYPE, TYPE>( \
-            attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
-    }
-DECLARE(UInt8)
-DECLARE(UInt16)
-DECLARE(UInt32)
-DECLARE(UInt64)
-DECLARE(UInt128)
-DECLARE(Int8)
-DECLARE(Int16)
-DECLARE(Int32)
-DECLARE(Int64)
-DECLARE(Float32)
-DECLARE(Float64)
-DECLARE(Decimal32)
-DECLARE(Decimal64)
-DECLARE(Decimal128)
-#undef DECLARE
+    auto result = ColumnUInt8::create(ext::size(ids));
+    auto& out = result->getData();
 
-void HashedDictionary::getString(
-    const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
-{
-    const auto & attribute = getAttribute(attribute_name);
-    checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
-
-    getItemsImpl<StringRef, StringRef>(
-        attribute,
-        ids,
-        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
-        [&](const size_t) { return StringRef{def}; });
-}
-
-void HashedDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
-{
     const auto & attribute = attributes.front();
 
-    switch (attribute.type)
+    auto type_call = [&](const auto & dictionary_attribute_type)
     {
-        case AttributeUnderlyingType::utUInt8:
-            has<UInt8>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utUInt16:
-            has<UInt16>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utUInt32:
-            has<UInt32>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utUInt64:
-            has<UInt64>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utUInt128:
-            has<UInt128>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utInt8:
-            has<Int8>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utInt16:
-            has<Int16>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utInt32:
-            has<Int32>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utInt64:
-            has<Int64>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utFloat32:
-            has<Float32>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utFloat64:
-            has<Float64>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utString:
-            has<StringRef>(attribute, ids, out);
-            break;
+        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+        using AttributeType = typename Type::AttributeType;
+        has<AttributeType>(attribute, ids, out);
+    };
 
-        case AttributeUnderlyingType::utDecimal32:
-            has<Decimal32>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utDecimal64:
-            has<Decimal64>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utDecimal128:
-            has<Decimal128>(attribute, ids, out);
-            break;
-    }
+    callOnDictionaryAttributeType(attribute.type, type_call);
+
+    query_count.fetch_add(ids_count, std::memory_order_relaxed);
+
+    return result;
 }
 
 void HashedDictionary::createAttributes()
@@ -429,6 +432,13 @@ void HashedDictionary::resize(Attribute & attribute, size_t added_rows)
         map_ref->resize(added_rows);
     }
 }
+
+template <>
+void HashedDictionary::resize<String>(Attribute & attribute, size_t added_rows)
+{
+    resize<StringRef>(attribute, added_rows);
+}
+
 void HashedDictionary::resize(size_t added_rows)
 {
     if (!added_rows)
@@ -436,56 +446,14 @@ void HashedDictionary::resize(size_t added_rows)
 
     for (auto & attribute : attributes)
     {
-        switch (attribute.type)
+        auto type_call = [&](const auto & dictionary_attribute_type)
         {
-            case AttributeUnderlyingType::utUInt8:
-                resize<UInt8>(attribute, added_rows);
-                break;
-            case AttributeUnderlyingType::utUInt16:
-                resize<UInt16>(attribute, added_rows);
-                break;
-            case AttributeUnderlyingType::utUInt32:
-                resize<UInt32>(attribute, added_rows);
-                break;
-            case AttributeUnderlyingType::utUInt64:
-                resize<UInt64>(attribute, added_rows);
-                break;
-            case AttributeUnderlyingType::utUInt128:
-                resize<UInt128>(attribute, added_rows);
-                break;
-            case AttributeUnderlyingType::utInt8:
-                resize<Int8>(attribute, added_rows);
-                break;
-            case AttributeUnderlyingType::utInt16:
-                resize<Int16>(attribute, added_rows);
-                break;
-            case AttributeUnderlyingType::utInt32:
-                resize<Int32>(attribute, added_rows);
-                break;
-            case AttributeUnderlyingType::utInt64:
-                resize<Int64>(attribute, added_rows);
-                break;
-            case AttributeUnderlyingType::utFloat32:
-                resize<Float32>(attribute, added_rows);
-                break;
-            case AttributeUnderlyingType::utFloat64:
-                resize<Float64>(attribute, added_rows);
-                break;
+            using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+            using AttributeType = typename Type::AttributeType;
+            resize<AttributeType>(attribute, added_rows);
+        };
 
-            case AttributeUnderlyingType::utDecimal32:
-                resize<Decimal32>(attribute, added_rows);
-                break;
-            case AttributeUnderlyingType::utDecimal64:
-                resize<Decimal64>(attribute, added_rows);
-                break;
-            case AttributeUnderlyingType::utDecimal128:
-                resize<Decimal128>(attribute, added_rows);
-                break;
-
-            case AttributeUnderlyingType::utString:
-                resize<StringRef>(attribute, added_rows);
-                break;
-        }
+        callOnDictionaryAttributeType(attribute.type, type_call);
     }
 }
 
@@ -562,66 +530,27 @@ void HashedDictionary::addAttributeSize(const Attribute & attribute)
     }
 }
 
+template <>
+void HashedDictionary::addAttributeSize<String>(const Attribute & attribute)
+{
+    addAttributeSize<StringRef>(attribute);
+    bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
+}
+
 void HashedDictionary::calculateBytesAllocated()
 {
     bytes_allocated += attributes.size() * sizeof(attributes.front());
 
     for (const auto & attribute : attributes)
     {
-        switch (attribute.type)
+        auto type_call = [&](const auto & dictionary_attribute_type)
         {
-            case AttributeUnderlyingType::utUInt8:
-                addAttributeSize<UInt8>(attribute);
-                break;
-            case AttributeUnderlyingType::utUInt16:
-                addAttributeSize<UInt16>(attribute);
-                break;
-            case AttributeUnderlyingType::utUInt32:
-                addAttributeSize<UInt32>(attribute);
-                break;
-            case AttributeUnderlyingType::utUInt64:
-                addAttributeSize<UInt64>(attribute);
-                break;
-            case AttributeUnderlyingType::utUInt128:
-                addAttributeSize<UInt128>(attribute);
-                break;
-            case AttributeUnderlyingType::utInt8:
-                addAttributeSize<Int8>(attribute);
-                break;
-            case AttributeUnderlyingType::utInt16:
-                addAttributeSize<Int16>(attribute);
-                break;
-            case AttributeUnderlyingType::utInt32:
-                addAttributeSize<Int32>(attribute);
-                break;
-            case AttributeUnderlyingType::utInt64:
-                addAttributeSize<Int64>(attribute);
-                break;
-            case AttributeUnderlyingType::utFloat32:
-                addAttributeSize<Float32>(attribute);
-                break;
-            case AttributeUnderlyingType::utFloat64:
-                addAttributeSize<Float64>(attribute);
-                break;
+            using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+            using AttributeType = typename Type::AttributeType;
+            addAttributeSize<AttributeType>(attribute);
+        };
 
-            case AttributeUnderlyingType::utDecimal32:
-                addAttributeSize<Decimal32>(attribute);
-                break;
-            case AttributeUnderlyingType::utDecimal64:
-                addAttributeSize<Decimal64>(attribute);
-                break;
-            case AttributeUnderlyingType::utDecimal128:
-                addAttributeSize<Decimal128>(attribute);
-                break;
-
-            case AttributeUnderlyingType::utString:
-            {
-                addAttributeSize<StringRef>(attribute);
-                bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
-
-                break;
-            }
-        }
+        callOnDictionaryAttributeType(attribute.type, type_call);
     }
 }
 
@@ -635,67 +564,32 @@ void HashedDictionary::createAttributeImpl(Attribute & attribute, const Field &
         attribute.sparse_maps = std::make_unique<SparseCollectionType<T>>();
 }
 
+template <>
+void HashedDictionary::createAttributeImpl<String>(Attribute & attribute, const Field & null_value)
+{
+    attribute.string_arena = std::make_unique<Arena>();
+    const String & string = null_value.get<String>();
+    const char * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
+    attribute.null_values.emplace<StringRef>(string_in_arena, string.size());
+
+    if (!sparse)
+        attribute.maps = std::make_unique<CollectionType<StringRef>>();
+    else
+        attribute.sparse_maps = std::make_unique<SparseCollectionType<StringRef>>();
+}
+
 HashedDictionary::Attribute HashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
 {
     Attribute attr{type, {}, {}, {}, {}};
 
-    switch (type)
+    auto type_call = [&](const auto &dictionary_attribute_type)
     {
-        case AttributeUnderlyingType::utUInt8:
-            createAttributeImpl<UInt8>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utUInt16:
-            createAttributeImpl<UInt16>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utUInt32:
-            createAttributeImpl<UInt32>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utUInt64:
-            createAttributeImpl<UInt64>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utUInt128:
-            createAttributeImpl<UInt128>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utInt8:
-            createAttributeImpl<Int8>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utInt16:
-            createAttributeImpl<Int16>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utInt32:
-            createAttributeImpl<Int32>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utInt64:
-            createAttributeImpl<Int64>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utFloat32:
-            createAttributeImpl<Float32>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utFloat64:
-            createAttributeImpl<Float64>(attr, null_value);
-            break;
+        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+        using AttributeType = typename Type::AttributeType;
+        createAttributeImpl<AttributeType>(attr, null_value);
+    };
 
-        case AttributeUnderlyingType::utDecimal32:
-            createAttributeImpl<Decimal32>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utDecimal64:
-            createAttributeImpl<Decimal64>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utDecimal128:
-            createAttributeImpl<Decimal128>(attr, null_value);
-            break;
-
-        case AttributeUnderlyingType::utString:
-        {
-            attr.null_values = null_value.get<String>();
-            if (!sparse)
-                attr.maps = std::make_unique<CollectionType<StringRef>>();
-            else
-                attr.sparse_maps = std::make_unique<SparseCollectionType<StringRef>>();
-            attr.string_arena = std::make_unique<Arena>();
-            break;
-        }
-    }
+    callOnDictionaryAttributeType(type, type_call);
 
     return attr;
 }
@@ -811,8 +705,12 @@ void HashedDictionary::has(const Attribute & attribute, const PaddedPODArray<Key
 
     for (const auto i : ext::range(0, rows))
         out[i] = attr.find(ids[i]) != nullptr;
+}
 
-    query_count.fetch_add(rows, std::memory_order_relaxed);
+template <>
+void HashedDictionary::has<String>(const Attribute & attribute, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
+{
+    has<StringRef>(attribute, ids, out);
 }
 
 template <typename T, typename AttrType>
@@ -833,45 +731,27 @@ PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds(const Attribute &
     return getIdsAttrImpl<T>(*std::get<SparseCollectionPtrType<T>>(attribute.sparse_maps));
 }
 
+template <>
+PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds<String>(const Attribute & attribute) const
+{
+    return getIds<StringRef>(attribute);
+} 
+
 PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds() const
 {
     const auto & attribute = attributes.front();
+    PaddedPODArray<HashedDictionary::Key> result;
 
-    switch (attribute.type)
+    auto type_call = [&](const auto &dictionary_attribute_type)
     {
-        case AttributeUnderlyingType::utUInt8:
-            return getIds<UInt8>(attribute);
-        case AttributeUnderlyingType::utUInt16:
-            return getIds<UInt16>(attribute);
-        case AttributeUnderlyingType::utUInt32:
-            return getIds<UInt32>(attribute);
-        case AttributeUnderlyingType::utUInt64:
-            return getIds<UInt64>(attribute);
-        case AttributeUnderlyingType::utUInt128:
-            return getIds<UInt128>(attribute);
-        case AttributeUnderlyingType::utInt8:
-            return getIds<Int8>(attribute);
-        case AttributeUnderlyingType::utInt16:
-            return getIds<Int16>(attribute);
-        case AttributeUnderlyingType::utInt32:
-            return getIds<Int32>(attribute);
-        case AttributeUnderlyingType::utInt64:
-            return getIds<Int64>(attribute);
-        case AttributeUnderlyingType::utFloat32:
-            return getIds<Float32>(attribute);
-        case AttributeUnderlyingType::utFloat64:
-            return getIds<Float64>(attribute);
-        case AttributeUnderlyingType::utString:
-            return getIds<StringRef>(attribute);
+        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+        using AttributeType = typename Type::AttributeType;
+        result = getIds<AttributeType>(attribute);
+    };
 
-        case AttributeUnderlyingType::utDecimal32:
-            return getIds<Decimal32>(attribute);
-        case AttributeUnderlyingType::utDecimal64:
-            return getIds<Decimal64>(attribute);
-        case AttributeUnderlyingType::utDecimal128:
-            return getIds<Decimal128>(attribute);
-    }
-    return PaddedPODArray<Key>();
+    callOnDictionaryAttributeType(attribute.type, type_call);
+
+    return result;
 }
 
 BlockInputStreamPtr HashedDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
diff --git a/src/Dictionaries/HashedDictionary.h b/src/Dictionaries/HashedDictionary.h
index b3d1c16ed26..ff64fb29f1f 100644
--- a/src/Dictionaries/HashedDictionary.h
+++ b/src/Dictionaries/HashedDictionary.h
@@ -66,77 +66,16 @@ public:
 
     void toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const override;
 
-    template <typename T>
-    using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
+    DictionaryIdentifierType getIdentifierType() const override { return DictionaryIdentifierType::simple; }
 
-#define DECLARE(TYPE) \
-    void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const;
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
+    ColumnPtr getColumn(
+        const std::string& attribute_name,
+        const DataTypePtr & result_type,
+        const Columns & key_columns,
+        const DataTypes & key_types,
+        const ColumnPtr default_untyped) const override;
 
-    void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const;
-
-#define DECLARE(TYPE) \
-    void get##TYPE( \
-        const std::string & attribute_name, \
-        const PaddedPODArray<Key> & ids, \
-        const PaddedPODArray<TYPE> & def, \
-        ResultArrayType<TYPE> & out) const;
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
-
-    void
-    getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out)
-        const;
-
-#define DECLARE(TYPE) \
-    void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE & def, ResultArrayType<TYPE> & out) \
-        const;
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
-
-    void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const;
-
-    void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const;
+    ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override;
 
     void isInVectorVector(
         const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
@@ -180,7 +119,7 @@ private:
             Decimal128,
             Float32,
             Float64,
-            String>
+            StringRef>
             null_values;
         std::variant<
             CollectionPtrType<UInt8>,

From cd6ec3234e0ab9e37c297970de1caa27c6105b99 Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Sun, 27 Dec 2020 19:44:40 +0300
Subject: [PATCH 0219/1238] Updated DirectDictionary to new interface

---
 src/Dictionaries/DirectDictionary.cpp | 473 ++++++++++++--------------
 src/Dictionaries/DirectDictionary.h   |  79 +----
 2 files changed, 225 insertions(+), 327 deletions(-)

diff --git a/src/Dictionaries/DirectDictionary.cpp b/src/Dictionaries/DirectDictionary.cpp
index 1fbfcc07215..e101055aa5c 100644
--- a/src/Dictionaries/DirectDictionary.cpp
+++ b/src/Dictionaries/DirectDictionary.cpp
@@ -3,7 +3,7 @@
 #include "DictionaryBlockInputStream.h"
 #include "DictionaryFactory.h"
 #include <Core/Defines.h>
-
+#include <Functions/FunctionHelpers.h>
 
 namespace DB
 {
@@ -128,184 +128,218 @@ void DirectDictionary::isInConstantVector(const Key child_id, const PaddedPODArr
     isInImpl(child_id, ancestor_ids, out);
 }
 
-
-#define DECLARE(TYPE) \
-    void DirectDictionary::get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const \
-    { \
-        const auto & attribute = getAttribute(attribute_name); \
-        checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
-\
-        const auto null_value = std::get<TYPE>(attribute.null_values); \
-\
-        getItemsImpl<TYPE, TYPE>( \
-            attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return null_value; }); \
-    }
-DECLARE(UInt8)
-DECLARE(UInt16)
-DECLARE(UInt32)
-DECLARE(UInt64)
-DECLARE(UInt128)
-DECLARE(Int8)
-DECLARE(Int16)
-DECLARE(Int32)
-DECLARE(Int64)
-DECLARE(Float32)
-DECLARE(Float64)
-DECLARE(Decimal32)
-DECLARE(Decimal64)
-DECLARE(Decimal128)
-#undef DECLARE
-
-void DirectDictionary::getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const
+ColumnPtr DirectDictionary::getColumn(
+        const std::string& attribute_name,
+        const DataTypePtr &,
+        const Columns & key_columns,
+        const DataTypes &,
+        const ColumnPtr default_untyped) const
 {
+    ColumnPtr result;
+
+    PaddedPODArray<Key> backup_storage;
+    const auto& ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
+    
     const auto & attribute = getAttribute(attribute_name);
-    checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
 
-    const auto & null_value = std::get<StringRef>(attribute.null_values);
-    getItemsStringImpl<StringRef, StringRef>(
-        attribute,
-        ids,
-        [&](const size_t, const String value) { const auto ref = StringRef{value}; out->insertData(ref.data, ref.size); },
-        [&](const size_t) { return String(null_value.data, null_value.size); });
-}
+    /// TODO: Check that attribute type is same as result type
+    /// TODO: Check if const will work as expected
 
-#define DECLARE(TYPE) \
-    void DirectDictionary::get##TYPE( \
-        const std::string & attribute_name, \
-        const PaddedPODArray<Key> & ids, \
-        const PaddedPODArray<TYPE> & def, \
-        ResultArrayType<TYPE> & out) const \
-    { \
-        const auto & attribute = getAttribute(attribute_name); \
-        checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
-\
-        getItemsImpl<TYPE, TYPE>( \
-            attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t row) { return def[row]; }); \
-    }
-DECLARE(UInt8)
-DECLARE(UInt16)
-DECLARE(UInt32)
-DECLARE(UInt64)
-DECLARE(UInt128)
-DECLARE(Int8)
-DECLARE(Int16)
-DECLARE(Int32)
-DECLARE(Int64)
-DECLARE(Float32)
-DECLARE(Float64)
-DECLARE(Decimal32)
-DECLARE(Decimal64)
-DECLARE(Decimal128)
-#undef DECLARE
-
-void DirectDictionary::getString(
-    const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
-{
-    const auto & attribute = getAttribute(attribute_name);
-    checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
-
-    getItemsStringImpl<StringRef, StringRef>(
-        attribute,
-        ids,
-        [&](const size_t, const String value) { const auto ref = StringRef{value}; out->insertData(ref.data, ref.size); },
-        [&](const size_t row) { const auto ref = def->getDataAt(row); return String(ref.data, ref.size); });
-}
-
-#define DECLARE(TYPE) \
-    void DirectDictionary::get##TYPE( \
-        const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const \
-    { \
-        const auto & attribute = getAttribute(attribute_name); \
-        checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
-\
-        getItemsImpl<TYPE, TYPE>( \
-            attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
-    }
-DECLARE(UInt8)
-DECLARE(UInt16)
-DECLARE(UInt32)
-DECLARE(UInt64)
-DECLARE(UInt128)
-DECLARE(Int8)
-DECLARE(Int16)
-DECLARE(Int32)
-DECLARE(Int64)
-DECLARE(Float32)
-DECLARE(Float64)
-DECLARE(Decimal32)
-DECLARE(Decimal64)
-DECLARE(Decimal128)
-#undef DECLARE
-
-void DirectDictionary::getString(
-    const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
-{
-    const auto & attribute = getAttribute(attribute_name);
-    checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
-
-    DirectDictionary::getItemsStringImpl<StringRef, StringRef>(
-        attribute,
-        ids,
-        [&](const size_t, const String value) { const auto ref = StringRef{value}; out->insertData(ref.data, ref.size); },
-        [&](const size_t) { return def; });
-}
-
-
-void DirectDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
-{
-    const auto & attribute = attributes.front();
-
-    switch (attribute.type)
+    auto type_call = [&](const auto &dictionary_attribute_type)
     {
-        case AttributeUnderlyingType::utUInt8:
-            has<UInt8>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utUInt16:
-            has<UInt16>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utUInt32:
-            has<UInt32>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utUInt64:
-            has<UInt64>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utUInt128:
-            has<UInt128>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utInt8:
-            has<Int8>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utInt16:
-            has<Int16>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utInt32:
-            has<Int32>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utInt64:
-            has<Int64>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utFloat32:
-            has<Float32>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utFloat64:
-            has<Float64>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utString:
-            has<String>(attribute, ids, out);
-            break;
+        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+        using AttributeType = typename Type::AttributeType;
 
-        case AttributeUnderlyingType::utDecimal32:
-            has<Decimal32>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utDecimal64:
-            has<Decimal64>(attribute, ids, out);
-            break;
-        case AttributeUnderlyingType::utDecimal128:
-            has<Decimal128>(attribute, ids, out);
-            break;
-    }
+        auto size = ids.size();
+
+        if constexpr (std::is_same_v<AttributeType, String>)
+        {
+            auto column_string = ColumnString::create();
+            auto out = column_string.get();
+
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ColumnString>(*default_untyped))
+                {
+                    getItemsImpl<String, String>(
+                        attribute,
+                        ids,
+                        [&](const size_t, const String value)
+                        {
+                            const auto ref = StringRef{value};
+                            out->insertData(ref.data, ref.size);
+                        },
+                        [&](const size_t row)
+                        {
+                            const auto ref = default_col->getDataAt(row);
+                            return String(ref.data, ref.size);
+                        });
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<String>();
+
+                    getItemsImpl<String, String>(
+                        attribute,
+                        ids,
+                        [&](const size_t, const String value)
+                        {
+                            const auto ref = StringRef{value};
+                            out->insertData(ref.data, ref.size);
+                        },
+                        [&](const size_t) { return def; });
+                }
+            }
+            else
+            {
+                const auto & null_value = std::get<StringRef>(attribute.null_values);
+
+                getItemsImpl<String, String>(
+                    attribute,
+                    ids,
+                    [&](const size_t, const String value)
+                    {
+                        const auto ref = StringRef{value};
+                        out->insertData(ref.data, ref.size);
+                    },
+                    [&](const size_t) { return String(null_value.data, null_value.size); });
+            }
+
+            result = std::move(column_string);
+        }
+        else if constexpr (IsNumber<AttributeType>)
+        {
+            auto column = ColumnVector<AttributeType>::create(size);
+            auto& out = column->getData();
+
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ColumnVector<AttributeType>>(*default_untyped))
+                {
+                    getItemsImpl<AttributeType, AttributeType>(
+                        attribute,
+                        ids,
+                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t row) { return default_col->getData()[row]; }
+                    );
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ColumnVector<AttributeType>>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<AttributeType>();
+
+                    getItemsImpl<AttributeType, AttributeType>(
+                        attribute,
+                        ids,
+                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t) { return def; }
+                    );
+                }
+            }
+            else
+            {
+                const auto null_value = std::get<AttributeType>(attribute.null_values);
+
+                getItemsImpl<AttributeType, AttributeType>(
+                    attribute,
+                    ids,
+                    [&](const size_t row, const auto value) { return out[row] = value; },
+                    [&](const size_t) { return null_value; });
+            }
+
+            result = std::move(column);
+        }
+        else if constexpr (IsDecimalNumber<AttributeType>)
+        {
+            // auto scale = getDecimalScale(*attribute.type);
+            auto column = ColumnDecimal<AttributeType>::create(size, 0);
+            auto& out = column->getData();
+
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ColumnDecimal<AttributeType>>(*default_untyped))
+                {
+                    getItemsImpl<AttributeType, AttributeType>(
+                        attribute,
+                        ids,
+                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t row) { return default_col->getData()[row]; }
+                    );
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ColumnDecimal<AttributeType>>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<AttributeType>();
+
+                    getItemsImpl<AttributeType, AttributeType>(
+                        attribute,
+                        ids,
+                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t) { return def; }
+                    );
+                }
+            }
+            else
+            {
+                const auto null_value = std::get<AttributeType>(attribute.null_values);
+
+                getItemsImpl<AttributeType, AttributeType>(
+                    attribute,
+                    ids,
+                    [&](const size_t row, const auto value) { return out[row] = value; },
+                    [&](const size_t) { return null_value; }
+                );
+            }
+
+            result = std::move(column);
+        }
+    };
+
+    callOnDictionaryAttributeType(attribute.type, type_call);
+   
+    return result;
 }
 
+ColumnUInt8::Ptr DirectDictionary::has(const Columns & key_columns, const DataTypes &) const
+{
+    PaddedPODArray<Key> backup_storage;
+    const auto& ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
+
+    auto result = ColumnUInt8::create(ext::size(ids));
+    auto& out = result->getData();
+
+    const auto rows = ext::size(ids);
+
+    HashMap<Key, UInt8> has_key;
+    for (const auto row : ext::range(0, rows))
+        has_key[ids[row]] = 0;
+
+    std::vector<Key> to_load;
+    to_load.reserve(has_key.size());
+    for (auto it = has_key.begin(); it != has_key.end(); ++it)
+        to_load.emplace_back(static_cast<Key>(it->getKey()));
+
+    auto stream = source_ptr->loadIds(to_load);
+    stream->readPrefix();
+
+    while (const auto block = stream->read())
+    {
+        const IColumn & id_column = *block.safeGetByPosition(0).column;
+
+        for (const auto row_idx : ext::range(0, id_column.size()))
+        {
+            const auto key = id_column[row_idx].get<UInt64>();
+            has_key[key] = 1;
+        }
+    }
+
+    stream->readSuffix();
+
+    for (const auto row : ext::range(0, rows))
+        out[row] = has_key[ids[row]];
+
+    query_count.fetch_add(rows, std::memory_order_relaxed);
+
+    return result;
+}
 
 void DirectDictionary::createAttributes()
 {
@@ -349,55 +383,14 @@ DirectDictionary::Attribute DirectDictionary::createAttributeWithType(const Attr
 {
     Attribute attr{type, {}, {}, attr_name};
 
-    switch (type)
+    auto type_call = [&](const auto &dictionary_attribute_type)
     {
-        case AttributeUnderlyingType::utUInt8:
-            createAttributeImpl<UInt8>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utUInt16:
-            createAttributeImpl<UInt16>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utUInt32:
-            createAttributeImpl<UInt32>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utUInt64:
-            createAttributeImpl<UInt64>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utUInt128:
-            createAttributeImpl<UInt128>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utInt8:
-            createAttributeImpl<Int8>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utInt16:
-            createAttributeImpl<Int16>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utInt32:
-            createAttributeImpl<Int32>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utInt64:
-            createAttributeImpl<Int64>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utFloat32:
-            createAttributeImpl<Float32>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utFloat64:
-            createAttributeImpl<Float64>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utString:
-            createAttributeImpl<String>(attr, null_value);
-            break;
+        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+        using AttributeType = typename Type::AttributeType;
+        createAttributeImpl<AttributeType>(attr, null_value);
+    };
 
-        case AttributeUnderlyingType::utDecimal32:
-            createAttributeImpl<Decimal32>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utDecimal64:
-            createAttributeImpl<Decimal64>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utDecimal128:
-            createAttributeImpl<Decimal128>(attr, null_value);
-            break;
-    }
+    callOnDictionaryAttributeType(type, type_call);
 
     return attr;
 }
@@ -427,13 +420,18 @@ void DirectDictionary::getItemsImpl(
 
         for (const size_t attribute_idx : ext::range(0, attributes.size()))
         {
+            if (attribute.name != attribute_name_by_index.at(attribute_idx))
+            {
+                continue;
+            }
+
             const IColumn & attribute_column = *block.safeGetByPosition(attribute_idx + 1).column;
 
             for (const auto row_idx : ext::range(0, id_column.size()))
             {
                 const auto key = id_column[row_idx].get<UInt64>();
 
-                if (value_by_key.find(key) != value_by_key.end() && attribute.name == attribute_name_by_index.at(attribute_idx))
+                if (value_by_key.find(key) != value_by_key.end())
                 {
                     if (attribute.type == AttributeUnderlyingType::utFloat32)
                     {
@@ -514,43 +512,6 @@ const DirectDictionary::Attribute & DirectDictionary::getAttribute(const std::st
 }
 
 
-template <typename T>
-void DirectDictionary::has(const Attribute &, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
-{
-    const auto rows = ext::size(ids);
-
-    HashMap<Key, UInt8> has_key;
-    for (const auto row : ext::range(0, rows))
-        has_key[ids[row]] = 0;
-
-    std::vector<Key> to_load;
-    to_load.reserve(has_key.size());
-    for (auto it = has_key.begin(); it != has_key.end(); ++it)
-        to_load.emplace_back(static_cast<Key>(it->getKey()));
-
-    auto stream = source_ptr->loadIds(to_load);
-    stream->readPrefix();
-
-    while (const auto block = stream->read())
-    {
-        const IColumn & id_column = *block.safeGetByPosition(0).column;
-
-        for (const auto row_idx : ext::range(0, id_column.size()))
-        {
-            const auto key = id_column[row_idx].get<UInt64>();
-            has_key[key] = 1;
-        }
-    }
-
-    stream->readSuffix();
-
-    for (const auto row : ext::range(0, rows))
-        out[row] = has_key[ids[row]];
-
-    query_count.fetch_add(rows, std::memory_order_relaxed);
-}
-
-
 BlockInputStreamPtr DirectDictionary::getBlockInputStream(const Names & /* column_names */, size_t /* max_block_size */) const
 {
     return source_ptr->loadAll();
diff --git a/src/Dictionaries/DirectDictionary.h b/src/Dictionaries/DirectDictionary.h
index 8b59fe16c69..1ee67190f04 100644
--- a/src/Dictionaries/DirectDictionary.h
+++ b/src/Dictionaries/DirectDictionary.h
@@ -65,76 +65,16 @@ public:
     void isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const override;
     void isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
 
-    template <typename T>
-    using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
+    DictionaryIdentifierType getIdentifierType() const override { return DictionaryIdentifierType::simple; }
 
-#define DECLARE(TYPE) \
-    void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const;
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
+    ColumnPtr getColumn(
+        const std::string& attribute_name,
+        const DataTypePtr & result_type,
+        const Columns & key_columns,
+        const DataTypes & key_types,
+        const ColumnPtr default_untyped) const override;
 
-    void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const;
-
-#define DECLARE(TYPE) \
-    void get##TYPE( \
-        const std::string & attribute_name, \
-        const PaddedPODArray<Key> & ids, \
-        const PaddedPODArray<TYPE> & def, \
-        ResultArrayType<TYPE> & out) const;
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
-
-    void
-    getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out)
-        const;
-
-#define DECLARE(TYPE) \
-    void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const;
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
-
-    void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const;
-
-    void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const;
+    ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override;
 
     BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
 
@@ -193,9 +133,6 @@ private:
 
     const Attribute & getAttribute(const std::string & attribute_name) const;
 
-    template <typename T>
-    void has(const Attribute & attribute, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const;
-
     Key getValueOrNullByKey(const Key & to_find) const;
 
     template <typename ChildType, typename AncestorType>

From 6017d1a034786a6e360950fce81c09cc340ec789 Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Sun, 27 Dec 2020 20:23:09 +0300
Subject: [PATCH 0220/1238] Updated CachedDictionary to new interface

---
 src/Dictionaries/CacheDictionary.cpp          | 165 +++++++++++++++---
 src/Dictionaries/CacheDictionary.h            |  77 ++------
 .../CacheDictionary_generate1.cpp             |  32 ----
 .../CacheDictionary_generate2.cpp             |  34 ----
 .../CacheDictionary_generate3.cpp             |  31 ----
 5 files changed, 154 insertions(+), 185 deletions(-)
 delete mode 100644 src/Dictionaries/CacheDictionary_generate1.cpp
 delete mode 100644 src/Dictionaries/CacheDictionary_generate2.cpp
 delete mode 100644 src/Dictionaries/CacheDictionary_generate3.cpp

diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp
index 1a0d7e28fea..c7517332594 100644
--- a/src/Dictionaries/CacheDictionary.cpp
+++ b/src/Dictionaries/CacheDictionary.cpp
@@ -17,7 +17,7 @@
 #include "CacheDictionary.inc.h"
 #include "DictionaryBlockInputStream.h"
 #include "DictionaryFactory.h"
-
+#include <Functions/FunctionHelpers.h>
 
 namespace ProfileEvents
 {
@@ -249,32 +249,145 @@ void CacheDictionary::isInConstantVector(const Key child_id, const PaddedPODArra
         out[i] = std::find(ancestors.begin(), ancestors.end(), ancestor_ids[i]) != ancestors.end();
 }
 
-void CacheDictionary::getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const
+ColumnPtr CacheDictionary::getColumn(
+    const std::string & attribute_name,
+    const DataTypePtr &,
+    const Columns & key_columns,
+    const DataTypes &,
+    const ColumnPtr default_untyped) const
 {
+    ColumnPtr result;
+
+    PaddedPODArray<Key> backup_storage;
+    const auto& ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
+    
     auto & attribute = getAttribute(attribute_name);
-    checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
 
-    const auto null_value = StringRef{std::get<String>(attribute.null_value)};
+    /// TODO: Check that attribute type is same as result type
+    /// TODO: Check if const will work as expected
 
-    getItemsString(attribute, ids, out, [&](const size_t) { return null_value; });
-}
+    auto type_call = [&](const auto &dictionary_attribute_type)
+    {
+        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+        using AttributeType = typename Type::AttributeType;
 
-void CacheDictionary::getString(
-    const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
-{
-    auto & attribute = getAttribute(attribute_name);
-    checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
+        auto identifiers_size = ids.size();
 
-    getItemsString(attribute, ids, out, [&](const size_t row) { return def->getDataAt(row); });
-}
+        if constexpr (std::is_same_v<AttributeType, String>)
+        {
+            auto column_string = ColumnString::create();
 
-void CacheDictionary::getString(
-    const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
-{
-    auto & attribute = getAttribute(attribute_name);
-    checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ColumnString>(*default_untyped))
+                {
+                    getItemsString(attribute, ids, column_string.get(), [&](const size_t row) { return default_col->getDataAt(row); });
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<String>();
 
-    getItemsString(attribute, ids, out, [&](const size_t) { return StringRef{def}; });
+                    getItemsString(attribute, ids, column_string.get(), [&](const size_t) { return StringRef{def}; });
+                }
+            }
+            else
+            {
+                const auto null_value = StringRef{std::get<String>(attribute.null_value)};
+
+                getItemsString(attribute, ids, column_string.get(), [&](const size_t) { return null_value; });
+            }
+
+            result = std::move(column_string);
+        }
+        else if constexpr (IsNumber<AttributeType>)
+        {
+            auto column = ColumnVector<AttributeType>::create(identifiers_size);
+            auto& out = column->getData();
+
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ColumnVector<AttributeType>>(*default_untyped))
+                {
+                    getItemsNumberImpl<AttributeType, AttributeType>(
+                        attribute,
+                        ids,
+                        out,
+                        [&](const size_t row) { return default_col->getData()[row]; }
+                    );
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ColumnVector<AttributeType>>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<AttributeType>();
+
+                    getItemsNumberImpl<AttributeType, AttributeType>(
+                        attribute,
+                        ids,
+                        out,
+                        [&](const size_t) { return def; }
+                    );
+                }
+            }
+            else
+            {
+                const auto null_value = std::get<AttributeType>(attribute.null_value);
+
+                getItemsNumberImpl<AttributeType, AttributeType>(
+                    attribute,
+                    ids,
+                    out,
+                    [&](const size_t) { return null_value; });
+            }
+
+            result = std::move(column);
+        }
+        else if constexpr (IsDecimalNumber<AttributeType>)
+        {
+            // auto scale = getDecimalScale(*attribute.type);
+            auto column = ColumnDecimal<AttributeType>::create(identifiers_size, 0);
+            auto& out = column->getData();
+
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ColumnDecimal<AttributeType>>(*default_untyped))
+                {
+                    getItemsNumberImpl<AttributeType, AttributeType>(
+                        attribute,
+                        ids,
+                        out,
+                        [&](const size_t row) { return default_col->getData()[row]; }
+                    );
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ColumnDecimal<AttributeType>>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<AttributeType>();
+
+                    getItemsNumberImpl<AttributeType, AttributeType>(
+                        attribute,
+                        ids,
+                        out,
+                        [&](const size_t) { return def; }
+                    );
+                }
+            }
+            else
+            {
+                const auto null_value = std::get<AttributeType>(attribute.null_value);
+
+                getItemsNumberImpl<AttributeType, AttributeType>(
+                    attribute,
+                    ids,
+                    out,
+                    [&](const size_t) { return null_value; }
+                );
+            }
+
+            result = std::move(column);
+        }
+    };
+
+    callOnDictionaryAttributeType(attribute.type, type_call);
+   
+    return result;
 }
 
 template<class... Ts>
@@ -375,8 +488,14 @@ size_t CacheDictionary::findCellIdxForSet(const Key & id) const
     return oldest_id;
 }
 
-void CacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
+ColumnUInt8::Ptr CacheDictionary::has(const Columns & key_columns, const DataTypes &) const
 {
+    PaddedPODArray<Key> backup_storage;
+    const auto& ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
+
+    auto result = ColumnUInt8::create(ext::size(ids));
+    auto& out = result->getData();
+
     /// There are three types of ids.
     /// - Valid ids. These ids are presented in local cache and their lifetime is not expired.
     /// - CacheExpired ids. Ids that are in local cache, but their values are rotted (lifetime is expired).
@@ -444,7 +563,7 @@ void CacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8>
     {
         /// Nothing to update - return;
         if (!cache_expired_count)
-            return;
+            return result;
 
         if (allow_read_expired_keys)
         {
@@ -458,7 +577,7 @@ void CacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8>
 
             tryPushToUpdateQueueOrThrow(update_unit_ptr);
             /// Update is async - no need to wait.
-            return;
+            return result;
         }
     }
 
@@ -483,6 +602,8 @@ void CacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8>
             for (const auto row : cache_expired_or_not_found_ids[key])
                 out[row] = true;
     }
+
+    return result;
 }
 
 
diff --git a/src/Dictionaries/CacheDictionary.h b/src/Dictionaries/CacheDictionary.h
index d242c6bc3eb..5833f623517 100644
--- a/src/Dictionaries/CacheDictionary.h
+++ b/src/Dictionaries/CacheDictionary.h
@@ -119,77 +119,22 @@ public:
 
     std::exception_ptr getLastException() const override;
 
+    DictionaryIdentifierType getIdentifierType() const override { return DictionaryIdentifierType::simple; }
+
+    ColumnPtr getColumn(
+        const std::string& attribute_name,
+        const DataTypePtr & result_type,
+        const Columns & key_columns,
+        const DataTypes & key_types,
+        const ColumnPtr default_untyped) const override;
+
+    ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override;
+
     template <typename T>
     using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
 
-#define DECLARE(TYPE) \
-    void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const;
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
-
-    void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const;
-
-#define DECLARE(TYPE) \
-    void get##TYPE( \
-        const std::string & attribute_name, \
-        const PaddedPODArray<Key> & ids, \
-        const PaddedPODArray<TYPE> & def, \
-        ResultArrayType<TYPE> & out) const;
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
-
-    void
-    getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out)
-        const;
-
-#define DECLARE(TYPE) \
-    void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const;
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
-
     void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const;
 
-    void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const;
-
     BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
 
 private:
diff --git a/src/Dictionaries/CacheDictionary_generate1.cpp b/src/Dictionaries/CacheDictionary_generate1.cpp
deleted file mode 100644
index 2c6742b3a8c..00000000000
--- a/src/Dictionaries/CacheDictionary_generate1.cpp
+++ /dev/null
@@ -1,32 +0,0 @@
-#include <Dictionaries/CacheDictionary.h>
-#include <Dictionaries/CacheDictionary.inc.h>
-
-namespace DB
-{
-#define DEFINE(TYPE) \
-    void CacheDictionary::get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) \
-        const \
-    { \
-        auto & attribute = getAttribute(attribute_name); \
-        checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
-        const auto null_value = std::get<TYPE>(attribute.null_value); \
-        getItemsNumberImpl<TYPE, TYPE>(attribute, ids, out, [&](const size_t) { return null_value; }); \
-    }
-
-DEFINE(UInt8)
-DEFINE(UInt16)
-DEFINE(UInt32)
-DEFINE(UInt64)
-DEFINE(UInt128)
-DEFINE(Int8)
-DEFINE(Int16)
-DEFINE(Int32)
-DEFINE(Int64)
-DEFINE(Float32)
-DEFINE(Float64)
-DEFINE(Decimal32)
-DEFINE(Decimal64)
-DEFINE(Decimal128)
-
-#undef DEFINE
-}
diff --git a/src/Dictionaries/CacheDictionary_generate2.cpp b/src/Dictionaries/CacheDictionary_generate2.cpp
deleted file mode 100644
index be28a6302c2..00000000000
--- a/src/Dictionaries/CacheDictionary_generate2.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-#include <Dictionaries/CacheDictionary.h>
-#include <Dictionaries/CacheDictionary.inc.h>
-
-namespace DB
-{
-#define DEFINE(TYPE) \
-    void CacheDictionary::get##TYPE( \
-        const std::string & attribute_name, \
-        const PaddedPODArray<Key> & ids, \
-        const PaddedPODArray<TYPE> & def, \
-        ResultArrayType<TYPE> & out) const \
-    { \
-        auto & attribute = getAttribute(attribute_name); \
-        checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
-        getItemsNumberImpl<TYPE, TYPE>(attribute, ids, out, [&](const size_t row) { return def[row]; }); \
-    }
-
-DEFINE(UInt8)
-DEFINE(UInt16)
-DEFINE(UInt32)
-DEFINE(UInt64)
-DEFINE(UInt128)
-DEFINE(Int8)
-DEFINE(Int16)
-DEFINE(Int32)
-DEFINE(Int64)
-DEFINE(Float32)
-DEFINE(Float64)
-DEFINE(Decimal32)
-DEFINE(Decimal64)
-DEFINE(Decimal128)
-
-#undef DEFINE
-}
diff --git a/src/Dictionaries/CacheDictionary_generate3.cpp b/src/Dictionaries/CacheDictionary_generate3.cpp
deleted file mode 100644
index 36195f166db..00000000000
--- a/src/Dictionaries/CacheDictionary_generate3.cpp
+++ /dev/null
@@ -1,31 +0,0 @@
-#include <Dictionaries/CacheDictionary.h>
-#include <Dictionaries/CacheDictionary.inc.h>
-
-namespace DB
-{
-#define DEFINE(TYPE) \
-    void CacheDictionary::get##TYPE( \
-        const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const \
-    { \
-        auto & attribute = getAttribute(attribute_name); \
-        checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
-        getItemsNumberImpl<TYPE, TYPE>(attribute, ids, out, [&](const size_t) { return def; }); \
-    }
-
-DEFINE(UInt8)
-DEFINE(UInt16)
-DEFINE(UInt32)
-DEFINE(UInt64)
-DEFINE(UInt128)
-DEFINE(Int8)
-DEFINE(Int16)
-DEFINE(Int32)
-DEFINE(Int64)
-DEFINE(Float32)
-DEFINE(Float64)
-DEFINE(Decimal32)
-DEFINE(Decimal64)
-DEFINE(Decimal128)
-
-#undef DEFINE
-}

From 81f4bc7b4c937703db0a6ea47ad7f22a29155c62 Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Sun, 27 Dec 2020 20:44:45 +0300
Subject: [PATCH 0221/1238] Updated SSDCacheDictionary to new interface

---
 src/Dictionaries/CacheDictionary.h      |   2 -
 src/Dictionaries/SSDCacheDictionary.cpp | 266 ++++++++++++++----------
 src/Dictionaries/SSDCacheDictionary.h   |  79 +------
 3 files changed, 163 insertions(+), 184 deletions(-)

diff --git a/src/Dictionaries/CacheDictionary.h b/src/Dictionaries/CacheDictionary.h
index 5833f623517..70bea884ae4 100644
--- a/src/Dictionaries/CacheDictionary.h
+++ b/src/Dictionaries/CacheDictionary.h
@@ -133,8 +133,6 @@ public:
     template <typename T>
     using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
 
-    void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const;
-
     BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
 
 private:
diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp
index 030f95da2ab..23507792f7e 100644
--- a/src/Dictionaries/SSDCacheDictionary.cpp
+++ b/src/Dictionaries/SSDCacheDictionary.cpp
@@ -23,6 +23,7 @@
 #include <city.h>
 #include <fcntl.h>
 
+#include <Functions/FunctionHelpers.h>
 
 namespace ProfileEvents
 {
@@ -1327,93 +1328,146 @@ SSDCacheDictionary::SSDCacheDictionary(
     createAttributes();
 }
 
-#define DECLARE(TYPE) \
-    void SSDCacheDictionary::get##TYPE( \
-        const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const \
-    { \
-        const auto index = getAttributeIndex(attribute_name); \
-        checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \
-        const auto null_value = std::get<TYPE>(null_values[index]); /* NOLINT */ \
-        getItemsNumberImpl<TYPE, TYPE>(index, ids, out, [&](const size_t) { return null_value; }); /* NOLINT */ \
-    }
+ColumnPtr SSDCacheDictionary::getColumn(
+    const std::string & attribute_name,
+    const DataTypePtr &,
+    const Columns & key_columns,
+    const DataTypes &,
+    const ColumnPtr default_untyped) const
+{
+    ColumnPtr result;
 
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
+    PaddedPODArray<Key> backup_storage;
+    const auto& ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
+    
+    const auto index = getAttributeIndex(attribute_name);
 
-#define DECLARE(TYPE) \
-    void SSDCacheDictionary::get##TYPE( \
-        const std::string & attribute_name, \
-        const PaddedPODArray<Key> & ids, \
-        const PaddedPODArray<TYPE> & def, \
-        ResultArrayType<TYPE> & out) const \
-    { \
-        const auto index = getAttributeIndex(attribute_name); \
-        checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \
-        getItemsNumberImpl<TYPE, TYPE>( \
-            index, \
-            ids, \
-            out, \
-            [&](const size_t row) { return def[row]; }); \
-    }
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
+    /// TODO: Check that attribute type is same as result type
+    /// TODO: Check if const will work as expected
 
-#define DECLARE(TYPE) \
-    void SSDCacheDictionary::get##TYPE( \
-        const std::string & attribute_name, \
-        const PaddedPODArray<Key> & ids, \
-        const TYPE def, \
-        ResultArrayType<TYPE> & out) const \
-    { \
-        const auto index = getAttributeIndex(attribute_name); \
-        checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \
-        getItemsNumberImpl<TYPE, TYPE>( \
-            index, \
-            ids, \
-            out, \
-            [&](const size_t) { return def; }); \
-    }
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
+    auto type_call = [&](const auto &dictionary_attribute_type)
+    {
+        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+        using AttributeType = typename Type::AttributeType;
+
+        auto identifiers_size = ids.size();
+
+        if constexpr (std::is_same_v<AttributeType, String>)
+        {
+            auto column_string = ColumnString::create();
+
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ColumnString>(*default_untyped))
+                {
+                    getItemsStringImpl(index, ids, column_string.get(), [&](const size_t row) { return default_col->getDataAt(row); });
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<String>();
+
+                    getItemsStringImpl(index, ids, column_string.get(), [&](const size_t) { return StringRef{def}; });
+                }
+            }
+            else
+            {
+                const auto null_value = StringRef{std::get<String>(null_values[index])};
+
+                getItemsStringImpl(index, ids, column_string.get(), [&](const size_t) { return null_value; });
+            }
+
+            result = std::move(column_string);
+        }
+        else if constexpr (IsNumber<AttributeType>)
+        {
+            auto column = ColumnVector<AttributeType>::create(identifiers_size);
+            auto& out = column->getData();
+
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ColumnVector<AttributeType>>(*default_untyped))
+                {
+                    getItemsNumberImpl<AttributeType, AttributeType>(
+                        index,
+                        ids,
+                        out,
+                        [&](const size_t row) { return default_col->getData()[row]; }
+                    );
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ColumnVector<AttributeType>>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<AttributeType>();
+
+                    getItemsNumberImpl<AttributeType, AttributeType>(
+                        index,
+                        ids,
+                        out,
+                        [&](const size_t) { return def; }
+                    );
+                }
+            }
+            else
+            {
+                const auto null_value = std::get<AttributeType>(null_values[index]);;
+
+                getItemsNumberImpl<AttributeType, AttributeType>(
+                    index,
+                    ids,
+                    out,
+                    [&](const size_t) { return null_value; });
+            }
+
+            result = std::move(column);
+        }
+        else if constexpr (IsDecimalNumber<AttributeType>)
+        {
+            // auto scale = getDecimalScale(*attribute.type);
+            auto column = ColumnDecimal<AttributeType>::create(identifiers_size, 0);
+            auto& out = column->getData();
+
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ColumnDecimal<AttributeType>>(*default_untyped))
+                {
+                    getItemsNumberImpl<AttributeType, AttributeType>(
+                        index,
+                        ids,
+                        out,
+                        [&](const size_t row) { return default_col->getData()[row]; }
+                    );
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ColumnDecimal<AttributeType>>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<AttributeType>();
+
+                    getItemsNumberImpl<AttributeType, AttributeType>(
+                        index,
+                        ids,
+                        out,
+                        [&](const size_t) { return def; }
+                    );
+                }
+            }
+            else
+            {
+                const auto null_value = std::get<AttributeType>(null_values[index]);;
+
+                getItemsNumberImpl<AttributeType, AttributeType>(
+                    index,
+                    ids,
+                    out,
+                    [&](const size_t) { return null_value; }
+                );
+            }
+
+            result = std::move(column);
+        }
+    };
+
+    callOnDictionaryAttributeType(dict_struct.attributes[index].underlying_type, type_call);
+   
+    return result;
+}
 
 template <typename AttributeType, typename OutputType, typename DefaultGetter>
 void SSDCacheDictionary::getItemsNumberImpl(
@@ -1445,34 +1499,6 @@ void SSDCacheDictionary::getItemsNumberImpl(
             getLifetime());
 }
 
-void SSDCacheDictionary::getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const
-{
-    const auto index = getAttributeIndex(attribute_name);
-    checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString);
-
-    const auto null_value = StringRef{std::get<String>(null_values[index])};
-
-    getItemsStringImpl(index, ids, out, [&](const size_t) { return null_value; });
-}
-
-void SSDCacheDictionary::getString(
-        const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
-{
-    const auto index = getAttributeIndex(attribute_name);
-    checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString);
-
-    getItemsStringImpl(index, ids, out, [&](const size_t row) { return def->getDataAt(row); });
-}
-
-void SSDCacheDictionary::getString(
-        const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
-{
-    const auto index = getAttributeIndex(attribute_name);
-    checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString);
-
-    getItemsStringImpl(index, ids, out, [&](const size_t) { return StringRef{def}; });
-}
-
 template <typename DefaultGetter>
 void SSDCacheDictionary::getItemsStringImpl(const size_t attribute_index, const PaddedPODArray<Key> & ids,
         ColumnString * out, DefaultGetter && get_default) const
@@ -1545,14 +1571,24 @@ void SSDCacheDictionary::getItemsStringImpl(const size_t attribute_index, const
     }
 }
 
-void SSDCacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
+ColumnUInt8::Ptr SSDCacheDictionary::has(const Columns & key_columns, const DataTypes &) const
 {
+    PaddedPODArray<Key> backup_storage;
+    const auto& ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
+
+    auto result = ColumnUInt8::create(ext::size(ids));
+    auto& out = result->getData();
+
+    const auto rows = ext::size(ids);
+    for (const auto row : ext::range(0, rows))
+        out[row] = false;
+
     const auto now = std::chrono::system_clock::now();
 
     std::unordered_map<Key, std::vector<size_t>> not_found_ids;
     storage.has(ids, out, not_found_ids, now);
     if (not_found_ids.empty())
-        return;
+        return result;
 
     std::vector<Key> required_ids(not_found_ids.size());
     std::transform(std::begin(not_found_ids), std::end(not_found_ids), std::begin(required_ids), [](const auto & pair) { return pair.first; });
@@ -1571,6 +1607,8 @@ void SSDCacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UIn
                     out[row] = false;
             },
             getLifetime());
+
+    return result;
 }
 
 BlockInputStreamPtr SSDCacheDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
diff --git a/src/Dictionaries/SSDCacheDictionary.h b/src/Dictionaries/SSDCacheDictionary.h
index 8219a52bac9..158bf38cf42 100644
--- a/src/Dictionaries/SSDCacheDictionary.h
+++ b/src/Dictionaries/SSDCacheDictionary.h
@@ -351,77 +351,20 @@ public:
 
     std::exception_ptr getLastException() const override { return storage.getLastException(); }
 
+    DictionaryIdentifierType getIdentifierType() const override { return DictionaryIdentifierType::simple; }
+
+    ColumnPtr getColumn(
+        const std::string& attribute_name,
+        const DataTypePtr & result_type,
+        const Columns & key_columns,
+        const DataTypes & key_types,
+        const ColumnPtr default_untyped) const override;
+
+    ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override;
+
     template <typename T>
     using ResultArrayType = SSDCacheStorage::ResultArrayType<T>;
 
-#define DECLARE(TYPE) \
-    void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const;
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
-
-    void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const;
-
-#define DECLARE(TYPE) \
-    void get##TYPE( \
-        const std::string & attribute_name, \
-        const PaddedPODArray<Key> & ids, \
-        const PaddedPODArray<TYPE> & def, \
-        ResultArrayType<TYPE> & out) const;
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
-
-    void
-    getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * def, ColumnString * out)
-    const;
-
-#define DECLARE(TYPE) \
-    void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const;
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
-
-    void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * out) const;
-
-    void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const;
-
     BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
 
 private:

From b4896ee82bd2b2b061179adbb790b3cc9978c992 Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Mon, 28 Dec 2020 00:36:24 +0300
Subject: [PATCH 0222/1238] Updated ComplexKeyDirectDictionary to new interface

---
 .../ComplexKeyDirectDictionary.cpp            | 582 +++++++-----------
 src/Dictionaries/ComplexKeyDirectDictionary.h |  90 +--
 src/Dictionaries/DirectDictionary.cpp         |  47 --
 src/Dictionaries/DirectDictionary.h           |   9 -
 4 files changed, 239 insertions(+), 489 deletions(-)

diff --git a/src/Dictionaries/ComplexKeyDirectDictionary.cpp b/src/Dictionaries/ComplexKeyDirectDictionary.cpp
index c4b8678672c..0668c5da07a 100644
--- a/src/Dictionaries/ComplexKeyDirectDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyDirectDictionary.cpp
@@ -3,6 +3,7 @@
 #include "DictionaryBlockInputStream.h"
 #include "DictionaryFactory.h"
 #include <Core/Defines.h>
+#include <Functions/FunctionHelpers.h>
 
 namespace DB
 {
@@ -31,194 +32,235 @@ ComplexKeyDirectDictionary::ComplexKeyDirectDictionary(
     createAttributes();
 }
 
-#define DECLARE(TYPE) \
-    void ComplexKeyDirectDictionary::get##TYPE(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const \
-    { \
-        dict_struct.validateKeyTypes(key_types); \
-        const auto & attribute = getAttribute(attribute_name); \
-        checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
-\
-        const auto null_value = std::get<TYPE>(attribute.null_values); \
-\
-        getItemsImpl<TYPE, TYPE>( \
-            attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return null_value; }); \
-    }
-DECLARE(UInt8)
-DECLARE(UInt16)
-DECLARE(UInt32)
-DECLARE(UInt64)
-DECLARE(UInt128)
-DECLARE(Int8)
-DECLARE(Int16)
-DECLARE(Int32)
-DECLARE(Int64)
-DECLARE(Float32)
-DECLARE(Float64)
-DECLARE(Decimal32)
-DECLARE(Decimal64)
-DECLARE(Decimal128)
-#undef DECLARE
-
-void ComplexKeyDirectDictionary::getString(
-    const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
+ColumnPtr ComplexKeyDirectDictionary::getColumn(
+    const std::string & attribute_name,
+    const DataTypePtr &,
+    const Columns & key_columns,
+    const DataTypes & key_types,
+    const ColumnPtr default_untyped) const
 {
     dict_struct.validateKeyTypes(key_types);
-    const auto & attribute = getAttribute(attribute_name);
-    checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
 
-    const auto & null_value = std::get<StringRef>(attribute.null_values);
-    getItemsStringImpl<StringRef, StringRef>(
-        attribute,
-        key_columns,
-        [&](const size_t, const String value) { const auto ref = StringRef{value}; out->insertData(ref.data, ref.size); },
-        [&](const size_t) { return String(null_value.data, null_value.size); });
-}
-
-#define DECLARE(TYPE) \
-    void ComplexKeyDirectDictionary::get##TYPE( \
-        const std::string & attribute_name, \
-        const Columns & key_columns, \
-        const DataTypes & key_types, \
-        const PaddedPODArray<TYPE> & def, \
-        ResultArrayType<TYPE> & out) const \
-    { \
-        dict_struct.validateKeyTypes(key_types); \
-        const auto & attribute = getAttribute(attribute_name); \
-        checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
-\
-        getItemsImpl<TYPE, TYPE>( \
-            attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t row) { return def[row]; }); \
-    }
-DECLARE(UInt8)
-DECLARE(UInt16)
-DECLARE(UInt32)
-DECLARE(UInt64)
-DECLARE(UInt128)
-DECLARE(Int8)
-DECLARE(Int16)
-DECLARE(Int32)
-DECLARE(Int64)
-DECLARE(Float32)
-DECLARE(Float64)
-DECLARE(Decimal32)
-DECLARE(Decimal64)
-DECLARE(Decimal128)
-#undef DECLARE
-
-void ComplexKeyDirectDictionary::getString(
-    const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const ColumnString * const def, ColumnString * const out) const
-{
-    dict_struct.validateKeyTypes(key_types);
+    ColumnPtr result;
 
     const auto & attribute = getAttribute(attribute_name);
-    checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
 
-    getItemsStringImpl<StringRef, StringRef>(
-        attribute,
-        key_columns,
-        [&](const size_t, const String value) { const auto ref = StringRef{value}; out->insertData(ref.data, ref.size); },
-        [&](const size_t row) { const auto ref = def->getDataAt(row); return String(ref.data, ref.size); });
-}
+    /// TODO: Check that attribute type is same as result type
+    /// TODO: Check if const will work as expected
+    
+    auto size = key_columns.front()->size();
 
-#define DECLARE(TYPE) \
-    void ComplexKeyDirectDictionary::get##TYPE( \
-        const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const TYPE def, ResultArrayType<TYPE> & out) const \
-    { \
-        dict_struct.validateKeyTypes(key_types); \
-        const auto & attribute = getAttribute(attribute_name); \
-        checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
-\
-        getItemsImpl<TYPE, TYPE>( \
-            attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
-    }
-DECLARE(UInt8)
-DECLARE(UInt16)
-DECLARE(UInt32)
-DECLARE(UInt64)
-DECLARE(UInt128)
-DECLARE(Int8)
-DECLARE(Int16)
-DECLARE(Int32)
-DECLARE(Int64)
-DECLARE(Float32)
-DECLARE(Float64)
-DECLARE(Decimal32)
-DECLARE(Decimal64)
-DECLARE(Decimal128)
-#undef DECLARE
-
-void ComplexKeyDirectDictionary::getString(
-    const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const String & def, ColumnString * const out) const
-{
-    dict_struct.validateKeyTypes(key_types);
-
-    const auto & attribute = getAttribute(attribute_name);
-    checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
-
-    ComplexKeyDirectDictionary::getItemsStringImpl<StringRef, StringRef>(
-        attribute,
-        key_columns,
-        [&](const size_t, const String value) { const auto ref = StringRef{value}; out->insertData(ref.data, ref.size); },
-        [&](const size_t) { return def; });
-}
-
-
-void ComplexKeyDirectDictionary::has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const
-{
-    dict_struct.validateKeyTypes(key_types);
-    const auto & attribute = attributes.front();
-
-    switch (attribute.type)
+    auto type_call = [&](const auto &dictionary_attribute_type)
     {
-        case AttributeUnderlyingType::utUInt8:
-            has<UInt8>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utUInt16:
-            has<UInt16>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utUInt32:
-            has<UInt32>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utUInt64:
-            has<UInt64>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utUInt128:
-            has<UInt128>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utInt8:
-            has<Int8>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utInt16:
-            has<Int16>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utInt32:
-            has<Int32>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utInt64:
-            has<Int64>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utFloat32:
-            has<Float32>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utFloat64:
-            has<Float64>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utString:
-            has<String>(attribute, key_columns, out);
-            break;
+        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+        using AttributeType = typename Type::AttributeType;
 
-        case AttributeUnderlyingType::utDecimal32:
-            has<Decimal32>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utDecimal64:
-            has<Decimal64>(attribute, key_columns, out);
-            break;
-        case AttributeUnderlyingType::utDecimal128:
-            has<Decimal128>(attribute, key_columns, out);
-            break;
-    }
+        if constexpr (std::is_same_v<AttributeType, String>)
+        {
+            auto column_string = ColumnString::create();
+            auto out = column_string.get();
+
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ColumnString>(*default_untyped))
+                {
+                    getItemsImpl<String, String>(
+                        attribute,
+                        key_columns,
+                        [&](const size_t, const String value)
+                        {
+                            const auto ref = StringRef{value};
+                            out->insertData(ref.data, ref.size);
+                        },
+                        [&](const size_t row)
+                        {
+                            const auto ref = default_col->getDataAt(row);
+                            return String(ref.data, ref.size);
+                        });
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<String>();
+
+                    getItemsImpl<String, String>(
+                        attribute,
+                        key_columns,
+                        [&](const size_t, const String value)
+                        {
+                            const auto ref = StringRef{value};
+                            out->insertData(ref.data, ref.size);
+                        },
+                        [&](const size_t) { return def; });
+                }
+            }
+            else
+            {
+                const auto & null_value = std::get<StringRef>(attribute.null_values);
+
+                getItemsImpl<String, String>(
+                    attribute,
+                    key_columns,
+                    [&](const size_t, const String value)
+                    {
+                        const auto ref = StringRef{value};
+                        out->insertData(ref.data, ref.size);
+                    },
+                    [&](const size_t) { return String(null_value.data, null_value.size); });
+            }
+
+            result = std::move(column_string);
+        }
+        else if constexpr (IsNumber<AttributeType>)
+        {
+            auto column = ColumnVector<AttributeType>::create(size);
+            auto& out = column->getData();
+
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ColumnVector<AttributeType>>(*default_untyped))
+                {
+                    getItemsImpl<AttributeType, AttributeType>(
+                        attribute,
+                        key_columns,
+                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t row) { return default_col->getData()[row]; }
+                    );
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ColumnVector<AttributeType>>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<AttributeType>();
+
+                    getItemsImpl<AttributeType, AttributeType>(
+                        attribute,
+                        key_columns,
+                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t) { return def; }
+                    );
+                }
+            }
+            else
+            {
+                const auto null_value = std::get<AttributeType>(attribute.null_values);
+
+                getItemsImpl<AttributeType, AttributeType>(
+                    attribute,
+                    key_columns,
+                    [&](const size_t row, const auto value) { return out[row] = value; },
+                    [&](const size_t) { return null_value; });
+            }
+
+            result = std::move(column);
+        }
+        else if constexpr (IsDecimalNumber<AttributeType>)
+        {
+            // auto scale = getDecimalScale(*attribute.type);
+            auto column = ColumnDecimal<AttributeType>::create(size, 0);
+            auto& out = column->getData();
+
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ColumnDecimal<AttributeType>>(*default_untyped))
+                {
+                    getItemsImpl<AttributeType, AttributeType>(
+                        attribute,
+                        key_columns,
+                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t row) { return default_col->getData()[row]; }
+                    );
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ColumnDecimal<AttributeType>>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<AttributeType>();
+
+                    getItemsImpl<AttributeType, AttributeType>(
+                        attribute,
+                        key_columns,
+                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t) { return def; }
+                    );
+                }
+            }
+            else
+            {
+                const auto null_value = std::get<AttributeType>(attribute.null_values);
+
+                getItemsImpl<AttributeType, AttributeType>(
+                    attribute,
+                    key_columns,
+                    [&](const size_t row, const auto value) { return out[row] = value; },
+                    [&](const size_t) { return null_value; }
+                );
+            }
+
+            result = std::move(column);
+        }
+    };
+
+    callOnDictionaryAttributeType(attribute.type, type_call);
+   
+    return result;
 }
 
+ColumnUInt8::Ptr ComplexKeyDirectDictionary::has(const Columns & key_columns, const DataTypes & key_types) const
+{
+    dict_struct.validateKeyTypes(key_types);
+
+    auto size = key_columns.front()->size();
+    auto result = ColumnUInt8::create(size);
+    auto& out = result->getData();
+
+    const auto rows = key_columns.front()->size();
+    const auto keys_size = dict_struct.key->size();
+    StringRefs keys_array(keys_size);
+    MapType<UInt8> has_key;
+    Arena temporary_keys_pool;
+    std::vector<size_t> to_load(rows);
+    PODArray<StringRef> keys(rows);
+
+    for (const auto row : ext::range(0, rows))
+    {
+        const StringRef key = placeKeysInPool(row, key_columns, keys_array, *dict_struct.key, temporary_keys_pool);
+        keys[row] = key;
+        has_key[key] = 0;
+        to_load[row] = row;
+    }
+
+    auto stream = source_ptr->loadKeys(key_columns, to_load);
+
+    stream->readPrefix();
+
+    while (const auto block = stream->read())
+    {
+        const auto columns = ext::map<Columns>(
+            ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; });
+
+        Arena pool;
+
+        StringRefs keys_temp(keys_size);
+
+        const auto columns_size = columns.front()->size();
+
+        for (const auto row_idx : ext::range(0, columns_size))
+        {
+            const StringRef key = placeKeysInPool(row_idx, columns, keys_temp, *dict_struct.key, pool);
+            if (has_key.has(key))
+            {
+                has_key[key] = 1;
+            }
+        }
+    }
+
+    stream->readSuffix();
+
+    for (const auto row : ext::range(0, rows))
+    {
+        out[row] = has_key[keys[row]];
+    }
+
+    query_count.fetch_add(rows, std::memory_order_relaxed);
+
+    return result;
+}
 
 void ComplexKeyDirectDictionary::createAttributes()
 {
@@ -237,7 +279,6 @@ void ComplexKeyDirectDictionary::createAttributes()
     }
 }
 
-
 template <typename T>
 void ComplexKeyDirectDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
 {
@@ -258,55 +299,14 @@ ComplexKeyDirectDictionary::Attribute ComplexKeyDirectDictionary::createAttribut
 {
     Attribute attr{type, {}, {}, attr_name};
 
-    switch (type)
+    auto type_call = [&](const auto &dictionary_attribute_type)
     {
-        case AttributeUnderlyingType::utUInt8:
-            createAttributeImpl<UInt8>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utUInt16:
-            createAttributeImpl<UInt16>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utUInt32:
-            createAttributeImpl<UInt32>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utUInt64:
-            createAttributeImpl<UInt64>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utUInt128:
-            createAttributeImpl<UInt128>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utInt8:
-            createAttributeImpl<Int8>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utInt16:
-            createAttributeImpl<Int16>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utInt32:
-            createAttributeImpl<Int32>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utInt64:
-            createAttributeImpl<Int64>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utFloat32:
-            createAttributeImpl<Float32>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utFloat64:
-            createAttributeImpl<Float64>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utString:
-            createAttributeImpl<String>(attr, null_value);
-            break;
+        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+        using AttributeType = typename Type::AttributeType;
+        createAttributeImpl<AttributeType>(attr, null_value);
+    };
 
-        case AttributeUnderlyingType::utDecimal32:
-            createAttributeImpl<Decimal32>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utDecimal64:
-            createAttributeImpl<Decimal64>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utDecimal128:
-            createAttributeImpl<Decimal128>(attr, null_value);
-            break;
-    }
+    callOnDictionaryAttributeType(type, type_call);
 
     return attr;
 }
@@ -392,6 +392,11 @@ void ComplexKeyDirectDictionary::getItemsImpl(
             });
         for (const size_t attribute_idx : ext::range(0, attributes.size()))
         {
+            if (attribute.name != attribute_name_by_index.at(attribute_idx))
+            {
+                continue;
+            }
+
             const IColumn & attribute_column = *attribute_columns[attribute_idx];
             Arena pool;
 
@@ -402,7 +407,7 @@ void ComplexKeyDirectDictionary::getItemsImpl(
             for (const auto row_idx : ext::range(0, columns_size))
             {
                 const StringRef key = placeKeysInPool(row_idx, columns, keys_temp, *dict_struct.key, pool);
-                if (value_by_key.has(key) && attribute.name == attribute_name_by_index.at(attribute_idx))
+                if (value_by_key.has(key))
                 {
                     if (attribute.type == AttributeUnderlyingType::utFloat32)
                     {
@@ -428,72 +433,6 @@ void ComplexKeyDirectDictionary::getItemsImpl(
     query_count.fetch_add(rows, std::memory_order_relaxed);
 }
 
-template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
-void ComplexKeyDirectDictionary::getItemsStringImpl(
-    const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
-{
-    const auto rows = key_columns.front()->size();
-    const auto keys_size = dict_struct.key->size();
-    StringRefs keys_array(keys_size);
-    MapType<String> value_by_key;
-    Arena temporary_keys_pool;
-    std::vector<size_t> to_load(rows);
-    PODArray<StringRef> keys(rows);
-
-    for (const auto row : ext::range(0, rows))
-    {
-        const StringRef key = placeKeysInPool(row, key_columns, keys_array, *dict_struct.key, temporary_keys_pool);
-        keys[row] = key;
-        value_by_key[key] = get_default(row);
-        to_load[row] = row;
-    }
-
-    auto stream = source_ptr->loadKeys(key_columns, to_load);
-    const auto attributes_size = attributes.size();
-
-    stream->readPrefix();
-
-    while (const auto block = stream->read())
-    {
-        const auto columns = ext::map<Columns>(
-            ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; });
-
-        const auto attribute_columns = ext::map<Columns>(ext::range(0, attributes_size), [&](const size_t attribute_idx)
-            {
-                return block.safeGetByPosition(keys_size + attribute_idx).column;
-            });
-        for (const size_t attribute_idx : ext::range(0, attributes.size()))
-        {
-            const IColumn & attribute_column = *attribute_columns[attribute_idx];
-            Arena pool;
-
-            StringRefs keys_temp(keys_size);
-
-            const auto columns_size = columns.front()->size();
-
-            for (const auto row_idx : ext::range(0, columns_size))
-            {
-                const StringRef key = placeKeysInPool(row_idx, columns, keys_temp, *dict_struct.key, pool);
-                if (value_by_key.has(key) && attribute.name == attribute_name_by_index.at(attribute_idx))
-                {
-                    const String from_source = attribute_column[row_idx].template get<String>();
-                    value_by_key[key] = from_source;
-                }
-            }
-        }
-    }
-
-    stream->readSuffix();
-
-    for (const auto row : ext::range(0, rows))
-    {
-        set_value(row, value_by_key[keys[row]]);
-    }
-
-    query_count.fetch_add(rows, std::memory_order_relaxed);
-}
-
-
 const ComplexKeyDirectDictionary::Attribute & ComplexKeyDirectDictionary::getAttribute(const std::string & attribute_name) const
 {
     const auto it = attribute_index_by_name.find(attribute_name);
@@ -503,65 +442,6 @@ const ComplexKeyDirectDictionary::Attribute & ComplexKeyDirectDictionary::getAtt
     return attributes[it->second];
 }
 
-
-template <typename T>
-void ComplexKeyDirectDictionary::has(const Attribute & attribute, const Columns & key_columns, PaddedPODArray<UInt8> & out) const
-{
-    const auto rows = key_columns.front()->size();
-    const auto keys_size = dict_struct.key->size();
-    StringRefs keys_array(keys_size);
-    MapType<UInt8> has_key;
-    Arena temporary_keys_pool;
-    std::vector<size_t> to_load(rows);
-    PODArray<StringRef> keys(rows);
-
-    for (const auto row : ext::range(0, rows))
-    {
-        const StringRef key = placeKeysInPool(row, key_columns, keys_array, *dict_struct.key, temporary_keys_pool);
-        keys[row] = key;
-        has_key[key] = 0;
-        to_load[row] = row;
-    }
-
-    auto stream = source_ptr->loadKeys(key_columns, to_load);
-
-    stream->readPrefix();
-
-    while (const auto block = stream->read())
-    {
-        const auto columns = ext::map<Columns>(
-            ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; });
-
-        for (const size_t attribute_idx : ext::range(0, attributes.size()))
-        {
-            Arena pool;
-
-            StringRefs keys_temp(keys_size);
-
-            const auto columns_size = columns.front()->size();
-
-            for (const auto row_idx : ext::range(0, columns_size))
-            {
-                const StringRef key = placeKeysInPool(row_idx, columns, keys_temp, *dict_struct.key, pool);
-                if (has_key.has(key) && attribute.name == attribute_name_by_index.at(attribute_idx))
-                {
-                    has_key[key] = 1;
-                }
-            }
-        }
-    }
-
-    stream->readSuffix();
-
-    for (const auto row : ext::range(0, rows))
-    {
-        out[row] = has_key[keys[row]];
-    }
-
-    query_count.fetch_add(rows, std::memory_order_relaxed);
-}
-
-
 BlockInputStreamPtr ComplexKeyDirectDictionary::getBlockInputStream(const Names & /* column_names */, size_t /* max_block_size */) const
 {
     return source_ptr->loadAll();
diff --git a/src/Dictionaries/ComplexKeyDirectDictionary.h b/src/Dictionaries/ComplexKeyDirectDictionary.h
index dc602be103f..6c1f73f12f7 100644
--- a/src/Dictionaries/ComplexKeyDirectDictionary.h
+++ b/src/Dictionaries/ComplexKeyDirectDictionary.h
@@ -60,78 +60,16 @@ public:
         return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
     }
 
-    template <typename T>
-    using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
+    DictionaryIdentifierType getIdentifierType() const override { return DictionaryIdentifierType::complex; }
 
-#define DECLARE(TYPE) \
-    void get##TYPE(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const;
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
+    ColumnPtr getColumn(
+        const std::string& attribute_name,
+        const DataTypePtr & result_type,
+        const Columns & key_columns,
+        const DataTypes & key_types,
+        const ColumnPtr default_untyped) const override;
 
-    void getString(
-    const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const;
-
-#define DECLARE(TYPE) \
-    void get##TYPE( \
-        const std::string & attribute_name, \
-        const Columns & key_columns, \
-        const DataTypes & key_types, \
-        const PaddedPODArray<TYPE> & def, \
-        ResultArrayType<TYPE> & out) const;
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
-
-    void getString(
-    const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const ColumnString * const def, ColumnString * const out) const;
-
-#define DECLARE(TYPE) \
-    void get##TYPE(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const TYPE def, ResultArrayType<TYPE> & out) const;
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
-
-    void getString(
-    const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const String & def, ColumnString * const out) const;
-
-    void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
+    ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override;
 
     BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
 
@@ -168,8 +106,6 @@ private:
     template <typename T>
     void addAttributeSize(const Attribute & attribute);
 
-    void calculateBytesAllocated();
-
     template <typename T>
     void createAttributeImpl(Attribute & attribute, const Field & null_value);
 
@@ -179,17 +115,10 @@ private:
     StringRef placeKeysInPool(
         const size_t row, const Columns & key_columns, StringRefs & keys, const std::vector<DictionaryAttribute> & key_attributes, Pool & pool) const;
 
-    template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
-    void getItemsStringImpl(
-        const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
-
     template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
     void getItemsImpl(
         const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
 
-    template <typename T>
-    void resize(Attribute & attribute, const Key id);
-
     template <typename T>
     void setAttributeValueImpl(Attribute & attribute, const Key id, const T & value);
 
@@ -197,9 +126,6 @@ private:
 
     const Attribute & getAttribute(const std::string & attribute_name) const;
 
-    template <typename T>
-    void has(const Attribute & attribute, const Columns & key_columns, PaddedPODArray<UInt8> & out) const;
-
     const DictionaryStructure dict_struct;
     const DictionarySourcePtr source_ptr;
     const DictionaryLifetime dict_lifetime;
diff --git a/src/Dictionaries/DirectDictionary.cpp b/src/Dictionaries/DirectDictionary.cpp
index e101055aa5c..d024a7d7059 100644
--- a/src/Dictionaries/DirectDictionary.cpp
+++ b/src/Dictionaries/DirectDictionary.cpp
@@ -455,53 +455,6 @@ void DirectDictionary::getItemsImpl(
     query_count.fetch_add(rows, std::memory_order_relaxed);
 }
 
-template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
-void DirectDictionary::getItemsStringImpl(
-    const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
-{
-    const auto rows = ext::size(ids);
-
-    HashMap<Key, String> value_by_key;
-    for (const auto row : ext::range(0, rows))
-        value_by_key[ids[row]] = get_default(row);
-
-    std::vector<Key> to_load;
-    to_load.reserve(value_by_key.size());
-    for (auto it = value_by_key.begin(); it != value_by_key.end(); ++it)
-        to_load.emplace_back(static_cast<Key>(it->getKey()));
-
-    auto stream = source_ptr->loadIds(to_load);
-    stream->readPrefix();
-
-    while (const auto block = stream->read())
-    {
-        const IColumn & id_column = *block.safeGetByPosition(0).column;
-
-        for (const size_t attribute_idx : ext::range(0, attributes.size()))
-        {
-
-            const IColumn & attribute_column = *block.safeGetByPosition(attribute_idx + 1).column;
-
-            for (const auto row_idx : ext::range(0, id_column.size()))
-            {
-                const auto key = id_column[row_idx].get<UInt64>();
-                if (value_by_key.find(key) != value_by_key.end() && attribute.name == attribute_name_by_index.at(attribute_idx))
-                {
-                    const String from_source = attribute_column[row_idx].get<String>();
-                    value_by_key[key] = from_source;
-                }
-            }
-        }
-    }
-    stream->readSuffix();
-
-    for (const auto row : ext::range(0, rows))
-        set_value(row, value_by_key[ids[row]]);
-
-    query_count.fetch_add(rows, std::memory_order_relaxed);
-}
-
-
 const DirectDictionary::Attribute & DirectDictionary::getAttribute(const std::string & attribute_name) const
 {
     const auto it = attribute_index_by_name.find(attribute_name);
diff --git a/src/Dictionaries/DirectDictionary.h b/src/Dictionaries/DirectDictionary.h
index 1ee67190f04..48250315657 100644
--- a/src/Dictionaries/DirectDictionary.h
+++ b/src/Dictionaries/DirectDictionary.h
@@ -108,24 +108,15 @@ private:
     template <typename T>
     void addAttributeSize(const Attribute & attribute);
 
-    void calculateBytesAllocated();
-
     template <typename T>
     void createAttributeImpl(Attribute & attribute, const Field & null_value);
 
     Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value, const std::string & name);
 
-    template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
-    void getItemsStringImpl(
-        const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
-
     template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
     void getItemsImpl(
         const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
 
-    template <typename T>
-    void resize(Attribute & attribute, const Key id);
-
     template <typename T>
     void setAttributeValueImpl(Attribute & attribute, const Key id, const T & value);
 

From 0d2e498ed4d84d116ec0095f5455833a2080f37c Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Mon, 28 Dec 2020 23:49:40 +0300
Subject: [PATCH 0223/1238] Updated ComplexKeyCacheDictionary to new interface

---
 .../ComplexKeyCacheDictionary.cpp             | 276 +++++++++++++++---
 src/Dictionaries/ComplexKeyCacheDictionary.h  |  89 +-----
 ...acheDictionary_createAttributeWithType.cpp |  45 ---
 .../ComplexKeyCacheDictionary_generate1.cpp   |  32 --
 .../ComplexKeyCacheDictionary_generate2.cpp   |  35 ---
 .../ComplexKeyCacheDictionary_generate3.cpp   |  35 ---
 ...exKeyCacheDictionary_setAttributeValue.cpp |  78 -----
 ...cheDictionary_setDefaultAttributeValue.cpp |  71 -----
 8 files changed, 245 insertions(+), 416 deletions(-)
 delete mode 100644 src/Dictionaries/ComplexKeyCacheDictionary_createAttributeWithType.cpp
 delete mode 100644 src/Dictionaries/ComplexKeyCacheDictionary_generate1.cpp
 delete mode 100644 src/Dictionaries/ComplexKeyCacheDictionary_generate2.cpp
 delete mode 100644 src/Dictionaries/ComplexKeyCacheDictionary_generate3.cpp
 delete mode 100644 src/Dictionaries/ComplexKeyCacheDictionary_setAttributeValue.cpp
 delete mode 100644 src/Dictionaries/ComplexKeyCacheDictionary_setDefaultAttributeValue.cpp

diff --git a/src/Dictionaries/ComplexKeyCacheDictionary.cpp b/src/Dictionaries/ComplexKeyCacheDictionary.cpp
index fefa2a44aaf..504db30b8d1 100644
--- a/src/Dictionaries/ComplexKeyCacheDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyCacheDictionary.cpp
@@ -10,6 +10,7 @@
 #include <ext/range.h>
 #include "DictionaryBlockInputStream.h"
 #include "DictionaryFactory.h"
+#include <Functions/FunctionHelpers.h>
 
 
 namespace ProfileEvents
@@ -70,48 +71,145 @@ ComplexKeyCacheDictionary::ComplexKeyCacheDictionary(
     createAttributes();
 }
 
-
-void ComplexKeyCacheDictionary::getString(
-    const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
-{
-    dict_struct.validateKeyTypes(key_types);
-
-    auto & attribute = getAttribute(attribute_name);
-    checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
-
-    const auto null_value = StringRef{std::get<String>(attribute.null_values)};
-
-    getItemsString(attribute, key_columns, out, [&](const size_t) { return null_value; });
-}
-
-void ComplexKeyCacheDictionary::getString(
+ColumnPtr ComplexKeyCacheDictionary::getColumn(
     const std::string & attribute_name,
+    const DataTypePtr &,
     const Columns & key_columns,
     const DataTypes & key_types,
-    const ColumnString * const def,
-    ColumnString * const out) const
+    const ColumnPtr default_untyped) const
 {
     dict_struct.validateKeyTypes(key_types);
 
-    auto & attribute = getAttribute(attribute_name);
-    checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
-
-    getItemsString(attribute, key_columns, out, [&](const size_t row) { return def->getDataAt(row); });
-}
-
-void ComplexKeyCacheDictionary::getString(
-    const std::string & attribute_name,
-    const Columns & key_columns,
-    const DataTypes & key_types,
-    const String & def,
-    ColumnString * const out) const
-{
-    dict_struct.validateKeyTypes(key_types);
+    ColumnPtr result;
 
     auto & attribute = getAttribute(attribute_name);
-    checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
 
-    getItemsString(attribute, key_columns, out, [&](const size_t) { return StringRef{def}; });
+    /// TODO: Check that attribute type is same as result type
+    /// TODO: Check if const will work as expected
+    
+    auto keys_size = key_columns.front()->size();
+
+    auto type_call = [&](const auto &dictionary_attribute_type)
+    {
+        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+        using AttributeType = typename Type::AttributeType;
+
+        if constexpr (std::is_same_v<AttributeType, String>)
+        {
+            auto column_string = ColumnString::create();
+            auto out = column_string.get();
+
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ColumnString>(*default_untyped))
+                {
+                    getItemsString(attribute, key_columns, out, [&](const size_t row) { return default_col->getDataAt(row); });
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<String>();
+
+                    getItemsString(attribute, key_columns, out, [&](const size_t) { return StringRef{def}; });
+                }
+            }
+            else
+            {
+                    const auto null_value = StringRef{std::get<String>(attribute.null_values)};
+
+                    getItemsString(attribute, key_columns, out, [&](const size_t) { return null_value; });
+            }
+
+            result = std::move(column_string);
+        }
+        else if constexpr (IsNumber<AttributeType>)
+        {
+            auto column = ColumnVector<AttributeType>::create(keys_size);
+            auto& out = column->getData();
+
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ColumnVector<AttributeType>>(*default_untyped))
+                {
+                    getItemsNumberImpl<AttributeType, AttributeType>(
+                        attribute,
+                        key_columns,
+                        out,
+                        [&](const size_t row) { return default_col->getData()[row]; }
+                    );
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ColumnVector<AttributeType>>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<AttributeType>();
+
+                    getItemsNumberImpl<AttributeType, AttributeType>(
+                        attribute,
+                        key_columns,
+                        out,
+                        [&](const size_t) { return def; }
+                    );
+                }
+            }
+            else
+            {
+                const auto null_value = std::get<AttributeType>(attribute.null_values);
+
+                getItemsNumberImpl<AttributeType, AttributeType>(
+                    attribute,
+                    key_columns,
+                    out,
+                    [&](const size_t) { return null_value; });
+            }
+
+            result = std::move(column);
+        }
+        else if constexpr (IsDecimalNumber<AttributeType>)
+        {
+            // auto scale = getDecimalScale(*attribute.type);
+            auto column = ColumnDecimal<AttributeType>::create(keys_size, 0);
+            auto& out = column->getData();
+
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ColumnDecimal<AttributeType>>(*default_untyped))
+                {
+                    getItemsNumberImpl<AttributeType, AttributeType>(
+                        attribute,
+                        key_columns,
+                        out,
+                        [&](const size_t row) { return default_col->getData()[row]; }
+                    );
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ColumnDecimal<AttributeType>>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<AttributeType>();
+
+                    getItemsNumberImpl<AttributeType, AttributeType>(
+                        attribute,
+                        key_columns,
+                        out,
+                        [&](const size_t) { return def; }
+                    );
+                }
+            }
+            else
+            {
+                const auto null_value = std::get<AttributeType>(attribute.null_values);
+
+                getItemsNumberImpl<AttributeType, AttributeType>(
+                    attribute,
+                    key_columns,
+                    out,
+                    [&](const size_t) { return null_value; }
+                );
+            }
+
+            result = std::move(column);
+        }
+    };
+
+    callOnDictionaryAttributeType(attribute.type, type_call);
+   
+    return result;
 }
 
 /// returns cell_idx (always valid for replacing), 'cell is valid' flag, 'cell is outdated' flag,
@@ -158,15 +256,21 @@ ComplexKeyCacheDictionary::findCellIdx(const StringRef & key, const CellMetadata
     return {oldest_id, false, false};
 }
 
-void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const
+ColumnUInt8::Ptr ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes & key_types) const
 {
     dict_struct.validateKeyTypes(key_types);
 
+    const auto rows_num = key_columns.front()->size();
+
+    auto result = ColumnUInt8::create(rows_num);
+    auto& out = result->getData();
+
+    for (const auto row : ext::range(0, rows_num))
+        out[row] = false;
+
     /// Mapping: <key> -> { all indices `i` of `key_columns` such that `key_columns[i]` = <key> }
     MapType<std::vector<size_t>> outdated_keys;
 
-
-    const auto rows_num = key_columns.front()->size();
     const auto keys_size = dict_struct.key->size();
     StringRefs keys(keys_size);
     Arena temporary_keys_pool;
@@ -212,7 +316,7 @@ void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes
     hit_count.fetch_add(rows_num - outdated_keys.size(), std::memory_order_release);
 
     if (outdated_keys.empty())
-        return;
+        return result;
 
     std::vector<size_t> required_rows(outdated_keys.size());
     std::transform(
@@ -233,6 +337,8 @@ void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes
             for (const auto out_idx : outdated_keys[key])
                 out[out_idx] = false;
         });
+
+    return result;
 }
 
 void ComplexKeyCacheDictionary::createAttributes()
@@ -263,6 +369,102 @@ ComplexKeyCacheDictionary::Attribute & ComplexKeyCacheDictionary::getAttribute(c
     return attributes[it->second];
 }
 
+void ComplexKeyCacheDictionary::setDefaultAttributeValue(Attribute & attribute, const size_t idx) const
+{
+    auto type_call = [&](const auto &dictionary_attribute_type)
+    {
+        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+        using AttributeType = typename Type::AttributeType;
+
+        if constexpr (std::is_same_v<AttributeType, String>)
+        {
+            const auto & null_value_ref = std::get<String>(attribute.null_values);
+            auto & string_ref = std::get<ContainerPtrType<StringRef>>(attribute.arrays)[idx];
+
+            if (string_ref.data != null_value_ref.data())
+            {
+                if (string_ref.data)
+                    string_arena->free(const_cast<char *>(string_ref.data), string_ref.size);
+
+                string_ref = StringRef{null_value_ref};
+            }
+        }
+        else
+        {
+            std::get<ContainerPtrType<AttributeType>>(attribute.arrays)[idx] = std::get<AttributeType>(attribute.null_values); 
+        }
+    };
+
+    callOnDictionaryAttributeType(attribute.type, type_call);
+}
+
+ComplexKeyCacheDictionary::Attribute
+ComplexKeyCacheDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
+{
+    Attribute attr{type, {}, {}};
+
+    auto type_call = [&](const auto &dictionary_attribute_type)
+    {
+        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+        using AttributeType = typename Type::AttributeType;
+
+        if constexpr (std::is_same_v<AttributeType, String>)
+        {
+            attr.null_values = null_value.get<String>();
+            attr.arrays = std::make_unique<ContainerType<StringRef>>(size);
+            bytes_allocated += size * sizeof(StringRef);
+            if (!string_arena)
+                string_arena = std::make_unique<ArenaWithFreeLists>();
+        }
+        else
+        {
+            attr.null_values = AttributeType(null_value.get<NearestFieldType<AttributeType>>()); /* NOLINT */
+            attr.arrays = std::make_unique<ContainerType<AttributeType>>(size); /* NOLINT */
+            bytes_allocated += size * sizeof(AttributeType);
+        }
+    };
+
+    callOnDictionaryAttributeType(type, type_call);
+
+    return attr;
+}
+
+void ComplexKeyCacheDictionary::setAttributeValue(Attribute & attribute, const size_t idx, const Field & value) const
+{
+    auto type_call = [&](const auto &dictionary_attribute_type)
+    {
+        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+        using AttributeType = typename Type::AttributeType;
+
+        if constexpr (std::is_same_v<AttributeType, String>)
+        {
+            const auto & string = value.get<String>();
+            auto & string_ref = std::get<ContainerPtrType<StringRef>>(attribute.arrays)[idx];
+            const auto & null_value_ref = std::get<String>(attribute.null_values);
+
+            /// free memory unless it points to a null_value
+            if (string_ref.data && string_ref.data != null_value_ref.data())
+                string_arena->free(const_cast<char *>(string_ref.data), string_ref.size);
+
+            const auto str_size = string.size();
+            if (str_size != 0)
+            {
+                auto * str_ptr = string_arena->alloc(str_size);
+                std::copy(string.data(), string.data() + str_size, str_ptr);
+                string_ref = StringRef{str_ptr, str_size};
+            }
+            else
+                string_ref = {};
+        }
+        else
+        {
+            std::get<ContainerPtrType<AttributeType>>(attribute.arrays)[idx] = value.get<NearestFieldType<AttributeType>>();
+        }
+    };
+
+    callOnDictionaryAttributeType(attribute.type, type_call);
+}
+
 StringRef ComplexKeyCacheDictionary::allocKey(const size_t row, const Columns & key_columns, StringRefs & keys) const
 {
     if (key_size_is_fixed)
diff --git a/src/Dictionaries/ComplexKeyCacheDictionary.h b/src/Dictionaries/ComplexKeyCacheDictionary.h
index 2663fee266d..2711bd6c8cc 100644
--- a/src/Dictionaries/ComplexKeyCacheDictionary.h
+++ b/src/Dictionaries/ComplexKeyCacheDictionary.h
@@ -89,93 +89,16 @@ public:
         return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
     }
 
-    template <typename T>
-    using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
+    DictionaryIdentifierType getIdentifierType() const override { return DictionaryIdentifierType::complex; }
 
-/// In all functions below, key_columns must be full (non-constant) columns.
-/// See the requirement in IDataType.h for text-serialization functions.
-#define DECLARE(TYPE) \
-    void get##TYPE( \
-        const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const;
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
-
-    void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const;
-
-#define DECLARE(TYPE) \
-    void get##TYPE( \
-        const std::string & attribute_name, \
-        const Columns & key_columns, \
-        const DataTypes & key_types, \
-        const PaddedPODArray<TYPE> & def, \
-        ResultArrayType<TYPE> & out) const;
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
-
-    void getString(
-        const std::string & attribute_name,
+    ColumnPtr getColumn(
+        const std::string& attribute_name,
+        const DataTypePtr & result_type,
         const Columns & key_columns,
         const DataTypes & key_types,
-        const ColumnString * const def,
-        ColumnString * const out) const;
+        const ColumnPtr default_untyped) const override;
 
-#define DECLARE(TYPE) \
-    void get##TYPE( \
-        const std::string & attribute_name, \
-        const Columns & key_columns, \
-        const DataTypes & key_types, \
-        const TYPE def, \
-        ResultArrayType<TYPE> & out) const;
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
-
-    void getString(
-        const std::string & attribute_name,
-        const Columns & key_columns,
-        const DataTypes & key_types,
-        const String & def,
-        ColumnString * const out) const;
-
-    void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
+    ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override; 
 
     BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
 
diff --git a/src/Dictionaries/ComplexKeyCacheDictionary_createAttributeWithType.cpp b/src/Dictionaries/ComplexKeyCacheDictionary_createAttributeWithType.cpp
deleted file mode 100644
index ba9f8d014fd..00000000000
--- a/src/Dictionaries/ComplexKeyCacheDictionary_createAttributeWithType.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-#include "ComplexKeyCacheDictionary.h"
-
-namespace DB
-{
-ComplexKeyCacheDictionary::Attribute
-ComplexKeyCacheDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
-{
-    Attribute attr{type, {}, {}};
-
-    switch (type)
-    {
-#define DISPATCH(TYPE) \
-    case AttributeUnderlyingType::ut##TYPE: \
-        attr.null_values = TYPE(null_value.get<NearestFieldType<TYPE>>()); /* NOLINT */ \
-        attr.arrays = std::make_unique<ContainerType<TYPE>>(size); /* NOLINT */ \
-        bytes_allocated += size * sizeof(TYPE); \
-        break;
-        DISPATCH(UInt8)
-        DISPATCH(UInt16)
-        DISPATCH(UInt32)
-        DISPATCH(UInt64)
-        DISPATCH(UInt128)
-        DISPATCH(Int8)
-        DISPATCH(Int16)
-        DISPATCH(Int32)
-        DISPATCH(Int64)
-        DISPATCH(Decimal32)
-        DISPATCH(Decimal64)
-        DISPATCH(Decimal128)
-        DISPATCH(Float32)
-        DISPATCH(Float64)
-#undef DISPATCH
-        case AttributeUnderlyingType::utString:
-            attr.null_values = null_value.get<String>();
-            attr.arrays = std::make_unique<ContainerType<StringRef>>(size);
-            bytes_allocated += size * sizeof(StringRef);
-            if (!string_arena)
-                string_arena = std::make_unique<ArenaWithFreeLists>();
-            break;
-    }
-
-    return attr;
-}
-
-}
diff --git a/src/Dictionaries/ComplexKeyCacheDictionary_generate1.cpp b/src/Dictionaries/ComplexKeyCacheDictionary_generate1.cpp
deleted file mode 100644
index 01d39722d33..00000000000
--- a/src/Dictionaries/ComplexKeyCacheDictionary_generate1.cpp
+++ /dev/null
@@ -1,32 +0,0 @@
-#include <Dictionaries/ComplexKeyCacheDictionary.h>
-
-namespace DB
-{
-#define DEFINE(TYPE) \
-    void ComplexKeyCacheDictionary::get##TYPE( \
-        const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const \
-    { \
-        dict_struct.validateKeyTypes(key_types); \
-        auto & attribute = getAttribute(attribute_name); \
-        checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
-        const auto null_value = std::get<TYPE>(attribute.null_values); \
-        getItemsNumberImpl<TYPE, TYPE>(attribute, key_columns, out, [&](const size_t) { return null_value; }); \
-    }
-
-DEFINE(UInt8)
-DEFINE(UInt16)
-DEFINE(UInt32)
-DEFINE(UInt64)
-DEFINE(UInt128)
-DEFINE(Int8)
-DEFINE(Int16)
-DEFINE(Int32)
-DEFINE(Int64)
-DEFINE(Float32)
-DEFINE(Float64)
-DEFINE(Decimal32)
-DEFINE(Decimal64)
-DEFINE(Decimal128)
-
-#undef DEFINE
-}
diff --git a/src/Dictionaries/ComplexKeyCacheDictionary_generate2.cpp b/src/Dictionaries/ComplexKeyCacheDictionary_generate2.cpp
deleted file mode 100644
index deb34706f54..00000000000
--- a/src/Dictionaries/ComplexKeyCacheDictionary_generate2.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-#include <Dictionaries/ComplexKeyCacheDictionary.h>
-
-namespace DB
-{
-#define DEFINE(TYPE) \
-    void ComplexKeyCacheDictionary::get##TYPE( \
-        const std::string & attribute_name, \
-        const Columns & key_columns, \
-        const DataTypes & key_types, \
-        const PaddedPODArray<TYPE> & def, \
-        ResultArrayType<TYPE> & out) const \
-    { \
-        dict_struct.validateKeyTypes(key_types); \
-        auto & attribute = getAttribute(attribute_name); \
-        checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
-        getItemsNumberImpl<TYPE, TYPE>(attribute, key_columns, out, [&](const size_t row) { return def[row]; }); \
-    }
-
-DEFINE(UInt8)
-DEFINE(UInt16)
-DEFINE(UInt32)
-DEFINE(UInt64)
-DEFINE(UInt128)
-DEFINE(Int8)
-DEFINE(Int16)
-DEFINE(Int32)
-DEFINE(Int64)
-DEFINE(Float32)
-DEFINE(Float64)
-DEFINE(Decimal32)
-DEFINE(Decimal64)
-DEFINE(Decimal128)
-
-#undef DEFINE
-}
diff --git a/src/Dictionaries/ComplexKeyCacheDictionary_generate3.cpp b/src/Dictionaries/ComplexKeyCacheDictionary_generate3.cpp
deleted file mode 100644
index 2a84fdc89f6..00000000000
--- a/src/Dictionaries/ComplexKeyCacheDictionary_generate3.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-#include <Dictionaries/ComplexKeyCacheDictionary.h>
-
-namespace DB
-{
-#define DEFINE(TYPE) \
-    void ComplexKeyCacheDictionary::get##TYPE( \
-        const std::string & attribute_name, \
-        const Columns & key_columns, \
-        const DataTypes & key_types, \
-        const TYPE def, \
-        ResultArrayType<TYPE> & out) const \
-    { \
-        dict_struct.validateKeyTypes(key_types); \
-        auto & attribute = getAttribute(attribute_name); \
-        checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
-        getItemsNumberImpl<TYPE, TYPE>(attribute, key_columns, out, [&](const size_t) { return def; }); \
-    }
-
-DEFINE(UInt8)
-DEFINE(UInt16)
-DEFINE(UInt32)
-DEFINE(UInt64)
-DEFINE(UInt128)
-DEFINE(Int8)
-DEFINE(Int16)
-DEFINE(Int32)
-DEFINE(Int64)
-DEFINE(Float32)
-DEFINE(Float64)
-DEFINE(Decimal32)
-DEFINE(Decimal64)
-DEFINE(Decimal128)
-
-#undef DEFINE
-}
diff --git a/src/Dictionaries/ComplexKeyCacheDictionary_setAttributeValue.cpp b/src/Dictionaries/ComplexKeyCacheDictionary_setAttributeValue.cpp
deleted file mode 100644
index 2df8f95bc0c..00000000000
--- a/src/Dictionaries/ComplexKeyCacheDictionary_setAttributeValue.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-#include "ComplexKeyCacheDictionary.h"
-
-namespace DB
-{
-void ComplexKeyCacheDictionary::setAttributeValue(Attribute & attribute, const size_t idx, const Field & value) const
-{
-    switch (attribute.type)
-    {
-        case AttributeUnderlyingType::utUInt8:
-            std::get<ContainerPtrType<UInt8>>(attribute.arrays)[idx] = value.get<UInt64>();
-            break;
-        case AttributeUnderlyingType::utUInt16:
-            std::get<ContainerPtrType<UInt16>>(attribute.arrays)[idx] = value.get<UInt64>();
-            break;
-        case AttributeUnderlyingType::utUInt32:
-            std::get<ContainerPtrType<UInt32>>(attribute.arrays)[idx] = value.get<UInt64>();
-            break;
-        case AttributeUnderlyingType::utUInt64:
-            std::get<ContainerPtrType<UInt64>>(attribute.arrays)[idx] = value.get<UInt64>();
-            break;
-        case AttributeUnderlyingType::utUInt128:
-            std::get<ContainerPtrType<UInt128>>(attribute.arrays)[idx] = value.get<UInt128>();
-            break;
-        case AttributeUnderlyingType::utInt8:
-            std::get<ContainerPtrType<Int8>>(attribute.arrays)[idx] = value.get<Int64>();
-            break;
-        case AttributeUnderlyingType::utInt16:
-            std::get<ContainerPtrType<Int16>>(attribute.arrays)[idx] = value.get<Int64>();
-            break;
-        case AttributeUnderlyingType::utInt32:
-            std::get<ContainerPtrType<Int32>>(attribute.arrays)[idx] = value.get<Int64>();
-            break;
-        case AttributeUnderlyingType::utInt64:
-            std::get<ContainerPtrType<Int64>>(attribute.arrays)[idx] = value.get<Int64>();
-            break;
-        case AttributeUnderlyingType::utFloat32:
-            std::get<ContainerPtrType<Float32>>(attribute.arrays)[idx] = value.get<Float64>();
-            break;
-        case AttributeUnderlyingType::utFloat64:
-            std::get<ContainerPtrType<Float64>>(attribute.arrays)[idx] = value.get<Float64>();
-            break;
-
-        case AttributeUnderlyingType::utDecimal32:
-            std::get<ContainerPtrType<Decimal32>>(attribute.arrays)[idx] = value.get<Decimal32>();
-            break;
-        case AttributeUnderlyingType::utDecimal64:
-            std::get<ContainerPtrType<Decimal64>>(attribute.arrays)[idx] = value.get<Decimal64>();
-            break;
-        case AttributeUnderlyingType::utDecimal128:
-            std::get<ContainerPtrType<Decimal128>>(attribute.arrays)[idx] = value.get<Decimal128>();
-            break;
-
-        case AttributeUnderlyingType::utString:
-        {
-            const auto & string = value.get<String>();
-            auto & string_ref = std::get<ContainerPtrType<StringRef>>(attribute.arrays)[idx];
-            const auto & null_value_ref = std::get<String>(attribute.null_values);
-
-            /// free memory unless it points to a null_value
-            if (string_ref.data && string_ref.data != null_value_ref.data())
-                string_arena->free(const_cast<char *>(string_ref.data), string_ref.size);
-
-            const auto str_size = string.size();
-            if (str_size != 0)
-            {
-                auto * str_ptr = string_arena->alloc(str_size);
-                std::copy(string.data(), string.data() + str_size, str_ptr);
-                string_ref = StringRef{str_ptr, str_size};
-            }
-            else
-                string_ref = {};
-
-            break;
-        }
-    }
-}
-
-}
diff --git a/src/Dictionaries/ComplexKeyCacheDictionary_setDefaultAttributeValue.cpp b/src/Dictionaries/ComplexKeyCacheDictionary_setDefaultAttributeValue.cpp
deleted file mode 100644
index aa03cc88038..00000000000
--- a/src/Dictionaries/ComplexKeyCacheDictionary_setDefaultAttributeValue.cpp
+++ /dev/null
@@ -1,71 +0,0 @@
-#include "ComplexKeyCacheDictionary.h"
-
-namespace DB
-{
-void ComplexKeyCacheDictionary::setDefaultAttributeValue(Attribute & attribute, const size_t idx) const
-{
-    switch (attribute.type)
-    {
-        case AttributeUnderlyingType::utUInt8:
-            std::get<ContainerPtrType<UInt8>>(attribute.arrays)[idx] = std::get<UInt8>(attribute.null_values);
-            break;
-        case AttributeUnderlyingType::utUInt16:
-            std::get<ContainerPtrType<UInt16>>(attribute.arrays)[idx] = std::get<UInt16>(attribute.null_values);
-            break;
-        case AttributeUnderlyingType::utUInt32:
-            std::get<ContainerPtrType<UInt32>>(attribute.arrays)[idx] = std::get<UInt32>(attribute.null_values);
-            break;
-        case AttributeUnderlyingType::utUInt64:
-            std::get<ContainerPtrType<UInt64>>(attribute.arrays)[idx] = std::get<UInt64>(attribute.null_values);
-            break;
-        case AttributeUnderlyingType::utUInt128:
-            std::get<ContainerPtrType<UInt128>>(attribute.arrays)[idx] = std::get<UInt128>(attribute.null_values);
-            break;
-        case AttributeUnderlyingType::utInt8:
-            std::get<ContainerPtrType<Int8>>(attribute.arrays)[idx] = std::get<Int8>(attribute.null_values);
-            break;
-        case AttributeUnderlyingType::utInt16:
-            std::get<ContainerPtrType<Int16>>(attribute.arrays)[idx] = std::get<Int16>(attribute.null_values);
-            break;
-        case AttributeUnderlyingType::utInt32:
-            std::get<ContainerPtrType<Int32>>(attribute.arrays)[idx] = std::get<Int32>(attribute.null_values);
-            break;
-        case AttributeUnderlyingType::utInt64:
-            std::get<ContainerPtrType<Int64>>(attribute.arrays)[idx] = std::get<Int64>(attribute.null_values);
-            break;
-        case AttributeUnderlyingType::utFloat32:
-            std::get<ContainerPtrType<Float32>>(attribute.arrays)[idx] = std::get<Float32>(attribute.null_values);
-            break;
-        case AttributeUnderlyingType::utFloat64:
-            std::get<ContainerPtrType<Float64>>(attribute.arrays)[idx] = std::get<Float64>(attribute.null_values);
-            break;
-
-        case AttributeUnderlyingType::utDecimal32:
-            std::get<ContainerPtrType<Decimal32>>(attribute.arrays)[idx] = std::get<Decimal32>(attribute.null_values);
-            break;
-        case AttributeUnderlyingType::utDecimal64:
-            std::get<ContainerPtrType<Decimal64>>(attribute.arrays)[idx] = std::get<Decimal64>(attribute.null_values);
-            break;
-        case AttributeUnderlyingType::utDecimal128:
-            std::get<ContainerPtrType<Decimal128>>(attribute.arrays)[idx] = std::get<Decimal128>(attribute.null_values);
-            break;
-
-        case AttributeUnderlyingType::utString:
-        {
-            const auto & null_value_ref = std::get<String>(attribute.null_values);
-            auto & string_ref = std::get<ContainerPtrType<StringRef>>(attribute.arrays)[idx];
-
-            if (string_ref.data != null_value_ref.data())
-            {
-                if (string_ref.data)
-                    string_arena->free(const_cast<char *>(string_ref.data), string_ref.size);
-
-                string_ref = StringRef{null_value_ref};
-            }
-
-            break;
-        }
-    }
-}
-
-}

From 07fab85cc464309df50674e0bedf9f9c71c6c2ec Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Tue, 29 Dec 2020 00:17:30 +0300
Subject: [PATCH 0224/1238] Updated SSDComplexKeyCacheDictionary to new
 interface

---
 .../SSDComplexKeyCacheDictionary.cpp          | 280 ++++++++++--------
 .../SSDComplexKeyCacheDictionary.h            |  90 +-----
 2 files changed, 169 insertions(+), 201 deletions(-)

diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp
index b23529eac7d..452065c20ad 100644
--- a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp
+++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp
@@ -24,6 +24,7 @@
 #include <city.h>
 #include <fcntl.h>
 
+#include <Functions/FunctionHelpers.h>
 
 namespace ProfileEvents
 {
@@ -1376,90 +1377,152 @@ SSDComplexKeyCacheDictionary::SSDComplexKeyCacheDictionary(
     createAttributes();
 }
 
-#define DECLARE(TYPE) \
-    void SSDComplexKeyCacheDictionary::get##TYPE( \
-        const std::string & attribute_name, \
-        const Columns & key_columns, \
-        const DataTypes & key_types, \
-        ResultArrayType<TYPE> & out) const \
-    { \
-        const auto index = getAttributeIndex(attribute_name); \
-        checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \
-        const auto null_value = std::get<TYPE>(null_values[index]); /* NOLINT */ \
-        getItemsNumberImpl<TYPE, TYPE>(index, key_columns, key_types, out, [&](const size_t) { return null_value; }); /* NOLINT */ \
-    }
+ColumnPtr SSDComplexKeyCacheDictionary::getColumn(
+    const std::string & attribute_name,
+    const DataTypePtr &,
+    const Columns & key_columns,
+    const DataTypes & key_types,
+    const ColumnPtr default_untyped) const
+{
+    dict_struct.validateKeyTypes(key_types);
 
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
+    const auto index = getAttributeIndex(attribute_name);
 
-#define DECLARE(TYPE) \
-    void SSDComplexKeyCacheDictionary::get##TYPE( \
-        const std::string & attribute_name, \
-        const Columns & key_columns, \
-        const DataTypes & key_types, \
-        const PaddedPODArray<TYPE> & def, \
-        ResultArrayType<TYPE> & out) const \
-    { \
-        const auto index = getAttributeIndex(attribute_name); \
-        checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \
-        getItemsNumberImpl<TYPE, TYPE>(index, key_columns, key_types, out, [&](const size_t row) { return def[row]; }); /* NOLINT */ \
-    }
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
+    ColumnPtr result;
 
-#define DECLARE(TYPE) \
-    void SSDComplexKeyCacheDictionary::get##TYPE( \
-        const std::string & attribute_name, \
-        const Columns & key_columns, \
-        const DataTypes & key_types, \
-        const TYPE def, \
-        ResultArrayType<TYPE> & out) const \
-    { \
-        const auto index = getAttributeIndex(attribute_name); \
-        checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \
-        getItemsNumberImpl<TYPE, TYPE>(index, key_columns, key_types, out, [&](const size_t) { return def; }); /* NOLINT */ \
-    }
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
+    /// TODO: Check that attribute type is same as result type
+    /// TODO: Check if const will work as expected
+    
+    auto keys_size = key_columns.front()->size();
+
+    auto type_call = [&](const auto &dictionary_attribute_type)
+    {
+        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+        using AttributeType = typename Type::AttributeType;
+
+        if constexpr (std::is_same_v<AttributeType, String>)
+        {
+            auto column_string = ColumnString::create();
+            auto out = column_string.get();
+
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ColumnString>(*default_untyped))
+                {
+                    getItemsStringImpl(index, key_columns, key_types, out, [&](const size_t row) { return default_col->getDataAt(row); });
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<String>();
+
+                    getItemsStringImpl(index, key_columns, key_types, out, [&](const size_t) { return StringRef{def}; });
+                }
+            }
+            else
+            {
+                const auto null_value = StringRef{std::get<String>(null_values[index])};
+
+                getItemsStringImpl(index, key_columns, key_types, out, [&](const size_t) { return null_value; });
+            }
+
+            result = std::move(column_string);
+        }
+        else if constexpr (IsNumber<AttributeType>)
+        {
+            auto column = ColumnVector<AttributeType>::create(keys_size);
+            auto& out = column->getData();
+
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ColumnVector<AttributeType>>(*default_untyped))
+                {
+                    getItemsNumberImpl<AttributeType, AttributeType>(
+                        index,
+                        key_columns,
+                        key_types,
+                        out,
+                        [&](const size_t row) { return default_col->getData()[row]; }
+                    );
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ColumnVector<AttributeType>>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<AttributeType>();
+
+                    getItemsNumberImpl<AttributeType, AttributeType>(
+                        index,
+                        key_columns,
+                        key_types,
+                        out,
+                        [&](const size_t) { return def; }
+                    );
+                }
+            }
+            else
+            {
+                const auto null_value = std::get<AttributeType>(null_values[index]); /* NOLINT */
+
+                getItemsNumberImpl<AttributeType, AttributeType>(
+                    index,
+                    key_columns,
+                    key_types,
+                    out,
+                    [&](const size_t) { return null_value; });
+            }
+
+            result = std::move(column);
+        }
+        else if constexpr (IsDecimalNumber<AttributeType>)
+        {
+            // auto scale = getDecimalScale(*attribute.type);
+            auto column = ColumnDecimal<AttributeType>::create(keys_size, 0);
+            auto& out = column->getData();
+
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ColumnDecimal<AttributeType>>(*default_untyped))
+                {
+                    getItemsNumberImpl<AttributeType, AttributeType>(
+                        index,
+                        key_columns,
+                        key_types,
+                        out,
+                        [&](const size_t row) { return default_col->getData()[row]; }
+                    );
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ColumnDecimal<AttributeType>>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<AttributeType>();
+
+                    getItemsNumberImpl<AttributeType, AttributeType>(
+                        index,
+                        key_columns,
+                        key_types,
+                        out,
+                        [&](const size_t) { return def; }
+                    );
+                }
+            }
+            else
+            {
+                const auto null_value = std::get<AttributeType>(null_values[index]); /* NOLINT */
+
+                getItemsNumberImpl<AttributeType, AttributeType>(
+                    index,
+                    key_columns,
+                    key_types,
+                    out,
+                    [&](const size_t) { return null_value; }
+                );
+            }
+
+            result = std::move(column);
+        }
+    };
+
+    callOnDictionaryAttributeType(dict_struct.attributes[index].underlying_type, type_call);
+   
+    return result;
+}
 
 template <typename AttributeType, typename OutputType, typename DefaultGetter>
 void SSDComplexKeyCacheDictionary::getItemsNumberImpl(
@@ -1508,42 +1571,6 @@ void SSDComplexKeyCacheDictionary::getItemsNumberImpl(
             getLifetime());
 }
 
-void SSDComplexKeyCacheDictionary::getString(
-    const std::string & attribute_name,
-    const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
-{
-    const auto index = getAttributeIndex(attribute_name);
-    checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString);
-
-    const auto null_value = StringRef{std::get<String>(null_values[index])};
-
-    getItemsStringImpl(index, key_columns, key_types, out, [&](const size_t) { return null_value; });
-}
-
-void SSDComplexKeyCacheDictionary::getString(
-        const std::string & attribute_name,
-        const Columns & key_columns, const DataTypes & key_types,
-        const ColumnString * const def, ColumnString * const out) const
-{
-    const auto index = getAttributeIndex(attribute_name);
-    checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString);
-
-    getItemsStringImpl(index, key_columns, key_types, out, [&](const size_t row) { return def->getDataAt(row); });
-}
-
-void SSDComplexKeyCacheDictionary::getString(
-        const std::string & attribute_name,
-        const Columns & key_columns,
-        const DataTypes & key_types,
-        const String & def,
-        ColumnString * const out) const
-{
-    const auto index = getAttributeIndex(attribute_name);
-    checkAttributeType(this, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString);
-
-    getItemsStringImpl(index, key_columns, key_types, out, [&](const size_t) { return StringRef{def}; });
-}
-
 template <typename DefaultGetter>
 void SSDComplexKeyCacheDictionary::getItemsStringImpl(
     const size_t attribute_index,
@@ -1639,20 +1666,27 @@ void SSDComplexKeyCacheDictionary::getItemsStringImpl(
     }
 }
 
-void SSDComplexKeyCacheDictionary::has(
-    const Columns & key_columns,
-    const DataTypes & key_types,
-    PaddedPODArray<UInt8> & out) const
+ColumnUInt8::Ptr SSDComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes & key_types) const
 {
     dict_struct.validateKeyTypes(key_types);
 
+    PaddedPODArray<Key> backup_storage;
+    const auto& ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
+
+    auto result = ColumnUInt8::create(ext::size(ids));
+    auto& out = result->getData();
+
+    const auto rows = ext::size(ids);
+    for (const auto row : ext::range(0, rows))
+        out[row] = false;
+
     const auto now = std::chrono::system_clock::now();
 
     std::unordered_map<KeyRef, std::vector<size_t>> not_found_keys;
     TemporalComplexKeysPool not_found_pool;
     storage.has(key_columns, key_types, out, not_found_keys, not_found_pool, now);
     if (not_found_keys.empty())
-        return;
+        return result;
 
     std::vector<KeyRef> required_keys(not_found_keys.size());
     std::transform(std::begin(not_found_keys), std::end(not_found_keys), std::begin(required_keys), [](const auto & pair) { return pair.first; });
@@ -1681,6 +1715,8 @@ void SSDComplexKeyCacheDictionary::has(
                     out[row] = false;
             },
             getLifetime());
+
+    return result;
 }
 
 BlockInputStreamPtr SSDComplexKeyCacheDictionary::getBlockInputStream(
diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.h b/src/Dictionaries/SSDComplexKeyCacheDictionary.h
index 4758d62f1df..47c8f54a77d 100644
--- a/src/Dictionaries/SSDComplexKeyCacheDictionary.h
+++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.h
@@ -569,88 +569,20 @@ public:
 
     std::exception_ptr getLastException() const override { return storage.getLastException(); }
 
+    DictionaryIdentifierType getIdentifierType() const override { return DictionaryIdentifierType::complex; }
+
+    ColumnPtr getColumn(
+        const std::string& attribute_name,
+        const DataTypePtr & result_type,
+        const Columns & key_columns,
+        const DataTypes & key_types,
+        const ColumnPtr default_untyped) const override;
+
+    ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override; 
+
     template <typename T>
     using ResultArrayType = SSDComplexKeyCacheStorage::ResultArrayType<T>;
 
-#define DECLARE(TYPE) \
-    void get##TYPE( \
-        const std::string & attribute_name, \
-        const Columns & key_columns, \
-        const DataTypes & key_types, \
-        ResultArrayType<TYPE> & out) const;
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
-
-    void getString(const std::string & attribute_name, const Columns & key_columns,
-        const DataTypes & key_types, ColumnString * out) const;
-
-#define DECLARE(TYPE) \
-    void get##TYPE( \
-        const std::string & attribute_name, \
-        const Columns & key_columns, \
-        const DataTypes & key_types, \
-        const PaddedPODArray<TYPE> & def, \
-        ResultArrayType<TYPE> & out) const;
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
-
-    void getString(const std::string & attribute_name, const Columns & key_columns,
-        const DataTypes & key_types, const ColumnString * const def, ColumnString * const out) const;
-
-#define DECLARE(TYPE) \
-    void get##TYPE( \
-        const std::string & attribute_name, \
-        const Columns & key_columns, \
-        const DataTypes & key_types, \
-        const TYPE def, \
-        ResultArrayType<TYPE> & out) const;
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
-
-    void getString(const std::string & attribute_name, const Columns & key_columns,
-        const DataTypes & key_types, const String & def, ColumnString * const out) const;
-
-    void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
-
     BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
 
 private:

From 6f7ad821602104e0849f1ae020ba0c1c5a2e2b2b Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Tue, 29 Dec 2020 13:09:01 +0300
Subject: [PATCH 0225/1238] Updated IPAddressDictionary to new interface

---
 src/Dictionaries/IPAddressDictionary.cpp | 473 +++++++++--------------
 src/Dictionaries/IPAddressDictionary.h   |  87 +----
 2 files changed, 196 insertions(+), 364 deletions(-)

diff --git a/src/Dictionaries/IPAddressDictionary.cpp b/src/Dictionaries/IPAddressDictionary.cpp
index f8af4686ad8..0f2c2c8a706 100644
--- a/src/Dictionaries/IPAddressDictionary.cpp
+++ b/src/Dictionaries/IPAddressDictionary.cpp
@@ -16,6 +16,7 @@
 #include <ext/range.h>
 #include "DictionaryBlockInputStream.h"
 #include "DictionaryFactory.h"
+#include <Functions/FunctionHelpers.h>
 
 namespace DB
 {
@@ -266,167 +267,170 @@ IPAddressDictionary::IPAddressDictionary(
     calculateBytesAllocated();
 }
 
-#define DECLARE(TYPE) \
-    void IPAddressDictionary::get##TYPE( \
-        const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const \
-    { \
-        validateKeyTypes(key_types); \
-\
-        const auto & attribute = getAttribute(attribute_name); \
-        checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
-\
-        const auto null_value = std::get<TYPE>(attribute.null_values); \
-\
-        getItemsImpl<TYPE, TYPE>( \
-            attribute, \
-            key_columns, \
-            [&](const size_t row, const auto value) { out[row] = value; }, \
-            [&](const size_t) { return null_value; }); \
-    }
-DECLARE(UInt8)
-DECLARE(UInt16)
-DECLARE(UInt32)
-DECLARE(UInt64)
-DECLARE(UInt128)
-DECLARE(Int8)
-DECLARE(Int16)
-DECLARE(Int32)
-DECLARE(Int64)
-DECLARE(Float32)
-DECLARE(Float64)
-DECLARE(Decimal32)
-DECLARE(Decimal64)
-DECLARE(Decimal128)
-#undef DECLARE
-
-void IPAddressDictionary::getString(
-    const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
-{
-    validateKeyTypes(key_types);
-
-    const auto & attribute = getAttribute(attribute_name);
-    checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
-
-    const auto & null_value = StringRef{std::get<String>(attribute.null_values)};
-
-    getItemsImpl<StringRef, StringRef>(
-        attribute,
-        key_columns,
-        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
-        [&](const size_t) { return null_value; });
-}
-
-#define DECLARE(TYPE) \
-    void IPAddressDictionary::get##TYPE( \
-        const std::string & attribute_name, \
-        const Columns & key_columns, \
-        const DataTypes & key_types, \
-        const PaddedPODArray<TYPE> & def, \
-        ResultArrayType<TYPE> & out) const \
-    { \
-        validateKeyTypes(key_types); \
-\
-        const auto & attribute = getAttribute(attribute_name); \
-        checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
-\
-        getItemsImpl<TYPE, TYPE>( \
-            attribute, \
-            key_columns, \
-            [&](const size_t row, const auto value) { out[row] = value; }, \
-            [&](const size_t row) { return def[row]; }); \
-    }
-DECLARE(UInt8)
-DECLARE(UInt16)
-DECLARE(UInt32)
-DECLARE(UInt64)
-DECLARE(UInt128)
-DECLARE(Int8)
-DECLARE(Int16)
-DECLARE(Int32)
-DECLARE(Int64)
-DECLARE(Float32)
-DECLARE(Float64)
-DECLARE(Decimal32)
-DECLARE(Decimal64)
-DECLARE(Decimal128)
-#undef DECLARE
-
-void IPAddressDictionary::getString(
+ColumnPtr IPAddressDictionary::getColumn(
     const std::string & attribute_name,
+    const DataTypePtr &,
     const Columns & key_columns,
     const DataTypes & key_types,
-    const ColumnString * const def,
-    ColumnString * const out) const
+    const ColumnPtr default_untyped) const
 {
     validateKeyTypes(key_types);
 
-    const auto & attribute = getAttribute(attribute_name);
-    checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
-
-    getItemsImpl<StringRef, StringRef>(
-        attribute,
-        key_columns,
-        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
-        [&](const size_t row) { return def->getDataAt(row); });
-}
-
-#define DECLARE(TYPE) \
-    void IPAddressDictionary::get##TYPE( \
-        const std::string & attribute_name, \
-        const Columns & key_columns, \
-        const DataTypes & key_types, \
-        const TYPE def, \
-        ResultArrayType<TYPE> & out) const \
-    { \
-        validateKeyTypes(key_types); \
-\
-        const auto & attribute = getAttribute(attribute_name); \
-        checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
-\
-        getItemsImpl<TYPE, TYPE>( \
-            attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
-    }
-DECLARE(UInt8)
-DECLARE(UInt16)
-DECLARE(UInt32)
-DECLARE(UInt64)
-DECLARE(UInt128)
-DECLARE(Int8)
-DECLARE(Int16)
-DECLARE(Int32)
-DECLARE(Int64)
-DECLARE(Float32)
-DECLARE(Float64)
-DECLARE(Decimal32)
-DECLARE(Decimal64)
-DECLARE(Decimal128)
-#undef DECLARE
-
-void IPAddressDictionary::getString(
-    const std::string & attribute_name,
-    const Columns & key_columns,
-    const DataTypes & key_types,
-    const String & def,
-    ColumnString * const out) const
-{
-    validateKeyTypes(key_types);
+    ColumnPtr result;
 
     const auto & attribute = getAttribute(attribute_name);
-    checkAttributeType(this, attribute_name, attribute.type, AttributeUnderlyingType::utString);
 
-    getItemsImpl<StringRef, StringRef>(
-        attribute,
-        key_columns,
-        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
-        [&](const size_t) { return StringRef{def}; });
+    /// TODO: Check that attribute type is same as result type
+    /// TODO: Check if const will work as expected
+    
+    auto size = key_columns.front()->size();
+
+    auto type_call = [&](const auto &dictionary_attribute_type)
+    {
+        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+        using AttributeType = typename Type::AttributeType;
+
+        if constexpr (std::is_same_v<AttributeType, String>)
+        {
+            auto column_string = ColumnString::create();
+            auto out = column_string.get();
+
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ColumnString>(*default_untyped))
+                {
+                    getItemsImpl<StringRef, StringRef>(
+                        attribute,
+                        key_columns,
+                        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
+                        [&](const size_t row) { return default_col->getDataAt(row); });
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<String>();
+
+                    getItemsImpl<StringRef, StringRef>(
+                        attribute,
+                        key_columns,
+                        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
+                        [&](const size_t) { return StringRef { def }; });
+                }
+            }
+            else
+            {
+                const auto & null_value = StringRef{std::get<String>(attribute.null_values)};
+
+                getItemsImpl<StringRef, StringRef>(
+                    attribute,
+                    key_columns,
+                    [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
+                    [&](const size_t) { return null_value; });
+            }
+
+            result = std::move(column_string);
+        }
+        else if constexpr (IsNumber<AttributeType>)
+        {
+            auto column = ColumnVector<AttributeType>::create(size);
+            auto& out = column->getData();
+
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ColumnVector<AttributeType>>(*default_untyped))
+                {
+                    getItemsImpl<AttributeType, AttributeType>(
+                        attribute,
+                        key_columns,
+                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t row) { return default_col->getData()[row]; }
+                    );
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ColumnVector<AttributeType>>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<AttributeType>();
+
+                    getItemsImpl<AttributeType, AttributeType>(
+                        attribute,
+                        key_columns,
+                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t) { return def; }
+                    );
+                }
+            }
+            else
+            {
+                const auto null_value = std::get<AttributeType>(attribute.null_values);
+
+                getItemsImpl<AttributeType, AttributeType>(
+                    attribute,
+                    key_columns,
+                    [&](const size_t row, const auto value) { return out[row] = value; },
+                    [&](const size_t) { return null_value; });
+            }
+
+            result = std::move(column);
+        }
+        else if constexpr (IsDecimalNumber<AttributeType>)
+        {
+            // auto scale = getDecimalScale(*attribute.type);
+            auto column = ColumnDecimal<AttributeType>::create(size, 0);
+            auto& out = column->getData();
+
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ColumnDecimal<AttributeType>>(*default_untyped))
+                {
+                    getItemsImpl<AttributeType, AttributeType>(
+                        attribute,
+                        key_columns,
+                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t row) { return default_col->getData()[row]; }
+                    );
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ColumnDecimal<AttributeType>>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<AttributeType>();
+
+                    getItemsImpl<AttributeType, AttributeType>(
+                        attribute,
+                        key_columns,
+                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t) { return def; }
+                    );
+                }
+            }
+            else
+            {
+                const auto null_value = std::get<AttributeType>(attribute.null_values);
+
+                getItemsImpl<AttributeType, AttributeType>(
+                    attribute,
+                    key_columns,
+                    [&](const size_t row, const auto value) { return out[row] = value; },
+                    [&](const size_t) { return null_value; }
+                );
+            }
+
+            result = std::move(column);
+        }
+    };
+
+    callOnDictionaryAttributeType(attribute.type, type_call);
+   
+    return result;
 }
 
-void IPAddressDictionary::has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const
+
+ColumnUInt8::Ptr IPAddressDictionary::has(const Columns & key_columns, const DataTypes & key_types) const
 {
     validateKeyTypes(key_types);
 
     const auto first_column = key_columns.front();
     const auto rows = first_column->size();
+
+    auto result = ColumnUInt8::create(rows);
+    auto& out = result->getData();
+
     if (first_column->isNumeric())
     {
         uint8_t addrv6_buf[IPV6_BINARY_LENGTH];
@@ -451,6 +455,8 @@ void IPAddressDictionary::has(const Columns & key_columns, const DataTypes & key
     }
 
     query_count.fetch_add(rows, std::memory_order_relaxed);
+
+    return result;
 }
 
 void IPAddressDictionary::createAttributes()
@@ -652,6 +658,13 @@ void IPAddressDictionary::addAttributeSize(const Attribute & attribute)
     bucket_count = vec.size();
 }
 
+template <>
+void IPAddressDictionary::addAttributeSize<String>(const Attribute & attribute)
+{
+    addAttributeSize<StringRef>(attribute);
+    bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
+}
+
 void IPAddressDictionary::calculateBytesAllocated()
 {
     if (auto * ipv4_col = std::get_if<IPv4Container>(&ip_column))
@@ -669,64 +682,18 @@ void IPAddressDictionary::calculateBytesAllocated()
 
     for (const auto & attribute : attributes)
     {
-        switch (attribute.type)
+        auto type_call = [&](const auto & dictionary_attribute_type)
         {
-            case AttributeUnderlyingType::utUInt8:
-                addAttributeSize<UInt8>(attribute);
-                break;
-            case AttributeUnderlyingType::utUInt16:
-                addAttributeSize<UInt16>(attribute);
-                break;
-            case AttributeUnderlyingType::utUInt32:
-                addAttributeSize<UInt32>(attribute);
-                break;
-            case AttributeUnderlyingType::utUInt64:
-                addAttributeSize<UInt64>(attribute);
-                break;
-            case AttributeUnderlyingType::utUInt128:
-                addAttributeSize<UInt128>(attribute);
-                break;
-            case AttributeUnderlyingType::utInt8:
-                addAttributeSize<Int8>(attribute);
-                break;
-            case AttributeUnderlyingType::utInt16:
-                addAttributeSize<Int16>(attribute);
-                break;
-            case AttributeUnderlyingType::utInt32:
-                addAttributeSize<Int32>(attribute);
-                break;
-            case AttributeUnderlyingType::utInt64:
-                addAttributeSize<Int64>(attribute);
-                break;
-            case AttributeUnderlyingType::utFloat32:
-                addAttributeSize<Float32>(attribute);
-                break;
-            case AttributeUnderlyingType::utFloat64:
-                addAttributeSize<Float64>(attribute);
-                break;
+            using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+            using AttributeType = typename Type::AttributeType;
 
-            case AttributeUnderlyingType::utDecimal32:
-                addAttributeSize<Decimal32>(attribute);
-                break;
-            case AttributeUnderlyingType::utDecimal64:
-                addAttributeSize<Decimal64>(attribute);
-                break;
-            case AttributeUnderlyingType::utDecimal128:
-                addAttributeSize<Decimal128>(attribute);
-                break;
+            addAttributeSize<AttributeType>(attribute);
+        };
 
-            case AttributeUnderlyingType::utString:
-            {
-                addAttributeSize<StringRef>(attribute);
-                bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
-
-                break;
-            }
-        }
+        callOnDictionaryAttributeType(attribute.type, type_call);
     }
 }
 
-
 template <typename T>
 void IPAddressDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
 {
@@ -734,65 +701,27 @@ void IPAddressDictionary::createAttributeImpl(Attribute & attribute, const Field
     attribute.maps.emplace<ContainerType<T>>();
 }
 
+template <>
+void IPAddressDictionary::createAttributeImpl<String>(Attribute & attribute, const Field & null_value)
+{
+    attribute.null_values = null_value.isNull() ? String() : null_value.get<String>();
+    attribute.maps.emplace<ContainerType<StringRef>>();
+    attribute.string_arena = std::make_unique<Arena>();
+}
+
 IPAddressDictionary::Attribute IPAddressDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
 {
     Attribute attr{type, {}, {}, {}};
 
-    switch (type)
+    auto type_call = [&](const auto & dictionary_attribute_type)
     {
-        case AttributeUnderlyingType::utUInt8:
-            createAttributeImpl<UInt8>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utUInt16:
-            createAttributeImpl<UInt16>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utUInt32:
-            createAttributeImpl<UInt32>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utUInt64:
-            createAttributeImpl<UInt64>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utUInt128:
-            createAttributeImpl<UInt128>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utInt8:
-            createAttributeImpl<Int8>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utInt16:
-            createAttributeImpl<Int16>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utInt32:
-            createAttributeImpl<Int32>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utInt64:
-            createAttributeImpl<Int64>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utFloat32:
-            createAttributeImpl<Float32>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utFloat64:
-            createAttributeImpl<Float64>(attr, null_value);
-            break;
+        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+        using AttributeType = typename Type::AttributeType;
 
-        case AttributeUnderlyingType::utDecimal32:
-            createAttributeImpl<Decimal32>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utDecimal64:
-            createAttributeImpl<Decimal64>(attr, null_value);
-            break;
-        case AttributeUnderlyingType::utDecimal128:
-            createAttributeImpl<Decimal128>(attr, null_value);
-            break;
+        createAttributeImpl<AttributeType>(attr, null_value);
+    };
 
-        case AttributeUnderlyingType::utString:
-        {
-
-            attr.null_values = null_value.isNull() ? String() : null_value.get<String>();
-            attr.maps.emplace<ContainerType<StringRef>>();
-            attr.string_arena = std::make_unique<Arena>();
-            break;
-        }
-    }
+    callOnDictionaryAttributeType(type, type_call);
 
     return attr;
 }
@@ -940,45 +869,23 @@ void IPAddressDictionary::setAttributeValueImpl(Attribute & attribute, const T v
 
 void IPAddressDictionary::setAttributeValue(Attribute & attribute, const Field & value)
 {
-    switch (attribute.type)
-    {
-        case AttributeUnderlyingType::utUInt8:
-            return setAttributeValueImpl<UInt8>(attribute, value.get<UInt64>());
-        case AttributeUnderlyingType::utUInt16:
-            return setAttributeValueImpl<UInt16>(attribute, value.get<UInt64>());
-        case AttributeUnderlyingType::utUInt32:
-            return setAttributeValueImpl<UInt32>(attribute, value.get<UInt64>());
-        case AttributeUnderlyingType::utUInt64:
-            return setAttributeValueImpl<UInt64>(attribute, value.get<UInt64>());
-        case AttributeUnderlyingType::utUInt128:
-            return setAttributeValueImpl<UInt128>(attribute, value.get<UInt128>());
-        case AttributeUnderlyingType::utInt8:
-            return setAttributeValueImpl<Int8>(attribute, value.get<Int64>());
-        case AttributeUnderlyingType::utInt16:
-            return setAttributeValueImpl<Int16>(attribute, value.get<Int64>());
-        case AttributeUnderlyingType::utInt32:
-            return setAttributeValueImpl<Int32>(attribute, value.get<Int64>());
-        case AttributeUnderlyingType::utInt64:
-            return setAttributeValueImpl<Int64>(attribute, value.get<Int64>());
-        case AttributeUnderlyingType::utFloat32:
-            return setAttributeValueImpl<Float32>(attribute, value.get<Float64>());
-        case AttributeUnderlyingType::utFloat64:
-            return setAttributeValueImpl<Float64>(attribute, value.get<Float64>());
+    auto type_call = [&](const auto & dictionary_attribute_type) {
+        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+        using AttributeType = typename Type::AttributeType;
 
-        case AttributeUnderlyingType::utDecimal32:
-            return setAttributeValueImpl<Decimal32>(attribute, value.get<Decimal32>());
-        case AttributeUnderlyingType::utDecimal64:
-            return setAttributeValueImpl<Decimal64>(attribute, value.get<Decimal64>());
-        case AttributeUnderlyingType::utDecimal128:
-            return setAttributeValueImpl<Decimal128>(attribute, value.get<Decimal128>());
-
-        case AttributeUnderlyingType::utString:
+        if constexpr (std::is_same_v<AttributeType, String>)
         {
             const auto & string = value.get<String>();
             const auto * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
-            return setAttributeValueImpl<StringRef>(attribute, StringRef{string_in_arena, string.size()});
+            setAttributeValueImpl<StringRef>(attribute, StringRef{string_in_arena, string.size()}); 
         }
-    }
+        else
+        {
+            setAttributeValueImpl<AttributeType>(attribute, value.get<NearestFieldType<AttributeType>>());
+        }
+    };
+
+    callOnDictionaryAttributeType(attribute.type, type_call);
 }
 
 const IPAddressDictionary::Attribute & IPAddressDictionary::getAttribute(const std::string & attribute_name) const
diff --git a/src/Dictionaries/IPAddressDictionary.h b/src/Dictionaries/IPAddressDictionary.h
index 2009141ebcc..7db46ddf6a5 100644
--- a/src/Dictionaries/IPAddressDictionary.h
+++ b/src/Dictionaries/IPAddressDictionary.h
@@ -61,91 +61,16 @@ public:
         return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
     }
 
-    template <typename T>
-    using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
+    DictionaryIdentifierType getIdentifierType() const override { return DictionaryIdentifierType::complex; }
 
-#define DECLARE(TYPE) \
-    void get##TYPE( \
-        const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const;
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
-
-    void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const;
-
-#define DECLARE(TYPE) \
-    void get##TYPE( \
-        const std::string & attribute_name, \
-        const Columns & key_columns, \
-        const DataTypes & key_types, \
-        const PaddedPODArray<TYPE> & def, \
-        ResultArrayType<TYPE> & out) const;
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
-
-    void getString(
-        const std::string & attribute_name,
+    ColumnPtr getColumn(
+        const std::string& attribute_name,
+        const DataTypePtr & result_type,
         const Columns & key_columns,
         const DataTypes & key_types,
-        const ColumnString * const def,
-        ColumnString * const out) const;
+        const ColumnPtr default_untyped) const override;
 
-#define DECLARE(TYPE) \
-    void get##TYPE( \
-        const std::string & attribute_name, \
-        const Columns & key_columns, \
-        const DataTypes & key_types, \
-        const TYPE def, \
-        ResultArrayType<TYPE> & out) const;
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
-
-    void getString(
-        const std::string & attribute_name,
-        const Columns & key_columns,
-        const DataTypes & key_types,
-        const String & def,
-        ColumnString * const out) const;
-
-    void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
+    ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override; 
 
     BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
 

From 0ed64d8ab0c5301c2e9eed1317dad86b9ba29a85 Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Tue, 29 Dec 2020 13:46:25 +0300
Subject: [PATCH 0226/1238] Simplified getColumn implementation

---
 src/Dictionaries/CacheDictionary.cpp          |  63 +++-------
 .../ComplexKeyCacheDictionary.cpp             |  63 +++-------
 .../ComplexKeyDirectDictionary.cpp            |  63 +++-------
 .../ComplexKeyHashedDictionary.cpp            |  63 +++-------
 src/Dictionaries/DirectDictionary.cpp         |  63 +++-------
 src/Dictionaries/FlatDictionary.cpp           |  63 +++-------
 src/Dictionaries/HashedDictionary.cpp         |  63 +++-------
 src/Dictionaries/IPAddressDictionary.cpp      |  63 +++-------
 src/Dictionaries/RangeHashedDictionary.cpp    | 118 +++---------------
 src/Dictionaries/SSDCacheDictionary.cpp       |  63 +++-------
 .../SSDComplexKeyCacheDictionary.cpp          |  66 +++-------
 11 files changed, 187 insertions(+), 564 deletions(-)

diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp
index c7517332594..cbb12d3b2ec 100644
--- a/src/Dictionaries/CacheDictionary.cpp
+++ b/src/Dictionaries/CacheDictionary.cpp
@@ -299,14 +299,27 @@ ColumnPtr CacheDictionary::getColumn(
 
             result = std::move(column_string);
         }
-        else if constexpr (IsNumber<AttributeType>)
+        else
         {
-            auto column = ColumnVector<AttributeType>::create(identifiers_size);
+            using ResultColumnType
+                = std::conditional_t<IsDecimalNumber<AttributeType>, ColumnDecimal<AttributeType>, ColumnVector<AttributeType>>;
+            using ResultColumnPtr = typename ResultColumnType::MutablePtr;
+
+            ResultColumnPtr column;
+
+            if constexpr (IsDecimalNumber<AttributeType>)
+            {
+                // auto scale = getDecimalScale(*attribute.type);
+                column = ColumnDecimal<AttributeType>::create(identifiers_size, 0);
+            }
+            else if constexpr (IsNumber<AttributeType>)
+                column = ColumnVector<AttributeType>::create(identifiers_size);
+ 
             auto& out = column->getData();
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ColumnVector<AttributeType>>(*default_untyped))
+                if (const auto default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
                 {
                     getItemsNumberImpl<AttributeType, AttributeType>(
                         attribute,
@@ -315,49 +328,7 @@ ColumnPtr CacheDictionary::getColumn(
                         [&](const size_t row) { return default_col->getData()[row]; }
                     );
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ColumnVector<AttributeType>>(default_untyped.get()))
-                {
-                    const auto & def = default_col_const->template getValue<AttributeType>();
-
-                    getItemsNumberImpl<AttributeType, AttributeType>(
-                        attribute,
-                        ids,
-                        out,
-                        [&](const size_t) { return def; }
-                    );
-                }
-            }
-            else
-            {
-                const auto null_value = std::get<AttributeType>(attribute.null_value);
-
-                getItemsNumberImpl<AttributeType, AttributeType>(
-                    attribute,
-                    ids,
-                    out,
-                    [&](const size_t) { return null_value; });
-            }
-
-            result = std::move(column);
-        }
-        else if constexpr (IsDecimalNumber<AttributeType>)
-        {
-            // auto scale = getDecimalScale(*attribute.type);
-            auto column = ColumnDecimal<AttributeType>::create(identifiers_size, 0);
-            auto& out = column->getData();
-
-            if (default_untyped != nullptr)
-            {
-                if (const auto default_col = checkAndGetColumn<ColumnDecimal<AttributeType>>(*default_untyped))
-                {
-                    getItemsNumberImpl<AttributeType, AttributeType>(
-                        attribute,
-                        ids,
-                        out,
-                        [&](const size_t row) { return default_col->getData()[row]; }
-                    );
-                }
-                else if (const auto default_col_const = checkAndGetColumnConst<ColumnDecimal<AttributeType>>(default_untyped.get()))
+                else if (const auto default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<AttributeType>();
 
diff --git a/src/Dictionaries/ComplexKeyCacheDictionary.cpp b/src/Dictionaries/ComplexKeyCacheDictionary.cpp
index 504db30b8d1..be67121aef1 100644
--- a/src/Dictionaries/ComplexKeyCacheDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyCacheDictionary.cpp
@@ -121,14 +121,27 @@ ColumnPtr ComplexKeyCacheDictionary::getColumn(
 
             result = std::move(column_string);
         }
-        else if constexpr (IsNumber<AttributeType>)
+        else
         {
-            auto column = ColumnVector<AttributeType>::create(keys_size);
+            using ResultColumnType
+                = std::conditional_t<IsDecimalNumber<AttributeType>, ColumnDecimal<AttributeType>, ColumnVector<AttributeType>>;
+            using ResultColumnPtr = typename ResultColumnType::MutablePtr;
+
+            ResultColumnPtr column;
+
+            if constexpr (IsDecimalNumber<AttributeType>)
+            {
+                // auto scale = getDecimalScale(*attribute.type);
+                column = ColumnDecimal<AttributeType>::create(keys_size, 0);
+            }
+            else if constexpr (IsNumber<AttributeType>)
+                column = ColumnVector<AttributeType>::create(keys_size);
+ 
             auto& out = column->getData();
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ColumnVector<AttributeType>>(*default_untyped))
+                if (const auto default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
                 {
                     getItemsNumberImpl<AttributeType, AttributeType>(
                         attribute,
@@ -137,49 +150,7 @@ ColumnPtr ComplexKeyCacheDictionary::getColumn(
                         [&](const size_t row) { return default_col->getData()[row]; }
                     );
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ColumnVector<AttributeType>>(default_untyped.get()))
-                {
-                    const auto & def = default_col_const->template getValue<AttributeType>();
-
-                    getItemsNumberImpl<AttributeType, AttributeType>(
-                        attribute,
-                        key_columns,
-                        out,
-                        [&](const size_t) { return def; }
-                    );
-                }
-            }
-            else
-            {
-                const auto null_value = std::get<AttributeType>(attribute.null_values);
-
-                getItemsNumberImpl<AttributeType, AttributeType>(
-                    attribute,
-                    key_columns,
-                    out,
-                    [&](const size_t) { return null_value; });
-            }
-
-            result = std::move(column);
-        }
-        else if constexpr (IsDecimalNumber<AttributeType>)
-        {
-            // auto scale = getDecimalScale(*attribute.type);
-            auto column = ColumnDecimal<AttributeType>::create(keys_size, 0);
-            auto& out = column->getData();
-
-            if (default_untyped != nullptr)
-            {
-                if (const auto default_col = checkAndGetColumn<ColumnDecimal<AttributeType>>(*default_untyped))
-                {
-                    getItemsNumberImpl<AttributeType, AttributeType>(
-                        attribute,
-                        key_columns,
-                        out,
-                        [&](const size_t row) { return default_col->getData()[row]; }
-                    );
-                }
-                else if (const auto default_col_const = checkAndGetColumnConst<ColumnDecimal<AttributeType>>(default_untyped.get()))
+                else if (const auto default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<AttributeType>();
 
diff --git a/src/Dictionaries/ComplexKeyDirectDictionary.cpp b/src/Dictionaries/ComplexKeyDirectDictionary.cpp
index 0668c5da07a..8e084375499 100644
--- a/src/Dictionaries/ComplexKeyDirectDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyDirectDictionary.cpp
@@ -110,14 +110,27 @@ ColumnPtr ComplexKeyDirectDictionary::getColumn(
 
             result = std::move(column_string);
         }
-        else if constexpr (IsNumber<AttributeType>)
+        else
         {
-            auto column = ColumnVector<AttributeType>::create(size);
+            using ResultColumnType
+                = std::conditional_t<IsDecimalNumber<AttributeType>, ColumnDecimal<AttributeType>, ColumnVector<AttributeType>>;
+            using ResultColumnPtr = typename ResultColumnType::MutablePtr;
+
+            ResultColumnPtr column;
+
+            if constexpr (IsDecimalNumber<AttributeType>)
+            {
+                // auto scale = getDecimalScale(*attribute.type);
+                column = ColumnDecimal<AttributeType>::create(size, 0);
+            }
+            else if constexpr (IsNumber<AttributeType>)
+                column = ColumnVector<AttributeType>::create(size);
+ 
             auto& out = column->getData();
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ColumnVector<AttributeType>>(*default_untyped))
+                if (const auto default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
                 {
                     getItemsImpl<AttributeType, AttributeType>(
                         attribute,
@@ -126,49 +139,7 @@ ColumnPtr ComplexKeyDirectDictionary::getColumn(
                         [&](const size_t row) { return default_col->getData()[row]; }
                     );
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ColumnVector<AttributeType>>(default_untyped.get()))
-                {
-                    const auto & def = default_col_const->template getValue<AttributeType>();
-
-                    getItemsImpl<AttributeType, AttributeType>(
-                        attribute,
-                        key_columns,
-                        [&](const size_t row, const auto value) { return out[row] = value; },
-                        [&](const size_t) { return def; }
-                    );
-                }
-            }
-            else
-            {
-                const auto null_value = std::get<AttributeType>(attribute.null_values);
-
-                getItemsImpl<AttributeType, AttributeType>(
-                    attribute,
-                    key_columns,
-                    [&](const size_t row, const auto value) { return out[row] = value; },
-                    [&](const size_t) { return null_value; });
-            }
-
-            result = std::move(column);
-        }
-        else if constexpr (IsDecimalNumber<AttributeType>)
-        {
-            // auto scale = getDecimalScale(*attribute.type);
-            auto column = ColumnDecimal<AttributeType>::create(size, 0);
-            auto& out = column->getData();
-
-            if (default_untyped != nullptr)
-            {
-                if (const auto default_col = checkAndGetColumn<ColumnDecimal<AttributeType>>(*default_untyped))
-                {
-                    getItemsImpl<AttributeType, AttributeType>(
-                        attribute,
-                        key_columns,
-                        [&](const size_t row, const auto value) { return out[row] = value; },
-                        [&](const size_t row) { return default_col->getData()[row]; }
-                    );
-                }
-                else if (const auto default_col_const = checkAndGetColumnConst<ColumnDecimal<AttributeType>>(default_untyped.get()))
+                else if (const auto default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<AttributeType>();
 
diff --git a/src/Dictionaries/ComplexKeyHashedDictionary.cpp b/src/Dictionaries/ComplexKeyHashedDictionary.cpp
index 23d28bfe6ec..a74185ec887 100644
--- a/src/Dictionaries/ComplexKeyHashedDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyHashedDictionary.cpp
@@ -96,14 +96,27 @@ ColumnPtr ComplexKeyHashedDictionary::getColumn(
 
             result = std::move(column_string);
         }
-        else if constexpr (IsNumber<AttributeType>)
+        else
         {
-            auto column = ColumnVector<AttributeType>::create(size);
+            using ResultColumnType
+                = std::conditional_t<IsDecimalNumber<AttributeType>, ColumnDecimal<AttributeType>, ColumnVector<AttributeType>>;
+            using ResultColumnPtr = typename ResultColumnType::MutablePtr;
+
+            ResultColumnPtr column;
+
+            if constexpr (IsDecimalNumber<AttributeType>)
+            {
+                // auto scale = getDecimalScale(*attribute.type);
+                column = ColumnDecimal<AttributeType>::create(size, 0);
+            }
+            else if constexpr (IsNumber<AttributeType>)
+                column = ColumnVector<AttributeType>::create(size);
+ 
             auto& out = column->getData();
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ColumnVector<AttributeType>>(*default_untyped))
+                if (const auto default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
                 {
                     getItemsImpl<AttributeType, AttributeType>(
                         attribute,
@@ -112,49 +125,7 @@ ColumnPtr ComplexKeyHashedDictionary::getColumn(
                         [&](const size_t row) { return default_col->getData()[row]; }
                     );
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ColumnVector<AttributeType>>(default_untyped.get()))
-                {
-                    const auto & def = default_col_const->template getValue<AttributeType>();
-
-                    getItemsImpl<AttributeType, AttributeType>(
-                        attribute,
-                        key_columns,
-                        [&](const size_t row, const auto value) { return out[row] = value; },
-                        [&](const size_t) { return def; }
-                    );
-                }
-            }
-            else
-            {
-                const auto null_value = std::get<AttributeType>(attribute.null_values);
-
-                getItemsImpl<AttributeType, AttributeType>(
-                    attribute,
-                    key_columns,
-                    [&](const size_t row, const auto value) { return out[row] = value; },
-                    [&](const size_t) { return null_value; });
-            }
-
-            result = std::move(column);
-        }
-        else if constexpr (IsDecimalNumber<AttributeType>)
-        {
-            // auto scale = getDecimalScale(*attribute.type);
-            auto column = ColumnDecimal<AttributeType>::create(size, 0);
-            auto& out = column->getData();
-
-            if (default_untyped != nullptr)
-            {
-                if (const auto default_col = checkAndGetColumn<ColumnDecimal<AttributeType>>(*default_untyped))
-                {
-                    getItemsImpl<AttributeType, AttributeType>(
-                        attribute,
-                        key_columns,
-                        [&](const size_t row, const auto value) { return out[row] = value; },
-                        [&](const size_t row) { return default_col->getData()[row]; }
-                    );
-                }
-                else if (const auto default_col_const = checkAndGetColumnConst<ColumnDecimal<AttributeType>>(default_untyped.get()))
+                else if (const auto default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<AttributeType>();
 
diff --git a/src/Dictionaries/DirectDictionary.cpp b/src/Dictionaries/DirectDictionary.cpp
index d024a7d7059..8018fc02fcf 100644
--- a/src/Dictionaries/DirectDictionary.cpp
+++ b/src/Dictionaries/DirectDictionary.cpp
@@ -207,14 +207,27 @@ ColumnPtr DirectDictionary::getColumn(
 
             result = std::move(column_string);
         }
-        else if constexpr (IsNumber<AttributeType>)
+        else
         {
-            auto column = ColumnVector<AttributeType>::create(size);
+            using ResultColumnType
+                = std::conditional_t<IsDecimalNumber<AttributeType>, ColumnDecimal<AttributeType>, ColumnVector<AttributeType>>;
+            using ResultColumnPtr = typename ResultColumnType::MutablePtr;
+
+            ResultColumnPtr column;
+
+            if constexpr (IsDecimalNumber<AttributeType>)
+            {
+                // auto scale = getDecimalScale(*attribute.type);
+                column = ColumnDecimal<AttributeType>::create(size, 0);
+            }
+            else if constexpr (IsNumber<AttributeType>)
+                column = ColumnVector<AttributeType>::create(size);
+ 
             auto& out = column->getData();
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ColumnVector<AttributeType>>(*default_untyped))
+                if (const auto default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
                 {
                     getItemsImpl<AttributeType, AttributeType>(
                         attribute,
@@ -223,49 +236,7 @@ ColumnPtr DirectDictionary::getColumn(
                         [&](const size_t row) { return default_col->getData()[row]; }
                     );
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ColumnVector<AttributeType>>(default_untyped.get()))
-                {
-                    const auto & def = default_col_const->template getValue<AttributeType>();
-
-                    getItemsImpl<AttributeType, AttributeType>(
-                        attribute,
-                        ids,
-                        [&](const size_t row, const auto value) { return out[row] = value; },
-                        [&](const size_t) { return def; }
-                    );
-                }
-            }
-            else
-            {
-                const auto null_value = std::get<AttributeType>(attribute.null_values);
-
-                getItemsImpl<AttributeType, AttributeType>(
-                    attribute,
-                    ids,
-                    [&](const size_t row, const auto value) { return out[row] = value; },
-                    [&](const size_t) { return null_value; });
-            }
-
-            result = std::move(column);
-        }
-        else if constexpr (IsDecimalNumber<AttributeType>)
-        {
-            // auto scale = getDecimalScale(*attribute.type);
-            auto column = ColumnDecimal<AttributeType>::create(size, 0);
-            auto& out = column->getData();
-
-            if (default_untyped != nullptr)
-            {
-                if (const auto default_col = checkAndGetColumn<ColumnDecimal<AttributeType>>(*default_untyped))
-                {
-                    getItemsImpl<AttributeType, AttributeType>(
-                        attribute,
-                        ids,
-                        [&](const size_t row, const auto value) { return out[row] = value; },
-                        [&](const size_t row) { return default_col->getData()[row]; }
-                    );
-                }
-                else if (const auto default_col_const = checkAndGetColumnConst<ColumnDecimal<AttributeType>>(default_untyped.get()))
+                else if (const auto default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<AttributeType>();
 
diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp
index 790c0100d24..2c856dc59ca 100644
--- a/src/Dictionaries/FlatDictionary.cpp
+++ b/src/Dictionaries/FlatDictionary.cpp
@@ -168,14 +168,27 @@ ColumnPtr FlatDictionary::getColumn(
 
             result = std::move(column_string);
         }
-        else if constexpr (IsNumber<AttributeType>)
+        else
         {
-            auto column = ColumnVector<AttributeType>::create(size);
+            using ResultColumnType
+                = std::conditional_t<IsDecimalNumber<AttributeType>, ColumnDecimal<AttributeType>, ColumnVector<AttributeType>>;
+            using ResultColumnPtr = typename ResultColumnType::MutablePtr;
+
+            ResultColumnPtr column;
+
+            if constexpr (IsDecimalNumber<AttributeType>)
+            {
+                // auto scale = getDecimalScale(*attribute.type);
+                column = ColumnDecimal<AttributeType>::create(size, 0);
+            }
+            else if constexpr (IsNumber<AttributeType>)
+                column = ColumnVector<AttributeType>::create(size);
+ 
             auto& out = column->getData();
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ColumnVector<AttributeType>>(*default_untyped))
+                if (const auto default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
                 {
                     getItemsImpl<AttributeType, AttributeType>(
                         attribute,
@@ -184,49 +197,7 @@ ColumnPtr FlatDictionary::getColumn(
                         [&](const size_t row) { return default_col->getData()[row]; }
                     );
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ColumnVector<AttributeType>>(default_untyped.get()))
-                {
-                    const auto & def = default_col_const->template getValue<AttributeType>();
-
-                    getItemsImpl<AttributeType, AttributeType>(
-                        attribute,
-                        ids,
-                        [&](const size_t row, const auto value) { return out[row] = value; },
-                        [&](const size_t) { return def; }
-                    );
-                }
-            }
-            else
-            {
-                const auto null_value = std::get<AttributeType>(attribute.null_values);
-
-                getItemsImpl<AttributeType, AttributeType>(
-                    attribute,
-                    ids,
-                    [&](const size_t row, const auto value) { return out[row] = value; },
-                    [&](const size_t) { return null_value; });
-            }
-
-            result = std::move(column);
-        }
-        else if constexpr (IsDecimalNumber<AttributeType>)
-        {
-            // auto scale = getDecimalScale(*attribute.type);
-            auto column = ColumnDecimal<AttributeType>::create(size, 0);
-            auto& out = column->getData();
-
-            if (default_untyped != nullptr)
-            {
-                if (const auto default_col = checkAndGetColumn<ColumnDecimal<AttributeType>>(*default_untyped))
-                {
-                    getItemsImpl<AttributeType, AttributeType>(
-                        attribute,
-                        ids,
-                        [&](const size_t row, const auto value) { return out[row] = value; },
-                        [&](const size_t row) { return default_col->getData()[row]; }
-                    );
-                }
-                else if (const auto default_col_const = checkAndGetColumnConst<ColumnDecimal<AttributeType>>(default_untyped.get()))
+                else if (const auto default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<AttributeType>();
 
diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp
index 3034d585f88..a1e3f176706 100644
--- a/src/Dictionaries/HashedDictionary.cpp
+++ b/src/Dictionaries/HashedDictionary.cpp
@@ -190,14 +190,27 @@ ColumnPtr HashedDictionary::getColumn(
 
             result = std::move(column_string);
         }
-        else if constexpr (IsNumber<AttributeType>)
+        else
         {
-            auto column = ColumnVector<AttributeType>::create(size);
+            using ResultColumnType
+                = std::conditional_t<IsDecimalNumber<AttributeType>, ColumnDecimal<AttributeType>, ColumnVector<AttributeType>>;
+            using ResultColumnPtr = typename ResultColumnType::MutablePtr;
+
+            ResultColumnPtr column;
+
+            if constexpr (IsDecimalNumber<AttributeType>)
+            {
+                // auto scale = getDecimalScale(*attribute.type);
+                column = ColumnDecimal<AttributeType>::create(size, 0);
+            }
+            else if constexpr (IsNumber<AttributeType>)
+                column = ColumnVector<AttributeType>::create(size);
+ 
             auto& out = column->getData();
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ColumnVector<AttributeType>>(*default_untyped))
+                if (const auto default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
                 {
                     getItemsImpl<AttributeType, AttributeType>(
                         attribute,
@@ -206,49 +219,7 @@ ColumnPtr HashedDictionary::getColumn(
                         [&](const size_t row) { return default_col->getData()[row]; }
                     );
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ColumnVector<AttributeType>>(default_untyped.get()))
-                {
-                    const auto & def = default_col_const->template getValue<AttributeType>();
-
-                    getItemsImpl<AttributeType, AttributeType>(
-                        attribute,
-                        ids,
-                        [&](const size_t row, const auto value) { return out[row] = value; },
-                        [&](const size_t) { return def; }
-                    );
-                }
-            }
-            else
-            {
-                const auto null_value = std::get<AttributeType>(attribute.null_values);
-
-                getItemsImpl<AttributeType, AttributeType>(
-                    attribute,
-                    ids,
-                    [&](const size_t row, const auto value) { return out[row] = value; },
-                    [&](const size_t) { return null_value; });
-            }
-
-            result = std::move(column);
-        }
-        else if constexpr (IsDecimalNumber<AttributeType>)
-        {
-            // auto scale = getDecimalScale(*attribute.type);
-            auto column = ColumnDecimal<AttributeType>::create(size, 0);
-            auto& out = column->getData();
-
-            if (default_untyped != nullptr)
-            {
-                if (const auto default_col = checkAndGetColumn<ColumnDecimal<AttributeType>>(*default_untyped))
-                {
-                    getItemsImpl<AttributeType, AttributeType>(
-                        attribute,
-                        ids,
-                        [&](const size_t row, const auto value) { return out[row] = value; },
-                        [&](const size_t row) { return default_col->getData()[row]; }
-                    );
-                }
-                else if (const auto default_col_const = checkAndGetColumnConst<ColumnDecimal<AttributeType>>(default_untyped.get()))
+                else if (const auto default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<AttributeType>();
 
diff --git a/src/Dictionaries/IPAddressDictionary.cpp b/src/Dictionaries/IPAddressDictionary.cpp
index 0f2c2c8a706..028b01607d0 100644
--- a/src/Dictionaries/IPAddressDictionary.cpp
+++ b/src/Dictionaries/IPAddressDictionary.cpp
@@ -329,14 +329,27 @@ ColumnPtr IPAddressDictionary::getColumn(
 
             result = std::move(column_string);
         }
-        else if constexpr (IsNumber<AttributeType>)
+        else
         {
-            auto column = ColumnVector<AttributeType>::create(size);
+            using ResultColumnType
+                = std::conditional_t<IsDecimalNumber<AttributeType>, ColumnDecimal<AttributeType>, ColumnVector<AttributeType>>;
+            using ResultColumnPtr = typename ResultColumnType::MutablePtr;
+
+            ResultColumnPtr column;
+
+            if constexpr (IsDecimalNumber<AttributeType>)
+            {
+                // auto scale = getDecimalScale(*attribute.type);
+                column = ColumnDecimal<AttributeType>::create(size, 0);
+            }
+            else if constexpr (IsNumber<AttributeType>)
+                column = ColumnVector<AttributeType>::create(size);
+ 
             auto& out = column->getData();
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ColumnVector<AttributeType>>(*default_untyped))
+                if (const auto default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
                 {
                     getItemsImpl<AttributeType, AttributeType>(
                         attribute,
@@ -345,49 +358,7 @@ ColumnPtr IPAddressDictionary::getColumn(
                         [&](const size_t row) { return default_col->getData()[row]; }
                     );
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ColumnVector<AttributeType>>(default_untyped.get()))
-                {
-                    const auto & def = default_col_const->template getValue<AttributeType>();
-
-                    getItemsImpl<AttributeType, AttributeType>(
-                        attribute,
-                        key_columns,
-                        [&](const size_t row, const auto value) { return out[row] = value; },
-                        [&](const size_t) { return def; }
-                    );
-                }
-            }
-            else
-            {
-                const auto null_value = std::get<AttributeType>(attribute.null_values);
-
-                getItemsImpl<AttributeType, AttributeType>(
-                    attribute,
-                    key_columns,
-                    [&](const size_t row, const auto value) { return out[row] = value; },
-                    [&](const size_t) { return null_value; });
-            }
-
-            result = std::move(column);
-        }
-        else if constexpr (IsDecimalNumber<AttributeType>)
-        {
-            // auto scale = getDecimalScale(*attribute.type);
-            auto column = ColumnDecimal<AttributeType>::create(size, 0);
-            auto& out = column->getData();
-
-            if (default_untyped != nullptr)
-            {
-                if (const auto default_col = checkAndGetColumn<ColumnDecimal<AttributeType>>(*default_untyped))
-                {
-                    getItemsImpl<AttributeType, AttributeType>(
-                        attribute,
-                        key_columns,
-                        [&](const size_t row, const auto value) { return out[row] = value; },
-                        [&](const size_t row) { return default_col->getData()[row]; }
-                    );
-                }
-                else if (const auto default_col_const = checkAndGetColumnConst<ColumnDecimal<AttributeType>>(default_untyped.get()))
+                else if (const auto default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<AttributeType>();
 
diff --git a/src/Dictionaries/RangeHashedDictionary.cpp b/src/Dictionaries/RangeHashedDictionary.cpp
index 57299da43b8..710a018d85f 100644
--- a/src/Dictionaries/RangeHashedDictionary.cpp
+++ b/src/Dictionaries/RangeHashedDictionary.cpp
@@ -147,14 +147,27 @@ ColumnPtr RangeHashedDictionary::getColumn(
 
             result = std::move(column_string);
         }
-        else if constexpr (IsNumber<AttributeType>)
+        else
         {
-            auto column = ColumnVector<AttributeType>::create(size);
+            using ResultColumnType
+                = std::conditional_t<IsDecimalNumber<AttributeType>, ColumnDecimal<AttributeType>, ColumnVector<AttributeType>>;
+            using ResultColumnPtr = typename ResultColumnType::MutablePtr;
+
+            ResultColumnPtr column;
+
+            if constexpr (IsDecimalNumber<AttributeType>)
+            {
+                // auto scale = getDecimalScale(*attribute.type);
+                column = ColumnDecimal<AttributeType>::create(size, 0);
+            }
+            else if constexpr (IsNumber<AttributeType>)
+                column = ColumnVector<AttributeType>::create(size);
+ 
             auto& out = column->getData();
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ColumnVector<AttributeType>>(*default_untyped))
+                if (const auto default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
                 {
                     getItemsImpl<AttributeType, AttributeType>(
                         attribute,
@@ -163,49 +176,7 @@ ColumnPtr RangeHashedDictionary::getColumn(
                         [&](const size_t row) { return default_col->getData()[row]; }
                     );
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ColumnVector<AttributeType>>(default_untyped.get()))
-                {
-                    const auto & def = default_col_const->template getValue<AttributeType>();
-
-                    getItemsImpl<AttributeType, AttributeType>(
-                        attribute,
-                        key_columns,
-                        [&](const size_t row, const auto value) { return out[row] = value; },
-                        [&](const size_t) { return def; }
-                    );
-                }
-            }
-            else
-            {
-                const auto null_value = std::get<AttributeType>(attribute.null_values);
-
-                getItemsImpl<AttributeType, AttributeType>(
-                    attribute,
-                    key_columns,
-                    [&](const size_t row, const auto value) { return out[row] = value; },
-                    [&](const size_t) { return null_value; });
-            }
-
-            result = std::move(column);
-        }
-        else if constexpr (IsDecimalNumber<AttributeType>)
-        {
-            // auto scale = getDecimalScale(*attribute.type);
-            auto column = ColumnDecimal<AttributeType>::create(size, 0);
-            auto& out = column->getData();
-
-            if (default_untyped != nullptr)
-            {
-                if (const auto default_col = checkAndGetColumn<ColumnDecimal<AttributeType>>(*default_untyped))
-                {
-                    getItemsImpl<AttributeType, AttributeType>(
-                        attribute,
-                        key_columns,
-                        [&](const size_t row, const auto value) { return out[row] = value; },
-                        [&](const size_t row) { return default_col->getData()[row]; }
-                    );
-                }
-                else if (const auto default_col_const = checkAndGetColumnConst<ColumnDecimal<AttributeType>>(default_untyped.get()))
+                else if (const auto default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<AttributeType>();
 
@@ -341,61 +312,6 @@ void RangeHashedDictionary::calculateBytesAllocated()
         };
 
         callOnDictionaryAttributeType(attribute.type, type_call);
-
-        // switch (attribute.type)
-        // {
-        //     case AttributeUnderlyingType::utUInt8:
-        //         addAttributeSize<UInt8>(attribute);
-        //         break;
-        //     case AttributeUnderlyingType::utUInt16:
-        //         addAttributeSize<UInt16>(attribute);
-        //         break;
-        //     case AttributeUnderlyingType::utUInt32:
-        //         addAttributeSize<UInt32>(attribute);
-        //         break;
-        //     case AttributeUnderlyingType::utUInt64:
-        //         addAttributeSize<UInt64>(attribute);
-        //         break;
-        //     case AttributeUnderlyingType::utUInt128:
-        //         addAttributeSize<UInt128>(attribute);
-        //         break;
-        //     case AttributeUnderlyingType::utInt8:
-        //         addAttributeSize<Int8>(attribute);
-        //         break;
-        //     case AttributeUnderlyingType::utInt16:
-        //         addAttributeSize<Int16>(attribute);
-        //         break;
-        //     case AttributeUnderlyingType::utInt32:
-        //         addAttributeSize<Int32>(attribute);
-        //         break;
-        //     case AttributeUnderlyingType::utInt64:
-        //         addAttributeSize<Int64>(attribute);
-        //         break;
-        //     case AttributeUnderlyingType::utFloat32:
-        //         addAttributeSize<Float32>(attribute);
-        //         break;
-        //     case AttributeUnderlyingType::utFloat64:
-        //         addAttributeSize<Float64>(attribute);
-        //         break;
-
-        //     case AttributeUnderlyingType::utDecimal32:
-        //         addAttributeSize<Decimal32>(attribute);
-        //         break;
-        //     case AttributeUnderlyingType::utDecimal64:
-        //         addAttributeSize<Decimal64>(attribute);
-        //         break;
-        //     case AttributeUnderlyingType::utDecimal128:
-        //         addAttributeSize<Decimal128>(attribute);
-        //         break;
-
-        //     case AttributeUnderlyingType::utString:
-        //     {
-        //         addAttributeSize<StringRef>(attribute);
-        //         bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
-
-        //         break;
-        //     }
-        // }
     }
 }
 
diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp
index 23507792f7e..e3e4cf6c1a0 100644
--- a/src/Dictionaries/SSDCacheDictionary.cpp
+++ b/src/Dictionaries/SSDCacheDictionary.cpp
@@ -1378,14 +1378,27 @@ ColumnPtr SSDCacheDictionary::getColumn(
 
             result = std::move(column_string);
         }
-        else if constexpr (IsNumber<AttributeType>)
+        else
         {
-            auto column = ColumnVector<AttributeType>::create(identifiers_size);
+            using ResultColumnType
+                = std::conditional_t<IsDecimalNumber<AttributeType>, ColumnDecimal<AttributeType>, ColumnVector<AttributeType>>;
+            using ResultColumnPtr = typename ResultColumnType::MutablePtr;
+
+            ResultColumnPtr column;
+
+            if constexpr (IsDecimalNumber<AttributeType>)
+            {
+                // auto scale = getDecimalScale(*attribute.type);
+                column = ColumnDecimal<AttributeType>::create(identifiers_size, 0);
+            }
+            else if constexpr (IsNumber<AttributeType>)
+                column = ColumnVector<AttributeType>::create(identifiers_size);
+ 
             auto& out = column->getData();
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ColumnVector<AttributeType>>(*default_untyped))
+                if (const auto default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
                 {
                     getItemsNumberImpl<AttributeType, AttributeType>(
                         index,
@@ -1394,49 +1407,7 @@ ColumnPtr SSDCacheDictionary::getColumn(
                         [&](const size_t row) { return default_col->getData()[row]; }
                     );
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ColumnVector<AttributeType>>(default_untyped.get()))
-                {
-                    const auto & def = default_col_const->template getValue<AttributeType>();
-
-                    getItemsNumberImpl<AttributeType, AttributeType>(
-                        index,
-                        ids,
-                        out,
-                        [&](const size_t) { return def; }
-                    );
-                }
-            }
-            else
-            {
-                const auto null_value = std::get<AttributeType>(null_values[index]);;
-
-                getItemsNumberImpl<AttributeType, AttributeType>(
-                    index,
-                    ids,
-                    out,
-                    [&](const size_t) { return null_value; });
-            }
-
-            result = std::move(column);
-        }
-        else if constexpr (IsDecimalNumber<AttributeType>)
-        {
-            // auto scale = getDecimalScale(*attribute.type);
-            auto column = ColumnDecimal<AttributeType>::create(identifiers_size, 0);
-            auto& out = column->getData();
-
-            if (default_untyped != nullptr)
-            {
-                if (const auto default_col = checkAndGetColumn<ColumnDecimal<AttributeType>>(*default_untyped))
-                {
-                    getItemsNumberImpl<AttributeType, AttributeType>(
-                        index,
-                        ids,
-                        out,
-                        [&](const size_t row) { return default_col->getData()[row]; }
-                    );
-                }
-                else if (const auto default_col_const = checkAndGetColumnConst<ColumnDecimal<AttributeType>>(default_untyped.get()))
+                else if (const auto default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<AttributeType>();
 
diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp
index 452065c20ad..05aa5a27ea9 100644
--- a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp
+++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp
@@ -1427,14 +1427,27 @@ ColumnPtr SSDComplexKeyCacheDictionary::getColumn(
 
             result = std::move(column_string);
         }
-        else if constexpr (IsNumber<AttributeType>)
+        else
         {
-            auto column = ColumnVector<AttributeType>::create(keys_size);
+            using ResultColumnType
+                = std::conditional_t<IsDecimalNumber<AttributeType>, ColumnDecimal<AttributeType>, ColumnVector<AttributeType>>;
+            using ResultColumnPtr = typename ResultColumnType::MutablePtr;
+
+            ResultColumnPtr column;
+
+            if constexpr (IsDecimalNumber<AttributeType>)
+            {
+                // auto scale = getDecimalScale(*attribute.type);
+                column = ColumnDecimal<AttributeType>::create(keys_size, 0);
+            }
+            else if constexpr (IsNumber<AttributeType>)
+                column = ColumnVector<AttributeType>::create(keys_size);
+ 
             auto& out = column->getData();
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ColumnVector<AttributeType>>(*default_untyped))
+                if (const auto default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
                 {
                     getItemsNumberImpl<AttributeType, AttributeType>(
                         index,
@@ -1444,52 +1457,7 @@ ColumnPtr SSDComplexKeyCacheDictionary::getColumn(
                         [&](const size_t row) { return default_col->getData()[row]; }
                     );
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ColumnVector<AttributeType>>(default_untyped.get()))
-                {
-                    const auto & def = default_col_const->template getValue<AttributeType>();
-
-                    getItemsNumberImpl<AttributeType, AttributeType>(
-                        index,
-                        key_columns,
-                        key_types,
-                        out,
-                        [&](const size_t) { return def; }
-                    );
-                }
-            }
-            else
-            {
-                const auto null_value = std::get<AttributeType>(null_values[index]); /* NOLINT */
-
-                getItemsNumberImpl<AttributeType, AttributeType>(
-                    index,
-                    key_columns,
-                    key_types,
-                    out,
-                    [&](const size_t) { return null_value; });
-            }
-
-            result = std::move(column);
-        }
-        else if constexpr (IsDecimalNumber<AttributeType>)
-        {
-            // auto scale = getDecimalScale(*attribute.type);
-            auto column = ColumnDecimal<AttributeType>::create(keys_size, 0);
-            auto& out = column->getData();
-
-            if (default_untyped != nullptr)
-            {
-                if (const auto default_col = checkAndGetColumn<ColumnDecimal<AttributeType>>(*default_untyped))
-                {
-                    getItemsNumberImpl<AttributeType, AttributeType>(
-                        index,
-                        key_columns,
-                        key_types,
-                        out,
-                        [&](const size_t row) { return default_col->getData()[row]; }
-                    );
-                }
-                else if (const auto default_col_const = checkAndGetColumnConst<ColumnDecimal<AttributeType>>(default_untyped.get()))
+                else if (const auto default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<AttributeType>();
 

From 8fdbde864298aa5db53f3dc238a6160765b4fc4d Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Tue, 29 Dec 2020 13:55:40 +0300
Subject: [PATCH 0227/1238] Updated PolygonDictionary to new interface

---
 src/Dictionaries/PolygonDictionary.cpp | 277 ++++++++++++-------------
 src/Dictionaries/PolygonDictionary.h   | 104 +---------
 2 files changed, 144 insertions(+), 237 deletions(-)

diff --git a/src/Dictionaries/PolygonDictionary.cpp b/src/Dictionaries/PolygonDictionary.cpp
index 04eadbfc0ce..2b9bf951192 100644
--- a/src/Dictionaries/PolygonDictionary.cpp
+++ b/src/Dictionaries/PolygonDictionary.cpp
@@ -5,6 +5,7 @@
 #include <Columns/ColumnArray.h>
 #include <Columns/ColumnTuple.h>
 #include <DataTypes/DataTypeArray.h>
+#include <Functions/FunctionHelpers.h>
 
 #include <numeric>
 
@@ -92,6 +93,129 @@ bool IPolygonDictionary::isInjective(const std::string &) const
     return false;
 }
 
+ColumnPtr IPolygonDictionary::getColumn(
+    const std::string & attribute_name,
+    const DataTypePtr &,
+    const Columns & key_columns,
+    const DataTypes &,
+    const ColumnPtr default_untyped) const
+{
+    /// TODO: Validate input types
+
+    ColumnPtr result;
+
+    const auto index = getAttributeIndex(attribute_name);
+
+    /// TODO: Check that attribute type is same as result type
+
+    auto size = key_columns.front()->size();
+
+    auto type_call = [&](const auto &dictionary_attribute_type)
+    {
+        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+        using AttributeType = typename Type::AttributeType;
+
+        if constexpr (std::is_same_v<AttributeType, String>)
+        {
+            auto column_string = ColumnString::create();
+            auto out = column_string.get();
+
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ColumnString>(*default_untyped))
+                {
+                    getItemsImpl<String, StringRef>(
+                        index,
+                        key_columns,
+                        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
+                        [&](const size_t row) { return default_col->getDataAt(row); });
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<String>();
+
+                    getItemsImpl<StringRef, StringRef>(
+                        index,
+                        key_columns,
+                        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
+                        [&](const size_t) { return def; });
+                }
+            }
+            else
+            {
+                const auto & null_value = StringRef{std::get<String>(null_values[index])};
+
+                getItemsImpl<String, StringRef>(
+                    index,
+                    key_columns,
+                    [&](const size_t, const StringRef & value) { out->insertData(value.data, value.size); },
+                    [&](const size_t) { return null_value; });
+            }
+
+            result = std::move(column_string);
+        }
+        else
+        {
+            using ResultColumnType
+                = std::conditional_t<IsDecimalNumber<AttributeType>, ColumnDecimal<AttributeType>, ColumnVector<AttributeType>>;
+            using ResultColumnPtr = typename ResultColumnType::MutablePtr;
+
+            ResultColumnPtr column;
+
+            if constexpr (IsDecimalNumber<AttributeType>)
+            {
+                // auto scale = getDecimalScale(*attribute.type);
+                column = ColumnDecimal<AttributeType>::create(size, 0);
+            }
+            else if constexpr (IsNumber<AttributeType>)
+                column = ColumnVector<AttributeType>::create(size);
+ 
+            auto& out = column->getData();
+
+            if (default_untyped != nullptr)
+            {
+                if (const auto default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
+                {
+                    getItemsImpl<AttributeType, AttributeType>(
+                        index,
+                        key_columns,
+                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t row) { return default_col->getData()[row]; }
+                    );
+                }
+                else if (const auto default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
+                {
+                    const auto & def = default_col_const->template getValue<AttributeType>();
+
+                    getItemsImpl<AttributeType, AttributeType>(
+                        index,
+                        key_columns,
+                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t) { return def; }
+                    );
+                }
+            }
+            else
+            {
+                const auto null_value = std::get<AttributeType>(null_values[index]);
+
+                getItemsImpl<AttributeType, AttributeType>(
+                    index,
+                    key_columns,
+                    [&](const size_t row, const auto value) { return out[row] = value; },
+                    [&](const size_t) { return null_value; }
+                );
+            }
+
+            result = std::move(column);
+        }
+    };
+
+    callOnDictionaryAttributeType(dict_struct.attributes[index].underlying_type, type_call);
+   
+    return result;
+}
+
 BlockInputStreamPtr IPolygonDictionary::getBlockInputStream(const Names &, size_t) const
 {
     // TODO: In order for this to work one would first have to support retrieving arrays from dictionaries.
@@ -255,8 +379,14 @@ std::vector<IPolygonDictionary::Point> IPolygonDictionary::extractPoints(const C
     return result;
 }
 
-void IPolygonDictionary::has(const Columns & key_columns, const DataTypes &, PaddedPODArray<UInt8> & out) const
+ColumnUInt8::Ptr IPolygonDictionary::has(const Columns & key_columns, const DataTypes & key_types) const
 {
+    dict_struct.validateKeyTypes(key_types);
+
+    auto size = key_columns.front()->size();
+    auto result = ColumnUInt8::create(size);
+    auto& out = result->getData();
+
     size_t row = 0;
     for (const auto & pt : extractPoints(key_columns))
     {
@@ -266,6 +396,8 @@ void IPolygonDictionary::has(const Columns & key_columns, const DataTypes &, Pad
     }
 
     query_count.fetch_add(row, std::memory_order_relaxed);
+
+    return result;
 }
 
 size_t IPolygonDictionary::getAttributeIndex(const std::string & attribute_name) const
@@ -276,149 +408,6 @@ size_t IPolygonDictionary::getAttributeIndex(const std::string & attribute_name)
     return it->second;
 }
 
-#define DECLARE(TYPE) \
-    void IPolygonDictionary::get##TYPE( \
-        const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ResultArrayType<TYPE> & out) const \
-    { \
-        const auto ind = getAttributeIndex(attribute_name); \
-        checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::ut##TYPE); \
-\
-        const auto null_value = std::get<TYPE>(null_values[ind]); \
-\
-        getItemsImpl<TYPE, TYPE>( \
-            ind, \
-            key_columns, \
-            [&](const size_t row, const auto value) { out[row] = value; }, \
-            [&](const size_t) { return null_value; }); \
-    }
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
-
-void IPolygonDictionary::getString(
-        const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ColumnString * out) const
-{
-    const auto ind = getAttributeIndex(attribute_name);
-    checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::utString);
-
-    const auto & null_value = StringRef{std::get<String>(null_values[ind])};
-
-    getItemsImpl<String, StringRef>(
-            ind,
-            key_columns,
-            [&](const size_t, const StringRef & value) { out->insertData(value.data, value.size); },
-            [&](const size_t) { return null_value; });
-}
-
-#define DECLARE(TYPE) \
-    void IPolygonDictionary::get##TYPE( \
-        const std::string & attribute_name, \
-        const Columns & key_columns, \
-        const DataTypes &, \
-        const PaddedPODArray<TYPE> & def, \
-        ResultArrayType<TYPE> & out) const \
-    { \
-        const auto ind = getAttributeIndex(attribute_name); \
-        checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::ut##TYPE); \
-\
-        getItemsImpl<TYPE, TYPE>( \
-            ind, \
-            key_columns, \
-            [&](const size_t row, const auto value) { out[row] = value; }, \
-            [&](const size_t row) { return def[row]; }); \
-    }
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
-
-void IPolygonDictionary::getString(
-        const std::string & attribute_name,
-        const Columns & key_columns,
-        const DataTypes &,
-        const ColumnString * const def,
-        ColumnString * const out) const
-{
-    const auto ind = getAttributeIndex(attribute_name);
-    checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::utString);
-
-    getItemsImpl<String, StringRef>(
-            ind,
-            key_columns,
-            [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
-            [&](const size_t row) { return def->getDataAt(row); });
-}
-
-#define DECLARE(TYPE) \
-    void IPolygonDictionary::get##TYPE( \
-        const std::string & attribute_name, \
-        const Columns & key_columns, \
-        const DataTypes &, \
-        const TYPE def, \
-        ResultArrayType<TYPE> & out) const \
-    { \
-        const auto ind = getAttributeIndex(attribute_name); \
-        checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::ut##TYPE); \
-\
-        getItemsImpl<TYPE, TYPE>( \
-            ind, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
-    }
-    DECLARE(UInt8)
-    DECLARE(UInt16)
-    DECLARE(UInt32)
-    DECLARE(UInt64)
-    DECLARE(UInt128)
-    DECLARE(Int8)
-    DECLARE(Int16)
-    DECLARE(Int32)
-    DECLARE(Int64)
-    DECLARE(Float32)
-    DECLARE(Float64)
-    DECLARE(Decimal32)
-    DECLARE(Decimal64)
-    DECLARE(Decimal128)
-#undef DECLARE
-
-void IPolygonDictionary::getString(
-        const std::string & attribute_name,
-        const Columns & key_columns,
-        const DataTypes &,
-        const String & def,
-        ColumnString * const out) const
-{
-    const auto ind = getAttributeIndex(attribute_name);
-    checkAttributeType(this, attribute_name, dict_struct.attributes[ind].underlying_type, AttributeUnderlyingType::utString);
-
-    getItemsImpl<String, StringRef>(
-            ind,
-            key_columns,
-            [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
-            [&](const size_t) { return StringRef{def}; });
-}
-
 template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
 void IPolygonDictionary::getItemsImpl(
         size_t attribute_ind, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
diff --git a/src/Dictionaries/PolygonDictionary.h b/src/Dictionaries/PolygonDictionary.h
index 75114cff435..f7b7d92fd27 100644
--- a/src/Dictionaries/PolygonDictionary.h
+++ b/src/Dictionaries/PolygonDictionary.h
@@ -78,101 +78,19 @@ public:
 
     bool isInjective(const std::string & attribute_name) const override;
 
+    DictionaryIdentifierType getIdentifierType() const override { return DictionaryIdentifierType::complex; }
+
+    ColumnPtr getColumn(
+        const std::string& attribute_name,
+        const DataTypePtr & result_type,
+        const Columns & key_columns,
+        const DataTypes & key_types,
+        const ColumnPtr default_untyped) const override;
+
+    ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override;
+
     BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
 
-    template <typename T>
-    using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
-
-    /** Functions used to retrieve attributes of specific type by key. */
-
-#define DECLARE(TYPE) \
-    void get##TYPE( \
-        const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ResultArrayType<TYPE> & out) const;
-        DECLARE(UInt8)
-        DECLARE(UInt16)
-        DECLARE(UInt32)
-        DECLARE(UInt64)
-        DECLARE(UInt128)
-        DECLARE(Int8)
-        DECLARE(Int16)
-        DECLARE(Int32)
-        DECLARE(Int64)
-        DECLARE(Float32)
-        DECLARE(Float64)
-        DECLARE(Decimal32)
-        DECLARE(Decimal64)
-        DECLARE(Decimal128)
-#undef DECLARE
-
-    void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ColumnString * out) const;
-
-#define DECLARE(TYPE) \
-    void get##TYPE( \
-        const std::string & attribute_name, \
-        const Columns & key_columns, \
-        const DataTypes &, \
-        const PaddedPODArray<TYPE> & def, \
-        ResultArrayType<TYPE> & out) const;
-        DECLARE(UInt8)
-        DECLARE(UInt16)
-        DECLARE(UInt32)
-        DECLARE(UInt64)
-        DECLARE(UInt128)
-        DECLARE(Int8)
-        DECLARE(Int16)
-        DECLARE(Int32)
-        DECLARE(Int64)
-        DECLARE(Float32)
-        DECLARE(Float64)
-        DECLARE(Decimal32)
-        DECLARE(Decimal64)
-        DECLARE(Decimal128)
-#undef DECLARE
-
-    void getString(
-            const std::string & attribute_name,
-            const Columns & key_columns,
-            const DataTypes &,
-            const ColumnString * const def,
-            ColumnString * const out) const;
-
-#define DECLARE(TYPE) \
-    void get##TYPE( \
-        const std::string & attribute_name, \
-        const Columns & key_columns, \
-        const DataTypes &, \
-        const TYPE def, \
-        ResultArrayType<TYPE> & out) const;
-        DECLARE(UInt8)
-        DECLARE(UInt16)
-        DECLARE(UInt32)
-        DECLARE(UInt64)
-        DECLARE(UInt128)
-        DECLARE(Int8)
-        DECLARE(Int16)
-        DECLARE(Int32)
-        DECLARE(Int64)
-        DECLARE(Float32)
-        DECLARE(Float64)
-        DECLARE(Decimal32)
-        DECLARE(Decimal64)
-        DECLARE(Decimal128)
-#undef DECLARE
-
-    void getString(
-            const std::string & attribute_name,
-            const Columns & key_columns,
-            const DataTypes & key_types,
-            const String & def,
-            ColumnString * const out) const;
-
-    /** Checks whether or not a point can be found in one of the polygons in the dictionary.
-     *  The check is performed for multiple points represented by columns of their x and y coordinates.
-     *  The boolean result is written to out.
-     */
-    // TODO: Refactor the whole dictionary design to perform stronger checks, i.e. make this an override.
-    void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
-
     /** Single coordinate type. */
     using Coord = Float32;
     /** A two-dimensional point in Euclidean coordinates. */

From 791c7204d76516bf560aaeed94bc63a172f56b7b Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Tue, 29 Dec 2020 18:21:49 +0300
Subject: [PATCH 0228/1238] Fix style issues

---
 src/Dictionaries/CacheDictionary.cpp             | 10 +++++-----
 src/Dictionaries/ComplexKeyCacheDictionary.cpp   | 10 +++++-----
 src/Dictionaries/ComplexKeyCacheDictionary.h     |  2 +-
 src/Dictionaries/ComplexKeyDirectDictionary.cpp  | 10 +++++-----
 src/Dictionaries/ComplexKeyHashedDictionary.cpp  |  8 ++++----
 src/Dictionaries/DictionaryStructure.cpp         |  2 +-
 src/Dictionaries/DirectDictionary.cpp            | 10 +++++-----
 src/Dictionaries/FlatDictionary.cpp              | 11 +++++------
 src/Dictionaries/HashedDictionary.cpp            | 16 ++++++++--------
 src/Dictionaries/IDictionary.h                   |  3 +--
 src/Dictionaries/IPAddressDictionary.cpp         | 13 +++++++------
 src/Dictionaries/IPAddressDictionary.h           |  2 +-
 src/Dictionaries/PolygonDictionary.cpp           |  8 ++++----
 src/Dictionaries/RangeHashedDictionary.cpp       |  8 ++++----
 src/Dictionaries/SSDCacheDictionary.cpp          | 10 +++++-----
 .../SSDComplexKeyCacheDictionary.cpp             |  8 ++++----
 src/Dictionaries/SSDComplexKeyCacheDictionary.h  |  2 +-
 src/Dictionaries/ya.make                         |  9 ---------
 src/Functions/FunctionsExternalDictionaries.h    | 11 ++++-------
 19 files changed, 70 insertions(+), 83 deletions(-)

diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp
index cbb12d3b2ec..d8dd390576c 100644
--- a/src/Dictionaries/CacheDictionary.cpp
+++ b/src/Dictionaries/CacheDictionary.cpp
@@ -259,8 +259,8 @@ ColumnPtr CacheDictionary::getColumn(
     ColumnPtr result;
 
     PaddedPODArray<Key> backup_storage;
-    const auto& ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
-    
+    const auto & ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
+
     auto & attribute = getAttribute(attribute_name);
 
     /// TODO: Check that attribute type is same as result type
@@ -314,8 +314,8 @@ ColumnPtr CacheDictionary::getColumn(
             }
             else if constexpr (IsNumber<AttributeType>)
                 column = ColumnVector<AttributeType>::create(identifiers_size);
- 
-            auto& out = column->getData();
+
+            auto & out = column->getData();
 
             if (default_untyped != nullptr)
             {
@@ -357,7 +357,7 @@ ColumnPtr CacheDictionary::getColumn(
     };
 
     callOnDictionaryAttributeType(attribute.type, type_call);
-   
+
     return result;
 }
 
diff --git a/src/Dictionaries/ComplexKeyCacheDictionary.cpp b/src/Dictionaries/ComplexKeyCacheDictionary.cpp
index be67121aef1..4059915666e 100644
--- a/src/Dictionaries/ComplexKeyCacheDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyCacheDictionary.cpp
@@ -86,7 +86,7 @@ ColumnPtr ComplexKeyCacheDictionary::getColumn(
 
     /// TODO: Check that attribute type is same as result type
     /// TODO: Check if const will work as expected
-    
+
     auto keys_size = key_columns.front()->size();
 
     auto type_call = [&](const auto &dictionary_attribute_type)
@@ -136,8 +136,8 @@ ColumnPtr ComplexKeyCacheDictionary::getColumn(
             }
             else if constexpr (IsNumber<AttributeType>)
                 column = ColumnVector<AttributeType>::create(keys_size);
- 
-            auto& out = column->getData();
+
+            auto & out = column->getData();
 
             if (default_untyped != nullptr)
             {
@@ -179,7 +179,7 @@ ColumnPtr ComplexKeyCacheDictionary::getColumn(
     };
 
     callOnDictionaryAttributeType(attribute.type, type_call);
-   
+
     return result;
 }
 
@@ -362,7 +362,7 @@ void ComplexKeyCacheDictionary::setDefaultAttributeValue(Attribute & attribute,
         }
         else
         {
-            std::get<ContainerPtrType<AttributeType>>(attribute.arrays)[idx] = std::get<AttributeType>(attribute.null_values); 
+            std::get<ContainerPtrType<AttributeType>>(attribute.arrays)[idx] = std::get<AttributeType>(attribute.null_values);
         }
     };
 
diff --git a/src/Dictionaries/ComplexKeyCacheDictionary.h b/src/Dictionaries/ComplexKeyCacheDictionary.h
index 2711bd6c8cc..1f8248c594a 100644
--- a/src/Dictionaries/ComplexKeyCacheDictionary.h
+++ b/src/Dictionaries/ComplexKeyCacheDictionary.h
@@ -98,7 +98,7 @@ public:
         const DataTypes & key_types,
         const ColumnPtr default_untyped) const override;
 
-    ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override; 
+    ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override;
 
     BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
 
diff --git a/src/Dictionaries/ComplexKeyDirectDictionary.cpp b/src/Dictionaries/ComplexKeyDirectDictionary.cpp
index 8e084375499..ffcdb746bba 100644
--- a/src/Dictionaries/ComplexKeyDirectDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyDirectDictionary.cpp
@@ -47,10 +47,10 @@ ColumnPtr ComplexKeyDirectDictionary::getColumn(
 
     /// TODO: Check that attribute type is same as result type
     /// TODO: Check if const will work as expected
-    
+
     auto size = key_columns.front()->size();
 
-    auto type_call = [&](const auto &dictionary_attribute_type)
+    auto type_call = [&](const auto & dictionary_attribute_type)
     {
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
         using AttributeType = typename Type::AttributeType;
@@ -125,8 +125,8 @@ ColumnPtr ComplexKeyDirectDictionary::getColumn(
             }
             else if constexpr (IsNumber<AttributeType>)
                 column = ColumnVector<AttributeType>::create(size);
- 
-            auto& out = column->getData();
+
+            auto & out = column->getData();
 
             if (default_untyped != nullptr)
             {
@@ -168,7 +168,7 @@ ColumnPtr ComplexKeyDirectDictionary::getColumn(
     };
 
     callOnDictionaryAttributeType(attribute.type, type_call);
-   
+
     return result;
 }
 
diff --git a/src/Dictionaries/ComplexKeyHashedDictionary.cpp b/src/Dictionaries/ComplexKeyHashedDictionary.cpp
index a74185ec887..1b7ef979d38 100644
--- a/src/Dictionaries/ComplexKeyHashedDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyHashedDictionary.cpp
@@ -49,7 +49,7 @@ ColumnPtr ComplexKeyHashedDictionary::getColumn(
 
     /// TODO: Check that attribute type is same as result type
     /// TODO: Check if const will work as expected
-    
+
     auto size = key_columns.front()->size();
 
     auto type_call = [&](const auto &dictionary_attribute_type)
@@ -111,8 +111,8 @@ ColumnPtr ComplexKeyHashedDictionary::getColumn(
             }
             else if constexpr (IsNumber<AttributeType>)
                 column = ColumnVector<AttributeType>::create(size);
- 
-            auto& out = column->getData();
+
+            auto & out = column->getData();
 
             if (default_untyped != nullptr)
             {
@@ -154,7 +154,7 @@ ColumnPtr ComplexKeyHashedDictionary::getColumn(
     };
 
     callOnDictionaryAttributeType(attribute.type, type_call);
-   
+
     return result;
 }
 
diff --git a/src/Dictionaries/DictionaryStructure.cpp b/src/Dictionaries/DictionaryStructure.cpp
index b2340cf39a0..7164d3cfd5d 100644
--- a/src/Dictionaries/DictionaryStructure.cpp
+++ b/src/Dictionaries/DictionaryStructure.cpp
@@ -328,7 +328,7 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
             is_array = true;
             type = array_type->getNestedType();
         }
-        
+
         if (type->isNullable())
         {
             is_nullable = true;
diff --git a/src/Dictionaries/DirectDictionary.cpp b/src/Dictionaries/DirectDictionary.cpp
index 8018fc02fcf..992fd32e4a6 100644
--- a/src/Dictionaries/DirectDictionary.cpp
+++ b/src/Dictionaries/DirectDictionary.cpp
@@ -138,8 +138,8 @@ ColumnPtr DirectDictionary::getColumn(
     ColumnPtr result;
 
     PaddedPODArray<Key> backup_storage;
-    const auto& ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
-    
+    const auto & ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
+
     const auto & attribute = getAttribute(attribute_name);
 
     /// TODO: Check that attribute type is same as result type
@@ -222,8 +222,8 @@ ColumnPtr DirectDictionary::getColumn(
             }
             else if constexpr (IsNumber<AttributeType>)
                 column = ColumnVector<AttributeType>::create(size);
- 
-            auto& out = column->getData();
+
+            auto & out = column->getData();
 
             if (default_untyped != nullptr)
             {
@@ -265,7 +265,7 @@ ColumnPtr DirectDictionary::getColumn(
     };
 
     callOnDictionaryAttributeType(attribute.type, type_call);
-   
+
     return result;
 }
 
diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp
index 2c856dc59ca..959f852d431 100644
--- a/src/Dictionaries/FlatDictionary.cpp
+++ b/src/Dictionaries/FlatDictionary.cpp
@@ -16,7 +16,6 @@ namespace ErrorCodes
     extern const int BAD_ARGUMENTS;
     extern const int DICTIONARY_IS_EMPTY;
     extern const int UNSUPPORTED_METHOD;
-    extern const int ILLEGAL_COLUMN;
 }
 
 static const auto initial_array_size = 1024;
@@ -115,8 +114,8 @@ ColumnPtr FlatDictionary::getColumn(
     ColumnPtr result;
 
     PaddedPODArray<Key> backup_storage;
-    const auto& ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
-    
+    const auto & ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
+
     const auto & attribute = getAttribute(attribute_name);
 
     /// TODO: Check that attribute type is same as result type
@@ -183,8 +182,8 @@ ColumnPtr FlatDictionary::getColumn(
             }
             else if constexpr (IsNumber<AttributeType>)
                 column = ColumnVector<AttributeType>::create(size);
- 
-            auto& out = column->getData();
+
+            auto & out = column->getData();
 
             if (default_untyped != nullptr)
             {
@@ -226,7 +225,7 @@ ColumnPtr FlatDictionary::getColumn(
     };
 
     callOnDictionaryAttributeType(attribute.type, type_call);
-   
+
     return result;
 }
 
diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp
index a1e3f176706..4fbc24e3bda 100644
--- a/src/Dictionaries/HashedDictionary.cpp
+++ b/src/Dictionaries/HashedDictionary.cpp
@@ -137,14 +137,14 @@ ColumnPtr HashedDictionary::getColumn(
     ColumnPtr result;
 
     PaddedPODArray<Key> backup_storage;
-    const auto& ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
-    
+    const auto & ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
+
     const auto & attribute = getAttribute(attribute_name);
 
     /// TODO: Check that attribute type is same as result type
     /// TODO: Check if const will work as expected
 
-    auto type_call = [&](const auto &dictionary_attribute_type)
+    auto type_call = [&](const auto & dictionary_attribute_type)
     {
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
         using AttributeType = typename Type::AttributeType;
@@ -205,8 +205,8 @@ ColumnPtr HashedDictionary::getColumn(
             }
             else if constexpr (IsNumber<AttributeType>)
                 column = ColumnVector<AttributeType>::create(size);
- 
-            auto& out = column->getData();
+
+            auto & out = column->getData();
 
             if (default_untyped != nullptr)
             {
@@ -248,7 +248,7 @@ ColumnPtr HashedDictionary::getColumn(
     };
 
     callOnDictionaryAttributeType(attribute.type, type_call);
-   
+
     return result;
 }
 
@@ -706,14 +706,14 @@ template <>
 PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds<String>(const Attribute & attribute) const
 {
     return getIds<StringRef>(attribute);
-} 
+}
 
 PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds() const
 {
     const auto & attribute = attributes.front();
     PaddedPODArray<HashedDictionary::Key> result;
 
-    auto type_call = [&](const auto &dictionary_attribute_type)
+    auto type_call = [&](const auto & dictionary_attribute_type)
     {
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
         using AttributeType = typename Type::AttributeType;
diff --git a/src/Dictionaries/IDictionary.h b/src/Dictionaries/IDictionary.h
index c619e85f523..61f249bad44 100644
--- a/src/Dictionaries/IDictionary.h
+++ b/src/Dictionaries/IDictionary.h
@@ -207,8 +207,7 @@ getColumnDataAsPaddedPODArray(const IDictionaryBase * dictionary, const ColumnPt
 
     if (!vector_col)
     {
-        throw Exception{
-            ErrorCodes::TYPE_MISMATCH,
+        throw Exception{ErrorCodes::TYPE_MISMATCH,
             "{}: type mismatch: column has wrong type expected {}",
             dictionary->getDictionaryID().getNameForLogs(),
             "" /* TODO: Type name*/};
diff --git a/src/Dictionaries/IPAddressDictionary.cpp b/src/Dictionaries/IPAddressDictionary.cpp
index 028b01607d0..477b379150f 100644
--- a/src/Dictionaries/IPAddressDictionary.cpp
+++ b/src/Dictionaries/IPAddressDictionary.cpp
@@ -282,7 +282,7 @@ ColumnPtr IPAddressDictionary::getColumn(
 
     /// TODO: Check that attribute type is same as result type
     /// TODO: Check if const will work as expected
-    
+
     auto size = key_columns.front()->size();
 
     auto type_call = [&](const auto &dictionary_attribute_type)
@@ -344,8 +344,8 @@ ColumnPtr IPAddressDictionary::getColumn(
             }
             else if constexpr (IsNumber<AttributeType>)
                 column = ColumnVector<AttributeType>::create(size);
- 
-            auto& out = column->getData();
+
+            auto & out = column->getData();
 
             if (default_untyped != nullptr)
             {
@@ -387,7 +387,7 @@ ColumnPtr IPAddressDictionary::getColumn(
     };
 
     callOnDictionaryAttributeType(attribute.type, type_call);
-   
+
     return result;
 }
 
@@ -840,7 +840,8 @@ void IPAddressDictionary::setAttributeValueImpl(Attribute & attribute, const T v
 
 void IPAddressDictionary::setAttributeValue(Attribute & attribute, const Field & value)
 {
-    auto type_call = [&](const auto & dictionary_attribute_type) {
+    auto type_call = [&](const auto & dictionary_attribute_type)
+    {
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
         using AttributeType = typename Type::AttributeType;
 
@@ -848,7 +849,7 @@ void IPAddressDictionary::setAttributeValue(Attribute & attribute, const Field &
         {
             const auto & string = value.get<String>();
             const auto * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
-            setAttributeValueImpl<StringRef>(attribute, StringRef{string_in_arena, string.size()}); 
+            setAttributeValueImpl<StringRef>(attribute, StringRef{string_in_arena, string.size()});
         }
         else
         {
diff --git a/src/Dictionaries/IPAddressDictionary.h b/src/Dictionaries/IPAddressDictionary.h
index 7db46ddf6a5..3d9565c47ce 100644
--- a/src/Dictionaries/IPAddressDictionary.h
+++ b/src/Dictionaries/IPAddressDictionary.h
@@ -70,7 +70,7 @@ public:
         const DataTypes & key_types,
         const ColumnPtr default_untyped) const override;
 
-    ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override; 
+    ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override;
 
     BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
 
diff --git a/src/Dictionaries/PolygonDictionary.cpp b/src/Dictionaries/PolygonDictionary.cpp
index 2b9bf951192..da3f367e1a5 100644
--- a/src/Dictionaries/PolygonDictionary.cpp
+++ b/src/Dictionaries/PolygonDictionary.cpp
@@ -134,7 +134,7 @@ ColumnPtr IPolygonDictionary::getColumn(
                 {
                     const auto & def = default_col_const->template getValue<String>();
 
-                    getItemsImpl<StringRef, StringRef>(
+                    getItemsImpl<String, StringRef>(
                         index,
                         key_columns,
                         [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
@@ -169,8 +169,8 @@ ColumnPtr IPolygonDictionary::getColumn(
             }
             else if constexpr (IsNumber<AttributeType>)
                 column = ColumnVector<AttributeType>::create(size);
- 
-            auto& out = column->getData();
+
+            auto & out = column->getData();
 
             if (default_untyped != nullptr)
             {
@@ -212,7 +212,7 @@ ColumnPtr IPolygonDictionary::getColumn(
     };
 
     callOnDictionaryAttributeType(dict_struct.attributes[index].underlying_type, type_call);
-   
+
     return result;
 }
 
diff --git a/src/Dictionaries/RangeHashedDictionary.cpp b/src/Dictionaries/RangeHashedDictionary.cpp
index 710a018d85f..97c8fcb257c 100644
--- a/src/Dictionaries/RangeHashedDictionary.cpp
+++ b/src/Dictionaries/RangeHashedDictionary.cpp
@@ -162,8 +162,8 @@ ColumnPtr RangeHashedDictionary::getColumn(
             }
             else if constexpr (IsNumber<AttributeType>)
                 column = ColumnVector<AttributeType>::create(size);
- 
-            auto& out = column->getData();
+
+            auto & out = column->getData();
 
             if (default_untyped != nullptr)
             {
@@ -205,7 +205,7 @@ ColumnPtr RangeHashedDictionary::getColumn(
     };
 
     callOnDictionaryAttributeType(attribute.type, type_call);
-   
+
     return result;
 }
 
@@ -304,7 +304,7 @@ void RangeHashedDictionary::calculateBytesAllocated()
 
     for (const auto & attribute : attributes)
     {
-        auto type_call = [&](const auto & dictionary_attribute_type) 
+        auto type_call = [&](const auto & dictionary_attribute_type)
         {
             using Type = std::decay_t<decltype(dictionary_attribute_type)>;
             using AttributeType = typename Type::AttributeType;
diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp
index e3e4cf6c1a0..dccc1932250 100644
--- a/src/Dictionaries/SSDCacheDictionary.cpp
+++ b/src/Dictionaries/SSDCacheDictionary.cpp
@@ -1338,8 +1338,8 @@ ColumnPtr SSDCacheDictionary::getColumn(
     ColumnPtr result;
 
     PaddedPODArray<Key> backup_storage;
-    const auto& ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
-    
+    const auto & ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
+
     const auto index = getAttributeIndex(attribute_name);
 
     /// TODO: Check that attribute type is same as result type
@@ -1393,8 +1393,8 @@ ColumnPtr SSDCacheDictionary::getColumn(
             }
             else if constexpr (IsNumber<AttributeType>)
                 column = ColumnVector<AttributeType>::create(identifiers_size);
- 
-            auto& out = column->getData();
+
+            auto & out = column->getData();
 
             if (default_untyped != nullptr)
             {
@@ -1436,7 +1436,7 @@ ColumnPtr SSDCacheDictionary::getColumn(
     };
 
     callOnDictionaryAttributeType(dict_struct.attributes[index].underlying_type, type_call);
-   
+
     return result;
 }
 
diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp
index 05aa5a27ea9..0d19b6f8463 100644
--- a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp
+++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp
@@ -1392,7 +1392,7 @@ ColumnPtr SSDComplexKeyCacheDictionary::getColumn(
 
     /// TODO: Check that attribute type is same as result type
     /// TODO: Check if const will work as expected
-    
+
     auto keys_size = key_columns.front()->size();
 
     auto type_call = [&](const auto &dictionary_attribute_type)
@@ -1442,8 +1442,8 @@ ColumnPtr SSDComplexKeyCacheDictionary::getColumn(
             }
             else if constexpr (IsNumber<AttributeType>)
                 column = ColumnVector<AttributeType>::create(keys_size);
- 
-            auto& out = column->getData();
+
+            auto & out = column->getData();
 
             if (default_untyped != nullptr)
             {
@@ -1488,7 +1488,7 @@ ColumnPtr SSDComplexKeyCacheDictionary::getColumn(
     };
 
     callOnDictionaryAttributeType(dict_struct.attributes[index].underlying_type, type_call);
-   
+
     return result;
 }
 
diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.h b/src/Dictionaries/SSDComplexKeyCacheDictionary.h
index 47c8f54a77d..a12a9789c45 100644
--- a/src/Dictionaries/SSDComplexKeyCacheDictionary.h
+++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.h
@@ -578,7 +578,7 @@ public:
         const DataTypes & key_types,
         const ColumnPtr default_untyped) const override;
 
-    ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override; 
+    ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override;
 
     template <typename T>
     using ResultArrayType = SSDComplexKeyCacheStorage::ResultArrayType<T>;
diff --git a/src/Dictionaries/ya.make b/src/Dictionaries/ya.make
index 19a0f5008b8..f754f250b38 100644
--- a/src/Dictionaries/ya.make
+++ b/src/Dictionaries/ya.make
@@ -17,20 +17,11 @@ NO_COMPILER_WARNINGS()
 
 SRCS(
     CacheDictionary.cpp
-    CacheDictionary_generate1.cpp
-    CacheDictionary_generate2.cpp
-    CacheDictionary_generate3.cpp
     CassandraBlockInputStream.cpp
     CassandraDictionarySource.cpp
     CassandraHelpers.cpp
     ClickHouseDictionarySource.cpp
     ComplexKeyCacheDictionary.cpp
-    ComplexKeyCacheDictionary_createAttributeWithType.cpp
-    ComplexKeyCacheDictionary_generate1.cpp
-    ComplexKeyCacheDictionary_generate2.cpp
-    ComplexKeyCacheDictionary_generate3.cpp
-    ComplexKeyCacheDictionary_setAttributeValue.cpp
-    ComplexKeyCacheDictionary_setDefaultAttributeValue.cpp
     ComplexKeyDirectDictionary.cpp
     ComplexKeyHashedDictionary.cpp
     DictionaryBlockInputStreamBase.cpp
diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h
index 5b20a4dfc08..5e63e41d7df 100644
--- a/src/Functions/FunctionsExternalDictionaries.h
+++ b/src/Functions/FunctionsExternalDictionaries.h
@@ -54,7 +54,6 @@ namespace ErrorCodes
     extern const int UNSUPPORTED_METHOD;
     extern const int UNKNOWN_TYPE;
     extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
-    extern const int TYPE_MISMATCH;
     extern const int ILLEGAL_COLUMN;
     extern const int BAD_ARGUMENTS;
 }
@@ -275,9 +274,8 @@ private:
 
         /// TODO: Use accurateCast if argument is integer
         if (!WhichDataType(arguments[2].type).isUInt64() && !isTuple(arguments[2].type))
-            throw Exception{
-                "Illegal type " + arguments[2].type->getName() + " of third argument of function " + getName()
-                    + ", must be UInt64 or tuple(...).",
+            throw Exception{"Illegal type " + arguments[2].type->getName() + " of third argument of function "
+                    + getName() + ", must be UInt64 or tuple(...).",
                 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
 
         auto dictionary_identifier_type = dictionary->getIdentifierType();
@@ -298,9 +296,8 @@ private:
             range_col_type = arguments[current_arguments_index].type;
 
             if (!(range_col_type->isValueRepresentedByInteger() && range_col_type->getSizeOfValueInMemory() <= sizeof(Int64)))
-                throw Exception{
-                    "Illegal type " + range_col_type->getName() + " of fourth argument of function " + getName()
-                        + " must be convertible to Int64.",
+                throw Exception{"Illegal type " + range_col_type->getName() + " of fourth argument of function "
+                        + getName() + " must be convertible to Int64.",
                     ErrorCodes::ILLEGAL_COLUMN};
 
             ++current_arguments_index;

From 164e55972bab6b388611305b51223ea520ddeaf9 Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Tue, 29 Dec 2020 19:29:36 +0300
Subject: [PATCH 0229/1238] Fix style issue

---
 src/Dictionaries/SSDCacheDictionary.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp
index dccc1932250..c5a8d55b7f2 100644
--- a/src/Dictionaries/SSDCacheDictionary.cpp
+++ b/src/Dictionaries/SSDCacheDictionary.cpp
@@ -1421,7 +1421,7 @@ ColumnPtr SSDCacheDictionary::getColumn(
             }
             else
             {
-                const auto null_value = std::get<AttributeType>(null_values[index]);;
+                const auto null_value = std::get<AttributeType>(null_values[index]);
 
                 getItemsNumberImpl<AttributeType, AttributeType>(
                     index,

From b01027787e2fad6792063f903b9f68cbb56a40f0 Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Sat, 2 Jan 2021 18:03:28 +0300
Subject: [PATCH 0230/1238] Fixed tests

---
 src/Dictionaries/CacheDictionary.cpp          |  8 ++---
 .../ComplexKeyCacheDictionary.cpp             | 10 +++---
 .../ComplexKeyDirectDictionary.cpp            | 10 +++---
 .../ComplexKeyHashedDictionary.cpp            | 10 +++---
 src/Dictionaries/DictionaryBlockInputStream.h |  3 +-
 src/Dictionaries/DictionaryStructure.cpp      | 30 +++++++++-------
 src/Dictionaries/DirectDictionary.cpp         | 10 +++---
 src/Dictionaries/FlatDictionary.cpp           | 10 +++---
 src/Dictionaries/HashedDictionary.cpp         | 10 +++---
 src/Dictionaries/IPAddressDictionary.cpp      | 10 +++---
 src/Dictionaries/PolygonDictionary.cpp        | 14 ++++----
 .../RangeDictionaryBlockInputStream.h         |  2 +-
 src/Dictionaries/RangeHashedDictionary.cpp    | 34 ++++++++++++-------
 src/Dictionaries/SSDCacheDictionary.cpp       |  8 ++---
 .../SSDComplexKeyCacheDictionary.cpp          | 18 +++++-----
 15 files changed, 98 insertions(+), 89 deletions(-)

diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp
index d8dd390576c..521b95243ae 100644
--- a/src/Dictionaries/CacheDictionary.cpp
+++ b/src/Dictionaries/CacheDictionary.cpp
@@ -279,11 +279,11 @@ ColumnPtr CacheDictionary::getColumn(
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ColumnString>(*default_untyped))
+                if (const auto * const default_col = checkAndGetColumn<ColumnString>(*default_untyped))
                 {
                     getItemsString(attribute, ids, column_string.get(), [&](const size_t row) { return default_col->getDataAt(row); });
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
+                else if (const auto * const default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<String>();
 
@@ -319,7 +319,7 @@ ColumnPtr CacheDictionary::getColumn(
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
+                if (const auto * const default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
                 {
                     getItemsNumberImpl<AttributeType, AttributeType>(
                         attribute,
@@ -328,7 +328,7 @@ ColumnPtr CacheDictionary::getColumn(
                         [&](const size_t row) { return default_col->getData()[row]; }
                     );
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
+                else if (const auto * const default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<AttributeType>();
 
diff --git a/src/Dictionaries/ComplexKeyCacheDictionary.cpp b/src/Dictionaries/ComplexKeyCacheDictionary.cpp
index 4059915666e..336c2a9eeef 100644
--- a/src/Dictionaries/ComplexKeyCacheDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyCacheDictionary.cpp
@@ -97,15 +97,15 @@ ColumnPtr ComplexKeyCacheDictionary::getColumn(
         if constexpr (std::is_same_v<AttributeType, String>)
         {
             auto column_string = ColumnString::create();
-            auto out = column_string.get();
+            auto * out = column_string.get();
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ColumnString>(*default_untyped))
+                if (const auto * const default_col = checkAndGetColumn<ColumnString>(*default_untyped))
                 {
                     getItemsString(attribute, key_columns, out, [&](const size_t row) { return default_col->getDataAt(row); });
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
+                else if (const auto * const default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<String>();
 
@@ -141,7 +141,7 @@ ColumnPtr ComplexKeyCacheDictionary::getColumn(
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
+                if (const auto * const default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
                 {
                     getItemsNumberImpl<AttributeType, AttributeType>(
                         attribute,
@@ -150,7 +150,7 @@ ColumnPtr ComplexKeyCacheDictionary::getColumn(
                         [&](const size_t row) { return default_col->getData()[row]; }
                     );
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
+                else if (const auto * const default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<AttributeType>();
 
diff --git a/src/Dictionaries/ComplexKeyDirectDictionary.cpp b/src/Dictionaries/ComplexKeyDirectDictionary.cpp
index ffcdb746bba..3d96f1e410a 100644
--- a/src/Dictionaries/ComplexKeyDirectDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyDirectDictionary.cpp
@@ -58,11 +58,11 @@ ColumnPtr ComplexKeyDirectDictionary::getColumn(
         if constexpr (std::is_same_v<AttributeType, String>)
         {
             auto column_string = ColumnString::create();
-            auto out = column_string.get();
+            auto * out = column_string.get();
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ColumnString>(*default_untyped))
+                if (const auto * const default_col = checkAndGetColumn<ColumnString>(*default_untyped))
                 {
                     getItemsImpl<String, String>(
                         attribute,
@@ -78,7 +78,7 @@ ColumnPtr ComplexKeyDirectDictionary::getColumn(
                             return String(ref.data, ref.size);
                         });
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
+                else if (const auto * const default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<String>();
 
@@ -130,7 +130,7 @@ ColumnPtr ComplexKeyDirectDictionary::getColumn(
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
+                if (const auto * const default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
                 {
                     getItemsImpl<AttributeType, AttributeType>(
                         attribute,
@@ -139,7 +139,7 @@ ColumnPtr ComplexKeyDirectDictionary::getColumn(
                         [&](const size_t row) { return default_col->getData()[row]; }
                     );
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
+                else if (const auto * const default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<AttributeType>();
 
diff --git a/src/Dictionaries/ComplexKeyHashedDictionary.cpp b/src/Dictionaries/ComplexKeyHashedDictionary.cpp
index 1b7ef979d38..8bb1243dd2a 100644
--- a/src/Dictionaries/ComplexKeyHashedDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyHashedDictionary.cpp
@@ -60,11 +60,11 @@ ColumnPtr ComplexKeyHashedDictionary::getColumn(
         if constexpr (std::is_same_v<AttributeType, String>)
         {
             auto column_string = ColumnString::create();
-            auto out = column_string.get();
+            auto * out = column_string.get();
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ColumnString>(*default_untyped))
+                if (const auto * const default_col = checkAndGetColumn<ColumnString>(*default_untyped))
                 {
                     getItemsImpl<StringRef, StringRef>(
                         attribute,
@@ -72,7 +72,7 @@ ColumnPtr ComplexKeyHashedDictionary::getColumn(
                         [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
                         [&](const size_t row) { return default_col->getDataAt(row); });
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
+                else if (const auto * const default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<String>();
 
@@ -116,7 +116,7 @@ ColumnPtr ComplexKeyHashedDictionary::getColumn(
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
+                if (const auto * const default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
                 {
                     getItemsImpl<AttributeType, AttributeType>(
                         attribute,
@@ -125,7 +125,7 @@ ColumnPtr ComplexKeyHashedDictionary::getColumn(
                         [&](const size_t row) { return default_col->getData()[row]; }
                     );
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
+                else if (const auto * const default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<AttributeType>();
 
diff --git a/src/Dictionaries/DictionaryBlockInputStream.h b/src/Dictionaries/DictionaryBlockInputStream.h
index 915ab7601ed..e22f9579621 100644
--- a/src/Dictionaries/DictionaryBlockInputStream.h
+++ b/src/Dictionaries/DictionaryBlockInputStream.h
@@ -205,11 +205,10 @@ Block DictionaryBlockInputStream<Key>::fillBlock(
             block_columns.push_back(column);
 
     const DictionaryStructure & structure = dictionary->getStructure();
-    ColumnPtr ids_column;
+    ColumnPtr ids_column = getColumnFromIds(ids_to_fill);
 
     if (structure.id && names.find(structure.id->name) != names.end())
     {
-        ids_column = getColumnFromIds(ids_to_fill);
         block_columns.emplace_back(ids_column, std::make_shared<DataTypeUInt64>(), structure.id->name);
     }
 
diff --git a/src/Dictionaries/DictionaryStructure.cpp b/src/Dictionaries/DictionaryStructure.cpp
index 7164d3cfd5d..75e2616ad97 100644
--- a/src/Dictionaries/DictionaryStructure.cpp
+++ b/src/Dictionaries/DictionaryStructure.cpp
@@ -84,6 +84,11 @@ AttributeUnderlyingType getAttributeUnderlyingType(const std::string & type)
             return AttributeUnderlyingType::utDecimal128;
     }
 
+    // Temporary hack to allow arrays in keys, since they are never retrieved for polygon dictionaries.
+    // TODO: This should be fixed by fully supporting arrays in dictionaries.
+    if (type.find("Array") == 0)
+        return AttributeUnderlyingType::utString;
+
     throw Exception{"Unknown type " + type, ErrorCodes::UNKNOWN_TYPE};
 }
 
@@ -322,21 +327,20 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
         bool is_array = false;
         bool is_nullable = false;
 
-        const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(type.get());
-        if (array_type)
-        {
-            is_array = true;
-            type = array_type->getNestedType();
-        }
+        // const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(type.get());
+        // if (array_type)
+        // {
+        //     is_array = true;
+        //     type = array_type->getNestedType();
+        // }
 
-        if (type->isNullable())
-        {
-            is_nullable = true;
-            type = removeNullable(type);
-        }
+        // if (type->isNullable())
+        // {
+        //     is_nullable = true;
+        //     type = removeNullable(type);
+        // }
 
-        /// TODO: Fix for decimal
-        const auto underlying_type = getAttributeUnderlyingType(type->getName());
+        const auto underlying_type = getAttributeUnderlyingType(type_string);
 
         const auto expression = config.getString(prefix + "expression", "");
         if (!expression.empty())
diff --git a/src/Dictionaries/DirectDictionary.cpp b/src/Dictionaries/DirectDictionary.cpp
index 992fd32e4a6..8995a9f3f3f 100644
--- a/src/Dictionaries/DirectDictionary.cpp
+++ b/src/Dictionaries/DirectDictionary.cpp
@@ -155,11 +155,11 @@ ColumnPtr DirectDictionary::getColumn(
         if constexpr (std::is_same_v<AttributeType, String>)
         {
             auto column_string = ColumnString::create();
-            auto out = column_string.get();
+            auto * out = column_string.get();
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ColumnString>(*default_untyped))
+                if (const auto * const default_col = checkAndGetColumn<ColumnString>(*default_untyped))
                 {
                     getItemsImpl<String, String>(
                         attribute,
@@ -175,7 +175,7 @@ ColumnPtr DirectDictionary::getColumn(
                             return String(ref.data, ref.size);
                         });
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
+                else if (const auto * const default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<String>();
 
@@ -227,7 +227,7 @@ ColumnPtr DirectDictionary::getColumn(
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
+                if (const auto * const default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
                 {
                     getItemsImpl<AttributeType, AttributeType>(
                         attribute,
@@ -236,7 +236,7 @@ ColumnPtr DirectDictionary::getColumn(
                         [&](const size_t row) { return default_col->getData()[row]; }
                     );
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
+                else if (const auto * const default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<AttributeType>();
 
diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp
index 959f852d431..0bbd856fcfe 100644
--- a/src/Dictionaries/FlatDictionary.cpp
+++ b/src/Dictionaries/FlatDictionary.cpp
@@ -131,11 +131,11 @@ ColumnPtr FlatDictionary::getColumn(
         if constexpr (std::is_same_v<AttributeType, String>)
         {
             auto column_string = ColumnString::create();
-            auto out = column_string.get();
+            auto * out = column_string.get();
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ColumnString>(*default_untyped))
+                if (const auto * const default_col = checkAndGetColumn<ColumnString>(*default_untyped))
                 {
                     getItemsImpl<StringRef, StringRef>(
                         attribute,
@@ -143,7 +143,7 @@ ColumnPtr FlatDictionary::getColumn(
                         [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
                         [&](const size_t row) { return default_col->getDataAt(row); });
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
+                else if (const auto * const default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<String>();
 
@@ -187,7 +187,7 @@ ColumnPtr FlatDictionary::getColumn(
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
+                if (const auto * const default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
                 {
                     getItemsImpl<AttributeType, AttributeType>(
                         attribute,
@@ -196,7 +196,7 @@ ColumnPtr FlatDictionary::getColumn(
                         [&](const size_t row) { return default_col->getData()[row]; }
                     );
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
+                else if (const auto * const default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<AttributeType>();
 
diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp
index 4fbc24e3bda..7cb116c7bc9 100644
--- a/src/Dictionaries/HashedDictionary.cpp
+++ b/src/Dictionaries/HashedDictionary.cpp
@@ -154,11 +154,11 @@ ColumnPtr HashedDictionary::getColumn(
         if constexpr (std::is_same_v<AttributeType, String>)
         {
             auto column_string = ColumnString::create();
-            auto out = column_string.get();
+            auto * out = column_string.get();
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ColumnString>(*default_untyped))
+                if (const auto * const default_col = checkAndGetColumn<ColumnString>(*default_untyped))
                 {
                     getItemsImpl<StringRef, StringRef>(
                         attribute,
@@ -166,7 +166,7 @@ ColumnPtr HashedDictionary::getColumn(
                         [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
                         [&](const size_t row) { return default_col->getDataAt(row); });
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
+                else if (const auto * const default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<String>();
 
@@ -210,7 +210,7 @@ ColumnPtr HashedDictionary::getColumn(
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
+                if (const auto * const default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
                 {
                     getItemsImpl<AttributeType, AttributeType>(
                         attribute,
@@ -219,7 +219,7 @@ ColumnPtr HashedDictionary::getColumn(
                         [&](const size_t row) { return default_col->getData()[row]; }
                     );
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
+                else if (const auto * const default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<AttributeType>();
 
diff --git a/src/Dictionaries/IPAddressDictionary.cpp b/src/Dictionaries/IPAddressDictionary.cpp
index 477b379150f..5706c5bc39a 100644
--- a/src/Dictionaries/IPAddressDictionary.cpp
+++ b/src/Dictionaries/IPAddressDictionary.cpp
@@ -293,11 +293,11 @@ ColumnPtr IPAddressDictionary::getColumn(
         if constexpr (std::is_same_v<AttributeType, String>)
         {
             auto column_string = ColumnString::create();
-            auto out = column_string.get();
+            auto * out = column_string.get();
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ColumnString>(*default_untyped))
+                if (const auto * const default_col = checkAndGetColumn<ColumnString>(*default_untyped))
                 {
                     getItemsImpl<StringRef, StringRef>(
                         attribute,
@@ -305,7 +305,7 @@ ColumnPtr IPAddressDictionary::getColumn(
                         [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
                         [&](const size_t row) { return default_col->getDataAt(row); });
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
+                else if (const auto * const default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<String>();
 
@@ -349,7 +349,7 @@ ColumnPtr IPAddressDictionary::getColumn(
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
+                if (const auto * const default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
                 {
                     getItemsImpl<AttributeType, AttributeType>(
                         attribute,
@@ -358,7 +358,7 @@ ColumnPtr IPAddressDictionary::getColumn(
                         [&](const size_t row) { return default_col->getData()[row]; }
                     );
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
+                else if (const auto * const default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<AttributeType>();
 
diff --git a/src/Dictionaries/PolygonDictionary.cpp b/src/Dictionaries/PolygonDictionary.cpp
index da3f367e1a5..3bfc8d997ac 100644
--- a/src/Dictionaries/PolygonDictionary.cpp
+++ b/src/Dictionaries/PolygonDictionary.cpp
@@ -118,11 +118,11 @@ ColumnPtr IPolygonDictionary::getColumn(
         if constexpr (std::is_same_v<AttributeType, String>)
         {
             auto column_string = ColumnString::create();
-            auto out = column_string.get();
+            auto * out = column_string.get();
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ColumnString>(*default_untyped))
+                if (const auto * const default_col = checkAndGetColumn<ColumnString>(*default_untyped))
                 {
                     getItemsImpl<String, StringRef>(
                         index,
@@ -130,7 +130,7 @@ ColumnPtr IPolygonDictionary::getColumn(
                         [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
                         [&](const size_t row) { return default_col->getDataAt(row); });
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
+                else if (const auto * const default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<String>();
 
@@ -174,7 +174,7 @@ ColumnPtr IPolygonDictionary::getColumn(
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
+                if (const auto * const default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
                 {
                     getItemsImpl<AttributeType, AttributeType>(
                         index,
@@ -183,7 +183,7 @@ ColumnPtr IPolygonDictionary::getColumn(
                         [&](const size_t row) { return default_col->getData()[row]; }
                     );
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
+                else if (const auto * const default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<AttributeType>();
 
@@ -379,10 +379,8 @@ std::vector<IPolygonDictionary::Point> IPolygonDictionary::extractPoints(const C
     return result;
 }
 
-ColumnUInt8::Ptr IPolygonDictionary::has(const Columns & key_columns, const DataTypes & key_types) const
+ColumnUInt8::Ptr IPolygonDictionary::has(const Columns & key_columns, const DataTypes &) const
 {
-    dict_struct.validateKeyTypes(key_types);
-
     auto size = key_columns.front()->size();
     auto result = ColumnUInt8::create(size);
     auto& out = result->getData();
diff --git a/src/Dictionaries/RangeDictionaryBlockInputStream.h b/src/Dictionaries/RangeDictionaryBlockInputStream.h
index aeb16f389b4..3da43c85c45 100644
--- a/src/Dictionaries/RangeDictionaryBlockInputStream.h
+++ b/src/Dictionaries/RangeDictionaryBlockInputStream.h
@@ -176,7 +176,7 @@ Block RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::fillBlock
                 attribute.name,
                 attribute.type,
                 {ids_column, date_column},
-                {std::make_shared<DataTypeUInt64>(), structure.range_max->type},
+                {std::make_shared<DataTypeUInt64>(), std::make_shared<DataTypeInt64>()},
                 nullptr);
             columns.emplace_back(column, attribute.type, attribute.name);
         }
diff --git a/src/Dictionaries/RangeHashedDictionary.cpp b/src/Dictionaries/RangeHashedDictionary.cpp
index 97c8fcb257c..9684eec5cfa 100644
--- a/src/Dictionaries/RangeHashedDictionary.cpp
+++ b/src/Dictionaries/RangeHashedDictionary.cpp
@@ -5,6 +5,7 @@
 #include <ext/range.h>
 #include "DictionaryFactory.h"
 #include "RangeDictionaryBlockInputStream.h"
+#include <Interpreters/castColumn.h>
 
 namespace
 {
@@ -90,7 +91,7 @@ ColumnPtr RangeHashedDictionary::getColumn(
     const std::string & attribute_name,
     const DataTypePtr &,
     const Columns & key_columns,
-    const DataTypes &,
+    const DataTypes & key_types,
     const ColumnPtr default_untyped) const
 {
     /// TODO: Validate input types
@@ -103,6 +104,15 @@ ColumnPtr RangeHashedDictionary::getColumn(
 
     auto size = key_columns.front()->size();
 
+    /// Cast second column to storage type
+    Columns modified_key_columns = key_columns;
+
+    auto range_storage_column = key_columns[1];
+    ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types[1], ""};
+
+    auto range_column_storage_type = std::make_shared<DataTypeInt64>();
+    modified_key_columns[1] = castColumnAccurate(column_to_cast, range_column_storage_type);
+
     auto type_call = [&](const auto &dictionary_attribute_type)
     {
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
@@ -111,25 +121,25 @@ ColumnPtr RangeHashedDictionary::getColumn(
         if constexpr (std::is_same_v<AttributeType, String>)
         {
             auto column_string = ColumnString::create();
-            auto out = column_string.get();
+            auto * out = column_string.get();
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ColumnString>(*default_untyped))
+                if (const auto * const default_col = checkAndGetColumn<ColumnString>(*default_untyped))
                 {
                     getItemsImpl<StringRef, StringRef>(
                         attribute,
-                        key_columns,
+                        modified_key_columns,
                         [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
                         [&](const size_t row) { return default_col->getDataAt(row); });
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
+                else if (const auto * const default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<String>();
 
                     getItemsImpl<StringRef, StringRef>(
                         attribute,
-                        key_columns,
+                        modified_key_columns,
                         [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
                         [&](const size_t) { return def; });
                 }
@@ -140,7 +150,7 @@ ColumnPtr RangeHashedDictionary::getColumn(
 
                 getItemsImpl<StringRef, StringRef>(
                     attribute,
-                    key_columns,
+                    modified_key_columns,
                     [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
                     [&](const size_t) { return null_value; });
             }
@@ -167,22 +177,22 @@ ColumnPtr RangeHashedDictionary::getColumn(
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
+                if (const auto * const default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
                 {
                     getItemsImpl<AttributeType, AttributeType>(
                         attribute,
-                        key_columns,
+                        modified_key_columns,
                         [&](const size_t row, const auto value) { return out[row] = value; },
                         [&](const size_t row) { return default_col->getData()[row]; }
                     );
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
+                else if (const auto * const default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<AttributeType>();
 
                     getItemsImpl<AttributeType, AttributeType>(
                         attribute,
-                        key_columns,
+                        modified_key_columns,
                         [&](const size_t row, const auto value) { return out[row] = value; },
                         [&](const size_t) { return def; }
                     );
@@ -194,7 +204,7 @@ ColumnPtr RangeHashedDictionary::getColumn(
 
                 getItemsImpl<AttributeType, AttributeType>(
                     attribute,
-                    key_columns,
+                    modified_key_columns,
                     [&](const size_t row, const auto value) { return out[row] = value; },
                     [&](const size_t) { return null_value; }
                 );
diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp
index c5a8d55b7f2..aa3893daaeb 100644
--- a/src/Dictionaries/SSDCacheDictionary.cpp
+++ b/src/Dictionaries/SSDCacheDictionary.cpp
@@ -1358,11 +1358,11 @@ ColumnPtr SSDCacheDictionary::getColumn(
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ColumnString>(*default_untyped))
+                if (const auto * const default_col = checkAndGetColumn<ColumnString>(*default_untyped))
                 {
                     getItemsStringImpl(index, ids, column_string.get(), [&](const size_t row) { return default_col->getDataAt(row); });
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
+                else if (const auto * const default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<String>();
 
@@ -1398,7 +1398,7 @@ ColumnPtr SSDCacheDictionary::getColumn(
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
+                if (const auto * const default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
                 {
                     getItemsNumberImpl<AttributeType, AttributeType>(
                         index,
@@ -1407,7 +1407,7 @@ ColumnPtr SSDCacheDictionary::getColumn(
                         [&](const size_t row) { return default_col->getData()[row]; }
                     );
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
+                else if (const auto * const default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<AttributeType>();
 
diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp
index 0d19b6f8463..f878c25fef5 100644
--- a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp
+++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp
@@ -1403,15 +1403,15 @@ ColumnPtr SSDComplexKeyCacheDictionary::getColumn(
         if constexpr (std::is_same_v<AttributeType, String>)
         {
             auto column_string = ColumnString::create();
-            auto out = column_string.get();
+            auto * out = column_string.get();
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ColumnString>(*default_untyped))
+                if (const auto * const default_col = checkAndGetColumn<ColumnString>(*default_untyped))
                 {
                     getItemsStringImpl(index, key_columns, key_types, out, [&](const size_t row) { return default_col->getDataAt(row); });
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
+                else if (const auto * const default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<String>();
 
@@ -1447,7 +1447,7 @@ ColumnPtr SSDComplexKeyCacheDictionary::getColumn(
 
             if (default_untyped != nullptr)
             {
-                if (const auto default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
+                if (const auto * const default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
                 {
                     getItemsNumberImpl<AttributeType, AttributeType>(
                         index,
@@ -1457,7 +1457,7 @@ ColumnPtr SSDComplexKeyCacheDictionary::getColumn(
                         [&](const size_t row) { return default_col->getData()[row]; }
                     );
                 }
-                else if (const auto default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
+                else if (const auto * const default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
                 {
                     const auto & def = default_col_const->template getValue<AttributeType>();
 
@@ -1638,14 +1638,12 @@ ColumnUInt8::Ptr SSDComplexKeyCacheDictionary::has(const Columns & key_columns,
 {
     dict_struct.validateKeyTypes(key_types);
 
-    PaddedPODArray<Key> backup_storage;
-    const auto& ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
+    const auto rows_num = key_columns.front()->size();
 
-    auto result = ColumnUInt8::create(ext::size(ids));
+    auto result = ColumnUInt8::create(rows_num);
     auto& out = result->getData();
 
-    const auto rows = ext::size(ids);
-    for (const auto row : ext::range(0, rows))
+    for (const auto row : ext::range(0, rows_num))
         out[row] = false;
 
     const auto now = std::chrono::system_clock::now();

From 7ded8be0d5c4af97c6a995831f1669be23f4199c Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Sun, 3 Jan 2021 01:08:54 +0300
Subject: [PATCH 0231/1238] Added Nullable support for FlatDictionary

---
 src/Dictionaries/DictionaryBlockInputStream.h |   6 +-
 src/Dictionaries/DictionaryStructure.cpp      |  84 +++++------
 src/Dictionaries/FlatDictionary.cpp           | 110 ++++++++-------
 src/Dictionaries/FlatDictionary.h             |   6 +-
 .../FunctionsExternalDictionaries.cpp         |   5 +-
 src/Functions/FunctionsExternalDictionaries.h | 131 ++++++++++++++++--
 6 files changed, 224 insertions(+), 118 deletions(-)

diff --git a/src/Dictionaries/DictionaryBlockInputStream.h b/src/Dictionaries/DictionaryBlockInputStream.h
index e22f9579621..8361c01a75f 100644
--- a/src/Dictionaries/DictionaryBlockInputStream.h
+++ b/src/Dictionaries/DictionaryBlockInputStream.h
@@ -227,15 +227,11 @@ Block DictionaryBlockInputStream<Key>::fillBlock(
                 column = dictionary->getColumn(
                     attribute.name, attribute.type, {ids_column}, {std::make_shared<DataTypeUInt64>()}, nullptr /* default_untyped*/);
             }
-            else if (dictionary_identifier_type == DictionaryIdentifierType::complex)
+            else
             {
                 column = dictionary->getColumn(
                     attribute.name, attribute.type, keys, data_types, nullptr /* default_untyped*/);
             }
-            else
-            {
-                column = nullptr;
-            }
 
             block_columns.emplace_back(column, attribute.type, attribute.name);
         }
diff --git a/src/Dictionaries/DictionaryStructure.cpp b/src/Dictionaries/DictionaryStructure.cpp
index 75e2616ad97..3ccfdd49a27 100644
--- a/src/Dictionaries/DictionaryStructure.cpp
+++ b/src/Dictionaries/DictionaryStructure.cpp
@@ -42,54 +42,44 @@ namespace
 }
 
 
-AttributeUnderlyingType getAttributeUnderlyingType(const std::string & type)
+AttributeUnderlyingType getAttributeUnderlyingType(const DataTypePtr & type)
 {
-    static const std::unordered_map<std::string, AttributeUnderlyingType> dictionary
+    auto type_index = type->getTypeId();
+
+    switch (type_index)
     {
-        {"UInt8", AttributeUnderlyingType::utUInt8},
-        {"UInt16", AttributeUnderlyingType::utUInt16},
-        {"UInt32", AttributeUnderlyingType::utUInt32},
-        {"UInt64", AttributeUnderlyingType::utUInt64},
-        {"UUID", AttributeUnderlyingType::utUInt128},
-        {"Int8", AttributeUnderlyingType::utInt8},
-        {"Int16", AttributeUnderlyingType::utInt16},
-        {"Int32", AttributeUnderlyingType::utInt32},
-        {"Int64", AttributeUnderlyingType::utInt64},
-        {"Float32", AttributeUnderlyingType::utFloat32},
-        {"Float64", AttributeUnderlyingType::utFloat64},
-        {"String", AttributeUnderlyingType::utString},
-        {"Date", AttributeUnderlyingType::utUInt16},
-    };
+        case TypeIndex::UInt8:          return AttributeUnderlyingType::utUInt8;
+        case TypeIndex::UInt16:         return AttributeUnderlyingType::utUInt16;
+        case TypeIndex::UInt32:         return AttributeUnderlyingType::utUInt32;
+        case TypeIndex::UInt64:         return AttributeUnderlyingType::utUInt64;
+        case TypeIndex::UInt128:        return AttributeUnderlyingType::utUInt128;
 
-    const auto it = dictionary.find(type);
-    if (it != std::end(dictionary))
-        return it->second;
+        case TypeIndex::Int8:           return AttributeUnderlyingType::utInt8;
+        case TypeIndex::Int16:          return AttributeUnderlyingType::utInt16;
+        case TypeIndex::Int32:          return AttributeUnderlyingType::utInt32;
+        case TypeIndex::Int64:          return AttributeUnderlyingType::utInt64;
 
-    /// Can contain arbitrary scale and timezone parameters.
-    if (type.find("DateTime64") == 0)
-        return AttributeUnderlyingType::utUInt64;
+        case TypeIndex::Float32:        return AttributeUnderlyingType::utFloat32;
+        case TypeIndex::Float64:        return AttributeUnderlyingType::utFloat64;
 
-    /// Can contain arbitrary timezone as parameter.
-    if (type.find("DateTime") == 0)
-        return AttributeUnderlyingType::utUInt32;
+        case TypeIndex::Decimal32:      return AttributeUnderlyingType::utDecimal32;
+        case TypeIndex::Decimal64:      return AttributeUnderlyingType::utDecimal64;
+        case TypeIndex::Decimal128:     return AttributeUnderlyingType::utDecimal128;
 
-    if (type.find("Decimal") == 0)
-    {
-        size_t start = strlen("Decimal");
-        if (type.find("32", start) == start)
-            return AttributeUnderlyingType::utDecimal32;
-        if (type.find("64", start) == start)
-            return AttributeUnderlyingType::utDecimal64;
-        if (type.find("128", start) == start)
-            return AttributeUnderlyingType::utDecimal128;
+        case TypeIndex::Date:           return AttributeUnderlyingType::utUInt16;
+        case TypeIndex::DateTime:       return AttributeUnderlyingType::utUInt32;
+        case TypeIndex::DateTime64:     return AttributeUnderlyingType::utUInt64;
+
+        case TypeIndex::String:         return AttributeUnderlyingType::utString;
+
+        // Temporary hack to allow arrays in keys, since they are never retrieved for polygon dictionaries.
+        // TODO: This should be fixed by fully supporting arrays in dictionaries.
+        case TypeIndex::Array:          return AttributeUnderlyingType::utString;
+
+        default: break;
     }
 
-    // Temporary hack to allow arrays in keys, since they are never retrieved for polygon dictionaries.
-    // TODO: This should be fixed by fully supporting arrays in dictionaries.
-    if (type.find("Array") == 0)
-        return AttributeUnderlyingType::utString;
-
-    throw Exception{"Unknown type " + type, ErrorCodes::UNKNOWN_TYPE};
+    throw Exception{"Unknown type for dictionary" + type->getName(), ErrorCodes::UNKNOWN_TYPE};
 }
 
 
@@ -334,13 +324,13 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
         //     type = array_type->getNestedType();
         // }
 
-        // if (type->isNullable())
-        // {
-        //     is_nullable = true;
-        //     type = removeNullable(type);
-        // }
+        if (type->isNullable())
+        {
+            is_nullable = true;
+            type = removeNullable(type);
+        }
 
-        const auto underlying_type = getAttributeUnderlyingType(type_string);
+        const auto underlying_type = getAttributeUnderlyingType(type);
 
         const auto expression = config.getString(prefix + "expression", "");
         if (!expression.empty())
@@ -353,7 +343,9 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
             try
             {
                 if (null_value_string.empty())
+                {
                     null_value = type->getDefault();
+                }
                 else
                 {
                     ReadBufferFromString null_value_buffer{null_value_string};
diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp
index 0bbd856fcfe..a7346fb0ad7 100644
--- a/src/Dictionaries/FlatDictionary.cpp
+++ b/src/Dictionaries/FlatDictionary.cpp
@@ -1,6 +1,7 @@
 #include "FlatDictionary.h"
 #include <IO/WriteHelpers.h>
 #include <Columns/ColumnsNumber.h>
+#include <Columns/ColumnNullable.h>
 #include <Functions/FunctionHelpers.h>
 #include "DictionaryBlockInputStream.h"
 #include "DictionaryFactory.h"
@@ -116,8 +117,18 @@ ColumnPtr FlatDictionary::getColumn(
     PaddedPODArray<Key> backup_storage;
     const auto & ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
 
+    auto size = ids.size();
+
     const auto & attribute = getAttribute(attribute_name);
 
+    ColumnUInt8::MutablePtr col_null_map_to;
+    ColumnUInt8::Container * vec_null_map_to = nullptr;
+    if (attribute.is_nullable)
+    {
+        col_null_map_to = ColumnUInt8::create(size, false);
+        vec_null_map_to = &col_null_map_to->getData();
+    }
+
     /// TODO: Check that attribute type is same as result type
     /// TODO: Check if const will work as expected
 
@@ -126,8 +137,6 @@ ColumnPtr FlatDictionary::getColumn(
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
         using AttributeType = typename Type::AttributeType;
 
-        auto size = ids.size();
-
         if constexpr (std::is_same_v<AttributeType, String>)
         {
             auto column_string = ColumnString::create();
@@ -226,6 +235,21 @@ ColumnPtr FlatDictionary::getColumn(
 
     callOnDictionaryAttributeType(attribute.type, type_call);
 
+    /// TODO: Fix
+    if (attribute.is_nullable)
+    {
+        for (size_t row = 0; row < ids.size(); ++row)
+        {
+            auto id = ids[row];
+            if (attribute.nullable_set->find(id) != attribute.nullable_set->end())
+            {
+                (*vec_null_map_to)[row] = true;
+            }
+        }
+
+        result = ColumnNullable::create(result, std::move(col_null_map_to));
+    }
+
     return result;
 }
 
@@ -259,7 +283,7 @@ void FlatDictionary::createAttributes()
     for (const auto & attribute : dict_struct.attributes)
     {
         attribute_index_by_name.emplace(attribute.name, attributes.size());
-        attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));
+        attributes.push_back(createAttribute(attribute, attribute.null_value));
 
         if (attribute.hierarchical)
         {
@@ -432,18 +456,20 @@ void FlatDictionary::createAttributeImpl<String>(Attribute & attribute, const Fi
 }
 
 
-FlatDictionary::Attribute FlatDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
+FlatDictionary::Attribute FlatDictionary::createAttribute(const DictionaryAttribute& attribute, const Field & null_value)
 {
-    Attribute attr{type, {}, {}, {}};
+    auto nullable_set = attribute.is_nullable ? std::make_unique<NullableSet>() : nullptr;
+    Attribute attr{attribute.underlying_type, attribute.is_nullable, std::move(nullable_set), {}, {}, {}};
 
     auto type_call = [&](const auto &dictionary_attribute_type)
     {
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
         using AttributeType = typename Type::AttributeType;
+
         createAttributeImpl<AttributeType>(attr, null_value);
     };
 
-    callOnDictionaryAttributeType(type, type_call);
+    callOnDictionaryAttributeType(attribute.underlying_type, type_call);
 
     return attr;
 }
@@ -501,55 +527,33 @@ void FlatDictionary::setAttributeValueImpl<String>(Attribute & attribute, const
 
 void FlatDictionary::setAttributeValue(Attribute & attribute, const Key id, const Field & value)
 {
-    switch (attribute.type)
+    auto type_call = [&](const auto &dictionary_attribute_type)
     {
-        case AttributeUnderlyingType::utUInt8:
-            setAttributeValueImpl<UInt8>(attribute, id, value.get<UInt64>());
-            break;
-        case AttributeUnderlyingType::utUInt16:
-            setAttributeValueImpl<UInt16>(attribute, id, value.get<UInt64>());
-            break;
-        case AttributeUnderlyingType::utUInt32:
-            setAttributeValueImpl<UInt32>(attribute, id, value.get<UInt64>());
-            break;
-        case AttributeUnderlyingType::utUInt64:
-            setAttributeValueImpl<UInt64>(attribute, id, value.get<UInt64>());
-            break;
-        case AttributeUnderlyingType::utUInt128:
-            setAttributeValueImpl<UInt128>(attribute, id, value.get<UInt128>());
-            break;
-        case AttributeUnderlyingType::utInt8:
-            setAttributeValueImpl<Int8>(attribute, id, value.get<Int64>());
-            break;
-        case AttributeUnderlyingType::utInt16:
-            setAttributeValueImpl<Int16>(attribute, id, value.get<Int64>());
-            break;
-        case AttributeUnderlyingType::utInt32:
-            setAttributeValueImpl<Int32>(attribute, id, value.get<Int64>());
-            break;
-        case AttributeUnderlyingType::utInt64:
-            setAttributeValueImpl<Int64>(attribute, id, value.get<Int64>());
-            break;
-        case AttributeUnderlyingType::utFloat32:
-            setAttributeValueImpl<Float32>(attribute, id, value.get<Float64>());
-            break;
-        case AttributeUnderlyingType::utFloat64:
-            setAttributeValueImpl<Float64>(attribute, id, value.get<Float64>());
-            break;
-        case AttributeUnderlyingType::utString:
-            setAttributeValueImpl<String>(attribute, id, value.get<String>());
-            break;
+        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+        using AttributeType = typename Type::AttributeType;
 
-        case AttributeUnderlyingType::utDecimal32:
-            setAttributeValueImpl<Decimal32>(attribute, id, value.get<Decimal32>());
-            break;
-        case AttributeUnderlyingType::utDecimal64:
-            setAttributeValueImpl<Decimal64>(attribute, id, value.get<Decimal64>());
-            break;
-        case AttributeUnderlyingType::utDecimal128:
-            setAttributeValueImpl<Decimal128>(attribute, id, value.get<Decimal128>());
-            break;
-    }
+        if (attribute.is_nullable)
+        {
+            if (value.isNull())
+            {
+                attribute.nullable_set->insert(id);
+                loaded_ids[id] = true;
+                return;
+            }
+            else
+            {
+                auto find_iter = attribute.nullable_set->find(id);
+                if (find_iter != attribute.nullable_set->end())
+                {
+                    attribute.nullable_set->erase(find_iter);
+                }
+            }
+        }
+
+        setAttributeValueImpl<AttributeType>(attribute, id, value.get<NearestFieldType<AttributeType>>());
+    };
+
+    callOnDictionaryAttributeType(attribute.type, type_call);
 }
 
 
diff --git a/src/Dictionaries/FlatDictionary.h b/src/Dictionaries/FlatDictionary.h
index 72da13b4cd4..1796b721129 100644
--- a/src/Dictionaries/FlatDictionary.h
+++ b/src/Dictionaries/FlatDictionary.h
@@ -86,9 +86,13 @@ private:
     template <typename Value>
     using ContainerType = PaddedPODArray<Value>;
 
+    using NullableSet = std::set<size_t>;
+
     struct Attribute final
     {
         AttributeUnderlyingType type;
+        bool is_nullable;
+        std::unique_ptr<NullableSet> nullable_set;
 
         std::variant<
             UInt8,
@@ -141,7 +145,7 @@ private:
     template <typename T>
     void createAttributeImpl(Attribute & attribute, const Field & null_value);
 
-    Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
+    Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value);
 
     template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
     void getItemsImpl(
diff --git a/src/Functions/FunctionsExternalDictionaries.cpp b/src/Functions/FunctionsExternalDictionaries.cpp
index dbdbaf0e22a..1bd900f3cbe 100644
--- a/src/Functions/FunctionsExternalDictionaries.cpp
+++ b/src/Functions/FunctionsExternalDictionaries.cpp
@@ -38,8 +38,9 @@ void registerFunctionsExternalDictionaries(FunctionFactory & factory)
     factory.registerFunction<FunctionDictGetDateTimeOrDefault>();
     factory.registerFunction<FunctionDictGetUUIDOrDefault>();
     factory.registerFunction<FunctionDictGetStringOrDefault>();
-    factory.registerFunction<FunctionDictGetNoType<DictionaryGetFunctionType::withoutDefault>>();
-    factory.registerFunction<FunctionDictGetNoType<DictionaryGetFunctionType::withDefault>>();
+    factory.registerFunction<FunctionDictGetNoType<DictionaryGetFunctionType::get>>();
+    factory.registerFunction<FunctionDictGetNoType<DictionaryGetFunctionType::getOrDefault>>();
+    factory.registerFunction<FunctionDictGetNoType<DictionaryGetFunctionType::getOrNull>>();
 }
 
 }
diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h
index 5e63e41d7df..0734754289c 100644
--- a/src/Functions/FunctionsExternalDictionaries.h
+++ b/src/Functions/FunctionsExternalDictionaries.h
@@ -8,6 +8,7 @@
 #include <DataTypes/DataTypeDateTime.h>
 #include <DataTypes/DataTypeTuple.h>
 #include <DataTypes/DataTypeUUID.h>
+#include <DataTypes/DataTypeNullable.h>
 
 #include <Common/typeid_cast.h>
 #include <Common/assert_cast.h>
@@ -205,8 +206,9 @@ private:
 
 enum class DictionaryGetFunctionType
 {
-    withoutDefault,
-    withDefault
+    get,
+    getOrDefault,
+    getOrNull
 };
 
 template <typename DataType, typename Name, DictionaryGetFunctionType dictionary_get_function_type>
@@ -242,10 +244,19 @@ private:
 
     DataTypePtr getReturnTypeImpl(const DataTypes &) const override
     {
+        DataTypePtr result;
+        /// TODO: Decimal will not work properly during FunctionDictGetImpl call decimal_scale will not be iniitalized.
         if constexpr (IsDataTypeDecimal<DataType>)
-            return std::make_shared<DataType>(DataType::maxPrecision(), decimal_scale);
+            result = std::make_shared<DataType>(DataType::maxPrecision(), decimal_scale);
         else
-            return std::make_shared<DataType>();
+            result = std::make_shared<DataType>();
+
+        if constexpr (dictionary_get_function_type == DictionaryGetFunctionType::getOrNull)
+        {
+            result = std::make_shared<DataTypeNullable>(result);
+        }
+
+        return result;
     }
 
     ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
@@ -305,7 +316,7 @@ private:
 
         ColumnPtr default_col = nullptr;
 
-        if (dictionary_get_function_type == DictionaryGetFunctionType::withDefault)
+        if (dictionary_get_function_type == DictionaryGetFunctionType::getOrDefault)
         {
             if (current_arguments_index >= arguments.size())
                 throw Exception{"Wrong argument count for function test " + getName(), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
@@ -348,7 +359,7 @@ private:
 };
 
 template<typename DataType, typename Name>
-using FunctionDictGet = FunctionDictGetImpl<DataType, Name, DictionaryGetFunctionType::withoutDefault>;
+using FunctionDictGet = FunctionDictGetImpl<DataType, Name, DictionaryGetFunctionType::get>;
 
 struct NameDictGetUInt8 { static constexpr auto name = "dictGetUInt8"; };
 struct NameDictGetUInt16 { static constexpr auto name = "dictGetUInt16"; };
@@ -387,7 +398,7 @@ using FunctionDictGetDecimal128 = FunctionDictGet<DataTypeDecimal<Decimal128>, N
 using FunctionDictGetString = FunctionDictGet<DataTypeString, NameDictGetString>;
 
 template<typename DataType, typename Name>
-using FunctionDictGetOrDefault = FunctionDictGetImpl<DataType, Name, DictionaryGetFunctionType::withDefault>;
+using FunctionDictGetOrDefault = FunctionDictGetImpl<DataType, Name, DictionaryGetFunctionType::getOrDefault>;
 
 struct NameDictGetUInt8OrDefault { static constexpr auto name = "dictGetUInt8OrDefault"; };
 struct NameDictGetUInt16OrDefault { static constexpr auto name = "dictGetUInt16OrDefault"; };
@@ -425,6 +436,44 @@ using FunctionDictGetDecimal64OrDefault = FunctionDictGetOrDefault<DataTypeDecim
 using FunctionDictGetDecimal128OrDefault = FunctionDictGetOrDefault<DataTypeDecimal<Decimal128>, NameDictGetDecimal128OrDefault>;
 using FunctionDictGetStringOrDefault = FunctionDictGetOrDefault<DataTypeString, NameDictGetStringOrDefault>;
 
+template<typename DataType, typename Name>
+using FunctionDictGetOrNull = FunctionDictGetImpl<DataType, Name, DictionaryGetFunctionType::getOrNull>;
+
+struct NameDictGetUInt8OrNull { static constexpr auto name = "dictGetUInt8OrNull"; };
+struct NameDictGetUInt16OrNull { static constexpr auto name = "dictGetUInt16OrNull"; };
+struct NameDictGetUInt32OrNull { static constexpr auto name = "dictGetUInt32OrNull"; };
+struct NameDictGetUInt64OrNull { static constexpr auto name = "dictGetUInt64OrNull"; };
+struct NameDictGetInt8OrNull { static constexpr auto name = "dictGetInt8OrNull"; };
+struct NameDictGetInt16OrNull { static constexpr auto name = "dictGetInt16OrNull"; };
+struct NameDictGetInt32OrNull { static constexpr auto name = "dictGetInt32OrNull"; };
+struct NameDictGetInt64OrNull { static constexpr auto name = "dictGetInt64OrNull"; };
+struct NameDictGetFloat32OrNull { static constexpr auto name = "dictGetFloat32OrNull"; };
+struct NameDictGetFloat64OrNull { static constexpr auto name = "dictGetFloat64OrNull"; };
+struct NameDictGetDateOrNull { static constexpr auto name = "dictGetDateOrNull"; };
+struct NameDictGetDateTimeOrNull { static constexpr auto name = "dictGetDateTimeOrNull"; };
+struct NameDictGetUUIDOrNull { static constexpr auto name = "dictGetUUIDOrNull"; };
+struct NameDictGetDecimal32OrNull { static constexpr auto name = "dictGetDecimal32OrNull"; };
+struct NameDictGetDecimal64OrNull { static constexpr auto name = "dictGetDecimal64OrNull"; };
+struct NameDictGetDecimal128OrNull { static constexpr auto name = "dictGetDecimal128OrNull"; };
+struct NameDictGetStringOrNull { static constexpr auto name = "dictGetStringOrNull"; };
+
+using FunctionDictGetUInt8OrNull = FunctionDictGetOrNull<DataTypeUInt8, NameDictGetUInt8OrNull>;
+using FunctionDictGetUInt16OrNull = FunctionDictGetOrNull<DataTypeUInt16, NameDictGetUInt16OrNull>;
+using FunctionDictGetUInt32OrNull = FunctionDictGetOrNull<DataTypeUInt32, NameDictGetUInt32OrNull>;
+using FunctionDictGetUInt64OrNull = FunctionDictGetOrNull<DataTypeUInt64, NameDictGetUInt64OrNull>;
+using FunctionDictGetInt8OrNull = FunctionDictGetOrNull<DataTypeInt8, NameDictGetInt8OrNull>;
+using FunctionDictGetInt16OrNull = FunctionDictGetOrNull<DataTypeInt16, NameDictGetInt16OrNull>;
+using FunctionDictGetInt32OrNull = FunctionDictGetOrNull<DataTypeInt32, NameDictGetInt32OrNull>;
+using FunctionDictGetInt64OrNull = FunctionDictGetOrNull<DataTypeInt64, NameDictGetInt64OrNull>;
+using FunctionDictGetFloat32OrNull = FunctionDictGetOrNull<DataTypeFloat32, NameDictGetFloat32OrNull>;
+using FunctionDictGetFloat64OrNull = FunctionDictGetOrNull<DataTypeFloat64, NameDictGetFloat64OrNull>;
+using FunctionDictGetDateOrNull = FunctionDictGetOrNull<DataTypeDate, NameDictGetDateOrNull>;
+using FunctionDictGetDateTimeOrNull = FunctionDictGetOrNull<DataTypeDateTime, NameDictGetDateTimeOrNull>;
+using FunctionDictGetUUIDOrNull = FunctionDictGetOrNull<DataTypeUUID, NameDictGetUUIDOrNull>;
+using FunctionDictGetDecimal32OrNull = FunctionDictGetOrNull<DataTypeDecimal<Decimal32>, NameDictGetDecimal32OrNull>;
+using FunctionDictGetDecimal64OrNull = FunctionDictGetOrNull<DataTypeDecimal<Decimal64>, NameDictGetDecimal64OrNull>;
+using FunctionDictGetDecimal128OrNull = FunctionDictGetOrNull<DataTypeDecimal<Decimal128>, NameDictGetDecimal128OrNull>;
+using FunctionDictGetStringOrNull = FunctionDictGetOrNull<DataTypeString, NameDictGetStringOrNull>;
 
 /// TODO: Use new API
 /// This variant of function derives the result type automatically.
@@ -432,7 +481,8 @@ template <DictionaryGetFunctionType dictionary_get_function_type>
 class FunctionDictGetNoType final : public IFunction
 {
 public:
-    static constexpr auto name = dictionary_get_function_type == DictionaryGetFunctionType::withDefault ? "dictGetOrDefault" : "dictGet";
+    static constexpr auto name = dictionary_get_function_type == DictionaryGetFunctionType::get ? "dictGet"
+        : (dictionary_get_function_type == DictionaryGetFunctionType::getOrDefault ? "dictGetOrDefault" : "dictGetOrNull");
 
     static FunctionPtr create(const Context & context)
     {
@@ -490,9 +540,9 @@ private:
                 continue;
             }
 
-            WhichDataType dt = attribute.type;
+            WhichDataType dt = removeNullable(attribute.type);
 
-            if constexpr (dictionary_get_function_type == DictionaryGetFunctionType::withoutDefault)
+            if constexpr (dictionary_get_function_type == DictionaryGetFunctionType::get)
             {
                 switch (dt.idx)
                 {
@@ -552,7 +602,7 @@ private:
                         throw Exception("Unknown dictGet type", ErrorCodes::UNKNOWN_TYPE);
                 }
             }
-            else
+            else if constexpr (dictionary_get_function_type == DictionaryGetFunctionType::getOrDefault)
             {
                 switch (dt.idx)
                 {
@@ -611,6 +661,65 @@ private:
                         throw Exception("Unknown dictGetOrDefault type", ErrorCodes::UNKNOWN_TYPE);
                 }
             }
+            else if constexpr (dictionary_get_function_type == DictionaryGetFunctionType::getOrNull)
+            {
+                switch (dt.idx)
+                {
+                    case TypeIndex::String:
+                        impl = FunctionDictGetStringOrNull::create(context);
+                        break;
+                    case TypeIndex::UInt8:
+                        impl = FunctionDictGetUInt8OrNull::create(context);
+                        break;
+                    case TypeIndex::UInt16:
+                        impl = FunctionDictGetUInt16OrNull::create(context);
+                        break;
+                    case TypeIndex::UInt32:
+                        impl = FunctionDictGetUInt32OrNull::create(context);
+                        break;
+                    case TypeIndex::UInt64:
+                        impl = FunctionDictGetUInt64OrNull::create(context);
+                        break;
+                    case TypeIndex::Int8:
+                        impl = FunctionDictGetInt8OrNull::create(context);
+                        break;
+                    case TypeIndex::Int16:
+                        impl = FunctionDictGetInt16OrNull::create(context);
+                        break;
+                    case TypeIndex::Int32:
+                        impl = FunctionDictGetInt32OrNull::create(context);
+                        break;
+                    case TypeIndex::Int64:
+                        impl = FunctionDictGetInt64OrNull::create(context);
+                        break;
+                    case TypeIndex::Float32:
+                        impl = FunctionDictGetFloat32OrNull::create(context);
+                        break;
+                    case TypeIndex::Float64:
+                        impl = FunctionDictGetFloat64OrNull::create(context);
+                        break;
+                    case TypeIndex::Date:
+                        impl = FunctionDictGetDateOrNull::create(context);
+                        break;
+                    case TypeIndex::DateTime:
+                        impl = FunctionDictGetDateTimeOrNull::create(context);
+                        break;
+                    case TypeIndex::UUID:
+                        impl = FunctionDictGetUUIDOrNull::create(context);
+                        break;
+                    case TypeIndex::Decimal32:
+                        impl = FunctionDictGetDecimal32OrNull::create(context, getDecimalScale(*attribute.type));
+                        break;
+                    case TypeIndex::Decimal64:
+                        impl = FunctionDictGetDecimal64OrNull::create(context, getDecimalScale(*attribute.type));
+                        break;
+                    case TypeIndex::Decimal128:
+                        impl = FunctionDictGetDecimal128OrNull::create(context, getDecimalScale(*attribute.type));
+                        break;
+                    default:
+                        throw Exception("Unknown dictGetOrNull type", ErrorCodes::UNKNOWN_TYPE);
+                }
+            }
 
             return attribute.type;
         }

From 3e2d615e6245f735fcd19bed6f5e32baa22982fa Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Sun, 3 Jan 2021 13:07:21 +0300
Subject: [PATCH 0232/1238] Added Nullable support for HashedDictionary

---
 src/Dictionaries/DictionaryStructure.cpp |   2 +
 src/Dictionaries/FlatDictionary.cpp      |   9 +-
 src/Dictionaries/FlatDictionary.h        |   3 +-
 src/Dictionaries/HashedDictionary.cpp    | 118 ++++++++++++++---------
 src/Dictionaries/HashedDictionary.h      |   8 +-
 5 files changed, 85 insertions(+), 55 deletions(-)

diff --git a/src/Dictionaries/DictionaryStructure.cpp b/src/Dictionaries/DictionaryStructure.cpp
index 3ccfdd49a27..df785bcb550 100644
--- a/src/Dictionaries/DictionaryStructure.cpp
+++ b/src/Dictionaries/DictionaryStructure.cpp
@@ -70,6 +70,8 @@ AttributeUnderlyingType getAttributeUnderlyingType(const DataTypePtr & type)
         case TypeIndex::DateTime:       return AttributeUnderlyingType::utUInt32;
         case TypeIndex::DateTime64:     return AttributeUnderlyingType::utUInt64;
 
+        case TypeIndex::UUID:           return AttributeUnderlyingType::utUInt128;
+
         case TypeIndex::String:         return AttributeUnderlyingType::utString;
 
         // Temporary hack to allow arrays in keys, since they are never retrieved for polygon dictionaries.
diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp
index a7346fb0ad7..9cbbbbd4f83 100644
--- a/src/Dictionaries/FlatDictionary.cpp
+++ b/src/Dictionaries/FlatDictionary.cpp
@@ -235,13 +235,12 @@ ColumnPtr FlatDictionary::getColumn(
 
     callOnDictionaryAttributeType(attribute.type, type_call);
 
-    /// TODO: Fix
     if (attribute.is_nullable)
     {
         for (size_t row = 0; row < ids.size(); ++row)
         {
             auto id = ids[row];
-            if (attribute.nullable_set->find(id) != attribute.nullable_set->end())
+            if (attribute.nullable_set->find(id) != nullptr)
             {
                 (*vec_null_map_to)[row] = true;
             }
@@ -542,11 +541,7 @@ void FlatDictionary::setAttributeValue(Attribute & attribute, const Key id, cons
             }
             else
             {
-                auto find_iter = attribute.nullable_set->find(id);
-                if (find_iter != attribute.nullable_set->end())
-                {
-                    attribute.nullable_set->erase(find_iter);
-                }
+                attribute.nullable_set->erase(id);
             }
         }
 
diff --git a/src/Dictionaries/FlatDictionary.h b/src/Dictionaries/FlatDictionary.h
index 1796b721129..8de90ca0c15 100644
--- a/src/Dictionaries/FlatDictionary.h
+++ b/src/Dictionaries/FlatDictionary.h
@@ -4,6 +4,7 @@
 #include <variant>
 #include <vector>
 #include <optional>
+#include <Common/HashTable/HashSet.h>
 #include <Columns/ColumnDecimal.h>
 #include <Columns/ColumnString.h>
 #include <Columns/ColumnArray.h>
@@ -86,7 +87,7 @@ private:
     template <typename Value>
     using ContainerType = PaddedPODArray<Value>;
 
-    using NullableSet = std::set<size_t>;
+    using NullableSet = HashSet<Key, DefaultHash<Key>>;
 
     struct Attribute final
     {
diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp
index 7cb116c7bc9..133d9c0db99 100644
--- a/src/Dictionaries/HashedDictionary.cpp
+++ b/src/Dictionaries/HashedDictionary.cpp
@@ -6,6 +6,7 @@
 #include <Core/Defines.h>
 #include <Functions/FunctionHelpers.h>
 #include <Columns/ColumnsNumber.h>
+#include <Columns/ColumnNullable.h>
 
 namespace
 {
@@ -139,8 +140,18 @@ ColumnPtr HashedDictionary::getColumn(
     PaddedPODArray<Key> backup_storage;
     const auto & ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
 
+    auto size = ids.size();
+
     const auto & attribute = getAttribute(attribute_name);
 
+    ColumnUInt8::MutablePtr col_null_map_to;
+    ColumnUInt8::Container * vec_null_map_to = nullptr;
+    if (attribute.is_nullable)
+    {
+        col_null_map_to = ColumnUInt8::create(size, false);
+        vec_null_map_to = &col_null_map_to->getData();
+    }
+
     /// TODO: Check that attribute type is same as result type
     /// TODO: Check if const will work as expected
 
@@ -149,8 +160,6 @@ ColumnPtr HashedDictionary::getColumn(
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
         using AttributeType = typename Type::AttributeType;
 
-        auto size = ids.size();
-
         if constexpr (std::is_same_v<AttributeType, String>)
         {
             auto column_string = ColumnString::create();
@@ -249,6 +258,20 @@ ColumnPtr HashedDictionary::getColumn(
 
     callOnDictionaryAttributeType(attribute.type, type_call);
 
+    if (attribute.is_nullable)
+    {
+        for (size_t row = 0; row < ids.size(); ++row)
+        {
+            auto id = ids[row];
+            if (attribute.nullable_set->find(id) != nullptr)
+            {
+                (*vec_null_map_to)[row] = true;
+            }
+        }
+
+        result = ColumnNullable::create(result, std::move(col_null_map_to));
+    }
+
     return result;
 }
 
@@ -286,7 +309,7 @@ void HashedDictionary::createAttributes()
     for (const auto & attribute : dict_struct.attributes)
     {
         attribute_index_by_name.emplace(attribute.name, attributes.size());
-        attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));
+        attributes.push_back(createAttribute(attribute, attribute.null_value));
 
         if (attribute.hierarchical)
         {
@@ -549,9 +572,10 @@ void HashedDictionary::createAttributeImpl<String>(Attribute & attribute, const
         attribute.sparse_maps = std::make_unique<SparseCollectionType<StringRef>>();
 }
 
-HashedDictionary::Attribute HashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
+HashedDictionary::Attribute HashedDictionary::createAttribute(const DictionaryAttribute& attribute, const Field & null_value)
 {
-    Attribute attr{type, {}, {}, {}, {}};
+    auto nullable_set = attribute.is_nullable ? std::make_unique<NullableSet>() : nullptr;
+    Attribute attr{attribute.underlying_type, attribute.is_nullable, std::move(nullable_set), {}, {}, {}, {}};
 
     auto type_call = [&](const auto &dictionary_attribute_type)
     {
@@ -560,7 +584,7 @@ HashedDictionary::Attribute HashedDictionary::createAttributeWithType(const Attr
         createAttributeImpl<AttributeType>(attr, null_value);
     };
 
-    callOnDictionaryAttributeType(type, type_call);
+    callOnDictionaryAttributeType(attribute.underlying_type, type_call);
 
     return attr;
 }
@@ -605,58 +629,51 @@ bool HashedDictionary::setAttributeValueImpl(Attribute & attribute, const Key id
     }
 }
 
+template <>
+bool HashedDictionary::setAttributeValueImpl<String>(Attribute & attribute, const Key id, const String value)
+{
+    const auto * string_in_arena = attribute.string_arena->insert(value.data(), value.size());
+    if (!sparse)
+    {
+        auto & map = *std::get<CollectionPtrType<StringRef>>(attribute.maps);
+        return map.insert({id, StringRef{string_in_arena, value.size()}}).second;
+    }
+    else
+    {
+        auto & map = *std::get<SparseCollectionPtrType<StringRef>>(attribute.sparse_maps);
+        return map.insert({id, StringRef{string_in_arena, value.size()}}).second;
+    }
+}
+
 bool HashedDictionary::setAttributeValue(Attribute & attribute, const Key id, const Field & value)
 {
-    switch (attribute.type)
+    bool result = false;
+
+    auto type_call = [&](const auto &dictionary_attribute_type)
     {
-        case AttributeUnderlyingType::utUInt8:
-            return setAttributeValueImpl<UInt8>(attribute, id, value.get<UInt64>());
-        case AttributeUnderlyingType::utUInt16:
-            return setAttributeValueImpl<UInt16>(attribute, id, value.get<UInt64>());
-        case AttributeUnderlyingType::utUInt32:
-            return setAttributeValueImpl<UInt32>(attribute, id, value.get<UInt64>());
-        case AttributeUnderlyingType::utUInt64:
-            return setAttributeValueImpl<UInt64>(attribute, id, value.get<UInt64>());
-        case AttributeUnderlyingType::utUInt128:
-            return setAttributeValueImpl<UInt128>(attribute, id, value.get<UInt128>());
-        case AttributeUnderlyingType::utInt8:
-            return setAttributeValueImpl<Int8>(attribute, id, value.get<Int64>());
-        case AttributeUnderlyingType::utInt16:
-            return setAttributeValueImpl<Int16>(attribute, id, value.get<Int64>());
-        case AttributeUnderlyingType::utInt32:
-            return setAttributeValueImpl<Int32>(attribute, id, value.get<Int64>());
-        case AttributeUnderlyingType::utInt64:
-            return setAttributeValueImpl<Int64>(attribute, id, value.get<Int64>());
-        case AttributeUnderlyingType::utFloat32:
-            return setAttributeValueImpl<Float32>(attribute, id, value.get<Float64>());
-        case AttributeUnderlyingType::utFloat64:
-            return setAttributeValueImpl<Float64>(attribute, id, value.get<Float64>());
+        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+        using AttributeType = typename Type::AttributeType;
 
-        case AttributeUnderlyingType::utDecimal32:
-            return setAttributeValueImpl<Decimal32>(attribute, id, value.get<Decimal32>());
-        case AttributeUnderlyingType::utDecimal64:
-            return setAttributeValueImpl<Decimal64>(attribute, id, value.get<Decimal64>());
-        case AttributeUnderlyingType::utDecimal128:
-            return setAttributeValueImpl<Decimal128>(attribute, id, value.get<Decimal128>());
-
-        case AttributeUnderlyingType::utString:
+        if (attribute.is_nullable)
         {
-            const auto & string = value.get<String>();
-            const auto * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
-            if (!sparse)
+            if (value.isNull())
             {
-                auto & map = *std::get<CollectionPtrType<StringRef>>(attribute.maps);
-                return map.insert({id, StringRef{string_in_arena, string.size()}}).second;
+                attribute.nullable_set->insert(id);
+                result = true;
+                return;
             }
             else
             {
-                auto & map = *std::get<SparseCollectionPtrType<StringRef>>(attribute.sparse_maps);
-                return map.insert({id, StringRef{string_in_arena, string.size()}}).second;
+                attribute.nullable_set->erase(id);
             }
         }
-    }
 
-    throw Exception{"Invalid attribute type", ErrorCodes::BAD_ARGUMENTS};
+        result = setAttributeValueImpl<AttributeType>(attribute, id, value.get<NearestFieldType<AttributeType>>());
+    };
+
+    callOnDictionaryAttributeType(attribute.type, type_call);
+
+    return result;
 }
 
 const HashedDictionary::Attribute & HashedDictionary::getAttribute(const std::string & attribute_name) const
@@ -717,7 +734,16 @@ PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds() const
     {
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
         using AttributeType = typename Type::AttributeType;
+        /// TODO: Check if order is satisfied
         result = getIds<AttributeType>(attribute);
+
+        if (attribute.is_nullable)
+        {
+            for (const auto& value: *attribute.nullable_set)
+            {
+                result.push_back(value.getKey());
+            }
+        }
     };
 
     callOnDictionaryAttributeType(attribute.type, type_call);
diff --git a/src/Dictionaries/HashedDictionary.h b/src/Dictionaries/HashedDictionary.h
index ff64fb29f1f..0f718c8132b 100644
--- a/src/Dictionaries/HashedDictionary.h
+++ b/src/Dictionaries/HashedDictionary.h
@@ -7,6 +7,7 @@
 #include <Columns/ColumnString.h>
 #include <Core/Block.h>
 #include <Common/HashTable/HashMap.h>
+#include <Common/HashTable/HashSet.h>
 #include <sparsehash/sparse_hash_map>
 #include <ext/range.h>
 #include "DictionaryStructure.h"
@@ -101,9 +102,14 @@ private:
     template <typename Value>
     using SparseCollectionPtrType = std::unique_ptr<SparseCollectionType<Value>>;
 
+    using NullableSet = HashSet<Key, DefaultHash<Key>>;
+
     struct Attribute final
     {
         AttributeUnderlyingType type;
+        bool is_nullable;
+        std::unique_ptr<NullableSet> nullable_set;
+
         std::variant<
             UInt8,
             UInt16,
@@ -174,7 +180,7 @@ private:
     template <typename T>
     void createAttributeImpl(Attribute & attribute, const Field & null_value);
 
-    Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
+    Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value);
 
     template <typename OutputType, typename AttrType, typename ValueSetter, typename DefaultGetter>
     void getItemsAttrImpl(

From b745c6445982195e623657072e31a186b71614de Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Sun, 3 Jan 2021 23:05:14 +0300
Subject: [PATCH 0233/1238] Added Nullable support for DirectDictionary

---
 src/Dictionaries/CacheDictionary.cpp          |   4 +
 .../ComplexKeyCacheDictionary.cpp             |   4 +
 .../ComplexKeyDirectDictionary.cpp            |   4 +
 .../ComplexKeyHashedDictionary.cpp            |   4 +
 src/Dictionaries/DirectDictionary.cpp         | 137 +++++++++++++-----
 src/Dictionaries/DirectDictionary.h           |   3 +-
 src/Dictionaries/FlatDictionary.cpp           |  10 +-
 src/Dictionaries/HashedDictionary.cpp         |   9 ++
 src/Dictionaries/IPAddressDictionary.cpp      |   4 +
 src/Dictionaries/PolygonDictionary.cpp        |   4 +
 src/Dictionaries/RangeHashedDictionary.cpp    |   4 +
 src/Dictionaries/SSDCacheDictionary.cpp       |   4 +
 .../SSDComplexKeyCacheDictionary.cpp          |   4 +
 src/Formats/MySQLBlockInputStream.cpp         |  18 ++-
 14 files changed, 170 insertions(+), 43 deletions(-)

diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp
index 521b95243ae..71490897131 100644
--- a/src/Dictionaries/CacheDictionary.cpp
+++ b/src/Dictionaries/CacheDictionary.cpp
@@ -289,6 +289,8 @@ ColumnPtr CacheDictionary::getColumn(
 
                     getItemsString(attribute, ids, column_string.get(), [&](const size_t) { return StringRef{def}; });
                 }
+                else
+                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
             }
             else
             {
@@ -339,6 +341,8 @@ ColumnPtr CacheDictionary::getColumn(
                         [&](const size_t) { return def; }
                     );
                 }
+                else
+                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
             }
             else
             {
diff --git a/src/Dictionaries/ComplexKeyCacheDictionary.cpp b/src/Dictionaries/ComplexKeyCacheDictionary.cpp
index 336c2a9eeef..60d2878f694 100644
--- a/src/Dictionaries/ComplexKeyCacheDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyCacheDictionary.cpp
@@ -111,6 +111,8 @@ ColumnPtr ComplexKeyCacheDictionary::getColumn(
 
                     getItemsString(attribute, key_columns, out, [&](const size_t) { return StringRef{def}; });
                 }
+                else
+                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
             }
             else
             {
@@ -161,6 +163,8 @@ ColumnPtr ComplexKeyCacheDictionary::getColumn(
                         [&](const size_t) { return def; }
                     );
                 }
+                else
+                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
             }
             else
             {
diff --git a/src/Dictionaries/ComplexKeyDirectDictionary.cpp b/src/Dictionaries/ComplexKeyDirectDictionary.cpp
index 3d96f1e410a..c476348ccd5 100644
--- a/src/Dictionaries/ComplexKeyDirectDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyDirectDictionary.cpp
@@ -92,6 +92,8 @@ ColumnPtr ComplexKeyDirectDictionary::getColumn(
                         },
                         [&](const size_t) { return def; });
                 }
+                else
+                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
             }
             else
             {
@@ -150,6 +152,8 @@ ColumnPtr ComplexKeyDirectDictionary::getColumn(
                         [&](const size_t) { return def; }
                     );
                 }
+                else
+                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
             }
             else
             {
diff --git a/src/Dictionaries/ComplexKeyHashedDictionary.cpp b/src/Dictionaries/ComplexKeyHashedDictionary.cpp
index 8bb1243dd2a..6aee03668ab 100644
--- a/src/Dictionaries/ComplexKeyHashedDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyHashedDictionary.cpp
@@ -82,6 +82,8 @@ ColumnPtr ComplexKeyHashedDictionary::getColumn(
                         [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
                         [&](const size_t) { return def; });
                 }
+                else
+                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
             }
             else
             {
@@ -136,6 +138,8 @@ ColumnPtr ComplexKeyHashedDictionary::getColumn(
                         [&](const size_t) { return def; }
                     );
                 }
+                else
+                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
             }
             else
             {
diff --git a/src/Dictionaries/DirectDictionary.cpp b/src/Dictionaries/DirectDictionary.cpp
index 8995a9f3f3f..ffd1dd2d01b 100644
--- a/src/Dictionaries/DirectDictionary.cpp
+++ b/src/Dictionaries/DirectDictionary.cpp
@@ -4,6 +4,7 @@
 #include "DictionaryFactory.h"
 #include <Core/Defines.h>
 #include <Functions/FunctionHelpers.h>
+#include <Columns/ColumnNullable.h>
 
 namespace DB
 {
@@ -38,7 +39,7 @@ void DirectDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<
     getItemsImpl<UInt64, UInt64>(
         *hierarchical_attribute,
         ids,
-        [&](const size_t row, const UInt64 value) { out[row] = value; },
+        [&](const size_t row, const UInt64 value, bool) { out[row] = value; },
         [&](const size_t) { return null_value; });
 }
 
@@ -142,6 +143,16 @@ ColumnPtr DirectDictionary::getColumn(
 
     const auto & attribute = getAttribute(attribute_name);
 
+    auto size = ids.size();
+
+    ColumnUInt8::MutablePtr col_null_map_to;
+    ColumnUInt8::Container * vec_null_map_to = nullptr;
+    if (attribute.is_nullable)
+    {
+        col_null_map_to = ColumnUInt8::create(size, false);
+        vec_null_map_to = &col_null_map_to->getData();
+    }
+
     /// TODO: Check that attribute type is same as result type
     /// TODO: Check if const will work as expected
 
@@ -150,8 +161,6 @@ ColumnPtr DirectDictionary::getColumn(
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
         using AttributeType = typename Type::AttributeType;
 
-        auto size = ids.size();
-
         if constexpr (std::is_same_v<AttributeType, String>)
         {
             auto column_string = ColumnString::create();
@@ -164,8 +173,13 @@ ColumnPtr DirectDictionary::getColumn(
                     getItemsImpl<String, String>(
                         attribute,
                         ids,
-                        [&](const size_t, const String value)
+                        [&](const size_t row, const String value, bool is_null)
                         {
+                            if (attribute.is_nullable)
+                            {
+                                (*vec_null_map_to)[row] = is_null;
+                            }
+ 
                             const auto ref = StringRef{value};
                             out->insertData(ref.data, ref.size);
                         },
@@ -182,13 +196,21 @@ ColumnPtr DirectDictionary::getColumn(
                     getItemsImpl<String, String>(
                         attribute,
                         ids,
-                        [&](const size_t, const String value)
+                        [&](const size_t row, const String value, bool is_null)
                         {
+                            if (attribute.is_nullable)
+                            {
+                                (*vec_null_map_to)[row] = is_null;
+                            }
+ 
                             const auto ref = StringRef{value};
                             out->insertData(ref.data, ref.size);
                         },
                         [&](const size_t) { return def; });
                 }
+                else
+                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
+ 
             }
             else
             {
@@ -197,8 +219,13 @@ ColumnPtr DirectDictionary::getColumn(
                 getItemsImpl<String, String>(
                     attribute,
                     ids,
-                    [&](const size_t, const String value)
+                    [&](const size_t row, const String value, bool is_null)
                     {
+                        if (attribute.is_nullable)
+                        {
+                            (*vec_null_map_to)[row] = is_null;
+                        }
+
                         const auto ref = StringRef{value};
                         out->insertData(ref.data, ref.size);
                     },
@@ -232,7 +259,15 @@ ColumnPtr DirectDictionary::getColumn(
                     getItemsImpl<AttributeType, AttributeType>(
                         attribute,
                         ids,
-                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t row, const auto value, bool is_null)
+                        {
+                            if (attribute.is_nullable)
+                            {
+                                (*vec_null_map_to)[row] = is_null;
+                            }
+ 
+                            out[row] = value; 
+                        },
                         [&](const size_t row) { return default_col->getData()[row]; }
                     );
                 }
@@ -243,10 +278,20 @@ ColumnPtr DirectDictionary::getColumn(
                     getItemsImpl<AttributeType, AttributeType>(
                         attribute,
                         ids,
-                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t row, const auto value, bool is_null)
+                        {
+                            if (attribute.is_nullable)
+                            {
+                                (*vec_null_map_to)[row] = is_null;
+                            }
+ 
+                            out[row] = value; 
+                        },
                         [&](const size_t) { return def; }
                     );
                 }
+                else
+                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
             }
             else
             {
@@ -255,7 +300,15 @@ ColumnPtr DirectDictionary::getColumn(
                 getItemsImpl<AttributeType, AttributeType>(
                     attribute,
                     ids,
-                    [&](const size_t row, const auto value) { return out[row] = value; },
+                    [&](const size_t row, const auto value, bool is_null)
+                    {
+                        if (attribute.is_nullable)
+                        {
+                            (*vec_null_map_to)[row] = is_null;
+                        }
+ 
+                        out[row] = value; 
+                    },
                     [&](const size_t) { return null_value; }
                 );
             }
@@ -266,6 +319,11 @@ ColumnPtr DirectDictionary::getColumn(
 
     callOnDictionaryAttributeType(attribute.type, type_call);
 
+    if (attribute.is_nullable)
+    {
+        result = ColumnNullable::create(result, std::move(col_null_map_to));
+    }
+
     return result;
 }
 
@@ -321,7 +379,7 @@ void DirectDictionary::createAttributes()
     {
         attribute_index_by_name.emplace(attribute.name, attributes.size());
         attribute_name_by_index.emplace(attributes.size(), attribute.name);
-        attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value, attribute.name));
+        attributes.push_back(createAttribute(attribute, attribute.null_value, attribute.name));
 
         if (attribute.hierarchical)
         {
@@ -350,9 +408,9 @@ void DirectDictionary::createAttributeImpl<String>(Attribute & attribute, const
 }
 
 
-DirectDictionary::Attribute DirectDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value, const std::string & attr_name)
+DirectDictionary::Attribute DirectDictionary::createAttribute(const DictionaryAttribute& attribute, const Field & null_value, const std::string & attr_name)
 {
-    Attribute attr{type, {}, {}, attr_name};
+    Attribute attr{attribute.underlying_type, attribute.is_nullable, {}, {}, attr_name};
 
     auto type_call = [&](const auto &dictionary_attribute_type)
     {
@@ -361,7 +419,7 @@ DirectDictionary::Attribute DirectDictionary::createAttributeWithType(const Attr
         createAttributeImpl<AttributeType>(attr, null_value);
     };
 
-    callOnDictionaryAttributeType(type, type_call);
+    callOnDictionaryAttributeType(attribute.underlying_type, type_call);
 
     return attr;
 }
@@ -374,8 +432,14 @@ void DirectDictionary::getItemsImpl(
     const auto rows = ext::size(ids);
 
     HashMap<Key, OutputType> value_by_key;
+    HashMap<Key, bool> value_is_null;
+
     for (const auto row : ext::range(0, rows))
-        value_by_key[ids[row]] = get_default(row);
+    {
+        auto key = ids[row];
+        value_by_key[key] = get_default(row);
+        value_is_null[key] = false;
+    }
 
     std::vector<Key> to_load;
     to_load.reserve(value_by_key.size());
@@ -385,35 +449,30 @@ void DirectDictionary::getItemsImpl(
     auto stream = source_ptr->loadIds(to_load);
     stream->readPrefix();
 
+    const auto it = attribute_index_by_name.find(attribute.name);
+    if (it == std::end(attribute_index_by_name))
+        throw Exception{full_name + ": no such attribute '" + attribute.name + "'", ErrorCodes::BAD_ARGUMENTS};
+
+    auto attribute_index = it->second;
+
     while (const auto block = stream->read())
     {
         const IColumn & id_column = *block.safeGetByPosition(0).column;
 
-        for (const size_t attribute_idx : ext::range(0, attributes.size()))
+        const IColumn & attribute_column = *block.safeGetByPosition(attribute_index + 1).column;
+
+        for (const auto row_idx : ext::range(0, id_column.size()))
         {
-            if (attribute.name != attribute_name_by_index.at(attribute_idx))
+            const auto key = id_column[row_idx].get<UInt64>();
+
+            if (value_by_key.find(key) != value_by_key.end())
             {
-                continue;
-            }
+                auto value = attribute_column[row_idx];
 
-            const IColumn & attribute_column = *block.safeGetByPosition(attribute_idx + 1).column;
-
-            for (const auto row_idx : ext::range(0, id_column.size()))
-            {
-                const auto key = id_column[row_idx].get<UInt64>();
-
-                if (value_by_key.find(key) != value_by_key.end())
-                {
-                    if (attribute.type == AttributeUnderlyingType::utFloat32)
-                    {
-                        value_by_key[key] = static_cast<Float32>(attribute_column[row_idx].get<Float64>());
-                    }
-                    else
-                    {
-                        value_by_key[key] = static_cast<OutputType>(attribute_column[row_idx].get<AttributeType>());
-                    }
-
-                }
+                if (value.isNull())
+                    value_is_null[key] = true;
+                else
+                    value_by_key[key] = static_cast<OutputType>(value.get<NearestFieldType<AttributeType>>());
             }
         }
     }
@@ -421,7 +480,11 @@ void DirectDictionary::getItemsImpl(
     stream->readSuffix();
 
     for (const auto row : ext::range(0, rows))
-        set_value(row, value_by_key[ids[row]]);
+    {
+        auto key = ids[row];
+        std::cerr << "DirectDictionary set_value " << row << std::endl;
+        set_value(row, value_by_key[key], value_is_null[key]);
+    }
 
     query_count.fetch_add(rows, std::memory_order_relaxed);
 }
diff --git a/src/Dictionaries/DirectDictionary.h b/src/Dictionaries/DirectDictionary.h
index 48250315657..dcfea296778 100644
--- a/src/Dictionaries/DirectDictionary.h
+++ b/src/Dictionaries/DirectDictionary.h
@@ -82,6 +82,7 @@ private:
     struct Attribute final
     {
         AttributeUnderlyingType type;
+        bool is_nullable;
         std::variant<
             UInt8,
             UInt16,
@@ -111,7 +112,7 @@ private:
     template <typename T>
     void createAttributeImpl(Attribute & attribute, const Field & null_value);
 
-    Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value, const std::string & name);
+    Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value, const std::string & name);
 
     template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
     void getItemsImpl(
diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp
index 9cbbbbd4f83..7278808f773 100644
--- a/src/Dictionaries/FlatDictionary.cpp
+++ b/src/Dictionaries/FlatDictionary.cpp
@@ -162,6 +162,8 @@ ColumnPtr FlatDictionary::getColumn(
                         [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
                         [&](const size_t) { return def; });
                 }
+                else
+                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
             }
             else
             {
@@ -201,7 +203,7 @@ ColumnPtr FlatDictionary::getColumn(
                     getItemsImpl<AttributeType, AttributeType>(
                         attribute,
                         ids,
-                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t row, const auto value) { out[row] = value; },
                         [&](const size_t row) { return default_col->getData()[row]; }
                     );
                 }
@@ -212,10 +214,12 @@ ColumnPtr FlatDictionary::getColumn(
                     getItemsImpl<AttributeType, AttributeType>(
                         attribute,
                         ids,
-                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t row, const auto value) { out[row] = value; },
                         [&](const size_t) { return def; }
                     );
                 }
+                else
+                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
             }
             else
             {
@@ -224,7 +228,7 @@ ColumnPtr FlatDictionary::getColumn(
                 getItemsImpl<AttributeType, AttributeType>(
                     attribute,
                     ids,
-                    [&](const size_t row, const auto value) { return out[row] = value; },
+                    [&](const size_t row, const auto value) { out[row] = value; },
                     [&](const size_t) { return null_value; }
                 );
             }
diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp
index 133d9c0db99..571d70eb303 100644
--- a/src/Dictionaries/HashedDictionary.cpp
+++ b/src/Dictionaries/HashedDictionary.cpp
@@ -185,6 +185,8 @@ ColumnPtr HashedDictionary::getColumn(
                         [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
                         [&](const size_t) { return def; });
                 }
+                else
+                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
             }
             else
             {
@@ -239,6 +241,8 @@ ColumnPtr HashedDictionary::getColumn(
                         [&](const size_t) { return def; }
                     );
                 }
+                else
+                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
             }
             else
             {
@@ -692,7 +696,12 @@ void HashedDictionary::has(const Attribute & attribute, const PaddedPODArray<Key
     const auto rows = ext::size(ids);
 
     for (const auto i : ext::range(0, rows))
+    {
         out[i] = attr.find(ids[i]) != nullptr;
+
+        if (attribute.is_nullable && !out[i])
+            out[i] = attribute.nullable_set->find(ids[i]) != nullptr;
+    }
 }
 
 template <>
diff --git a/src/Dictionaries/IPAddressDictionary.cpp b/src/Dictionaries/IPAddressDictionary.cpp
index 5706c5bc39a..f611725f740 100644
--- a/src/Dictionaries/IPAddressDictionary.cpp
+++ b/src/Dictionaries/IPAddressDictionary.cpp
@@ -315,6 +315,8 @@ ColumnPtr IPAddressDictionary::getColumn(
                         [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
                         [&](const size_t) { return StringRef { def }; });
                 }
+                else
+                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
             }
             else
             {
@@ -369,6 +371,8 @@ ColumnPtr IPAddressDictionary::getColumn(
                         [&](const size_t) { return def; }
                     );
                 }
+                else
+                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
             }
             else
             {
diff --git a/src/Dictionaries/PolygonDictionary.cpp b/src/Dictionaries/PolygonDictionary.cpp
index 3bfc8d997ac..3b8d3ad3b5a 100644
--- a/src/Dictionaries/PolygonDictionary.cpp
+++ b/src/Dictionaries/PolygonDictionary.cpp
@@ -140,6 +140,8 @@ ColumnPtr IPolygonDictionary::getColumn(
                         [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
                         [&](const size_t) { return def; });
                 }
+                else
+                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
             }
             else
             {
@@ -194,6 +196,8 @@ ColumnPtr IPolygonDictionary::getColumn(
                         [&](const size_t) { return def; }
                     );
                 }
+                else
+                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
             }
             else
             {
diff --git a/src/Dictionaries/RangeHashedDictionary.cpp b/src/Dictionaries/RangeHashedDictionary.cpp
index 9684eec5cfa..1bee24f6bcb 100644
--- a/src/Dictionaries/RangeHashedDictionary.cpp
+++ b/src/Dictionaries/RangeHashedDictionary.cpp
@@ -143,6 +143,8 @@ ColumnPtr RangeHashedDictionary::getColumn(
                         [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
                         [&](const size_t) { return def; });
                 }
+                else
+                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
             }
             else
             {
@@ -197,6 +199,8 @@ ColumnPtr RangeHashedDictionary::getColumn(
                         [&](const size_t) { return def; }
                     );
                 }
+                else
+                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
             }
             else
             {
diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp
index aa3893daaeb..4cac3ffb94d 100644
--- a/src/Dictionaries/SSDCacheDictionary.cpp
+++ b/src/Dictionaries/SSDCacheDictionary.cpp
@@ -1368,6 +1368,8 @@ ColumnPtr SSDCacheDictionary::getColumn(
 
                     getItemsStringImpl(index, ids, column_string.get(), [&](const size_t) { return StringRef{def}; });
                 }
+                else
+                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
             }
             else
             {
@@ -1418,6 +1420,8 @@ ColumnPtr SSDCacheDictionary::getColumn(
                         [&](const size_t) { return def; }
                     );
                 }
+                else
+                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
             }
             else
             {
diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp
index f878c25fef5..17c9bd137af 100644
--- a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp
+++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp
@@ -1417,6 +1417,8 @@ ColumnPtr SSDComplexKeyCacheDictionary::getColumn(
 
                     getItemsStringImpl(index, key_columns, key_types, out, [&](const size_t) { return StringRef{def}; });
                 }
+                else
+                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
             }
             else
             {
@@ -1469,6 +1471,8 @@ ColumnPtr SSDComplexKeyCacheDictionary::getColumn(
                         [&](const size_t) { return def; }
                     );
                 }
+                else
+                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
             }
             else
             {
diff --git a/src/Formats/MySQLBlockInputStream.cpp b/src/Formats/MySQLBlockInputStream.cpp
index 5a1af418b7d..9b4e37df353 100644
--- a/src/Formats/MySQLBlockInputStream.cpp
+++ b/src/Formats/MySQLBlockInputStream.cpp
@@ -70,6 +70,7 @@ namespace
                 assert_cast<ColumnUInt32 &>(column).insertValue(value.getUInt());
                 break;
             case ValueType::vtUInt64:
+                std::cerr << "Insert uint64 " << value.getUInt() << std::endl;
                 assert_cast<ColumnUInt64 &>(column).insertValue(value.getUInt());
                 break;
             case ValueType::vtInt8:
@@ -91,6 +92,7 @@ namespace
                 assert_cast<ColumnFloat64 &>(column).insertValue(value.getDouble());
                 break;
             case ValueType::vtString:
+                std::cerr << "Insert string " << std::string(value.data(), value.size()) << std::endl;
                 assert_cast<ColumnString &>(column).insertData(value.data(), value.size());
                 break;
             case ValueType::vtDate:
@@ -146,20 +148,32 @@ Block MySQLBlockInputStream::readImpl()
             const auto value = row[position_mapping[index]];
             const auto & sample = description.sample_block.getByPosition(index);
 
+            bool is_type_nullable = description.types[index].second;
+
             if (!value.isNull())
             {
-                if (description.types[index].second)
+                if (is_type_nullable)
                 {
                     ColumnNullable & column_nullable = assert_cast<ColumnNullable &>(*columns[index]);
                     const auto & data_type = assert_cast<const DataTypeNullable &>(*sample.type);
                     insertValue(*data_type.getNestedType(), column_nullable.getNestedColumn(), description.types[index].first, value);
-                    column_nullable.getNullMapData().emplace_back(0);
+                    column_nullable.getNullMapData().emplace_back(false);
                 }
                 else
+                {
                     insertValue(*sample.type, *columns[index], description.types[index].first, value);
+                }
             }
             else
+            {
                 insertDefaultValue(*columns[index], *sample.column);
+
+                if (is_type_nullable)
+                {
+                    ColumnNullable & column_nullable = assert_cast<ColumnNullable &>(*columns[index]);
+                    column_nullable.getNullMapData().back() = true;
+                }
+            }
         }
 
         ++num_rows;

From 6e51b2f60a4cdf44f264975c39ef76de0bec6186 Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Tue, 5 Jan 2021 00:07:45 +0300
Subject: [PATCH 0234/1238] Added Nullable support for RangeHashedDictionary

---
 src/Dictionaries/RangeHashedDictionary.cpp | 270 ++++++++++-----------
 src/Dictionaries/RangeHashedDictionary.h   |  13 +-
 2 files changed, 140 insertions(+), 143 deletions(-)

diff --git a/src/Dictionaries/RangeHashedDictionary.cpp b/src/Dictionaries/RangeHashedDictionary.cpp
index 1bee24f6bcb..d1ce99b6c3a 100644
--- a/src/Dictionaries/RangeHashedDictionary.cpp
+++ b/src/Dictionaries/RangeHashedDictionary.cpp
@@ -113,6 +113,14 @@ ColumnPtr RangeHashedDictionary::getColumn(
     auto range_column_storage_type = std::make_shared<DataTypeInt64>();
     modified_key_columns[1] = castColumnAccurate(column_to_cast, range_column_storage_type);
 
+    ColumnUInt8::MutablePtr col_null_map_to;
+    ColumnUInt8::Container * vec_null_map_to = nullptr;
+    if (attribute.is_nullable)
+    {
+        col_null_map_to = ColumnUInt8::create(size, false);
+        vec_null_map_to = &col_null_map_to->getData();
+    }
+
     auto type_call = [&](const auto &dictionary_attribute_type)
     {
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
@@ -130,7 +138,15 @@ ColumnPtr RangeHashedDictionary::getColumn(
                     getItemsImpl<StringRef, StringRef>(
                         attribute,
                         modified_key_columns,
-                        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
+                        [&](const size_t row, const StringRef value, bool is_null)
+                        {
+                            if (attribute.is_nullable)
+                            {
+                                (*vec_null_map_to)[row] = is_null;
+                            }
+
+                            out->insertData(value.data, value.size);
+                        },
                         [&](const size_t row) { return default_col->getDataAt(row); });
                 }
                 else if (const auto * const default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
@@ -140,7 +156,15 @@ ColumnPtr RangeHashedDictionary::getColumn(
                     getItemsImpl<StringRef, StringRef>(
                         attribute,
                         modified_key_columns,
-                        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
+                        [&](const size_t row, const StringRef value, bool is_null)
+                        {
+                            if (attribute.is_nullable)
+                            {
+                                (*vec_null_map_to)[row] = is_null;
+                            }
+
+                            out->insertData(value.data, value.size);
+                        },
                         [&](const size_t) { return def; });
                 }
                 else
@@ -153,7 +177,15 @@ ColumnPtr RangeHashedDictionary::getColumn(
                 getItemsImpl<StringRef, StringRef>(
                     attribute,
                     modified_key_columns,
-                    [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
+                    [&](const size_t row, const StringRef value, bool is_null)
+                    {
+                        if (attribute.is_nullable)
+                        {
+                            (*vec_null_map_to)[row] = is_null;
+                        }
+
+                        out->insertData(value.data, value.size);
+                    },
                     [&](const size_t) { return null_value; });
             }
 
@@ -184,7 +216,15 @@ ColumnPtr RangeHashedDictionary::getColumn(
                     getItemsImpl<AttributeType, AttributeType>(
                         attribute,
                         modified_key_columns,
-                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t row, const auto value, bool is_null)
+                        {
+                            if (attribute.is_nullable)
+                            {
+                                (*vec_null_map_to)[row] = is_null;
+                            }
+
+                            out[row] = value; 
+                        },
                         [&](const size_t row) { return default_col->getData()[row]; }
                     );
                 }
@@ -195,7 +235,15 @@ ColumnPtr RangeHashedDictionary::getColumn(
                     getItemsImpl<AttributeType, AttributeType>(
                         attribute,
                         modified_key_columns,
-                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t row, const auto value, bool is_null)
+                        {
+                            if (attribute.is_nullable)
+                            {
+                                (*vec_null_map_to)[row] = is_null;
+                            }
+
+                            out[row] = value; 
+                        },
                         [&](const size_t) { return def; }
                     );
                 }
@@ -209,9 +257,16 @@ ColumnPtr RangeHashedDictionary::getColumn(
                 getItemsImpl<AttributeType, AttributeType>(
                     attribute,
                     modified_key_columns,
-                    [&](const size_t row, const auto value) { return out[row] = value; },
-                    [&](const size_t) { return null_value; }
-                );
+                    [&](const size_t row, const auto value, bool is_null)
+                    {
+                        if (attribute.is_nullable)
+                        {
+                            (*vec_null_map_to)[row] = is_null;
+                        }
+
+                        out[row] = value;
+                    },
+                    [&](const size_t) { return null_value; });
             }
 
             result = std::move(column);
@@ -220,6 +275,11 @@ ColumnPtr RangeHashedDictionary::getColumn(
 
     callOnDictionaryAttributeType(attribute.type, type_call);
 
+    if (attribute.is_nullable)
+    {
+        result = ColumnNullable::create(result, std::move(col_null_map_to));
+    }
+
     return result;
 }
 
@@ -237,7 +297,7 @@ void RangeHashedDictionary::createAttributes()
     for (const auto & attribute : dict_struct.attributes)
     {
         attribute_index_by_name.emplace(attribute.name, attributes.size());
-        attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));
+        attributes.push_back(createAttribute(attribute, attribute.null_value));
 
         if (attribute.hierarchical)
             throw Exception{ErrorCodes::BAD_ARGUMENTS, "Hierarchical attributes not supported by {} dictionary.",
@@ -306,9 +366,7 @@ void RangeHashedDictionary::addAttributeSize(const Attribute & attribute)
 template <>
 void RangeHashedDictionary::addAttributeSize<String>(const Attribute & attribute)
 {
-    const auto & map_ref = std::get<Ptr<StringRef>>(attribute.maps);
-    bytes_allocated += sizeof(Collection<StringRef>) + map_ref->getBufferSizeInBytes();
-    bucket_count = map_ref->getBufferSizeInCells();
+    addAttributeSize<StringRef>(attribute);
     bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
 }
 
@@ -347,9 +405,9 @@ void RangeHashedDictionary::createAttributeImpl<String>(Attribute & attribute, c
 }
 
 RangeHashedDictionary::Attribute
-RangeHashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
+RangeHashedDictionary::createAttribute(const DictionaryAttribute& attribute, const Field & null_value)
 {
-    Attribute attr{type, {}, {}, {}};
+    Attribute attr{attribute.underlying_type, attribute.is_nullable, {}, {}, {}};
 
     auto type_call = [&](const auto &dictionary_attribute_type)
     {
@@ -358,7 +416,7 @@ RangeHashedDictionary::createAttributeWithType(const AttributeUnderlyingType typ
         createAttributeImpl<AttributeType>(attr, null_value);
     };
 
-    callOnDictionaryAttributeType(type, type_call);
+    callOnDictionaryAttributeType(attribute.underlying_type, type_call);
 
     return attr;
 }
@@ -391,11 +449,23 @@ void RangeHashedDictionary::getItemsImpl(
                       return v.range.contains(date);
                   });
 
-            set_value(row, static_cast<OutputType>(val_it != std::end(ranges_and_values) ? val_it->value : get_default(row))); // NOLINT
+            if (val_it != std::end(ranges_and_values))
+            {
+                auto& value = val_it->value;
+
+                if (value)
+                    set_value(row, static_cast<OutputType>(*value), false); // NOLINT
+                else
+                    set_value(row, get_default(row), true);
+            }
+            else
+            {
+                set_value(row, get_default(row), false);
+            }
         }
         else
         {
-            set_value(row, get_default(row));
+            set_value(row, get_default(row), false);
         }
     }
 
@@ -404,9 +474,32 @@ void RangeHashedDictionary::getItemsImpl(
 
 
 template <typename T>
-void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const Key id, const Range & range, const T value)
+void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const Key id, const Range & range, const Field & value)
 {
-    auto & map = *std::get<Ptr<T>>(attribute.maps);
+    using ValueType = std::conditional_t<std::is_same_v<T, String>, StringRef, T>;
+    auto & map = *std::get<Ptr<ValueType>>(attribute.maps);
+    
+    Value<ValueType> value_to_insert;
+
+    if (attribute.is_nullable && value.isNull())
+    {
+        value_to_insert = { range, {} };
+    }
+    else
+    {
+        if constexpr (std::is_same_v<T, String>)
+        {
+            const auto & string = value.get<String>();
+            const auto * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
+            const StringRef string_ref{string_in_arena, string.size()};
+            value_to_insert = Value<ValueType>{ range, { string_ref }};
+        }
+        else
+        {
+            value_to_insert = Value<ValueType>{ range, { value.get<NearestFieldType<ValueType>>() }};
+        }
+    }
+
     const auto it = map.find(id);
 
     if (it)
@@ -414,92 +507,28 @@ void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const K
         auto & values = it->getMapped();
 
         const auto insert_it
-            = std::lower_bound(std::begin(values), std::end(values), range, [](const Value<T> & lhs, const Range & rhs_range)
+            = std::lower_bound(std::begin(values), std::end(values), range, [](const Value<ValueType> & lhs, const Range & rhs_range)
               {
                   return lhs.range < rhs_range;
               });
 
-        values.insert(insert_it, Value<T>{range, value});
+        values.insert(insert_it, std::move(value_to_insert));
     }
     else
-        map.insert({id, Values<T>{Value<T>{range, value}}});
+        map.insert({id, Values<ValueType>{std::move(value_to_insert)}});
 }
 
 void RangeHashedDictionary::setAttributeValue(Attribute & attribute, const Key id, const Range & range, const Field & value)
 {
-    switch (attribute.type)
+    auto type_call = [&](const auto &dictionary_attribute_type)
     {
-        case AttributeUnderlyingType::utUInt8:
-            setAttributeValueImpl<UInt8>(attribute, id, range, value.get<UInt64>());
-            break;
-        case AttributeUnderlyingType::utUInt16:
-            setAttributeValueImpl<UInt16>(attribute, id, range, value.get<UInt64>());
-            break;
-        case AttributeUnderlyingType::utUInt32:
-            setAttributeValueImpl<UInt32>(attribute, id, range, value.get<UInt64>());
-            break;
-        case AttributeUnderlyingType::utUInt64:
-            setAttributeValueImpl<UInt64>(attribute, id, range, value.get<UInt64>());
-            break;
-        case AttributeUnderlyingType::utUInt128:
-            setAttributeValueImpl<UInt128>(attribute, id, range, value.get<UInt128>());
-            break;
-        case AttributeUnderlyingType::utInt8:
-            setAttributeValueImpl<Int8>(attribute, id, range, value.get<Int64>());
-            break;
-        case AttributeUnderlyingType::utInt16:
-            setAttributeValueImpl<Int16>(attribute, id, range, value.get<Int64>());
-            break;
-        case AttributeUnderlyingType::utInt32:
-            setAttributeValueImpl<Int32>(attribute, id, range, value.get<Int64>());
-            break;
-        case AttributeUnderlyingType::utInt64:
-            setAttributeValueImpl<Int64>(attribute, id, range, value.get<Int64>());
-            break;
-        case AttributeUnderlyingType::utFloat32:
-            setAttributeValueImpl<Float32>(attribute, id, range, value.get<Float64>());
-            break;
-        case AttributeUnderlyingType::utFloat64:
-            setAttributeValueImpl<Float64>(attribute, id, range, value.get<Float64>());
-            break;
+        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+        using AttributeType = typename Type::AttributeType;
 
-        case AttributeUnderlyingType::utDecimal32:
-            setAttributeValueImpl<Decimal32>(attribute, id, range, value.get<Decimal32>());
-            break;
-        case AttributeUnderlyingType::utDecimal64:
-            setAttributeValueImpl<Decimal64>(attribute, id, range, value.get<Decimal64>());
-            break;
-        case AttributeUnderlyingType::utDecimal128:
-            setAttributeValueImpl<Decimal128>(attribute, id, range, value.get<Decimal128>());
-            break;
+        setAttributeValueImpl<AttributeType>(attribute, id, range, value);
+    };
 
-        case AttributeUnderlyingType::utString:
-        {
-            auto & map = *std::get<Ptr<StringRef>>(attribute.maps);
-            const auto & string = value.get<String>();
-            const auto * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
-            const StringRef string_ref{string_in_arena, string.size()};
-
-            auto * it = map.find(id);
-
-            if (it)
-            {
-                auto & values = it->getMapped();
-
-                const auto insert_it = std::lower_bound(
-                    std::begin(values), std::end(values), range, [](const Value<StringRef> & lhs, const Range & rhs_range)
-                    {
-                        return lhs.range < rhs_range;
-                    });
-
-                values.insert(insert_it, Value<StringRef>{range, string_ref});
-            }
-            else
-                map.insert({id, Values<StringRef>{Value<StringRef>{range, string_ref}}});
-
-            break;
-        }
-    }
+    callOnDictionaryAttributeType(attribute.type, type_call);
 }
 
 const RangeHashedDictionary::Attribute & RangeHashedDictionary::getAttribute(const std::string & attribute_name) const
@@ -528,55 +557,18 @@ void RangeHashedDictionary::getIdsAndDates(
 {
     const auto & attribute = attributes.front();
 
-    switch (attribute.type)
+    auto type_call = [&](const auto &dictionary_attribute_type)
     {
-        case AttributeUnderlyingType::utUInt8:
-            getIdsAndDates<UInt8>(attribute, ids, start_dates, end_dates);
-            break;
-        case AttributeUnderlyingType::utUInt16:
-            getIdsAndDates<UInt16>(attribute, ids, start_dates, end_dates);
-            break;
-        case AttributeUnderlyingType::utUInt32:
-            getIdsAndDates<UInt32>(attribute, ids, start_dates, end_dates);
-            break;
-        case AttributeUnderlyingType::utUInt64:
-            getIdsAndDates<UInt64>(attribute, ids, start_dates, end_dates);
-            break;
-        case AttributeUnderlyingType::utUInt128:
-            getIdsAndDates<UInt128>(attribute, ids, start_dates, end_dates);
-            break;
-        case AttributeUnderlyingType::utInt8:
-            getIdsAndDates<Int8>(attribute, ids, start_dates, end_dates);
-            break;
-        case AttributeUnderlyingType::utInt16:
-            getIdsAndDates<Int16>(attribute, ids, start_dates, end_dates);
-            break;
-        case AttributeUnderlyingType::utInt32:
-            getIdsAndDates<Int32>(attribute, ids, start_dates, end_dates);
-            break;
-        case AttributeUnderlyingType::utInt64:
-            getIdsAndDates<Int64>(attribute, ids, start_dates, end_dates);
-            break;
-        case AttributeUnderlyingType::utFloat32:
-            getIdsAndDates<Float32>(attribute, ids, start_dates, end_dates);
-            break;
-        case AttributeUnderlyingType::utFloat64:
-            getIdsAndDates<Float64>(attribute, ids, start_dates, end_dates);
-            break;
-        case AttributeUnderlyingType::utString:
-            getIdsAndDates<StringRef>(attribute, ids, start_dates, end_dates);
-            break;
+        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+        using AttributeType = typename Type::AttributeType;
 
-        case AttributeUnderlyingType::utDecimal32:
-            getIdsAndDates<Decimal32>(attribute, ids, start_dates, end_dates);
-            break;
-        case AttributeUnderlyingType::utDecimal64:
-            getIdsAndDates<Decimal64>(attribute, ids, start_dates, end_dates);
-            break;
-        case AttributeUnderlyingType::utDecimal128:
-            getIdsAndDates<Decimal128>(attribute, ids, start_dates, end_dates);
-            break;
-    }
+        if constexpr (std::is_same_v<AttributeType, String>)
+            getIdsAndDates<StringRef>(attribute, ids, start_dates, end_dates);
+        else
+            getIdsAndDates<AttributeType>(attribute, ids, start_dates, end_dates);
+    };
+
+    callOnDictionaryAttributeType(attribute.type, type_call);
 }
 
 template <typename T, typename RangeType>
diff --git a/src/Dictionaries/RangeHashedDictionary.h b/src/Dictionaries/RangeHashedDictionary.h
index 5588bdb3ced..1a3bfd65d33 100644
--- a/src/Dictionaries/RangeHashedDictionary.h
+++ b/src/Dictionaries/RangeHashedDictionary.h
@@ -3,6 +3,7 @@
 #include <Columns/ColumnDecimal.h>
 #include <Columns/ColumnString.h>
 #include <Common/HashTable/HashMap.h>
+#include <Common/HashTable/HashSet.h>
 #include "DictionaryStructure.h"
 #include "IDictionary.h"
 #include "IDictionarySource.h"
@@ -10,7 +11,7 @@
 #include <atomic>
 #include <memory>
 #include <variant>
-
+#include <optional>
 
 namespace DB
 {
@@ -81,7 +82,7 @@ private:
     struct Value final
     {
         Range range;
-        T value;
+        std::optional<T> value;
     };
 
     template <typename T>
@@ -91,10 +92,14 @@ private:
     template <typename T>
     using Ptr = std::unique_ptr<Collection<T>>;
 
+    using NullableSet = HashSet<Key, DefaultHash<Key>>;
+
     struct Attribute final
     {
     public:
         AttributeUnderlyingType type;
+        bool is_nullable;
+
         std::variant<
             UInt8,
             UInt16,
@@ -144,7 +149,7 @@ private:
     template <typename T>
     void createAttributeImpl(Attribute & attribute, const Field & null_value);
 
-    Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
+    Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value);
 
     template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
     void getItemsImpl(
@@ -154,7 +159,7 @@ private:
         DefaultGetter && get_default) const;
 
     template <typename T>
-    void setAttributeValueImpl(Attribute & attribute, const Key id, const Range & range, const T value);
+    void setAttributeValueImpl(Attribute & attribute, const Key id, const Range & range, const Field & value);
 
     void setAttributeValue(Attribute & attribute, const Key id, const Range & range, const Field & value);
 

From 25d6e18ec0aaa18afbfb66fed49ba8ad6228b7bc Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Tue, 5 Jan 2021 00:45:58 +0300
Subject: [PATCH 0235/1238] Added Nullable support for
 ComplexKeyHashedDictionary

---
 .../ComplexKeyHashedDictionary.cpp            | 175 +++++++++++++-----
 src/Dictionaries/ComplexKeyHashedDictionary.h |   8 +-
 src/Dictionaries/HashedDictionary.cpp         |  13 +-
 3 files changed, 132 insertions(+), 64 deletions(-)

diff --git a/src/Dictionaries/ComplexKeyHashedDictionary.cpp b/src/Dictionaries/ComplexKeyHashedDictionary.cpp
index 6aee03668ab..787ba459123 100644
--- a/src/Dictionaries/ComplexKeyHashedDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyHashedDictionary.cpp
@@ -2,6 +2,7 @@
 #include <ext/map.h>
 #include <ext/range.h>
 #include <Columns/ColumnsNumber.h>
+#include <Columns/ColumnNullable.h>
 #include <Functions/FunctionHelpers.h>
 #include "DictionaryBlockInputStream.h"
 #include "DictionaryFactory.h"
@@ -52,6 +53,14 @@ ColumnPtr ComplexKeyHashedDictionary::getColumn(
 
     auto size = key_columns.front()->size();
 
+    ColumnUInt8::MutablePtr col_null_map_to;
+    ColumnUInt8::Container * vec_null_map_to = nullptr;
+    if (attribute.is_nullable)
+    {
+        col_null_map_to = ColumnUInt8::create(size, false);
+        vec_null_map_to = &col_null_map_to->getData();
+    }
+
     auto type_call = [&](const auto &dictionary_attribute_type)
     {
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
@@ -69,7 +78,15 @@ ColumnPtr ComplexKeyHashedDictionary::getColumn(
                     getItemsImpl<StringRef, StringRef>(
                         attribute,
                         key_columns,
-                        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
+                        [&](const size_t row, const StringRef value, bool is_null)
+                        {
+                            if (attribute.is_nullable)
+                            {
+                                (*vec_null_map_to)[row] = is_null;
+                            }
+
+                            out->insertData(value.data, value.size);
+                        },
                         [&](const size_t row) { return default_col->getDataAt(row); });
                 }
                 else if (const auto * const default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
@@ -79,7 +96,15 @@ ColumnPtr ComplexKeyHashedDictionary::getColumn(
                     getItemsImpl<StringRef, StringRef>(
                         attribute,
                         key_columns,
-                        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
+                        [&](const size_t row, const StringRef value, bool is_null)
+                        {
+                            if (attribute.is_nullable)
+                            {
+                                (*vec_null_map_to)[row] = is_null;
+                            }
+
+                            out->insertData(value.data, value.size);
+                        },
                         [&](const size_t) { return def; });
                 }
                 else
@@ -92,7 +117,15 @@ ColumnPtr ComplexKeyHashedDictionary::getColumn(
                 getItemsImpl<StringRef, StringRef>(
                     attribute,
                     key_columns,
-                    [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
+                    [&](const size_t row, const StringRef value, bool is_null)
+                    {
+                        if (attribute.is_nullable)
+                        {
+                            (*vec_null_map_to)[row] = is_null;
+                        }
+
+                        out->insertData(value.data, value.size);
+                    },
                     [&](const size_t) { return null_value; });
             }
 
@@ -123,7 +156,15 @@ ColumnPtr ComplexKeyHashedDictionary::getColumn(
                     getItemsImpl<AttributeType, AttributeType>(
                         attribute,
                         key_columns,
-                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t row, const auto value, bool is_null)
+                        {
+                            if (attribute.is_nullable)
+                            {
+                                (*vec_null_map_to)[row] = is_null;
+                            }
+
+                            out[row] = value;
+                        },
                         [&](const size_t row) { return default_col->getData()[row]; }
                     );
                 }
@@ -134,7 +175,15 @@ ColumnPtr ComplexKeyHashedDictionary::getColumn(
                     getItemsImpl<AttributeType, AttributeType>(
                         attribute,
                         key_columns,
-                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t row, const auto value, bool is_null)
+                        { 
+                            if (attribute.is_nullable)
+                            {
+                                (*vec_null_map_to)[row] = is_null;
+                            }
+                            
+                            out[row] = value;
+                        },
                         [&](const size_t) { return def; }
                     );
                 }
@@ -148,9 +197,16 @@ ColumnPtr ComplexKeyHashedDictionary::getColumn(
                 getItemsImpl<AttributeType, AttributeType>(
                     attribute,
                     key_columns,
-                    [&](const size_t row, const auto value) { return out[row] = value; },
-                    [&](const size_t) { return null_value; }
-                );
+                    [&](const size_t row, const auto value, bool is_null)
+                    {
+                        if (attribute.is_nullable)
+                        {
+                            (*vec_null_map_to)[row] = is_null;
+                        }
+
+                        out[row] = value;
+                    },
+                    [&](const size_t) { return null_value; });
             }
 
             result = std::move(column);
@@ -159,6 +215,11 @@ ColumnPtr ComplexKeyHashedDictionary::getColumn(
 
     callOnDictionaryAttributeType(attribute.type, type_call);
 
+    if (attribute.is_nullable)
+    {
+        result = ColumnNullable::create(result, std::move(col_null_map_to));
+    }
+
     return result;
 }
 
@@ -200,7 +261,7 @@ void ComplexKeyHashedDictionary::createAttributes()
     for (const auto & attribute : dict_struct.attributes)
     {
         attribute_index_by_name.emplace(attribute.name, attributes.size());
-        attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));
+        attributes.push_back(createAttribute(attribute, attribute.null_value));
 
         if (attribute.hierarchical)
             throw Exception{full_name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(),
@@ -402,9 +463,10 @@ void ComplexKeyHashedDictionary::createAttributeImpl<String>(Attribute & attribu
 }
 
 ComplexKeyHashedDictionary::Attribute
-ComplexKeyHashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
+ComplexKeyHashedDictionary::createAttribute(const DictionaryAttribute & attribute, const Field & null_value)
 {
-    Attribute attr{type, {}, {}, {}};
+    auto nullable_set = attribute.is_nullable ? std::make_unique<NullableSet>() : nullptr;
+    Attribute attr{attribute.underlying_type, attribute.is_nullable, std::move(nullable_set), {}, {}, {}};
 
     auto type_call = [&](const auto &dictionary_attribute_type)
     {
@@ -413,7 +475,7 @@ ComplexKeyHashedDictionary::createAttributeWithType(const AttributeUnderlyingTyp
         createAttributeImpl<AttributeType>(attr, null_value);
     };
 
-    callOnDictionaryAttributeType(type, type_call);
+    callOnDictionaryAttributeType(attribute.underlying_type, type_call);
 
     return attr;
 }
@@ -436,7 +498,18 @@ void ComplexKeyHashedDictionary::getItemsImpl(
         const auto key = placeKeysInPool(i, key_columns, keys, temporary_keys_pool);
 
         const auto it = attr.find(key);
-        set_value(i, it ? static_cast<OutputType>(it->getMapped()) : get_default(i));
+
+        if (it)
+        {
+            set_value(i, static_cast<OutputType>(it->getMapped()), false); 
+        }
+        else
+        {
+            if (attribute.is_nullable && attribute.nullable_set->find(key) != nullptr)
+                set_value(i, get_default(i), true);
+            else
+                set_value(i, get_default(i), false);
+        }
 
         /// free memory allocated for the key
         temporary_keys_pool.rollback(key.size);
@@ -454,51 +527,42 @@ bool ComplexKeyHashedDictionary::setAttributeValueImpl(Attribute & attribute, co
     return pair.second;
 }
 
+template <>
+bool ComplexKeyHashedDictionary::setAttributeValueImpl<String>(Attribute & attribute, const StringRef key, const String value)
+{
+    const auto * string_in_arena = attribute.string_arena->insert(value.data(), value.size());
+    return setAttributeValueImpl<StringRef>(attribute, key, string_in_arena);
+}
+
 bool ComplexKeyHashedDictionary::setAttributeValue(Attribute & attribute, const StringRef key, const Field & value)
 {
-    switch (attribute.type)
+    bool result = false;
+
+    auto type_call = [&](const auto &dictionary_attribute_type)
     {
-        case AttributeUnderlyingType::utUInt8:
-            return setAttributeValueImpl<UInt8>(attribute, key, value.get<UInt64>());
-        case AttributeUnderlyingType::utUInt16:
-            return setAttributeValueImpl<UInt16>(attribute, key, value.get<UInt64>());
-        case AttributeUnderlyingType::utUInt32:
-            return setAttributeValueImpl<UInt32>(attribute, key, value.get<UInt64>());
-        case AttributeUnderlyingType::utUInt64:
-            return setAttributeValueImpl<UInt64>(attribute, key, value.get<UInt64>());
-        case AttributeUnderlyingType::utUInt128:
-            return setAttributeValueImpl<UInt128>(attribute, key, value.get<UInt128>());
-        case AttributeUnderlyingType::utInt8:
-            return setAttributeValueImpl<Int8>(attribute, key, value.get<Int64>());
-        case AttributeUnderlyingType::utInt16:
-            return setAttributeValueImpl<Int16>(attribute, key, value.get<Int64>());
-        case AttributeUnderlyingType::utInt32:
-            return setAttributeValueImpl<Int32>(attribute, key, value.get<Int64>());
-        case AttributeUnderlyingType::utInt64:
-            return setAttributeValueImpl<Int64>(attribute, key, value.get<Int64>());
-        case AttributeUnderlyingType::utFloat32:
-            return setAttributeValueImpl<Float32>(attribute, key, value.get<Float64>());
-        case AttributeUnderlyingType::utFloat64:
-            return setAttributeValueImpl<Float64>(attribute, key, value.get<Float64>());
+        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+        using AttributeType = typename Type::AttributeType;
 
-        case AttributeUnderlyingType::utDecimal32:
-            return setAttributeValueImpl<Decimal32>(attribute, key, value.get<Decimal32>());
-        case AttributeUnderlyingType::utDecimal64:
-            return setAttributeValueImpl<Decimal64>(attribute, key, value.get<Decimal64>());
-        case AttributeUnderlyingType::utDecimal128:
-            return setAttributeValueImpl<Decimal128>(attribute, key, value.get<Decimal128>());
-
-        case AttributeUnderlyingType::utString:
+        if (attribute.is_nullable)
         {
-            auto & map = std::get<ContainerType<StringRef>>(attribute.maps);
-            const auto & string = value.get<String>();
-            const auto * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
-            const auto pair = map.insert({key, StringRef{string_in_arena, string.size()}});
-            return pair.second;
+            if (value.isNull())
+            {
+                attribute.nullable_set->insert(key);
+                result = true;
+                return;
+            }
+            else
+            {
+                attribute.nullable_set->erase(key);
+            }
         }
-    }
 
-    return {};
+        result = setAttributeValueImpl<AttributeType>(attribute, key, value.get<NearestFieldType<AttributeType>>());
+    };
+
+    callOnDictionaryAttributeType(attribute.type, type_call);
+
+    return result;
 }
 
 const ComplexKeyHashedDictionary::Attribute & ComplexKeyHashedDictionary::getAttribute(const std::string & attribute_name) const
@@ -549,6 +613,9 @@ void ComplexKeyHashedDictionary::has(const Attribute & attribute, const Columns
         const auto it = attr.find(key);
         out[i] = static_cast<bool>(it);
 
+        if (attribute.is_nullable && !out[i])
+            out[i] = attribute.nullable_set->find(key) != nullptr;
+
         /// free memory allocated for the key
         temporary_keys_pool.rollback(key.size);
     }
@@ -591,6 +658,12 @@ std::vector<StringRef> ComplexKeyHashedDictionary::getKeys(const Attribute & att
     for (const auto & key : attr)
         keys.push_back(key.getKey());
 
+    if (attribute.is_nullable)
+    {
+        for (const auto & key: *attribute.nullable_set)
+            keys.push_back(key.getKey());
+    }
+
     return keys;
 }
 
diff --git a/src/Dictionaries/ComplexKeyHashedDictionary.h b/src/Dictionaries/ComplexKeyHashedDictionary.h
index 01a222d8629..a21ad223f19 100644
--- a/src/Dictionaries/ComplexKeyHashedDictionary.h
+++ b/src/Dictionaries/ComplexKeyHashedDictionary.h
@@ -7,6 +7,7 @@
 #include <Columns/ColumnString.h>
 #include <Common/Arena.h>
 #include <Common/HashTable/HashMap.h>
+#include <Common/HashTable/HashSet.h>
 #include <Core/Block.h>
 #include <common/StringRef.h>
 #include <ext/range.h>
@@ -77,9 +78,14 @@ private:
     template <typename Value>
     using ContainerType = HashMapWithSavedHash<StringRef, Value, StringRefHash>;
 
+    using NullableSet = HashSetWithSavedHash<StringRef, StringRefHash>;
+
     struct Attribute final
     {
         AttributeUnderlyingType type;
+        bool is_nullable;
+        std::unique_ptr<NullableSet> nullable_set;
+
         std::variant<
             UInt8,
             UInt16,
@@ -133,7 +139,7 @@ private:
     template <typename T>
     void createAttributeImpl(Attribute & attribute, const Field & null_value);
 
-    Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
+    Attribute createAttribute(const DictionaryAttribute & attribute, const Field & null_value);
 
     template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
     void
diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp
index 571d70eb303..d13e1b11b17 100644
--- a/src/Dictionaries/HashedDictionary.cpp
+++ b/src/Dictionaries/HashedDictionary.cpp
@@ -637,16 +637,7 @@ template <>
 bool HashedDictionary::setAttributeValueImpl<String>(Attribute & attribute, const Key id, const String value)
 {
     const auto * string_in_arena = attribute.string_arena->insert(value.data(), value.size());
-    if (!sparse)
-    {
-        auto & map = *std::get<CollectionPtrType<StringRef>>(attribute.maps);
-        return map.insert({id, StringRef{string_in_arena, value.size()}}).second;
-    }
-    else
-    {
-        auto & map = *std::get<SparseCollectionPtrType<StringRef>>(attribute.sparse_maps);
-        return map.insert({id, StringRef{string_in_arena, value.size()}}).second;
-    }
+    return setAttributeValueImpl<StringRef>(attribute, id, string_in_arena);
 }
 
 bool HashedDictionary::setAttributeValue(Attribute & attribute, const Key id, const Field & value)
@@ -749,9 +740,7 @@ PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds() const
         if (attribute.is_nullable)
         {
             for (const auto& value: *attribute.nullable_set)
-            {
                 result.push_back(value.getKey());
-            }
         }
     };
 

From 357b0b95a94e4d409fcc45a76962a879c50e5636 Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Tue, 5 Jan 2021 20:25:34 +0300
Subject: [PATCH 0236/1238] Added Nullable support for ComplexDirectDictionary

---
 .../ComplexKeyDirectDictionary.cpp            | 89 +++++++++++++++----
 src/Dictionaries/ComplexKeyDirectDictionary.h |  4 +-
 src/Dictionaries/DirectDictionary.cpp         |  1 -
 3 files changed, 77 insertions(+), 17 deletions(-)

diff --git a/src/Dictionaries/ComplexKeyDirectDictionary.cpp b/src/Dictionaries/ComplexKeyDirectDictionary.cpp
index c476348ccd5..cfc01fee42a 100644
--- a/src/Dictionaries/ComplexKeyDirectDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyDirectDictionary.cpp
@@ -3,6 +3,7 @@
 #include "DictionaryBlockInputStream.h"
 #include "DictionaryFactory.h"
 #include <Core/Defines.h>
+#include <Columns/ColumnNullable.h>
 #include <Functions/FunctionHelpers.h>
 
 namespace DB
@@ -50,6 +51,14 @@ ColumnPtr ComplexKeyDirectDictionary::getColumn(
 
     auto size = key_columns.front()->size();
 
+    ColumnUInt8::MutablePtr col_null_map_to;
+    ColumnUInt8::Container * vec_null_map_to = nullptr;
+    if (attribute.is_nullable)
+    {
+        col_null_map_to = ColumnUInt8::create(size, false);
+        vec_null_map_to = &col_null_map_to->getData();
+    }
+
     auto type_call = [&](const auto & dictionary_attribute_type)
     {
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
@@ -67,8 +76,13 @@ ColumnPtr ComplexKeyDirectDictionary::getColumn(
                     getItemsImpl<String, String>(
                         attribute,
                         key_columns,
-                        [&](const size_t, const String value)
+                        [&](const size_t row, const String value, bool is_null)
                         {
+                            if (attribute.is_nullable)
+                            {
+                                (*vec_null_map_to)[row] = is_null;
+                            }
+ 
                             const auto ref = StringRef{value};
                             out->insertData(ref.data, ref.size);
                         },
@@ -85,8 +99,13 @@ ColumnPtr ComplexKeyDirectDictionary::getColumn(
                     getItemsImpl<String, String>(
                         attribute,
                         key_columns,
-                        [&](const size_t, const String value)
+                        [&](const size_t row, const String value, bool is_null)
                         {
+                            if (attribute.is_nullable)
+                            {
+                                (*vec_null_map_to)[row] = is_null;
+                            }
+ 
                             const auto ref = StringRef{value};
                             out->insertData(ref.data, ref.size);
                         },
@@ -102,8 +121,13 @@ ColumnPtr ComplexKeyDirectDictionary::getColumn(
                 getItemsImpl<String, String>(
                     attribute,
                     key_columns,
-                    [&](const size_t, const String value)
+                    [&](const size_t row, const String value, bool is_null)
                     {
+                        if (attribute.is_nullable)
+                        {
+                            (*vec_null_map_to)[row] = is_null;
+                        } 
+
                         const auto ref = StringRef{value};
                         out->insertData(ref.data, ref.size);
                     },
@@ -137,7 +161,15 @@ ColumnPtr ComplexKeyDirectDictionary::getColumn(
                     getItemsImpl<AttributeType, AttributeType>(
                         attribute,
                         key_columns,
-                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t row, const auto value, bool is_null)
+                        {
+                            if (attribute.is_nullable)
+                            {
+                                (*vec_null_map_to)[row] = is_null;
+                            }
+ 
+                            out[row] = value;
+                        },
                         [&](const size_t row) { return default_col->getData()[row]; }
                     );
                 }
@@ -148,7 +180,15 @@ ColumnPtr ComplexKeyDirectDictionary::getColumn(
                     getItemsImpl<AttributeType, AttributeType>(
                         attribute,
                         key_columns,
-                        [&](const size_t row, const auto value) { return out[row] = value; },
+                        [&](const size_t row, const auto value, bool is_null)
+                        {
+                            if (attribute.is_nullable)
+                            {
+                                (*vec_null_map_to)[row] = is_null;
+                            }
+
+                            out[row] = value;
+                        },
                         [&](const size_t) { return def; }
                     );
                 }
@@ -162,7 +202,15 @@ ColumnPtr ComplexKeyDirectDictionary::getColumn(
                 getItemsImpl<AttributeType, AttributeType>(
                     attribute,
                     key_columns,
-                    [&](const size_t row, const auto value) { return out[row] = value; },
+                    [&](const size_t row, const auto value, bool is_null)
+                    { 
+                        if (attribute.is_nullable)
+                        {
+                            (*vec_null_map_to)[row] = is_null;
+                        } 
+
+                        out[row] = value;
+                    },
                     [&](const size_t) { return null_value; }
                 );
             }
@@ -173,6 +221,11 @@ ColumnPtr ComplexKeyDirectDictionary::getColumn(
 
     callOnDictionaryAttributeType(attribute.type, type_call);
 
+    if (attribute.is_nullable)
+    {
+        result = ColumnNullable::create(result, std::move(col_null_map_to));
+    }
+
     return result;
 }
 
@@ -246,7 +299,7 @@ void ComplexKeyDirectDictionary::createAttributes()
     {
         attribute_index_by_name.emplace(attribute.name, attributes.size());
         attribute_name_by_index.emplace(attributes.size(), attribute.name);
-        attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value, attribute.name));
+        attributes.push_back(createAttribute(attribute, attribute.null_value, attribute.name));
 
         if (attribute.hierarchical)
             throw Exception{full_name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(),
@@ -270,9 +323,10 @@ void ComplexKeyDirectDictionary::createAttributeImpl<String>(Attribute & attribu
 }
 
 
-ComplexKeyDirectDictionary::Attribute ComplexKeyDirectDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value, const std::string & attr_name)
+ComplexKeyDirectDictionary::Attribute ComplexKeyDirectDictionary::createAttribute(
+    const DictionaryAttribute & attribute, const Field & null_value, const std::string & attr_name)
 {
-    Attribute attr{type, {}, {}, attr_name};
+    Attribute attr{attribute.underlying_type, attribute.is_nullable, {}, {}, attr_name};
 
     auto type_call = [&](const auto &dictionary_attribute_type)
     {
@@ -281,7 +335,7 @@ ComplexKeyDirectDictionary::Attribute ComplexKeyDirectDictionary::createAttribut
         createAttributeImpl<AttributeType>(attr, null_value);
     };
 
-    callOnDictionaryAttributeType(type, type_call);
+    callOnDictionaryAttributeType(attribute.underlying_type, type_call);
 
     return attr;
 }
@@ -339,6 +393,7 @@ void ComplexKeyDirectDictionary::getItemsImpl(
     const auto keys_size = dict_struct.key->size();
     StringRefs keys_array(keys_size);
     MapType<OutputType> value_by_key;
+    HashMapWithSavedHash<StringRef, bool, StringRefHash> value_is_null;
     Arena temporary_keys_pool;
     std::vector<size_t> to_load(rows);
     PODArray<StringRef> keys(rows);
@@ -349,6 +404,7 @@ void ComplexKeyDirectDictionary::getItemsImpl(
         keys[row] = key;
         value_by_key[key] = get_default(row);
         to_load[row] = row;
+        value_is_null[key] = false;
     }
 
     auto stream = source_ptr->loadKeys(key_columns, to_load);
@@ -382,17 +438,19 @@ void ComplexKeyDirectDictionary::getItemsImpl(
             for (const auto row_idx : ext::range(0, columns_size))
             {
                 const StringRef key = placeKeysInPool(row_idx, columns, keys_temp, *dict_struct.key, pool);
+
                 if (value_by_key.has(key))
                 {
-                    if (attribute.type == AttributeUnderlyingType::utFloat32)
+                    auto value = attribute_column[row_idx];
+
+                    if (value.isNull())
                     {
-                        value_by_key[key] = static_cast<Float32>(attribute_column[row_idx].template get<Float64>());
+                        value_is_null[key] = true;
                     }
                     else
                     {
-                        value_by_key[key] = static_cast<OutputType>(attribute_column[row_idx].template get<AttributeType>());
+                        value_by_key[key] = static_cast<OutputType>(value.template get<NearestFieldType<AttributeType>>());
                     }
-
                 }
             }
         }
@@ -402,7 +460,8 @@ void ComplexKeyDirectDictionary::getItemsImpl(
 
     for (const auto row : ext::range(0, rows))
     {
-        set_value(row, value_by_key[keys[row]]);
+        auto key = keys[row];
+        set_value(row, value_by_key[key], value_is_null[key]);
     }
 
     query_count.fetch_add(rows, std::memory_order_relaxed);
diff --git a/src/Dictionaries/ComplexKeyDirectDictionary.h b/src/Dictionaries/ComplexKeyDirectDictionary.h
index 6c1f73f12f7..ddc72483dcb 100644
--- a/src/Dictionaries/ComplexKeyDirectDictionary.h
+++ b/src/Dictionaries/ComplexKeyDirectDictionary.h
@@ -80,6 +80,8 @@ private:
     struct Attribute final
     {
         AttributeUnderlyingType type;
+        bool is_nullable;
+        
         std::variant<
             UInt8,
             UInt16,
@@ -109,7 +111,7 @@ private:
     template <typename T>
     void createAttributeImpl(Attribute & attribute, const Field & null_value);
 
-    Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value, const std::string & name);
+    Attribute createAttribute(const DictionaryAttribute & attribute, const Field & null_value, const std::string & name);
 
     template <typename Pool>
     StringRef placeKeysInPool(
diff --git a/src/Dictionaries/DirectDictionary.cpp b/src/Dictionaries/DirectDictionary.cpp
index ffd1dd2d01b..830cbc9967d 100644
--- a/src/Dictionaries/DirectDictionary.cpp
+++ b/src/Dictionaries/DirectDictionary.cpp
@@ -482,7 +482,6 @@ void DirectDictionary::getItemsImpl(
     for (const auto row : ext::range(0, rows))
     {
         auto key = ids[row];
-        std::cerr << "DirectDictionary set_value " << row << std::endl;
         set_value(row, value_by_key[key], value_is_null[key]);
     }
 

From a45459e09552c45aaca5d3609a401da960962c98 Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Thu, 7 Jan 2021 19:24:18 +0300
Subject: [PATCH 0237/1238] Fixed tests

---
 .../ComplexKeyCacheDictionary.cpp             | 379 ++++++++++++++++++
 src/Dictionaries/ComplexKeyCacheDictionary.h  | 366 +----------------
 .../ComplexKeyDirectDictionary.cpp            |  12 +-
 src/Dictionaries/ComplexKeyDirectDictionary.h |   2 +-
 .../ComplexKeyHashedDictionary.cpp            |   8 +-
 src/Dictionaries/DirectDictionary.cpp         |  18 +-
 src/Dictionaries/HashedDictionary.cpp         |   3 +-
 src/Dictionaries/RangeHashedDictionary.cpp    |   6 +-
 src/Formats/MySQLBlockInputStream.cpp         |   2 -
 9 files changed, 407 insertions(+), 389 deletions(-)

diff --git a/src/Dictionaries/ComplexKeyCacheDictionary.cpp b/src/Dictionaries/ComplexKeyCacheDictionary.cpp
index 60d2878f694..60457f81027 100644
--- a/src/Dictionaries/ComplexKeyCacheDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyCacheDictionary.cpp
@@ -316,6 +316,385 @@ ColumnUInt8::Ptr ComplexKeyCacheDictionary::has(const Columns & key_columns, con
     return result;
 }
 
+
+template <typename AttributeType, typename OutputType, typename DefaultGetter>
+void ComplexKeyCacheDictionary::getItemsNumberImpl(
+    Attribute & attribute, const Columns & key_columns, PaddedPODArray<OutputType> & out, DefaultGetter && get_default) const
+{
+    /// Mapping: <key> -> { all indices `i` of `key_columns` such that `key_columns[i]` = <key> }
+    MapType<std::vector<size_t>> outdated_keys;
+    auto & attribute_array = std::get<ContainerPtrType<AttributeType>>(attribute.arrays);
+
+    const auto rows_num = key_columns.front()->size();
+    const auto keys_size = dict_struct.key->size();
+    StringRefs keys(keys_size);
+    Arena temporary_keys_pool;
+    PODArray<StringRef> keys_array(rows_num);
+
+    size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0;
+    {
+        const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
+
+        const auto now = std::chrono::system_clock::now();
+        /// fetch up-to-date values, decide which ones require update
+        for (const auto row : ext::range(0, rows_num))
+        {
+            const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
+            keys_array[row] = key;
+            const auto find_result = findCellIdx(key, now);
+
+            /** cell should be updated if either:
+                *    1. keys (or hash) do not match,
+                *    2. cell has expired,
+                *    3. explicit defaults were specified and cell was set default. */
+
+            if (!find_result.valid)
+            {
+                outdated_keys[key].push_back(row);
+                if (find_result.outdated)
+                    ++cache_expired;
+                else
+                    ++cache_not_found;
+            }
+            else
+            {
+                ++cache_hit;
+                const auto & cell_idx = find_result.cell_idx;
+                const auto & cell = cells[cell_idx];
+                out[row] = cell.isDefault() ? get_default(row) : static_cast<OutputType>(attribute_array[cell_idx]);
+            }
+        }
+    }
+    ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired);
+    ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found);
+    ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
+    query_count.fetch_add(rows_num, std::memory_order_relaxed);
+    hit_count.fetch_add(rows_num - outdated_keys.size(), std::memory_order_release);
+
+    if (outdated_keys.empty())
+        return;
+
+    std::vector<size_t> required_rows(outdated_keys.size());
+    std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair)
+    {
+        return pair.getMapped().front();
+    });
+
+    /// request new values
+    update(
+        key_columns,
+        keys_array,
+        required_rows,
+        [&](const StringRef key, const size_t cell_idx)
+        {
+            for (const auto row : outdated_keys[key])
+                out[row] = static_cast<OutputType>(attribute_array[cell_idx]);
+        },
+        [&](const StringRef key, const size_t)
+        {
+            for (const auto row : outdated_keys[key])
+                out[row] = get_default(row);
+        });
+}
+
+template <typename DefaultGetter>
+void ComplexKeyCacheDictionary::getItemsString(Attribute & attribute, const Columns & key_columns, ColumnString * out, DefaultGetter && get_default) const
+{
+    const auto rows_num = key_columns.front()->size();
+    /// save on some allocations
+    out->getOffsets().reserve(rows_num);
+
+    const auto keys_size = dict_struct.key->size();
+    StringRefs keys(keys_size);
+    Arena temporary_keys_pool;
+
+    auto & attribute_array = std::get<ContainerPtrType<StringRef>>(attribute.arrays);
+
+    auto found_outdated_values = false;
+
+    /// perform optimistic version, fallback to pessimistic if failed
+    {
+        const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
+
+        const auto now = std::chrono::system_clock::now();
+        /// fetch up-to-date values, discard on fail
+        for (const auto row : ext::range(0, rows_num))
+        {
+            const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
+            SCOPE_EXIT(temporary_keys_pool.rollback(key.size));
+            const auto find_result = findCellIdx(key, now);
+
+            if (!find_result.valid)
+            {
+                found_outdated_values = true;
+                break;
+            }
+            else
+            {
+                const auto & cell_idx = find_result.cell_idx;
+                const auto & cell = cells[cell_idx];
+                const auto string_ref = cell.isDefault() ? get_default(row) : attribute_array[cell_idx];
+                out->insertData(string_ref.data, string_ref.size);
+            }
+        }
+    }
+
+    /// optimistic code completed successfully
+    if (!found_outdated_values)
+    {
+        query_count.fetch_add(rows_num, std::memory_order_relaxed);
+        hit_count.fetch_add(rows_num, std::memory_order_release);
+        return;
+    }
+
+    /// now onto the pessimistic one, discard possible partial results from the optimistic path
+    out->getChars().resize_assume_reserved(0);
+    out->getOffsets().resize_assume_reserved(0);
+
+    /// Mapping: <key> -> { all indices `i` of `key_columns` such that `key_columns[i]` = <key> }
+    MapType<std::vector<size_t>> outdated_keys;
+    /// we are going to store every string separately
+    MapType<StringRef> map;
+    PODArray<StringRef> keys_array(rows_num);
+
+    size_t total_length = 0;
+    size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0;
+    {
+        const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
+
+        const auto now = std::chrono::system_clock::now();
+        for (const auto row : ext::range(0, rows_num))
+        {
+            const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
+            keys_array[row] = key;
+            const auto find_result = findCellIdx(key, now);
+
+            if (!find_result.valid)
+            {
+                outdated_keys[key].push_back(row);
+                if (find_result.outdated)
+                    ++cache_expired;
+                else
+                    ++cache_not_found;
+            }
+            else
+            {
+                ++cache_hit;
+                const auto & cell_idx = find_result.cell_idx;
+                const auto & cell = cells[cell_idx];
+                const auto string_ref = cell.isDefault() ? get_default(row) : attribute_array[cell_idx];
+
+                if (!cell.isDefault())
+                    map[key] = copyIntoArena(string_ref, temporary_keys_pool);
+
+                total_length += string_ref.size + 1;
+            }
+        }
+    }
+    ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired);
+    ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found);
+    ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
+
+    query_count.fetch_add(rows_num, std::memory_order_relaxed);
+    hit_count.fetch_add(rows_num - outdated_keys.size(), std::memory_order_release);
+
+    /// request new values
+    if (!outdated_keys.empty())
+    {
+        std::vector<size_t> required_rows(outdated_keys.size());
+        std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair)
+        {
+            return pair.getMapped().front();
+        });
+
+        update(
+            key_columns,
+            keys_array,
+            required_rows,
+            [&](const StringRef key, const size_t cell_idx)
+            {
+                const StringRef attribute_value = attribute_array[cell_idx];
+
+                /// We must copy key and value to own memory, because it may be replaced with another
+                ///  in next iterations of inner loop of update.
+                const StringRef copied_key = copyIntoArena(key, temporary_keys_pool);
+                const StringRef copied_value = copyIntoArena(attribute_value, temporary_keys_pool);
+
+                map[copied_key] = copied_value;
+                total_length += (attribute_value.size + 1) * outdated_keys[key].size();
+            },
+            [&](const StringRef key, const size_t)
+            {
+                for (const auto row : outdated_keys[key])
+                    total_length += get_default(row).size + 1;
+            });
+    }
+
+    out->getChars().reserve(total_length);
+
+    for (const auto row : ext::range(0, ext::size(keys_array)))
+    {
+        const StringRef key = keys_array[row];
+        const auto it = map.find(key);
+        const auto string_ref = it ? it->getMapped() : get_default(row);
+        out->insertData(string_ref.data, string_ref.size);
+    }
+}
+
+template <typename PresentKeyHandler, typename AbsentKeyHandler>
+void ComplexKeyCacheDictionary::update(
+    const Columns & in_key_columns,
+    const PODArray<StringRef> & in_keys,
+    const std::vector<size_t> & in_requested_rows,
+    PresentKeyHandler && on_cell_updated,
+    AbsentKeyHandler && on_key_not_found) const
+{
+    MapType<bool> remaining_keys{in_requested_rows.size()};
+    for (const auto row : in_requested_rows)
+        remaining_keys.insert({in_keys[row], false});
+
+    std::uniform_int_distribution<UInt64> distribution(dict_lifetime.min_sec, dict_lifetime.max_sec);
+
+    const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs};
+    {
+        Stopwatch watch;
+        auto stream = source_ptr->loadKeys(in_key_columns, in_requested_rows);
+        stream->readPrefix();
+
+        const auto keys_size = dict_struct.key->size();
+        StringRefs keys(keys_size);
+
+        const auto attributes_size = attributes.size();
+        const auto now = std::chrono::system_clock::now();
+
+        while (const auto block = stream->read())
+        {
+            /// cache column pointers
+            const auto key_columns = ext::map<Columns>(
+                ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; });
+
+            const auto attribute_columns = ext::map<Columns>(ext::range(0, attributes_size), [&](const size_t attribute_idx)
+            {
+                return block.safeGetByPosition(keys_size + attribute_idx).column;
+            });
+
+            const auto rows_num = block.rows();
+
+            for (const auto row : ext::range(0, rows_num))
+            {
+                auto key = allocKey(row, key_columns, keys);
+                const auto hash = StringRefHash{}(key);
+                const auto find_result = findCellIdx(key, now, hash);
+                const auto & cell_idx = find_result.cell_idx;
+                auto & cell = cells[cell_idx];
+
+                for (const auto attribute_idx : ext::range(0, attributes.size()))
+                {
+                    const auto & attribute_column = *attribute_columns[attribute_idx];
+                    auto & attribute = attributes[attribute_idx];
+
+                    setAttributeValue(attribute, cell_idx, attribute_column[row]);
+                }
+
+                /// if cell id is zero and zero does not map to this cell, then the cell is unused
+                if (cell.key == StringRef{} && cell_idx != zero_cell_idx)
+                    element_count.fetch_add(1, std::memory_order_relaxed);
+
+                /// handle memory allocated for old key
+                if (key == cell.key)
+                {
+                    freeKey(key);
+                    key = cell.key;
+                }
+                else
+                {
+                    /// new key is different from the old one
+                    if (cell.key.data)
+                        freeKey(cell.key);
+
+                    cell.key = key;
+                }
+
+                cell.hash = hash;
+
+                if (dict_lifetime.min_sec != 0 && dict_lifetime.max_sec != 0)
+                    cell.setExpiresAt(std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)});
+                else
+                    cell.setExpiresAt(std::chrono::time_point<std::chrono::system_clock>::max());
+
+                /// inform caller
+                on_cell_updated(key, cell_idx);
+                /// mark corresponding id as found
+                remaining_keys[key] = true;
+            }
+        }
+
+        stream->readSuffix();
+
+        ProfileEvents::increment(ProfileEvents::DictCacheKeysRequested, in_requested_rows.size());
+        ProfileEvents::increment(ProfileEvents::DictCacheRequestTimeNs, watch.elapsed());
+    }
+
+    size_t found_num = 0;
+    size_t not_found_num = 0;
+
+    const auto now = std::chrono::system_clock::now();
+
+    /// Check which ids have not been found and require setting null_value
+    for (const auto & key_found_pair : remaining_keys)
+    {
+        if (key_found_pair.getMapped())
+        {
+            ++found_num;
+            continue;
+        }
+
+        ++not_found_num;
+
+        auto key = key_found_pair.getKey();
+        const auto hash = StringRefHash{}(key);
+        const auto find_result = findCellIdx(key, now, hash);
+        const auto & cell_idx = find_result.cell_idx;
+        auto & cell = cells[cell_idx];
+
+        /// Set null_value for each attribute
+        for (auto & attribute : attributes)
+            setDefaultAttributeValue(attribute, cell_idx);
+
+        /// Check if cell had not been occupied before and increment element counter if it hadn't
+        if (cell.key == StringRef{} && cell_idx != zero_cell_idx)
+            element_count.fetch_add(1, std::memory_order_relaxed);
+
+        if (key == cell.key)
+            key = cell.key;
+        else
+        {
+            if (cell.key.data)
+                freeKey(cell.key);
+
+            /// copy key from temporary pool
+            key = copyKey(key);
+            cell.key = key;
+        }
+
+        cell.hash = hash;
+
+        if (dict_lifetime.min_sec != 0 && dict_lifetime.max_sec != 0)
+            cell.setExpiresAt(std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)});
+        else
+            cell.setExpiresAt(std::chrono::time_point<std::chrono::system_clock>::max());
+
+        cell.setDefault();
+
+        /// inform caller that the cell has not been found
+        on_key_not_found(key, cell_idx);
+    }
+
+    ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedFound, found_num);
+    ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedMiss, not_found_num);
+}
+
+
 void ComplexKeyCacheDictionary::createAttributes()
 {
     const auto attributes_size = dict_struct.attributes.size();
diff --git a/src/Dictionaries/ComplexKeyCacheDictionary.h b/src/Dictionaries/ComplexKeyCacheDictionary.h
index 1f8248c594a..45719171478 100644
--- a/src/Dictionaries/ComplexKeyCacheDictionary.h
+++ b/src/Dictionaries/ComplexKeyCacheDictionary.h
@@ -177,225 +177,10 @@ private:
 
     template <typename AttributeType, typename OutputType, typename DefaultGetter>
     void getItemsNumberImpl(
-        Attribute & attribute, const Columns & key_columns, PaddedPODArray<OutputType> & out, DefaultGetter && get_default) const
-    {
-        /// Mapping: <key> -> { all indices `i` of `key_columns` such that `key_columns[i]` = <key> }
-        MapType<std::vector<size_t>> outdated_keys;
-        auto & attribute_array = std::get<ContainerPtrType<AttributeType>>(attribute.arrays);
-
-        const auto rows_num = key_columns.front()->size();
-        const auto keys_size = dict_struct.key->size();
-        StringRefs keys(keys_size);
-        Arena temporary_keys_pool;
-        PODArray<StringRef> keys_array(rows_num);
-
-        size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0;
-        {
-            const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
-
-            const auto now = std::chrono::system_clock::now();
-            /// fetch up-to-date values, decide which ones require update
-            for (const auto row : ext::range(0, rows_num))
-            {
-                const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
-                keys_array[row] = key;
-                const auto find_result = findCellIdx(key, now);
-
-                /** cell should be updated if either:
-                *    1. keys (or hash) do not match,
-                *    2. cell has expired,
-                *    3. explicit defaults were specified and cell was set default. */
-
-                if (!find_result.valid)
-                {
-                    outdated_keys[key].push_back(row);
-                    if (find_result.outdated)
-                        ++cache_expired;
-                    else
-                        ++cache_not_found;
-                }
-                else
-                {
-                    ++cache_hit;
-                    const auto & cell_idx = find_result.cell_idx;
-                    const auto & cell = cells[cell_idx];
-                    out[row] = cell.isDefault() ? get_default(row) : static_cast<OutputType>(attribute_array[cell_idx]);
-                }
-            }
-        }
-        ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired);
-        ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found);
-        ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
-        query_count.fetch_add(rows_num, std::memory_order_relaxed);
-        hit_count.fetch_add(rows_num - outdated_keys.size(), std::memory_order_release);
-
-        if (outdated_keys.empty())
-            return;
-
-        std::vector<size_t> required_rows(outdated_keys.size());
-        std::transform(
-            std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.getMapped().front(); });
-
-        /// request new values
-        update(
-            key_columns,
-            keys_array,
-            required_rows,
-            [&](const StringRef key, const size_t cell_idx)
-            {
-                for (const auto row : outdated_keys[key])
-                    out[row] = static_cast<OutputType>(attribute_array[cell_idx]);
-            },
-            [&](const StringRef key, const size_t)
-            {
-                for (const auto row : outdated_keys[key])
-                    out[row] = get_default(row);
-            });
-    }
+        Attribute & attribute, const Columns & key_columns, PaddedPODArray<OutputType> & out, DefaultGetter && get_default) const;
 
     template <typename DefaultGetter>
-    void getItemsString(Attribute & attribute, const Columns & key_columns, ColumnString * out, DefaultGetter && get_default) const
-    {
-        const auto rows_num = key_columns.front()->size();
-        /// save on some allocations
-        out->getOffsets().reserve(rows_num);
-
-        const auto keys_size = dict_struct.key->size();
-        StringRefs keys(keys_size);
-        Arena temporary_keys_pool;
-
-        auto & attribute_array = std::get<ContainerPtrType<StringRef>>(attribute.arrays);
-
-        auto found_outdated_values = false;
-
-        /// perform optimistic version, fallback to pessimistic if failed
-        {
-            const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
-
-            const auto now = std::chrono::system_clock::now();
-            /// fetch up-to-date values, discard on fail
-            for (const auto row : ext::range(0, rows_num))
-            {
-                const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
-                SCOPE_EXIT(temporary_keys_pool.rollback(key.size));
-                const auto find_result = findCellIdx(key, now);
-
-                if (!find_result.valid)
-                {
-                    found_outdated_values = true;
-                    break;
-                }
-                else
-                {
-                    const auto & cell_idx = find_result.cell_idx;
-                    const auto & cell = cells[cell_idx];
-                    const auto string_ref = cell.isDefault() ? get_default(row) : attribute_array[cell_idx];
-                    out->insertData(string_ref.data, string_ref.size);
-                }
-            }
-        }
-
-        /// optimistic code completed successfully
-        if (!found_outdated_values)
-        {
-            query_count.fetch_add(rows_num, std::memory_order_relaxed);
-            hit_count.fetch_add(rows_num, std::memory_order_release);
-            return;
-        }
-
-        /// now onto the pessimistic one, discard possible partial results from the optimistic path
-        out->getChars().resize_assume_reserved(0);
-        out->getOffsets().resize_assume_reserved(0);
-
-        /// Mapping: <key> -> { all indices `i` of `key_columns` such that `key_columns[i]` = <key> }
-        MapType<std::vector<size_t>> outdated_keys;
-        /// we are going to store every string separately
-        MapType<StringRef> map;
-        PODArray<StringRef> keys_array(rows_num);
-
-        size_t total_length = 0;
-        size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0;
-        {
-            const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
-
-            const auto now = std::chrono::system_clock::now();
-            for (const auto row : ext::range(0, rows_num))
-            {
-                const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
-                keys_array[row] = key;
-                const auto find_result = findCellIdx(key, now);
-
-                if (!find_result.valid)
-                {
-                    outdated_keys[key].push_back(row);
-                    if (find_result.outdated)
-                        ++cache_expired;
-                    else
-                        ++cache_not_found;
-                }
-                else
-                {
-                    ++cache_hit;
-                    const auto & cell_idx = find_result.cell_idx;
-                    const auto & cell = cells[cell_idx];
-                    const auto string_ref = cell.isDefault() ? get_default(row) : attribute_array[cell_idx];
-
-                    if (!cell.isDefault())
-                        map[key] = copyIntoArena(string_ref, temporary_keys_pool);
-
-                    total_length += string_ref.size + 1;
-                }
-            }
-        }
-        ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired);
-        ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found);
-        ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
-
-        query_count.fetch_add(rows_num, std::memory_order_relaxed);
-        hit_count.fetch_add(rows_num - outdated_keys.size(), std::memory_order_release);
-
-        /// request new values
-        if (!outdated_keys.empty())
-        {
-            std::vector<size_t> required_rows(outdated_keys.size());
-            std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair)
-            {
-                return pair.getMapped().front();
-            });
-
-            update(
-                key_columns,
-                keys_array,
-                required_rows,
-                [&](const StringRef key, const size_t cell_idx)
-                {
-                    const StringRef attribute_value = attribute_array[cell_idx];
-
-                    /// We must copy key and value to own memory, because it may be replaced with another
-                    ///  in next iterations of inner loop of update.
-                    const StringRef copied_key = copyIntoArena(key, temporary_keys_pool);
-                    const StringRef copied_value = copyIntoArena(attribute_value, temporary_keys_pool);
-
-                    map[copied_key] = copied_value;
-                    total_length += (attribute_value.size + 1) * outdated_keys[key].size();
-                },
-                [&](const StringRef key, const size_t)
-                {
-                    for (const auto row : outdated_keys[key])
-                        total_length += get_default(row).size + 1;
-                });
-        }
-
-        out->getChars().reserve(total_length);
-
-        for (const auto row : ext::range(0, ext::size(keys_array)))
-        {
-            const StringRef key = keys_array[row];
-            const auto it = map.find(key);
-            const auto string_ref = it ? it->getMapped() : get_default(row);
-            out->insertData(string_ref.data, string_ref.size);
-        }
-    }
+    void getItemsString(Attribute & attribute, const Columns & key_columns, ColumnString * out, DefaultGetter && get_default) const;
 
     template <typename PresentKeyHandler, typename AbsentKeyHandler>
     void update(
@@ -403,152 +188,7 @@ private:
         const PODArray<StringRef> & in_keys,
         const std::vector<size_t> & in_requested_rows,
         PresentKeyHandler && on_cell_updated,
-        AbsentKeyHandler && on_key_not_found) const
-    {
-        MapType<bool> remaining_keys{in_requested_rows.size()};
-        for (const auto row : in_requested_rows)
-            remaining_keys.insert({in_keys[row], false});
-
-        std::uniform_int_distribution<UInt64> distribution(dict_lifetime.min_sec, dict_lifetime.max_sec);
-
-        const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs};
-        {
-            Stopwatch watch;
-            auto stream = source_ptr->loadKeys(in_key_columns, in_requested_rows);
-            stream->readPrefix();
-
-            const auto keys_size = dict_struct.key->size();
-            StringRefs keys(keys_size);
-
-            const auto attributes_size = attributes.size();
-            const auto now = std::chrono::system_clock::now();
-
-            while (const auto block = stream->read())
-            {
-                /// cache column pointers
-                const auto key_columns = ext::map<Columns>(
-                    ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; });
-
-                const auto attribute_columns = ext::map<Columns>(ext::range(0, attributes_size), [&](const size_t attribute_idx)
-                {
-                    return block.safeGetByPosition(keys_size + attribute_idx).column;
-                });
-
-                const auto rows_num = block.rows();
-
-                for (const auto row : ext::range(0, rows_num))
-                {
-                    auto key = allocKey(row, key_columns, keys);
-                    const auto hash = StringRefHash{}(key);
-                    const auto find_result = findCellIdx(key, now, hash);
-                    const auto & cell_idx = find_result.cell_idx;
-                    auto & cell = cells[cell_idx];
-
-                    for (const auto attribute_idx : ext::range(0, attributes.size()))
-                    {
-                        const auto & attribute_column = *attribute_columns[attribute_idx];
-                        auto & attribute = attributes[attribute_idx];
-
-                        setAttributeValue(attribute, cell_idx, attribute_column[row]);
-                    }
-
-                    /// if cell id is zero and zero does not map to this cell, then the cell is unused
-                    if (cell.key == StringRef{} && cell_idx != zero_cell_idx)
-                        element_count.fetch_add(1, std::memory_order_relaxed);
-
-                    /// handle memory allocated for old key
-                    if (key == cell.key)
-                    {
-                        freeKey(key);
-                        key = cell.key;
-                    }
-                    else
-                    {
-                        /// new key is different from the old one
-                        if (cell.key.data)
-                            freeKey(cell.key);
-
-                        cell.key = key;
-                    }
-
-                    cell.hash = hash;
-
-                    if (dict_lifetime.min_sec != 0 && dict_lifetime.max_sec != 0)
-                        cell.setExpiresAt(std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)});
-                    else
-                        cell.setExpiresAt(std::chrono::time_point<std::chrono::system_clock>::max());
-
-                    /// inform caller
-                    on_cell_updated(key, cell_idx);
-                    /// mark corresponding id as found
-                    remaining_keys[key] = true;
-                }
-            }
-
-            stream->readSuffix();
-
-            ProfileEvents::increment(ProfileEvents::DictCacheKeysRequested, in_requested_rows.size());
-            ProfileEvents::increment(ProfileEvents::DictCacheRequestTimeNs, watch.elapsed());
-        }
-
-        size_t found_num = 0;
-        size_t not_found_num = 0;
-
-        const auto now = std::chrono::system_clock::now();
-
-        /// Check which ids have not been found and require setting null_value
-        for (const auto & key_found_pair : remaining_keys)
-        {
-            if (key_found_pair.getMapped())
-            {
-                ++found_num;
-                continue;
-            }
-
-            ++not_found_num;
-
-            auto key = key_found_pair.getKey();
-            const auto hash = StringRefHash{}(key);
-            const auto find_result = findCellIdx(key, now, hash);
-            const auto & cell_idx = find_result.cell_idx;
-            auto & cell = cells[cell_idx];
-
-            /// Set null_value for each attribute
-            for (auto & attribute : attributes)
-                setDefaultAttributeValue(attribute, cell_idx);
-
-            /// Check if cell had not been occupied before and increment element counter if it hadn't
-            if (cell.key == StringRef{} && cell_idx != zero_cell_idx)
-                element_count.fetch_add(1, std::memory_order_relaxed);
-
-            if (key == cell.key)
-                key = cell.key;
-            else
-            {
-                if (cell.key.data)
-                    freeKey(cell.key);
-
-                /// copy key from temporary pool
-                key = copyKey(key);
-                cell.key = key;
-            }
-
-            cell.hash = hash;
-
-            if (dict_lifetime.min_sec != 0 && dict_lifetime.max_sec != 0)
-                cell.setExpiresAt(std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)});
-            else
-                cell.setExpiresAt(std::chrono::time_point<std::chrono::system_clock>::max());
-
-            cell.setDefault();
-
-            /// inform caller that the cell has not been found
-            on_key_not_found(key, cell_idx);
-        }
-
-        ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedFound, found_num);
-        ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedMiss, not_found_num);
-    }
+        AbsentKeyHandler && on_key_not_found) const;
 
     UInt64 getCellIdx(const StringRef key) const;
 
diff --git a/src/Dictionaries/ComplexKeyDirectDictionary.cpp b/src/Dictionaries/ComplexKeyDirectDictionary.cpp
index cfc01fee42a..35ca970889f 100644
--- a/src/Dictionaries/ComplexKeyDirectDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyDirectDictionary.cpp
@@ -82,7 +82,7 @@ ColumnPtr ComplexKeyDirectDictionary::getColumn(
                             {
                                 (*vec_null_map_to)[row] = is_null;
                             }
- 
+
                             const auto ref = StringRef{value};
                             out->insertData(ref.data, ref.size);
                         },
@@ -105,7 +105,7 @@ ColumnPtr ComplexKeyDirectDictionary::getColumn(
                             {
                                 (*vec_null_map_to)[row] = is_null;
                             }
- 
+
                             const auto ref = StringRef{value};
                             out->insertData(ref.data, ref.size);
                         },
@@ -126,7 +126,7 @@ ColumnPtr ComplexKeyDirectDictionary::getColumn(
                         if (attribute.is_nullable)
                         {
                             (*vec_null_map_to)[row] = is_null;
-                        } 
+                        }
 
                         const auto ref = StringRef{value};
                         out->insertData(ref.data, ref.size);
@@ -167,7 +167,7 @@ ColumnPtr ComplexKeyDirectDictionary::getColumn(
                             {
                                 (*vec_null_map_to)[row] = is_null;
                             }
- 
+
                             out[row] = value;
                         },
                         [&](const size_t row) { return default_col->getData()[row]; }
@@ -203,11 +203,11 @@ ColumnPtr ComplexKeyDirectDictionary::getColumn(
                     attribute,
                     key_columns,
                     [&](const size_t row, const auto value, bool is_null)
-                    { 
+                    {
                         if (attribute.is_nullable)
                         {
                             (*vec_null_map_to)[row] = is_null;
-                        } 
+                        }
 
                         out[row] = value;
                     },
diff --git a/src/Dictionaries/ComplexKeyDirectDictionary.h b/src/Dictionaries/ComplexKeyDirectDictionary.h
index ddc72483dcb..fddf34694ba 100644
--- a/src/Dictionaries/ComplexKeyDirectDictionary.h
+++ b/src/Dictionaries/ComplexKeyDirectDictionary.h
@@ -81,7 +81,7 @@ private:
     {
         AttributeUnderlyingType type;
         bool is_nullable;
-        
+
         std::variant<
             UInt8,
             UInt16,
diff --git a/src/Dictionaries/ComplexKeyHashedDictionary.cpp b/src/Dictionaries/ComplexKeyHashedDictionary.cpp
index 787ba459123..85540a8a7f3 100644
--- a/src/Dictionaries/ComplexKeyHashedDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyHashedDictionary.cpp
@@ -176,12 +176,12 @@ ColumnPtr ComplexKeyHashedDictionary::getColumn(
                         attribute,
                         key_columns,
                         [&](const size_t row, const auto value, bool is_null)
-                        { 
+                        {
                             if (attribute.is_nullable)
                             {
                                 (*vec_null_map_to)[row] = is_null;
                             }
-                            
+
                             out[row] = value;
                         },
                         [&](const size_t) { return def; }
@@ -501,7 +501,7 @@ void ComplexKeyHashedDictionary::getItemsImpl(
 
         if (it)
         {
-            set_value(i, static_cast<OutputType>(it->getMapped()), false); 
+            set_value(i, static_cast<OutputType>(it->getMapped()), false);
         }
         else
         {
@@ -531,7 +531,7 @@ template <>
 bool ComplexKeyHashedDictionary::setAttributeValueImpl<String>(Attribute & attribute, const StringRef key, const String value)
 {
     const auto * string_in_arena = attribute.string_arena->insert(value.data(), value.size());
-    return setAttributeValueImpl<StringRef>(attribute, key, string_in_arena);
+    return setAttributeValueImpl<StringRef>(attribute, key, StringRef{string_in_arena, value.size()});
 }
 
 bool ComplexKeyHashedDictionary::setAttributeValue(Attribute & attribute, const StringRef key, const Field & value)
diff --git a/src/Dictionaries/DirectDictionary.cpp b/src/Dictionaries/DirectDictionary.cpp
index 830cbc9967d..15020d11b0d 100644
--- a/src/Dictionaries/DirectDictionary.cpp
+++ b/src/Dictionaries/DirectDictionary.cpp
@@ -179,7 +179,7 @@ ColumnPtr DirectDictionary::getColumn(
                             {
                                 (*vec_null_map_to)[row] = is_null;
                             }
- 
+
                             const auto ref = StringRef{value};
                             out->insertData(ref.data, ref.size);
                         },
@@ -202,7 +202,7 @@ ColumnPtr DirectDictionary::getColumn(
                             {
                                 (*vec_null_map_to)[row] = is_null;
                             }
- 
+
                             const auto ref = StringRef{value};
                             out->insertData(ref.data, ref.size);
                         },
@@ -210,7 +210,7 @@ ColumnPtr DirectDictionary::getColumn(
                 }
                 else
                     throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
- 
+
             }
             else
             {
@@ -265,8 +265,8 @@ ColumnPtr DirectDictionary::getColumn(
                             {
                                 (*vec_null_map_to)[row] = is_null;
                             }
- 
-                            out[row] = value; 
+
+                            out[row] = value;
                         },
                         [&](const size_t row) { return default_col->getData()[row]; }
                     );
@@ -284,8 +284,8 @@ ColumnPtr DirectDictionary::getColumn(
                             {
                                 (*vec_null_map_to)[row] = is_null;
                             }
- 
-                            out[row] = value; 
+
+                            out[row] = value;
                         },
                         [&](const size_t) { return def; }
                     );
@@ -306,8 +306,8 @@ ColumnPtr DirectDictionary::getColumn(
                         {
                             (*vec_null_map_to)[row] = is_null;
                         }
- 
-                        out[row] = value; 
+
+                        out[row] = value;
                     },
                     [&](const size_t) { return null_value; }
                 );
diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp
index d13e1b11b17..c32dc4492a0 100644
--- a/src/Dictionaries/HashedDictionary.cpp
+++ b/src/Dictionaries/HashedDictionary.cpp
@@ -608,6 +608,7 @@ void HashedDictionary::getItemsAttrImpl(
 
     query_count.fetch_add(rows, std::memory_order_relaxed);
 }
+
 template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
 void HashedDictionary::getItemsImpl(
     const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
@@ -637,7 +638,7 @@ template <>
 bool HashedDictionary::setAttributeValueImpl<String>(Attribute & attribute, const Key id, const String value)
 {
     const auto * string_in_arena = attribute.string_arena->insert(value.data(), value.size());
-    return setAttributeValueImpl<StringRef>(attribute, id, string_in_arena);
+    return setAttributeValueImpl<StringRef>(attribute, id, StringRef{string_in_arena, value.size()});
 }
 
 bool HashedDictionary::setAttributeValue(Attribute & attribute, const Key id, const Field & value)
diff --git a/src/Dictionaries/RangeHashedDictionary.cpp b/src/Dictionaries/RangeHashedDictionary.cpp
index d1ce99b6c3a..de36419ffba 100644
--- a/src/Dictionaries/RangeHashedDictionary.cpp
+++ b/src/Dictionaries/RangeHashedDictionary.cpp
@@ -223,7 +223,7 @@ ColumnPtr RangeHashedDictionary::getColumn(
                                 (*vec_null_map_to)[row] = is_null;
                             }
 
-                            out[row] = value; 
+                            out[row] = value;
                         },
                         [&](const size_t row) { return default_col->getData()[row]; }
                     );
@@ -242,7 +242,7 @@ ColumnPtr RangeHashedDictionary::getColumn(
                                 (*vec_null_map_to)[row] = is_null;
                             }
 
-                            out[row] = value; 
+                            out[row] = value;
                         },
                         [&](const size_t) { return def; }
                     );
@@ -478,7 +478,7 @@ void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const K
 {
     using ValueType = std::conditional_t<std::is_same_v<T, String>, StringRef, T>;
     auto & map = *std::get<Ptr<ValueType>>(attribute.maps);
-    
+
     Value<ValueType> value_to_insert;
 
     if (attribute.is_nullable && value.isNull())
diff --git a/src/Formats/MySQLBlockInputStream.cpp b/src/Formats/MySQLBlockInputStream.cpp
index 9b4e37df353..87df0c1f4b1 100644
--- a/src/Formats/MySQLBlockInputStream.cpp
+++ b/src/Formats/MySQLBlockInputStream.cpp
@@ -70,7 +70,6 @@ namespace
                 assert_cast<ColumnUInt32 &>(column).insertValue(value.getUInt());
                 break;
             case ValueType::vtUInt64:
-                std::cerr << "Insert uint64 " << value.getUInt() << std::endl;
                 assert_cast<ColumnUInt64 &>(column).insertValue(value.getUInt());
                 break;
             case ValueType::vtInt8:
@@ -92,7 +91,6 @@ namespace
                 assert_cast<ColumnFloat64 &>(column).insertValue(value.getDouble());
                 break;
             case ValueType::vtString:
-                std::cerr << "Insert string " << std::string(value.data(), value.size()) << std::endl;
                 assert_cast<ColumnString &>(column).insertData(value.data(), value.size());
                 break;
             case ValueType::vtDate:

From 3d0778cba49913a94be5be45318a388b73cf096d Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Sat, 9 Jan 2021 00:02:47 +0300
Subject: [PATCH 0238/1238] FunctionsExternalDictionaries refactored

---
 .../FunctionsExternalDictionaries.cpp         |   1 -
 src/Functions/FunctionsExternalDictionaries.h | 497 +++++-------------
 2 files changed, 134 insertions(+), 364 deletions(-)

diff --git a/src/Functions/FunctionsExternalDictionaries.cpp b/src/Functions/FunctionsExternalDictionaries.cpp
index 1bd900f3cbe..f037a3bd808 100644
--- a/src/Functions/FunctionsExternalDictionaries.cpp
+++ b/src/Functions/FunctionsExternalDictionaries.cpp
@@ -40,7 +40,6 @@ void registerFunctionsExternalDictionaries(FunctionFactory & factory)
     factory.registerFunction<FunctionDictGetStringOrDefault>();
     factory.registerFunction<FunctionDictGetNoType<DictionaryGetFunctionType::get>>();
     factory.registerFunction<FunctionDictGetNoType<DictionaryGetFunctionType::getOrDefault>>();
-    factory.registerFunction<FunctionDictGetNoType<DictionaryGetFunctionType::getOrNull>>();
 }
 
 }
diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h
index 0734754289c..11adf63a4d9 100644
--- a/src/Functions/FunctionsExternalDictionaries.h
+++ b/src/Functions/FunctionsExternalDictionaries.h
@@ -9,6 +9,7 @@
 #include <DataTypes/DataTypeTuple.h>
 #include <DataTypes/DataTypeUUID.h>
 #include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypeLowCardinality.h>
 
 #include <Common/typeid_cast.h>
 #include <Common/assert_cast.h>
@@ -118,6 +119,20 @@ public:
         return getDictionary(dict_name_col->getValue<String>())->isInjective(attr_name_col->getValue<String>());
     }
 
+    DictionaryAttribute getDictionaryAttribute(std::shared_ptr<const IDictionaryBase> dictionary, const String& attribute_name) const
+    {
+        const DictionaryStructure & structure = dictionary->getStructure();
+
+        auto find_iter = std::find_if(structure.attributes.begin(), structure.attributes.end(), [&](const auto &attribute)
+        {
+            return attribute.name == attribute_name;
+        });
+
+        if (find_iter == structure.attributes.end())
+            throw Exception{"No such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS};
+
+        return *find_iter;
+    }
 private:
     const Context & context;
     const ExternalDictionariesLoader & external_loader;
@@ -179,15 +194,24 @@ private:
 
         const ColumnWithTypeAndName & key_column_with_type = arguments[1];
         const auto key_column = key_column_with_type.column;
+        const auto key_column_type = WhichDataType(key_column_with_type.type);
 
         if (dictionary_identifier_type == DictionaryIdentifierType::simple)
         {
+            if (!key_column_type.isUInt64())
+                throw Exception{"Second argument of function " + getName() + " must be " + dictionary->getStructure().getKeyDescription(),
+                    ErrorCodes::TYPE_MISMATCH};
+
             return dictionary->has({key_column}, {std::make_shared<DataTypeUInt64>()});
         }
         else if (dictionary_identifier_type == DictionaryIdentifierType::complex)
         {
+            if (!key_column_type.isTuple())
+                throw Exception{"Second argument of function " + getName() + " must be " + dictionary->getStructure().getKeyDescription(),
+                    ErrorCodes::TYPE_MISMATCH};
+
             /// Functions in external dictionaries_loader only support full-value (not constant) columns with keys.
-            ColumnPtr key_column_full = key_column_with_type.column->convertToFullColumnIfConst();
+            ColumnPtr key_column_full = key_column->convertToFullColumnIfConst();
 
             const auto & key_columns = typeid_cast<const ColumnTuple &>(*key_column_full).getColumnsCopy();
             const auto & key_types = static_cast<const DataTypeTuple &>(*key_column_with_type.type).getElements();
@@ -195,10 +219,7 @@ private:
             return dictionary->has(key_columns, key_types);
         }
         else
-        {
-            /// TODO: Add support for range
-            return nullptr;
-        }
+            throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Has not supported for range dictionary", dictionary->getDictionaryID().getNameForLogs());
     }
 
     mutable FunctionDictHelper helper;
@@ -207,82 +228,66 @@ private:
 enum class DictionaryGetFunctionType
 {
     get,
-    getOrDefault,
-    getOrNull
+    getOrDefault
 };
 
-template <typename DataType, typename Name, DictionaryGetFunctionType dictionary_get_function_type>
-class FunctionDictGetImpl final : public IFunction
+/// This variant of function derives the result type automatically.
+template <DictionaryGetFunctionType dictionary_get_function_type>
+class FunctionDictGetNoType final : public IFunction
 {
-    using Type = typename DataType::FieldType;
-
 public:
-    static constexpr auto name = Name::name;
+    static constexpr auto name = dictionary_get_function_type == DictionaryGetFunctionType::get ? "dictGet" : "dictGetOrDefault";
 
-    static FunctionPtr create(const Context & context, UInt32 dec_scale = 0)
+    static FunctionPtr create(const Context & context)
     {
-        return std::make_shared<FunctionDictGetImpl>(context, dec_scale);
+        return std::make_shared<FunctionDictGetNoType>(context);
     }
 
-    explicit FunctionDictGetImpl(const Context & context_, UInt32 dec_scale = 0)
-        : helper(context_)
-        , decimal_scale(dec_scale)
-    {}
+    explicit FunctionDictGetNoType(const Context & context_) : helper(context_) {}
 
     String getName() const override { return name; }
 
-private:
+    bool isVariadic() const override { return true; }
     size_t getNumberOfArguments() const override { return 0; }
 
-    bool isVariadic() const override { return true; }
-
     bool useDefaultImplementationForConstants() const final { return true; }
+    ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 1}; }
 
     bool isDeterministic() const override { return false; }
 
-    ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 1}; }
-
-    DataTypePtr getReturnTypeImpl(const DataTypes &) const override
+    bool isInjective(const ColumnsWithTypeAndName & sample_columns) const override
     {
-        DataTypePtr result;
-        /// TODO: Decimal will not work properly during FunctionDictGetImpl call decimal_scale will not be iniitalized.
-        if constexpr (IsDataTypeDecimal<DataType>)
-            result = std::make_shared<DataType>(DataType::maxPrecision(), decimal_scale);
-        else
-            result = std::make_shared<DataType>();
-
-        if constexpr (dictionary_get_function_type == DictionaryGetFunctionType::getOrNull)
-        {
-            result = std::make_shared<DataTypeNullable>(result);
-        }
-
-        return result;
+        return helper.isDictGetFunctionInjective(sample_columns);
     }
 
-    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
+    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
     {
         if (arguments.size() < 3)
             throw Exception{"Wrong argument count for function " + getName(), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
 
+        String dictionary_name;
+        if (const auto * name_col = checkAndGetColumnConst<ColumnString>(arguments[0].column.get()))
+            dictionary_name = name_col->getValue<String>();
+        else
+            throw Exception{"Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName()
+                + ", expected a const string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
+
+        if (const auto * name_col = checkAndGetColumnConst<ColumnString>(arguments[1].column.get()))
+            attribute_name = name_col->getValue<String>();
+        else
+            throw Exception{"Illegal type " + arguments[1].type->getName() + " of second argument of function " + getName()
+                + ", expected a const string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
+
+        dictionary = helper.getDictionary(dictionary_name);
+
+        return helper.getDictionaryAttribute(dictionary, attribute_name).type;
+    }
+
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
+    {
         if (input_rows_count == 0)
             return result_type->createColumn();
 
-        const auto * dictionary_name_col = checkAndGetColumnConst<ColumnString>(arguments[0].column.get());
-        if (!dictionary_name_col)
-            throw Exception{"First argument of function " + getName() + " must be a constant string", ErrorCodes::ILLEGAL_COLUMN};
-
-        String dictionary_name = dictionary_name_col->getValue<String>();
-
-        auto dictionary = helper.getDictionary(dictionary_name);
-        if (!dictionary)
-            throw Exception("First argument of function " + getName() + " does not name a dictionary", ErrorCodes::ILLEGAL_COLUMN);
-
-        const auto * attr_name_col = checkAndGetColumnConst<ColumnString>(arguments[1].column.get());
-        if (!attr_name_col)
-            throw Exception{"Second argument of function " + getName() + " must be a constant string", ErrorCodes::ILLEGAL_COLUMN};
-
-        String attr_name = attr_name_col->getValue<String>();
-
         /// TODO: Use accurateCast if argument is integer
         if (!WhichDataType(arguments[2].type).isUInt64() && !isTuple(arguments[2].type))
             throw Exception{"Illegal type " + arguments[2].type->getName() + " of third argument of function "
@@ -319,19 +324,19 @@ private:
         if (dictionary_get_function_type == DictionaryGetFunctionType::getOrDefault)
         {
             if (current_arguments_index >= arguments.size())
-                throw Exception{"Wrong argument count for function test " + getName(), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
+                throw Exception{"Wrong argument count for function " + getName(), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
 
             default_col = arguments[current_arguments_index].column;
         }
 
-        ColumnPtr res;
+        ColumnPtr result;
 
         const ColumnWithTypeAndName & key_col_with_type = arguments[2];
         const auto key_column = key_col_with_type.column;
 
         if (dictionary_identifier_type == DictionaryIdentifierType::simple)
         {
-            res = dictionary->getColumn(attr_name, result_type, {key_column}, {std::make_shared<DataTypeUInt64>()}, default_col);
+            result = dictionary->getColumn(attribute_name, result_type, {key_column}, {std::make_shared<DataTypeUInt64>()}, default_col);
         }
         else if (dictionary_identifier_type == DictionaryIdentifierType::complex)
         {
@@ -341,21 +346,88 @@ private:
             const auto & key_columns = typeid_cast<const ColumnTuple &>(*key_column_full).getColumnsCopy();
             const auto & key_types = static_cast<const DataTypeTuple &>(*key_col_with_type.type).getElements();
 
-            res = dictionary->getColumn(attr_name, result_type, key_columns, key_types, default_col);
+            result = dictionary->getColumn(attribute_name, result_type, key_columns, key_types, default_col);
         }
         else if (dictionary_identifier_type == DictionaryIdentifierType::range)
         {
-            res = dictionary->getColumn(
-                attr_name, result_type, {key_column, range_col}, {std::make_shared<DataTypeUInt64>(), range_col_type}, default_col);
+            result = dictionary->getColumn(
+                attribute_name, result_type, {key_column, range_col}, {std::make_shared<DataTypeUInt64>(), range_col_type}, default_col);
         }
         else
             throw Exception{"Unknown dictionary identifier type", ErrorCodes::BAD_ARGUMENTS};
 
-        return res;
+        return result;
     }
 
+private:
     mutable FunctionDictHelper helper;
-    UInt32 decimal_scale;
+    /// Initialized in getReturnTypeImpl
+    mutable std::shared_ptr<const IDictionaryBase> dictionary;
+    mutable String attribute_name;
+};
+
+template <typename DataType, typename Name, DictionaryGetFunctionType dictionary_get_function_type>
+class FunctionDictGetImpl final : public IFunction
+{
+    using Type = typename DataType::FieldType;
+
+public:
+    static constexpr auto name = Name::name;
+
+    static FunctionPtr create(const Context &context)
+    {
+        return std::make_shared<FunctionDictGetImpl>(context);
+    }
+
+    explicit FunctionDictGetImpl(const Context & context_) : impl(context_) {}
+
+    String getName() const override { return name; }
+
+private:
+    size_t getNumberOfArguments() const override { return 0; }
+
+    bool isVariadic() const override { return true; }
+
+    bool useDefaultImplementationForConstants() const final { return true; }
+
+    bool isDeterministic() const override { return false; }
+
+    ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 1}; }
+
+    bool isInjective(const ColumnsWithTypeAndName & sample_columns) const override
+    {
+        return impl.isInjective(sample_columns);
+    }
+
+    DataTypePtr getReturnTypeImpl(const DataTypes &) const override
+    {
+        DataTypePtr result;
+        
+        if constexpr (IsDataTypeDecimal<DataType>)
+            result = std::make_shared<DataType>(DataType::maxPrecision(), 0);
+        else
+            result = std::make_shared<DataType>();
+
+        return result;
+    }
+
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
+    {
+        auto return_type = impl.getReturnTypeImpl(arguments);
+
+        if (!areTypesEqual(return_type, result_type))
+            throw Exception{"Dictionary attribute has different type " + return_type->getName() + " expected " + result_type->getName(),
+                    ErrorCodes::TYPE_MISMATCH};
+
+        return impl.executeImpl(arguments, return_type, input_rows_count);
+    }
+
+    static bool areTypesEqual(const DataTypePtr & lhs, const DataTypePtr & rhs)
+    {
+        return removeNullable(recursiveRemoveLowCardinality(lhs))->equals(*removeNullable(recursiveRemoveLowCardinality(rhs)));
+    }
+
+    const FunctionDictGetNoType<dictionary_get_function_type> impl;
 };
 
 template<typename DataType, typename Name>
@@ -436,307 +508,6 @@ using FunctionDictGetDecimal64OrDefault = FunctionDictGetOrDefault<DataTypeDecim
 using FunctionDictGetDecimal128OrDefault = FunctionDictGetOrDefault<DataTypeDecimal<Decimal128>, NameDictGetDecimal128OrDefault>;
 using FunctionDictGetStringOrDefault = FunctionDictGetOrDefault<DataTypeString, NameDictGetStringOrDefault>;
 
-template<typename DataType, typename Name>
-using FunctionDictGetOrNull = FunctionDictGetImpl<DataType, Name, DictionaryGetFunctionType::getOrNull>;
-
-struct NameDictGetUInt8OrNull { static constexpr auto name = "dictGetUInt8OrNull"; };
-struct NameDictGetUInt16OrNull { static constexpr auto name = "dictGetUInt16OrNull"; };
-struct NameDictGetUInt32OrNull { static constexpr auto name = "dictGetUInt32OrNull"; };
-struct NameDictGetUInt64OrNull { static constexpr auto name = "dictGetUInt64OrNull"; };
-struct NameDictGetInt8OrNull { static constexpr auto name = "dictGetInt8OrNull"; };
-struct NameDictGetInt16OrNull { static constexpr auto name = "dictGetInt16OrNull"; };
-struct NameDictGetInt32OrNull { static constexpr auto name = "dictGetInt32OrNull"; };
-struct NameDictGetInt64OrNull { static constexpr auto name = "dictGetInt64OrNull"; };
-struct NameDictGetFloat32OrNull { static constexpr auto name = "dictGetFloat32OrNull"; };
-struct NameDictGetFloat64OrNull { static constexpr auto name = "dictGetFloat64OrNull"; };
-struct NameDictGetDateOrNull { static constexpr auto name = "dictGetDateOrNull"; };
-struct NameDictGetDateTimeOrNull { static constexpr auto name = "dictGetDateTimeOrNull"; };
-struct NameDictGetUUIDOrNull { static constexpr auto name = "dictGetUUIDOrNull"; };
-struct NameDictGetDecimal32OrNull { static constexpr auto name = "dictGetDecimal32OrNull"; };
-struct NameDictGetDecimal64OrNull { static constexpr auto name = "dictGetDecimal64OrNull"; };
-struct NameDictGetDecimal128OrNull { static constexpr auto name = "dictGetDecimal128OrNull"; };
-struct NameDictGetStringOrNull { static constexpr auto name = "dictGetStringOrNull"; };
-
-using FunctionDictGetUInt8OrNull = FunctionDictGetOrNull<DataTypeUInt8, NameDictGetUInt8OrNull>;
-using FunctionDictGetUInt16OrNull = FunctionDictGetOrNull<DataTypeUInt16, NameDictGetUInt16OrNull>;
-using FunctionDictGetUInt32OrNull = FunctionDictGetOrNull<DataTypeUInt32, NameDictGetUInt32OrNull>;
-using FunctionDictGetUInt64OrNull = FunctionDictGetOrNull<DataTypeUInt64, NameDictGetUInt64OrNull>;
-using FunctionDictGetInt8OrNull = FunctionDictGetOrNull<DataTypeInt8, NameDictGetInt8OrNull>;
-using FunctionDictGetInt16OrNull = FunctionDictGetOrNull<DataTypeInt16, NameDictGetInt16OrNull>;
-using FunctionDictGetInt32OrNull = FunctionDictGetOrNull<DataTypeInt32, NameDictGetInt32OrNull>;
-using FunctionDictGetInt64OrNull = FunctionDictGetOrNull<DataTypeInt64, NameDictGetInt64OrNull>;
-using FunctionDictGetFloat32OrNull = FunctionDictGetOrNull<DataTypeFloat32, NameDictGetFloat32OrNull>;
-using FunctionDictGetFloat64OrNull = FunctionDictGetOrNull<DataTypeFloat64, NameDictGetFloat64OrNull>;
-using FunctionDictGetDateOrNull = FunctionDictGetOrNull<DataTypeDate, NameDictGetDateOrNull>;
-using FunctionDictGetDateTimeOrNull = FunctionDictGetOrNull<DataTypeDateTime, NameDictGetDateTimeOrNull>;
-using FunctionDictGetUUIDOrNull = FunctionDictGetOrNull<DataTypeUUID, NameDictGetUUIDOrNull>;
-using FunctionDictGetDecimal32OrNull = FunctionDictGetOrNull<DataTypeDecimal<Decimal32>, NameDictGetDecimal32OrNull>;
-using FunctionDictGetDecimal64OrNull = FunctionDictGetOrNull<DataTypeDecimal<Decimal64>, NameDictGetDecimal64OrNull>;
-using FunctionDictGetDecimal128OrNull = FunctionDictGetOrNull<DataTypeDecimal<Decimal128>, NameDictGetDecimal128OrNull>;
-using FunctionDictGetStringOrNull = FunctionDictGetOrNull<DataTypeString, NameDictGetStringOrNull>;
-
-/// TODO: Use new API
-/// This variant of function derives the result type automatically.
-template <DictionaryGetFunctionType dictionary_get_function_type>
-class FunctionDictGetNoType final : public IFunction
-{
-public:
-    static constexpr auto name = dictionary_get_function_type == DictionaryGetFunctionType::get ? "dictGet"
-        : (dictionary_get_function_type == DictionaryGetFunctionType::getOrDefault ? "dictGetOrDefault" : "dictGetOrNull");
-
-    static FunctionPtr create(const Context & context)
-    {
-        return std::make_shared<FunctionDictGetNoType>(context);
-    }
-
-    explicit FunctionDictGetNoType(const Context & context_) : context(context_), helper(context_) {}
-
-    String getName() const override { return name; }
-
-private:
-    bool isVariadic() const override { return true; }
-    size_t getNumberOfArguments() const override { return 0; }
-
-    bool useDefaultImplementationForConstants() const final { return true; }
-    ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 1}; }
-
-    bool isDeterministic() const override { return false; }
-
-    bool isInjective(const ColumnsWithTypeAndName & sample_columns) const override
-    {
-        return helper.isDictGetFunctionInjective(sample_columns);
-    }
-
-    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
-    {
-        if (arguments.size() < 3)
-            throw Exception{"Wrong argument count for function " + getName(), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
-
-        String dict_name;
-        if (const auto * name_col = checkAndGetColumnConst<ColumnString>(arguments[0].column.get()))
-        {
-            dict_name = name_col->getValue<String>();
-        }
-        else
-            throw Exception{"Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName()
-                + ", expected a const string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
-
-        String attr_name;
-        if (const auto * name_col = checkAndGetColumnConst<ColumnString>(arguments[1].column.get()))
-        {
-            attr_name = name_col->getValue<String>();
-        }
-        else
-            throw Exception{"Illegal type " + arguments[1].type->getName() + " of second argument of function " + getName()
-                + ", expected a const string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
-
-        auto dict = helper.getDictionary(dict_name);
-        const DictionaryStructure & structure = dict->getStructure();
-
-        for (const auto& attribute : structure.attributes)
-        {
-            if (attribute.name != attr_name)
-            {
-                continue;
-            }
-
-            WhichDataType dt = removeNullable(attribute.type);
-
-            if constexpr (dictionary_get_function_type == DictionaryGetFunctionType::get)
-            {
-                switch (dt.idx)
-                {
-                    case TypeIndex::String:
-                    case TypeIndex::FixedString:
-                        impl = FunctionDictGetString::create(context);
-                        break;
-                    case TypeIndex::UInt8:
-                        impl = FunctionDictGetUInt8::create(context);
-                        break;
-                    case TypeIndex::UInt16:
-                        impl = FunctionDictGetUInt16::create(context);
-                        break;
-                    case TypeIndex::UInt32:
-                        impl = FunctionDictGetUInt32::create(context);
-                        break;
-                    case TypeIndex::UInt64:
-                        impl = FunctionDictGetUInt64::create(context);
-                        break;
-                    case TypeIndex::Int8:
-                        impl = FunctionDictGetInt8::create(context);
-                        break;
-                    case TypeIndex::Int16:
-                        impl = FunctionDictGetInt16::create(context);
-                        break;
-                    case TypeIndex::Int32:
-                        impl = FunctionDictGetInt32::create(context);
-                        break;
-                    case TypeIndex::Int64:
-                        impl = FunctionDictGetInt64::create(context);
-                        break;
-                    case TypeIndex::Float32:
-                        impl = FunctionDictGetFloat32::create(context);
-                        break;
-                    case TypeIndex::Float64:
-                        impl = FunctionDictGetFloat64::create(context);
-                        break;
-                    case TypeIndex::Date:
-                        impl = FunctionDictGetDate::create(context);
-                        break;
-                    case TypeIndex::DateTime:
-                        impl = FunctionDictGetDateTime::create(context);
-                        break;
-                    case TypeIndex::UUID:
-                        impl = FunctionDictGetUUID::create(context);
-                        break;
-                    case TypeIndex::Decimal32:
-                        impl = FunctionDictGetDecimal32::create(context, getDecimalScale(*attribute.type));
-                        break;
-                    case TypeIndex::Decimal64:
-                        impl = FunctionDictGetDecimal64::create(context, getDecimalScale(*attribute.type));
-                        break;
-                    case TypeIndex::Decimal128:
-                        impl = FunctionDictGetDecimal128::create(context, getDecimalScale(*attribute.type));
-                        break;
-                    default:
-                        throw Exception("Unknown dictGet type", ErrorCodes::UNKNOWN_TYPE);
-                }
-            }
-            else if constexpr (dictionary_get_function_type == DictionaryGetFunctionType::getOrDefault)
-            {
-                switch (dt.idx)
-                {
-                    case TypeIndex::String:
-                        impl = FunctionDictGetStringOrDefault::create(context);
-                        break;
-                    case TypeIndex::UInt8:
-                        impl = FunctionDictGetUInt8OrDefault::create(context);
-                        break;
-                    case TypeIndex::UInt16:
-                        impl = FunctionDictGetUInt16OrDefault::create(context);
-                        break;
-                    case TypeIndex::UInt32:
-                        impl = FunctionDictGetUInt32OrDefault::create(context);
-                        break;
-                    case TypeIndex::UInt64:
-                        impl = FunctionDictGetUInt64OrDefault::create(context);
-                        break;
-                    case TypeIndex::Int8:
-                        impl = FunctionDictGetInt8OrDefault::create(context);
-                        break;
-                    case TypeIndex::Int16:
-                        impl = FunctionDictGetInt16OrDefault::create(context);
-                        break;
-                    case TypeIndex::Int32:
-                        impl = FunctionDictGetInt32OrDefault::create(context);
-                        break;
-                    case TypeIndex::Int64:
-                        impl = FunctionDictGetInt64OrDefault::create(context);
-                        break;
-                    case TypeIndex::Float32:
-                        impl = FunctionDictGetFloat32OrDefault::create(context);
-                        break;
-                    case TypeIndex::Float64:
-                        impl = FunctionDictGetFloat64OrDefault::create(context);
-                        break;
-                    case TypeIndex::Date:
-                        impl = FunctionDictGetDateOrDefault::create(context);
-                        break;
-                    case TypeIndex::DateTime:
-                        impl = FunctionDictGetDateTimeOrDefault::create(context);
-                        break;
-                    case TypeIndex::UUID:
-                        impl = FunctionDictGetUUIDOrDefault::create(context);
-                        break;
-                    case TypeIndex::Decimal32:
-                        impl = FunctionDictGetDecimal32OrDefault::create(context, getDecimalScale(*attribute.type));
-                        break;
-                    case TypeIndex::Decimal64:
-                        impl = FunctionDictGetDecimal64OrDefault::create(context, getDecimalScale(*attribute.type));
-                        break;
-                    case TypeIndex::Decimal128:
-                        impl = FunctionDictGetDecimal128OrDefault::create(context, getDecimalScale(*attribute.type));
-                        break;
-                    default:
-                        throw Exception("Unknown dictGetOrDefault type", ErrorCodes::UNKNOWN_TYPE);
-                }
-            }
-            else if constexpr (dictionary_get_function_type == DictionaryGetFunctionType::getOrNull)
-            {
-                switch (dt.idx)
-                {
-                    case TypeIndex::String:
-                        impl = FunctionDictGetStringOrNull::create(context);
-                        break;
-                    case TypeIndex::UInt8:
-                        impl = FunctionDictGetUInt8OrNull::create(context);
-                        break;
-                    case TypeIndex::UInt16:
-                        impl = FunctionDictGetUInt16OrNull::create(context);
-                        break;
-                    case TypeIndex::UInt32:
-                        impl = FunctionDictGetUInt32OrNull::create(context);
-                        break;
-                    case TypeIndex::UInt64:
-                        impl = FunctionDictGetUInt64OrNull::create(context);
-                        break;
-                    case TypeIndex::Int8:
-                        impl = FunctionDictGetInt8OrNull::create(context);
-                        break;
-                    case TypeIndex::Int16:
-                        impl = FunctionDictGetInt16OrNull::create(context);
-                        break;
-                    case TypeIndex::Int32:
-                        impl = FunctionDictGetInt32OrNull::create(context);
-                        break;
-                    case TypeIndex::Int64:
-                        impl = FunctionDictGetInt64OrNull::create(context);
-                        break;
-                    case TypeIndex::Float32:
-                        impl = FunctionDictGetFloat32OrNull::create(context);
-                        break;
-                    case TypeIndex::Float64:
-                        impl = FunctionDictGetFloat64OrNull::create(context);
-                        break;
-                    case TypeIndex::Date:
-                        impl = FunctionDictGetDateOrNull::create(context);
-                        break;
-                    case TypeIndex::DateTime:
-                        impl = FunctionDictGetDateTimeOrNull::create(context);
-                        break;
-                    case TypeIndex::UUID:
-                        impl = FunctionDictGetUUIDOrNull::create(context);
-                        break;
-                    case TypeIndex::Decimal32:
-                        impl = FunctionDictGetDecimal32OrNull::create(context, getDecimalScale(*attribute.type));
-                        break;
-                    case TypeIndex::Decimal64:
-                        impl = FunctionDictGetDecimal64OrNull::create(context, getDecimalScale(*attribute.type));
-                        break;
-                    case TypeIndex::Decimal128:
-                        impl = FunctionDictGetDecimal128OrNull::create(context, getDecimalScale(*attribute.type));
-                        break;
-                    default:
-                        throw Exception("Unknown dictGetOrNull type", ErrorCodes::UNKNOWN_TYPE);
-                }
-            }
-
-            return attribute.type;
-        }
-
-        throw Exception{"No such attribute '" + attr_name + "'", ErrorCodes::BAD_ARGUMENTS};
-    }
-
-    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
-    {
-        return impl->executeImpl(arguments, result_type, input_rows_count);
-    }
-
-    const Context & context;
-    mutable FunctionDictHelper helper;
-    mutable FunctionPtr impl; // underlying function used by dictGet function without explicit type info
-};
-
 /// Functions to work with hierarchies.
 
 class FunctionDictGetHierarchy final : public IFunction

From 42d4fc1bbd8c926b23c8d18f888467928104495b Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Sun, 10 Jan 2021 00:23:59 +0300
Subject: [PATCH 0239/1238] Updated FlatDictionary

---
 src/Dictionaries/FlatDictionary.cpp | 11 ++++++-----
 src/Dictionaries/IDictionary.h      |  3 ---
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp
index 7278808f773..0b19b117fec 100644
--- a/src/Dictionaries/FlatDictionary.cpp
+++ b/src/Dictionaries/FlatDictionary.cpp
@@ -512,7 +512,6 @@ void FlatDictionary::resize(Attribute & attribute, const Key id)
 template <typename T>
 void FlatDictionary::setAttributeValueImpl(Attribute & attribute, const Key id, const T & value)
 {
-    resize<T>(attribute, id);
     auto & array = std::get<ContainerType<T>>(attribute.arrays);
     array[id] = value;
     loaded_ids[id] = true;
@@ -521,11 +520,8 @@ void FlatDictionary::setAttributeValueImpl(Attribute & attribute, const Key id,
 template <>
 void FlatDictionary::setAttributeValueImpl<String>(Attribute & attribute, const Key id, const String & value)
 {
-    resize<StringRef>(attribute, id);
     const auto * string_in_arena = attribute.string_arena->insert(value.data(), value.size());
-    auto & array = std::get<ContainerType<StringRef>>(attribute.arrays);
-    array[id] = StringRef{string_in_arena, value.size()};
-    loaded_ids[id] = true;
+    setAttributeValueImpl(attribute, id, StringRef{string_in_arena, value.size()});
 }
 
 void FlatDictionary::setAttributeValue(Attribute & attribute, const Key id, const Field & value)
@@ -534,6 +530,9 @@ void FlatDictionary::setAttributeValue(Attribute & attribute, const Key id, cons
     {
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
         using AttributeType = typename Type::AttributeType;
+        using ResizeType = std::conditional_t<std::is_same_v<AttributeType, String>, StringRef, AttributeType>;
+
+        resize<ResizeType>(attribute, id);
 
         if (attribute.is_nullable)
         {
@@ -570,6 +569,8 @@ PaddedPODArray<FlatDictionary::Key> FlatDictionary::getIds() const
     const auto ids_count = ext::size(loaded_ids);
 
     PaddedPODArray<Key> ids;
+    ids.reserve(ids_count);
+
     for (auto idx : ext::range(0, ids_count))
         if (loaded_ids[idx])
             ids.push_back(idx);
diff --git a/src/Dictionaries/IDictionary.h b/src/Dictionaries/IDictionary.h
index 61f249bad44..0de251ff8b4 100644
--- a/src/Dictionaries/IDictionary.h
+++ b/src/Dictionaries/IDictionary.h
@@ -28,9 +28,6 @@ namespace ErrorCodes
 struct IDictionaryBase;
 using DictionaryPtr = std::unique_ptr<IDictionaryBase>;
 
-struct DictionaryStructure;
-class ColumnString;
-
 enum class DictionaryIdentifierType
 {
     simple,

From 77d2e00dd88a7a84e6e0b5ff6d74bf64eeb46873 Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Mon, 11 Jan 2021 00:15:55 +0300
Subject: [PATCH 0240/1238] Updated DictionaryStructure

---
 src/Dictionaries/DictionaryStructure.cpp | 24 ++++++++++++++++++++++--
 src/Dictionaries/DictionaryStructure.h   |  2 ++
 src/Dictionaries/FlatDictionary.cpp      | 21 ++++++++++++---------
 src/Dictionaries/FlatDictionary.h        |  7 ++++---
 4 files changed, 40 insertions(+), 14 deletions(-)

diff --git a/src/Dictionaries/DictionaryStructure.cpp b/src/Dictionaries/DictionaryStructure.cpp
index df785bcb550..12c33cbb58d 100644
--- a/src/Dictionaries/DictionaryStructure.cpp
+++ b/src/Dictionaries/DictionaryStructure.cpp
@@ -379,11 +379,31 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
 
         has_hierarchy = has_hierarchy || hierarchical;
 
-        res_attributes.emplace_back(
-            DictionaryAttribute{name, underlying_type, initial_type, expression, null_value, hierarchical, injective, is_object_id, is_nullable, is_array});
+        res_attributes.emplace_back(DictionaryAttribute{
+            name,
+            underlying_type,
+            initial_type,
+            type,
+            expression,
+            null_value,
+            hierarchical,
+            injective,
+            is_object_id,
+            is_nullable,
+            is_array});
     }
 
     return res_attributes;
 }
 
+const DictionaryAttribute & DictionaryStructure::getAttribute(const String& attribute_name) const
+{
+    auto find_iter
+        = std::find_if(attributes.begin(), attributes.end(), [&](const auto & attribute) { return attribute.name == attribute_name; });
+
+    if (find_iter == attributes.end())
+        throw Exception{"No such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS};
+
+    return *find_iter;
+}
 }
diff --git a/src/Dictionaries/DictionaryStructure.h b/src/Dictionaries/DictionaryStructure.h
index db5c3fa0103..5e7964e1c6f 100644
--- a/src/Dictionaries/DictionaryStructure.h
+++ b/src/Dictionaries/DictionaryStructure.h
@@ -56,6 +56,7 @@ struct DictionaryAttribute final
     const std::string name;
     const AttributeUnderlyingType underlying_type;
     const DataTypePtr type;
+    const DataTypePtr nested_type;
     const std::string expression;
     const Field null_value;
     const bool hierarchical;
@@ -157,6 +158,7 @@ struct DictionaryStructure final
     bool isKeySizeFixed() const;
     size_t getKeySize() const;
 
+    const DictionaryAttribute &getAttribute(const String& attribute_name) const;
 private:
     /// range_min and range_max have to be parsed before this function call
     std::vector<DictionaryAttribute> getAttributes(
diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp
index 0b19b117fec..07dfe498b67 100644
--- a/src/Dictionaries/FlatDictionary.cpp
+++ b/src/Dictionaries/FlatDictionary.cpp
@@ -1,12 +1,14 @@
 #include "FlatDictionary.h"
+
+#include <Core/Defines.h>
+#include <DataTypes/DataTypesDecimal.h>
 #include <IO/WriteHelpers.h>
 #include <Columns/ColumnsNumber.h>
 #include <Columns/ColumnNullable.h>
 #include <Functions/FunctionHelpers.h>
+
 #include "DictionaryBlockInputStream.h"
 #include "DictionaryFactory.h"
-#include <Core/Defines.h>
-
 
 namespace DB
 {
@@ -120,10 +122,11 @@ ColumnPtr FlatDictionary::getColumn(
     auto size = ids.size();
 
     const auto & attribute = getAttribute(attribute_name);
+    const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name);
 
     ColumnUInt8::MutablePtr col_null_map_to;
     ColumnUInt8::Container * vec_null_map_to = nullptr;
-    if (attribute.is_nullable)
+    if (attribute.nullable_set)
     {
         col_null_map_to = ColumnUInt8::create(size, false);
         vec_null_map_to = &col_null_map_to->getData();
@@ -188,8 +191,8 @@ ColumnPtr FlatDictionary::getColumn(
 
             if constexpr (IsDecimalNumber<AttributeType>)
             {
-                // auto scale = getDecimalScale(*attribute.type);
-                column = ColumnDecimal<AttributeType>::create(size, 0);
+                auto scale = getDecimalScale(*dictionary_attribute.nested_type);
+                column = ColumnDecimal<AttributeType>::create(size, scale);
             }
             else if constexpr (IsNumber<AttributeType>)
                 column = ColumnVector<AttributeType>::create(size);
@@ -239,7 +242,7 @@ ColumnPtr FlatDictionary::getColumn(
 
     callOnDictionaryAttributeType(attribute.type, type_call);
 
-    if (attribute.is_nullable)
+    if (attribute.nullable_set)
     {
         for (size_t row = 0; row < ids.size(); ++row)
         {
@@ -461,8 +464,8 @@ void FlatDictionary::createAttributeImpl<String>(Attribute & attribute, const Fi
 
 FlatDictionary::Attribute FlatDictionary::createAttribute(const DictionaryAttribute& attribute, const Field & null_value)
 {
-    auto nullable_set = attribute.is_nullable ? std::make_unique<NullableSet>() : nullptr;
-    Attribute attr{attribute.underlying_type, attribute.is_nullable, std::move(nullable_set), {}, {}, {}};
+    auto nullable_set = attribute.is_nullable ? std::make_optional<NullableSet>() : std::optional<NullableSet>{};
+    Attribute attr{attribute.underlying_type, std::move(nullable_set), {}, {}, {}};
 
     auto type_call = [&](const auto &dictionary_attribute_type)
     {
@@ -534,7 +537,7 @@ void FlatDictionary::setAttributeValue(Attribute & attribute, const Key id, cons
 
         resize<ResizeType>(attribute, id);
 
-        if (attribute.is_nullable)
+        if (attribute.nullable_set)
         {
             if (value.isNull())
             {
diff --git a/src/Dictionaries/FlatDictionary.h b/src/Dictionaries/FlatDictionary.h
index 8de90ca0c15..ee461ba51ee 100644
--- a/src/Dictionaries/FlatDictionary.h
+++ b/src/Dictionaries/FlatDictionary.h
@@ -4,15 +4,17 @@
 #include <variant>
 #include <vector>
 #include <optional>
+
 #include <Common/HashTable/HashSet.h>
+#include <Common/Arena.h>
 #include <Columns/ColumnDecimal.h>
 #include <Columns/ColumnString.h>
 #include <Columns/ColumnArray.h>
 #include <DataTypes/IDataType.h>
-#include <Common/Arena.h>
 #include <Core/Block.h>
 #include <ext/range.h>
 #include <ext/size.h>
+
 #include "DictionaryStructure.h"
 #include "IDictionary.h"
 #include "IDictionarySource.h"
@@ -92,8 +94,7 @@ private:
     struct Attribute final
     {
         AttributeUnderlyingType type;
-        bool is_nullable;
-        std::unique_ptr<NullableSet> nullable_set;
+        std::optional<NullableSet> nullable_set;
 
         std::variant<
             UInt8,

From 498df53ebfba5e450d7c99bf3800393fe4dfadea Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Wed, 13 Jan 2021 00:26:32 +0300
Subject: [PATCH 0241/1238] Fixed style issues

---
 src/Dictionaries/SSDCacheDictionary.cpp           | 1 -
 src/Dictionaries/SSDComplexKeyCacheDictionary.cpp | 1 -
 src/Functions/FunctionsExternalDictionaries.h     | 2 +-
 3 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp
index 4cac3ffb94d..c0e1b8c935d 100644
--- a/src/Dictionaries/SSDCacheDictionary.cpp
+++ b/src/Dictionaries/SSDCacheDictionary.cpp
@@ -22,7 +22,6 @@
 #include <filesystem>
 #include <city.h>
 #include <fcntl.h>
-
 #include <Functions/FunctionHelpers.h>
 
 namespace ProfileEvents
diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp
index 17c9bd137af..e36ec446845 100644
--- a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp
+++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp
@@ -23,7 +23,6 @@
 #include <filesystem>
 #include <city.h>
 #include <fcntl.h>
-
 #include <Functions/FunctionHelpers.h>
 
 namespace ProfileEvents
diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h
index 11adf63a4d9..0a87b4e2c3f 100644
--- a/src/Functions/FunctionsExternalDictionaries.h
+++ b/src/Functions/FunctionsExternalDictionaries.h
@@ -402,7 +402,7 @@ private:
     DataTypePtr getReturnTypeImpl(const DataTypes &) const override
     {
         DataTypePtr result;
-        
+
         if constexpr (IsDataTypeDecimal<DataType>)
             result = std::make_shared<DataType>(DataType::maxPrecision(), 0);
         else

From 7cb7d4dbce0a23e20d42fb92a3deb49c170d1ce7 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Thu, 21 Jan 2021 17:42:50 +0300
Subject: [PATCH 0242/1238] Fixed dicitionaries todo

---
 src/Dictionaries/CacheDictionary.cpp          | 11 ++---
 .../ComplexKeyCacheDictionary.cpp             | 12 ++---
 .../ComplexKeyDirectDictionary.cpp            | 11 ++---
 .../ComplexKeyHashedDictionary.cpp            | 11 ++---
 src/Dictionaries/DictionaryStructure.cpp      | 46 +++++++++----------
 src/Dictionaries/DictionaryStructure.h        |  3 +-
 src/Dictionaries/DirectDictionary.cpp         | 12 ++---
 src/Dictionaries/FlatDictionary.cpp           | 25 ++++------
 src/Dictionaries/HashedDictionary.cpp         | 43 +++++++----------
 src/Dictionaries/HashedDictionary.h           |  4 +-
 src/Dictionaries/IPAddressDictionary.cpp      | 11 ++---
 src/Dictionaries/PolygonDictionary.cpp        | 10 ++--
 src/Dictionaries/RangeHashedDictionary.cpp    | 10 ++--
 src/Dictionaries/SSDCacheDictionary.cpp       | 11 ++---
 .../SSDComplexKeyCacheDictionary.cpp          | 15 +++---
 src/Functions/FunctionHelpers.cpp             |  8 ++++
 src/Functions/FunctionHelpers.h               |  4 ++
 src/Functions/FunctionsExternalDictionaries.h |  2 +
 18 files changed, 117 insertions(+), 132 deletions(-)

diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp
index 71490897131..d5f3c0b1c4a 100644
--- a/src/Dictionaries/CacheDictionary.cpp
+++ b/src/Dictionaries/CacheDictionary.cpp
@@ -14,6 +14,7 @@
 #include <ext/range.h>
 #include <ext/size.h>
 #include <Common/setThreadName.h>
+#include <DataTypes/DataTypesDecimal.h>
 #include "CacheDictionary.inc.h"
 #include "DictionaryBlockInputStream.h"
 #include "DictionaryFactory.h"
@@ -251,7 +252,7 @@ void CacheDictionary::isInConstantVector(const Key child_id, const PaddedPODArra
 
 ColumnPtr CacheDictionary::getColumn(
     const std::string & attribute_name,
-    const DataTypePtr &,
+    const DataTypePtr & result_type,
     const Columns & key_columns,
     const DataTypes &,
     const ColumnPtr default_untyped) const
@@ -262,9 +263,7 @@ ColumnPtr CacheDictionary::getColumn(
     const auto & ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
 
     auto & attribute = getAttribute(attribute_name);
-
-    /// TODO: Check that attribute type is same as result type
-    /// TODO: Check if const will work as expected
+    const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
 
     auto type_call = [&](const auto &dictionary_attribute_type)
     {
@@ -311,8 +310,8 @@ ColumnPtr CacheDictionary::getColumn(
 
             if constexpr (IsDecimalNumber<AttributeType>)
             {
-                // auto scale = getDecimalScale(*attribute.type);
-                column = ColumnDecimal<AttributeType>::create(identifiers_size, 0);
+                auto scale = getDecimalScale(*dictionary_attribute.nested_type);
+                column = ColumnDecimal<AttributeType>::create(size, scale);
             }
             else if constexpr (IsNumber<AttributeType>)
                 column = ColumnVector<AttributeType>::create(identifiers_size);
diff --git a/src/Dictionaries/ComplexKeyCacheDictionary.cpp b/src/Dictionaries/ComplexKeyCacheDictionary.cpp
index 60457f81027..d27a8c1483e 100644
--- a/src/Dictionaries/ComplexKeyCacheDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyCacheDictionary.cpp
@@ -11,7 +11,7 @@
 #include "DictionaryBlockInputStream.h"
 #include "DictionaryFactory.h"
 #include <Functions/FunctionHelpers.h>
-
+#include <DataTypes/DataTypesDecimal.h>
 
 namespace ProfileEvents
 {
@@ -73,7 +73,7 @@ ComplexKeyCacheDictionary::ComplexKeyCacheDictionary(
 
 ColumnPtr ComplexKeyCacheDictionary::getColumn(
     const std::string & attribute_name,
-    const DataTypePtr &,
+    const DataTypePtr & result_type,
     const Columns & key_columns,
     const DataTypes & key_types,
     const ColumnPtr default_untyped) const
@@ -83,9 +83,7 @@ ColumnPtr ComplexKeyCacheDictionary::getColumn(
     ColumnPtr result;
 
     auto & attribute = getAttribute(attribute_name);
-
-    /// TODO: Check that attribute type is same as result type
-    /// TODO: Check if const will work as expected
+    const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
 
     auto keys_size = key_columns.front()->size();
 
@@ -133,8 +131,8 @@ ColumnPtr ComplexKeyCacheDictionary::getColumn(
 
             if constexpr (IsDecimalNumber<AttributeType>)
             {
-                // auto scale = getDecimalScale(*attribute.type);
-                column = ColumnDecimal<AttributeType>::create(keys_size, 0);
+                auto scale = getDecimalScale(*dictionary_attribute.nested_type);
+                column = ColumnDecimal<AttributeType>::create(size, scale);
             }
             else if constexpr (IsNumber<AttributeType>)
                 column = ColumnVector<AttributeType>::create(keys_size);
diff --git a/src/Dictionaries/ComplexKeyDirectDictionary.cpp b/src/Dictionaries/ComplexKeyDirectDictionary.cpp
index 35ca970889f..e719c247279 100644
--- a/src/Dictionaries/ComplexKeyDirectDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyDirectDictionary.cpp
@@ -5,6 +5,7 @@
 #include <Core/Defines.h>
 #include <Columns/ColumnNullable.h>
 #include <Functions/FunctionHelpers.h>
+#include <DataTypes/DataTypesDecimal.h>
 
 namespace DB
 {
@@ -35,7 +36,7 @@ ComplexKeyDirectDictionary::ComplexKeyDirectDictionary(
 
 ColumnPtr ComplexKeyDirectDictionary::getColumn(
     const std::string & attribute_name,
-    const DataTypePtr &,
+    const DataTypePtr & result_type,
     const Columns & key_columns,
     const DataTypes & key_types,
     const ColumnPtr default_untyped) const
@@ -45,9 +46,7 @@ ColumnPtr ComplexKeyDirectDictionary::getColumn(
     ColumnPtr result;
 
     const auto & attribute = getAttribute(attribute_name);
-
-    /// TODO: Check that attribute type is same as result type
-    /// TODO: Check if const will work as expected
+    const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
 
     auto size = key_columns.front()->size();
 
@@ -146,8 +145,8 @@ ColumnPtr ComplexKeyDirectDictionary::getColumn(
 
             if constexpr (IsDecimalNumber<AttributeType>)
             {
-                // auto scale = getDecimalScale(*attribute.type);
-                column = ColumnDecimal<AttributeType>::create(size, 0);
+                auto scale = getDecimalScale(*dictionary_attribute.nested_type);
+                column = ColumnDecimal<AttributeType>::create(size, scale);
             }
             else if constexpr (IsNumber<AttributeType>)
                 column = ColumnVector<AttributeType>::create(size);
diff --git a/src/Dictionaries/ComplexKeyHashedDictionary.cpp b/src/Dictionaries/ComplexKeyHashedDictionary.cpp
index 85540a8a7f3..3d48638a1b2 100644
--- a/src/Dictionaries/ComplexKeyHashedDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyHashedDictionary.cpp
@@ -4,6 +4,7 @@
 #include <Columns/ColumnsNumber.h>
 #include <Columns/ColumnNullable.h>
 #include <Functions/FunctionHelpers.h>
+#include <DataTypes/DataTypesDecimal.h>
 #include "DictionaryBlockInputStream.h"
 #include "DictionaryFactory.h"
 
@@ -37,7 +38,7 @@ ComplexKeyHashedDictionary::ComplexKeyHashedDictionary(
 
 ColumnPtr ComplexKeyHashedDictionary::getColumn(
     const std::string & attribute_name,
-    const DataTypePtr &,
+    const DataTypePtr & result_type,
     const Columns & key_columns,
     const DataTypes & key_types,
     const ColumnPtr default_untyped) const
@@ -47,9 +48,7 @@ ColumnPtr ComplexKeyHashedDictionary::getColumn(
     ColumnPtr result;
 
     const auto & attribute = getAttribute(attribute_name);
-
-    /// TODO: Check that attribute type is same as result type
-    /// TODO: Check if const will work as expected
+    const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
 
     auto size = key_columns.front()->size();
 
@@ -141,8 +140,8 @@ ColumnPtr ComplexKeyHashedDictionary::getColumn(
 
             if constexpr (IsDecimalNumber<AttributeType>)
             {
-                // auto scale = getDecimalScale(*attribute.type);
-                column = ColumnDecimal<AttributeType>::create(size, 0);
+                auto scale = getDecimalScale(*dictionary_attribute.nested_type);
+                column = ColumnDecimal<AttributeType>::create(size, scale);
             }
             else if constexpr (IsNumber<AttributeType>)
                 column = ColumnVector<AttributeType>::create(size);
diff --git a/src/Dictionaries/DictionaryStructure.cpp b/src/Dictionaries/DictionaryStructure.cpp
index 12c33cbb58d..5c6863da651 100644
--- a/src/Dictionaries/DictionaryStructure.cpp
+++ b/src/Dictionaries/DictionaryStructure.cpp
@@ -208,16 +208,32 @@ void DictionaryStructure::validateKeyTypes(const DataTypes & key_types) const
 
     for (const auto i : ext::range(0, key_types.size()))
     {
-        const auto & expected_type = (*key)[i].type->getName();
-        const auto & actual_type = key_types[i]->getName();
+        const auto & expected_type = (*key)[i].type;
+        const auto & actual_type = key_types[i];
 
-        if (expected_type != actual_type)
-            throw Exception{"Key type at position " + std::to_string(i) + " does not match, expected " + expected_type + ", found "
-                                + actual_type,
-                            ErrorCodes::TYPE_MISMATCH};
+        if (!areTypesEqual(expected_type, actual_type))
+            throw Exception{"Key type at position " + std::to_string(i) + " does not match, expected " + expected_type->getName() + ", found "
+                    + actual_type->getName(),
+                ErrorCodes::TYPE_MISMATCH};
     }
 }
 
+const DictionaryAttribute & DictionaryStructure::getAttribute(const String& attribute_name, const DataTypePtr & type) const
+{
+    auto find_iter
+        = std::find_if(attributes.begin(), attributes.end(), [&](const auto & attribute) { return attribute.name == attribute_name; });
+
+    if (find_iter == attributes.end())
+        throw Exception{"No such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS};
+
+    auto & attribute = *find_iter;
+
+    if (!areTypesEqual(attribute.type, type))
+        throw Exception{"Attribute type does not match, expected " + attribute.type->getName() + ", found " + type->getName(),
+            ErrorCodes::TYPE_MISMATCH};
+
+    return *find_iter;
+}
 
 std::string DictionaryStructure::getKeyDescription() const
 {
@@ -312,20 +328,12 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
             continue;
 
 
-        /// TODO: Rewrite
         const auto type_string = config.getString(prefix + "type");
         const auto initial_type = DataTypeFactory::instance().get(type_string);
         auto type = initial_type;
         bool is_array = false;
         bool is_nullable = false;
 
-        // const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(type.get());
-        // if (array_type)
-        // {
-        //     is_array = true;
-        //     type = array_type->getNestedType();
-        // }
-
         if (type->isNullable())
         {
             is_nullable = true;
@@ -396,14 +404,4 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
     return res_attributes;
 }
 
-const DictionaryAttribute & DictionaryStructure::getAttribute(const String& attribute_name) const
-{
-    auto find_iter
-        = std::find_if(attributes.begin(), attributes.end(), [&](const auto & attribute) { return attribute.name == attribute_name; });
-
-    if (find_iter == attributes.end())
-        throw Exception{"No such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS};
-
-    return *find_iter;
-}
 }
diff --git a/src/Dictionaries/DictionaryStructure.h b/src/Dictionaries/DictionaryStructure.h
index 5e7964e1c6f..945e1c55494 100644
--- a/src/Dictionaries/DictionaryStructure.h
+++ b/src/Dictionaries/DictionaryStructure.h
@@ -154,11 +154,10 @@ struct DictionaryStructure final
     DictionaryStructure(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix);
 
     void validateKeyTypes(const DataTypes & key_types) const;
+    const DictionaryAttribute &getAttribute(const String& attribute_name, const DataTypePtr & type) const;
     std::string getKeyDescription() const;
     bool isKeySizeFixed() const;
     size_t getKeySize() const;
-
-    const DictionaryAttribute &getAttribute(const String& attribute_name) const;
 private:
     /// range_min and range_max have to be parsed before this function call
     std::vector<DictionaryAttribute> getAttributes(
diff --git a/src/Dictionaries/DirectDictionary.cpp b/src/Dictionaries/DirectDictionary.cpp
index 15020d11b0d..5f556aa2057 100644
--- a/src/Dictionaries/DirectDictionary.cpp
+++ b/src/Dictionaries/DirectDictionary.cpp
@@ -5,6 +5,7 @@
 #include <Core/Defines.h>
 #include <Functions/FunctionHelpers.h>
 #include <Columns/ColumnNullable.h>
+#include <DataTypes/DataTypesDecimal.h>
 
 namespace DB
 {
@@ -130,8 +131,8 @@ void DirectDictionary::isInConstantVector(const Key child_id, const PaddedPODArr
 }
 
 ColumnPtr DirectDictionary::getColumn(
-        const std::string& attribute_name,
-        const DataTypePtr &,
+        const std::string & attribute_name,
+        const DataTypePtr & result_type,
         const Columns & key_columns,
         const DataTypes &,
         const ColumnPtr default_untyped) const
@@ -153,8 +154,7 @@ ColumnPtr DirectDictionary::getColumn(
         vec_null_map_to = &col_null_map_to->getData();
     }
 
-    /// TODO: Check that attribute type is same as result type
-    /// TODO: Check if const will work as expected
+    const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
 
     auto type_call = [&](const auto &dictionary_attribute_type)
     {
@@ -244,8 +244,8 @@ ColumnPtr DirectDictionary::getColumn(
 
             if constexpr (IsDecimalNumber<AttributeType>)
             {
-                // auto scale = getDecimalScale(*attribute.type);
-                column = ColumnDecimal<AttributeType>::create(size, 0);
+                auto scale = getDecimalScale(*dictionary_attribute.nested_type);
+                column = ColumnDecimal<AttributeType>::create(size, scale);
             }
             else if constexpr (IsNumber<AttributeType>)
                 column = ColumnVector<AttributeType>::create(size);
diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp
index 07dfe498b67..b19164a4ebc 100644
--- a/src/Dictionaries/FlatDictionary.cpp
+++ b/src/Dictionaries/FlatDictionary.cpp
@@ -108,8 +108,8 @@ void FlatDictionary::isInConstantVector(const Key child_id, const PaddedPODArray
 }
 
 ColumnPtr FlatDictionary::getColumn(
-        const std::string& attribute_name,
-        const DataTypePtr &,
+        const std::string & attribute_name,
+        const DataTypePtr & result_type,
         const Columns & key_columns,
         const DataTypes &,
         const ColumnPtr default_untyped) const
@@ -122,18 +122,7 @@ ColumnPtr FlatDictionary::getColumn(
     auto size = ids.size();
 
     const auto & attribute = getAttribute(attribute_name);
-    const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name);
-
-    ColumnUInt8::MutablePtr col_null_map_to;
-    ColumnUInt8::Container * vec_null_map_to = nullptr;
-    if (attribute.nullable_set)
-    {
-        col_null_map_to = ColumnUInt8::create(size, false);
-        vec_null_map_to = &col_null_map_to->getData();
-    }
-
-    /// TODO: Check that attribute type is same as result type
-    /// TODO: Check if const will work as expected
+    const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
 
     auto type_call = [&](const auto &dictionary_attribute_type)
     {
@@ -244,13 +233,15 @@ ColumnPtr FlatDictionary::getColumn(
 
     if (attribute.nullable_set)
     {
+        ColumnUInt8::MutablePtr col_null_map_to = ColumnUInt8::create(size, false);
+        ColumnUInt8::Container& vec_null_map_to = col_null_map_to->getData();
+
         for (size_t row = 0; row < ids.size(); ++row)
         {
             auto id = ids[row];
+
             if (attribute.nullable_set->find(id) != nullptr)
-            {
-                (*vec_null_map_to)[row] = true;
-            }
+                vec_null_map_to[row] = true;
         }
 
         result = ColumnNullable::create(result, std::move(col_null_map_to));
diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp
index c32dc4492a0..eb742834bac 100644
--- a/src/Dictionaries/HashedDictionary.cpp
+++ b/src/Dictionaries/HashedDictionary.cpp
@@ -7,6 +7,7 @@
 #include <Functions/FunctionHelpers.h>
 #include <Columns/ColumnsNumber.h>
 #include <Columns/ColumnNullable.h>
+#include <DataTypes/DataTypesDecimal.h>
 
 namespace
 {
@@ -129,12 +130,11 @@ void HashedDictionary::isInConstantVector(const Key child_id, const PaddedPODArr
 
 ColumnPtr HashedDictionary::getColumn(
     const std::string & attribute_name,
-    const DataTypePtr &,
+    const DataTypePtr & result_type,
     const Columns & key_columns,
     const DataTypes &,
     const ColumnPtr default_untyped) const
 {
-    // dict_struct.validateKeyTypes(key_types);
     ColumnPtr result;
 
     PaddedPODArray<Key> backup_storage;
@@ -143,17 +143,7 @@ ColumnPtr HashedDictionary::getColumn(
     auto size = ids.size();
 
     const auto & attribute = getAttribute(attribute_name);
-
-    ColumnUInt8::MutablePtr col_null_map_to;
-    ColumnUInt8::Container * vec_null_map_to = nullptr;
-    if (attribute.is_nullable)
-    {
-        col_null_map_to = ColumnUInt8::create(size, false);
-        vec_null_map_to = &col_null_map_to->getData();
-    }
-
-    /// TODO: Check that attribute type is same as result type
-    /// TODO: Check if const will work as expected
+    const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
 
     auto type_call = [&](const auto & dictionary_attribute_type)
     {
@@ -211,8 +201,8 @@ ColumnPtr HashedDictionary::getColumn(
 
             if constexpr (IsDecimalNumber<AttributeType>)
             {
-                // auto scale = getDecimalScale(*attribute.type);
-                column = ColumnDecimal<AttributeType>::create(size, 0);
+                auto scale = getDecimalScale(*dictionary_attribute.nested_type);
+                column = ColumnDecimal<AttributeType>::create(size, scale);
             }
             else if constexpr (IsNumber<AttributeType>)
                 column = ColumnVector<AttributeType>::create(size);
@@ -262,15 +252,17 @@ ColumnPtr HashedDictionary::getColumn(
 
     callOnDictionaryAttributeType(attribute.type, type_call);
 
-    if (attribute.is_nullable)
+    if (attribute.nullable_set)
     {
+        ColumnUInt8::MutablePtr col_null_map_to = ColumnUInt8::create(size, false);
+        ColumnUInt8::Container& vec_null_map_to = col_null_map_to->getData();
+
         for (size_t row = 0; row < ids.size(); ++row)
         {
             auto id = ids[row];
+
             if (attribute.nullable_set->find(id) != nullptr)
-            {
-                (*vec_null_map_to)[row] = true;
-            }
+                vec_null_map_to[row] = true;
         }
 
         result = ColumnNullable::create(result, std::move(col_null_map_to));
@@ -578,8 +570,8 @@ void HashedDictionary::createAttributeImpl<String>(Attribute & attribute, const
 
 HashedDictionary::Attribute HashedDictionary::createAttribute(const DictionaryAttribute& attribute, const Field & null_value)
 {
-    auto nullable_set = attribute.is_nullable ? std::make_unique<NullableSet>() : nullptr;
-    Attribute attr{attribute.underlying_type, attribute.is_nullable, std::move(nullable_set), {}, {}, {}, {}};
+    auto nullable_set = attribute.is_nullable ? std::make_optional<NullableSet>() : std::optional<NullableSet>{};
+    Attribute attr{attribute.underlying_type, std::move(nullable_set), {}, {}, {}, {}};
 
     auto type_call = [&](const auto &dictionary_attribute_type)
     {
@@ -650,12 +642,11 @@ bool HashedDictionary::setAttributeValue(Attribute & attribute, const Key id, co
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
         using AttributeType = typename Type::AttributeType;
 
-        if (attribute.is_nullable)
+        if (attribute.nullable_set)
         {
             if (value.isNull())
             {
-                attribute.nullable_set->insert(id);
-                result = true;
+                result = attribute.nullable_set->insert(id).second;
                 return;
             }
             else
@@ -691,7 +682,7 @@ void HashedDictionary::has(const Attribute & attribute, const PaddedPODArray<Key
     {
         out[i] = attr.find(ids[i]) != nullptr;
 
-        if (attribute.is_nullable && !out[i])
+        if (attribute.nullable_set && !out[i])
             out[i] = attribute.nullable_set->find(ids[i]) != nullptr;
     }
 }
@@ -738,7 +729,7 @@ PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds() const
         /// TODO: Check if order is satisfied
         result = getIds<AttributeType>(attribute);
 
-        if (attribute.is_nullable)
+        if (attribute.nullable_set)
         {
             for (const auto& value: *attribute.nullable_set)
                 result.push_back(value.getKey());
diff --git a/src/Dictionaries/HashedDictionary.h b/src/Dictionaries/HashedDictionary.h
index 0f718c8132b..25024a28d73 100644
--- a/src/Dictionaries/HashedDictionary.h
+++ b/src/Dictionaries/HashedDictionary.h
@@ -3,6 +3,7 @@
 #include <atomic>
 #include <memory>
 #include <variant>
+#include <optional>
 #include <Columns/ColumnDecimal.h>
 #include <Columns/ColumnString.h>
 #include <Core/Block.h>
@@ -107,8 +108,7 @@ private:
     struct Attribute final
     {
         AttributeUnderlyingType type;
-        bool is_nullable;
-        std::unique_ptr<NullableSet> nullable_set;
+        std::optional<NullableSet> nullable_set;
 
         std::variant<
             UInt8,
diff --git a/src/Dictionaries/IPAddressDictionary.cpp b/src/Dictionaries/IPAddressDictionary.cpp
index f611725f740..93451635cb2 100644
--- a/src/Dictionaries/IPAddressDictionary.cpp
+++ b/src/Dictionaries/IPAddressDictionary.cpp
@@ -8,6 +8,7 @@
 #include <Common/typeid_cast.h>
 #include <DataTypes/DataTypeFixedString.h>
 #include <DataTypes/DataTypeString.h>
+#include <DataTypes/DataTypesDecimal.h>
 #include <IO/WriteIntText.h>
 #include <Poco/ByteOrder.h>
 #include <Common/formatIPv6.h>
@@ -269,7 +270,7 @@ IPAddressDictionary::IPAddressDictionary(
 
 ColumnPtr IPAddressDictionary::getColumn(
     const std::string & attribute_name,
-    const DataTypePtr &,
+    const DataTypePtr & result_type,
     const Columns & key_columns,
     const DataTypes & key_types,
     const ColumnPtr default_untyped) const
@@ -279,9 +280,7 @@ ColumnPtr IPAddressDictionary::getColumn(
     ColumnPtr result;
 
     const auto & attribute = getAttribute(attribute_name);
-
-    /// TODO: Check that attribute type is same as result type
-    /// TODO: Check if const will work as expected
+    const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
 
     auto size = key_columns.front()->size();
 
@@ -341,8 +340,8 @@ ColumnPtr IPAddressDictionary::getColumn(
 
             if constexpr (IsDecimalNumber<AttributeType>)
             {
-                // auto scale = getDecimalScale(*attribute.type);
-                column = ColumnDecimal<AttributeType>::create(size, 0);
+                auto scale = getDecimalScale(*dictionary_attribute.nested_type);
+                column = ColumnDecimal<AttributeType>::create(size, scale);
             }
             else if constexpr (IsNumber<AttributeType>)
                 column = ColumnVector<AttributeType>::create(size);
diff --git a/src/Dictionaries/PolygonDictionary.cpp b/src/Dictionaries/PolygonDictionary.cpp
index 3b8d3ad3b5a..c1969ea7b35 100644
--- a/src/Dictionaries/PolygonDictionary.cpp
+++ b/src/Dictionaries/PolygonDictionary.cpp
@@ -6,6 +6,7 @@
 #include <Columns/ColumnTuple.h>
 #include <DataTypes/DataTypeArray.h>
 #include <Functions/FunctionHelpers.h>
+#include <DataTypes/DataTypesDecimal.h>
 
 #include <numeric>
 
@@ -95,7 +96,7 @@ bool IPolygonDictionary::isInjective(const std::string &) const
 
 ColumnPtr IPolygonDictionary::getColumn(
     const std::string & attribute_name,
-    const DataTypePtr &,
+    const DataTypePtr & result_type,
     const Columns & key_columns,
     const DataTypes &,
     const ColumnPtr default_untyped) const
@@ -105,8 +106,7 @@ ColumnPtr IPolygonDictionary::getColumn(
     ColumnPtr result;
 
     const auto index = getAttributeIndex(attribute_name);
-
-    /// TODO: Check that attribute type is same as result type
+    const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
 
     auto size = key_columns.front()->size();
 
@@ -166,8 +166,8 @@ ColumnPtr IPolygonDictionary::getColumn(
 
             if constexpr (IsDecimalNumber<AttributeType>)
             {
-                // auto scale = getDecimalScale(*attribute.type);
-                column = ColumnDecimal<AttributeType>::create(size, 0);
+                auto scale = getDecimalScale(*dictionary_attribute.nested_type);
+                column = ColumnDecimal<AttributeType>::create(size, scale);
             }
             else if constexpr (IsNumber<AttributeType>)
                 column = ColumnVector<AttributeType>::create(size);
diff --git a/src/Dictionaries/RangeHashedDictionary.cpp b/src/Dictionaries/RangeHashedDictionary.cpp
index de36419ffba..02fb71ca3d7 100644
--- a/src/Dictionaries/RangeHashedDictionary.cpp
+++ b/src/Dictionaries/RangeHashedDictionary.cpp
@@ -6,6 +6,7 @@
 #include "DictionaryFactory.h"
 #include "RangeDictionaryBlockInputStream.h"
 #include <Interpreters/castColumn.h>
+#include <DataTypes/DataTypesDecimal.h>
 
 namespace
 {
@@ -89,7 +90,7 @@ RangeHashedDictionary::RangeHashedDictionary(
 
 ColumnPtr RangeHashedDictionary::getColumn(
     const std::string & attribute_name,
-    const DataTypePtr &,
+    const DataTypePtr & result_type,
     const Columns & key_columns,
     const DataTypes & key_types,
     const ColumnPtr default_untyped) const
@@ -99,8 +100,7 @@ ColumnPtr RangeHashedDictionary::getColumn(
     ColumnPtr result;
 
     const auto & attribute = getAttribute(attribute_name);
-
-    /// TODO: Check that attribute type is same as result type
+    const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
 
     auto size = key_columns.front()->size();
 
@@ -201,8 +201,8 @@ ColumnPtr RangeHashedDictionary::getColumn(
 
             if constexpr (IsDecimalNumber<AttributeType>)
             {
-                // auto scale = getDecimalScale(*attribute.type);
-                column = ColumnDecimal<AttributeType>::create(size, 0);
+                auto scale = getDecimalScale(*dictionary_attribute.nested_type);
+                column = ColumnDecimal<AttributeType>::create(size, scale);
             }
             else if constexpr (IsNumber<AttributeType>)
                 column = ColumnVector<AttributeType>::create(size);
diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp
index c0e1b8c935d..642e956f97e 100644
--- a/src/Dictionaries/SSDCacheDictionary.cpp
+++ b/src/Dictionaries/SSDCacheDictionary.cpp
@@ -23,6 +23,7 @@
 #include <city.h>
 #include <fcntl.h>
 #include <Functions/FunctionHelpers.h>
+#include <DataTypes/DataTypesDecimal.h>
 
 namespace ProfileEvents
 {
@@ -1329,7 +1330,7 @@ SSDCacheDictionary::SSDCacheDictionary(
 
 ColumnPtr SSDCacheDictionary::getColumn(
     const std::string & attribute_name,
-    const DataTypePtr &,
+    const DataTypePtr & result_type,
     const Columns & key_columns,
     const DataTypes &,
     const ColumnPtr default_untyped) const
@@ -1340,9 +1341,7 @@ ColumnPtr SSDCacheDictionary::getColumn(
     const auto & ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
 
     const auto index = getAttributeIndex(attribute_name);
-
-    /// TODO: Check that attribute type is same as result type
-    /// TODO: Check if const will work as expected
+    const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
 
     auto type_call = [&](const auto &dictionary_attribute_type)
     {
@@ -1389,8 +1388,8 @@ ColumnPtr SSDCacheDictionary::getColumn(
 
             if constexpr (IsDecimalNumber<AttributeType>)
             {
-                // auto scale = getDecimalScale(*attribute.type);
-                column = ColumnDecimal<AttributeType>::create(identifiers_size, 0);
+                auto scale = getDecimalScale(*dictionary_attribute.nested_type);
+                column = ColumnDecimal<AttributeType>::create(identifiers_size, scale);
             }
             else if constexpr (IsNumber<AttributeType>)
                 column = ColumnVector<AttributeType>::create(identifiers_size);
diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp
index e36ec446845..5372e0e1afb 100644
--- a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp
+++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp
@@ -9,6 +9,7 @@
 #include <Common/ProfilingScopedRWLock.h>
 #include <Common/MemorySanitizer.h>
 #include <DataStreams/IBlockInputStream.h>
+#include <DataTypes/DataTypesDecimal.h>
 #include "DictionaryBlockInputStream.h"
 #include "DictionaryFactory.h"
 #include <IO/AIO.h>
@@ -1378,19 +1379,17 @@ SSDComplexKeyCacheDictionary::SSDComplexKeyCacheDictionary(
 
 ColumnPtr SSDComplexKeyCacheDictionary::getColumn(
     const std::string & attribute_name,
-    const DataTypePtr &,
+    const DataTypePtr & result_type,
     const Columns & key_columns,
     const DataTypes & key_types,
     const ColumnPtr default_untyped) const
 {
+    ColumnPtr result;
+
     dict_struct.validateKeyTypes(key_types);
 
     const auto index = getAttributeIndex(attribute_name);
-
-    ColumnPtr result;
-
-    /// TODO: Check that attribute type is same as result type
-    /// TODO: Check if const will work as expected
+    const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
 
     auto keys_size = key_columns.front()->size();
 
@@ -1438,8 +1437,8 @@ ColumnPtr SSDComplexKeyCacheDictionary::getColumn(
 
             if constexpr (IsDecimalNumber<AttributeType>)
             {
-                // auto scale = getDecimalScale(*attribute.type);
-                column = ColumnDecimal<AttributeType>::create(keys_size, 0);
+                auto scale = getDecimalScale(*dictionary_attribute.nested_type);
+                column = ColumnDecimal<AttributeType>::create(keys_size, scale);
             }
             else if constexpr (IsNumber<AttributeType>)
                 column = ColumnVector<AttributeType>::create(keys_size);
diff --git a/src/Functions/FunctionHelpers.cpp b/src/Functions/FunctionHelpers.cpp
index f5ace858665..d64646ecaf1 100644
--- a/src/Functions/FunctionHelpers.cpp
+++ b/src/Functions/FunctionHelpers.cpp
@@ -212,4 +212,12 @@ checkAndGetNestedArrayOffset(const IColumn ** columns, size_t num_arguments)
     return {nested_columns, offsets->data()};
 }
 
+bool areTypesEqual(const DataTypePtr & lhs, const DataTypePtr & rhs)
+{
+    const auto & lhs_name = lhs->getName();
+    const auto & rhs_name = rhs->getName();
+
+    return lhs_name == rhs_name;
+}
+
 }
diff --git a/src/Functions/FunctionHelpers.h b/src/Functions/FunctionHelpers.h
index ac10fc14d39..76e420957be 100644
--- a/src/Functions/FunctionHelpers.h
+++ b/src/Functions/FunctionHelpers.h
@@ -152,4 +152,8 @@ void validateFunctionArgumentTypes(const IFunction & func, const ColumnsWithType
 std::pair<std::vector<const IColumn *>, const ColumnArray::Offset *>
 checkAndGetNestedArrayOffset(const IColumn ** columns, size_t num_arguments);
 
+
+/// Check if two types are equal
+bool areTypesEqual(const DataTypePtr & lhs, const DataTypePtr & rhs);
+
 }
diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h
index 0a87b4e2c3f..2130771251a 100644
--- a/src/Functions/FunctionsExternalDictionaries.h
+++ b/src/Functions/FunctionsExternalDictionaries.h
@@ -58,6 +58,8 @@ namespace ErrorCodes
     extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
     extern const int ILLEGAL_COLUMN;
     extern const int BAD_ARGUMENTS;
+    extern const int TYPE_MISMATCH;
+    extern const int NOT_IMPLEMENTED;
 }
 
 
From b11905f493b95390db71aa1c16d2a23219b0ab54 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Thu, 21 Jan 2021 21:21:07 +0300
Subject: [PATCH 0243/1238] Fixed tests

---
 src/Dictionaries/CacheDictionary.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp
index d5f3c0b1c4a..5b4690c206e 100644
--- a/src/Dictionaries/CacheDictionary.cpp
+++ b/src/Dictionaries/CacheDictionary.cpp
@@ -289,7 +289,7 @@ ColumnPtr CacheDictionary::getColumn(
                     getItemsString(attribute, ids, column_string.get(), [&](const size_t) { return StringRef{def}; });
                 }
                 else
-                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
+                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH}; 
             }
             else
             {
@@ -311,7 +311,7 @@ ColumnPtr CacheDictionary::getColumn(
             if constexpr (IsDecimalNumber<AttributeType>)
             {
                 auto scale = getDecimalScale(*dictionary_attribute.nested_type);
-                column = ColumnDecimal<AttributeType>::create(size, scale);
+                column = ColumnDecimal<AttributeType>::create(identifiers_size, scale);
             }
             else if constexpr (IsNumber<AttributeType>)
                 column = ColumnVector<AttributeType>::create(identifiers_size);

From b0d3f32a3697d662ec4455bf4f3856ad25b96284 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Fri, 22 Jan 2021 17:54:51 +0300
Subject: [PATCH 0244/1238] Added DefaultValueExtractor

---
 src/Dictionaries/FlatDictionary.cpp   | 95 +++++++--------------------
 src/Dictionaries/FlatDictionary.h     | 50 +++++++++++++-
 src/Dictionaries/HashedDictionary.cpp | 14 ++--
 src/Dictionaries/HashedDictionary.h   |  2 +-
 src/Dictionaries/IDictionary.h        |  2 +-
 5 files changed, 79 insertions(+), 84 deletions(-)

diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp
index b19164a4ebc..307b231071e 100644
--- a/src/Dictionaries/FlatDictionary.cpp
+++ b/src/Dictionaries/FlatDictionary.cpp
@@ -49,12 +49,13 @@ FlatDictionary::FlatDictionary(
 void FlatDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const
 {
     const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
+    DefaultValueExtractor<UInt64> extractor(null_value, nullptr);
 
     getItemsImpl<UInt64, UInt64>(
         *hierarchical_attribute,
         ids,
         [&](const size_t row, const UInt64 value) { out[row] = value; },
-        [&](const size_t) { return null_value; });
+        extractor);
 }
 
 
@@ -112,7 +113,7 @@ ColumnPtr FlatDictionary::getColumn(
         const DataTypePtr & result_type,
         const Columns & key_columns,
         const DataTypes &,
-        const ColumnPtr default_untyped) const
+        const ColumnPtr default_values_column) const
 {
     ColumnPtr result;
 
@@ -134,39 +135,14 @@ ColumnPtr FlatDictionary::getColumn(
             auto column_string = ColumnString::create();
             auto * out = column_string.get();
 
-            if (default_untyped != nullptr)
-            {
-                if (const auto * const default_col = checkAndGetColumn<ColumnString>(*default_untyped))
-                {
-                    getItemsImpl<StringRef, StringRef>(
-                        attribute,
-                        ids,
-                        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
-                        [&](const size_t row) { return default_col->getDataAt(row); });
-                }
-                else if (const auto * const default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
-                {
-                    const auto & def = default_col_const->template getValue<String>();
+            const auto & null_value = std::get<StringRef>(attribute.null_values);
+            DefaultValueExtractor<StringRef> extractor(null_value, default_values_column);
 
-                    getItemsImpl<StringRef, StringRef>(
-                        attribute,
-                        ids,
-                        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
-                        [&](const size_t) { return def; });
-                }
-                else
-                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
-            }
-            else
-            {
-                const auto & null_value = std::get<StringRef>(attribute.null_values);
-
-                getItemsImpl<StringRef, StringRef>(
-                    attribute,
-                    ids,
-                    [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
-                    [&](const size_t) { return null_value; });
-            }
+            getItemsImpl<StringRef, StringRef>(
+                attribute,
+                ids,
+                [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
+                extractor);
 
             result = std::move(column_string);
         }
@@ -188,42 +164,14 @@ ColumnPtr FlatDictionary::getColumn(
 
             auto & out = column->getData();
 
-            if (default_untyped != nullptr)
-            {
-                if (const auto * const default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
-                {
-                    getItemsImpl<AttributeType, AttributeType>(
-                        attribute,
-                        ids,
-                        [&](const size_t row, const auto value) { out[row] = value; },
-                        [&](const size_t row) { return default_col->getData()[row]; }
-                    );
-                }
-                else if (const auto * const default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
-                {
-                    const auto & def = default_col_const->template getValue<AttributeType>();
+            const auto null_value = std::get<AttributeType>(attribute.null_values);
+            DefaultValueExtractor<AttributeType> extractor(null_value, default_values_column);
 
-                    getItemsImpl<AttributeType, AttributeType>(
-                        attribute,
-                        ids,
-                        [&](const size_t row, const auto value) { out[row] = value; },
-                        [&](const size_t) { return def; }
-                    );
-                }
-                else
-                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
-            }
-            else
-            {
-                const auto null_value = std::get<AttributeType>(attribute.null_values);
-
-                getItemsImpl<AttributeType, AttributeType>(
-                    attribute,
-                    ids,
-                    [&](const size_t row, const auto value) { out[row] = value; },
-                    [&](const size_t) { return null_value; }
-                );
-            }
+            getItemsImpl<AttributeType, AttributeType>(
+                attribute, 
+                ids, 
+                [&](const size_t row, const auto value) { out[row] = value; },
+                extractor);
 
             result = std::move(column);
         }
@@ -472,9 +420,12 @@ FlatDictionary::Attribute FlatDictionary::createAttribute(const DictionaryAttrib
 }
 
 
-template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
+template <typename AttributeType, typename OutputType, typename ValueSetter>
 void FlatDictionary::getItemsImpl(
-    const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
+    const Attribute & attribute,
+    const PaddedPODArray<Key> & ids,
+    ValueSetter && set_value,
+    DefaultValueExtractor<AttributeType> & default_value_extractor) const
 {
     const auto & attr = std::get<ContainerType<AttributeType>>(attribute.arrays);
     const auto rows = ext::size(ids);
@@ -482,7 +433,7 @@ void FlatDictionary::getItemsImpl(
     for (const auto row : ext::range(0, rows))
     {
         const auto id = ids[row];
-        set_value(row, id < ext::size(attr) && loaded_ids[id] ? static_cast<OutputType>(attr[id]) : get_default(row));
+        set_value(row, id < ext::size(attr) && loaded_ids[id] ? static_cast<OutputType>(attr[id]) : default_value_extractor[row]);
     }
 
     query_count.fetch_add(rows, std::memory_order_relaxed);
diff --git a/src/Dictionaries/FlatDictionary.h b/src/Dictionaries/FlatDictionary.h
index ee461ba51ee..2aa32a1d8fe 100644
--- a/src/Dictionaries/FlatDictionary.h
+++ b/src/Dictionaries/FlatDictionary.h
@@ -24,6 +24,50 @@ namespace DB
 {
 using BlockPtr = std::shared_ptr<Block>;
 
+template <typename DefaultValue>
+class DefaultValueExtractor
+{
+public:
+    using ResultColumnType = std::conditional_t<
+        std::is_same_v<DefaultValue, StringRef>,
+        ColumnString,
+        std::conditional_t<IsDecimalNumber<DefaultValue>, ColumnDecimal<DefaultValue>, ColumnVector<DefaultValue>>>;
+
+    DefaultValueExtractor(DefaultValue default_value_, ColumnPtr default_values_)
+    {
+        if (default_values_ != nullptr)
+        {
+            if (const auto * const default_col = checkAndGetColumn<ResultColumnType>(*default_values_))
+            {
+                default_values = default_col;
+            }
+            else if (const auto * const default_col_const = checkAndGetColumnConst<ResultColumnType>(default_values_.get()))
+            {
+                using ConstColumnValue = std::conditional_t<std::is_same_v<DefaultValue, StringRef>, String, DefaultValue>;
+                default_value = std::make_optional<DefaultValue>(default_col_const->template getValue<ConstColumnValue>());
+            }
+            else
+                throw Exception{"Type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
+        }
+        else
+            default_value = std::make_optional<DefaultValue>(default_value_);
+    }
+
+    DefaultValue operator[](size_t row)
+    {
+        if (default_value)
+            return *default_value;
+        
+        if constexpr (std::is_same_v<ResultColumnType, ColumnString>)
+            return default_values->getDataAt(row);
+        else
+            return default_values->getData()[row];
+    }
+private:
+    const ResultColumnType * default_values = nullptr;
+    std::optional<DefaultValue> default_value = {};
+};
+
 class FlatDictionary final : public IDictionary
 {
 public:
@@ -79,7 +123,7 @@ public:
         const DataTypePtr & result_type,
         const Columns & key_columns,
         const DataTypes & key_types,
-        const ColumnPtr default_untyped) const override;
+        const ColumnPtr default_values_column) const override;
 
     ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override;
 
@@ -149,9 +193,9 @@ private:
 
     Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value);
 
-    template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
+    template <typename AttributeType, typename OutputType, typename ValueSetter>
     void getItemsImpl(
-        const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
+        const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultValueExtractor<AttributeType> & default_value_extractor) const;
 
     template <typename T>
     void resize(Attribute & attribute, const Key id);
diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp
index eb742834bac..b71ee666d9e 100644
--- a/src/Dictionaries/HashedDictionary.cpp
+++ b/src/Dictionaries/HashedDictionary.cpp
@@ -133,7 +133,7 @@ ColumnPtr HashedDictionary::getColumn(
     const DataTypePtr & result_type,
     const Columns & key_columns,
     const DataTypes &,
-    const ColumnPtr default_untyped) const
+    const ColumnPtr default_values_column) const
 {
     ColumnPtr result;
 
@@ -155,9 +155,9 @@ ColumnPtr HashedDictionary::getColumn(
             auto column_string = ColumnString::create();
             auto * out = column_string.get();
 
-            if (default_untyped != nullptr)
+            if (default_values_column != nullptr)
             {
-                if (const auto * const default_col = checkAndGetColumn<ColumnString>(*default_untyped))
+                if (const auto * const default_col = checkAndGetColumn<ColumnString>(*default_values_column))
                 {
                     getItemsImpl<StringRef, StringRef>(
                         attribute,
@@ -165,7 +165,7 @@ ColumnPtr HashedDictionary::getColumn(
                         [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
                         [&](const size_t row) { return default_col->getDataAt(row); });
                 }
-                else if (const auto * const default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
+                else if (const auto * const default_col_const = checkAndGetColumnConst<ColumnString>(default_values_column.get()))
                 {
                     const auto & def = default_col_const->template getValue<String>();
 
@@ -209,9 +209,9 @@ ColumnPtr HashedDictionary::getColumn(
 
             auto & out = column->getData();
 
-            if (default_untyped != nullptr)
+            if (default_values_column != nullptr)
             {
-                if (const auto * const default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
+                if (const auto * const default_col = checkAndGetColumn<ResultColumnType>(*default_values_column))
                 {
                     getItemsImpl<AttributeType, AttributeType>(
                         attribute,
@@ -220,7 +220,7 @@ ColumnPtr HashedDictionary::getColumn(
                         [&](const size_t row) { return default_col->getData()[row]; }
                     );
                 }
-                else if (const auto * const default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
+                else if (const auto * const default_col_const = checkAndGetColumnConst<ResultColumnType>(default_values_column.get()))
                 {
                     const auto & def = default_col_const->template getValue<AttributeType>();
 
diff --git a/src/Dictionaries/HashedDictionary.h b/src/Dictionaries/HashedDictionary.h
index 25024a28d73..776b2f37f36 100644
--- a/src/Dictionaries/HashedDictionary.h
+++ b/src/Dictionaries/HashedDictionary.h
@@ -75,7 +75,7 @@ public:
         const DataTypePtr & result_type,
         const Columns & key_columns,
         const DataTypes & key_types,
-        const ColumnPtr default_untyped) const override;
+        const ColumnPtr default_values_column) const override;
 
     ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override;
 
diff --git a/src/Dictionaries/IDictionary.h b/src/Dictionaries/IDictionary.h
index 0de251ff8b4..c774ab7f676 100644
--- a/src/Dictionaries/IDictionary.h
+++ b/src/Dictionaries/IDictionary.h
@@ -102,7 +102,7 @@ struct IDictionaryBase : public IExternalLoadable
         const DataTypePtr & result_type [[maybe_unused]],
         const Columns & key_columns [[maybe_unused]],
         const DataTypes & key_types [[maybe_unused]],
-        const ColumnPtr default_untyped [[maybe_unused]]) const /* = 0; */
+        const ColumnPtr default_values_column [[maybe_unused]]) const /* = 0; */
     {
         throw Exception(ErrorCodes::NOT_IMPLEMENTED,
                         "Get column not supported", getDictionaryID().getNameForLogs());

From c4ffa2160f05144440fb1dcf350f531e13d74bb0 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Sat, 23 Jan 2021 16:18:24 +0300
Subject: [PATCH 0245/1238] Updated interfaces. Added documentation.

---
 src/Core/Block.h                              |   1 +
 src/Dictionaries/CacheDictionary.cpp          | 438 ++++++++++++++----
 src/Dictionaries/CacheDictionary.h            |  24 +-
 src/Dictionaries/CacheDictionary.inc.h        | 368 ---------------
 .../ComplexKeyCacheDictionary.cpp             | 121 ++---
 src/Dictionaries/ComplexKeyCacheDictionary.h  |  20 +-
 .../ComplexKeyDirectDictionary.cpp            | 192 ++------
 src/Dictionaries/ComplexKeyDirectDictionary.h |  18 +-
 .../ComplexKeyHashedDictionary.cpp            | 189 ++------
 src/Dictionaries/ComplexKeyHashedDictionary.h |  20 +-
 src/Dictionaries/DictionaryBlockInputStream.h |  35 +-
 src/Dictionaries/DictionaryHelpers.h          | 142 ++++++
 src/Dictionaries/DirectDictionary.cpp         | 204 ++------
 src/Dictionaries/DirectDictionary.h           |  16 +-
 src/Dictionaries/FlatDictionary.cpp           |  58 +--
 src/Dictionaries/FlatDictionary.h             |  56 +--
 src/Dictionaries/HashedDictionary.cpp         | 138 ++----
 src/Dictionaries/HashedDictionary.h           |  21 +-
 src/Dictionaries/IDictionary.h                |  96 ++--
 src/Dictionaries/IPAddressDictionary.cpp      | 138 ++----
 src/Dictionaries/IPAddressDictionary.h        |  23 +-
 src/Dictionaries/PolygonDictionary.cpp        | 125 ++---
 src/Dictionaries/PolygonDictionary.h          |  15 +-
 src/Dictionaries/RangeHashedDictionary.cpp    | 198 ++------
 src/Dictionaries/RangeHashedDictionary.h      |  21 +-
 src/Dictionaries/SSDCacheDictionary.cpp       | 124 ++---
 src/Dictionaries/SSDCacheDictionary.h         |  41 +-
 .../SSDComplexKeyCacheDictionary.cpp          | 143 ++----
 .../SSDComplexKeyCacheDictionary.h            |  45 +-
 src/Functions/FunctionsExternalDictionaries.h |  22 +-
 30 files changed, 1133 insertions(+), 1919 deletions(-)
 delete mode 100644 src/Dictionaries/CacheDictionary.inc.h
 create mode 100644 src/Dictionaries/DictionaryHelpers.h

diff --git a/src/Core/Block.h b/src/Core/Block.h
index eef3c27363b..f588373aaed 100644
--- a/src/Core/Block.h
+++ b/src/Core/Block.h
@@ -163,6 +163,7 @@ private:
     friend class ActionsDAG;
 };
 
+using BlockPtr = std::shared_ptr<Block>;
 using Blocks = std::vector<Block>;
 using BlocksList = std::list<Block>;
 using BlocksPtr = std::shared_ptr<Blocks>;
diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp
index 5b4690c206e..eac2cd1b41d 100644
--- a/src/Dictionaries/CacheDictionary.cpp
+++ b/src/Dictionaries/CacheDictionary.cpp
@@ -13,9 +13,10 @@
 #include <IO/WriteBufferFromOStream.h>
 #include <ext/range.h>
 #include <ext/size.h>
+#include <ext/map.h>
+#include <ext/chrono_io.h>
 #include <Common/setThreadName.h>
 #include <DataTypes/DataTypesDecimal.h>
-#include "CacheDictionary.inc.h"
 #include "DictionaryBlockInputStream.h"
 #include "DictionaryFactory.h"
 #include <Functions/FunctionHelpers.h>
@@ -131,8 +132,8 @@ const IDictionarySource * CacheDictionary::getSource() const
 void CacheDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const
 {
     const auto null_value = std::get<UInt64>(hierarchical_attribute->null_value);
-
-    getItemsNumberImpl<UInt64, UInt64>(*hierarchical_attribute, ids, out, [&](const size_t) { return null_value; });
+    DictionaryDefaultValueExtractor<UInt64> default_value_extractor(null_value);
+    getItemsNumberImpl<UInt64, UInt64>(*hierarchical_attribute, ids, out, default_value_extractor);
 }
 
 
@@ -255,12 +256,13 @@ ColumnPtr CacheDictionary::getColumn(
     const DataTypePtr & result_type,
     const Columns & key_columns,
     const DataTypes &,
-    const ColumnPtr default_untyped) const
+    const ColumnPtr default_values_column) const
 {
     ColumnPtr result;
 
     PaddedPODArray<Key> backup_storage;
-    const auto & ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
+    const auto & keys = getColumnVectorData(this, key_columns.front(), backup_storage);
+    auto keys_size = keys.size();
 
     auto & attribute = getAttribute(attribute_name);
     const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
@@ -269,94 +271,25 @@ ColumnPtr CacheDictionary::getColumn(
     {
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
         using AttributeType = typename Type::AttributeType;
+        using ValueType = DictionaryValueType<AttributeType>;
+        using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
+ 
+        const auto null_value = static_cast<ValueType>(std::get<AttributeType>(attribute.null_value));
+        DictionaryDefaultValueExtractor<ValueType> default_value_extractor(null_value, default_values_column);
 
-        auto identifiers_size = ids.size();
+        auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
 
         if constexpr (std::is_same_v<AttributeType, String>)
         {
-            auto column_string = ColumnString::create();
-
-            if (default_untyped != nullptr)
-            {
-                if (const auto * const default_col = checkAndGetColumn<ColumnString>(*default_untyped))
-                {
-                    getItemsString(attribute, ids, column_string.get(), [&](const size_t row) { return default_col->getDataAt(row); });
-                }
-                else if (const auto * const default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
-                {
-                    const auto & def = default_col_const->template getValue<String>();
-
-                    getItemsString(attribute, ids, column_string.get(), [&](const size_t) { return StringRef{def}; });
-                }
-                else
-                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH}; 
-            }
-            else
-            {
-                const auto null_value = StringRef{std::get<String>(attribute.null_value)};
-
-                getItemsString(attribute, ids, column_string.get(), [&](const size_t) { return null_value; });
-            }
-
-            result = std::move(column_string);
+            getItemsString(attribute, keys, column.get(), default_value_extractor);
         }
         else
         {
-            using ResultColumnType
-                = std::conditional_t<IsDecimalNumber<AttributeType>, ColumnDecimal<AttributeType>, ColumnVector<AttributeType>>;
-            using ResultColumnPtr = typename ResultColumnType::MutablePtr;
-
-            ResultColumnPtr column;
-
-            if constexpr (IsDecimalNumber<AttributeType>)
-            {
-                auto scale = getDecimalScale(*dictionary_attribute.nested_type);
-                column = ColumnDecimal<AttributeType>::create(identifiers_size, scale);
-            }
-            else if constexpr (IsNumber<AttributeType>)
-                column = ColumnVector<AttributeType>::create(identifiers_size);
-
             auto & out = column->getData();
-
-            if (default_untyped != nullptr)
-            {
-                if (const auto * const default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
-                {
-                    getItemsNumberImpl<AttributeType, AttributeType>(
-                        attribute,
-                        ids,
-                        out,
-                        [&](const size_t row) { return default_col->getData()[row]; }
-                    );
-                }
-                else if (const auto * const default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
-                {
-                    const auto & def = default_col_const->template getValue<AttributeType>();
-
-                    getItemsNumberImpl<AttributeType, AttributeType>(
-                        attribute,
-                        ids,
-                        out,
-                        [&](const size_t) { return def; }
-                    );
-                }
-                else
-                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
-            }
-            else
-            {
-                const auto null_value = std::get<AttributeType>(attribute.null_value);
-
-                getItemsNumberImpl<AttributeType, AttributeType>(
-                    attribute,
-                    ids,
-                    out,
-                    [&](const size_t) { return null_value; }
-                );
-            }
-
-            result = std::move(column);
+            getItemsNumberImpl<AttributeType, AttributeType>(attribute, keys, out, default_value_extractor);
         }
+
+        result = std::move(column);
     };
 
     callOnDictionaryAttributeType(attribute.type, type_call);
@@ -364,6 +297,339 @@ ColumnPtr CacheDictionary::getColumn(
     return result;
 }
 
+template <typename AttributeType, typename OutputType>
+void CacheDictionary::getItemsNumberImpl(
+    Attribute & attribute, 
+    const PaddedPODArray<Key> & ids,
+    ResultArrayType<OutputType> & out,
+    DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const
+{
+    /// First fill everything with default values
+    const auto rows = ext::size(ids);
+    for (const auto row : ext::range(0, rows))
+        out[row] = default_value_extractor[row];
+
+    /// Maybe there are duplicate keys, so we remember their indices.
+    std::unordered_map<Key, std::vector<size_t>> cache_expired_or_not_found_ids;
+
+    auto & attribute_array = std::get<ContainerPtrType<AttributeType>>(attribute.arrays);
+
+    size_t cache_hit = 0;
+    size_t cache_not_found_count = 0;
+    size_t cache_expired_cound = 0;
+
+    {
+        const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
+
+        const auto now = std::chrono::system_clock::now();
+
+        auto insert_to_answer_routine = [&](size_t row, size_t idx)
+        {
+            auto & cell = cells[idx];
+            if (!cell.isDefault())
+                out[row] = static_cast<OutputType>(attribute_array[idx]);
+        };
+
+        /// fetch up-to-date values, decide which ones require update
+        for (const auto row : ext::range(0, rows))
+        {
+            const auto id = ids[row];
+
+            /** cell should be updated if either:
+                *    1. ids do not match,
+                *    2. cell has expired,
+                *    3. explicit defaults were specified and cell was set default. */
+
+            const auto [cell_idx, state] = findCellIdxForGet(id, now);
+
+            if (state == ResultState::FoundAndValid)
+            {
+                ++cache_hit;
+                insert_to_answer_routine(row, cell_idx);
+            }
+            else if (state == ResultState::NotFound || state == ResultState::FoundButExpiredPermanently)
+            {
+                ++cache_not_found_count;
+                cache_expired_or_not_found_ids[id].push_back(row);
+            }
+            else if (state == ResultState::FoundButExpired)
+            {
+                cache_expired_cound++;
+                cache_expired_or_not_found_ids[id].push_back(row);
+
+                if (allow_read_expired_keys)
+                    insert_to_answer_routine(row, cell_idx);
+            }
+        }
+    }
+
+    ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired_cound);
+    ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found_count);
+    ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
+
+    query_count.fetch_add(rows, std::memory_order_relaxed);
+    hit_count.fetch_add(rows - cache_not_found_count - cache_expired_cound, std::memory_order_release);
+
+    if (!cache_not_found_count)
+    {
+        /// Nothing to update - return
+        if (!cache_expired_cound)
+            return;
+
+        /// Update async only if allow_read_expired_keys_is_enabledadd condvar usage and better code
+        if (allow_read_expired_keys)
+        {
+            std::vector<Key> required_expired_ids;
+            required_expired_ids.reserve(cache_expired_cound);
+            std::transform(std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
+                           std::back_inserter(required_expired_ids), [](auto & pair) { return pair.first; });
+
+            /// request new values
+            auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_expired_ids));
+
+            tryPushToUpdateQueueOrThrow(update_unit_ptr);
+
+            /// Nothing to do - return
+            return;
+        }
+    }
+
+    /// From this point we have to update all keys sync.
+    /// Maybe allow_read_expired_keys_from_cache_dictionary is disabled
+    /// and there no cache_not_found_ids but some cache_expired.
+
+    std::vector<Key> required_ids;
+    required_ids.reserve(cache_not_found_count + cache_expired_cound);
+    std::transform(std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
+                   std::back_inserter(required_ids), [](auto & pair) { return pair.first; });
+
+    /// Request new values
+    auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_ids));
+
+    tryPushToUpdateQueueOrThrow(update_unit_ptr);
+    waitForCurrentUpdateFinish(update_unit_ptr);
+
+    /// Add updated keys to answer.
+
+    const size_t attribute_index = getAttributeIndex(attribute.name);
+
+    for (auto & [key, value] : update_unit_ptr->found_ids)
+    {
+        if (value.found)
+        {
+            for (const size_t row : cache_expired_or_not_found_ids[key])
+                out[row] = std::get<OutputType>(value.values[attribute_index]);
+        }
+    }
+}
+
+void CacheDictionary::getItemsString(
+    Attribute & attribute,
+    const PaddedPODArray<Key> & ids,
+    ColumnString * out,
+    DictionaryDefaultValueExtractor<StringRef> & default_value_extractor) const
+{
+    const auto rows = ext::size(ids);
+
+    /// Save on some allocations.
+    out->getOffsets().reserve(rows);
+
+    auto & attribute_array = std::get<ContainerPtrType<StringRef>>(attribute.arrays);
+
+    auto found_outdated_values = false;
+
+    /// Perform optimistic version, fallback to pessimistic if failed.
+    {
+        const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
+
+        const auto now = std::chrono::system_clock::now();
+
+        /// Fetch up-to-date values, discard on fail.
+        for (const auto row : ext::range(0, rows))
+        {
+            const auto id = ids[row];
+            const auto [cell_idx, state] = findCellIdxForGet(id, now);
+
+            if (state == ResultState::FoundAndValid)
+            {
+                auto & cell = cells[cell_idx];
+                const auto string_ref = cell.isDefault() ? default_value_extractor[row] : attribute_array[cell_idx];
+                out->insertData(string_ref.data, string_ref.size);
+            }
+            else
+            {
+                found_outdated_values = true;
+                break;
+            }
+        }
+    }
+
+    /// Optimistic code completed successfully.
+    if (!found_outdated_values)
+    {
+        query_count.fetch_add(rows, std::memory_order_relaxed);
+        hit_count.fetch_add(rows, std::memory_order_release);
+        ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, ids.size());
+        return;
+    }
+
+    /// Now onto the pessimistic one, discard possible partial results from the optimistic path.
+    out->getChars().resize_assume_reserved(0);
+    out->getOffsets().resize_assume_reserved(0);
+
+    /// Mapping: <id> -> { all indices `i` of `ids` such that `ids[i]` = <id> }
+    std::unordered_map<Key, std::vector<size_t>> cache_expired_or_not_found_ids;
+    /// we are going to store every string separately
+    std::unordered_map<Key, String> local_cache;
+
+    size_t cache_not_found_count = 0;
+    size_t cache_expired_count = 0;
+
+    size_t total_length = 0;
+    size_t cache_hit = 0;
+    {
+        const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
+
+        const auto now = std::chrono::system_clock::now();
+
+        auto insert_value_routine = [&](size_t row, size_t id, size_t cell_idx)
+        {
+            const auto & cell = cells[cell_idx];
+            const auto string_ref = cell.isDefault() ? default_value_extractor[row] : attribute_array[cell_idx];
+
+            /// Do not store default, but count it in total length.
+            if (!cell.isDefault())
+                local_cache[id] = String{string_ref};
+
+            total_length += string_ref.size + 1;
+        };
+
+        for (const auto row : ext::range(0, ids.size()))
+        {
+            const auto id = ids[row];
+            const auto [cell_idx, state] = findCellIdxForGet(id, now);
+
+            if (state == ResultState::FoundAndValid)
+            {
+                ++cache_hit;
+                insert_value_routine(row, id, cell_idx);
+            }
+            else if (state == ResultState::NotFound || state == ResultState::FoundButExpiredPermanently)
+            {
+                ++cache_not_found_count;
+                cache_expired_or_not_found_ids[id].push_back(row);
+            }
+            else if (state == ResultState::FoundButExpired)
+            {
+                ++cache_expired_count;
+                cache_expired_or_not_found_ids[id].push_back(row);
+
+                if (allow_read_expired_keys)
+                    insert_value_routine(row, id, cell_idx);
+            }
+        }
+    }
+
+    ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired_count);
+    ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found_count);
+    ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
+
+    query_count.fetch_add(rows, std::memory_order_relaxed);
+    hit_count.fetch_add(rows - cache_expired_count - cache_not_found_count, std::memory_order_release);
+
+    /// Async update of expired keys.
+    if (!cache_not_found_count)
+    {
+        if (allow_read_expired_keys && cache_expired_count)
+        {
+            std::vector<Key> required_expired_ids;
+            required_expired_ids.reserve(cache_expired_count);
+            std::transform(std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
+                           std::back_inserter(required_expired_ids), [](auto & pair) { return pair.first; });
+
+            auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_expired_ids));
+
+            tryPushToUpdateQueueOrThrow(update_unit_ptr);
+
+            /// Insert all found keys and defaults to output array.
+            out->getChars().reserve(total_length);
+
+            for (const auto row : ext::range(0, ext::size(ids)))
+            {
+                const auto id = ids[row];
+                StringRef value;
+
+                /// Previously we stored found keys in map.
+                const auto it = local_cache.find(id);
+                if (it != local_cache.end())
+                    value = StringRef(it->second);
+                else
+                    value = default_value_extractor[row];
+
+                out->insertData(value.data, value.size);
+            }
+
+            /// Nothing to do else.
+            return;
+        }
+    }
+
+    /// We will request both cache_not_found_ids and cache_expired_ids sync.
+    std::vector<Key> required_ids;
+    required_ids.reserve(cache_not_found_count + cache_expired_count);
+    std::transform(
+        std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
+        std::back_inserter(required_ids), [](auto & pair) { return pair.first; });
+
+    auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_ids));
+
+    tryPushToUpdateQueueOrThrow(update_unit_ptr);
+    waitForCurrentUpdateFinish(update_unit_ptr);
+
+    const size_t attribute_index = getAttributeIndex(attribute.name);
+
+    /// Only calculate the total length.
+    for (auto & [key, value] : update_unit_ptr->found_ids)
+    {
+        if (value.found)
+        {
+            const auto found_value_ref = std::get<String>(value.values[attribute_index]);
+            total_length += (found_value_ref.size() + 1) * cache_expired_or_not_found_ids[key].size();
+        }
+        else
+        {
+            for (const auto row : cache_expired_or_not_found_ids[key])
+                total_length += default_value_extractor[row].size + 1;
+        }
+    }
+
+    out->getChars().reserve(total_length);
+
+    for (const auto row : ext::range(0, ext::size(ids)))
+    {
+        const auto id = ids[row];
+        StringRef value;
+
+        /// We have two maps: found in cache and found in source.
+        const auto local_it = local_cache.find(id);
+        if (local_it != local_cache.end())
+            value = StringRef(local_it->second);
+        else
+        {
+            const auto found_it = update_unit_ptr->found_ids.find(id);
+
+            /// Previously we didn't store defaults in local cache.
+            if (found_it != update_unit_ptr->found_ids.end() && found_it->second.found)
+                value = std::get<String>(found_it->second.values[attribute_index]);
+            else
+                value = default_value_extractor[row];
+        }
+
+        out->insertData(value.data, value.size);
+    }
+}
+
+
 template<class... Ts>
 struct Overloaded : Ts... {using Ts::operator()...;};
 
@@ -462,10 +728,10 @@ size_t CacheDictionary::findCellIdxForSet(const Key & id) const
     return oldest_id;
 }
 
-ColumnUInt8::Ptr CacheDictionary::has(const Columns & key_columns, const DataTypes &) const
+ColumnUInt8::Ptr CacheDictionary::hasKeys(const Columns & key_columns, const DataTypes &) const
 {
     PaddedPODArray<Key> backup_storage;
-    const auto& ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
+    const auto& ids = getColumnVectorData(this, key_columns.front(), backup_storage);
 
     auto result = ColumnUInt8::create(ext::size(ids));
     auto& out = result->getData();
diff --git a/src/Dictionaries/CacheDictionary.h b/src/Dictionaries/CacheDictionary.h
index 70bea884ae4..9f0b3fa4f63 100644
--- a/src/Dictionaries/CacheDictionary.h
+++ b/src/Dictionaries/CacheDictionary.h
@@ -21,6 +21,7 @@
 #include "DictionaryStructure.h"
 #include "IDictionary.h"
 #include "IDictionarySource.h"
+#include "DictionaryHelpers.h"
 
 namespace CurrentMetrics
 {
@@ -119,16 +120,16 @@ public:
 
     std::exception_ptr getLastException() const override;
 
-    DictionaryIdentifierType getIdentifierType() const override { return DictionaryIdentifierType::simple; }
+    DictionaryKeyType getKeyType() const override { return DictionaryKeyType::simple; }
 
     ColumnPtr getColumn(
         const std::string& attribute_name,
         const DataTypePtr & result_type,
         const Columns & key_columns,
         const DataTypes & key_types,
-        const ColumnPtr default_untyped) const override;
+        const ColumnPtr default_values_column) const override;
 
-    ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override;
+    ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
 
     template <typename T>
     using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
@@ -203,12 +204,18 @@ private:
     /* NOLINTNEXTLINE(readability-convert-member-functions-to-static) */
     Attribute createAttributeWithTypeAndName(const AttributeUnderlyingType type, const String & name, const Field & null_value);
 
-    template <typename AttributeType, typename OutputType, typename DefaultGetter>
+    template <typename AttributeType, typename OutputType>
     void getItemsNumberImpl(
-        Attribute & attribute, const PaddedPODArray<Key> & ids, ResultArrayType<OutputType> & out, DefaultGetter && get_default) const;
+        Attribute & attribute,
+        const PaddedPODArray<Key> & ids,
+        ResultArrayType<OutputType> & out,
+        DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const;
 
-    template <typename DefaultGetter>
-    void getItemsString(Attribute & attribute, const PaddedPODArray<Key> & ids, ColumnString * out, DefaultGetter && get_default) const;
+    void getItemsString(
+        Attribute & attribute,
+        const PaddedPODArray<Key> & ids,
+        ColumnString * out,
+        DictionaryDefaultValueExtractor<StringRef> & default_value_extractor) const;
 
     PaddedPODArray<Key> getCachedIds() const;
 
@@ -399,5 +406,6 @@ private:
     mutable std::condition_variable is_update_finished;
 
     std::atomic<bool> finished{false};
-    };
+};
+
 }
diff --git a/src/Dictionaries/CacheDictionary.inc.h b/src/Dictionaries/CacheDictionary.inc.h
deleted file mode 100644
index 803b3b2566f..00000000000
--- a/src/Dictionaries/CacheDictionary.inc.h
+++ /dev/null
@@ -1,368 +0,0 @@
-#pragma once
-
-#include <stdexcept>
-
-#include "CacheDictionary.h"
-#include <Columns/ColumnsNumber.h>
-#include <Common/ProfilingScopedRWLock.h>
-#include <Common/typeid_cast.h>
-#include <DataStreams/IBlockInputStream.h>
-#include <ext/chrono_io.h>
-#include <ext/map.h>
-#include <ext/range.h>
-#include <ext/size.h>
-
-
-namespace ProfileEvents
-{
-extern const Event DictCacheKeysRequested;
-extern const Event DictCacheKeysRequestedMiss;
-extern const Event DictCacheKeysRequestedFound;
-extern const Event DictCacheKeysExpired;
-extern const Event DictCacheKeysNotFound;
-extern const Event DictCacheKeysHit;
-extern const Event DictCacheRequestTimeNs;
-extern const Event DictCacheRequests;
-extern const Event DictCacheLockWriteNs;
-extern const Event DictCacheLockReadNs;
-}
-
-namespace CurrentMetrics
-{
-extern const Metric DictCacheRequests;
-}
-
-namespace DB
-{
-namespace ErrorCodes
-{
-}
-
-template <typename AttributeType, typename OutputType, typename DefaultGetter>
-void CacheDictionary::getItemsNumberImpl(
-    Attribute & attribute, const PaddedPODArray<Key> & ids, ResultArrayType<OutputType> & out, DefaultGetter && get_default) const
-{
-    /// First fill everything with default values
-    const auto rows = ext::size(ids);
-    for (const auto row : ext::range(0, rows))
-        out[row] = get_default(row);
-
-    /// Maybe there are duplicate keys, so we remember their indices.
-    std::unordered_map<Key, std::vector<size_t>> cache_expired_or_not_found_ids;
-
-    auto & attribute_array = std::get<ContainerPtrType<AttributeType>>(attribute.arrays);
-
-    size_t cache_hit = 0;
-    size_t cache_not_found_count = 0;
-    size_t cache_expired_cound = 0;
-
-    {
-        const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
-
-        const auto now = std::chrono::system_clock::now();
-
-        auto insert_to_answer_routine = [&](size_t row, size_t idx)
-        {
-            auto & cell = cells[idx];
-            if (!cell.isDefault())
-                out[row] = static_cast<OutputType>(attribute_array[idx]);
-        };
-
-        /// fetch up-to-date values, decide which ones require update
-        for (const auto row : ext::range(0, rows))
-        {
-            const auto id = ids[row];
-
-            /** cell should be updated if either:
-                *    1. ids do not match,
-                *    2. cell has expired,
-                *    3. explicit defaults were specified and cell was set default. */
-
-            const auto [cell_idx, state] = findCellIdxForGet(id, now);
-
-            if (state == ResultState::FoundAndValid)
-            {
-                ++cache_hit;
-                insert_to_answer_routine(row, cell_idx);
-            }
-            else if (state == ResultState::NotFound || state == ResultState::FoundButExpiredPermanently)
-            {
-                ++cache_not_found_count;
-                cache_expired_or_not_found_ids[id].push_back(row);
-            }
-            else if (state == ResultState::FoundButExpired)
-            {
-                cache_expired_cound++;
-                cache_expired_or_not_found_ids[id].push_back(row);
-
-                if (allow_read_expired_keys)
-                    insert_to_answer_routine(row, cell_idx);
-            }
-        }
-    }
-
-    ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired_cound);
-    ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found_count);
-    ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
-
-    query_count.fetch_add(rows, std::memory_order_relaxed);
-    hit_count.fetch_add(rows - cache_not_found_count - cache_expired_cound, std::memory_order_release);
-
-    if (!cache_not_found_count)
-    {
-        /// Nothing to update - return
-        if (!cache_expired_cound)
-            return;
-
-        /// Update async only if allow_read_expired_keys_is_enabledadd condvar usage and better code
-        if (allow_read_expired_keys)
-        {
-            std::vector<Key> required_expired_ids;
-            required_expired_ids.reserve(cache_expired_cound);
-            std::transform(std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
-                           std::back_inserter(required_expired_ids), [](auto & pair) { return pair.first; });
-
-            /// request new values
-            auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_expired_ids));
-
-            tryPushToUpdateQueueOrThrow(update_unit_ptr);
-
-            /// Nothing to do - return
-            return;
-        }
-    }
-
-    /// From this point we have to update all keys sync.
-    /// Maybe allow_read_expired_keys_from_cache_dictionary is disabled
-    /// and there no cache_not_found_ids but some cache_expired.
-
-    std::vector<Key> required_ids;
-    required_ids.reserve(cache_not_found_count + cache_expired_cound);
-    std::transform(std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
-                   std::back_inserter(required_ids), [](auto & pair) { return pair.first; });
-
-    /// Request new values
-    auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_ids));
-
-    tryPushToUpdateQueueOrThrow(update_unit_ptr);
-    waitForCurrentUpdateFinish(update_unit_ptr);
-
-    /// Add updated keys to answer.
-
-    const size_t attribute_index = getAttributeIndex(attribute.name);
-
-    for (auto & [key, value] : update_unit_ptr->found_ids)
-    {
-        if (value.found)
-        {
-            for (const size_t row : cache_expired_or_not_found_ids[key])
-                out[row] = std::get<OutputType>(value.values[attribute_index]);
-        }
-    }
-}
-
-template <typename DefaultGetter>
-void CacheDictionary::getItemsString(
-    Attribute & attribute, const PaddedPODArray<Key> & ids, ColumnString * out, DefaultGetter && get_default) const
-{
-    const auto rows = ext::size(ids);
-
-    /// Save on some allocations.
-    out->getOffsets().reserve(rows);
-
-    auto & attribute_array = std::get<ContainerPtrType<StringRef>>(attribute.arrays);
-
-    auto found_outdated_values = false;
-
-    /// Perform optimistic version, fallback to pessimistic if failed.
-    {
-        const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
-
-        const auto now = std::chrono::system_clock::now();
-
-        /// Fetch up-to-date values, discard on fail.
-        for (const auto row : ext::range(0, rows))
-        {
-            const auto id = ids[row];
-            const auto [cell_idx, state] = findCellIdxForGet(id, now);
-
-            if (state == ResultState::FoundAndValid)
-            {
-                auto & cell = cells[cell_idx];
-                const auto string_ref = cell.isDefault() ? get_default(row) : attribute_array[cell_idx];
-                out->insertData(string_ref.data, string_ref.size);
-            }
-            else
-            {
-                found_outdated_values = true;
-                break;
-            }
-        }
-    }
-
-    /// Optimistic code completed successfully.
-    if (!found_outdated_values)
-    {
-        query_count.fetch_add(rows, std::memory_order_relaxed);
-        hit_count.fetch_add(rows, std::memory_order_release);
-        ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, ids.size());
-        return;
-    }
-
-    /// Now onto the pessimistic one, discard possible partial results from the optimistic path.
-    out->getChars().resize_assume_reserved(0);
-    out->getOffsets().resize_assume_reserved(0);
-
-    /// Mapping: <id> -> { all indices `i` of `ids` such that `ids[i]` = <id> }
-    std::unordered_map<Key, std::vector<size_t>> cache_expired_or_not_found_ids;
-    /// we are going to store every string separately
-    std::unordered_map<Key, String> local_cache;
-
-    size_t cache_not_found_count = 0;
-    size_t cache_expired_count = 0;
-
-    size_t total_length = 0;
-    size_t cache_hit = 0;
-    {
-        const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
-
-        const auto now = std::chrono::system_clock::now();
-
-        auto insert_value_routine = [&](size_t row, size_t id, size_t cell_idx)
-        {
-            const auto & cell = cells[cell_idx];
-            const auto string_ref = cell.isDefault() ? get_default(row) : attribute_array[cell_idx];
-
-            /// Do not store default, but count it in total length.
-            if (!cell.isDefault())
-                local_cache[id] = String{string_ref};
-
-            total_length += string_ref.size + 1;
-        };
-
-        for (const auto row : ext::range(0, ids.size()))
-        {
-            const auto id = ids[row];
-            const auto [cell_idx, state] = findCellIdxForGet(id, now);
-
-            if (state == ResultState::FoundAndValid)
-            {
-                ++cache_hit;
-                insert_value_routine(row, id, cell_idx);
-            }
-            else if (state == ResultState::NotFound || state == ResultState::FoundButExpiredPermanently)
-            {
-                ++cache_not_found_count;
-                cache_expired_or_not_found_ids[id].push_back(row);
-            }
-            else if (state == ResultState::FoundButExpired)
-            {
-                ++cache_expired_count;
-                cache_expired_or_not_found_ids[id].push_back(row);
-
-                if (allow_read_expired_keys)
-                    insert_value_routine(row, id, cell_idx);
-            }
-        }
-    }
-
-    ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired_count);
-    ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found_count);
-    ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);
-
-    query_count.fetch_add(rows, std::memory_order_relaxed);
-    hit_count.fetch_add(rows - cache_expired_count - cache_not_found_count, std::memory_order_release);
-
-    /// Async update of expired keys.
-    if (!cache_not_found_count)
-    {
-        if (allow_read_expired_keys && cache_expired_count)
-        {
-            std::vector<Key> required_expired_ids;
-            required_expired_ids.reserve(cache_expired_count);
-            std::transform(std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
-                           std::back_inserter(required_expired_ids), [](auto & pair) { return pair.first; });
-
-            auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_expired_ids));
-
-            tryPushToUpdateQueueOrThrow(update_unit_ptr);
-
-            /// Insert all found keys and defaults to output array.
-            out->getChars().reserve(total_length);
-
-            for (const auto row : ext::range(0, ext::size(ids)))
-            {
-                const auto id = ids[row];
-                StringRef value;
-
-                /// Previously we stored found keys in map.
-                const auto it = local_cache.find(id);
-                if (it != local_cache.end())
-                    value = StringRef(it->second);
-                else
-                    value = get_default(row);
-
-                out->insertData(value.data, value.size);
-            }
-
-            /// Nothing to do else.
-            return;
-        }
-    }
-
-    /// We will request both cache_not_found_ids and cache_expired_ids sync.
-    std::vector<Key> required_ids;
-    required_ids.reserve(cache_not_found_count + cache_expired_count);
-    std::transform(
-        std::begin(cache_expired_or_not_found_ids), std::end(cache_expired_or_not_found_ids),
-        std::back_inserter(required_ids), [](auto & pair) { return pair.first; });
-
-    auto update_unit_ptr = std::make_shared<UpdateUnit>(std::move(required_ids));
-
-    tryPushToUpdateQueueOrThrow(update_unit_ptr);
-    waitForCurrentUpdateFinish(update_unit_ptr);
-
-    const size_t attribute_index = getAttributeIndex(attribute.name);
-
-    /// Only calculate the total length.
-    for (auto & [key, value] : update_unit_ptr->found_ids)
-    {
-        if (value.found)
-        {
-            const auto found_value_ref = std::get<String>(value.values[attribute_index]);
-            total_length += (found_value_ref.size() + 1) * cache_expired_or_not_found_ids[key].size();
-        }
-        else
-        {
-            for (const auto row : cache_expired_or_not_found_ids[key])
-                total_length += get_default(row).size + 1;
-        }
-    }
-
-    out->getChars().reserve(total_length);
-
-    for (const auto row : ext::range(0, ext::size(ids)))
-    {
-        const auto id = ids[row];
-        StringRef value;
-
-        /// We have two maps: found in cache and found in source.
-        const auto local_it = local_cache.find(id);
-        if (local_it != local_cache.end())
-            value = StringRef(local_it->second);
-        else
-        {
-            const auto found_it = update_unit_ptr->found_ids.find(id);
-
-            /// Previously we didn't store defaults in local cache.
-            if (found_it != update_unit_ptr->found_ids.end() && found_it->second.found)
-                value = std::get<String>(found_it->second.values[attribute_index]);
-            else
-                value = get_default(row);
-        }
-
-        out->insertData(value.data, value.size);
-    }
-}
-
-}
diff --git a/src/Dictionaries/ComplexKeyCacheDictionary.cpp b/src/Dictionaries/ComplexKeyCacheDictionary.cpp
index d27a8c1483e..6775f2b737b 100644
--- a/src/Dictionaries/ComplexKeyCacheDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyCacheDictionary.cpp
@@ -76,7 +76,7 @@ ColumnPtr ComplexKeyCacheDictionary::getColumn(
     const DataTypePtr & result_type,
     const Columns & key_columns,
     const DataTypes & key_types,
-    const ColumnPtr default_untyped) const
+    const ColumnPtr default_values_column) const
 {
     dict_struct.validateKeyTypes(key_types);
 
@@ -91,93 +91,26 @@ ColumnPtr ComplexKeyCacheDictionary::getColumn(
     {
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
         using AttributeType = typename Type::AttributeType;
+        using ValueType = DictionaryValueType<AttributeType>;
+        using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>; 
+
+        const auto null_value = ValueType{std::get<AttributeType>(attribute.null_values)};
+        DictionaryDefaultValueExtractor<ValueType> default_value_extractor(null_value, default_values_column);
+
+        auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
 
         if constexpr (std::is_same_v<AttributeType, String>)
         {
-            auto column_string = ColumnString::create();
-            auto * out = column_string.get();
-
-            if (default_untyped != nullptr)
-            {
-                if (const auto * const default_col = checkAndGetColumn<ColumnString>(*default_untyped))
-                {
-                    getItemsString(attribute, key_columns, out, [&](const size_t row) { return default_col->getDataAt(row); });
-                }
-                else if (const auto * const default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
-                {
-                    const auto & def = default_col_const->template getValue<String>();
-
-                    getItemsString(attribute, key_columns, out, [&](const size_t) { return StringRef{def}; });
-                }
-                else
-                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
-            }
-            else
-            {
-                    const auto null_value = StringRef{std::get<String>(attribute.null_values)};
-
-                    getItemsString(attribute, key_columns, out, [&](const size_t) { return null_value; });
-            }
-
-            result = std::move(column_string);
+            auto * out = column.get();
+            getItemsString(attribute, key_columns, out, default_value_extractor);
         }
         else
         {
-            using ResultColumnType
-                = std::conditional_t<IsDecimalNumber<AttributeType>, ColumnDecimal<AttributeType>, ColumnVector<AttributeType>>;
-            using ResultColumnPtr = typename ResultColumnType::MutablePtr;
-
-            ResultColumnPtr column;
-
-            if constexpr (IsDecimalNumber<AttributeType>)
-            {
-                auto scale = getDecimalScale(*dictionary_attribute.nested_type);
-                column = ColumnDecimal<AttributeType>::create(size, scale);
-            }
-            else if constexpr (IsNumber<AttributeType>)
-                column = ColumnVector<AttributeType>::create(keys_size);
-
             auto & out = column->getData();
-
-            if (default_untyped != nullptr)
-            {
-                if (const auto * const default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
-                {
-                    getItemsNumberImpl<AttributeType, AttributeType>(
-                        attribute,
-                        key_columns,
-                        out,
-                        [&](const size_t row) { return default_col->getData()[row]; }
-                    );
-                }
-                else if (const auto * const default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
-                {
-                    const auto & def = default_col_const->template getValue<AttributeType>();
-
-                    getItemsNumberImpl<AttributeType, AttributeType>(
-                        attribute,
-                        key_columns,
-                        out,
-                        [&](const size_t) { return def; }
-                    );
-                }
-                else
-                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
-            }
-            else
-            {
-                const auto null_value = std::get<AttributeType>(attribute.null_values);
-
-                getItemsNumberImpl<AttributeType, AttributeType>(
-                    attribute,
-                    key_columns,
-                    out,
-                    [&](const size_t) { return null_value; }
-                );
-            }
-
-            result = std::move(column);
+            getItemsNumberImpl<AttributeType, AttributeType>(attribute, key_columns, out, default_value_extractor);
         }
+
+        result = std::move(column);
     };
 
     callOnDictionaryAttributeType(attribute.type, type_call);
@@ -229,7 +162,7 @@ ComplexKeyCacheDictionary::findCellIdx(const StringRef & key, const CellMetadata
     return {oldest_id, false, false};
 }
 
-ColumnUInt8::Ptr ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes & key_types) const
+ColumnUInt8::Ptr ComplexKeyCacheDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
 {
     dict_struct.validateKeyTypes(key_types);
 
@@ -315,9 +248,12 @@ ColumnUInt8::Ptr ComplexKeyCacheDictionary::has(const Columns & key_columns, con
 }
 
 
-template <typename AttributeType, typename OutputType, typename DefaultGetter>
+template <typename AttributeType, typename OutputType>
 void ComplexKeyCacheDictionary::getItemsNumberImpl(
-    Attribute & attribute, const Columns & key_columns, PaddedPODArray<OutputType> & out, DefaultGetter && get_default) const
+    Attribute & attribute, 
+    const Columns & key_columns,
+    PaddedPODArray<OutputType> & out,
+    DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const
 {
     /// Mapping: <key> -> { all indices `i` of `key_columns` such that `key_columns[i]` = <key> }
     MapType<std::vector<size_t>> outdated_keys;
@@ -359,7 +295,7 @@ void ComplexKeyCacheDictionary::getItemsNumberImpl(
                 ++cache_hit;
                 const auto & cell_idx = find_result.cell_idx;
                 const auto & cell = cells[cell_idx];
-                out[row] = cell.isDefault() ? get_default(row) : static_cast<OutputType>(attribute_array[cell_idx]);
+                out[row] = cell.isDefault() ? default_value_extractor[row] : static_cast<OutputType>(attribute_array[cell_idx]);
             }
         }
     }
@@ -391,12 +327,15 @@ void ComplexKeyCacheDictionary::getItemsNumberImpl(
         [&](const StringRef key, const size_t)
         {
             for (const auto row : outdated_keys[key])
-                out[row] = get_default(row);
+                out[row] = default_value_extractor[row];
         });
 }
 
-template <typename DefaultGetter>
-void ComplexKeyCacheDictionary::getItemsString(Attribute & attribute, const Columns & key_columns, ColumnString * out, DefaultGetter && get_default) const
+void ComplexKeyCacheDictionary::getItemsString(
+    Attribute & attribute,
+    const Columns & key_columns,
+    ColumnString * out,
+    DictionaryDefaultValueExtractor<StringRef> & default_value_extractor) const
 {
     const auto rows_num = key_columns.front()->size();
     /// save on some allocations
@@ -431,7 +370,7 @@ void ComplexKeyCacheDictionary::getItemsString(Attribute & attribute, const Colu
             {
                 const auto & cell_idx = find_result.cell_idx;
                 const auto & cell = cells[cell_idx];
-                const auto string_ref = cell.isDefault() ? get_default(row) : attribute_array[cell_idx];
+                const auto string_ref = cell.isDefault() ? default_value_extractor[row] : attribute_array[cell_idx];
                 out->insertData(string_ref.data, string_ref.size);
             }
         }
@@ -480,7 +419,7 @@ void ComplexKeyCacheDictionary::getItemsString(Attribute & attribute, const Colu
                 ++cache_hit;
                 const auto & cell_idx = find_result.cell_idx;
                 const auto & cell = cells[cell_idx];
-                const auto string_ref = cell.isDefault() ? get_default(row) : attribute_array[cell_idx];
+                const auto string_ref = cell.isDefault() ? default_value_extractor[row] : attribute_array[cell_idx];
 
                 if (!cell.isDefault())
                     map[key] = copyIntoArena(string_ref, temporary_keys_pool);
@@ -524,7 +463,7 @@ void ComplexKeyCacheDictionary::getItemsString(Attribute & attribute, const Colu
             [&](const StringRef key, const size_t)
             {
                 for (const auto row : outdated_keys[key])
-                    total_length += get_default(row).size + 1;
+                    total_length += default_value_extractor[row].size + 1;
             });
     }
 
@@ -534,7 +473,7 @@ void ComplexKeyCacheDictionary::getItemsString(Attribute & attribute, const Colu
     {
         const StringRef key = keys_array[row];
         const auto it = map.find(key);
-        const auto string_ref = it ? it->getMapped() : get_default(row);
+        const auto string_ref = it ? it->getMapped() : default_value_extractor[row];
         out->insertData(string_ref.data, string_ref.size);
     }
 }
diff --git a/src/Dictionaries/ComplexKeyCacheDictionary.h b/src/Dictionaries/ComplexKeyCacheDictionary.h
index 45719171478..de6fb508e5a 100644
--- a/src/Dictionaries/ComplexKeyCacheDictionary.h
+++ b/src/Dictionaries/ComplexKeyCacheDictionary.h
@@ -23,7 +23,7 @@
 #include "IDictionary.h"
 #include "IDictionarySource.h"
 #include <DataStreams/IBlockInputStream.h>
-
+#include "DictionaryHelpers.h"
 
 namespace ProfileEvents
 {
@@ -89,7 +89,7 @@ public:
         return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
     }
 
-    DictionaryIdentifierType getIdentifierType() const override { return DictionaryIdentifierType::complex; }
+    DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
 
     ColumnPtr getColumn(
         const std::string& attribute_name,
@@ -98,7 +98,7 @@ public:
         const DataTypes & key_types,
         const ColumnPtr default_untyped) const override;
 
-    ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override;
+    ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
 
     BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
 
@@ -175,12 +175,18 @@ private:
 
     Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
 
-    template <typename AttributeType, typename OutputType, typename DefaultGetter>
+    template <typename AttributeType, typename OutputType>
     void getItemsNumberImpl(
-        Attribute & attribute, const Columns & key_columns, PaddedPODArray<OutputType> & out, DefaultGetter && get_default) const;
+        Attribute & attribute,
+        const Columns & key_columns,
+        PaddedPODArray<OutputType> & out,
+        DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const;
 
-    template <typename DefaultGetter>
-    void getItemsString(Attribute & attribute, const Columns & key_columns, ColumnString * out, DefaultGetter && get_default) const;
+    void getItemsString(
+        Attribute & attribute,
+        const Columns & key_columns,
+        ColumnString * out,
+        DictionaryDefaultValueExtractor<StringRef> & default_value_extractor) const;
 
     template <typename PresentKeyHandler, typename AbsentKeyHandler>
     void update(
diff --git a/src/Dictionaries/ComplexKeyDirectDictionary.cpp b/src/Dictionaries/ComplexKeyDirectDictionary.cpp
index e719c247279..6186b8b7dae 100644
--- a/src/Dictionaries/ComplexKeyDirectDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyDirectDictionary.cpp
@@ -39,7 +39,7 @@ ColumnPtr ComplexKeyDirectDictionary::getColumn(
     const DataTypePtr & result_type,
     const Columns & key_columns,
     const DataTypes & key_types,
-    const ColumnPtr default_untyped) const
+    const ColumnPtr default_values_column) const
 {
     dict_struct.validateKeyTypes(key_types);
 
@@ -48,13 +48,13 @@ ColumnPtr ComplexKeyDirectDictionary::getColumn(
     const auto & attribute = getAttribute(attribute_name);
     const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
 
-    auto size = key_columns.front()->size();
+    auto keys_size = key_columns.front()->size();
 
     ColumnUInt8::MutablePtr col_null_map_to;
     ColumnUInt8::Container * vec_null_map_to = nullptr;
     if (attribute.is_nullable)
     {
-        col_null_map_to = ColumnUInt8::create(size, false);
+        col_null_map_to = ColumnUInt8::create(keys_size, false);
         vec_null_map_to = &col_null_map_to->getData();
     }
 
@@ -62,160 +62,49 @@ ColumnPtr ComplexKeyDirectDictionary::getColumn(
     {
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
         using AttributeType = typename Type::AttributeType;
+        using ValueType = DictionaryValueType<AttributeType>;
+        using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
+ 
+        const auto null_value = std::get<ValueType>(attribute.null_values);
+        DictionaryDefaultValueExtractor<ValueType> default_value_extractor(null_value, default_values_column);
+
+        auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
 
         if constexpr (std::is_same_v<AttributeType, String>)
         {
-            auto column_string = ColumnString::create();
-            auto * out = column_string.get();
+            auto * out = column.get();
 
-            if (default_untyped != nullptr)
-            {
-                if (const auto * const default_col = checkAndGetColumn<ColumnString>(*default_untyped))
+            getItemsImpl<String, String>(
+                attribute,
+                key_columns,
+                [&](const size_t row, const String value, bool is_null)
                 {
-                    getItemsImpl<String, String>(
-                        attribute,
-                        key_columns,
-                        [&](const size_t row, const String value, bool is_null)
-                        {
-                            if (attribute.is_nullable)
-                            {
-                                (*vec_null_map_to)[row] = is_null;
-                            }
+                    if (attribute.is_nullable)
+                        (*vec_null_map_to)[row] = is_null;
 
-                            const auto ref = StringRef{value};
-                            out->insertData(ref.data, ref.size);
-                        },
-                        [&](const size_t row)
-                        {
-                            const auto ref = default_col->getDataAt(row);
-                            return String(ref.data, ref.size);
-                        });
-                }
-                else if (const auto * const default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
-                {
-                    const auto & def = default_col_const->template getValue<String>();
-
-                    getItemsImpl<String, String>(
-                        attribute,
-                        key_columns,
-                        [&](const size_t row, const String value, bool is_null)
-                        {
-                            if (attribute.is_nullable)
-                            {
-                                (*vec_null_map_to)[row] = is_null;
-                            }
-
-                            const auto ref = StringRef{value};
-                            out->insertData(ref.data, ref.size);
-                        },
-                        [&](const size_t) { return def; });
-                }
-                else
-                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
-            }
-            else
-            {
-                const auto & null_value = std::get<StringRef>(attribute.null_values);
-
-                getItemsImpl<String, String>(
-                    attribute,
-                    key_columns,
-                    [&](const size_t row, const String value, bool is_null)
-                    {
-                        if (attribute.is_nullable)
-                        {
-                            (*vec_null_map_to)[row] = is_null;
-                        }
-
-                        const auto ref = StringRef{value};
-                        out->insertData(ref.data, ref.size);
-                    },
-                    [&](const size_t) { return String(null_value.data, null_value.size); });
-            }
-
-            result = std::move(column_string);
+                    const auto ref = StringRef{value};
+                    out->insertData(ref.data, ref.size);
+                },
+                default_value_extractor);
         }
         else
         {
-            using ResultColumnType
-                = std::conditional_t<IsDecimalNumber<AttributeType>, ColumnDecimal<AttributeType>, ColumnVector<AttributeType>>;
-            using ResultColumnPtr = typename ResultColumnType::MutablePtr;
-
-            ResultColumnPtr column;
-
-            if constexpr (IsDecimalNumber<AttributeType>)
-            {
-                auto scale = getDecimalScale(*dictionary_attribute.nested_type);
-                column = ColumnDecimal<AttributeType>::create(size, scale);
-            }
-            else if constexpr (IsNumber<AttributeType>)
-                column = ColumnVector<AttributeType>::create(size);
-
             auto & out = column->getData();
 
-            if (default_untyped != nullptr)
-            {
-                if (const auto * const default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
-                {
-                    getItemsImpl<AttributeType, AttributeType>(
-                        attribute,
-                        key_columns,
-                        [&](const size_t row, const auto value, bool is_null)
-                        {
-                            if (attribute.is_nullable)
-                            {
-                                (*vec_null_map_to)[row] = is_null;
-                            }
+            getItemsImpl<AttributeType, AttributeType>(
+                attribute,
+                key_columns,
+                [&](const size_t row, const auto value, bool is_null) {
+                    if (attribute.is_nullable)
+                        (*vec_null_map_to)[row] = is_null;
 
-                            out[row] = value;
-                        },
-                        [&](const size_t row) { return default_col->getData()[row]; }
-                    );
-                }
-                else if (const auto * const default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
-                {
-                    const auto & def = default_col_const->template getValue<AttributeType>();
-
-                    getItemsImpl<AttributeType, AttributeType>(
-                        attribute,
-                        key_columns,
-                        [&](const size_t row, const auto value, bool is_null)
-                        {
-                            if (attribute.is_nullable)
-                            {
-                                (*vec_null_map_to)[row] = is_null;
-                            }
-
-                            out[row] = value;
-                        },
-                        [&](const size_t) { return def; }
-                    );
-                }
-                else
-                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
-            }
-            else
-            {
-                const auto null_value = std::get<AttributeType>(attribute.null_values);
-
-                getItemsImpl<AttributeType, AttributeType>(
-                    attribute,
-                    key_columns,
-                    [&](const size_t row, const auto value, bool is_null)
-                    {
-                        if (attribute.is_nullable)
-                        {
-                            (*vec_null_map_to)[row] = is_null;
-                        }
-
-                        out[row] = value;
-                    },
-                    [&](const size_t) { return null_value; }
-                );
-            }
-
-            result = std::move(column);
+                    out[row] = value;
+                },
+                default_value_extractor);
         }
+
+
+        result = std::move(column);
     };
 
     callOnDictionaryAttributeType(attribute.type, type_call);
@@ -228,7 +117,7 @@ ColumnPtr ComplexKeyDirectDictionary::getColumn(
     return result;
 }
 
-ColumnUInt8::Ptr ComplexKeyDirectDictionary::has(const Columns & key_columns, const DataTypes & key_types) const
+ColumnUInt8::Ptr ComplexKeyDirectDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
 {
     dict_struct.validateKeyTypes(key_types);
 
@@ -384,9 +273,12 @@ StringRef ComplexKeyDirectDictionary::placeKeysInPool(
 }
 
 
-template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
+template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
 void ComplexKeyDirectDictionary::getItemsImpl(
-    const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
+    const Attribute & attribute,
+    const Columns & key_columns,
+    ValueSetter && set_value,
+    DefaultValueExtractor & default_value_extractor) const
 {
     const auto rows = key_columns.front()->size();
     const auto keys_size = dict_struct.key->size();
@@ -401,7 +293,7 @@ void ComplexKeyDirectDictionary::getItemsImpl(
     {
         const StringRef key = placeKeysInPool(row, key_columns, keys_array, *dict_struct.key, temporary_keys_pool);
         keys[row] = key;
-        value_by_key[key] = get_default(row);
+        value_by_key[key] = static_cast<AttributeType>(default_value_extractor[row]);
         to_load[row] = row;
         value_is_null[key] = false;
     }
@@ -443,13 +335,9 @@ void ComplexKeyDirectDictionary::getItemsImpl(
                     auto value = attribute_column[row_idx];
 
                     if (value.isNull())
-                    {
                         value_is_null[key] = true;
-                    }
                     else
-                    {
                         value_by_key[key] = static_cast<OutputType>(value.template get<NearestFieldType<AttributeType>>());
-                    }
                 }
             }
         }
diff --git a/src/Dictionaries/ComplexKeyDirectDictionary.h b/src/Dictionaries/ComplexKeyDirectDictionary.h
index fddf34694ba..11158b23c5a 100644
--- a/src/Dictionaries/ComplexKeyDirectDictionary.h
+++ b/src/Dictionaries/ComplexKeyDirectDictionary.h
@@ -12,14 +12,13 @@
 #include <ext/range.h>
 #include <ext/size.h>
 #include <ext/map.h>
-#include "DictionaryStructure.h"
 #include "IDictionary.h"
 #include "IDictionarySource.h"
-
+#include "DictionaryStructure.h"
+#include "DictionaryHelpers.h"
 
 namespace DB
 {
-using BlockPtr = std::shared_ptr<Block>;
 
 class ComplexKeyDirectDictionary final : public IDictionaryBase
 {
@@ -60,16 +59,16 @@ public:
         return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
     }
 
-    DictionaryIdentifierType getIdentifierType() const override { return DictionaryIdentifierType::complex; }
+    DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
 
     ColumnPtr getColumn(
         const std::string& attribute_name,
         const DataTypePtr & result_type,
         const Columns & key_columns,
         const DataTypes & key_types,
-        const ColumnPtr default_untyped) const override;
+        const ColumnPtr default_values_column) const override;
 
-    ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override;
+    ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
 
     BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
 
@@ -117,9 +116,12 @@ private:
     StringRef placeKeysInPool(
         const size_t row, const Columns & key_columns, StringRefs & keys, const std::vector<DictionaryAttribute> & key_attributes, Pool & pool) const;
 
-    template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
+    template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
     void getItemsImpl(
-        const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
+        const Attribute & attribute,
+        const Columns & key_columns,
+        ValueSetter && set_value,
+        DefaultValueExtractor & default_value_extractor) const;
 
     template <typename T>
     void setAttributeValueImpl(Attribute & attribute, const Key id, const T & value);
diff --git a/src/Dictionaries/ComplexKeyHashedDictionary.cpp b/src/Dictionaries/ComplexKeyHashedDictionary.cpp
index 3d48638a1b2..76eaad1dcbc 100644
--- a/src/Dictionaries/ComplexKeyHashedDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyHashedDictionary.cpp
@@ -41,7 +41,7 @@ ColumnPtr ComplexKeyHashedDictionary::getColumn(
     const DataTypePtr & result_type,
     const Columns & key_columns,
     const DataTypes & key_types,
-    const ColumnPtr default_untyped) const
+    const ColumnPtr default_values_column) const
 {
     dict_struct.validateKeyTypes(key_types);
 
@@ -50,13 +50,13 @@ ColumnPtr ComplexKeyHashedDictionary::getColumn(
     const auto & attribute = getAttribute(attribute_name);
     const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
 
-    auto size = key_columns.front()->size();
+    auto keys_size = key_columns.front()->size();
 
     ColumnUInt8::MutablePtr col_null_map_to;
     ColumnUInt8::Container * vec_null_map_to = nullptr;
     if (attribute.is_nullable)
     {
-        col_null_map_to = ColumnUInt8::create(size, false);
+        col_null_map_to = ColumnUInt8::create(keys_size, false);
         vec_null_map_to = &col_null_map_to->getData();
     }
 
@@ -64,152 +64,48 @@ ColumnPtr ComplexKeyHashedDictionary::getColumn(
     {
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
         using AttributeType = typename Type::AttributeType;
+        using ValueType = DictionaryValueType<AttributeType>;
+        using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>; 
+
+        const auto null_value = std::get<ValueType>(attribute.null_values);
+        DictionaryDefaultValueExtractor<ValueType> default_value_extractor(null_value, default_values_column);
+
+        auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
 
         if constexpr (std::is_same_v<AttributeType, String>)
         {
-            auto column_string = ColumnString::create();
-            auto * out = column_string.get();
+            auto * out = column.get();
 
-            if (default_untyped != nullptr)
-            {
-                if (const auto * const default_col = checkAndGetColumn<ColumnString>(*default_untyped))
+            getItemsImpl<StringRef, StringRef>(
+                attribute,
+                key_columns,
+                [&](const size_t row, const StringRef value, bool is_null)
                 {
-                    getItemsImpl<StringRef, StringRef>(
-                        attribute,
-                        key_columns,
-                        [&](const size_t row, const StringRef value, bool is_null)
-                        {
-                            if (attribute.is_nullable)
-                            {
-                                (*vec_null_map_to)[row] = is_null;
-                            }
+                    if (attribute.is_nullable)
+                        (*vec_null_map_to)[row] = is_null;
 
-                            out->insertData(value.data, value.size);
-                        },
-                        [&](const size_t row) { return default_col->getDataAt(row); });
-                }
-                else if (const auto * const default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
-                {
-                    const auto & def = default_col_const->template getValue<String>();
-
-                    getItemsImpl<StringRef, StringRef>(
-                        attribute,
-                        key_columns,
-                        [&](const size_t row, const StringRef value, bool is_null)
-                        {
-                            if (attribute.is_nullable)
-                            {
-                                (*vec_null_map_to)[row] = is_null;
-                            }
-
-                            out->insertData(value.data, value.size);
-                        },
-                        [&](const size_t) { return def; });
-                }
-                else
-                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
-            }
-            else
-            {
-                const auto & null_value = std::get<StringRef>(attribute.null_values);
-
-                getItemsImpl<StringRef, StringRef>(
-                    attribute,
-                    key_columns,
-                    [&](const size_t row, const StringRef value, bool is_null)
-                    {
-                        if (attribute.is_nullable)
-                        {
-                            (*vec_null_map_to)[row] = is_null;
-                        }
-
-                        out->insertData(value.data, value.size);
-                    },
-                    [&](const size_t) { return null_value; });
-            }
-
-            result = std::move(column_string);
+                    out->insertData(value.data, value.size);
+                },
+                default_value_extractor);
         }
         else
         {
-            using ResultColumnType
-                = std::conditional_t<IsDecimalNumber<AttributeType>, ColumnDecimal<AttributeType>, ColumnVector<AttributeType>>;
-            using ResultColumnPtr = typename ResultColumnType::MutablePtr;
-
-            ResultColumnPtr column;
-
-            if constexpr (IsDecimalNumber<AttributeType>)
-            {
-                auto scale = getDecimalScale(*dictionary_attribute.nested_type);
-                column = ColumnDecimal<AttributeType>::create(size, scale);
-            }
-            else if constexpr (IsNumber<AttributeType>)
-                column = ColumnVector<AttributeType>::create(size);
-
             auto & out = column->getData();
 
-            if (default_untyped != nullptr)
-            {
-                if (const auto * const default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
+            getItemsImpl<AttributeType, AttributeType>(
+                attribute,
+                key_columns,
+                [&](const size_t row, const auto value, bool is_null)
                 {
-                    getItemsImpl<AttributeType, AttributeType>(
-                        attribute,
-                        key_columns,
-                        [&](const size_t row, const auto value, bool is_null)
-                        {
-                            if (attribute.is_nullable)
-                            {
-                                (*vec_null_map_to)[row] = is_null;
-                            }
+                    if (attribute.is_nullable)
+                        (*vec_null_map_to)[row] = is_null;
 
-                            out[row] = value;
-                        },
-                        [&](const size_t row) { return default_col->getData()[row]; }
-                    );
-                }
-                else if (const auto * const default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
-                {
-                    const auto & def = default_col_const->template getValue<AttributeType>();
-
-                    getItemsImpl<AttributeType, AttributeType>(
-                        attribute,
-                        key_columns,
-                        [&](const size_t row, const auto value, bool is_null)
-                        {
-                            if (attribute.is_nullable)
-                            {
-                                (*vec_null_map_to)[row] = is_null;
-                            }
-
-                            out[row] = value;
-                        },
-                        [&](const size_t) { return def; }
-                    );
-                }
-                else
-                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
-            }
-            else
-            {
-                const auto null_value = std::get<AttributeType>(attribute.null_values);
-
-                getItemsImpl<AttributeType, AttributeType>(
-                    attribute,
-                    key_columns,
-                    [&](const size_t row, const auto value, bool is_null)
-                    {
-                        if (attribute.is_nullable)
-                        {
-                            (*vec_null_map_to)[row] = is_null;
-                        }
-
-                        out[row] = value;
-                    },
-                    [&](const size_t) { return null_value; });
-            }
-
-            result = std::move(column);
+                    out[row] = value;
+                },
+                default_value_extractor);
         }
+
+        result = std::move(column);
     };
 
     callOnDictionaryAttributeType(attribute.type, type_call);
@@ -222,7 +118,7 @@ ColumnPtr ComplexKeyHashedDictionary::getColumn(
     return result;
 }
 
-ColumnUInt8::Ptr ComplexKeyHashedDictionary::has(const Columns & key_columns, const DataTypes & key_types) const
+ColumnUInt8::Ptr ComplexKeyHashedDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
 {
     dict_struct.validateKeyTypes(key_types);
 
@@ -236,15 +132,9 @@ ColumnUInt8::Ptr ComplexKeyHashedDictionary::has(const Columns & key_columns, co
     {
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
         using AttributeType = typename Type::AttributeType;
+        using ValueType = DictionaryValueType<AttributeType>;
 
-        if constexpr (std::is_same_v<AttributeType, String>)
-        {
-            has<StringRef>(attribute, key_columns, out);
-        }
-        else
-        {
-            has<AttributeType>(attribute, key_columns, out);
-        }
+        has<ValueType>(attribute, key_columns, out);
     };
 
     callOnDictionaryAttributeType(attribute.type, type_call);
@@ -480,9 +370,12 @@ ComplexKeyHashedDictionary::createAttribute(const DictionaryAttribute & attribut
 }
 
 
-template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
+template <typename AttributeType, typename OutputType, typename ValueSetter>
 void ComplexKeyHashedDictionary::getItemsImpl(
-    const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
+    const Attribute & attribute,
+    const Columns & key_columns,
+    ValueSetter && set_value,
+    DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const
 {
     const auto & attr = std::get<ContainerType<AttributeType>>(attribute.maps);
 
@@ -505,9 +398,9 @@ void ComplexKeyHashedDictionary::getItemsImpl(
         else
         {
             if (attribute.is_nullable && attribute.nullable_set->find(key) != nullptr)
-                set_value(i, get_default(i), true);
+                set_value(i, default_value_extractor[i], true);
             else
-                set_value(i, get_default(i), false);
+                set_value(i, default_value_extractor[i], false);
         }
 
         /// free memory allocated for the key
diff --git a/src/Dictionaries/ComplexKeyHashedDictionary.h b/src/Dictionaries/ComplexKeyHashedDictionary.h
index a21ad223f19..63458c3c759 100644
--- a/src/Dictionaries/ComplexKeyHashedDictionary.h
+++ b/src/Dictionaries/ComplexKeyHashedDictionary.h
@@ -11,14 +11,13 @@
 #include <Core/Block.h>
 #include <common/StringRef.h>
 #include <ext/range.h>
-#include "DictionaryStructure.h"
 #include "IDictionary.h"
 #include "IDictionarySource.h"
-
+#include "DictionaryStructure.h"
+#include "DictionaryHelpers.h"
 
 namespace DB
 {
-using BlockPtr = std::shared_ptr<Block>;
 
 class ComplexKeyHashedDictionary final : public IDictionaryBase
 {
@@ -61,16 +60,16 @@ public:
         return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
     }
 
-    DictionaryIdentifierType getIdentifierType() const override { return DictionaryIdentifierType::complex; }
+    DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
 
     ColumnPtr getColumn(
         const std::string& attribute_name,
         const DataTypePtr & result_type,
         const Columns & key_columns,
         const DataTypes & key_types,
-        const ColumnPtr default_untyped) const override;
+        const ColumnPtr default_values_column) const override;
 
-    ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override;
+    ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
 
     BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
 
@@ -141,9 +140,12 @@ private:
 
     Attribute createAttribute(const DictionaryAttribute & attribute, const Field & null_value);
 
-    template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
-    void
-    getItemsImpl(const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
+    template <typename AttributeType, typename OutputType, typename ValueSetter>
+    void getItemsImpl(
+        const Attribute & attribute,
+        const Columns & key_columns,
+        ValueSetter && set_value,
+        DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const;
 
     template <typename T>
     bool setAttributeValueImpl(Attribute & attribute, const StringRef key, const T value);
diff --git a/src/Dictionaries/DictionaryBlockInputStream.h b/src/Dictionaries/DictionaryBlockInputStream.h
index 8361c01a75f..b5ab19acb6f 100644
--- a/src/Dictionaries/DictionaryBlockInputStream.h
+++ b/src/Dictionaries/DictionaryBlockInputStream.h
@@ -79,14 +79,14 @@ private:
     GetColumnsFunction get_key_columns_function;
     GetColumnsFunction get_view_columns_function;
 
-    enum class DictionaryKeyType
+    enum class DictionaryInputStreamKeyType
     {
         Id,
         ComplexKey,
         Callback
     };
 
-    DictionaryKeyType key_type;
+    DictionaryInputStreamKeyType key_type;
 };
 
 
@@ -97,7 +97,7 @@ DictionaryBlockInputStream<Key>::DictionaryBlockInputStream(
     , dictionary(dictionary_)
     , column_names(column_names_)
     , ids(std::move(ids_))
-    , key_type(DictionaryKeyType::Id)
+    , key_type(DictionaryInputStreamKeyType::Id)
 {
 }
 
@@ -110,7 +110,7 @@ DictionaryBlockInputStream<Key>::DictionaryBlockInputStream(
     : DictionaryBlockInputStreamBase(keys.size(), max_block_size_)
     , dictionary(dictionary_)
     , column_names(column_names_)
-    , key_type(DictionaryKeyType::ComplexKey)
+    , key_type(DictionaryInputStreamKeyType::ComplexKey)
 {
     const DictionaryStructure & dictionary_structure = dictionary->getStructure();
     fillKeyColumns(keys, 0, keys.size(), dictionary_structure, key_columns);
@@ -130,7 +130,7 @@ DictionaryBlockInputStream<Key>::DictionaryBlockInputStream(
     , data_columns(data_columns_)
     , get_key_columns_function(std::move(get_key_columns_function_))
     , get_view_columns_function(std::move(get_view_columns_function_))
-    , key_type(DictionaryKeyType::Callback)
+    , key_type(DictionaryInputStreamKeyType::Callback)
 {
 }
 
@@ -141,7 +141,7 @@ Block DictionaryBlockInputStream<Key>::getBlock(size_t start, size_t length) con
     /// TODO: Rewrite
     switch (key_type)
     {
-        case DictionaryKeyType::ComplexKey:
+        case DictionaryInputStreamKeyType::ComplexKey:
         {
             Columns columns;
             ColumnsWithTypeAndName view_columns;
@@ -155,13 +155,13 @@ Block DictionaryBlockInputStream<Key>::getBlock(size_t start, size_t length) con
             return fillBlock({}, columns, {}, std::move(view_columns));
         }
 
-        case DictionaryKeyType::Id:
+        case DictionaryInputStreamKeyType::Id:
         {
             PaddedPODArray<Key> ids_to_fill(ids.begin() + start, ids.begin() + start + length);
             return fillBlock(ids_to_fill, {}, {}, {});
         }
 
-        case DictionaryKeyType::Callback:
+        case DictionaryInputStreamKeyType::Callback:
         {
             Columns columns;
             columns.reserve(data_columns.size());
@@ -182,7 +182,7 @@ Block DictionaryBlockInputStream<Key>::getBlock(size_t start, size_t length) con
         }
     }
 
-    throw Exception("Unexpected DictionaryKeyType.", ErrorCodes::LOGICAL_ERROR);
+    throw Exception("Unexpected DictionaryInputStreamKeyType.", ErrorCodes::LOGICAL_ERROR);
 }
 
 template <typename Key>
@@ -212,8 +212,7 @@ Block DictionaryBlockInputStream<Key>::fillBlock(
         block_columns.emplace_back(ids_column, std::make_shared<DataTypeUInt64>(), structure.id->name);
     }
 
-    /// TODO: This can be optimized
-    auto dictionary_identifier_type = dictionary->getIdentifierType();
+    auto dictionary_key_type = dictionary->getKeyType();
 
     for (const auto idx : ext::range(0, structure.attributes.size()))
     {
@@ -222,15 +221,23 @@ Block DictionaryBlockInputStream<Key>::fillBlock(
         {
             ColumnPtr column;
 
-            if (dictionary_identifier_type == DictionaryIdentifierType::simple)
+            if (dictionary_key_type == DictionaryKeyType::simple)
             {
                 column = dictionary->getColumn(
-                    attribute.name, attribute.type, {ids_column}, {std::make_shared<DataTypeUInt64>()}, nullptr /* default_untyped*/);
+                    attribute.name,
+                    attribute.type,
+                    {ids_column},
+                    {std::make_shared<DataTypeUInt64>()},
+                    nullptr /* default_values_column */);
             }
             else
             {
                 column = dictionary->getColumn(
-                    attribute.name, attribute.type, keys, data_types, nullptr /* default_untyped*/);
+                    attribute.name, 
+                    attribute.type,
+                    keys,
+                    data_types,
+                    nullptr /* default_values_column*/);
             }
 
             block_columns.emplace_back(column, attribute.type, attribute.name);
diff --git a/src/Dictionaries/DictionaryHelpers.h b/src/Dictionaries/DictionaryHelpers.h
new file mode 100644
index 00000000000..001a2a804b7
--- /dev/null
+++ b/src/Dictionaries/DictionaryHelpers.h
@@ -0,0 +1,142 @@
+#pragma once
+
+#include <Columns/IColumn.h>
+#include <Columns/ColumnDecimal.h>
+#include <Columns/ColumnString.h>
+#include <Columns/ColumnVector.h>
+#include <DataTypes/DataTypesDecimal.h>
+#include "DictionaryStructure.h"
+
+namespace DB
+{
+
+/**
+ * In Dictionaries implementation String attribute is stored in arena and StringRefs are pointing to it.
+ */
+template <typename DictionaryAttributeType>
+using DictionaryValueType = 
+    std::conditional_t<std::is_same_v<DictionaryAttributeType, String>, StringRef, DictionaryAttributeType>;
+
+/**
+ * Used to create column with right type for DictionaryAttributeType.
+ */
+template <typename DictionaryAttributeType>
+class DictionaryAttributeColumnProvider
+{
+public:
+    using ColumnType = 
+        std::conditional_t<std::is_same_v<DictionaryAttributeType, String>, ColumnString,
+            std::conditional_t<IsDecimalNumber<DictionaryAttributeType>, ColumnDecimal<DictionaryAttributeType>, 
+                ColumnVector<DictionaryAttributeType>>>;
+
+    using ColumnPtr = typename ColumnType::MutablePtr;
+
+    static ColumnPtr getColumn(const DictionaryAttribute & dictionary_attribute, size_t size)
+    {
+        if constexpr (std::is_same_v<DictionaryAttributeType, String>)
+        {
+            return ColumnType::create();
+        }
+        if constexpr (IsDecimalNumber<DictionaryAttributeType>)
+        {
+            auto scale = getDecimalScale(*dictionary_attribute.nested_type);
+            return ColumnType::create(size, scale);
+        }
+        else if constexpr (IsNumber<DictionaryAttributeType>)
+            return ColumnType::create(size);
+        else
+            throw Exception{"Unsupported attribute type.", ErrorCodes::TYPE_MISMATCH};
+    }
+};
+
+/**
+ * DictionaryDefaultValueExtractor used to simplify getting default value for IDictionary function `getColumn`.
+ * Provides interface for getting default value with operator[];
+ * 
+ * If default_values_column is not null in constructor than this column values will be used as default values.
+ * If default_values_column is null then attribute_default_value will be used.
+ */
+template <typename DefaultValueType>
+class DictionaryDefaultValueExtractor
+{
+    using ResultColumnType = 
+        std::conditional_t< std::is_same_v<DefaultValueType, StringRef>, ColumnString,
+            std::conditional_t<IsDecimalNumber<DefaultValueType>, ColumnDecimal<DefaultValueType>,
+                ColumnVector<DefaultValueType>>>;
+
+public:
+    DictionaryDefaultValueExtractor(DefaultValueType attribute_default_value, ColumnPtr default_values_column_ = nullptr)
+    {
+        if (default_values_column_ != nullptr)
+        {
+            if (const auto * const default_col = checkAndGetColumn<ResultColumnType>(*default_values_column))
+            {
+                default_values_column = default_col;
+            }
+            else if (const auto * const default_col_const = checkAndGetColumnConst<ResultColumnType>(default_values_column_.get()))
+            {
+                /// TODO: Check String lifetime safety
+                /// DefaultValueType for StringColumn is StringRef, but const column getValue will return String
+                using ConstColumnValue = std::conditional_t<std::is_same_v<DefaultValueType, StringRef>, String, DefaultValueType>;
+                default_value = std::make_optional<DefaultValueType>(default_col_const->template getValue<ConstColumnValue>());
+            }
+            else
+                throw Exception{"Type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
+        }
+        else
+            default_value = std::make_optional<DefaultValueType>(attribute_default_value);
+    }
+
+    DefaultValueType operator[](size_t row)
+    {
+        if (default_value)
+            return *default_value;
+        
+        if constexpr (std::is_same_v<ResultColumnType, ColumnString>)
+            return default_values_column->getDataAt(row);
+        else
+            return default_values_column->getData()[row];
+    }
+private:
+    const ResultColumnType * default_values_column = nullptr;
+    std::optional<DefaultValueType> default_value = {};
+};
+
+/**
+ * Returns ColumnVector data as PaddedPodArray. 
+ * 
+ * If column is constant parameter backup_storage is used to store values.
+ */
+template <typename T>
+static const PaddedPODArray<T> & getColumnVectorData(
+    const IDictionaryBase * dictionary [[maybe_unused]],
+    const ColumnPtr column,
+    PaddedPODArray<T> & backup_storage)
+{
+    bool is_const_column = isColumnConst(*column);
+    auto full_column = column->convertToFullColumnIfConst();
+    auto vector_col = checkAndGetColumn<ColumnVector<T>>(full_column.get());
+
+    if (!vector_col)
+    {
+        throw Exception{ErrorCodes::TYPE_MISMATCH,
+            "{}: type mismatch: column has wrong type expected {}",
+            dictionary->getDictionaryID().getNameForLogs(),
+            TypeName<T>::get()};
+    }
+
+    if (is_const_column)
+    {
+        // With type conversion and const columns we need to use backup storage here
+        auto & data = vector_col->getData();
+        backup_storage.assign(data);
+
+        return backup_storage;
+    }
+    else
+    {
+        return vector_col->getData();
+    }
+}
+
+}
diff --git a/src/Dictionaries/DirectDictionary.cpp b/src/Dictionaries/DirectDictionary.cpp
index 5f556aa2057..0d767f322f7 100644
--- a/src/Dictionaries/DirectDictionary.cpp
+++ b/src/Dictionaries/DirectDictionary.cpp
@@ -6,6 +6,7 @@
 #include <Functions/FunctionHelpers.h>
 #include <Columns/ColumnNullable.h>
 #include <DataTypes/DataTypesDecimal.h>
+#include <Common/HashTable/HashSet.h>
 
 namespace DB
 {
@@ -37,11 +38,13 @@ DirectDictionary::DirectDictionary(
 void DirectDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const
 {
     const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
+    DictionaryDefaultValueExtractor<UInt64> extractor(null_value);
+    
     getItemsImpl<UInt64, UInt64>(
         *hierarchical_attribute,
         ids,
         [&](const size_t row, const UInt64 value, bool) { out[row] = value; },
-        [&](const size_t) { return null_value; });
+        extractor);
 }
 
 
@@ -135,22 +138,22 @@ ColumnPtr DirectDictionary::getColumn(
         const DataTypePtr & result_type,
         const Columns & key_columns,
         const DataTypes &,
-        const ColumnPtr default_untyped) const
+        const ColumnPtr default_values_column) const
 {
     ColumnPtr result;
 
     PaddedPODArray<Key> backup_storage;
-    const auto & ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
+    const auto & ids = getColumnVectorData(this, key_columns.front(), backup_storage);
 
     const auto & attribute = getAttribute(attribute_name);
 
-    auto size = ids.size();
+    auto keys_size = ids.size();
 
     ColumnUInt8::MutablePtr col_null_map_to;
     ColumnUInt8::Container * vec_null_map_to = nullptr;
     if (attribute.is_nullable)
     {
-        col_null_map_to = ColumnUInt8::create(size, false);
+        col_null_map_to = ColumnUInt8::create(keys_size, false);
         vec_null_map_to = &col_null_map_to->getData();
     }
 
@@ -161,160 +164,49 @@ ColumnPtr DirectDictionary::getColumn(
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
         using AttributeType = typename Type::AttributeType;
 
+        using ValueType = DictionaryValueType<AttributeType>;
+        using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
+ 
+        const auto null_value = std::get<ValueType>(attribute.null_values);
+        DictionaryDefaultValueExtractor<ValueType> default_value_extractor(null_value, default_values_column);
+
+        auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
+
         if constexpr (std::is_same_v<AttributeType, String>)
         {
-            auto column_string = ColumnString::create();
-            auto * out = column_string.get();
+            auto * out = column.get();
 
-            if (default_untyped != nullptr)
-            {
-                if (const auto * const default_col = checkAndGetColumn<ColumnString>(*default_untyped))
+            getItemsImpl<String, String>(
+                attribute,
+                ids,
+                [&](const size_t row, const String value, bool is_null)
                 {
-                    getItemsImpl<String, String>(
-                        attribute,
-                        ids,
-                        [&](const size_t row, const String value, bool is_null)
-                        {
-                            if (attribute.is_nullable)
-                            {
-                                (*vec_null_map_to)[row] = is_null;
-                            }
+                    if (attribute.is_nullable)
+                        (*vec_null_map_to)[row] = is_null;
 
-                            const auto ref = StringRef{value};
-                            out->insertData(ref.data, ref.size);
-                        },
-                        [&](const size_t row)
-                        {
-                            const auto ref = default_col->getDataAt(row);
-                            return String(ref.data, ref.size);
-                        });
-                }
-                else if (const auto * const default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
-                {
-                    const auto & def = default_col_const->template getValue<String>();
-
-                    getItemsImpl<String, String>(
-                        attribute,
-                        ids,
-                        [&](const size_t row, const String value, bool is_null)
-                        {
-                            if (attribute.is_nullable)
-                            {
-                                (*vec_null_map_to)[row] = is_null;
-                            }
-
-                            const auto ref = StringRef{value};
-                            out->insertData(ref.data, ref.size);
-                        },
-                        [&](const size_t) { return def; });
-                }
-                else
-                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
-
-            }
-            else
-            {
-                const auto & null_value = std::get<StringRef>(attribute.null_values);
-
-                getItemsImpl<String, String>(
-                    attribute,
-                    ids,
-                    [&](const size_t row, const String value, bool is_null)
-                    {
-                        if (attribute.is_nullable)
-                        {
-                            (*vec_null_map_to)[row] = is_null;
-                        }
-
-                        const auto ref = StringRef{value};
-                        out->insertData(ref.data, ref.size);
-                    },
-                    [&](const size_t) { return String(null_value.data, null_value.size); });
-            }
-
-            result = std::move(column_string);
+                    const auto ref = StringRef{value};
+                    out->insertData(ref.data, ref.size);
+                },
+                default_value_extractor);
         }
         else
         {
-            using ResultColumnType
-                = std::conditional_t<IsDecimalNumber<AttributeType>, ColumnDecimal<AttributeType>, ColumnVector<AttributeType>>;
-            using ResultColumnPtr = typename ResultColumnType::MutablePtr;
-
-            ResultColumnPtr column;
-
-            if constexpr (IsDecimalNumber<AttributeType>)
-            {
-                auto scale = getDecimalScale(*dictionary_attribute.nested_type);
-                column = ColumnDecimal<AttributeType>::create(size, scale);
-            }
-            else if constexpr (IsNumber<AttributeType>)
-                column = ColumnVector<AttributeType>::create(size);
-
             auto & out = column->getData();
 
-            if (default_untyped != nullptr)
-            {
-                if (const auto * const default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
+            getItemsImpl<AttributeType, AttributeType>(
+                attribute,
+                ids,
+                [&](const size_t row, const auto value, bool is_null)
                 {
-                    getItemsImpl<AttributeType, AttributeType>(
-                        attribute,
-                        ids,
-                        [&](const size_t row, const auto value, bool is_null)
-                        {
-                            if (attribute.is_nullable)
-                            {
-                                (*vec_null_map_to)[row] = is_null;
-                            }
+                    if (attribute.is_nullable)
+                        (*vec_null_map_to)[row] = is_null;
 
-                            out[row] = value;
-                        },
-                        [&](const size_t row) { return default_col->getData()[row]; }
-                    );
-                }
-                else if (const auto * const default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
-                {
-                    const auto & def = default_col_const->template getValue<AttributeType>();
-
-                    getItemsImpl<AttributeType, AttributeType>(
-                        attribute,
-                        ids,
-                        [&](const size_t row, const auto value, bool is_null)
-                        {
-                            if (attribute.is_nullable)
-                            {
-                                (*vec_null_map_to)[row] = is_null;
-                            }
-
-                            out[row] = value;
-                        },
-                        [&](const size_t) { return def; }
-                    );
-                }
-                else
-                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
-            }
-            else
-            {
-                const auto null_value = std::get<AttributeType>(attribute.null_values);
-
-                getItemsImpl<AttributeType, AttributeType>(
-                    attribute,
-                    ids,
-                    [&](const size_t row, const auto value, bool is_null)
-                    {
-                        if (attribute.is_nullable)
-                        {
-                            (*vec_null_map_to)[row] = is_null;
-                        }
-
-                        out[row] = value;
-                    },
-                    [&](const size_t) { return null_value; }
-                );
-            }
-
-            result = std::move(column);
+                    out[row] = value;
+                },
+                default_value_extractor);
         }
+
+        result = std::move(column);
     };
 
     callOnDictionaryAttributeType(attribute.type, type_call);
@@ -327,10 +219,10 @@ ColumnPtr DirectDictionary::getColumn(
     return result;
 }
 
-ColumnUInt8::Ptr DirectDictionary::has(const Columns & key_columns, const DataTypes &) const
+ColumnUInt8::Ptr DirectDictionary::hasKeys(const Columns & key_columns, const DataTypes &) const
 {
     PaddedPODArray<Key> backup_storage;
-    const auto& ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
+    const auto& ids = getColumnVectorData(this, key_columns.front(), backup_storage);
 
     auto result = ColumnUInt8::create(ext::size(ids));
     auto& out = result->getData();
@@ -425,20 +317,22 @@ DirectDictionary::Attribute DirectDictionary::createAttribute(const DictionaryAt
 }
 
 
-template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
+template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
 void DirectDictionary::getItemsImpl(
-    const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
+    const Attribute & attribute,
+    const PaddedPODArray<Key> & ids,
+    ValueSetter && set_value,
+    DefaultValueExtractor & default_value_extractor) const
 {
     const auto rows = ext::size(ids);
 
     HashMap<Key, OutputType> value_by_key;
-    HashMap<Key, bool> value_is_null;
+    HashSet<Key> value_is_null;
 
     for (const auto row : ext::range(0, rows))
     {
         auto key = ids[row];
-        value_by_key[key] = get_default(row);
-        value_is_null[key] = false;
+        value_by_key[key] = static_cast<AttributeType>(default_value_extractor[row]);
     }
 
     std::vector<Key> to_load;
@@ -470,7 +364,7 @@ void DirectDictionary::getItemsImpl(
                 auto value = attribute_column[row_idx];
 
                 if (value.isNull())
-                    value_is_null[key] = true;
+                    value_is_null.insert(key);
                 else
                     value_by_key[key] = static_cast<OutputType>(value.get<NearestFieldType<AttributeType>>());
             }
@@ -482,7 +376,7 @@ void DirectDictionary::getItemsImpl(
     for (const auto row : ext::range(0, rows))
     {
         auto key = ids[row];
-        set_value(row, value_by_key[key], value_is_null[key]);
+        set_value(row, value_by_key[key], value_is_null.find(key) != nullptr);
     }
 
     query_count.fetch_add(rows, std::memory_order_relaxed);
diff --git a/src/Dictionaries/DirectDictionary.h b/src/Dictionaries/DirectDictionary.h
index dcfea296778..9daa8415de8 100644
--- a/src/Dictionaries/DirectDictionary.h
+++ b/src/Dictionaries/DirectDictionary.h
@@ -13,11 +13,10 @@
 #include "DictionaryStructure.h"
 #include "IDictionary.h"
 #include "IDictionarySource.h"
-
+#include "DictionaryHelpers.h"
 
 namespace DB
 {
-using BlockPtr = std::shared_ptr<Block>;
 
 class DirectDictionary final : public IDictionary
 {
@@ -65,16 +64,16 @@ public:
     void isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const override;
     void isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
 
-    DictionaryIdentifierType getIdentifierType() const override { return DictionaryIdentifierType::simple; }
+    DictionaryKeyType getKeyType() const override { return DictionaryKeyType::simple; }
 
     ColumnPtr getColumn(
         const std::string& attribute_name,
         const DataTypePtr & result_type,
         const Columns & key_columns,
         const DataTypes & key_types,
-        const ColumnPtr default_untyped) const override;
+        const ColumnPtr default_values_column) const override;
 
-    ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override;
+    ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
 
     BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
 
@@ -114,9 +113,12 @@ private:
 
     Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value, const std::string & name);
 
-    template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
+    template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
     void getItemsImpl(
-        const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
+        const Attribute & attribute,
+        const PaddedPODArray<Key> & ids,
+        ValueSetter && set_value,
+        DefaultValueExtractor & default_value_extractor) const;
 
     template <typename T>
     void setAttributeValueImpl(Attribute & attribute, const Key id, const T & value);
diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp
index 307b231071e..3f6dad9a20d 100644
--- a/src/Dictionaries/FlatDictionary.cpp
+++ b/src/Dictionaries/FlatDictionary.cpp
@@ -49,7 +49,7 @@ FlatDictionary::FlatDictionary(
 void FlatDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const
 {
     const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
-    DefaultValueExtractor<UInt64> extractor(null_value, nullptr);
+    DictionaryDefaultValueExtractor<UInt64> extractor(null_value);
 
     getItemsImpl<UInt64, UInt64>(
         *hierarchical_attribute,
@@ -118,7 +118,7 @@ ColumnPtr FlatDictionary::getColumn(
     ColumnPtr result;
 
     PaddedPODArray<Key> backup_storage;
-    const auto & ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
+    const auto & ids = getColumnVectorData(this, key_columns.front(), backup_storage);
 
     auto size = ids.size();
 
@@ -129,52 +129,36 @@ ColumnPtr FlatDictionary::getColumn(
     {
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
         using AttributeType = typename Type::AttributeType;
+        using ValueType = DictionaryValueType<AttributeType>;
+        using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
+        
+        const auto null_value = std::get<ValueType>(attribute.null_values);
+        DictionaryDefaultValueExtractor<ValueType> default_value_extractor(null_value, default_values_column);
 
-        if constexpr (std::is_same_v<AttributeType, String>)
+        auto column = ColumnProvider::getColumn(dictionary_attribute, size);
+
+        if constexpr (std::is_same_v<ValueType, StringRef>)
         {
-            auto column_string = ColumnString::create();
-            auto * out = column_string.get();
+            auto * out = column.get();
 
-            const auto & null_value = std::get<StringRef>(attribute.null_values);
-            DefaultValueExtractor<StringRef> extractor(null_value, default_values_column);
-
-            getItemsImpl<StringRef, StringRef>(
+            getItemsImpl<ValueType, ValueType>(
                 attribute,
                 ids,
                 [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
-                extractor);
-
-            result = std::move(column_string);
+                default_value_extractor);
         }
         else
         {
-            using ResultColumnType
-                = std::conditional_t<IsDecimalNumber<AttributeType>, ColumnDecimal<AttributeType>, ColumnVector<AttributeType>>;
-            using ResultColumnPtr = typename ResultColumnType::MutablePtr;
-
-            ResultColumnPtr column;
-
-            if constexpr (IsDecimalNumber<AttributeType>)
-            {
-                auto scale = getDecimalScale(*dictionary_attribute.nested_type);
-                column = ColumnDecimal<AttributeType>::create(size, scale);
-            }
-            else if constexpr (IsNumber<AttributeType>)
-                column = ColumnVector<AttributeType>::create(size);
-
             auto & out = column->getData();
 
-            const auto null_value = std::get<AttributeType>(attribute.null_values);
-            DefaultValueExtractor<AttributeType> extractor(null_value, default_values_column);
-
-            getItemsImpl<AttributeType, AttributeType>(
-                attribute, 
+            getItemsImpl<ValueType, ValueType>(
+                attribute,
                 ids, 
                 [&](const size_t row, const auto value) { out[row] = value; },
-                extractor);
-
-            result = std::move(column);
+                default_value_extractor);
         }
+
+        result = std::move(column);
     };
 
     callOnDictionaryAttributeType(attribute.type, type_call);
@@ -199,10 +183,10 @@ ColumnPtr FlatDictionary::getColumn(
 }
 
 
-ColumnUInt8::Ptr FlatDictionary::has(const Columns & key_columns, const DataTypes &) const
+ColumnUInt8::Ptr FlatDictionary::hasKeys(const Columns & key_columns, const DataTypes &) const
 {
     PaddedPODArray<Key> backup_storage;
-    const auto& ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
+    const auto& ids = getColumnVectorData(this, key_columns.front(), backup_storage);
 
     auto result = ColumnUInt8::create(ext::size(ids));
     auto& out = result->getData();
@@ -425,7 +409,7 @@ void FlatDictionary::getItemsImpl(
     const Attribute & attribute,
     const PaddedPODArray<Key> & ids,
     ValueSetter && set_value,
-    DefaultValueExtractor<AttributeType> & default_value_extractor) const
+    DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const
 {
     const auto & attr = std::get<ContainerType<AttributeType>>(attribute.arrays);
     const auto rows = ext::size(ids);
diff --git a/src/Dictionaries/FlatDictionary.h b/src/Dictionaries/FlatDictionary.h
index 2aa32a1d8fe..9e719a4368d 100644
--- a/src/Dictionaries/FlatDictionary.h
+++ b/src/Dictionaries/FlatDictionary.h
@@ -18,55 +18,10 @@
 #include "DictionaryStructure.h"
 #include "IDictionary.h"
 #include "IDictionarySource.h"
-
+#include "DictionaryHelpers.h"
 
 namespace DB
 {
-using BlockPtr = std::shared_ptr<Block>;
-
-template <typename DefaultValue>
-class DefaultValueExtractor
-{
-public:
-    using ResultColumnType = std::conditional_t<
-        std::is_same_v<DefaultValue, StringRef>,
-        ColumnString,
-        std::conditional_t<IsDecimalNumber<DefaultValue>, ColumnDecimal<DefaultValue>, ColumnVector<DefaultValue>>>;
-
-    DefaultValueExtractor(DefaultValue default_value_, ColumnPtr default_values_)
-    {
-        if (default_values_ != nullptr)
-        {
-            if (const auto * const default_col = checkAndGetColumn<ResultColumnType>(*default_values_))
-            {
-                default_values = default_col;
-            }
-            else if (const auto * const default_col_const = checkAndGetColumnConst<ResultColumnType>(default_values_.get()))
-            {
-                using ConstColumnValue = std::conditional_t<std::is_same_v<DefaultValue, StringRef>, String, DefaultValue>;
-                default_value = std::make_optional<DefaultValue>(default_col_const->template getValue<ConstColumnValue>());
-            }
-            else
-                throw Exception{"Type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
-        }
-        else
-            default_value = std::make_optional<DefaultValue>(default_value_);
-    }
-
-    DefaultValue operator[](size_t row)
-    {
-        if (default_value)
-            return *default_value;
-        
-        if constexpr (std::is_same_v<ResultColumnType, ColumnString>)
-            return default_values->getDataAt(row);
-        else
-            return default_values->getData()[row];
-    }
-private:
-    const ResultColumnType * default_values = nullptr;
-    std::optional<DefaultValue> default_value = {};
-};
 
 class FlatDictionary final : public IDictionary
 {
@@ -116,7 +71,7 @@ public:
     void isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const override;
     void isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
 
-    DictionaryIdentifierType getIdentifierType() const override { return DictionaryIdentifierType::simple; }
+    DictionaryKeyType getKeyType() const override { return DictionaryKeyType::simple; }
 
     ColumnPtr getColumn(
         const std::string& attribute_name,
@@ -125,7 +80,7 @@ public:
         const DataTypes & key_types,
         const ColumnPtr default_values_column) const override;
 
-    ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override;
+    ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
 
     BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
 
@@ -195,7 +150,10 @@ private:
 
     template <typename AttributeType, typename OutputType, typename ValueSetter>
     void getItemsImpl(
-        const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultValueExtractor<AttributeType> & default_value_extractor) const;
+        const Attribute & attribute,
+        const PaddedPODArray<Key> & ids,
+        ValueSetter && set_value,
+        DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const;
 
     template <typename T>
     void resize(Attribute & attribute, const Key id);
diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp
index b71ee666d9e..22bcd3a602c 100644
--- a/src/Dictionaries/HashedDictionary.cpp
+++ b/src/Dictionaries/HashedDictionary.cpp
@@ -60,12 +60,13 @@ HashedDictionary::HashedDictionary(
 void HashedDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const
 {
     const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
+    DictionaryDefaultValueExtractor<UInt64> extractor(null_value);
 
     getItemsImpl<UInt64, UInt64>(
         *hierarchical_attribute,
         ids,
         [&](const size_t row, const UInt64 value) { out[row] = value; },
-        [&](const size_t) { return null_value; });
+        extractor);
 }
 
 
@@ -138,7 +139,7 @@ ColumnPtr HashedDictionary::getColumn(
     ColumnPtr result;
 
     PaddedPODArray<Key> backup_storage;
-    const auto & ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
+    const auto & ids = getColumnVectorData(this, key_columns.front(), backup_storage);
 
     auto size = ids.size();
 
@@ -149,105 +150,36 @@ ColumnPtr HashedDictionary::getColumn(
     {
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
         using AttributeType = typename Type::AttributeType;
+        using ValueType = DictionaryValueType<AttributeType>;
+        using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
+ 
+        const auto null_value = std::get<ValueType>(attribute.null_values);
+        DictionaryDefaultValueExtractor<ValueType> default_value_extractor(null_value, default_values_column);
+
+        auto column = ColumnProvider::getColumn(dictionary_attribute, size);
 
         if constexpr (std::is_same_v<AttributeType, String>)
         {
-            auto column_string = ColumnString::create();
-            auto * out = column_string.get();
+            auto * out = column.get();
 
-            if (default_values_column != nullptr)
-            {
-                if (const auto * const default_col = checkAndGetColumn<ColumnString>(*default_values_column))
-                {
-                    getItemsImpl<StringRef, StringRef>(
-                        attribute,
-                        ids,
-                        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
-                        [&](const size_t row) { return default_col->getDataAt(row); });
-                }
-                else if (const auto * const default_col_const = checkAndGetColumnConst<ColumnString>(default_values_column.get()))
-                {
-                    const auto & def = default_col_const->template getValue<String>();
-
-                    getItemsImpl<StringRef, StringRef>(
-                        attribute,
-                        ids,
-                        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
-                        [&](const size_t) { return def; });
-                }
-                else
-                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
-            }
-            else
-            {
-                const auto & null_value = std::get<StringRef>(attribute.null_values);
-
-                getItemsImpl<StringRef, StringRef>(
-                    attribute,
-                    ids,
-                    [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
-                    [&](const size_t) { return null_value; });
-            }
-
-            result = std::move(column_string);
+            getItemsImpl<StringRef, StringRef>(
+                attribute,
+                ids,
+                [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
+                default_value_extractor);
         }
         else
         {
-            using ResultColumnType
-                = std::conditional_t<IsDecimalNumber<AttributeType>, ColumnDecimal<AttributeType>, ColumnVector<AttributeType>>;
-            using ResultColumnPtr = typename ResultColumnType::MutablePtr;
-
-            ResultColumnPtr column;
-
-            if constexpr (IsDecimalNumber<AttributeType>)
-            {
-                auto scale = getDecimalScale(*dictionary_attribute.nested_type);
-                column = ColumnDecimal<AttributeType>::create(size, scale);
-            }
-            else if constexpr (IsNumber<AttributeType>)
-                column = ColumnVector<AttributeType>::create(size);
-
             auto & out = column->getData();
 
-            if (default_values_column != nullptr)
-            {
-                if (const auto * const default_col = checkAndGetColumn<ResultColumnType>(*default_values_column))
-                {
-                    getItemsImpl<AttributeType, AttributeType>(
-                        attribute,
-                        ids,
-                        [&](const size_t row, const auto value) { return out[row] = value; },
-                        [&](const size_t row) { return default_col->getData()[row]; }
-                    );
-                }
-                else if (const auto * const default_col_const = checkAndGetColumnConst<ResultColumnType>(default_values_column.get()))
-                {
-                    const auto & def = default_col_const->template getValue<AttributeType>();
-
-                    getItemsImpl<AttributeType, AttributeType>(
-                        attribute,
-                        ids,
-                        [&](const size_t row, const auto value) { return out[row] = value; },
-                        [&](const size_t) { return def; }
-                    );
-                }
-                else
-                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
-            }
-            else
-            {
-                const auto null_value = std::get<AttributeType>(attribute.null_values);
-
-                getItemsImpl<AttributeType, AttributeType>(
-                    attribute,
-                    ids,
-                    [&](const size_t row, const auto value) { return out[row] = value; },
-                    [&](const size_t) { return null_value; }
-                );
-            }
-
-            result = std::move(column);
+            getItemsImpl<AttributeType, AttributeType>(
+                attribute,
+                ids,
+                [&](const size_t row, const auto value) { return out[row] = value; },
+                default_value_extractor);
         }
+        
+        result = std::move(column);
     };
 
     callOnDictionaryAttributeType(attribute.type, type_call);
@@ -271,10 +203,10 @@ ColumnPtr HashedDictionary::getColumn(
     return result;
 }
 
-ColumnUInt8::Ptr HashedDictionary::has(const Columns & key_columns, const DataTypes &) const
+ColumnUInt8::Ptr HashedDictionary::hasKeys(const Columns & key_columns, const DataTypes &) const
 {
     PaddedPODArray<Key> backup_storage;
-    const auto& ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
+    const auto& ids = getColumnVectorData(this, key_columns.front(), backup_storage);
 
     size_t ids_count = ext::size(ids);
 
@@ -586,28 +518,34 @@ HashedDictionary::Attribute HashedDictionary::createAttribute(const DictionaryAt
 }
 
 
-template <typename OutputType, typename AttrType, typename ValueSetter, typename DefaultGetter>
+template <typename AttributeType, typename OutputType, typename MapType, typename ValueSetter>
 void HashedDictionary::getItemsAttrImpl(
-    const AttrType & attr, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
+    const MapType & attr,
+    const PaddedPODArray<Key> & ids,
+    ValueSetter && set_value,
+    DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const
 {
     const auto rows = ext::size(ids);
 
     for (const auto i : ext::range(0, rows))
     {
         const auto it = attr.find(ids[i]);
-        set_value(i, it != attr.end() ? static_cast<OutputType>(second(*it)) : get_default(i));
+        set_value(i, it != attr.end() ? static_cast<OutputType>(second(*it)) : default_value_extractor[i]);
     }
 
     query_count.fetch_add(rows, std::memory_order_relaxed);
 }
 
-template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
+template <typename AttributeType, typename OutputType, typename ValueSetter>
 void HashedDictionary::getItemsImpl(
-    const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
+    const Attribute & attribute,
+    const PaddedPODArray<Key> & ids,
+    ValueSetter && set_value,
+    DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const
 {
     if (!sparse)
-        return getItemsAttrImpl<OutputType>(*std::get<CollectionPtrType<AttributeType>>(attribute.maps), ids, set_value, get_default);
-    return getItemsAttrImpl<OutputType>(*std::get<SparseCollectionPtrType<AttributeType>>(attribute.sparse_maps), ids, set_value, get_default);
+        return getItemsAttrImpl<AttributeType, OutputType>(*std::get<CollectionPtrType<AttributeType>>(attribute.maps), ids, set_value, default_value_extractor);
+    return getItemsAttrImpl<AttributeType, OutputType>(*std::get<SparseCollectionPtrType<AttributeType>>(attribute.sparse_maps), ids, set_value, default_value_extractor);
 }
 
 
diff --git a/src/Dictionaries/HashedDictionary.h b/src/Dictionaries/HashedDictionary.h
index 776b2f37f36..fd8bc0c10b1 100644
--- a/src/Dictionaries/HashedDictionary.h
+++ b/src/Dictionaries/HashedDictionary.h
@@ -14,6 +14,7 @@
 #include "DictionaryStructure.h"
 #include "IDictionary.h"
 #include "IDictionarySource.h"
+#include "DictionaryHelpers.h"
 
 /** This dictionary stores all content in a hash table in memory
   * (a separate Key -> Value map for each attribute)
@@ -22,7 +23,6 @@
 
 namespace DB
 {
-using BlockPtr = std::shared_ptr<Block>;
 
 class HashedDictionary final : public IDictionary
 {
@@ -68,7 +68,7 @@ public:
 
     void toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const override;
 
-    DictionaryIdentifierType getIdentifierType() const override { return DictionaryIdentifierType::simple; }
+    DictionaryKeyType getKeyType() const override { return DictionaryKeyType::simple; }
 
     ColumnPtr getColumn(
         const std::string& attribute_name,
@@ -77,7 +77,7 @@ public:
         const DataTypes & key_types,
         const ColumnPtr default_values_column) const override;
 
-    ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override;
+    ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
 
     void isInVectorVector(
         const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
@@ -182,12 +182,19 @@ private:
 
     Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value);
 
-    template <typename OutputType, typename AttrType, typename ValueSetter, typename DefaultGetter>
+    template <typename AttributeType, typename OutputType, typename MapType, typename ValueSetter>
     void getItemsAttrImpl(
-        const AttrType & attr, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
-    template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
+        const MapType & attr,
+        const PaddedPODArray<Key> & ids,
+        ValueSetter && set_value,
+        DictionaryDefaultValueExtractor<AttributeType> & extractor) const;
+
+    template <typename AttributeType, typename OutputType, typename ValueSetter>
     void getItemsImpl(
-        const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
+        const Attribute & attribute,
+        const PaddedPODArray<Key> & ids,
+        ValueSetter && set_value,
+        DictionaryDefaultValueExtractor<AttributeType> & extractor) const;
 
     template <typename T>
     bool setAttributeValueImpl(Attribute & attribute, const Key id, const T value);
diff --git a/src/Dictionaries/IDictionary.h b/src/Dictionaries/IDictionary.h
index c774ab7f676..4c03b7cf2ad 100644
--- a/src/Dictionaries/IDictionary.h
+++ b/src/Dictionaries/IDictionary.h
@@ -28,13 +28,26 @@ namespace ErrorCodes
 struct IDictionaryBase;
 using DictionaryPtr = std::unique_ptr<IDictionaryBase>;
 
-enum class DictionaryIdentifierType
+/** DictionaryKeyType provides IDictionary client information about 
+  *  which key type is supported by dictionary.
+  * 
+  *  Simple is for dictionaries that support UInt64 key column.
+  * 
+  *  Complex is for dictionaries that support any combination of key columns.
+  * 
+  *  Range is for dictionary that support combination of UInt64 key column,
+  *  and Integer representable range key column.
+  */
+enum class DictionaryKeyType
 {
     simple,
     complex,
     range
 };
 
+/**
+ * Base class for Dictionaries implementation.
+ */ 
 struct IDictionaryBase : public IExternalLoadable
 {
     using Key = UInt64;
@@ -91,28 +104,32 @@ struct IDictionaryBase : public IExternalLoadable
 
     virtual bool isInjective(const std::string & attribute_name) const = 0;
 
-    virtual DictionaryIdentifierType getIdentifierType() const /* = 0; */
-    {
-        throw Exception(ErrorCodes::NOT_IMPLEMENTED,
-                        "Get identifier type not supported", getDictionaryID().getNameForLogs());
-    }
+    /** Subclass must provide key type that is supported by dictionary.
+      * Client will use that key type to provide valid key columns for `getColumn` and `has` functions.
+      */
+    virtual DictionaryKeyType getKeyType() const = 0;
 
+    /** Subclass must validate key columns and keys types 
+      * and return column representation of dictionary attribute.
+      * 
+      * Parameter default_values_column must be used to provide default values
+      * for keys that are not in dictionary. If null pointer is passed, 
+      * then default attribute value must be used.
+      */
     virtual ColumnPtr getColumn(
-        const std::string & attribute_name [[maybe_unused]],
-        const DataTypePtr & result_type [[maybe_unused]],
-        const Columns & key_columns [[maybe_unused]],
-        const DataTypes & key_types [[maybe_unused]],
-        const ColumnPtr default_values_column [[maybe_unused]]) const /* = 0; */
-    {
-        throw Exception(ErrorCodes::NOT_IMPLEMENTED,
-                        "Get column not supported", getDictionaryID().getNameForLogs());
-    }
+        const std::string & attribute_name,
+        const DataTypePtr & result_type,
+        const Columns & key_columns,
+        const DataTypes & key_types,
+        const ColumnPtr default_values_column) const = 0;
 
-    virtual ColumnUInt8::Ptr has(const Columns & key_columns [[maybe_unused]], const DataTypes & key_types [[maybe_unused]]) const /* = 0; */
-    {
-         throw Exception(ErrorCodes::NOT_IMPLEMENTED,
-                        "Has not supported", getDictionaryID().getNameForLogs());
-    }
+    /** Subclass must validate key columns and key types and return ColumnUInt8 that 
+      * is bitmask representation of is key in dictionary or not. 
+      * If key is in dictionary then value of associated row will be 1, otherwise 0.
+      */
+    virtual ColumnUInt8::Ptr hasKeys(
+        const Columns & key_columns,
+        const DataTypes & key_types) const = 0;
 
     virtual BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const = 0;
 
@@ -184,43 +201,4 @@ struct IDictionary : IDictionaryBase
     }
 };
 
-/// Implicit conversions in dictGet functions is disabled.
-inline void checkAttributeType(const IDictionaryBase * dictionary, const std::string & attribute_name,
-                               AttributeUnderlyingType attribute_type, AttributeUnderlyingType to)
-{
-    if (attribute_type != to)
-        throw Exception{ErrorCodes::TYPE_MISMATCH, "{}: type mismatch: attribute {} has type {}, expected {}",
-                        dictionary->getDictionaryID().getNameForLogs(),
-                        attribute_name, toString(attribute_type), toString(to)};
-}
-
-template <typename T>
-static const PaddedPODArray<T> &
-getColumnDataAsPaddedPODArray(const IDictionaryBase * dictionary, const ColumnPtr column, PaddedPODArray<T> & backup_storage)
-{
-    bool is_const_column = isColumnConst(*column);
-    auto full_column = column->convertToFullColumnIfConst();
-    auto vector_col = checkAndGetColumn<ColumnVector<T>>(full_column.get());
-
-    if (!vector_col)
-    {
-        throw Exception{ErrorCodes::TYPE_MISMATCH,
-            "{}: type mismatch: column has wrong type expected {}",
-            dictionary->getDictionaryID().getNameForLogs(),
-            "" /* TODO: Type name*/};
-    }
-
-    if (is_const_column)
-    {
-        // With type conversion and const columns we need to use backup storage here
-        auto & data = vector_col->getData();
-        backup_storage.assign(data);
-
-        return backup_storage;
-    }
-    else
-    {
-        return vector_col->getData();
-    }
-}
 }
diff --git a/src/Dictionaries/IPAddressDictionary.cpp b/src/Dictionaries/IPAddressDictionary.cpp
index 93451635cb2..468d720f69d 100644
--- a/src/Dictionaries/IPAddressDictionary.cpp
+++ b/src/Dictionaries/IPAddressDictionary.cpp
@@ -273,7 +273,7 @@ ColumnPtr IPAddressDictionary::getColumn(
     const DataTypePtr & result_type,
     const Columns & key_columns,
     const DataTypes & key_types,
-    const ColumnPtr default_untyped) const
+    const ColumnPtr default_values_column) const
 {
     validateKeyTypes(key_types);
 
@@ -288,105 +288,37 @@ ColumnPtr IPAddressDictionary::getColumn(
     {
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
         using AttributeType = typename Type::AttributeType;
+        using ValueType = DictionaryValueType<AttributeType>;
+        using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>; 
+
+        const auto null_value = ValueType{std::get<AttributeType>(attribute.null_values)};
+        DictionaryDefaultValueExtractor<ValueType> default_value_extractor(null_value, default_values_column);
+
+        auto column = ColumnProvider::getColumn(dictionary_attribute, size);
+
 
         if constexpr (std::is_same_v<AttributeType, String>)
         {
-            auto column_string = ColumnString::create();
-            auto * out = column_string.get();
+            auto * out = column.get();
 
-            if (default_untyped != nullptr)
-            {
-                if (const auto * const default_col = checkAndGetColumn<ColumnString>(*default_untyped))
-                {
-                    getItemsImpl<StringRef, StringRef>(
-                        attribute,
-                        key_columns,
-                        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
-                        [&](const size_t row) { return default_col->getDataAt(row); });
-                }
-                else if (const auto * const default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
-                {
-                    const auto & def = default_col_const->template getValue<String>();
-
-                    getItemsImpl<StringRef, StringRef>(
-                        attribute,
-                        key_columns,
-                        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
-                        [&](const size_t) { return StringRef { def }; });
-                }
-                else
-                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
-            }
-            else
-            {
-                const auto & null_value = StringRef{std::get<String>(attribute.null_values)};
-
-                getItemsImpl<StringRef, StringRef>(
-                    attribute,
-                    key_columns,
-                    [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
-                    [&](const size_t) { return null_value; });
-            }
-
-            result = std::move(column_string);
+            getItemsImpl<ValueType, ValueType>(
+                attribute,
+                key_columns,
+                [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
+                default_value_extractor);
         }
         else
         {
-            using ResultColumnType
-                = std::conditional_t<IsDecimalNumber<AttributeType>, ColumnDecimal<AttributeType>, ColumnVector<AttributeType>>;
-            using ResultColumnPtr = typename ResultColumnType::MutablePtr;
-
-            ResultColumnPtr column;
-
-            if constexpr (IsDecimalNumber<AttributeType>)
-            {
-                auto scale = getDecimalScale(*dictionary_attribute.nested_type);
-                column = ColumnDecimal<AttributeType>::create(size, scale);
-            }
-            else if constexpr (IsNumber<AttributeType>)
-                column = ColumnVector<AttributeType>::create(size);
-
             auto & out = column->getData();
 
-            if (default_untyped != nullptr)
-            {
-                if (const auto * const default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
-                {
-                    getItemsImpl<AttributeType, AttributeType>(
-                        attribute,
-                        key_columns,
-                        [&](const size_t row, const auto value) { return out[row] = value; },
-                        [&](const size_t row) { return default_col->getData()[row]; }
-                    );
-                }
-                else if (const auto * const default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
-                {
-                    const auto & def = default_col_const->template getValue<AttributeType>();
-
-                    getItemsImpl<AttributeType, AttributeType>(
-                        attribute,
-                        key_columns,
-                        [&](const size_t row, const auto value) { return out[row] = value; },
-                        [&](const size_t) { return def; }
-                    );
-                }
-                else
-                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
-            }
-            else
-            {
-                const auto null_value = std::get<AttributeType>(attribute.null_values);
-
-                getItemsImpl<AttributeType, AttributeType>(
-                    attribute,
-                    key_columns,
-                    [&](const size_t row, const auto value) { return out[row] = value; },
-                    [&](const size_t) { return null_value; }
-                );
-            }
-
-            result = std::move(column);
+            getItemsImpl<ValueType, ValueType>(
+                attribute,
+                key_columns,
+                [&](const size_t row, const auto value) { return out[row] = value; },
+                default_value_extractor);
         }
+
+        result = std::move(column);
     };
 
     callOnDictionaryAttributeType(attribute.type, type_call);
@@ -395,7 +327,7 @@ ColumnPtr IPAddressDictionary::getColumn(
 }
 
 
-ColumnUInt8::Ptr IPAddressDictionary::has(const Columns & key_columns, const DataTypes & key_types) const
+ColumnUInt8::Ptr IPAddressDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
 {
     validateKeyTypes(key_types);
 
@@ -705,9 +637,12 @@ const uint8_t * IPAddressDictionary::getIPv6FromOffset(const IPAddressDictionary
     return reinterpret_cast<const uint8_t *>(&ipv6_col[i * IPV6_BINARY_LENGTH]);
 }
 
-template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
+template <typename AttributeType, typename OutputType, typename ValueSetter>
 void IPAddressDictionary::getItemsByTwoKeyColumnsImpl(
-    const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
+    const Attribute & attribute,
+    const Columns & key_columns,
+    ValueSetter && set_value,
+    DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const
 {
     const auto first_column = key_columns.front();
     const auto rows = first_column->size();
@@ -744,7 +679,7 @@ void IPAddressDictionary::getItemsByTwoKeyColumnsImpl(
                 set_value(i, static_cast<OutputType>(vec[row_idx[*found_it]]));
             }
             else
-                set_value(i, get_default(i));
+                set_value(i, default_value_extractor[i]);
         }
         return;
     }
@@ -779,13 +714,16 @@ void IPAddressDictionary::getItemsByTwoKeyColumnsImpl(
             mask_column[*found_it] == mask))
             set_value(i, static_cast<OutputType>(vec[row_idx[*found_it]]));
         else
-            set_value(i, get_default(i));
+            set_value(i, default_value_extractor[i]);
     }
 }
 
-template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
+template <typename AttributeType, typename OutputType, typename ValueSetter>
 void IPAddressDictionary::getItemsImpl(
-    const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
+    const Attribute & attribute,
+    const Columns & key_columns,
+    ValueSetter && set_value,
+    DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const
 {
     const auto first_column = key_columns.front();
     const auto rows = first_column->size();
@@ -794,7 +732,7 @@ void IPAddressDictionary::getItemsImpl(
     if (unlikely(key_columns.size() == 2))
     {
         getItemsByTwoKeyColumnsImpl<AttributeType, OutputType>(
-            attribute, key_columns, std::forward<ValueSetter>(set_value), std::forward<DefaultGetter>(get_default));
+            attribute, key_columns, std::forward<ValueSetter>(set_value), default_value_extractor);
         query_count.fetch_add(rows, std::memory_order_relaxed);
         return;
     }
@@ -812,7 +750,7 @@ void IPAddressDictionary::getItemsImpl(
             if (found != ipNotFound())
                 set_value(i, static_cast<OutputType>(vec[*found]));
             else
-                set_value(i, get_default(i));
+                set_value(i, default_value_extractor[i]);
         }
     }
     else
@@ -827,7 +765,7 @@ void IPAddressDictionary::getItemsImpl(
             if (found != ipNotFound())
                 set_value(i, static_cast<OutputType>(vec[*found]));
             else
-                set_value(i, get_default(i));
+                set_value(i, default_value_extractor[i]);
         }
     }
 
diff --git a/src/Dictionaries/IPAddressDictionary.h b/src/Dictionaries/IPAddressDictionary.h
index 3d9565c47ce..0694724aaca 100644
--- a/src/Dictionaries/IPAddressDictionary.h
+++ b/src/Dictionaries/IPAddressDictionary.h
@@ -16,6 +16,7 @@
 #include "DictionaryStructure.h"
 #include "IDictionary.h"
 #include "IDictionarySource.h"
+#include "DictionaryHelpers.h"
 
 namespace DB
 {
@@ -61,16 +62,16 @@ public:
         return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
     }
 
-    DictionaryIdentifierType getIdentifierType() const override { return DictionaryIdentifierType::complex; }
+    DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
 
     ColumnPtr getColumn(
         const std::string& attribute_name,
         const DataTypePtr & result_type,
         const Columns & key_columns,
         const DataTypes & key_types,
-        const ColumnPtr default_untyped) const override;
+        const ColumnPtr default_values_column) const override;
 
-    ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override;
+    ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
 
     BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
 
@@ -140,13 +141,19 @@ private:
 
     Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
 
-    template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
+    template <typename AttributeType, typename OutputType, typename ValueSetter>
     void getItemsByTwoKeyColumnsImpl(
-        const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
+        const Attribute & attribute,
+        const Columns & key_columns,
+        ValueSetter && set_value,
+        DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const;
 
-    template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
-    void
-    getItemsImpl(const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
+    template <typename AttributeType, typename OutputType, typename ValueSetter>
+    void getItemsImpl(
+        const Attribute & attribute,
+        const Columns & key_columns,
+        ValueSetter && set_value,
+        DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const;
 
     template <typename T>
     void setAttributeValueImpl(Attribute & attribute, const T value);
diff --git a/src/Dictionaries/PolygonDictionary.cpp b/src/Dictionaries/PolygonDictionary.cpp
index c1969ea7b35..15af4680d9d 100644
--- a/src/Dictionaries/PolygonDictionary.cpp
+++ b/src/Dictionaries/PolygonDictionary.cpp
@@ -99,120 +99,50 @@ ColumnPtr IPolygonDictionary::getColumn(
     const DataTypePtr & result_type,
     const Columns & key_columns,
     const DataTypes &,
-    const ColumnPtr default_untyped) const
+    const ColumnPtr default_values_column) const
 {
-    /// TODO: Validate input types
-
     ColumnPtr result;
 
     const auto index = getAttributeIndex(attribute_name);
     const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
 
-    auto size = key_columns.front()->size();
+    auto keys_size = key_columns.front()->size();
 
     auto type_call = [&](const auto &dictionary_attribute_type)
     {
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
         using AttributeType = typename Type::AttributeType;
+        using ValueType = DictionaryValueType<AttributeType>;
+        using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
+ 
+        const auto null_value = static_cast<ValueType>(std::get<AttributeType>(null_values[index]));
+        DictionaryDefaultValueExtractor<ValueType> default_value_extractor(null_value, default_values_column);
+
+        auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
 
         if constexpr (std::is_same_v<AttributeType, String>)
         {
             auto column_string = ColumnString::create();
-            auto * out = column_string.get();
+            auto * out = column.get();
 
-            if (default_untyped != nullptr)
-            {
-                if (const auto * const default_col = checkAndGetColumn<ColumnString>(*default_untyped))
-                {
-                    getItemsImpl<String, StringRef>(
-                        index,
-                        key_columns,
-                        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
-                        [&](const size_t row) { return default_col->getDataAt(row); });
-                }
-                else if (const auto * const default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
-                {
-                    const auto & def = default_col_const->template getValue<String>();
-
-                    getItemsImpl<String, StringRef>(
-                        index,
-                        key_columns,
-                        [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
-                        [&](const size_t) { return def; });
-                }
-                else
-                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
-            }
-            else
-            {
-                const auto & null_value = StringRef{std::get<String>(null_values[index])};
-
-                getItemsImpl<String, StringRef>(
-                    index,
-                    key_columns,
-                    [&](const size_t, const StringRef & value) { out->insertData(value.data, value.size); },
-                    [&](const size_t) { return null_value; });
-            }
-
-            result = std::move(column_string);
+            getItemsImpl<String, StringRef>(
+                index,
+                key_columns,
+                [&](const size_t, const StringRef & value) { out->insertData(value.data, value.size); },
+                default_value_extractor);
         }
         else
         {
-            using ResultColumnType
-                = std::conditional_t<IsDecimalNumber<AttributeType>, ColumnDecimal<AttributeType>, ColumnVector<AttributeType>>;
-            using ResultColumnPtr = typename ResultColumnType::MutablePtr;
-
-            ResultColumnPtr column;
-
-            if constexpr (IsDecimalNumber<AttributeType>)
-            {
-                auto scale = getDecimalScale(*dictionary_attribute.nested_type);
-                column = ColumnDecimal<AttributeType>::create(size, scale);
-            }
-            else if constexpr (IsNumber<AttributeType>)
-                column = ColumnVector<AttributeType>::create(size);
-
             auto & out = column->getData();
 
-            if (default_untyped != nullptr)
-            {
-                if (const auto * const default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
-                {
-                    getItemsImpl<AttributeType, AttributeType>(
-                        index,
-                        key_columns,
-                        [&](const size_t row, const auto value) { return out[row] = value; },
-                        [&](const size_t row) { return default_col->getData()[row]; }
-                    );
-                }
-                else if (const auto * const default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
-                {
-                    const auto & def = default_col_const->template getValue<AttributeType>();
-
-                    getItemsImpl<AttributeType, AttributeType>(
-                        index,
-                        key_columns,
-                        [&](const size_t row, const auto value) { return out[row] = value; },
-                        [&](const size_t) { return def; }
-                    );
-                }
-                else
-                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
-            }
-            else
-            {
-                const auto null_value = std::get<AttributeType>(null_values[index]);
-
-                getItemsImpl<AttributeType, AttributeType>(
-                    index,
-                    key_columns,
-                    [&](const size_t row, const auto value) { return out[row] = value; },
-                    [&](const size_t) { return null_value; }
-                );
-            }
-
-            result = std::move(column);
+            getItemsImpl<AttributeType, AttributeType>(
+                index,
+                key_columns,
+                [&](const size_t row, const auto value) { return out[row] = value; },
+                default_value_extractor);
         }
+
+        result = std::move(column);
     };
 
     callOnDictionaryAttributeType(dict_struct.attributes[index].underlying_type, type_call);
@@ -383,7 +313,7 @@ std::vector<IPolygonDictionary::Point> IPolygonDictionary::extractPoints(const C
     return result;
 }
 
-ColumnUInt8::Ptr IPolygonDictionary::has(const Columns & key_columns, const DataTypes &) const
+ColumnUInt8::Ptr IPolygonDictionary::hasKeys(const Columns & key_columns, const DataTypes &) const
 {
     auto size = key_columns.front()->size();
     auto result = ColumnUInt8::create(size);
@@ -410,9 +340,12 @@ size_t IPolygonDictionary::getAttributeIndex(const std::string & attribute_name)
     return it->second;
 }
 
-template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
+template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
 void IPolygonDictionary::getItemsImpl(
-        size_t attribute_ind, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
+        size_t attribute_ind,
+        const Columns & key_columns,
+        ValueSetter && set_value,
+        DefaultValueExtractor & default_value_extractor) const
 {
     const auto points = extractPoints(key_columns);
 
@@ -428,7 +361,7 @@ void IPolygonDictionary::getItemsImpl(
         id = ids[id];
         if (!found)
         {
-            set_value(i, static_cast<OutputType>(get_default(i)));
+            set_value(i, static_cast<OutputType>(default_value_extractor[i]));
             continue;
         }
         if constexpr (std::is_same<AttributeType, String>::value)
diff --git a/src/Dictionaries/PolygonDictionary.h b/src/Dictionaries/PolygonDictionary.h
index f7b7d92fd27..a0ea189c10a 100644
--- a/src/Dictionaries/PolygonDictionary.h
+++ b/src/Dictionaries/PolygonDictionary.h
@@ -12,6 +12,7 @@
 #include "DictionaryStructure.h"
 #include "IDictionary.h"
 #include "IDictionarySource.h"
+#include "DictionaryHelpers.h"
 
 namespace DB
 {
@@ -78,16 +79,16 @@ public:
 
     bool isInjective(const std::string & attribute_name) const override;
 
-    DictionaryIdentifierType getIdentifierType() const override { return DictionaryIdentifierType::complex; }
+    DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
 
     ColumnPtr getColumn(
         const std::string& attribute_name,
         const DataTypePtr & result_type,
         const Columns & key_columns,
         const DataTypes & key_types,
-        const ColumnPtr default_untyped) const override;
+        const ColumnPtr default_values_column) const override;
 
-    ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override;
+    ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
 
     BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
 
@@ -142,8 +143,12 @@ private:
     void appendNullValue(AttributeUnderlyingType type, const Field & value);
 
     /** Helper function for retrieving the value of an attribute by key. */
-    template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
-    void getItemsImpl(size_t attribute_ind, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
+    template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
+    void getItemsImpl(
+        size_t attribute_ind,
+        const Columns & key_columns,
+        ValueSetter && set_value,
+        DefaultValueExtractor & default_value_extractor) const;
 
     /** A mapping from the names of the attributes to their index in the two vectors defined below. */
     std::map<std::string, size_t> attribute_index_by_name;
diff --git a/src/Dictionaries/RangeHashedDictionary.cpp b/src/Dictionaries/RangeHashedDictionary.cpp
index 02fb71ca3d7..a1f5c7a68fb 100644
--- a/src/Dictionaries/RangeHashedDictionary.cpp
+++ b/src/Dictionaries/RangeHashedDictionary.cpp
@@ -93,16 +93,14 @@ ColumnPtr RangeHashedDictionary::getColumn(
     const DataTypePtr & result_type,
     const Columns & key_columns,
     const DataTypes & key_types,
-    const ColumnPtr default_untyped) const
+    const ColumnPtr default_values_column) const
 {
-    /// TODO: Validate input types
-
     ColumnPtr result;
 
     const auto & attribute = getAttribute(attribute_name);
     const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
 
-    auto size = key_columns.front()->size();
+    auto keys_size = key_columns.front()->size();
 
     /// Cast second column to storage type
     Columns modified_key_columns = key_columns;
@@ -117,7 +115,7 @@ ColumnPtr RangeHashedDictionary::getColumn(
     ColumnUInt8::Container * vec_null_map_to = nullptr;
     if (attribute.is_nullable)
     {
-        col_null_map_to = ColumnUInt8::create(size, false);
+        col_null_map_to = ColumnUInt8::create(keys_size, false);
         vec_null_map_to = &col_null_map_to->getData();
     }
 
@@ -125,152 +123,46 @@ ColumnPtr RangeHashedDictionary::getColumn(
     {
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
         using AttributeType = typename Type::AttributeType;
+        using ValueType = DictionaryValueType<AttributeType>;
+        using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>; 
+
+        const auto null_value = std::get<ValueType>(attribute.null_values);
+        DictionaryDefaultValueExtractor<ValueType> default_value_extractor(null_value, default_values_column);
+
+        auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
 
         if constexpr (std::is_same_v<AttributeType, String>)
         {
-            auto column_string = ColumnString::create();
-            auto * out = column_string.get();
+            auto * out = column.get();
 
-            if (default_untyped != nullptr)
-            {
-                if (const auto * const default_col = checkAndGetColumn<ColumnString>(*default_untyped))
-                {
-                    getItemsImpl<StringRef, StringRef>(
-                        attribute,
-                        modified_key_columns,
-                        [&](const size_t row, const StringRef value, bool is_null)
-                        {
-                            if (attribute.is_nullable)
-                            {
-                                (*vec_null_map_to)[row] = is_null;
-                            }
+            getItemsImpl<ValueType, ValueType>(
+                attribute,
+                modified_key_columns,
+                [&](const size_t row, const StringRef value, bool is_null) {
+                    if (attribute.is_nullable)
+                        (*vec_null_map_to)[row] = is_null;
 
-                            out->insertData(value.data, value.size);
-                        },
-                        [&](const size_t row) { return default_col->getDataAt(row); });
-                }
-                else if (const auto * const default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
-                {
-                    const auto & def = default_col_const->template getValue<String>();
-
-                    getItemsImpl<StringRef, StringRef>(
-                        attribute,
-                        modified_key_columns,
-                        [&](const size_t row, const StringRef value, bool is_null)
-                        {
-                            if (attribute.is_nullable)
-                            {
-                                (*vec_null_map_to)[row] = is_null;
-                            }
-
-                            out->insertData(value.data, value.size);
-                        },
-                        [&](const size_t) { return def; });
-                }
-                else
-                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
-            }
-            else
-            {
-                const auto & null_value = std::get<StringRef>(attribute.null_values);
-
-                getItemsImpl<StringRef, StringRef>(
-                    attribute,
-                    modified_key_columns,
-                    [&](const size_t row, const StringRef value, bool is_null)
-                    {
-                        if (attribute.is_nullable)
-                        {
-                            (*vec_null_map_to)[row] = is_null;
-                        }
-
-                        out->insertData(value.data, value.size);
-                    },
-                    [&](const size_t) { return null_value; });
-            }
-
-            result = std::move(column_string);
+                    out->insertData(value.data, value.size);
+                },
+                default_value_extractor);
         }
         else
         {
-            using ResultColumnType
-                = std::conditional_t<IsDecimalNumber<AttributeType>, ColumnDecimal<AttributeType>, ColumnVector<AttributeType>>;
-            using ResultColumnPtr = typename ResultColumnType::MutablePtr;
-
-            ResultColumnPtr column;
-
-            if constexpr (IsDecimalNumber<AttributeType>)
-            {
-                auto scale = getDecimalScale(*dictionary_attribute.nested_type);
-                column = ColumnDecimal<AttributeType>::create(size, scale);
-            }
-            else if constexpr (IsNumber<AttributeType>)
-                column = ColumnVector<AttributeType>::create(size);
-
             auto & out = column->getData();
 
-            if (default_untyped != nullptr)
-            {
-                if (const auto * const default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
-                {
-                    getItemsImpl<AttributeType, AttributeType>(
-                        attribute,
-                        modified_key_columns,
-                        [&](const size_t row, const auto value, bool is_null)
-                        {
-                            if (attribute.is_nullable)
-                            {
-                                (*vec_null_map_to)[row] = is_null;
-                            }
+            getItemsImpl<ValueType, ValueType>(
+                attribute,
+                modified_key_columns,
+                [&](const size_t row, const auto value, bool is_null) {
+                    if (attribute.is_nullable)
+                        (*vec_null_map_to)[row] = is_null;
 
-                            out[row] = value;
-                        },
-                        [&](const size_t row) { return default_col->getData()[row]; }
-                    );
-                }
-                else if (const auto * const default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
-                {
-                    const auto & def = default_col_const->template getValue<AttributeType>();
-
-                    getItemsImpl<AttributeType, AttributeType>(
-                        attribute,
-                        modified_key_columns,
-                        [&](const size_t row, const auto value, bool is_null)
-                        {
-                            if (attribute.is_nullable)
-                            {
-                                (*vec_null_map_to)[row] = is_null;
-                            }
-
-                            out[row] = value;
-                        },
-                        [&](const size_t) { return def; }
-                    );
-                }
-                else
-                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
-            }
-            else
-            {
-                const auto null_value = std::get<AttributeType>(attribute.null_values);
-
-                getItemsImpl<AttributeType, AttributeType>(
-                    attribute,
-                    modified_key_columns,
-                    [&](const size_t row, const auto value, bool is_null)
-                    {
-                        if (attribute.is_nullable)
-                        {
-                            (*vec_null_map_to)[row] = is_null;
-                        }
-
-                        out[row] = value;
-                    },
-                    [&](const size_t) { return null_value; });
-            }
-
-            result = std::move(column);
+                    out[row] = value;
+                },
+                default_value_extractor);
         }
+
+        result = std::move(column);
     };
 
     callOnDictionaryAttributeType(attribute.type, type_call);
@@ -283,7 +175,7 @@ ColumnPtr RangeHashedDictionary::getColumn(
     return result;
 }
 
-ColumnUInt8::Ptr RangeHashedDictionary::has(const Columns &, const DataTypes &) const
+ColumnUInt8::Ptr RangeHashedDictionary::hasKeys(const Columns &, const DataTypes &) const
 {
     throw Exception(ErrorCodes::NOT_IMPLEMENTED,
         "Has not supported", getDictionaryID().getNameForLogs());
@@ -421,18 +313,18 @@ RangeHashedDictionary::createAttribute(const DictionaryAttribute& attribute, con
     return attr;
 }
 
-template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
+template <typename AttributeType, typename OutputType, typename ValueSetter>
 void RangeHashedDictionary::getItemsImpl(
     const Attribute & attribute,
     const Columns & key_columns,
     ValueSetter && set_value,
-    DefaultGetter && get_default) const
+    DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const
 {
     PaddedPODArray<Key> key_backup_storage;
     PaddedPODArray<RangeStorageType> range_backup_storage;
 
-    const PaddedPODArray<Key> & ids = getColumnDataAsPaddedPODArray(this, key_columns[0], key_backup_storage);
-    const PaddedPODArray<RangeStorageType> & dates = getColumnDataAsPaddedPODArray(this, key_columns[1], range_backup_storage);
+    const PaddedPODArray<Key> & ids = getColumnVectorData(this, key_columns[0], key_backup_storage);
+    const PaddedPODArray<RangeStorageType> & dates = getColumnVectorData(this, key_columns[1], range_backup_storage);
 
     const auto & attr = *std::get<Ptr<AttributeType>>(attribute.maps);
 
@@ -443,11 +335,13 @@ void RangeHashedDictionary::getItemsImpl(
         {
             const auto date = dates[row];
             const auto & ranges_and_values = it->getMapped();
-            const auto val_it
-                = std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), [date](const Value<AttributeType> & v)
-                  {
-                      return v.range.contains(date);
-                  });
+            const auto val_it = std::find_if(
+                std::begin(ranges_and_values),
+                std::end(ranges_and_values),
+                [date](const Value<AttributeType> & v)
+                {
+                    return v.range.contains(date);
+                });
 
             if (val_it != std::end(ranges_and_values))
             {
@@ -456,16 +350,16 @@ void RangeHashedDictionary::getItemsImpl(
                 if (value)
                     set_value(row, static_cast<OutputType>(*value), false); // NOLINT
                 else
-                    set_value(row, get_default(row), true);
+                    set_value(row, default_value_extractor[row], true);
             }
             else
             {
-                set_value(row, get_default(row), false);
+                set_value(row, default_value_extractor[row], false);
             }
         }
         else
         {
-            set_value(row, get_default(row), false);
+            set_value(row, default_value_extractor[row], false);
         }
     }
 
diff --git a/src/Dictionaries/RangeHashedDictionary.h b/src/Dictionaries/RangeHashedDictionary.h
index 1a3bfd65d33..8252ee24cd5 100644
--- a/src/Dictionaries/RangeHashedDictionary.h
+++ b/src/Dictionaries/RangeHashedDictionary.h
@@ -1,5 +1,10 @@
 #pragma once
 
+#include <atomic>
+#include <memory>
+#include <variant>
+#include <optional>
+
 #include <Columns/ColumnDecimal.h>
 #include <Columns/ColumnString.h>
 #include <Common/HashTable/HashMap.h>
@@ -7,11 +12,7 @@
 #include "DictionaryStructure.h"
 #include "IDictionary.h"
 #include "IDictionarySource.h"
-
-#include <atomic>
-#include <memory>
-#include <variant>
-#include <optional>
+#include "DictionaryHelpers.h"
 
 namespace DB
 {
@@ -53,16 +54,16 @@ public:
         return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
     }
 
-    DictionaryIdentifierType getIdentifierType() const override { return DictionaryIdentifierType::range; }
+    DictionaryKeyType getKeyType() const override { return DictionaryKeyType::range; }
 
     ColumnPtr getColumn(
         const std::string& attribute_name,
         const DataTypePtr & result_type,
         const Columns & key_columns,
         const DataTypes & key_types,
-        const ColumnPtr default_untyped) const override;
+        const ColumnPtr default_values_column) const override;
 
-    ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override;
+    ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
 
     using RangeStorageType = Int64;
 
@@ -151,12 +152,12 @@ private:
 
     Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value);
 
-    template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
+    template <typename AttributeType, typename OutputType, typename ValueSetter>
     void getItemsImpl(
         const Attribute & attribute,
         const Columns & key_columns,
         ValueSetter && set_value,
-        DefaultGetter && get_default) const;
+        DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const;
 
     template <typename T>
     void setAttributeValueImpl(Attribute & attribute, const Key id, const Range & range, const Field & value);
diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp
index 642e956f97e..c064ccb0e63 100644
--- a/src/Dictionaries/SSDCacheDictionary.cpp
+++ b/src/Dictionaries/SSDCacheDictionary.cpp
@@ -446,7 +446,7 @@ void SSDCachePartition::flush()
 
 template <typename Out, typename GetDefault>
 void SSDCachePartition::getValue(const size_t attribute_index, const PaddedPODArray<UInt64> & ids,
-    ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & get_default,
+    ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & default_value_extractor,
     std::chrono::system_clock::time_point now) const
 {
     auto set_value = [&](const size_t index, ReadBuffer & buf)
@@ -457,7 +457,7 @@ void SSDCachePartition::getValue(const size_t attribute_index, const PaddedPODAr
         if (metadata.expiresAt() > now)
         {
             if (metadata.isDefault())
-                out[index] = get_default(index);
+                out[index] = default_value_extractor[index];
             else
             {
                 ignoreFromBufferToAttributeIndex(attribute_index, buf);
@@ -1333,12 +1333,13 @@ ColumnPtr SSDCacheDictionary::getColumn(
     const DataTypePtr & result_type,
     const Columns & key_columns,
     const DataTypes &,
-    const ColumnPtr default_untyped) const
+    const ColumnPtr default_values_column) const
 {
     ColumnPtr result;
 
     PaddedPODArray<Key> backup_storage;
-    const auto & ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
+    const auto & ids = getColumnVectorData(this, key_columns.front(), backup_storage);
+    auto keys_size = ids.size();
 
     const auto index = getAttributeIndex(attribute_name);
     const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
@@ -1347,94 +1348,25 @@ ColumnPtr SSDCacheDictionary::getColumn(
     {
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
         using AttributeType = typename Type::AttributeType;
+        using ValueType = DictionaryValueType<AttributeType>;
+        using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
 
-        auto identifiers_size = ids.size();
+        const auto null_value = static_cast<ValueType>(std::get<AttributeType>(null_values[index]));
+        DictionaryDefaultValueExtractor<ValueType> default_value_extractor(null_value, default_values_column);
+
+        auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
 
         if constexpr (std::is_same_v<AttributeType, String>)
         {
-            auto column_string = ColumnString::create();
-
-            if (default_untyped != nullptr)
-            {
-                if (const auto * const default_col = checkAndGetColumn<ColumnString>(*default_untyped))
-                {
-                    getItemsStringImpl(index, ids, column_string.get(), [&](const size_t row) { return default_col->getDataAt(row); });
-                }
-                else if (const auto * const default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
-                {
-                    const auto & def = default_col_const->template getValue<String>();
-
-                    getItemsStringImpl(index, ids, column_string.get(), [&](const size_t) { return StringRef{def}; });
-                }
-                else
-                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
-            }
-            else
-            {
-                const auto null_value = StringRef{std::get<String>(null_values[index])};
-
-                getItemsStringImpl(index, ids, column_string.get(), [&](const size_t) { return null_value; });
-            }
-
-            result = std::move(column_string);
+            getItemsStringImpl(index, ids, column.get(), default_value_extractor);
         }
         else
         {
-            using ResultColumnType
-                = std::conditional_t<IsDecimalNumber<AttributeType>, ColumnDecimal<AttributeType>, ColumnVector<AttributeType>>;
-            using ResultColumnPtr = typename ResultColumnType::MutablePtr;
-
-            ResultColumnPtr column;
-
-            if constexpr (IsDecimalNumber<AttributeType>)
-            {
-                auto scale = getDecimalScale(*dictionary_attribute.nested_type);
-                column = ColumnDecimal<AttributeType>::create(identifiers_size, scale);
-            }
-            else if constexpr (IsNumber<AttributeType>)
-                column = ColumnVector<AttributeType>::create(identifiers_size);
-
             auto & out = column->getData();
-
-            if (default_untyped != nullptr)
-            {
-                if (const auto * const default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
-                {
-                    getItemsNumberImpl<AttributeType, AttributeType>(
-                        index,
-                        ids,
-                        out,
-                        [&](const size_t row) { return default_col->getData()[row]; }
-                    );
-                }
-                else if (const auto * const default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
-                {
-                    const auto & def = default_col_const->template getValue<AttributeType>();
-
-                    getItemsNumberImpl<AttributeType, AttributeType>(
-                        index,
-                        ids,
-                        out,
-                        [&](const size_t) { return def; }
-                    );
-                }
-                else
-                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
-            }
-            else
-            {
-                const auto null_value = std::get<AttributeType>(null_values[index]);
-
-                getItemsNumberImpl<AttributeType, AttributeType>(
-                    index,
-                    ids,
-                    out,
-                    [&](const size_t) { return null_value; }
-                );
-            }
-
-            result = std::move(column);
+            getItemsNumberImpl<AttributeType, AttributeType>(index, ids, out, default_value_extractor);
         }
+
+        result = std::move(column);
     };
 
     callOnDictionaryAttributeType(dict_struct.attributes[index].underlying_type, type_call);
@@ -1444,12 +1376,15 @@ ColumnPtr SSDCacheDictionary::getColumn(
 
 template <typename AttributeType, typename OutputType, typename DefaultGetter>
 void SSDCacheDictionary::getItemsNumberImpl(
-        const size_t attribute_index, const PaddedPODArray<Key> & ids, ResultArrayType<OutputType> & out, DefaultGetter && get_default) const
+        const size_t attribute_index,
+        const PaddedPODArray<Key> & ids,
+        ResultArrayType<OutputType> & out,
+        DefaultGetter & default_value_extractor) const
 {
     const auto now = std::chrono::system_clock::now();
 
     std::unordered_map<Key, std::vector<size_t>> not_found_ids;
-    storage.getValue<OutputType>(attribute_index, ids, out, not_found_ids, get_default, now);
+    storage.getValue<OutputType>(attribute_index, ids, out, not_found_ids, default_value_extractor, now);
     if (not_found_ids.empty())
         return;
 
@@ -1467,14 +1402,17 @@ void SSDCacheDictionary::getItemsNumberImpl(
             [&](const size_t id)
             {
                 for (const size_t row : not_found_ids[id])
-                    out[row] = get_default(row);
+                    out[row] = default_value_extractor[row];
             },
             getLifetime());
 }
 
 template <typename DefaultGetter>
-void SSDCacheDictionary::getItemsStringImpl(const size_t attribute_index, const PaddedPODArray<Key> & ids,
-        ColumnString * out, DefaultGetter && get_default) const
+void SSDCacheDictionary::getItemsStringImpl(
+    const size_t attribute_index,
+    const PaddedPODArray<Key> & ids,
+    ColumnString * out,
+    DefaultGetter & default_value_extractor) const
 {
     const auto now = std::chrono::system_clock::now();
 
@@ -1493,7 +1431,7 @@ void SSDCacheDictionary::getItemsStringImpl(const size_t attribute_index, const
         {
             if (unlikely(default_index != default_rows.size() && default_rows[default_index] == row))
             {
-                auto to_insert = get_default(row);
+                auto to_insert = default_value_extractor[row];
                 out->insertData(to_insert.data, to_insert.size);
                 ++default_index;
             }
@@ -1524,7 +1462,7 @@ void SSDCacheDictionary::getItemsStringImpl(const size_t attribute_index, const
         const auto & id = ids[row];
         if (unlikely(default_index != default_rows.size() && default_rows[default_index] == row))
         {
-            auto to_insert = get_default(row);
+            auto to_insert = default_value_extractor[row];
             out->insertData(to_insert.data, to_insert.size);
             ++default_index;
         }
@@ -1538,16 +1476,16 @@ void SSDCacheDictionary::getItemsStringImpl(const size_t attribute_index, const
         }
         else
         {
-            auto to_insert = get_default(row);
+            auto to_insert = default_value_extractor[row];
             out->insertData(to_insert.data, to_insert.size);
         }
     }
 }
 
-ColumnUInt8::Ptr SSDCacheDictionary::has(const Columns & key_columns, const DataTypes &) const
+ColumnUInt8::Ptr SSDCacheDictionary::hasKeys(const Columns & key_columns, const DataTypes &) const
 {
     PaddedPODArray<Key> backup_storage;
-    const auto& ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
+    const auto& ids = getColumnVectorData(this, key_columns.front(), backup_storage);
 
     auto result = ColumnUInt8::create(ext::size(ids));
     auto& out = result->getData();
diff --git a/src/Dictionaries/SSDCacheDictionary.h b/src/Dictionaries/SSDCacheDictionary.h
index 158bf38cf42..4d4d3befa22 100644
--- a/src/Dictionaries/SSDCacheDictionary.h
+++ b/src/Dictionaries/SSDCacheDictionary.h
@@ -2,11 +2,15 @@
 
 #if defined(__linux__) || defined(__FreeBSD__)
 
-#include "DictionaryStructure.h"
-#include "IDictionary.h"
-#include "IDictionarySource.h"
 #include <atomic>
 #include <chrono>
+#include <list>
+#include <shared_mutex>
+#include <variant>
+#include <vector>
+
+#include <Poco/Logger.h>
+
 #include <Columns/ColumnDecimal.h>
 #include <Columns/ColumnString.h>
 #include <Common/ArenaWithFreeLists.h>
@@ -16,12 +20,11 @@
 #include <Core/Block.h>
 #include <Dictionaries/BucketCache.h>
 #include <IO/HashingWriteBuffer.h>
-#include <list>
 #include <pcg_random.hpp>
-#include <Poco/Logger.h>
-#include <shared_mutex>
-#include <variant>
-#include <vector>
+#include "DictionaryStructure.h"
+#include "IDictionary.h"
+#include "IDictionarySource.h"
+#include "DictionaryHelpers.h"
 
 namespace DB
 {
@@ -109,7 +112,7 @@ public:
 
     template <typename Out, typename GetDefault>
     void getValue(size_t attribute_index, const PaddedPODArray<UInt64> & ids,
-            ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & get_default,
+            ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & default_value_extractor,
             std::chrono::system_clock::time_point now) const;
 
     void getString(size_t attribute_index, const PaddedPODArray<UInt64> & ids,
@@ -232,7 +235,7 @@ public:
     template <typename Out, typename GetDefault>
     void getValue(size_t attribute_index, const PaddedPODArray<UInt64> & ids,
             ResultArrayType<Out> & out, std::unordered_map<Key, std::vector<size_t>> & not_found,
-            GetDefault & get_default, std::chrono::system_clock::time_point now) const;
+            GetDefault & default_value_extractor, std::chrono::system_clock::time_point now) const;
 
     void getString(size_t attribute_index, const PaddedPODArray<UInt64> & ids,
             StringRefs & refs, ArenaWithFreeLists & arena, std::unordered_map<Key, std::vector<size_t>> & not_found,
@@ -351,16 +354,16 @@ public:
 
     std::exception_ptr getLastException() const override { return storage.getLastException(); }
 
-    DictionaryIdentifierType getIdentifierType() const override { return DictionaryIdentifierType::simple; }
+    DictionaryKeyType getKeyType() const override { return DictionaryKeyType::simple; }
 
     ColumnPtr getColumn(
         const std::string& attribute_name,
         const DataTypePtr & result_type,
         const Columns & key_columns,
         const DataTypes & key_types,
-        const ColumnPtr default_untyped) const override;
+        const ColumnPtr default_values_column) const override;
 
-    ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override;
+    ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
 
     template <typename T>
     using ResultArrayType = SSDCacheStorage::ResultArrayType<T>;
@@ -377,11 +380,17 @@ private:
 
     template <typename AttributeType, typename OutputType, typename DefaultGetter>
     void getItemsNumberImpl(
-            size_t attribute_index, const PaddedPODArray<Key> & ids, ResultArrayType<OutputType> & out, DefaultGetter && get_default) const;
+        size_t attribute_index,
+        const PaddedPODArray<Key> & ids,
+        ResultArrayType<OutputType> & out,
+        DefaultGetter & default_value_extractor) const;
 
     template <typename DefaultGetter>
-    void getItemsStringImpl(size_t attribute_index, const PaddedPODArray<Key> & ids,
-            ColumnString * out, DefaultGetter && get_default) const;
+    void getItemsStringImpl(
+        size_t attribute_index,
+        const PaddedPODArray<Key> & ids,
+        ColumnString * out,
+        DefaultGetter & default_value_extractor) const;
 
     const std::string name;
     const DictionaryStructure dict_struct;
diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp
index 5372e0e1afb..1bff34b7cb3 100644
--- a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp
+++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp
@@ -462,8 +462,12 @@ void SSDComplexKeyCachePartition::flush()
 
 template <typename Out, typename GetDefault>
 void SSDComplexKeyCachePartition::getValue(
-    const size_t attribute_index, const Columns & key_columns, const DataTypes & key_types,
-    ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & get_default,
+    const size_t attribute_index,
+    const Columns & key_columns,
+    const DataTypes & key_types,
+    ResultArrayType<Out> & out,
+    std::vector<bool> & found,
+    GetDefault & default_value_extractor,
     std::chrono::system_clock::time_point now) const
 {
     auto set_value = [&](const size_t index, ReadBuffer & buf)
@@ -475,7 +479,7 @@ void SSDComplexKeyCachePartition::getValue(
         if (metadata.expiresAt() > now)
         {
             if (metadata.isDefault())
-                out[index] = get_default(index);
+                out[index] = default_value_extractor[index];
             else
             {
                 ignoreFromBufferToAttributeIndex(attribute_index, buf);
@@ -521,7 +525,7 @@ void SSDComplexKeyCachePartition::getString(const size_t attribute_index,
     getImpl(key_columns, key_types, set_value, found);
 }
 
-void SSDComplexKeyCachePartition::has(
+void SSDComplexKeyCachePartition::hasKeys(
     const Columns & key_columns, const DataTypes & key_types, ResultArrayType<UInt8> & out,
     std::vector<bool> & found, std::chrono::system_clock::time_point now) const
 {
@@ -1019,7 +1023,7 @@ void SSDComplexKeyCacheStorage::getString(
     hit_count.fetch_add(n - count_not_found, std::memory_order_release);
 }
 
-void SSDComplexKeyCacheStorage::has(
+void SSDComplexKeyCacheStorage::hasKeys(
     const Columns & key_columns, const DataTypes & key_types, ResultArrayType<UInt8> & out,
     std::unordered_map<KeyRef, std::vector<size_t>> & not_found,
     TemporalComplexKeysPool & not_found_pool, std::chrono::system_clock::time_point now) const
@@ -1032,7 +1036,7 @@ void SSDComplexKeyCacheStorage::has(
     {
         std::shared_lock lock(rw_lock);
         for (const auto & partition : partitions)
-            partition->has(key_columns, key_types, out, found, now);
+            partition->hasKeys(key_columns, key_types, out, found, now);
     }
 
     size_t count_not_found = 0;
@@ -1382,7 +1386,7 @@ ColumnPtr SSDComplexKeyCacheDictionary::getColumn(
     const DataTypePtr & result_type,
     const Columns & key_columns,
     const DataTypes & key_types,
-    const ColumnPtr default_untyped) const
+    const ColumnPtr default_values_column) const
 {
     ColumnPtr result;
 
@@ -1398,95 +1402,31 @@ ColumnPtr SSDComplexKeyCacheDictionary::getColumn(
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
         using AttributeType = typename Type::AttributeType;
 
+        using ValueType = DictionaryValueType<AttributeType>;
+        using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
+
+        const auto null_value = static_cast<ValueType>(std::get<AttributeType>(null_values[index]));
+        DictionaryDefaultValueExtractor<ValueType> default_value_extractor(null_value, default_values_column);
+
+        auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
+
         if constexpr (std::is_same_v<AttributeType, String>)
         {
-            auto column_string = ColumnString::create();
-            auto * out = column_string.get();
-
-            if (default_untyped != nullptr)
-            {
-                if (const auto * const default_col = checkAndGetColumn<ColumnString>(*default_untyped))
-                {
-                    getItemsStringImpl(index, key_columns, key_types, out, [&](const size_t row) { return default_col->getDataAt(row); });
-                }
-                else if (const auto * const default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
-                {
-                    const auto & def = default_col_const->template getValue<String>();
-
-                    getItemsStringImpl(index, key_columns, key_types, out, [&](const size_t) { return StringRef{def}; });
-                }
-                else
-                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
-            }
-            else
-            {
-                const auto null_value = StringRef{std::get<String>(null_values[index])};
-
-                getItemsStringImpl(index, key_columns, key_types, out, [&](const size_t) { return null_value; });
-            }
-
-            result = std::move(column_string);
+            auto * out = column.get();
+            getItemsStringImpl(index, key_columns, key_types, out, default_value_extractor);
         }
         else
         {
-            using ResultColumnType
-                = std::conditional_t<IsDecimalNumber<AttributeType>, ColumnDecimal<AttributeType>, ColumnVector<AttributeType>>;
-            using ResultColumnPtr = typename ResultColumnType::MutablePtr;
-
-            ResultColumnPtr column;
-
-            if constexpr (IsDecimalNumber<AttributeType>)
-            {
-                auto scale = getDecimalScale(*dictionary_attribute.nested_type);
-                column = ColumnDecimal<AttributeType>::create(keys_size, scale);
-            }
-            else if constexpr (IsNumber<AttributeType>)
-                column = ColumnVector<AttributeType>::create(keys_size);
-
             auto & out = column->getData();
-
-            if (default_untyped != nullptr)
-            {
-                if (const auto * const default_col = checkAndGetColumn<ResultColumnType>(*default_untyped))
-                {
-                    getItemsNumberImpl<AttributeType, AttributeType>(
-                        index,
-                        key_columns,
-                        key_types,
-                        out,
-                        [&](const size_t row) { return default_col->getData()[row]; }
-                    );
-                }
-                else if (const auto * const default_col_const = checkAndGetColumnConst<ResultColumnType>(default_untyped.get()))
-                {
-                    const auto & def = default_col_const->template getValue<AttributeType>();
-
-                    getItemsNumberImpl<AttributeType, AttributeType>(
-                        index,
-                        key_columns,
-                        key_types,
-                        out,
-                        [&](const size_t) { return def; }
-                    );
-                }
-                else
-                    throw Exception{full_name + ": type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
-            }
-            else
-            {
-                const auto null_value = std::get<AttributeType>(null_values[index]); /* NOLINT */
-
-                getItemsNumberImpl<AttributeType, AttributeType>(
-                    index,
-                    key_columns,
-                    key_types,
-                    out,
-                    [&](const size_t) { return null_value; }
-                );
-            }
-
-            result = std::move(column);
+            getItemsNumberImpl<AttributeType, AttributeType>(
+                index,
+                key_columns,
+                key_types,
+                out,
+                default_value_extractor);
         }
+
+        result = std::move(column);
     };
 
     callOnDictionaryAttributeType(dict_struct.attributes[index].underlying_type, type_call);
@@ -1494,11 +1434,13 @@ ColumnPtr SSDComplexKeyCacheDictionary::getColumn(
     return result;
 }
 
-template <typename AttributeType, typename OutputType, typename DefaultGetter>
+template <typename AttributeType, typename OutputType>
 void SSDComplexKeyCacheDictionary::getItemsNumberImpl(
     const size_t attribute_index,
-    const Columns & key_columns, const DataTypes & key_types,
-    ResultArrayType<OutputType> & out, DefaultGetter && get_default) const
+    const Columns & key_columns,
+    const DataTypes & key_types,
+    ResultArrayType<OutputType> & out,
+    DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const
 {
     assert(dict_struct.key);
     assert(key_columns.size() == key_types.size());
@@ -1509,7 +1451,7 @@ void SSDComplexKeyCacheDictionary::getItemsNumberImpl(
 
     TemporalComplexKeysPool not_found_pool;
     std::unordered_map<KeyRef, std::vector<size_t>> not_found_keys;
-    storage.getValue<OutputType>(attribute_index, key_columns, key_types, out, not_found_keys, not_found_pool, get_default, now);
+    storage.getValue<OutputType>(attribute_index, key_columns, key_types, out, not_found_keys, not_found_pool, default_value_extractor, now);
     if (not_found_keys.empty())
         return;
 
@@ -1536,18 +1478,17 @@ void SSDComplexKeyCacheDictionary::getItemsNumberImpl(
             [&](const auto key)
             {
                 for (const size_t row : not_found_keys[key])
-                    out[row] = get_default(row);
+                    out[row] = default_value_extractor[row];
             },
             getLifetime());
 }
 
-template <typename DefaultGetter>
 void SSDComplexKeyCacheDictionary::getItemsStringImpl(
     const size_t attribute_index,
     const Columns & key_columns,
     const DataTypes & key_types,
     ColumnString * out,
-    DefaultGetter && get_default) const
+    DictionaryDefaultValueExtractor<StringRef> & default_value_extractor) const
 {
     dict_struct.validateKeyTypes(key_types);
 
@@ -1573,7 +1514,7 @@ void SSDComplexKeyCacheDictionary::getItemsStringImpl(
         {
             if (unlikely(default_index != default_rows.size() && default_rows[default_index] == row))
             {
-                auto to_insert = get_default(row);
+                auto to_insert = default_value_extractor[row];
                 out->insertData(to_insert.data, to_insert.size);
                 ++default_index;
             }
@@ -1616,7 +1557,7 @@ void SSDComplexKeyCacheDictionary::getItemsStringImpl(
         SCOPE_EXIT(tmp_keys_pool.rollback(key));
         if (unlikely(default_index != default_rows.size() && default_rows[default_index] == row))
         {
-            auto to_insert = get_default(row);
+            auto to_insert = default_value_extractor[row];
             out->insertData(to_insert.data, to_insert.size);
             ++default_index;
         }
@@ -1630,13 +1571,13 @@ void SSDComplexKeyCacheDictionary::getItemsStringImpl(
         }
         else
         {
-            auto to_insert = get_default(row);
+            auto to_insert = default_value_extractor[row];
             out->insertData(to_insert.data, to_insert.size);
         }
     }
 }
 
-ColumnUInt8::Ptr SSDComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes & key_types) const
+ColumnUInt8::Ptr SSDComplexKeyCacheDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
 {
     dict_struct.validateKeyTypes(key_types);
 
@@ -1652,7 +1593,7 @@ ColumnUInt8::Ptr SSDComplexKeyCacheDictionary::has(const Columns & key_columns,
 
     std::unordered_map<KeyRef, std::vector<size_t>> not_found_keys;
     TemporalComplexKeysPool not_found_pool;
-    storage.has(key_columns, key_types, out, not_found_keys, not_found_pool, now);
+    storage.hasKeys(key_columns, key_types, out, not_found_keys, not_found_pool, now);
     if (not_found_keys.empty())
         return result;
 
diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.h b/src/Dictionaries/SSDComplexKeyCacheDictionary.h
index a12a9789c45..9e33e1dbdfb 100644
--- a/src/Dictionaries/SSDComplexKeyCacheDictionary.h
+++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.h
@@ -2,11 +2,13 @@
 
 #if defined(OS_LINUX) || defined(__FreeBSD__)
 
-#include "DictionaryStructure.h"
-#include "IDictionary.h"
-#include "IDictionarySource.h"
 #include <atomic>
 #include <chrono>
+#include <list>
+#include <shared_mutex>
+#include <variant>
+#include <vector>
+#include <Poco/Logger.h>
 #include <Columns/ColumnDecimal.h>
 #include <Columns/ColumnString.h>
 #include <Common/Arena.h>
@@ -19,13 +21,11 @@
 #include <Dictionaries/BucketCache.h>
 #include <ext/scope_guard.h>
 #include <IO/HashingWriteBuffer.h>
-#include <list>
 #include <pcg_random.hpp>
-#include <Poco/Logger.h>
-#include <shared_mutex>
-#include <variant>
-#include <vector>
-
+#include "IDictionary.h"
+#include "IDictionarySource.h"
+#include "DictionaryStructure.h"
+#include "DictionaryHelpers.h"
 
 namespace DB
 {
@@ -313,7 +313,7 @@ public:
     template <typename Out, typename GetDefault>
     void getValue(const size_t attribute_index,
             const Columns & key_columns, const DataTypes & key_types,
-            ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & get_default,
+            ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & default_value_extractor,
             std::chrono::system_clock::time_point now) const;
 
     void getString(const size_t attribute_index,
@@ -321,7 +321,7 @@ public:
             StringRefs & refs, ArenaWithFreeLists & arena, std::vector<bool> & found,
             std::vector<size_t> & default_ids, std::chrono::system_clock::time_point now) const;
 
-    void has(const Columns & key_columns, const DataTypes & key_types,
+    void hasKeys(const Columns & key_columns, const DataTypes & key_types,
             ResultArrayType<UInt8> & out, std::vector<bool> & found,
             std::chrono::system_clock::time_point now) const;
 
@@ -459,7 +459,7 @@ public:
             TemporalComplexKeysPool & not_found_pool,
             std::vector<size_t> & default_ids, std::chrono::system_clock::time_point now) const;
 
-    void has(const Columns & key_columns, const DataTypes & key_types, ResultArrayType<UInt8> & out,
+    void hasKeys(const Columns & key_columns, const DataTypes & key_types, ResultArrayType<UInt8> & out,
             std::unordered_map<KeyRef, std::vector<size_t>> & not_found,
             TemporalComplexKeysPool & not_found_pool, std::chrono::system_clock::time_point now) const;
 
@@ -569,16 +569,16 @@ public:
 
     std::exception_ptr getLastException() const override { return storage.getLastException(); }
 
-    DictionaryIdentifierType getIdentifierType() const override { return DictionaryIdentifierType::complex; }
+    DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; }
 
     ColumnPtr getColumn(
         const std::string& attribute_name,
         const DataTypePtr & result_type,
         const Columns & key_columns,
         const DataTypes & key_types,
-        const ColumnPtr default_untyped) const override;
+        const ColumnPtr default_values_column) const override;
 
-    ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override;
+    ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
 
     template <typename T>
     using ResultArrayType = SSDComplexKeyCacheStorage::ResultArrayType<T>;
@@ -593,17 +593,20 @@ private:
     AttributeValueVariant createAttributeNullValueWithType(const AttributeUnderlyingType type, const Field & null_value);
     void createAttributes();
 
-    template <typename AttributeType, typename OutputType, typename DefaultGetter>
+    template <typename AttributeType, typename OutputType>
     void getItemsNumberImpl(
         const size_t attribute_index,
-        const Columns & key_columns, const DataTypes & key_types,
-        ResultArrayType<OutputType> & out, DefaultGetter && get_default) const;
+        const Columns & key_columns,
+        const DataTypes & key_types,
+        ResultArrayType<OutputType> & out,
+        DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const;
 
-    template <typename DefaultGetter>
     void getItemsStringImpl(
         const size_t attribute_index,
-        const Columns & key_columns, const DataTypes & key_types,
-        ColumnString * out, DefaultGetter && get_default) const;
+        const Columns & key_columns,
+        const DataTypes & key_types,
+        ColumnString * out,
+        DictionaryDefaultValueExtractor<StringRef> & default_value_extractor) const;
 
     const std::string name;
     const DictionaryStructure dict_struct;
diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h
index 2130771251a..e48265aa2b7 100644
--- a/src/Functions/FunctionsExternalDictionaries.h
+++ b/src/Functions/FunctionsExternalDictionaries.h
@@ -192,21 +192,21 @@ private:
             return result_type->createColumn();
 
         auto dictionary = helper.getDictionary(arguments[0]);
-        auto dictionary_identifier_type = dictionary->getIdentifierType();
+        auto dictionary_key_type = dictionary->getKeyType();
 
         const ColumnWithTypeAndName & key_column_with_type = arguments[1];
         const auto key_column = key_column_with_type.column;
         const auto key_column_type = WhichDataType(key_column_with_type.type);
 
-        if (dictionary_identifier_type == DictionaryIdentifierType::simple)
+        if (dictionary_key_type == DictionaryKeyType::simple)
         {
             if (!key_column_type.isUInt64())
                 throw Exception{"Second argument of function " + getName() + " must be " + dictionary->getStructure().getKeyDescription(),
                     ErrorCodes::TYPE_MISMATCH};
 
-            return dictionary->has({key_column}, {std::make_shared<DataTypeUInt64>()});
+            return dictionary->hasKeys({key_column}, {std::make_shared<DataTypeUInt64>()});
         }
-        else if (dictionary_identifier_type == DictionaryIdentifierType::complex)
+        else if (dictionary_key_type == DictionaryKeyType::complex)
         {
             if (!key_column_type.isTuple())
                 throw Exception{"Second argument of function " + getName() + " must be " + dictionary->getStructure().getKeyDescription(),
@@ -218,7 +218,7 @@ private:
             const auto & key_columns = typeid_cast<const ColumnTuple &>(*key_column_full).getColumnsCopy();
             const auto & key_types = static_cast<const DataTypeTuple &>(*key_column_with_type.type).getElements();
 
-            return dictionary->has(key_columns, key_types);
+            return dictionary->hasKeys(key_columns, key_types);
         }
         else
             throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Has not supported for range dictionary", dictionary->getDictionaryID().getNameForLogs());
@@ -296,16 +296,14 @@ public:
                     + getName() + ", must be UInt64 or tuple(...).",
                 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
 
-        auto dictionary_identifier_type = dictionary->getIdentifierType();
+        auto dictionary_key_type = dictionary->getKeyType();
 
         size_t current_arguments_index = 3;
 
-        /// TODO: Add more information to error messages
-
         ColumnPtr range_col = nullptr;
         DataTypePtr range_col_type = nullptr;
 
-        if (dictionary_identifier_type == DictionaryIdentifierType::range)
+        if (dictionary_key_type == DictionaryKeyType::range)
         {
             if (current_arguments_index >= arguments.size())
                 throw Exception{"Wrong argument count for function " + getName(), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
@@ -336,11 +334,11 @@ public:
         const ColumnWithTypeAndName & key_col_with_type = arguments[2];
         const auto key_column = key_col_with_type.column;
 
-        if (dictionary_identifier_type == DictionaryIdentifierType::simple)
+        if (dictionary_key_type == DictionaryKeyType::simple)
         {
             result = dictionary->getColumn(attribute_name, result_type, {key_column}, {std::make_shared<DataTypeUInt64>()}, default_col);
         }
-        else if (dictionary_identifier_type == DictionaryIdentifierType::complex)
+        else if (dictionary_key_type == DictionaryKeyType::complex)
         {
             /// Functions in external dictionaries_loader only support full-value (not constant) columns with keys.
             ColumnPtr key_column_full = key_col_with_type.column->convertToFullColumnIfConst();
@@ -350,7 +348,7 @@ public:
 
             result = dictionary->getColumn(attribute_name, result_type, key_columns, key_types, default_col);
         }
-        else if (dictionary_identifier_type == DictionaryIdentifierType::range)
+        else if (dictionary_key_type == DictionaryKeyType::range)
         {
             result = dictionary->getColumn(
                 attribute_name, result_type, {key_column, range_col}, {std::make_shared<DataTypeUInt64>(), range_col_type}, default_col);

From b7a150cc6311beadda6ae9b8633068195fb41c16 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Sat, 23 Jan 2021 19:47:33 +0300
Subject: [PATCH 0246/1238] Updated DictionaryDefaultValueExtractor interface

---
 src/Dictionaries/CacheDictionary.cpp          | 15 ++---
 src/Dictionaries/CacheDictionary.h            |  6 +-
 .../ComplexKeyCacheDictionary.cpp             | 15 ++---
 src/Dictionaries/ComplexKeyCacheDictionary.h  |  6 +-
 .../ComplexKeyDirectDictionary.cpp            | 10 +--
 .../ComplexKeyHashedDictionary.cpp            | 11 ++--
 src/Dictionaries/ComplexKeyHashedDictionary.h |  4 +-
 src/Dictionaries/DictionaryBlockInputStream.h |  2 +-
 src/Dictionaries/DictionaryHelpers.h          | 65 +++++++++----------
 src/Dictionaries/DirectDictionary.cpp         |  9 +--
 src/Dictionaries/FlatDictionary.cpp           | 13 ++--
 src/Dictionaries/FlatDictionary.h             |  4 +-
 src/Dictionaries/HashedDictionary.cpp         | 17 ++---
 src/Dictionaries/HashedDictionary.h           |  8 +--
 src/Dictionaries/IDictionary.h                | 31 +++++----
 src/Dictionaries/IPAddressDictionary.cpp      | 14 ++--
 src/Dictionaries/IPAddressDictionary.h        |  8 +--
 src/Dictionaries/PolygonDictionary.cpp        |  7 +-
 src/Dictionaries/RangeHashedDictionary.cpp    | 17 +++--
 src/Dictionaries/RangeHashedDictionary.h      |  4 +-
 src/Dictionaries/SSDCacheDictionary.cpp       |  5 +-
 .../SSDComplexKeyCacheDictionary.cpp          | 12 ++--
 .../SSDComplexKeyCacheDictionary.h            |  6 +-
 23 files changed, 145 insertions(+), 144 deletions(-)

diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp
index eac2cd1b41d..ee9408e0c91 100644
--- a/src/Dictionaries/CacheDictionary.cpp
+++ b/src/Dictionaries/CacheDictionary.cpp
@@ -271,11 +271,10 @@ ColumnPtr CacheDictionary::getColumn(
     {
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
         using AttributeType = typename Type::AttributeType;
-        using ValueType = DictionaryValueType<AttributeType>;
         using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
- 
-        const auto null_value = static_cast<ValueType>(std::get<AttributeType>(attribute.null_value));
-        DictionaryDefaultValueExtractor<ValueType> default_value_extractor(null_value, default_values_column);
+
+        const auto null_value = std::get<AttributeType>(attribute.null_value);
+        DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
 
         auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
 
@@ -297,12 +296,12 @@ ColumnPtr CacheDictionary::getColumn(
     return result;
 }
 
-template <typename AttributeType, typename OutputType>
+template <typename AttributeType, typename OutputType, typename DefaultValueExtractor>
 void CacheDictionary::getItemsNumberImpl(
-    Attribute & attribute, 
+    Attribute & attribute,
     const PaddedPODArray<Key> & ids,
     ResultArrayType<OutputType> & out,
-    DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const
+    DefaultValueExtractor & default_value_extractor) const
 {
     /// First fill everything with default values
     const auto rows = ext::size(ids);
@@ -427,7 +426,7 @@ void CacheDictionary::getItemsString(
     Attribute & attribute,
     const PaddedPODArray<Key> & ids,
     ColumnString * out,
-    DictionaryDefaultValueExtractor<StringRef> & default_value_extractor) const
+    DictionaryDefaultValueExtractor<String> & default_value_extractor) const
 {
     const auto rows = ext::size(ids);
 
diff --git a/src/Dictionaries/CacheDictionary.h b/src/Dictionaries/CacheDictionary.h
index 9f0b3fa4f63..35d38f03cbe 100644
--- a/src/Dictionaries/CacheDictionary.h
+++ b/src/Dictionaries/CacheDictionary.h
@@ -204,18 +204,18 @@ private:
     /* NOLINTNEXTLINE(readability-convert-member-functions-to-static) */
     Attribute createAttributeWithTypeAndName(const AttributeUnderlyingType type, const String & name, const Field & null_value);
 
-    template <typename AttributeType, typename OutputType>
+    template <typename AttributeType, typename OutputType, typename DefaultValueExtractor>
     void getItemsNumberImpl(
         Attribute & attribute,
         const PaddedPODArray<Key> & ids,
         ResultArrayType<OutputType> & out,
-        DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const;
+        DefaultValueExtractor & default_value_extractor) const;
 
     void getItemsString(
         Attribute & attribute,
         const PaddedPODArray<Key> & ids,
         ColumnString * out,
-        DictionaryDefaultValueExtractor<StringRef> & default_value_extractor) const;
+        DictionaryDefaultValueExtractor<String> & default_value_extractor) const;
 
     PaddedPODArray<Key> getCachedIds() const;
 
diff --git a/src/Dictionaries/ComplexKeyCacheDictionary.cpp b/src/Dictionaries/ComplexKeyCacheDictionary.cpp
index 6775f2b737b..ff5a0ed9356 100644
--- a/src/Dictionaries/ComplexKeyCacheDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyCacheDictionary.cpp
@@ -91,11 +91,10 @@ ColumnPtr ComplexKeyCacheDictionary::getColumn(
     {
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
         using AttributeType = typename Type::AttributeType;
-        using ValueType = DictionaryValueType<AttributeType>;
-        using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>; 
+        using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
 
-        const auto null_value = ValueType{std::get<AttributeType>(attribute.null_values)};
-        DictionaryDefaultValueExtractor<ValueType> default_value_extractor(null_value, default_values_column);
+        const auto null_value = std::get<AttributeType>(attribute.null_values);
+        DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
 
         auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
 
@@ -248,12 +247,12 @@ ColumnUInt8::Ptr ComplexKeyCacheDictionary::hasKeys(const Columns & key_columns,
 }
 
 
-template <typename AttributeType, typename OutputType>
+template <typename AttributeType, typename OutputType, typename DefaultValueExtractor>
 void ComplexKeyCacheDictionary::getItemsNumberImpl(
-    Attribute & attribute, 
+    Attribute & attribute,
     const Columns & key_columns,
     PaddedPODArray<OutputType> & out,
-    DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const
+    DefaultValueExtractor & default_value_extractor) const
 {
     /// Mapping: <key> -> { all indices `i` of `key_columns` such that `key_columns[i]` = <key> }
     MapType<std::vector<size_t>> outdated_keys;
@@ -335,7 +334,7 @@ void ComplexKeyCacheDictionary::getItemsString(
     Attribute & attribute,
     const Columns & key_columns,
     ColumnString * out,
-    DictionaryDefaultValueExtractor<StringRef> & default_value_extractor) const
+    DictionaryDefaultValueExtractor<String> & default_value_extractor) const
 {
     const auto rows_num = key_columns.front()->size();
     /// save on some allocations
diff --git a/src/Dictionaries/ComplexKeyCacheDictionary.h b/src/Dictionaries/ComplexKeyCacheDictionary.h
index de6fb508e5a..48ffcb814ef 100644
--- a/src/Dictionaries/ComplexKeyCacheDictionary.h
+++ b/src/Dictionaries/ComplexKeyCacheDictionary.h
@@ -175,18 +175,18 @@ private:
 
     Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
 
-    template <typename AttributeType, typename OutputType>
+    template <typename AttributeType, typename OutputType, typename DefaultValueExtractor>
     void getItemsNumberImpl(
         Attribute & attribute,
         const Columns & key_columns,
         PaddedPODArray<OutputType> & out,
-        DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const;
+        DefaultValueExtractor & default_value_extractor) const;
 
     void getItemsString(
         Attribute & attribute,
         const Columns & key_columns,
         ColumnString * out,
-        DictionaryDefaultValueExtractor<StringRef> & default_value_extractor) const;
+        DictionaryDefaultValueExtractor<String> & default_value_extractor) const;
 
     template <typename PresentKeyHandler, typename AbsentKeyHandler>
     void update(
diff --git a/src/Dictionaries/ComplexKeyDirectDictionary.cpp b/src/Dictionaries/ComplexKeyDirectDictionary.cpp
index 6186b8b7dae..391b5c47980 100644
--- a/src/Dictionaries/ComplexKeyDirectDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyDirectDictionary.cpp
@@ -64,9 +64,10 @@ ColumnPtr ComplexKeyDirectDictionary::getColumn(
         using AttributeType = typename Type::AttributeType;
         using ValueType = DictionaryValueType<AttributeType>;
         using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
- 
-        const auto null_value = std::get<ValueType>(attribute.null_values);
-        DictionaryDefaultValueExtractor<ValueType> default_value_extractor(null_value, default_values_column);
+
+        const auto attribute_null_value = std::get<ValueType>(attribute.null_values);
+        AttributeType null_value = static_cast<AttributeType>(attribute_null_value);
+        DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(std::move(null_value), default_values_column);
 
         auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
 
@@ -94,7 +95,8 @@ ColumnPtr ComplexKeyDirectDictionary::getColumn(
             getItemsImpl<AttributeType, AttributeType>(
                 attribute,
                 key_columns,
-                [&](const size_t row, const auto value, bool is_null) {
+                [&](const size_t row, const auto value, bool is_null)
+                {
                     if (attribute.is_nullable)
                         (*vec_null_map_to)[row] = is_null;
 
diff --git a/src/Dictionaries/ComplexKeyHashedDictionary.cpp b/src/Dictionaries/ComplexKeyHashedDictionary.cpp
index 76eaad1dcbc..01cc089b99a 100644
--- a/src/Dictionaries/ComplexKeyHashedDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyHashedDictionary.cpp
@@ -65,10 +65,11 @@ ColumnPtr ComplexKeyHashedDictionary::getColumn(
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
         using AttributeType = typename Type::AttributeType;
         using ValueType = DictionaryValueType<AttributeType>;
-        using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>; 
+        using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
 
-        const auto null_value = std::get<ValueType>(attribute.null_values);
-        DictionaryDefaultValueExtractor<ValueType> default_value_extractor(null_value, default_values_column);
+        const auto attribute_null_value = std::get<ValueType>(attribute.null_values);
+        AttributeType null_value = attribute_null_value;
+        DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(std::move(null_value), default_values_column);
 
         auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
 
@@ -370,12 +371,12 @@ ComplexKeyHashedDictionary::createAttribute(const DictionaryAttribute & attribut
 }
 
 
-template <typename AttributeType, typename OutputType, typename ValueSetter>
+template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
 void ComplexKeyHashedDictionary::getItemsImpl(
     const Attribute & attribute,
     const Columns & key_columns,
     ValueSetter && set_value,
-    DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const
+    DefaultValueExtractor & default_value_extractor) const
 {
     const auto & attr = std::get<ContainerType<AttributeType>>(attribute.maps);
 
diff --git a/src/Dictionaries/ComplexKeyHashedDictionary.h b/src/Dictionaries/ComplexKeyHashedDictionary.h
index 63458c3c759..6acb6600189 100644
--- a/src/Dictionaries/ComplexKeyHashedDictionary.h
+++ b/src/Dictionaries/ComplexKeyHashedDictionary.h
@@ -140,12 +140,12 @@ private:
 
     Attribute createAttribute(const DictionaryAttribute & attribute, const Field & null_value);
 
-    template <typename AttributeType, typename OutputType, typename ValueSetter>
+    template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
     void getItemsImpl(
         const Attribute & attribute,
         const Columns & key_columns,
         ValueSetter && set_value,
-        DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const;
+        DefaultValueExtractor & default_value_extractor) const;
 
     template <typename T>
     bool setAttributeValueImpl(Attribute & attribute, const StringRef key, const T value);
diff --git a/src/Dictionaries/DictionaryBlockInputStream.h b/src/Dictionaries/DictionaryBlockInputStream.h
index b5ab19acb6f..f045d47c2c2 100644
--- a/src/Dictionaries/DictionaryBlockInputStream.h
+++ b/src/Dictionaries/DictionaryBlockInputStream.h
@@ -233,7 +233,7 @@ Block DictionaryBlockInputStream<Key>::fillBlock(
             else
             {
                 column = dictionary->getColumn(
-                    attribute.name, 
+                    attribute.name,
                     attribute.type,
                     keys,
                     data_types,
diff --git a/src/Dictionaries/DictionaryHelpers.h b/src/Dictionaries/DictionaryHelpers.h
index 001a2a804b7..587a4dc8f5a 100644
--- a/src/Dictionaries/DictionaryHelpers.h
+++ b/src/Dictionaries/DictionaryHelpers.h
@@ -10,11 +10,16 @@
 namespace DB
 {
 
+namespace ErrorCodes
+{
+    extern const int TYPE_MISMATCH;
+}
+
 /**
  * In Dictionaries implementation String attribute is stored in arena and StringRefs are pointing to it.
  */
 template <typename DictionaryAttributeType>
-using DictionaryValueType = 
+using DictionaryValueType =
     std::conditional_t<std::is_same_v<DictionaryAttributeType, String>, StringRef, DictionaryAttributeType>;
 
 /**
@@ -24,9 +29,9 @@ template <typename DictionaryAttributeType>
 class DictionaryAttributeColumnProvider
 {
 public:
-    using ColumnType = 
+    using ColumnType =
         std::conditional_t<std::is_same_v<DictionaryAttributeType, String>, ColumnString,
-            std::conditional_t<IsDecimalNumber<DictionaryAttributeType>, ColumnDecimal<DictionaryAttributeType>, 
+            std::conditional_t<IsDecimalNumber<DictionaryAttributeType>, ColumnDecimal<DictionaryAttributeType>,
                 ColumnVector<DictionaryAttributeType>>>;
 
     using ColumnPtr = typename ColumnType::MutablePtr;
@@ -50,66 +55,60 @@ public:
 };
 
 /**
- * DictionaryDefaultValueExtractor used to simplify getting default value for IDictionary function `getColumn`.
- * Provides interface for getting default value with operator[];
- * 
- * If default_values_column is not null in constructor than this column values will be used as default values.
- * If default_values_column is null then attribute_default_value will be used.
+  * DictionaryDefaultValueExtractor used to simplify getting default value for IDictionary function `getColumn`.
+  * Provides interface for getting default value with operator[];
+  *
+  * If default_values_column is null then attribute_default_value will be used.
+  * If default_values_column is not null in constructor than this column values will be used as default values.
  */
-template <typename DefaultValueType>
+template <typename DictionaryAttributeType>
 class DictionaryDefaultValueExtractor
 {
-    using ResultColumnType = 
-        std::conditional_t< std::is_same_v<DefaultValueType, StringRef>, ColumnString,
-            std::conditional_t<IsDecimalNumber<DefaultValueType>, ColumnDecimal<DefaultValueType>,
-                ColumnVector<DefaultValueType>>>;
+    using DefaultColumnType = typename DictionaryAttributeColumnProvider<DictionaryAttributeType>::ColumnType;
 
 public:
-    DictionaryDefaultValueExtractor(DefaultValueType attribute_default_value, ColumnPtr default_values_column_ = nullptr)
+    DictionaryDefaultValueExtractor(DictionaryAttributeType attribute_default_value, ColumnPtr default_values_column_ = nullptr)
     {
-        if (default_values_column_ != nullptr)
+        if (!default_values_column_)
+            default_value = { std::move(attribute_default_value) };
+        else
         {
-            if (const auto * const default_col = checkAndGetColumn<ResultColumnType>(*default_values_column))
+            if (const auto * const default_col = checkAndGetColumn<DefaultColumnType>(*default_values_column))
             {
                 default_values_column = default_col;
             }
-            else if (const auto * const default_col_const = checkAndGetColumnConst<ResultColumnType>(default_values_column_.get()))
+            else if (const auto * const default_col_const = checkAndGetColumnConst<DefaultColumnType>(default_values_column_.get()))
             {
-                /// TODO: Check String lifetime safety
-                /// DefaultValueType for StringColumn is StringRef, but const column getValue will return String
-                using ConstColumnValue = std::conditional_t<std::is_same_v<DefaultValueType, StringRef>, String, DefaultValueType>;
-                default_value = std::make_optional<DefaultValueType>(default_col_const->template getValue<ConstColumnValue>());
+                default_value = { default_col_const->template getValue<DictionaryAttributeType>() };
             }
             else
-                throw Exception{"Type of default column is not the same as result type.", ErrorCodes::TYPE_MISMATCH};
+                throw Exception{"Type of default column is not the same as dictionary attribute type.", ErrorCodes::TYPE_MISMATCH};
         }
-        else
-            default_value = std::make_optional<DefaultValueType>(attribute_default_value);
     }
 
-    DefaultValueType operator[](size_t row)
+    DictionaryValueType<DictionaryAttributeType> operator[](size_t row)
     {
         if (default_value)
-            return *default_value;
-        
-        if constexpr (std::is_same_v<ResultColumnType, ColumnString>)
+            return static_cast<DictionaryAttributeType>(*default_value);
+
+        if constexpr (std::is_same_v<DefaultColumnType, ColumnString>)
             return default_values_column->getDataAt(row);
         else
             return default_values_column->getData()[row];
     }
 private:
-    const ResultColumnType * default_values_column = nullptr;
-    std::optional<DefaultValueType> default_value = {};
+    const DefaultColumnType * default_values_column = nullptr;
+    std::optional<DictionaryAttributeType> default_value = {};
 };
 
 /**
- * Returns ColumnVector data as PaddedPodArray. 
- * 
+ * Returns ColumnVector data as PaddedPodArray.
+
  * If column is constant parameter backup_storage is used to store values.
  */
 template <typename T>
 static const PaddedPODArray<T> & getColumnVectorData(
-    const IDictionaryBase * dictionary [[maybe_unused]],
+    const IDictionaryBase * dictionary,
     const ColumnPtr column,
     PaddedPODArray<T> & backup_storage)
 {
diff --git a/src/Dictionaries/DirectDictionary.cpp b/src/Dictionaries/DirectDictionary.cpp
index 0d767f322f7..b61f256b0cc 100644
--- a/src/Dictionaries/DirectDictionary.cpp
+++ b/src/Dictionaries/DirectDictionary.cpp
@@ -39,7 +39,7 @@ void DirectDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<
 {
     const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
     DictionaryDefaultValueExtractor<UInt64> extractor(null_value);
-    
+
     getItemsImpl<UInt64, UInt64>(
         *hierarchical_attribute,
         ids,
@@ -166,9 +166,10 @@ ColumnPtr DirectDictionary::getColumn(
 
         using ValueType = DictionaryValueType<AttributeType>;
         using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
- 
-        const auto null_value = std::get<ValueType>(attribute.null_values);
-        DictionaryDefaultValueExtractor<ValueType> default_value_extractor(null_value, default_values_column);
+
+        const auto attribute_null_value = std::get<ValueType>(attribute.null_values);
+        AttributeType null_value = static_cast<AttributeType>(attribute_null_value);
+        DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(std::move(null_value), default_values_column);
 
         auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
 
diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp
index 3f6dad9a20d..f4f50a69598 100644
--- a/src/Dictionaries/FlatDictionary.cpp
+++ b/src/Dictionaries/FlatDictionary.cpp
@@ -131,9 +131,10 @@ ColumnPtr FlatDictionary::getColumn(
         using AttributeType = typename Type::AttributeType;
         using ValueType = DictionaryValueType<AttributeType>;
         using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
-        
-        const auto null_value = std::get<ValueType>(attribute.null_values);
-        DictionaryDefaultValueExtractor<ValueType> default_value_extractor(null_value, default_values_column);
+
+        const auto attribute_null_value = std::get<ValueType>(attribute.null_values);
+        AttributeType null_value = static_cast<AttributeType>(attribute_null_value);
+        DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(std::move(null_value), default_values_column);
 
         auto column = ColumnProvider::getColumn(dictionary_attribute, size);
 
@@ -153,7 +154,7 @@ ColumnPtr FlatDictionary::getColumn(
 
             getItemsImpl<ValueType, ValueType>(
                 attribute,
-                ids, 
+                ids,
                 [&](const size_t row, const auto value) { out[row] = value; },
                 default_value_extractor);
         }
@@ -404,12 +405,12 @@ FlatDictionary::Attribute FlatDictionary::createAttribute(const DictionaryAttrib
 }
 
 
-template <typename AttributeType, typename OutputType, typename ValueSetter>
+template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
 void FlatDictionary::getItemsImpl(
     const Attribute & attribute,
     const PaddedPODArray<Key> & ids,
     ValueSetter && set_value,
-    DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const
+    DefaultValueExtractor & default_value_extractor) const
 {
     const auto & attr = std::get<ContainerType<AttributeType>>(attribute.arrays);
     const auto rows = ext::size(ids);
diff --git a/src/Dictionaries/FlatDictionary.h b/src/Dictionaries/FlatDictionary.h
index 9e719a4368d..90123ce08bf 100644
--- a/src/Dictionaries/FlatDictionary.h
+++ b/src/Dictionaries/FlatDictionary.h
@@ -148,12 +148,12 @@ private:
 
     Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value);
 
-    template <typename AttributeType, typename OutputType, typename ValueSetter>
+    template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
     void getItemsImpl(
         const Attribute & attribute,
         const PaddedPODArray<Key> & ids,
         ValueSetter && set_value,
-        DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const;
+        DefaultValueExtractor & default_value_extractor) const;
 
     template <typename T>
     void resize(Attribute & attribute, const Key id);
diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp
index 22bcd3a602c..5a9f9a02066 100644
--- a/src/Dictionaries/HashedDictionary.cpp
+++ b/src/Dictionaries/HashedDictionary.cpp
@@ -152,9 +152,10 @@ ColumnPtr HashedDictionary::getColumn(
         using AttributeType = typename Type::AttributeType;
         using ValueType = DictionaryValueType<AttributeType>;
         using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
- 
-        const auto null_value = std::get<ValueType>(attribute.null_values);
-        DictionaryDefaultValueExtractor<ValueType> default_value_extractor(null_value, default_values_column);
+
+        const auto attribute_null_value = std::get<ValueType>(attribute.null_values);
+        AttributeType null_value = static_cast<AttributeType>(attribute_null_value);
+        DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(std::move(null_value), default_values_column);
 
         auto column = ColumnProvider::getColumn(dictionary_attribute, size);
 
@@ -178,7 +179,7 @@ ColumnPtr HashedDictionary::getColumn(
                 [&](const size_t row, const auto value) { return out[row] = value; },
                 default_value_extractor);
         }
-        
+
         result = std::move(column);
     };
 
@@ -518,12 +519,12 @@ HashedDictionary::Attribute HashedDictionary::createAttribute(const DictionaryAt
 }
 
 
-template <typename AttributeType, typename OutputType, typename MapType, typename ValueSetter>
+template <typename AttributeType, typename OutputType, typename MapType, typename ValueSetter, typename DefaultValueExtractor>
 void HashedDictionary::getItemsAttrImpl(
     const MapType & attr,
     const PaddedPODArray<Key> & ids,
     ValueSetter && set_value,
-    DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const
+    DefaultValueExtractor & default_value_extractor) const
 {
     const auto rows = ext::size(ids);
 
@@ -536,12 +537,12 @@ void HashedDictionary::getItemsAttrImpl(
     query_count.fetch_add(rows, std::memory_order_relaxed);
 }
 
-template <typename AttributeType, typename OutputType, typename ValueSetter>
+template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
 void HashedDictionary::getItemsImpl(
     const Attribute & attribute,
     const PaddedPODArray<Key> & ids,
     ValueSetter && set_value,
-    DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const
+    DefaultValueExtractor & default_value_extractor) const
 {
     if (!sparse)
         return getItemsAttrImpl<AttributeType, OutputType>(*std::get<CollectionPtrType<AttributeType>>(attribute.maps), ids, set_value, default_value_extractor);
diff --git a/src/Dictionaries/HashedDictionary.h b/src/Dictionaries/HashedDictionary.h
index fd8bc0c10b1..97b329a8b25 100644
--- a/src/Dictionaries/HashedDictionary.h
+++ b/src/Dictionaries/HashedDictionary.h
@@ -182,19 +182,19 @@ private:
 
     Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value);
 
-    template <typename AttributeType, typename OutputType, typename MapType, typename ValueSetter>
+    template <typename AttributeType, typename OutputType, typename MapType, typename ValueSetter, typename DefaultValueExtractor>
     void getItemsAttrImpl(
         const MapType & attr,
         const PaddedPODArray<Key> & ids,
         ValueSetter && set_value,
-        DictionaryDefaultValueExtractor<AttributeType> & extractor) const;
+        DefaultValueExtractor & default_value_extractor) const;
 
-    template <typename AttributeType, typename OutputType, typename ValueSetter>
+    template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
     void getItemsImpl(
         const Attribute & attribute,
         const PaddedPODArray<Key> & ids,
         ValueSetter && set_value,
-        DictionaryDefaultValueExtractor<AttributeType> & extractor) const;
+        DefaultValueExtractor & default_value_extractor) const;
 
     template <typename T>
     bool setAttributeValueImpl(Attribute & attribute, const Key id, const T value);
diff --git a/src/Dictionaries/IDictionary.h b/src/Dictionaries/IDictionary.h
index 4c03b7cf2ad..e0e4c7eb880 100644
--- a/src/Dictionaries/IDictionary.h
+++ b/src/Dictionaries/IDictionary.h
@@ -22,21 +22,20 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int NOT_IMPLEMENTED;
-    extern const int TYPE_MISMATCH;
 }
 
 struct IDictionaryBase;
 using DictionaryPtr = std::unique_ptr<IDictionaryBase>;
 
-/** DictionaryKeyType provides IDictionary client information about 
-  *  which key type is supported by dictionary.
-  * 
-  *  Simple is for dictionaries that support UInt64 key column.
-  * 
-  *  Complex is for dictionaries that support any combination of key columns.
-  * 
-  *  Range is for dictionary that support combination of UInt64 key column,
-  *  and Integer representable range key column.
+/** DictionaryKeyType provides IDictionary client information about
+  * which key type is supported by dictionary.
+  *
+  * Simple is for dictionaries that support UInt64 key column.
+  *
+  * Complex is for dictionaries that support any combination of key columns.
+  *
+  * Range is for dictionary that support combination of UInt64 key column,
+  * and numeric representable range key column.
   */
 enum class DictionaryKeyType
 {
@@ -47,7 +46,7 @@ enum class DictionaryKeyType
 
 /**
  * Base class for Dictionaries implementation.
- */ 
+ */
 struct IDictionaryBase : public IExternalLoadable
 {
     using Key = UInt64;
@@ -109,11 +108,11 @@ struct IDictionaryBase : public IExternalLoadable
       */
     virtual DictionaryKeyType getKeyType() const = 0;
 
-    /** Subclass must validate key columns and keys types 
+    /** Subclass must validate key columns and keys types
       * and return column representation of dictionary attribute.
-      * 
+      *
       * Parameter default_values_column must be used to provide default values
-      * for keys that are not in dictionary. If null pointer is passed, 
+      * for keys that are not in dictionary. If null pointer is passed,
       * then default attribute value must be used.
       */
     virtual ColumnPtr getColumn(
@@ -123,8 +122,8 @@ struct IDictionaryBase : public IExternalLoadable
         const DataTypes & key_types,
         const ColumnPtr default_values_column) const = 0;
 
-    /** Subclass must validate key columns and key types and return ColumnUInt8 that 
-      * is bitmask representation of is key in dictionary or not. 
+    /** Subclass must validate key columns and key types and return ColumnUInt8 that
+      * is bitmask representation of is key in dictionary or not.
       * If key is in dictionary then value of associated row will be 1, otherwise 0.
       */
     virtual ColumnUInt8::Ptr hasKeys(
diff --git a/src/Dictionaries/IPAddressDictionary.cpp b/src/Dictionaries/IPAddressDictionary.cpp
index 468d720f69d..b59fa85cc51 100644
--- a/src/Dictionaries/IPAddressDictionary.cpp
+++ b/src/Dictionaries/IPAddressDictionary.cpp
@@ -289,10 +289,10 @@ ColumnPtr IPAddressDictionary::getColumn(
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
         using AttributeType = typename Type::AttributeType;
         using ValueType = DictionaryValueType<AttributeType>;
-        using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>; 
+        using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
 
-        const auto null_value = ValueType{std::get<AttributeType>(attribute.null_values)};
-        DictionaryDefaultValueExtractor<ValueType> default_value_extractor(null_value, default_values_column);
+        const auto null_value = std::get<AttributeType>(attribute.null_values);
+        DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
 
         auto column = ColumnProvider::getColumn(dictionary_attribute, size);
 
@@ -637,12 +637,12 @@ const uint8_t * IPAddressDictionary::getIPv6FromOffset(const IPAddressDictionary
     return reinterpret_cast<const uint8_t *>(&ipv6_col[i * IPV6_BINARY_LENGTH]);
 }
 
-template <typename AttributeType, typename OutputType, typename ValueSetter>
+template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
 void IPAddressDictionary::getItemsByTwoKeyColumnsImpl(
     const Attribute & attribute,
     const Columns & key_columns,
     ValueSetter && set_value,
-    DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const
+    DefaultValueExtractor & default_value_extractor) const
 {
     const auto first_column = key_columns.front();
     const auto rows = first_column->size();
@@ -718,12 +718,12 @@ void IPAddressDictionary::getItemsByTwoKeyColumnsImpl(
     }
 }
 
-template <typename AttributeType, typename OutputType, typename ValueSetter>
+template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
 void IPAddressDictionary::getItemsImpl(
     const Attribute & attribute,
     const Columns & key_columns,
     ValueSetter && set_value,
-    DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const
+    DefaultValueExtractor & default_value_extractor) const
 {
     const auto first_column = key_columns.front();
     const auto rows = first_column->size();
diff --git a/src/Dictionaries/IPAddressDictionary.h b/src/Dictionaries/IPAddressDictionary.h
index 0694724aaca..445cea9910f 100644
--- a/src/Dictionaries/IPAddressDictionary.h
+++ b/src/Dictionaries/IPAddressDictionary.h
@@ -141,19 +141,19 @@ private:
 
     Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
 
-    template <typename AttributeType, typename OutputType, typename ValueSetter>
+    template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
     void getItemsByTwoKeyColumnsImpl(
         const Attribute & attribute,
         const Columns & key_columns,
         ValueSetter && set_value,
-        DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const;
+        DefaultValueExtractor & default_value_extractor) const;
 
-    template <typename AttributeType, typename OutputType, typename ValueSetter>
+    template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
     void getItemsImpl(
         const Attribute & attribute,
         const Columns & key_columns,
         ValueSetter && set_value,
-        DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const;
+        DefaultValueExtractor & default_value_extractor) const;
 
     template <typename T>
     void setAttributeValueImpl(Attribute & attribute, const T value);
diff --git a/src/Dictionaries/PolygonDictionary.cpp b/src/Dictionaries/PolygonDictionary.cpp
index 15af4680d9d..497a7483808 100644
--- a/src/Dictionaries/PolygonDictionary.cpp
+++ b/src/Dictionaries/PolygonDictionary.cpp
@@ -112,11 +112,10 @@ ColumnPtr IPolygonDictionary::getColumn(
     {
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
         using AttributeType = typename Type::AttributeType;
-        using ValueType = DictionaryValueType<AttributeType>;
         using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
- 
-        const auto null_value = static_cast<ValueType>(std::get<AttributeType>(null_values[index]));
-        DictionaryDefaultValueExtractor<ValueType> default_value_extractor(null_value, default_values_column);
+
+        const auto null_value = std::get<AttributeType>(null_values[index]);
+        DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
 
         auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
 
diff --git a/src/Dictionaries/RangeHashedDictionary.cpp b/src/Dictionaries/RangeHashedDictionary.cpp
index a1f5c7a68fb..9fb1a57a381 100644
--- a/src/Dictionaries/RangeHashedDictionary.cpp
+++ b/src/Dictionaries/RangeHashedDictionary.cpp
@@ -124,10 +124,11 @@ ColumnPtr RangeHashedDictionary::getColumn(
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
         using AttributeType = typename Type::AttributeType;
         using ValueType = DictionaryValueType<AttributeType>;
-        using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>; 
+        using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
 
-        const auto null_value = std::get<ValueType>(attribute.null_values);
-        DictionaryDefaultValueExtractor<ValueType> default_value_extractor(null_value, default_values_column);
+        const auto attribute_null_value = std::get<ValueType>(attribute.null_values);
+        AttributeType null_value = static_cast<AttributeType>(attribute_null_value);
+        DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(std::move(null_value), default_values_column);
 
         auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
 
@@ -138,7 +139,8 @@ ColumnPtr RangeHashedDictionary::getColumn(
             getItemsImpl<ValueType, ValueType>(
                 attribute,
                 modified_key_columns,
-                [&](const size_t row, const StringRef value, bool is_null) {
+                [&](const size_t row, const StringRef value, bool is_null)
+                {
                     if (attribute.is_nullable)
                         (*vec_null_map_to)[row] = is_null;
 
@@ -153,7 +155,8 @@ ColumnPtr RangeHashedDictionary::getColumn(
             getItemsImpl<ValueType, ValueType>(
                 attribute,
                 modified_key_columns,
-                [&](const size_t row, const auto value, bool is_null) {
+                [&](const size_t row, const auto value, bool is_null)
+                {
                     if (attribute.is_nullable)
                         (*vec_null_map_to)[row] = is_null;
 
@@ -313,12 +316,12 @@ RangeHashedDictionary::createAttribute(const DictionaryAttribute& attribute, con
     return attr;
 }
 
-template <typename AttributeType, typename OutputType, typename ValueSetter>
+template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
 void RangeHashedDictionary::getItemsImpl(
     const Attribute & attribute,
     const Columns & key_columns,
     ValueSetter && set_value,
-    DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const
+    DefaultValueExtractor & default_value_extractor) const
 {
     PaddedPODArray<Key> key_backup_storage;
     PaddedPODArray<RangeStorageType> range_backup_storage;
diff --git a/src/Dictionaries/RangeHashedDictionary.h b/src/Dictionaries/RangeHashedDictionary.h
index 8252ee24cd5..216559089f5 100644
--- a/src/Dictionaries/RangeHashedDictionary.h
+++ b/src/Dictionaries/RangeHashedDictionary.h
@@ -152,12 +152,12 @@ private:
 
     Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value);
 
-    template <typename AttributeType, typename OutputType, typename ValueSetter>
+    template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
     void getItemsImpl(
         const Attribute & attribute,
         const Columns & key_columns,
         ValueSetter && set_value,
-        DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const;
+        DefaultValueExtractor & default_value_extractor) const;
 
     template <typename T>
     void setAttributeValueImpl(Attribute & attribute, const Key id, const Range & range, const Field & value);
diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp
index c064ccb0e63..217ae6ca1f0 100644
--- a/src/Dictionaries/SSDCacheDictionary.cpp
+++ b/src/Dictionaries/SSDCacheDictionary.cpp
@@ -1348,11 +1348,10 @@ ColumnPtr SSDCacheDictionary::getColumn(
     {
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
         using AttributeType = typename Type::AttributeType;
-        using ValueType = DictionaryValueType<AttributeType>;
         using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
 
-        const auto null_value = static_cast<ValueType>(std::get<AttributeType>(null_values[index]));
-        DictionaryDefaultValueExtractor<ValueType> default_value_extractor(null_value, default_values_column);
+        const auto null_value = std::get<AttributeType>(null_values[index]);
+        DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
 
         auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
 
diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp
index 1bff34b7cb3..a51df76c92f 100644
--- a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp
+++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp
@@ -1401,12 +1401,10 @@ ColumnPtr SSDComplexKeyCacheDictionary::getColumn(
     {
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
         using AttributeType = typename Type::AttributeType;
-
-        using ValueType = DictionaryValueType<AttributeType>;
         using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
 
-        const auto null_value = static_cast<ValueType>(std::get<AttributeType>(null_values[index]));
-        DictionaryDefaultValueExtractor<ValueType> default_value_extractor(null_value, default_values_column);
+        const auto null_value = std::get<AttributeType>(null_values[index]);
+        DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
 
         auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
 
@@ -1434,13 +1432,13 @@ ColumnPtr SSDComplexKeyCacheDictionary::getColumn(
     return result;
 }
 
-template <typename AttributeType, typename OutputType>
+template <typename AttributeType, typename OutputType, typename DefaultValueExtractor>
 void SSDComplexKeyCacheDictionary::getItemsNumberImpl(
     const size_t attribute_index,
     const Columns & key_columns,
     const DataTypes & key_types,
     ResultArrayType<OutputType> & out,
-    DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const
+    DefaultValueExtractor & default_value_extractor) const
 {
     assert(dict_struct.key);
     assert(key_columns.size() == key_types.size());
@@ -1488,7 +1486,7 @@ void SSDComplexKeyCacheDictionary::getItemsStringImpl(
     const Columns & key_columns,
     const DataTypes & key_types,
     ColumnString * out,
-    DictionaryDefaultValueExtractor<StringRef> & default_value_extractor) const
+    DictionaryDefaultValueExtractor<String> & default_value_extractor) const
 {
     dict_struct.validateKeyTypes(key_types);
 
diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.h b/src/Dictionaries/SSDComplexKeyCacheDictionary.h
index 9e33e1dbdfb..be65d823e34 100644
--- a/src/Dictionaries/SSDComplexKeyCacheDictionary.h
+++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.h
@@ -593,20 +593,20 @@ private:
     AttributeValueVariant createAttributeNullValueWithType(const AttributeUnderlyingType type, const Field & null_value);
     void createAttributes();
 
-    template <typename AttributeType, typename OutputType>
+    template <typename AttributeType, typename OutputType, typename DefaultValueExtractor>
     void getItemsNumberImpl(
         const size_t attribute_index,
         const Columns & key_columns,
         const DataTypes & key_types,
         ResultArrayType<OutputType> & out,
-        DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const;
+        DefaultValueExtractor & default_value_extractor) const;
 
     void getItemsStringImpl(
         const size_t attribute_index,
         const Columns & key_columns,
         const DataTypes & key_types,
         ColumnString * out,
-        DictionaryDefaultValueExtractor<StringRef> & default_value_extractor) const;
+        DictionaryDefaultValueExtractor<String> & default_value_extractor) const;
 
     const std::string name;
     const DictionaryStructure dict_struct;

From 6653e356a353d082f28d5cfa3c53d1c385808a13 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Sat, 23 Jan 2021 20:08:27 +0300
Subject: [PATCH 0247/1238] Fixed build issues

---
 src/Dictionaries/ComplexKeyHashedDictionary.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Dictionaries/ComplexKeyHashedDictionary.cpp b/src/Dictionaries/ComplexKeyHashedDictionary.cpp
index 01cc089b99a..a0784b5a417 100644
--- a/src/Dictionaries/ComplexKeyHashedDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyHashedDictionary.cpp
@@ -68,7 +68,7 @@ ColumnPtr ComplexKeyHashedDictionary::getColumn(
         using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
 
         const auto attribute_null_value = std::get<ValueType>(attribute.null_values);
-        AttributeType null_value = attribute_null_value;
+        AttributeType null_value = static_cast<AttributeType>(attribute_null_value);
         DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(std::move(null_value), default_values_column);
 
         auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);

From 8e8d6f36ac4ace7f6fa0b7de7d874dbed7690056 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Mon, 25 Jan 2021 11:18:07 +0300
Subject: [PATCH 0248/1238] Fixed DictionaryDefaultValueExtractor

---
 src/Dictionaries/DictionaryHelpers.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Dictionaries/DictionaryHelpers.h b/src/Dictionaries/DictionaryHelpers.h
index 587a4dc8f5a..3bc6c3bd344 100644
--- a/src/Dictionaries/DictionaryHelpers.h
+++ b/src/Dictionaries/DictionaryHelpers.h
@@ -69,11 +69,11 @@ class DictionaryDefaultValueExtractor
 public:
     DictionaryDefaultValueExtractor(DictionaryAttributeType attribute_default_value, ColumnPtr default_values_column_ = nullptr)
     {
-        if (!default_values_column_)
+        if (default_values_column_ == nullptr)
             default_value = { std::move(attribute_default_value) };
         else
         {
-            if (const auto * const default_col = checkAndGetColumn<DefaultColumnType>(*default_values_column))
+            if (const auto * const default_col = checkAndGetColumn<DefaultColumnType>(*default_values_column_))
             {
                 default_values_column = default_col;
             }

From 31881d24ad1aa7a5b09dc477d4f3107165e1720d Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Mon, 25 Jan 2021 15:58:26 +0300
Subject: [PATCH 0249/1238] Fixed clang errors

---
 src/Dictionaries/CacheDictionary.cpp              | 2 +-
 src/Dictionaries/ComplexKeyCacheDictionary.cpp    | 2 +-
 src/Dictionaries/DictionaryHelpers.h              | 6 ++++--
 src/Dictionaries/IPAddressDictionary.cpp          | 2 +-
 src/Dictionaries/PolygonDictionary.cpp            | 2 +-
 src/Dictionaries/SSDCacheDictionary.cpp           | 6 +++---
 src/Dictionaries/SSDComplexKeyCacheDictionary.cpp | 2 +-
 7 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp
index ee9408e0c91..ad98d69fdf9 100644
--- a/src/Dictionaries/CacheDictionary.cpp
+++ b/src/Dictionaries/CacheDictionary.cpp
@@ -273,7 +273,7 @@ ColumnPtr CacheDictionary::getColumn(
         using AttributeType = typename Type::AttributeType;
         using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
 
-        const auto null_value = std::get<AttributeType>(attribute.null_value);
+        const auto & null_value = std::get<AttributeType>(attribute.null_value);
         DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
 
         auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
diff --git a/src/Dictionaries/ComplexKeyCacheDictionary.cpp b/src/Dictionaries/ComplexKeyCacheDictionary.cpp
index ff5a0ed9356..a6478e96a15 100644
--- a/src/Dictionaries/ComplexKeyCacheDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyCacheDictionary.cpp
@@ -93,7 +93,7 @@ ColumnPtr ComplexKeyCacheDictionary::getColumn(
         using AttributeType = typename Type::AttributeType;
         using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
 
-        const auto null_value = std::get<AttributeType>(attribute.null_values);
+        const auto & null_value = std::get<AttributeType>(attribute.null_values);
         DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
 
         auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
diff --git a/src/Dictionaries/DictionaryHelpers.h b/src/Dictionaries/DictionaryHelpers.h
index 3bc6c3bd344..726837b2418 100644
--- a/src/Dictionaries/DictionaryHelpers.h
+++ b/src/Dictionaries/DictionaryHelpers.h
@@ -67,6 +67,8 @@ class DictionaryDefaultValueExtractor
     using DefaultColumnType = typename DictionaryAttributeColumnProvider<DictionaryAttributeType>::ColumnType;
 
 public:
+    using DefaultValueType = DictionaryValueType<DictionaryAttributeType>;
+
     DictionaryDefaultValueExtractor(DictionaryAttributeType attribute_default_value, ColumnPtr default_values_column_ = nullptr)
     {
         if (default_values_column_ == nullptr)
@@ -86,10 +88,10 @@ public:
         }
     }
 
-    DictionaryValueType<DictionaryAttributeType> operator[](size_t row)
+    DefaultValueType operator[](size_t row)
     {
         if (default_value)
-            return static_cast<DictionaryAttributeType>(*default_value);
+            return static_cast<DefaultValueType>(*default_value);
 
         if constexpr (std::is_same_v<DefaultColumnType, ColumnString>)
             return default_values_column->getDataAt(row);
diff --git a/src/Dictionaries/IPAddressDictionary.cpp b/src/Dictionaries/IPAddressDictionary.cpp
index b59fa85cc51..fa9f8899142 100644
--- a/src/Dictionaries/IPAddressDictionary.cpp
+++ b/src/Dictionaries/IPAddressDictionary.cpp
@@ -291,7 +291,7 @@ ColumnPtr IPAddressDictionary::getColumn(
         using ValueType = DictionaryValueType<AttributeType>;
         using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
 
-        const auto null_value = std::get<AttributeType>(attribute.null_values);
+        const auto & null_value = std::get<AttributeType>(attribute.null_values);
         DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
 
         auto column = ColumnProvider::getColumn(dictionary_attribute, size);
diff --git a/src/Dictionaries/PolygonDictionary.cpp b/src/Dictionaries/PolygonDictionary.cpp
index 497a7483808..e0d0fa0a0e6 100644
--- a/src/Dictionaries/PolygonDictionary.cpp
+++ b/src/Dictionaries/PolygonDictionary.cpp
@@ -114,7 +114,7 @@ ColumnPtr IPolygonDictionary::getColumn(
         using AttributeType = typename Type::AttributeType;
         using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
 
-        const auto null_value = std::get<AttributeType>(null_values[index]);
+        const auto & null_value = std::get<AttributeType>(null_values[index]);
         DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
 
         auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp
index 217ae6ca1f0..cbeea39decb 100644
--- a/src/Dictionaries/SSDCacheDictionary.cpp
+++ b/src/Dictionaries/SSDCacheDictionary.cpp
@@ -940,14 +940,14 @@ SSDCacheStorage::~SSDCacheStorage()
 template <typename Out, typename GetDefault>
 void SSDCacheStorage::getValue(const size_t attribute_index, const PaddedPODArray<UInt64> & ids,
       ResultArrayType<Out> & out, std::unordered_map<Key, std::vector<size_t>> & not_found,
-      GetDefault & get_default, std::chrono::system_clock::time_point now) const
+      GetDefault & default_value_extractor, std::chrono::system_clock::time_point now) const
 {
     std::vector<bool> found(ids.size(), false);
 
     {
         std::shared_lock lock(rw_lock);
         for (const auto & partition : partitions)
-            partition->getValue<Out>(attribute_index, ids, out, found, get_default, now);
+            partition->getValue<Out>(attribute_index, ids, out, found, default_value_extractor, now);
     }
 
     for (size_t i = 0; i < ids.size(); ++i)
@@ -1350,7 +1350,7 @@ ColumnPtr SSDCacheDictionary::getColumn(
         using AttributeType = typename Type::AttributeType;
         using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
 
-        const auto null_value = std::get<AttributeType>(null_values[index]);
+        const auto & null_value = std::get<AttributeType>(null_values[index]);
         DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
 
         auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp
index a51df76c92f..cb22dd2be15 100644
--- a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp
+++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp
@@ -1403,7 +1403,7 @@ ColumnPtr SSDComplexKeyCacheDictionary::getColumn(
         using AttributeType = typename Type::AttributeType;
         using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
 
-        const auto null_value = std::get<AttributeType>(null_values[index]);
+        const auto & null_value = std::get<AttributeType>(null_values[index]);
         DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(null_value, default_values_column);
 
         auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);

From 67ffa4189e9776b008e06c1fabdaf0f006170648 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Tue, 26 Jan 2021 13:47:12 +0300
Subject: [PATCH 0250/1238] Fixed clang build

---
 src/Dictionaries/ComplexKeyCacheDictionary.cpp |  2 +-
 src/Dictionaries/ComplexKeyCacheDictionary.h   |  2 +-
 src/Dictionaries/ComplexKeyDirectDictionary.h  |  4 ++--
 src/Dictionaries/ComplexKeyHashedDictionary.h  |  8 ++++----
 src/Dictionaries/DictionaryHelpers.h           | 16 +++++++++++-----
 src/Dictionaries/DictionaryStructure.cpp       |  2 +-
 src/Dictionaries/DirectDictionary.h            |  4 ++--
 src/Dictionaries/FlatDictionary.h              |  4 ++--
 src/Dictionaries/HashedDictionary.cpp          |  4 ++--
 src/Dictionaries/IPAddressDictionary.h         |  4 ++--
 src/Dictionaries/RangeHashedDictionary.h       |  9 +++++----
 11 files changed, 33 insertions(+), 26 deletions(-)

diff --git a/src/Dictionaries/ComplexKeyCacheDictionary.cpp b/src/Dictionaries/ComplexKeyCacheDictionary.cpp
index a6478e96a15..cbb57f81793 100644
--- a/src/Dictionaries/ComplexKeyCacheDictionary.cpp
+++ b/src/Dictionaries/ComplexKeyCacheDictionary.cpp
@@ -471,7 +471,7 @@ void ComplexKeyCacheDictionary::getItemsString(
     for (const auto row : ext::range(0, ext::size(keys_array)))
     {
         const StringRef key = keys_array[row];
-        const auto it = map.find(key);
+        auto * const it = map.find(key);
         const auto string_ref = it ? it->getMapped() : default_value_extractor[row];
         out->insertData(string_ref.data, string_ref.size);
     }
diff --git a/src/Dictionaries/ComplexKeyCacheDictionary.h b/src/Dictionaries/ComplexKeyCacheDictionary.h
index 48ffcb814ef..f5643fc799c 100644
--- a/src/Dictionaries/ComplexKeyCacheDictionary.h
+++ b/src/Dictionaries/ComplexKeyCacheDictionary.h
@@ -96,7 +96,7 @@ public:
         const DataTypePtr & result_type,
         const Columns & key_columns,
         const DataTypes & key_types,
-        const ColumnPtr default_untyped) const override;
+        const ColumnPtr default_values_column) const override;
 
     ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
 
diff --git a/src/Dictionaries/ComplexKeyDirectDictionary.h b/src/Dictionaries/ComplexKeyDirectDictionary.h
index 11158b23c5a..0e191321daa 100644
--- a/src/Dictionaries/ComplexKeyDirectDictionary.h
+++ b/src/Dictionaries/ComplexKeyDirectDictionary.h
@@ -108,9 +108,9 @@ private:
     void addAttributeSize(const Attribute & attribute);
 
     template <typename T>
-    void createAttributeImpl(Attribute & attribute, const Field & null_value);
+    static void createAttributeImpl(Attribute & attribute, const Field & null_value);
 
-    Attribute createAttribute(const DictionaryAttribute & attribute, const Field & null_value, const std::string & name);
+    static Attribute createAttribute(const DictionaryAttribute & attribute, const Field & null_value, const std::string & name);
 
     template <typename Pool>
     StringRef placeKeysInPool(
diff --git a/src/Dictionaries/ComplexKeyHashedDictionary.h b/src/Dictionaries/ComplexKeyHashedDictionary.h
index 6acb6600189..ecc720ca0b0 100644
--- a/src/Dictionaries/ComplexKeyHashedDictionary.h
+++ b/src/Dictionaries/ComplexKeyHashedDictionary.h
@@ -136,9 +136,9 @@ private:
     void calculateBytesAllocated();
 
     template <typename T>
-    void createAttributeImpl(Attribute & attribute, const Field & null_value);
+    static void createAttributeImpl(Attribute & attribute, const Field & null_value);
 
-    Attribute createAttribute(const DictionaryAttribute & attribute, const Field & null_value);
+    static Attribute createAttribute(const DictionaryAttribute & attribute, const Field & null_value);
 
     template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
     void getItemsImpl(
@@ -148,9 +148,9 @@ private:
         DefaultValueExtractor & default_value_extractor) const;
 
     template <typename T>
-    bool setAttributeValueImpl(Attribute & attribute, const StringRef key, const T value);
+    static bool setAttributeValueImpl(Attribute & attribute, const StringRef key, const T value);
 
-    bool setAttributeValue(Attribute & attribute, const StringRef key, const Field & value);
+    static bool setAttributeValue(Attribute & attribute, const StringRef key, const Field & value);
 
     const Attribute & getAttribute(const std::string & attribute_name) const;
 
diff --git a/src/Dictionaries/DictionaryHelpers.h b/src/Dictionaries/DictionaryHelpers.h
index 726837b2418..0026d8848ca 100644
--- a/src/Dictionaries/DictionaryHelpers.h
+++ b/src/Dictionaries/DictionaryHelpers.h
@@ -70,18 +70,21 @@ public:
     using DefaultValueType = DictionaryValueType<DictionaryAttributeType>;
 
     DictionaryDefaultValueExtractor(DictionaryAttributeType attribute_default_value, ColumnPtr default_values_column_ = nullptr)
+        : default_value(std::move(attribute_default_value))
     {
         if (default_values_column_ == nullptr)
-            default_value = { std::move(attribute_default_value) };
+            use_default_value_from_column = false;
         else
         {
             if (const auto * const default_col = checkAndGetColumn<DefaultColumnType>(*default_values_column_))
             {
                 default_values_column = default_col;
+                use_default_value_from_column = true;
             }
             else if (const auto * const default_col_const = checkAndGetColumnConst<DefaultColumnType>(default_values_column_.get()))
             {
-                default_value = { default_col_const->template getValue<DictionaryAttributeType>() };
+                default_value = default_col_const->template getValue<DictionaryAttributeType>();
+                use_default_value_from_column = false;
             }
             else
                 throw Exception{"Type of default column is not the same as dictionary attribute type.", ErrorCodes::TYPE_MISMATCH};
@@ -90,8 +93,10 @@ public:
 
     DefaultValueType operator[](size_t row)
     {
-        if (default_value)
-            return static_cast<DefaultValueType>(*default_value);
+        if (!use_default_value_from_column)
+            return static_cast<DefaultValueType>(default_value);
+
+        assert(default_values_column != nullptr);
 
         if constexpr (std::is_same_v<DefaultColumnType, ColumnString>)
             return default_values_column->getDataAt(row);
@@ -99,8 +104,9 @@ public:
             return default_values_column->getData()[row];
     }
 private:
+    DictionaryAttributeType default_value;
     const DefaultColumnType * default_values_column = nullptr;
-    std::optional<DictionaryAttributeType> default_value = {};
+    bool use_default_value_from_column = false;
 };
 
 /**
diff --git a/src/Dictionaries/DictionaryStructure.cpp b/src/Dictionaries/DictionaryStructure.cpp
index 5c6863da651..95c2e0a3e09 100644
--- a/src/Dictionaries/DictionaryStructure.cpp
+++ b/src/Dictionaries/DictionaryStructure.cpp
@@ -226,7 +226,7 @@ const DictionaryAttribute & DictionaryStructure::getAttribute(const String& attr
     if (find_iter == attributes.end())
         throw Exception{"No such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS};
 
-    auto & attribute = *find_iter;
+    const auto & attribute = *find_iter;
 
     if (!areTypesEqual(attribute.type, type))
         throw Exception{"Attribute type does not match, expected " + attribute.type->getName() + ", found " + type->getName(),
diff --git a/src/Dictionaries/DirectDictionary.h b/src/Dictionaries/DirectDictionary.h
index 9daa8415de8..c6f4c15556b 100644
--- a/src/Dictionaries/DirectDictionary.h
+++ b/src/Dictionaries/DirectDictionary.h
@@ -109,9 +109,9 @@ private:
     void addAttributeSize(const Attribute & attribute);
 
     template <typename T>
-    void createAttributeImpl(Attribute & attribute, const Field & null_value);
+    static void createAttributeImpl(Attribute & attribute, const Field & null_value);
 
-    Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value, const std::string & name);
+    static Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value, const std::string & name);
 
     template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
     void getItemsImpl(
diff --git a/src/Dictionaries/FlatDictionary.h b/src/Dictionaries/FlatDictionary.h
index 90123ce08bf..23bfa3d37b5 100644
--- a/src/Dictionaries/FlatDictionary.h
+++ b/src/Dictionaries/FlatDictionary.h
@@ -144,9 +144,9 @@ private:
     void calculateBytesAllocated();
 
     template <typename T>
-    void createAttributeImpl(Attribute & attribute, const Field & null_value);
+    static void createAttributeImpl(Attribute & attribute, const Field & null_value);
 
-    Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value);
+    static Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value);
 
     template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
     void getItemsImpl(
diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp
index 5a9f9a02066..b51f2414142 100644
--- a/src/Dictionaries/HashedDictionary.cpp
+++ b/src/Dictionaries/HashedDictionary.cpp
@@ -506,7 +506,7 @@ HashedDictionary::Attribute HashedDictionary::createAttribute(const DictionaryAt
     auto nullable_set = attribute.is_nullable ? std::make_optional<NullableSet>() : std::optional<NullableSet>{};
     Attribute attr{attribute.underlying_type, std::move(nullable_set), {}, {}, {}, {}};
 
-    auto type_call = [&](const auto &dictionary_attribute_type)
+    auto type_call = [&, this](const auto &dictionary_attribute_type)
     {
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
         using AttributeType = typename Type::AttributeType;
@@ -576,7 +576,7 @@ bool HashedDictionary::setAttributeValue(Attribute & attribute, const Key id, co
 {
     bool result = false;
 
-    auto type_call = [&](const auto &dictionary_attribute_type)
+    auto type_call = [&, this](const auto &dictionary_attribute_type)
     {
         using Type = std::decay_t<decltype(dictionary_attribute_type)>;
         using AttributeType = typename Type::AttributeType;
diff --git a/src/Dictionaries/IPAddressDictionary.h b/src/Dictionaries/IPAddressDictionary.h
index 445cea9910f..629a850267c 100644
--- a/src/Dictionaries/IPAddressDictionary.h
+++ b/src/Dictionaries/IPAddressDictionary.h
@@ -137,9 +137,9 @@ private:
     void calculateBytesAllocated();
 
     template <typename T>
-    void createAttributeImpl(Attribute & attribute, const Field & null_value);
+    static void createAttributeImpl(Attribute & attribute, const Field & null_value);
 
-    Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
+    static Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
 
     template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
     void getItemsByTwoKeyColumnsImpl(
diff --git a/src/Dictionaries/RangeHashedDictionary.h b/src/Dictionaries/RangeHashedDictionary.h
index 216559089f5..201e61df5e5 100644
--- a/src/Dictionaries/RangeHashedDictionary.h
+++ b/src/Dictionaries/RangeHashedDictionary.h
@@ -148,9 +148,10 @@ private:
     void calculateBytesAllocated();
 
     template <typename T>
-    void createAttributeImpl(Attribute & attribute, const Field & null_value);
+    static void createAttributeImpl(Attribute & attribute, const Field & null_value);
 
-    Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value);
+    
+    static Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value);
 
     template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>
     void getItemsImpl(
@@ -160,9 +161,9 @@ private:
         DefaultValueExtractor & default_value_extractor) const;
 
     template <typename T>
-    void setAttributeValueImpl(Attribute & attribute, const Key id, const Range & range, const Field & value);
+    static void setAttributeValueImpl(Attribute & attribute, const Key id, const Range & range, const Field & value);
 
-    void setAttributeValue(Attribute & attribute, const Key id, const Range & range, const Field & value);
+    static void setAttributeValue(Attribute & attribute, const Key id, const Range & range, const Field & value);
 
     const Attribute & getAttribute(const std::string & attribute_name) const;
 

From f0b1bc88c72fb262572053812eb7e14293f8e056 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Tue, 26 Jan 2021 20:49:02 +0300
Subject: [PATCH 0251/1238] Fixed style issues

---
 src/Dictionaries/RangeHashedDictionary.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Dictionaries/RangeHashedDictionary.h b/src/Dictionaries/RangeHashedDictionary.h
index 201e61df5e5..80cf47eb93b 100644
--- a/src/Dictionaries/RangeHashedDictionary.h
+++ b/src/Dictionaries/RangeHashedDictionary.h
@@ -150,7 +150,6 @@ private:
     template <typename T>
     static void createAttributeImpl(Attribute & attribute, const Field & null_value);
 
-    
     static Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value);
 
     template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultValueExtractor>

From 5dfe1c98e2fb5f20ac28ada5ffd43c6f72ff7ce7 Mon Sep 17 00:00:00 2001
From: Ivan <5627721+abyss7@users.noreply.github.com>
Date: Wed, 27 Jan 2021 16:25:46 +0300
Subject: [PATCH 0252/1238] Update BaseDaemon.cpp

---
 base/daemon/BaseDaemon.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp
index c51609cc171..43cb7baa10a 100644
--- a/base/daemon/BaseDaemon.cpp
+++ b/base/daemon/BaseDaemon.cpp
@@ -152,7 +152,7 @@ static void signalHandler(int sig, siginfo_t * info, void * context)
     if (sig != SIGTSTP) /// This signal is used for debugging.
     {
         /// The time that is usually enough for separate thread to print info into log.
-        sleepForSeconds(10);
+        sleepForSeconds(20);  /// FIXME: use some feedback from threads that process stacktrace
         call_default_signal_handler(sig);
     }
 

From 7af28e758a5bab37e540d7e9f32a0dea23168753 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Wed, 27 Jan 2021 16:37:58 +0300
Subject: [PATCH 0253/1238] Process read requests without raft

---
 src/Common/ZooKeeper/ZooKeeperCommon.cpp  |  6 +++
 src/Common/ZooKeeper/ZooKeeperCommon.h    | 12 ++++++
 src/Coordination/NuKeeperServer.cpp       | 49 +++++++++++++----------
 src/Coordination/NuKeeperStateMachine.cpp |  6 +++
 src/Coordination/NuKeeperStateMachine.h   |  2 +
 5 files changed, 54 insertions(+), 21 deletions(-)

diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.cpp b/src/Common/ZooKeeper/ZooKeeperCommon.cpp
index 9c699ee298a..278d36f9245 100644
--- a/src/Common/ZooKeeper/ZooKeeperCommon.cpp
+++ b/src/Common/ZooKeeper/ZooKeeperCommon.cpp
@@ -326,6 +326,12 @@ void ZooKeeperMultiRequest::readImpl(ReadBuffer & in)
     }
 }
 
+bool ZooKeeperMultiRequest::isReadRequest() const
+{
+    /// Possibly we can do better
+    return false;
+}
+
 void ZooKeeperMultiResponse::readImpl(ReadBuffer & in)
 {
     for (auto & response : responses)
diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.h b/src/Common/ZooKeeper/ZooKeeperCommon.h
index 9adb0c06e4c..b2c18c31798 100644
--- a/src/Common/ZooKeeper/ZooKeeperCommon.h
+++ b/src/Common/ZooKeeper/ZooKeeperCommon.h
@@ -60,6 +60,7 @@ struct ZooKeeperRequest : virtual Request
     static std::shared_ptr<ZooKeeperRequest> read(ReadBuffer & in);
 
     virtual ZooKeeperResponsePtr makeResponse() const = 0;
+    virtual bool isReadRequest() const = 0;
 };
 
 using ZooKeeperRequestPtr = std::shared_ptr<ZooKeeperRequest>;
@@ -71,6 +72,7 @@ struct ZooKeeperHeartbeatRequest final : ZooKeeperRequest
     void writeImpl(WriteBuffer &) const override {}
     void readImpl(ReadBuffer &) override {}
     ZooKeeperResponsePtr makeResponse() const override;
+    bool isReadRequest() const override { return true; }
 };
 
 struct ZooKeeperHeartbeatResponse final : ZooKeeperResponse
@@ -104,6 +106,7 @@ struct ZooKeeperAuthRequest final : ZooKeeperRequest
     void readImpl(ReadBuffer & in) override;
 
     ZooKeeperResponsePtr makeResponse() const override;
+    bool isReadRequest() const override { return false; }
 };
 
 struct ZooKeeperAuthResponse final : ZooKeeperResponse
@@ -122,6 +125,7 @@ struct ZooKeeperCloseRequest final : ZooKeeperRequest
     void readImpl(ReadBuffer &) override {}
 
     ZooKeeperResponsePtr makeResponse() const override;
+    bool isReadRequest() const override { return false; }
 };
 
 struct ZooKeeperCloseResponse final : ZooKeeperResponse
@@ -146,6 +150,7 @@ struct ZooKeeperCreateRequest final : public CreateRequest, ZooKeeperRequest
     void readImpl(ReadBuffer & in) override;
 
     ZooKeeperResponsePtr makeResponse() const override;
+    bool isReadRequest() const override { return false; }
 };
 
 struct ZooKeeperCreateResponse final : CreateResponse, ZooKeeperResponse
@@ -167,6 +172,7 @@ struct ZooKeeperRemoveRequest final : RemoveRequest, ZooKeeperRequest
     void readImpl(ReadBuffer & in) override;
 
     ZooKeeperResponsePtr makeResponse() const override;
+    bool isReadRequest() const override { return false; }
 };
 
 struct ZooKeeperRemoveResponse final : RemoveResponse, ZooKeeperResponse
@@ -183,6 +189,7 @@ struct ZooKeeperExistsRequest final : ExistsRequest, ZooKeeperRequest
     void readImpl(ReadBuffer & in) override;
 
     ZooKeeperResponsePtr makeResponse() const override;
+    bool isReadRequest() const override { return !has_watch; }
 };
 
 struct ZooKeeperExistsResponse final : ExistsResponse, ZooKeeperResponse
@@ -199,6 +206,7 @@ struct ZooKeeperGetRequest final : GetRequest, ZooKeeperRequest
     void readImpl(ReadBuffer & in) override;
 
     ZooKeeperResponsePtr makeResponse() const override;
+    bool isReadRequest() const override { return !has_watch; }
 };
 
 struct ZooKeeperGetResponse final : GetResponse, ZooKeeperResponse
@@ -217,6 +225,7 @@ struct ZooKeeperSetRequest final : SetRequest, ZooKeeperRequest
     void writeImpl(WriteBuffer & out) const override;
     void readImpl(ReadBuffer & in) override;
     ZooKeeperResponsePtr makeResponse() const override;
+    bool isReadRequest() const override { return false; }
 };
 
 struct ZooKeeperSetResponse final : SetResponse, ZooKeeperResponse
@@ -232,6 +241,7 @@ struct ZooKeeperListRequest : ListRequest, ZooKeeperRequest
     void writeImpl(WriteBuffer & out) const override;
     void readImpl(ReadBuffer & in) override;
     ZooKeeperResponsePtr makeResponse() const override;
+    bool isReadRequest() const override { return !has_watch; }
 };
 
 struct ZooKeeperSimpleListRequest final : ZooKeeperListRequest
@@ -261,6 +271,7 @@ struct ZooKeeperCheckRequest final : CheckRequest, ZooKeeperRequest
     void readImpl(ReadBuffer & in) override;
 
     ZooKeeperResponsePtr makeResponse() const override;
+    bool isReadRequest() const override { return !has_watch; }
 };
 
 struct ZooKeeperCheckResponse final : CheckResponse, ZooKeeperResponse
@@ -290,6 +301,7 @@ struct ZooKeeperMultiRequest final : MultiRequest, ZooKeeperRequest
     void readImpl(ReadBuffer & in) override;
 
     ZooKeeperResponsePtr makeResponse() const override;
+    bool isReadRequest() const override;
 };
 
 struct ZooKeeperMultiResponse final : MultiResponse, ZooKeeperResponse
diff --git a/src/Coordination/NuKeeperServer.cpp b/src/Coordination/NuKeeperServer.cpp
index 6d70eff1121..8b8288424d9 100644
--- a/src/Coordination/NuKeeperServer.cpp
+++ b/src/Coordination/NuKeeperServer.cpp
@@ -146,34 +146,41 @@ TestKeeperStorage::ResponsesForSessions NuKeeperServer::readZooKeeperResponses(n
 
 TestKeeperStorage::ResponsesForSessions NuKeeperServer::putRequests(const TestKeeperStorage::RequestsForSessions & requests)
 {
-    std::vector<nuraft::ptr<nuraft::buffer>> entries;
-    for (const auto & [session_id, request] : requests)
+    if (requests.size() == 1 && requests[0].request->isReadRequest())
     {
-        ops_mapping[session_id][request->xid] = request->makeResponse();
-        entries.push_back(getZooKeeperLogEntry(session_id, request));
+        return state_machine->processReadRequest(requests[0]);
     }
-
-    auto result = raft_instance->append_entries(entries);
-    if (!result->get_accepted())
-        throw Exception(ErrorCodes::RAFT_ERROR, "Cannot send requests to RAFT, mostly because we are not leader, code {}, message: '{}'", result->get_result_code(), result->get_result_str());
-
-    if (result->get_result_code() == nuraft::cmd_result_code::TIMEOUT)
+    else
     {
-        TestKeeperStorage::ResponsesForSessions responses;
+        std::vector<nuraft::ptr<nuraft::buffer>> entries;
         for (const auto & [session_id, request] : requests)
         {
-            auto response = request->makeResponse();
-            response->xid = request->xid;
-            response->zxid = 0; /// FIXME what we can do with it?
-            response->error = Coordination::Error::ZOPERATIONTIMEOUT;
-            responses.push_back(DB::TestKeeperStorage::ResponseForSession{session_id, response});
+            ops_mapping[session_id][request->xid] = request->makeResponse();
+            entries.push_back(getZooKeeperLogEntry(session_id, request));
         }
-        return responses;
-    }
-    else if (result->get_result_code() != nuraft::cmd_result_code::OK)
-        throw Exception(ErrorCodes::RAFT_ERROR, "Requests result failed with code {} and message: '{}'", result->get_result_code(), result->get_result_str());
 
-    return readZooKeeperResponses(result->get());
+        auto result = raft_instance->append_entries(entries);
+        if (!result->get_accepted())
+            throw Exception(ErrorCodes::RAFT_ERROR, "Cannot send requests to RAFT, mostly because we are not leader, code {}, message: '{}'", result->get_result_code(), result->get_result_str());
+
+        if (result->get_result_code() == nuraft::cmd_result_code::TIMEOUT)
+        {
+            TestKeeperStorage::ResponsesForSessions responses;
+            for (const auto & [session_id, request] : requests)
+            {
+                auto response = request->makeResponse();
+                response->xid = request->xid;
+                response->zxid = 0; /// FIXME what we can do with it?
+                response->error = Coordination::Error::ZOPERATIONTIMEOUT;
+                responses.push_back(DB::TestKeeperStorage::ResponseForSession{session_id, response});
+            }
+            return responses;
+        }
+        else if (result->get_result_code() != nuraft::cmd_result_code::OK)
+            throw Exception(ErrorCodes::RAFT_ERROR, "Requests result failed with code {} and message: '{}'", result->get_result_code(), result->get_result_str());
+
+        return readZooKeeperResponses(result->get());
+    }
 }
 
 
diff --git a/src/Coordination/NuKeeperStateMachine.cpp b/src/Coordination/NuKeeperStateMachine.cpp
index 52c82f44784..9f4572c02e0 100644
--- a/src/Coordination/NuKeeperStateMachine.cpp
+++ b/src/Coordination/NuKeeperStateMachine.cpp
@@ -223,4 +223,10 @@ int NuKeeperStateMachine::read_logical_snp_obj(
     return 0;
 }
 
+TestKeeperStorage::ResponsesForSessions NuKeeperStateMachine::processReadRequest(const TestKeeperStorage::RequestForSession & request_for_session)
+{
+    std::lock_guard lock(storage_lock);
+    return storage.processRequest(request_for_session.request, request_for_session.session_id);
+}
+
 }
diff --git a/src/Coordination/NuKeeperStateMachine.h b/src/Coordination/NuKeeperStateMachine.h
index a120e3f1cf6..368e088a2f9 100644
--- a/src/Coordination/NuKeeperStateMachine.h
+++ b/src/Coordination/NuKeeperStateMachine.h
@@ -47,6 +47,8 @@ public:
         return storage;
     }
 
+    TestKeeperStorage::ResponsesForSessions processReadRequest(const TestKeeperStorage::RequestForSession & requests);
+
 private:
     struct StorageSnapshot
     {

From e9f893d9b0c8906b00252c6fdc97ae54069f57f4 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Wed, 27 Jan 2021 16:57:26 +0300
Subject: [PATCH 0254/1238] Move query plan optimixations into separate files.

---
 src/CMakeLists.txt                            |   1 +
 .../QueryPlan/Optimizations/Optimizations.h   |  24 ++
 .../Optimizations/liftUpArrayJoin.cpp         |  79 +++++
 .../QueryPlan/Optimizations/limitPushDown.cpp | 107 +++++++
 .../Optimizations/mergeExpressions.cpp        |  60 ++++
 .../QueryPlan/Optimizations/splitFilter.cpp   |  50 ++++
 src/Processors/QueryPlan/QueryPlan.cpp        | 278 +-----------------
 src/Processors/ya.make                        |   4 +
 8 files changed, 333 insertions(+), 270 deletions(-)
 create mode 100644 src/Processors/QueryPlan/Optimizations/Optimizations.h
 create mode 100644 src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp
 create mode 100644 src/Processors/QueryPlan/Optimizations/limitPushDown.cpp
 create mode 100644 src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp
 create mode 100644 src/Processors/QueryPlan/Optimizations/splitFilter.cpp

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 4207d371c09..dba9385fe27 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -190,6 +190,7 @@ add_object_library(clickhouse_processors_sources Processors/Sources)
 add_object_library(clickhouse_processors_merges Processors/Merges)
 add_object_library(clickhouse_processors_merges_algorithms Processors/Merges/Algorithms)
 add_object_library(clickhouse_processors_queryplan Processors/QueryPlan)
+add_object_library(clickhouse_processors_queryplan_optimizations Processors/QueryPlan/Optimizations)
 
 set (DBMS_COMMON_LIBRARIES)
 # libgcc_s does not provide an implementation of an atomics library. Instead,
diff --git a/src/Processors/QueryPlan/Optimizations/Optimizations.h b/src/Processors/QueryPlan/Optimizations/Optimizations.h
new file mode 100644
index 00000000000..93b4be90a98
--- /dev/null
+++ b/src/Processors/QueryPlan/Optimizations/Optimizations.h
@@ -0,0 +1,24 @@
+#include <Processors/QueryPlan/QueryPlan.h>
+
+namespace DB
+{
+
+namespace QueryPlanOptimizations
+{
+
+/// Move LimitStep down if possible.
+void tryPushDownLimit(QueryPlanStepPtr & parent, QueryPlan::Node * child_node);
+
+/// Split FilterStep into chain `ExpressionStep -> FilterStep`, where FilterStep contains minimal number of nodes.
+bool trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes);
+
+/// Replace chain `ExpressionStep -> ExpressionStep` to single ExpressionStep
+/// Replace chain `FilterStep -> ExpressionStep` to single FilterStep
+bool tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Node * child_node);
+
+/// Move ARRAY JOIN up if possible.
+void tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Node * child_node, QueryPlan::Nodes & nodes);
+
+}
+
+}
diff --git a/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp b/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp
new file mode 100644
index 00000000000..474124d970d
--- /dev/null
+++ b/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp
@@ -0,0 +1,79 @@
+#include <Processors/QueryPlan/Optimizations/Optimizations.h>
+#include <Processors/QueryPlan/FilterStep.h>
+#include <Processors/QueryPlan/ExpressionStep.h>
+#include <Processors/QueryPlan/ArrayJoinStep.h>
+#include <Interpreters/ActionsDAG.h>
+#include <Interpreters/ArrayJoinAction.h>
+
+namespace DB::QueryPlanOptimizations
+{
+
+void tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Node * child_node, QueryPlan::Nodes & nodes)
+{
+    auto & parent = parent_node->step;
+    auto & child = child_node->step;
+    auto * expression_step = typeid_cast<ExpressionStep *>(parent.get());
+    auto * filter_step = typeid_cast<FilterStep *>(parent.get());
+    auto * array_join_step = typeid_cast<ArrayJoinStep *>(child.get());
+
+    if (!(expression_step || filter_step) || !array_join_step)
+        return;
+
+    const auto & array_join = array_join_step->arrayJoin();
+    const auto & expression = expression_step ? expression_step->getExpression()
+                                              : filter_step->getExpression();
+
+    auto split_actions = expression->splitActionsBeforeArrayJoin(array_join->columns);
+
+    /// No actions can be moved before ARRAY JOIN.
+    if (split_actions.first->empty())
+        return;
+
+    auto description = parent->getStepDescription();
+
+    /// All actions was moved before ARRAY JOIN. Swap Expression and ArrayJoin.
+    if (split_actions.second->empty())
+    {
+        auto expected_header = parent->getOutputStream().header;
+
+        /// Expression/Filter -> ArrayJoin
+        std::swap(parent, child);
+        /// ArrayJoin -> Expression/Filter
+
+        if (expression_step)
+            child = std::make_unique<ExpressionStep>(child_node->children.at(0)->step->getOutputStream(),
+                                                     std::move(split_actions.first));
+        else
+            child = std::make_unique<FilterStep>(child_node->children.at(0)->step->getOutputStream(),
+                                                 std::move(split_actions.first),
+                                                 filter_step->getFilterColumnName(),
+                                                 filter_step->removesFilterColumn());
+
+        child->setStepDescription(std::move(description));
+
+        array_join_step->updateInputStream(child->getOutputStream(), expected_header);
+        return;
+    }
+
+    /// Add new expression step before ARRAY JOIN.
+    /// Expression/Filter -> ArrayJoin -> Something
+    auto & node = nodes.emplace_back();
+    node.children.swap(child_node->children);
+    child_node->children.emplace_back(&node);
+    /// Expression/Filter -> ArrayJoin -> node -> Something
+
+    node.step = std::make_unique<ExpressionStep>(node.children.at(0)->step->getOutputStream(),
+                                                 std::move(split_actions.first));
+    node.step->setStepDescription(description);
+    array_join_step->updateInputStream(node.step->getOutputStream(), {});
+
+    if (expression_step)
+        parent = std::make_unique<ExpressionStep>(array_join_step->getOutputStream(), split_actions.second);
+    else
+        parent = std::make_unique<FilterStep>(array_join_step->getOutputStream(), split_actions.second,
+                                              filter_step->getFilterColumnName(), filter_step->removesFilterColumn());
+
+    parent->setStepDescription(description + " [split]");
+}
+
+}
diff --git a/src/Processors/QueryPlan/Optimizations/limitPushDown.cpp b/src/Processors/QueryPlan/Optimizations/limitPushDown.cpp
new file mode 100644
index 00000000000..8d2f5f50fc8
--- /dev/null
+++ b/src/Processors/QueryPlan/Optimizations/limitPushDown.cpp
@@ -0,0 +1,107 @@
+#include <Processors/QueryPlan/Optimizations/Optimizations.h>
+#include <Processors/QueryPlan/ITransformingStep.h>
+#include <Processors/QueryPlan/LimitStep.h>
+#include <Processors/QueryPlan/TotalsHavingStep.h>
+#include <Processors/QueryPlan/MergingSortedStep.h>
+#include <Processors/QueryPlan/FinishSortingStep.h>
+#include <Processors/QueryPlan/MergeSortingStep.h>
+#include <Processors/QueryPlan/PartialSortingStep.h>
+#include <Common/typeid_cast.h>
+
+namespace DB::QueryPlanOptimizations
+{
+
+/// If plan looks like Limit -> Sorting, update limit for Sorting
+static bool tryUpdateLimitForSortingSteps(QueryPlan::Node * node, size_t limit)
+{
+    if (limit == 0)
+        return false;
+
+    QueryPlanStepPtr & step = node->step;
+    QueryPlan::Node * child = nullptr;
+    bool updated = false;
+
+    if (auto * merging_sorted = typeid_cast<MergingSortedStep *>(step.get()))
+    {
+        /// TODO: remove LimitStep here.
+        merging_sorted->updateLimit(limit);
+        updated = true;
+        child = node->children.front();
+    }
+    else if (auto * finish_sorting = typeid_cast<FinishSortingStep *>(step.get()))
+    {
+        /// TODO: remove LimitStep here.
+        finish_sorting->updateLimit(limit);
+        updated = true;
+    }
+    else if (auto * merge_sorting = typeid_cast<MergeSortingStep *>(step.get()))
+    {
+        merge_sorting->updateLimit(limit);
+        updated = true;
+        child = node->children.front();
+    }
+    else if (auto * partial_sorting = typeid_cast<PartialSortingStep *>(step.get()))
+    {
+        partial_sorting->updateLimit(limit);
+        updated = true;
+    }
+
+    /// We often have chain PartialSorting -> MergeSorting -> MergingSorted
+    /// Try update limit for them also if possible.
+    if (child)
+        tryUpdateLimitForSortingSteps(child, limit);
+
+    return updated;
+}
+
+void tryPushDownLimit(QueryPlanStepPtr & parent, QueryPlan::Node * child_node)
+{
+    auto & child = child_node->step;
+    auto * limit = typeid_cast<LimitStep *>(parent.get());
+
+    if (!limit)
+        return;
+
+    /// Skip LIMIT WITH TIES by now.
+    if (limit->withTies())
+        return;
+
+    const auto * transforming = dynamic_cast<const ITransformingStep *>(child.get());
+
+    /// Skip everything which is not transform.
+    if (!transforming)
+        return;
+
+    /// Special cases for sorting steps.
+    if (tryUpdateLimitForSortingSteps(child_node, limit->getLimitForSorting()))
+        return;
+
+    /// Special case for TotalsHaving. Totals may be incorrect if we push down limit.
+    if (typeid_cast<const TotalsHavingStep *>(child.get()))
+        return;
+
+    /// Now we should decide if pushing down limit possible for this step.
+
+    const auto & transform_traits = transforming->getTransformTraits();
+    const auto & data_stream_traits = transforming->getDataStreamTraits();
+
+    /// Cannot push down if child changes the number of rows.
+    if (!transform_traits.preserves_number_of_rows)
+        return;
+
+    /// Cannot push down if data was sorted exactly by child stream.
+    if (!child->getOutputStream().sort_description.empty() && !data_stream_traits.preserves_sorting)
+        return;
+
+    /// Now we push down limit only if it doesn't change any stream properties.
+    /// TODO: some of them may be changed and, probably, not important for following streams. We may add such info.
+    if (!limit->getOutputStream().hasEqualPropertiesWith(transforming->getOutputStream()))
+        return;
+
+    /// Input stream for Limit have changed.
+    limit->updateInputStream(transforming->getInputStreams().front());
+
+    parent.swap(child);
+}
+
+}
diff --git a/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp b/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp
new file mode 100644
index 00000000000..3bbfe0e1efb
--- /dev/null
+++ b/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp
@@ -0,0 +1,60 @@
+#include <Processors/QueryPlan/Optimizations/Optimizations.h>
+#include <Processors/QueryPlan/FilterStep.h>
+#include <Processors/QueryPlan/ExpressionStep.h>
+#include <Interpreters/ActionsDAG.h>
+
+namespace DB::QueryPlanOptimizations
+{
+
+bool tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Node * child_node)
+{
+    auto & parent = parent_node->step;
+    auto & child = child_node->step;
+
+    auto * parent_expr = typeid_cast<ExpressionStep *>(parent.get());
+    auto * parent_filter = typeid_cast<FilterStep *>(parent.get());
+    auto * child_expr = typeid_cast<ExpressionStep *>(child.get());
+
+    if (parent_expr && child_expr)
+    {
+        const auto & child_actions = child_expr->getExpression();
+        const auto & parent_actions = parent_expr->getExpression();
+
+        /// We cannot combine actions with arrayJoin and stateful function because we not always can reorder them.
+        /// Example: select rowNumberInBlock() from (select arrayJoin([1, 2]))
+        /// Such a query will return two zeroes if we combine actions together.
+        if (child_actions->hasArrayJoin() && parent_actions->hasStatefulFunctions())
+            return false;
+
+        auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions));
+
+        auto expr = std::make_unique<ExpressionStep>(child_expr->getInputStreams().front(), merged);
+        expr->setStepDescription("(" + parent_expr->getStepDescription() + " + " + child_expr->getStepDescription() + ")");
+
+        parent_node->step = std::move(expr);
+        parent_node->children.swap(child_node->children);
+        return true;
+    }
+    else if (parent_filter && child_expr)
+    {
+        const auto & child_actions = child_expr->getExpression();
+        const auto & parent_actions = parent_filter->getExpression();
+
+        if (child_actions->hasArrayJoin() && parent_actions->hasStatefulFunctions())
+            return false;
+
+        auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions));
+
+        auto filter = std::make_unique<FilterStep>(child_expr->getInputStreams().front(), merged,
+                                                   parent_filter->getFilterColumnName(), parent_filter->removesFilterColumn());
+        filter->setStepDescription("(" + parent_filter->getStepDescription() + " + " + child_expr->getStepDescription() + ")");
+
+        parent_node->step = std::move(filter);
+        parent_node->children.swap(child_node->children);
+        return true;
+    }
+
+    return false;
+}
+
+}
diff --git a/src/Processors/QueryPlan/Optimizations/splitFilter.cpp b/src/Processors/QueryPlan/Optimizations/splitFilter.cpp
new file mode 100644
index 00000000000..fd82bd69a9e
--- /dev/null
+++ b/src/Processors/QueryPlan/Optimizations/splitFilter.cpp
@@ -0,0 +1,50 @@
+#include <Processors/QueryPlan/Optimizations/Optimizations.h>
+#include <Processors/QueryPlan/FilterStep.h>
+#include <Processors/QueryPlan/ExpressionStep.h>
+#include <Interpreters/ActionsDAG.h>
+
+namespace DB::QueryPlanOptimizations
+{
+
+/// Split FilterStep into chain `ExpressionStep -> FilterStep`, where FilterStep contains minimal number of nodes.
+bool trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes)
+{
+    auto * filter_step = typeid_cast<FilterStep *>(node->step.get());
+    if (!filter_step)
+        return false;
+
+    const auto & expr = filter_step->getExpression();
+
+    /// Do not split if there are function like runningDifference.
+    if (expr->hasStatefulFunctions())
+        return false;
+
+    auto split = expr->splitActionsForFilter(filter_step->getFilterColumnName());
+
+    if (split.second->empty())
+        return false;
+
+    if (filter_step->removesFilterColumn())
+        split.second->removeUnusedInput(filter_step->getFilterColumnName());
+
+    auto description = filter_step->getStepDescription();
+
+    auto & filter_node = nodes.emplace_back();
+    node->children.swap(filter_node.children);
+    node->children.push_back(&filter_node);
+
+    filter_node.step = std::make_unique<FilterStep>(
+            filter_node.children.at(0)->step->getOutputStream(),
+            std::move(split.first),
+            filter_step->getFilterColumnName(),
+            filter_step->removesFilterColumn());
+
+    node->step = std::make_unique<ExpressionStep>(filter_node.step->getOutputStream(), std::move(split.second));
+
+    filter_node.step->setStepDescription("(" + description + ")[split]");
+    node->step->setStepDescription(description);
+
+    return true;
+}
+
+}
diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp
index 6fba991a56c..49249e99864 100644
--- a/src/Processors/QueryPlan/QueryPlan.cpp
+++ b/src/Processors/QueryPlan/QueryPlan.cpp
@@ -6,12 +6,7 @@
 #include <Interpreters/ActionsDAG.h>
 #include <Interpreters/ArrayJoinAction.h>
 #include <stack>
-#include <Processors/QueryPlan/LimitStep.h>
-#include "MergingSortedStep.h"
-#include "FinishSortingStep.h"
-#include "MergeSortingStep.h"
-#include "PartialSortingStep.h"
-#include "TotalsHavingStep.h"
+#include <Processors/QueryPlan/Optimizations/Optimizations.h>
 #include "ExpressionStep.h"
 #include "ArrayJoinStep.h"
 #include "FilterStep.h"
@@ -341,263 +336,6 @@ void QueryPlan::explainPipeline(WriteBuffer & buffer, const ExplainPipelineOptio
     }
 }
 
-/// If plan looks like Limit -> Sorting, update limit for Sorting
-bool tryUpdateLimitForSortingSteps(QueryPlan::Node * node, size_t limit)
-{
-    if (limit == 0)
-        return false;
-
-    QueryPlanStepPtr & step = node->step;
-    QueryPlan::Node * child = nullptr;
-    bool updated = false;
-
-    if (auto * merging_sorted = typeid_cast<MergingSortedStep *>(step.get()))
-    {
-        /// TODO: remove LimitStep here.
-        merging_sorted->updateLimit(limit);
-        updated = true;
-        child = node->children.front();
-    }
-    else if (auto * finish_sorting = typeid_cast<FinishSortingStep *>(step.get()))
-    {
-        /// TODO: remove LimitStep here.
-        finish_sorting->updateLimit(limit);
-        updated = true;
-    }
-    else if (auto * merge_sorting = typeid_cast<MergeSortingStep *>(step.get()))
-    {
-        merge_sorting->updateLimit(limit);
-        updated = true;
-        child = node->children.front();
-    }
-    else if (auto * partial_sorting = typeid_cast<PartialSortingStep *>(step.get()))
-    {
-        partial_sorting->updateLimit(limit);
-        updated = true;
-    }
-
-    /// We often have chain PartialSorting -> MergeSorting -> MergingSorted
-    /// Try update limit for them also if possible.
-    if (child)
-        tryUpdateLimitForSortingSteps(child, limit);
-
-    return updated;
-}
-
-/// Move LimitStep down if possible.
-static void tryPushDownLimit(QueryPlanStepPtr & parent, QueryPlan::Node * child_node)
-{
-    auto & child = child_node->step;
-    auto * limit = typeid_cast<LimitStep *>(parent.get());
-
-    if (!limit)
-        return;
-
-    /// Skip LIMIT WITH TIES by now.
-    if (limit->withTies())
-        return;
-
-    const auto * transforming = dynamic_cast<const ITransformingStep *>(child.get());
-
-    /// Skip everything which is not transform.
-    if (!transforming)
-        return;
-
-    /// Special cases for sorting steps.
-    if (tryUpdateLimitForSortingSteps(child_node, limit->getLimitForSorting()))
-        return;
-
-    /// Special case for TotalsHaving. Totals may be incorrect if we push down limit.
-    if (typeid_cast<const TotalsHavingStep *>(child.get()))
-        return;
-
-    /// Now we should decide if pushing down limit possible for this step.
-
-    const auto & transform_traits = transforming->getTransformTraits();
-    const auto & data_stream_traits = transforming->getDataStreamTraits();
-
-    /// Cannot push down if child changes the number of rows.
-    if (!transform_traits.preserves_number_of_rows)
-        return;
-
-    /// Cannot push down if data was sorted exactly by child stream.
-    if (!child->getOutputStream().sort_description.empty() && !data_stream_traits.preserves_sorting)
-        return;
-
-    /// Now we push down limit only if it doesn't change any stream properties.
-    /// TODO: some of them may be changed and, probably, not important for following streams. We may add such info.
-    if (!limit->getOutputStream().hasEqualPropertiesWith(transforming->getOutputStream()))
-        return;
-
-    /// Input stream for Limit have changed.
-    limit->updateInputStream(transforming->getInputStreams().front());
-
-    parent.swap(child);
-}
-
-/// Move ARRAY JOIN up if possible.
-static void tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Node * child_node, QueryPlan::Nodes & nodes)
-{
-    auto & parent = parent_node->step;
-    auto & child = child_node->step;
-    auto * expression_step = typeid_cast<ExpressionStep *>(parent.get());
-    auto * filter_step = typeid_cast<FilterStep *>(parent.get());
-    auto * array_join_step = typeid_cast<ArrayJoinStep *>(child.get());
-
-    if (!(expression_step || filter_step) || !array_join_step)
-        return;
-
-    const auto & array_join = array_join_step->arrayJoin();
-    const auto & expression = expression_step ? expression_step->getExpression()
-                                              : filter_step->getExpression();
-
-    auto split_actions = expression->splitActionsBeforeArrayJoin(array_join->columns);
-
-    /// No actions can be moved before ARRAY JOIN.
-    if (split_actions.first->empty())
-        return;
-
-    auto description = parent->getStepDescription();
-
-    /// All actions was moved before ARRAY JOIN. Swap Expression and ArrayJoin.
-    if (split_actions.second->empty())
-    {
-        auto expected_header = parent->getOutputStream().header;
-
-        /// Expression/Filter -> ArrayJoin
-        std::swap(parent, child);
-        /// ArrayJoin -> Expression/Filter
-
-        if (expression_step)
-            child = std::make_unique<ExpressionStep>(child_node->children.at(0)->step->getOutputStream(),
-                                                     std::move(split_actions.first));
-        else
-            child = std::make_unique<FilterStep>(child_node->children.at(0)->step->getOutputStream(),
-                                                 std::move(split_actions.first),
-                                                 filter_step->getFilterColumnName(),
-                                                 filter_step->removesFilterColumn());
-
-        child->setStepDescription(std::move(description));
-
-        array_join_step->updateInputStream(child->getOutputStream(), expected_header);
-        return;
-    }
-
-    /// Add new expression step before ARRAY JOIN.
-    /// Expression/Filter -> ArrayJoin -> Something
-    auto & node = nodes.emplace_back();
-    node.children.swap(child_node->children);
-    child_node->children.emplace_back(&node);
-    /// Expression/Filter -> ArrayJoin -> node -> Something
-
-    node.step = std::make_unique<ExpressionStep>(node.children.at(0)->step->getOutputStream(),
-                                                 std::move(split_actions.first));
-    node.step->setStepDescription(description);
-    array_join_step->updateInputStream(node.step->getOutputStream(), {});
-
-    if (expression_step)
-        parent = std::make_unique<ExpressionStep>(array_join_step->getOutputStream(), split_actions.second);
-    else
-        parent = std::make_unique<FilterStep>(array_join_step->getOutputStream(), split_actions.second,
-                                              filter_step->getFilterColumnName(), filter_step->removesFilterColumn());
-
-    parent->setStepDescription(description + " [split]");
-}
-
-/// Replace chain `ExpressionStep -> ExpressionStep` to single ExpressionStep
-/// Replace chain `FilterStep -> ExpressionStep` to single FilterStep
-static bool tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Node * child_node)
-{
-    auto & parent = parent_node->step;
-    auto & child = child_node->step;
-
-    auto * parent_expr = typeid_cast<ExpressionStep *>(parent.get());
-    auto * parent_filter = typeid_cast<FilterStep *>(parent.get());
-    auto * child_expr = typeid_cast<ExpressionStep *>(child.get());
-
-    if (parent_expr && child_expr)
-    {
-        const auto & child_actions = child_expr->getExpression();
-        const auto & parent_actions = parent_expr->getExpression();
-
-        /// We cannot combine actions with arrayJoin and stateful function because we not always can reorder them.
-        /// Example: select rowNumberInBlock() from (select arrayJoin([1, 2]))
-        /// Such a query will return two zeroes if we combine actions together.
-        if (child_actions->hasArrayJoin() && parent_actions->hasStatefulFunctions())
-            return false;
-
-        auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions));
-
-        auto expr = std::make_unique<ExpressionStep>(child_expr->getInputStreams().front(), merged);
-        expr->setStepDescription("(" + parent_expr->getStepDescription() + " + " + child_expr->getStepDescription() + ")");
-
-        parent_node->step = std::move(expr);
-        parent_node->children.swap(child_node->children);
-        return true;
-    }
-    else if (parent_filter && child_expr)
-    {
-        const auto & child_actions = child_expr->getExpression();
-        const auto & parent_actions = parent_filter->getExpression();
-
-        if (child_actions->hasArrayJoin() && parent_actions->hasStatefulFunctions())
-            return false;
-
-        auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions));
-
-        auto filter = std::make_unique<FilterStep>(child_expr->getInputStreams().front(), merged,
-                                                   parent_filter->getFilterColumnName(), parent_filter->removesFilterColumn());
-        filter->setStepDescription("(" + parent_filter->getStepDescription() + " + " + child_expr->getStepDescription() + ")");
-
-        parent_node->step = std::move(filter);
-        parent_node->children.swap(child_node->children);
-        return true;
-    }
-
-    return false;
-}
-
-/// Split FilterStep into chain `ExpressionStep -> FilterStep`, where FilterStep contains minimal number of nodes.
-static bool trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes)
-{
-    auto * filter_step = typeid_cast<FilterStep *>(node->step.get());
-    if (!filter_step)
-        return false;
-
-    const auto & expr = filter_step->getExpression();
-
-    /// Do not split if there are function like runningDifference.
-    if (expr->hasStatefulFunctions())
-        return false;
-
-    auto split = expr->splitActionsForFilter(filter_step->getFilterColumnName());
-
-    if (split.second->empty())
-        return false;
-
-    if (filter_step->removesFilterColumn())
-        split.second->removeUnusedInput(filter_step->getFilterColumnName());
-
-    auto description = filter_step->getStepDescription();
-
-    auto & filter_node = nodes.emplace_back();
-    node->children.swap(filter_node.children);
-    node->children.push_back(&filter_node);
-
-    filter_node.step = std::make_unique<FilterStep>(
-            filter_node.children.at(0)->step->getOutputStream(),
-            std::move(split.first),
-            filter_step->getFilterColumnName(),
-            filter_step->removesFilterColumn());
-
-    node->step = std::make_unique<ExpressionStep>(filter_node.step->getOutputStream(), std::move(split.second));
-
-    filter_node.step->setStepDescription("(" + description + ")[split]");
-    node->step->setStepDescription(description);
-
-    return true;
-}
-
 void QueryPlan::optimize()
 {
     /* Stack contains info for every nodes in the path from tree root to the current node.
@@ -623,14 +361,14 @@ void QueryPlan::optimize()
         {
             if (frame.node->children.size() == 1)
             {
-                tryPushDownLimit(frame.node->step, frame.node->children.front());
+                QueryPlanOptimizations::tryPushDownLimit(frame.node->step, frame.node->children.front());
 
-                while (tryMergeExpressions(frame.node, frame.node->children.front()));
+                while (QueryPlanOptimizations::tryMergeExpressions(frame.node, frame.node->children.front()));
 
                 if (frame.node->children.size() == 1)
-                    tryLiftUpArrayJoin(frame.node, frame.node->children.front(), nodes);
+                    QueryPlanOptimizations::tryLiftUpArrayJoin(frame.node, frame.node->children.front(), nodes);
 
-                trySplitFilter(frame.node, nodes);
+                QueryPlanOptimizations::trySplitFilter(frame.node, nodes);
             }
         }
 
@@ -643,11 +381,11 @@ void QueryPlan::optimize()
         {
             if (frame.node->children.size() == 1)
             {
-                while (tryMergeExpressions(frame.node, frame.node->children.front()));
+                while (QueryPlanOptimizations::tryMergeExpressions(frame.node, frame.node->children.front()));
 
-                trySplitFilter(frame.node, nodes);
+                QueryPlanOptimizations::trySplitFilter(frame.node, nodes);
 
-                tryLiftUpArrayJoin(frame.node, frame.node->children.front(), nodes);
+                QueryPlanOptimizations::tryLiftUpArrayJoin(frame.node, frame.node->children.front(), nodes);
             }
 
             stack.pop();
diff --git a/src/Processors/ya.make b/src/Processors/ya.make
index 2eb27be8899..f05a9f0bdba 100644
--- a/src/Processors/ya.make
+++ b/src/Processors/ya.make
@@ -116,6 +116,10 @@ SRCS(
     QueryPlan/MergingFinal.cpp
     QueryPlan/MergingSortedStep.cpp
     QueryPlan/OffsetStep.cpp
+    QueryPlan/Optimizations/liftUpArrayJoin.cpp
+    QueryPlan/Optimizations/limitPushDown.cpp
+    QueryPlan/Optimizations/mergeExpressions.cpp
+    QueryPlan/Optimizations/splitFilter.cpp
     QueryPlan/PartialSortingStep.cpp
     QueryPlan/QueryPlan.cpp
     QueryPlan/ReadFromPreparedSource.cpp

From 36963e0ebaaf249ae7e55c1ab8712019c6b35f30 Mon Sep 17 00:00:00 2001
From: vdimir <vdimir@yandex-team.ru>
Date: Wed, 27 Jan 2021 18:17:42 +0300
Subject: [PATCH 0255/1238] Minor fix

---
 src/Functions/FunctionsCoding.h                     | 13 +++++--------
 .../0_stateless/00725_ipv4_ipv6_domains.reference   | 11 +++++++++++
 .../queries/0_stateless/00725_ipv4_ipv6_domains.sql |  2 ++
 3 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/src/Functions/FunctionsCoding.h b/src/Functions/FunctionsCoding.h
index 38940be1c58..b54550536ff 100644
--- a/src/Functions/FunctionsCoding.h
+++ b/src/Functions/FunctionsCoding.h
@@ -263,12 +263,10 @@ public:
     static constexpr auto name = "IPv6StringToNum";
     static FunctionPtr create(const Context &) { return std::make_shared<FunctionIPv6StringToNum>(); }
 
-    static inline UInt32 parseIPv4(const char * pos)
+    static inline bool tryParseIPv4(const char * pos)
     {
         UInt32 result = 0;
-        DB::parseIPv4(pos, reinterpret_cast<unsigned char *>(&result));
-
-        return result;
+        return DB::parseIPv4(pos, reinterpret_cast<unsigned char *>(&result));
     }
 
     String getName() const override { return name; }
@@ -301,18 +299,17 @@ public:
             const ColumnString::Offsets & offsets_src = col_in->getOffsets();
             size_t src_offset = 0;
 
-            char src_ipv4_buf[sizeof("::ffff:") + IPV4_MAX_TEXT_LENGTH + 1] = "::ffff:";
-
             for (size_t out_offset = 0, i = 0; out_offset < vec_res.size(); out_offset += IPV6_BINARY_LENGTH, ++i)
             {
                 /// For both cases below: In case of failure, the function parseIPv6 fills vec_res with zero bytes.
 
                 /// If the source IP address is parsable as an IPv4 address, then transform it into a valid IPv6 address.
                 /// Keeping it simple by just prefixing `::ffff:` to the IPv4 address to represent it as a valid IPv6 address.
-                if (parseIPv4(reinterpret_cast<const char *>(&vec_src[src_offset])))
+                if (tryParseIPv4(reinterpret_cast<const char *>(&vec_src[src_offset])))
                 {
+                    char src_ipv4_buf[sizeof("::ffff:") + IPV4_MAX_TEXT_LENGTH + 1] = "::ffff:";
                     std::strcat(src_ipv4_buf, reinterpret_cast<const char *>(&vec_src[src_offset]));
-                    parseIPv6(reinterpret_cast<const char *>(&src_ipv4_buf), reinterpret_cast<unsigned char *>(&vec_res[out_offset]));
+                    parseIPv6(src_ipv4_buf, reinterpret_cast<unsigned char *>(&vec_res[out_offset]));
                 }
                 else
                 {
diff --git a/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference b/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference
index 12b309316aa..bddd10ba865 100644
--- a/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference
+++ b/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference
@@ -49,5 +49,16 @@ FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF is ipv6 string: 	1
 ::ffff:127.0.0.1 is ipv6 string:                        	1
 ::ffff:8.8.8.8 is ipv6 string:                          	1
 2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D is ipv6 string: 	1
+::ffff:0.0.0.0
 ::ffff:127.0.0.1
 ::ffff:127.0.0.1
+::ffff:127.0.0.0
+::ffff:127.0.0.1
+::ffff:127.0.0.2
+::ffff:127.0.0.3
+::ffff:127.0.0.4
+::ffff:127.0.0.5
+::ffff:127.0.0.6
+::ffff:127.0.0.7
+::ffff:127.0.0.8
+::ffff:127.0.0.9
diff --git a/tests/queries/0_stateless/00725_ipv4_ipv6_domains.sql b/tests/queries/0_stateless/00725_ipv4_ipv6_domains.sql
index c070dcfe835..39f50781a7e 100644
--- a/tests/queries/0_stateless/00725_ipv4_ipv6_domains.sql
+++ b/tests/queries/0_stateless/00725_ipv4_ipv6_domains.sql
@@ -86,5 +86,7 @@ SELECT '2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D is ipv6 string: ', isIPv6String(
 
 -- IPV6 functions parse IPv4 addresses.
 
+SELECT toIPv6('0.0.0.0');
 SELECT toIPv6('127.0.0.1');
 SELECT cutIPv6(IPv6StringToNum('127.0.0.1'), 0, 0);
+SELECT toIPv6('127.0.0.' || toString(number)) FROM numbers(10);

From 8070562bd00e61ab64f3a41bd1b8e7383e729da4 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Wed, 27 Jan 2021 18:24:39 +0300
Subject: [PATCH 0256/1238] Run tests in parallel in flaky check

---
 docker/test/stateless/run.sh | 11 +++++------
 tests/clickhouse-test        |  7 +++++--
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh
index d9a03f84726..fb510a87fcd 100755
--- a/docker/test/stateless/run.sh
+++ b/docker/test/stateless/run.sh
@@ -55,12 +55,11 @@ function run_tests()
         ADDITIONAL_OPTIONS+=('00000_no_tests_to_skip')
     fi
 
-    for _ in $(seq 1 "$NUM_TRIES"); do
-        clickhouse-test --testname --shard --zookeeper --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a test_output/test_result.txt
-        if [ "${PIPESTATUS[0]}" -ne "0" ]; then
-            break;
-        fi
-    done
+    clickhouse-test --testname --shard --zookeeper --hung-check --print-time \
+            --test-runs "$NUM_TRIES" --jobs 4 \
+            "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
+        | ts '%Y-%m-%d %H:%M:%S' \
+        | tee -a test_output/test_result.txt
 }
 
 export -f run_tests
diff --git a/tests/clickhouse-test b/tests/clickhouse-test
index d5c6019d28f..70f7fe18982 100755
--- a/tests/clickhouse-test
+++ b/tests/clickhouse-test
@@ -342,9 +342,10 @@ def run_tests_array(all_tests_with_params):
                             SERVER_DIED = True
                             break
 
+                    file_suffix = ('.' + str(os.getpid())) if is_concurrent else ''
                     reference_file = os.path.join(suite_dir, name) + '.reference'
-                    stdout_file = os.path.join(suite_tmp_dir, name) + '.stdout'
-                    stderr_file = os.path.join(suite_tmp_dir, name) + '.stderr'
+                    stdout_file = os.path.join(suite_tmp_dir, name) + file_suffix + '.stdout'
+                    stderr_file = os.path.join(suite_tmp_dir, name) + file_suffix + '.stderr'
 
                     proc, stdout, stderr, total_time = run_single_test(args, ext, server_logs_level, client_options, case_file, stdout_file, stderr_file)
 
@@ -717,6 +718,7 @@ def main(args):
             all_tests = [case for case in all_tests if is_test_from_dir(suite_dir, case)]
             if args.test:
                 all_tests = [t for t in all_tests if any([re.search(r, t) for r in args.test])]
+            all_tests = all_tests * args.test_runs
             all_tests.sort(key=key_func)
 
             jobs = args.jobs
@@ -886,6 +888,7 @@ if __name__ == '__main__':
     parser.add_argument('--database', help='Database for tests (random name test_XXXXXX by default)')
     parser.add_argument('--parallel', default='1/1', help='One parallel test run number/total')
     parser.add_argument('-j', '--jobs', default=1, nargs='?', type=int, help='Run all tests in parallel')
+    parser.add_argument('--test-runs', default=1, nargs='?', type=int, help='Run each test many times (useful for e.g. flaky check)')
     parser.add_argument('-U', '--unified', default=3, type=int, help='output NUM lines of unified context')
     parser.add_argument('-r', '--server-check-retries', default=30, type=int, help='Num of tries to execute SELECT 1 before tests started')
     parser.add_argument('--skip-list-path', help="Path to skip-list file")

From 0db606084a3924cbd0de2251a327cb8b2780ae75 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Wed, 27 Jan 2021 18:30:09 +0300
Subject: [PATCH 0257/1238] change some tests to see the effect

---
 tests/queries/0_stateless/00459_group_array_insert_at.sql | 1 +
 tests/queries/0_stateless/01014_lazy_database_basic.sh    | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/tests/queries/0_stateless/00459_group_array_insert_at.sql b/tests/queries/0_stateless/00459_group_array_insert_at.sql
index 59ecfc05045..8bc329d9103 100644
--- a/tests/queries/0_stateless/00459_group_array_insert_at.sql
+++ b/tests/queries/0_stateless/00459_group_array_insert_at.sql
@@ -1,3 +1,4 @@
+-- remove this comment before merge
 SELECT groupArrayInsertAt(toString(number), number * 2) FROM (SELECT * FROM system.numbers LIMIT 10);
 SELECT groupArrayInsertAt('-')(toString(number), number * 2) FROM (SELECT * FROM system.numbers LIMIT 10);
 SELECT groupArrayInsertAt([123])(range(number), number * 2) FROM (SELECT * FROM system.numbers LIMIT 10);
diff --git a/tests/queries/0_stateless/01014_lazy_database_basic.sh b/tests/queries/0_stateless/01014_lazy_database_basic.sh
index 11d698e764e..76cbcea6d97 100755
--- a/tests/queries/0_stateless/01014_lazy_database_basic.sh
+++ b/tests/queries/0_stateless/01014_lazy_database_basic.sh
@@ -4,6 +4,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
 
+# remove this comment before merge
+
 ${CLICKHOUSE_CLIENT} -n -q "DROP DATABASE IF EXISTS testlazy"
 
 ${CLICKHOUSE_CLIENT} -n -q "

From 95e15131a8e539f91a2214b60e43349922263e54 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Wed, 27 Jan 2021 20:32:59 +0300
Subject: [PATCH 0258/1238] Fix unsufficient args check (trash code) in
 StringSearcher

---
 src/Common/StringSearcher.h        | 111 +++++++++++++++++++++--------
 src/Common/Volnitsky.h             |  82 ++++++++++++++-------
 src/Functions/LowerUpperUTF8Impl.h |   1 +
 src/Functions/randomStringUTF8.cpp |  10 ++-
 4 files changed, 146 insertions(+), 58 deletions(-)

diff --git a/src/Common/StringSearcher.h b/src/Common/StringSearcher.h
index e39253c439f..fb731bfcda3 100644
--- a/src/Common/StringSearcher.h
+++ b/src/Common/StringSearcher.h
@@ -98,15 +98,25 @@ public:
         }
         else
         {
-            const auto first_u32 = UTF8::convert(needle);
-            const auto first_l_u32 = Poco::Unicode::toLower(first_u32);
-            const auto first_u_u32 = Poco::Unicode::toUpper(first_u32);
+            int first_u32 = UTF8::convert(needle);
 
-            /// lower and uppercase variants of the first octet of the first character in `needle`
-            UTF8::convert(first_l_u32, l_seq, sizeof(l_seq));
-            l = l_seq[0];
-            UTF8::convert(first_u_u32, u_seq, sizeof(u_seq));
-            u = u_seq[0];
+            /// Invalid UTF-8
+            if (first_u32 < 0)
+            {
+                l = needle[0];
+                u = needle[0];
+            }
+            else
+            {
+                int first_l_u32 = Poco::Unicode::toLower(first_u32);
+                int first_u_u32 = Poco::Unicode::toUpper(first_u32);
+
+                /// lower and uppercase variants of the first octet of the first character in `needle`
+                UTF8::convert(first_l_u32, l_seq, sizeof(l_seq));
+                l = l_seq[0];
+                UTF8::convert(first_u_u32, u_seq, sizeof(u_seq));
+                u = u_seq[0];
+            }
         }
 
 #ifdef __SSE4_1__
@@ -128,18 +138,21 @@ public:
                 continue;
             }
 
-            const auto src_len = UTF8::seqLength(*needle_pos);
-            const auto c_u32 = UTF8::convert(needle_pos);
+            size_t src_len = UTF8::seqLength(*needle_pos);
+            int c_u32 = UTF8::convert(needle_pos);  /// This assumes valid UTF-8 or zero byte after needle.
 
-            const auto c_l_u32 = Poco::Unicode::toLower(c_u32);
-            const auto c_u_u32 = Poco::Unicode::toUpper(c_u32);
+            if (c_u32 >= 0)
+            {
+                int c_l_u32 = Poco::Unicode::toLower(c_u32);
+                int c_u_u32 = Poco::Unicode::toUpper(c_u32);
 
-            const auto dst_l_len = static_cast<uint8_t>(UTF8::convert(c_l_u32, l_seq, sizeof(l_seq)));
-            const auto dst_u_len = static_cast<uint8_t>(UTF8::convert(c_u_u32, u_seq, sizeof(u_seq)));
+                uint8_t dst_l_len = static_cast<uint8_t>(UTF8::convert(c_l_u32, l_seq, sizeof(l_seq)));
+                uint8_t dst_u_len = static_cast<uint8_t>(UTF8::convert(c_u_u32, u_seq, sizeof(u_seq)));
 
-            /// @note Unicode standard states it is a rare but possible occasion
-            if (!(dst_l_len == dst_u_len && dst_u_len == src_len))
-                throw Exception{"UTF8 sequences with different lowercase and uppercase lengths are not supported", ErrorCodes::UNSUPPORTED_PARAMETER};
+                /// @note Unicode standard states it is a rare but possible occasion
+                if (!(dst_l_len == dst_u_len && dst_u_len == src_len))
+                    throw Exception{"UTF8 sequences with different lowercase and uppercase lengths are not supported", ErrorCodes::UNSUPPORTED_PARAMETER};
+            }
 
             cache_actual_len += src_len;
             if (cache_actual_len < n)
@@ -183,11 +196,20 @@ public:
                     pos += cache_valid_len;
                     auto needle_pos = needle + cache_valid_len;
 
-                    while (needle_pos < needle_end &&
-                           Poco::Unicode::toLower(UTF8::convert(pos)) ==
-                           Poco::Unicode::toLower(UTF8::convert(needle_pos)))
+                    while (needle_pos < needle_end)
                     {
-                        /// @note assuming sequences for lowercase and uppercase have exact same length
+                        int haystack_code_point = UTF8::convert(pos);
+                        int needle_code_point = UTF8::convert(needle_pos);
+
+                        /// Invalid UTF-8, should not compare equals
+                        if (haystack_code_point < 0 || needle_code_point < 0)
+                            break;
+
+                        /// Not equals case insensitive.
+                        if (Poco::Unicode::toLower(haystack_code_point) !=  Poco::Unicode::toLower(needle_code_point))
+                            break;
+
+                        /// @note assuming sequences for lowercase and uppercase have exact same length (that is not always true)
                         const auto len = UTF8::seqLength(*pos);
                         pos += len;
                         needle_pos += len;
@@ -209,10 +231,19 @@ public:
             pos += first_needle_symbol_is_ascii;
             auto needle_pos = needle + first_needle_symbol_is_ascii;
 
-            while (needle_pos < needle_end &&
-                   Poco::Unicode::toLower(UTF8::convert(pos)) ==
-                   Poco::Unicode::toLower(UTF8::convert(needle_pos)))
+            while (needle_pos < needle_end)
             {
+                int haystack_code_point = UTF8::convert(pos);
+                int needle_code_point = UTF8::convert(needle_pos);
+
+                /// Invalid UTF-8, should not compare equals
+                if (haystack_code_point < 0 || needle_code_point < 0)
+                    break;
+
+                /// Not equals case insensitive.
+                if (Poco::Unicode::toLower(haystack_code_point) !=  Poco::Unicode::toLower(needle_code_point))
+                    break;
+
                 const auto len = UTF8::seqLength(*pos);
                 pos += len;
                 needle_pos += len;
@@ -270,11 +301,20 @@ public:
                             auto haystack_pos = haystack + cache_valid_len;
                             auto needle_pos = needle + cache_valid_len;
 
-                            while (haystack_pos < haystack_end && needle_pos < needle_end &&
-                                   Poco::Unicode::toLower(UTF8::convert(haystack_pos)) ==
-                                   Poco::Unicode::toLower(UTF8::convert(needle_pos)))
+                            while (haystack_pos < haystack_end && needle_pos < needle_end)
                             {
-                                /// @note assuming sequences for lowercase and uppercase have exact same length
+                                int haystack_code_point = UTF8::convert(haystack_pos);
+                                int needle_code_point = UTF8::convert(needle_pos);
+
+                                /// Invalid UTF-8, should not compare equals
+                                if (haystack_code_point < 0 || needle_code_point < 0)
+                                    break;
+
+                                /// Not equals case insensitive.
+                                if (Poco::Unicode::toLower(haystack_code_point) !=  Poco::Unicode::toLower(needle_code_point))
+                                    break;
+
+                                /// @note assuming sequences for lowercase and uppercase have exact same length (that is not always true)
                                 const auto len = UTF8::seqLength(*haystack_pos);
                                 haystack_pos += len;
                                 needle_pos += len;
@@ -302,10 +342,19 @@ public:
                 auto haystack_pos = haystack + first_needle_symbol_is_ascii;
                 auto needle_pos = needle + first_needle_symbol_is_ascii;
 
-                while (haystack_pos < haystack_end && needle_pos < needle_end &&
-                       Poco::Unicode::toLower(UTF8::convert(haystack_pos)) ==
-                       Poco::Unicode::toLower(UTF8::convert(needle_pos)))
+                while (haystack_pos < haystack_end && needle_pos < needle_end)
                 {
+                    int haystack_code_point = UTF8::convert(haystack_pos);
+                    int needle_code_point = UTF8::convert(needle_pos);
+
+                    /// Invalid UTF-8, should not compare equals
+                    if (haystack_code_point < 0 || needle_code_point < 0)
+                        break;
+
+                    /// Not equals case insensitive.
+                    if (Poco::Unicode::toLower(haystack_code_point) !=  Poco::Unicode::toLower(needle_code_point))
+                        break;
+
                     const auto len = UTF8::seqLength(*haystack_pos);
                     haystack_pos += len;
                     needle_pos += len;
diff --git a/src/Common/Volnitsky.h b/src/Common/Volnitsky.h
index a1fa83b4f33..09317f1fbbf 100644
--- a/src/Common/Volnitsky.h
+++ b/src/Common/Volnitsky.h
@@ -163,31 +163,41 @@ namespace VolnitskyTraits
                         auto seq_pos = pos;
                         UTF8::syncBackward(seq_pos, begin);
 
-                        const auto u32 = UTF8::convert(seq_pos);
-                        const auto l_u32 = Poco::Unicode::toLower(u32);
-                        const auto u_u32 = Poco::Unicode::toUpper(u32);
-
-                        /// symbol is case-independent
-                        if (l_u32 == u_u32)
+                        int u32 = UTF8::convert(seq_pos);
+                        /// Invalid UTF-8
+                        if (u32 < 0)
+                        {
                             putNGramBase(n, offset);
+                        }
                         else
                         {
-                            /// where is the given ngram in respect to the start of UTF-8 sequence?
-                            const auto seq_ngram_offset = pos - seq_pos;
+                            int l_u32 = Poco::Unicode::toLower(u32);
+                            int u_u32 = Poco::Unicode::toUpper(u32);
 
-                            Seq seq;
+                            /// symbol is case-independent
+                            if (l_u32 == u_u32)
+                            {
+                                putNGramBase(n, offset);
+                            }
+                            else
+                            {
+                                /// where is the given ngram in respect to the start of UTF-8 sequence?
+                                const auto seq_ngram_offset = pos - seq_pos;
 
-                            /// put ngram for lowercase
-                            UTF8::convert(l_u32, seq, sizeof(seq));
-                            chars.c0 = seq[seq_ngram_offset];
-                            chars.c1 = seq[seq_ngram_offset + 1];
-                            putNGramBase(n, offset);
+                                Seq seq;
 
-                            /// put ngram for uppercase
-                            UTF8::convert(u_u32, seq, sizeof(seq));
-                            chars.c0 = seq[seq_ngram_offset]; //-V519
-                            chars.c1 = seq[seq_ngram_offset + 1]; //-V519
-                            putNGramBase(n, offset);
+                                /// put ngram for lowercase
+                                UTF8::convert(l_u32, seq, sizeof(seq));
+                                chars.c0 = seq[seq_ngram_offset];
+                                chars.c1 = seq[seq_ngram_offset + 1];
+                                putNGramBase(n, offset);
+
+                                /// put ngram for uppercase
+                                UTF8::convert(u_u32, seq, sizeof(seq));
+                                chars.c0 = seq[seq_ngram_offset]; //-V519
+                                chars.c1 = seq[seq_ngram_offset + 1]; //-V519
+                                putNGramBase(n, offset);
+                            }
                         }
                     }
                     else
@@ -199,16 +209,38 @@ namespace VolnitskyTraits
                         /// where is the given ngram in respect to the start of first UTF-8 sequence?
                         const auto seq_ngram_offset = pos - first_seq_pos;
 
-                        const auto first_u32 = UTF8::convert(first_seq_pos);
-                        const auto first_l_u32 = Poco::Unicode::toLower(first_u32);
-                        const auto first_u_u32 = Poco::Unicode::toUpper(first_u32);
+                        int first_u32 = UTF8::convert(first_seq_pos);
+                        int first_l_u32;
+                        int first_u_u32;
+
+                        if (first_u32 < 0)
+                        {
+                            first_l_u32 = first_u32;
+                            first_u_u32 = first_u32;
+                        }
+                        else
+                        {
+                            first_l_u32 = Poco::Unicode::toLower(first_u32);
+                            first_u_u32 = Poco::Unicode::toUpper(first_u32);
+                        }
 
                         /// second sequence always start immediately after u_pos
                         auto second_seq_pos = pos + 1;
 
-                        const auto second_u32 = UTF8::convert(second_seq_pos); /// TODO This assumes valid UTF-8 or zero byte after needle.
-                        const auto second_l_u32 = Poco::Unicode::toLower(second_u32);
-                        const auto second_u_u32 = Poco::Unicode::toUpper(second_u32);
+                        int second_u32 = UTF8::convert(second_seq_pos); /// This assumes valid UTF-8 or zero byte after needle.
+                        int second_l_u32;
+                        int second_u_u32;
+
+                        if (second_u32 < 0)
+                        {
+                            second_l_u32 = second_u32;
+                            second_u_u32 = second_u32;
+                        }
+                        else
+                        {
+                            second_l_u32 = Poco::Unicode::toLower(second_u32);
+                            second_u_u32 = Poco::Unicode::toUpper(second_u32);
+                        }
 
                         /// both symbols are case-independent
                         if (first_l_u32 == first_u_u32 && second_l_u32 == second_u_u32)
diff --git a/src/Functions/LowerUpperUTF8Impl.h b/src/Functions/LowerUpperUTF8Impl.h
index a7b5f5a9a13..fc13466f4fe 100644
--- a/src/Functions/LowerUpperUTF8Impl.h
+++ b/src/Functions/LowerUpperUTF8Impl.h
@@ -144,6 +144,7 @@ struct LowerUpperUTF8Impl
                 if (dst_code_point > 0)
                 {
                     int dst_sequence_length = UTF8::convert(dst_code_point, dst, src_end - src);
+                    assert(dst_sequence_length >= 0);
 
                     /// We don't support cases when lowercase and uppercase characters occupy different number of bytes in UTF-8.
                     /// As an example, this happens for ß and ẞ.
diff --git a/src/Functions/randomStringUTF8.cpp b/src/Functions/randomStringUTF8.cpp
index 1873a5afde3..a5954eaaf9f 100644
--- a/src/Functions/randomStringUTF8.cpp
+++ b/src/Functions/randomStringUTF8.cpp
@@ -119,8 +119,14 @@ public:
                 UInt32 code_point2 = generate_code_point(rand >> 32);
 
                 /// We have padding in column buffers that we can overwrite.
-                pos += UTF8::convert(code_point1, pos, sizeof(int));
-                last_writen_bytes = UTF8::convert(code_point2, pos, sizeof(int));
+                auto length1 = UTF8::convert(code_point1, pos, sizeof(int));
+                auto length2 = UTF8::convert(code_point2, pos, sizeof(int));
+
+                assert(length1 >= 0);
+                assert(length2 >= 0);
+
+                pos += length1;
+                last_writen_bytes = length2;
                 pos += last_writen_bytes;
             }
             offset = pos - data_to.data() + 1;

From cf05c17cfa341e1162be515bebf59e10e37d9455 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Wed, 27 Jan 2021 20:36:53 +0300
Subject: [PATCH 0259/1238] Rewrite QueryPlan tree optimizations traverse.

---
 .../QueryPlan/Optimizations/Optimizations.h   |  43 +++++++-
 .../Optimizations/liftUpArrayJoin.cpp         |  13 ++-
 .../QueryPlan/Optimizations/limitPushDown.cpp |  24 +++--
 .../Optimizations/mergeExpressions.cpp        |   7 +-
 .../QueryPlan/Optimizations/optimizeTree.cpp  | 102 ++++++++++++++++++
 src/Processors/QueryPlan/QueryPlan.cpp        |  57 +---------
 6 files changed, 172 insertions(+), 74 deletions(-)
 create mode 100644 src/Processors/QueryPlan/Optimizations/optimizeTree.cpp

diff --git a/src/Processors/QueryPlan/Optimizations/Optimizations.h b/src/Processors/QueryPlan/Optimizations/Optimizations.h
index 93b4be90a98..37b32d6a095 100644
--- a/src/Processors/QueryPlan/Optimizations/Optimizations.h
+++ b/src/Processors/QueryPlan/Optimizations/Optimizations.h
@@ -1,4 +1,5 @@
 #include <Processors/QueryPlan/QueryPlan.h>
+#include <array>
 
 namespace DB
 {
@@ -6,18 +7,52 @@ namespace DB
 namespace QueryPlanOptimizations
 {
 
+/// This is the main function which optimizes the whole QueryPlan tree.
+void optimizeTree(QueryPlan::Node & root, QueryPlan::Nodes & nodes);
+
+/// Optimization is a function applied to QueryPlan::Node.
+/// It can read and update subtree of specified node.
+/// It return true if some change of thee happened.
+/// New nodes should be added to QueryPlan::Nodes list.
+/// It is not needed to remove old nodes from the list.
+///
+/// Optimization must guarantee that:
+///  * the structure of tree is correct
+///  * no more then `read_depth` layers of subtree was read
+///  * no more then `update_depth` layers of subtree was updated
+struct Optimization
+{
+    using Function = bool (*)(QueryPlan::Node *, QueryPlan::Nodes &);
+    const Function run = nullptr;
+    const size_t read_depth;
+    const size_t update_depth;
+};
+
+/// Move ARRAY JOIN up if possible.
+bool tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes);
+
 /// Move LimitStep down if possible.
-void tryPushDownLimit(QueryPlanStepPtr & parent, QueryPlan::Node * child_node);
+bool tryPushDownLimit(QueryPlan::Node * parent_node, QueryPlan::Nodes &);
 
 /// Split FilterStep into chain `ExpressionStep -> FilterStep`, where FilterStep contains minimal number of nodes.
 bool trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes);
 
 /// Replace chain `ExpressionStep -> ExpressionStep` to single ExpressionStep
 /// Replace chain `FilterStep -> ExpressionStep` to single FilterStep
-bool tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Node * child_node);
+bool tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &);
 
-/// Move ARRAY JOIN up if possible.
-void tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Node * child_node, QueryPlan::Nodes & nodes);
+inline const auto & getOptimizations()
+{
+    static const std::array<Optimization, 4> optimizations =
+    {{
+        {tryLiftUpArrayJoin, 2, 2},
+        {tryPushDownLimit, 2, 2},
+        {trySplitFilter, 1, 2},
+        {tryMergeExpressions, 2, 1},
+     }};
+
+    return optimizations;
+}
 
 }
 
diff --git a/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp b/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp
index 474124d970d..026ceccebfb 100644
--- a/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp
+++ b/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp
@@ -8,8 +8,13 @@
 namespace DB::QueryPlanOptimizations
 {
 
-void tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Node * child_node, QueryPlan::Nodes & nodes)
+bool tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes)
 {
+    if (parent_node->children.size() != 1)
+        return false;
+
+    QueryPlan::Node * child_node = parent_node->children.front();
+
     auto & parent = parent_node->step;
     auto & child = child_node->step;
     auto * expression_step = typeid_cast<ExpressionStep *>(parent.get());
@@ -17,7 +22,7 @@ void tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Node * child_n
     auto * array_join_step = typeid_cast<ArrayJoinStep *>(child.get());
 
     if (!(expression_step || filter_step) || !array_join_step)
-        return;
+        return false;
 
     const auto & array_join = array_join_step->arrayJoin();
     const auto & expression = expression_step ? expression_step->getExpression()
@@ -27,7 +32,7 @@ void tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Node * child_n
 
     /// No actions can be moved before ARRAY JOIN.
     if (split_actions.first->empty())
-        return;
+        return false;
 
     auto description = parent->getStepDescription();
 
@@ -52,7 +57,7 @@ void tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Node * child_n
         child->setStepDescription(std::move(description));
 
         array_join_step->updateInputStream(child->getOutputStream(), expected_header);
-        return;
+        return false;
     }
 
     /// Add new expression step before ARRAY JOIN.
diff --git a/src/Processors/QueryPlan/Optimizations/limitPushDown.cpp b/src/Processors/QueryPlan/Optimizations/limitPushDown.cpp
index 8d2f5f50fc8..fd6c2e19f6b 100644
--- a/src/Processors/QueryPlan/Optimizations/limitPushDown.cpp
+++ b/src/Processors/QueryPlan/Optimizations/limitPushDown.cpp
@@ -54,31 +54,37 @@ static bool tryUpdateLimitForSortingSteps(QueryPlan::Node * node, size_t limit)
     return updated;
 }
 
-void tryPushDownLimit(QueryPlanStepPtr & parent, QueryPlan::Node * child_node)
+bool tryPushDownLimit(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
 {
+    if (parent_node->children.size() != 1)
+        return false;
+
+    QueryPlan::Node * child_node = parent_node->children.front();
+
+    auto & parent = parent_node->step;
     auto & child = child_node->step;
     auto * limit = typeid_cast<LimitStep *>(parent.get());
 
     if (!limit)
-        return;
+        return false;
 
     /// Skip LIMIT WITH TIES by now.
     if (limit->withTies())
-        return;
+        return false;
 
     const auto * transforming = dynamic_cast<const ITransformingStep *>(child.get());
 
     /// Skip everything which is not transform.
     if (!transforming)
-        return;
+        return false;
 
     /// Special cases for sorting steps.
     if (tryUpdateLimitForSortingSteps(child_node, limit->getLimitForSorting()))
-        return;
+        return false;
 
     /// Special case for TotalsHaving. Totals may be incorrect if we push down limit.
     if (typeid_cast<const TotalsHavingStep *>(child.get()))
-        return;
+        return false;
 
     /// Now we should decide if pushing down limit possible for this step.
 
@@ -87,16 +93,16 @@ void tryPushDownLimit(QueryPlanStepPtr & parent, QueryPlan::Node * child_node)
 
     /// Cannot push down if child changes the number of rows.
     if (!transform_traits.preserves_number_of_rows)
-        return;
+        return false;
 
     /// Cannot push down if data was sorted exactly by child stream.
     if (!child->getOutputStream().sort_description.empty() && !data_stream_traits.preserves_sorting)
-        return;
+        return false;
 
     /// Now we push down limit only if it doesn't change any stream properties.
     /// TODO: some of them may be changed and, probably, not important for following streams. We may add such info.
     if (!limit->getOutputStream().hasEqualPropertiesWith(transforming->getOutputStream()))
-        return;
+        return false;
 
     /// Input stream for Limit have changed.
     limit->updateInputStream(transforming->getInputStreams().front());
diff --git a/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp b/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp
index 3bbfe0e1efb..4b4bf540cc5 100644
--- a/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp
+++ b/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp
@@ -6,8 +6,13 @@
 namespace DB::QueryPlanOptimizations
 {
 
-bool tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Node * child_node)
+bool tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
 {
+    if (parent_node->children.size() != 1)
+        return false;
+
+    QueryPlan::Node * child_node = parent_node->children.front();
+
     auto & parent = parent_node->step;
     auto & child = child_node->step;
 
diff --git a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp
new file mode 100644
index 00000000000..8ac5a4482f1
--- /dev/null
+++ b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp
@@ -0,0 +1,102 @@
+#include <Processors/QueryPlan/Optimizations/Optimizations.h>
+#include <stack>
+
+namespace DB::QueryPlanOptimizations
+{
+
+void optimizeTree(QueryPlan::Node & root, QueryPlan::Nodes & nodes)
+{
+    const auto & optimizations = getOptimizations();
+
+    struct Frame
+    {
+        QueryPlan::Node * node;
+        Frame * parent = nullptr;
+
+        /// Will update only depth_limit layers of tree (if no other optimizations happen).
+        size_t depth_limit = 0;
+
+        size_t next_child = 0;
+
+        size_t read_depth_limit = 0;
+    };
+
+    std::stack<Frame> stack;
+    stack.push(Frame{.node = &root});
+
+    while (!stack.empty())
+    {
+        auto & frame = stack.top();
+
+        if (frame.depth_limit != 1)
+        {
+            /// Traverse all children first.
+            if (frame.next_child < frame.node->children.size())
+            {
+                stack.push(Frame
+                {
+                       .node = frame.node->children[frame.next_child],
+                       .parent = &frame,
+                       .depth_limit = frame.depth_limit ? (frame.depth_limit - 1) : 0,
+                });
+
+                ++frame.next_child;
+                continue;
+            }
+        }
+
+        if (frame.depth_limit == 0 || frame.read_depth_limit)
+        {
+            size_t max_update_depth = 0;
+
+            /// Apply all optimizations.
+            for (const auto & optimization : optimizations)
+            {
+                /// Just in case, skip optimization if it is not initialized.
+                if (!optimization.run)
+                    continue;
+
+                /// Skip optimization if read_depth_limit is applied.
+                if (frame.read_depth_limit && optimization.read_depth <= frame.read_depth_limit)
+                    continue;
+
+                /// Try to apply optimization.
+                if (optimization.run(frame.node, nodes))
+                    max_update_depth = std::max<size_t>(max_update_depth, optimization.update_depth);
+            }
+
+            /// Nothing was applied.
+            if (max_update_depth == 0)
+            {
+                stack.pop();
+                continue;
+            }
+
+            /// Traverse `max_update_depth` layers of tree again.
+            frame.depth_limit = max_update_depth;
+            frame.next_child = 0;
+
+            /// Also go to parents and tell them to apply some optimizations again.
+            Frame * cur_frame = &frame;
+            for (size_t cur_depth = 0; cur_frame && cur_frame->depth_limit; ++cur_depth)
+            {
+                /// If cur_frame is traversed first time, all optimizations will apply anyway.
+                if (cur_frame->depth_limit == 0)
+                    break;
+
+                /// Stop if limit is applied and stricter then current.
+                if (cur_frame->read_depth_limit && cur_frame->read_depth_limit <= cur_depth)
+                    break;
+
+                cur_frame->read_depth_limit = cur_depth;
+                cur_frame = cur_frame->parent;
+            }
+
+            continue;
+        }
+
+        stack.pop();
+    }
+}
+
+}
diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp
index 49249e99864..755944fdf9f 100644
--- a/src/Processors/QueryPlan/QueryPlan.cpp
+++ b/src/Processors/QueryPlan/QueryPlan.cpp
@@ -7,9 +7,6 @@
 #include <Interpreters/ArrayJoinAction.h>
 #include <stack>
 #include <Processors/QueryPlan/Optimizations/Optimizations.h>
-#include "ExpressionStep.h"
-#include "ArrayJoinStep.h"
-#include "FilterStep.h"
 
 namespace DB
 {
@@ -338,59 +335,7 @@ void QueryPlan::explainPipeline(WriteBuffer & buffer, const ExplainPipelineOptio
 
 void QueryPlan::optimize()
 {
-    /* Stack contains info for every nodes in the path from tree root to the current node.
-     * Every optimization changes only current node and it's children.
-     * Optimization may change QueryPlanStep, but not QueryPlan::Node (only add a new one).
-     * So, QueryPlan::Node::children will be always valid.
-     */
-
-    struct Frame
-    {
-        Node * node;
-        size_t next_child = 0;
-    };
-
-    std::stack<Frame> stack;
-    stack.push(Frame{.node = root});
-
-    while (!stack.empty())
-    {
-        auto & frame = stack.top();
-
-        if (frame.next_child == 0)
-        {
-            if (frame.node->children.size() == 1)
-            {
-                QueryPlanOptimizations::tryPushDownLimit(frame.node->step, frame.node->children.front());
-
-                while (QueryPlanOptimizations::tryMergeExpressions(frame.node, frame.node->children.front()));
-
-                if (frame.node->children.size() == 1)
-                    QueryPlanOptimizations::tryLiftUpArrayJoin(frame.node, frame.node->children.front(), nodes);
-
-                QueryPlanOptimizations::trySplitFilter(frame.node, nodes);
-            }
-        }
-
-        if (frame.next_child < frame.node->children.size())
-        {
-            stack.push(Frame{frame.node->children[frame.next_child]});
-            ++frame.next_child;
-        }
-        else
-        {
-            if (frame.node->children.size() == 1)
-            {
-                while (QueryPlanOptimizations::tryMergeExpressions(frame.node, frame.node->children.front()));
-
-                QueryPlanOptimizations::trySplitFilter(frame.node, nodes);
-
-                QueryPlanOptimizations::tryLiftUpArrayJoin(frame.node, frame.node->children.front(), nodes);
-            }
-
-            stack.pop();
-        }
-    }
+    QueryPlanOptimizations::optimizeTree(*root, nodes);
 }
 
 }

From 80dfa40feb1334c27b0638909d72acf01d7a4be2 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Wed, 27 Jan 2021 20:42:04 +0300
Subject: [PATCH 0260/1238] Add a test

---
 tests/queries/0_stateless/01674_unicode_asan.reference | 1 +
 tests/queries/0_stateless/01674_unicode_asan.sql       | 2 ++
 2 files changed, 3 insertions(+)
 create mode 100644 tests/queries/0_stateless/01674_unicode_asan.reference
 create mode 100644 tests/queries/0_stateless/01674_unicode_asan.sql

diff --git a/tests/queries/0_stateless/01674_unicode_asan.reference b/tests/queries/0_stateless/01674_unicode_asan.reference
new file mode 100644
index 00000000000..573541ac970
--- /dev/null
+++ b/tests/queries/0_stateless/01674_unicode_asan.reference
@@ -0,0 +1 @@
+0
diff --git a/tests/queries/0_stateless/01674_unicode_asan.sql b/tests/queries/0_stateless/01674_unicode_asan.sql
new file mode 100644
index 00000000000..d34ff23be48
--- /dev/null
+++ b/tests/queries/0_stateless/01674_unicode_asan.sql
@@ -0,0 +1,2 @@
+SELECT positionCaseInsensitiveUTF8('иголка.ру', 'иголка.р�\0') AS res;
+SELECT positionCaseInsensitiveUTF8('иголка.ру', randomString(rand() % 100)) FROM system.numbers; -- { serverError 2 }

From a33963e211d305edc80d453a75bff2c7347ec5c0 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Wed, 27 Jan 2021 20:54:25 +0300
Subject: [PATCH 0261/1238] Better raft server startup

---
 programs/server/Server.cpp                    |  2 +
 src/Coordination/NuKeeperServer.cpp           | 56 ++++++++++++++++---
 src/Coordination/NuKeeperServer.h             | 12 ++--
 src/Coordination/NuKeeperStateMachine.h       |  2 +-
 .../TestKeeperStorageDispatcher.cpp           | 26 +++++++--
 .../TestKeeperStorageDispatcher.h             |  5 ++
 src/Interpreters/Context.cpp                  | 14 ++++-
 src/Interpreters/Context.h                    |  1 +
 src/Server/TestKeeperTCPHandler.cpp           | 21 +++++--
 src/Server/TestKeeperTCPHandler.h             |  2 +-
 .../configs/use_test_keeper.xml               |  8 +++
 11 files changed, 124 insertions(+), 25 deletions(-)

diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 04919e8504c..fefabd8be71 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -904,6 +904,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
                 LOG_INFO(log, "Closed connections to servers for tables. But {} remain. Probably some tables of other users cannot finish their connections after context shutdown.", current_connections);
             else
                 LOG_INFO(log, "Closed connections to servers for tables.");
+
+            global_context->shutdownTestKeeperStorageDispatcher();
         }
 
         /** Explicitly destroy Context. It is more convenient than in destructor of Server, because logger is still available.
diff --git a/src/Coordination/NuKeeperServer.cpp b/src/Coordination/NuKeeperServer.cpp
index 8b8288424d9..a005febd67d 100644
--- a/src/Coordination/NuKeeperServer.cpp
+++ b/src/Coordination/NuKeeperServer.cpp
@@ -19,22 +19,22 @@ namespace ErrorCodes
     extern const int RAFT_ERROR;
 }
 
-NuKeeperServer::NuKeeperServer(int server_id_, const std::string & hostname_, int port_, bool can_become_leader_)
+NuKeeperServer::NuKeeperServer(int server_id_, const std::string & hostname_, int port_)
     : server_id(server_id_)
     , hostname(hostname_)
     , port(port_)
     , endpoint(hostname + ":" + std::to_string(port))
-    , can_become_leader(can_become_leader_)
     , state_machine(nuraft::cs_new<NuKeeperStateMachine>())
     , state_manager(nuraft::cs_new<InMemoryStateManager>(server_id, endpoint))
 {
 }
 
-bool NuKeeperServer::addServer(int server_id_, const std::string & server_uri_, bool can_become_leader_)
+void NuKeeperServer::addServer(int server_id_, const std::string & server_uri_, bool can_become_leader_)
 {
     nuraft::srv_config config(server_id_, 0, server_uri_, "", /*FIXME follower=*/ !can_become_leader_);
     auto ret1 = raft_instance->add_srv(config);
-    return ret1->get_result_code() == nuraft::cmd_result_code::OK;
+    if (ret1->get_result_code() != nuraft::cmd_result_code::OK)
+        throw Exception(ErrorCodes::RAFT_ERROR, "Cannot add server to RAFT quorum with code {}, message '{}'", ret1->get_result_code(), ret1->get_result_str());
 }
 
 
@@ -71,7 +71,7 @@ void NuKeeperServer::startup()
 TestKeeperStorage::ResponsesForSessions NuKeeperServer::shutdown(const TestKeeperStorage::RequestsForSessions & expired_requests)
 {
     TestKeeperStorage::ResponsesForSessions responses;
-    if (can_become_leader)
+    if (isLeader())
     {
         try
         {
@@ -161,7 +161,18 @@ TestKeeperStorage::ResponsesForSessions NuKeeperServer::putRequests(const TestKe
 
         auto result = raft_instance->append_entries(entries);
         if (!result->get_accepted())
-            throw Exception(ErrorCodes::RAFT_ERROR, "Cannot send requests to RAFT, mostly because we are not leader, code {}, message: '{}'", result->get_result_code(), result->get_result_str());
+        {
+            TestKeeperStorage::ResponsesForSessions responses;
+            for (const auto & [session_id, request] : requests)
+            {
+                auto response = request->makeResponse();
+                response->xid = request->xid;
+                response->zxid = 0; /// FIXME what we can do with it?
+                response->error = Coordination::Error::ZSESSIONEXPIRED;
+                responses.push_back(DB::TestKeeperStorage::ResponseForSession{session_id, response});
+            }
+            return responses;
+        }
 
         if (result->get_result_code() == nuraft::cmd_result_code::TIMEOUT)
         {
@@ -183,7 +194,6 @@ TestKeeperStorage::ResponsesForSessions NuKeeperServer::putRequests(const TestKe
     }
 }
 
-
 int64_t NuKeeperServer::getSessionID()
 {
     auto entry = nuraft::buffer::alloc(sizeof(int64_t));
@@ -203,4 +213,36 @@ int64_t NuKeeperServer::getSessionID()
     return bs_resp.get_i64();
 }
 
+bool NuKeeperServer::isLeader() const
+{
+    return raft_instance->is_leader();
+}
+
+bool NuKeeperServer::waitForServer(int32_t id) const
+{
+    for (size_t i = 0; i < 10; ++i)
+    {
+        if (raft_instance->get_srv_config(id) != nullptr)
+            return true;
+        LOG_DEBUG(&Poco::Logger::get("NuRaftInit"), "Waiting for server {} to join the cluster", id);
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    }
+    return false;
+}
+
+void NuKeeperServer::waitForServers(const std::vector<int32_t> & ids) const
+{
+    for (int32_t id : ids)
+        waitForServer(id);
+}
+
+void NuKeeperServer::waitForCatchUp() const
+{
+    while (raft_instance->is_catching_up() || raft_instance->is_receiving_snapshot())
+    {
+        LOG_DEBUG(&Poco::Logger::get("NuRaftInit"), "Waiting current RAFT instance to catch up");
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    }
+}
+
 }
diff --git a/src/Coordination/NuKeeperServer.h b/src/Coordination/NuKeeperServer.h
index 4c10614cd5c..b9488cafc69 100644
--- a/src/Coordination/NuKeeperServer.h
+++ b/src/Coordination/NuKeeperServer.h
@@ -21,8 +21,6 @@ private:
 
     std::string endpoint;
 
-    bool can_become_leader;
-
     nuraft::ptr<NuKeeperStateMachine> state_machine;
 
     nuraft::ptr<nuraft::state_mgr> state_manager;
@@ -40,7 +38,7 @@ private:
     TestKeeperStorage::ResponsesForSessions readZooKeeperResponses(nuraft::ptr<nuraft::buffer> & buffer);
 
 public:
-    NuKeeperServer(int server_id_, const std::string & hostname_, int port_, bool can_become_leader_);
+    NuKeeperServer(int server_id_, const std::string & hostname_, int port_);
 
     void startup();
 
@@ -48,7 +46,13 @@ public:
 
     int64_t getSessionID();
 
-    bool addServer(int server_id_, const std::string & server_uri, bool can_become_leader_);
+    void addServer(int server_id_, const std::string & server_uri, bool can_become_leader_);
+
+    bool isLeader() const;
+
+    bool waitForServer(int32_t server_id) const;
+    void waitForServers(const std::vector<int32_t> & ids) const;
+    void waitForCatchUp() const;
 
     TestKeeperStorage::ResponsesForSessions shutdown(const TestKeeperStorage::RequestsForSessions & expired_requests);
 };
diff --git a/src/Coordination/NuKeeperStateMachine.h b/src/Coordination/NuKeeperStateMachine.h
index 368e088a2f9..7767f552cec 100644
--- a/src/Coordination/NuKeeperStateMachine.h
+++ b/src/Coordination/NuKeeperStateMachine.h
@@ -47,7 +47,7 @@ public:
         return storage;
     }
 
-    TestKeeperStorage::ResponsesForSessions processReadRequest(const TestKeeperStorage::RequestForSession & requests);
+    TestKeeperStorage::ResponsesForSessions processReadRequest(const TestKeeperStorage::RequestForSession & request_for_session);
 
 private:
     struct StorageSnapshot
diff --git a/src/Coordination/TestKeeperStorageDispatcher.cpp b/src/Coordination/TestKeeperStorageDispatcher.cpp
index 7ce81df0bfd..f6ca389f2cf 100644
--- a/src/Coordination/TestKeeperStorageDispatcher.cpp
+++ b/src/Coordination/TestKeeperStorageDispatcher.cpp
@@ -86,6 +86,7 @@ void TestKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfigura
     bool my_can_become_leader = true;
 
     std::vector<std::tuple<int, std::string, int, bool>> server_configs;
+    std::vector<int32_t> ids;
     for (const auto & server_key : keys)
     {
         int server_id = config.getInt("test_keeper_server.raft_configuration." + server_key + ".id");
@@ -102,14 +103,26 @@ void TestKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfigura
         {
             server_configs.emplace_back(server_id, hostname, port, can_become_leader);
         }
+        ids.push_back(server_id);
     }
 
-    server = std::make_unique<NuKeeperServer>(myid, myhostname, myport, my_can_become_leader);
+    server = std::make_unique<NuKeeperServer>(myid, myhostname, myport);
     server->startup();
     if (my_can_become_leader)
     {
         for (const auto & [id, hostname, port, can_become_leader] : server_configs)
-            server->addServer(id, hostname + ":" + std::to_string(port), can_become_leader);
+        {
+            do
+            {
+                server->addServer(id, hostname + ":" + std::to_string(port), can_become_leader);
+            }
+            while (!server->waitForServer(id));
+        }
+    }
+    else
+    {
+        server->waitForServers(ids);
+        server->waitForCatchUp();
     }
 
     processing_thread = ThreadFromGlobalPool([this] { processingThread(); });
@@ -135,9 +148,12 @@ void TestKeeperStorageDispatcher::shutdown()
         if (server)
         {
             TestKeeperStorage::RequestsForSessions expired_requests;
-            TestKeeperStorage::RequestForSession request;
-            while (requests_queue.tryPop(request))
-                expired_requests.push_back(TestKeeperStorage::RequestForSession{request});
+            if (server->isLeader())
+            {
+                TestKeeperStorage::RequestForSession request;
+                while (requests_queue.tryPop(request))
+                    expired_requests.push_back(TestKeeperStorage::RequestForSession{request});
+            }
 
             auto expired_responses = server->shutdown(expired_requests);
 
diff --git a/src/Coordination/TestKeeperStorageDispatcher.h b/src/Coordination/TestKeeperStorageDispatcher.h
index 5107f2f9cba..a6c6118f9c4 100644
--- a/src/Coordination/TestKeeperStorageDispatcher.h
+++ b/src/Coordination/TestKeeperStorageDispatcher.h
@@ -47,6 +47,11 @@ public:
 
     bool putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id);
 
+    bool isLeader() const
+    {
+        return server->isLeader();
+    }
+
     int64_t getSessionID()
     {
         std::lock_guard lock(session_id_mutex);
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 4c396bd29f4..fc8d8654573 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -446,9 +446,7 @@ struct ContextShared
         trace_collector.reset();
         /// Stop zookeeper connection
         zookeeper.reset();
-        /// Stop test_keeper storage
-        if (test_keeper_storage_dispatcher)
-            test_keeper_storage_dispatcher->shutdown();
+
     }
 
     bool hasTraceCollector() const
@@ -1593,6 +1591,16 @@ std::shared_ptr<TestKeeperStorageDispatcher> & Context::getTestKeeperStorageDisp
     return shared->test_keeper_storage_dispatcher;
 }
 
+void Context::shutdownTestKeeperStorageDispatcher() const
+{
+    std::lock_guard lock(shared->test_keeper_storage_dispatcher_mutex);
+    if (shared->test_keeper_storage_dispatcher)
+    {
+        shared->test_keeper_storage_dispatcher->shutdown();
+        shared->test_keeper_storage_dispatcher.reset();
+    }
+}
+
 zkutil::ZooKeeperPtr Context::getAuxiliaryZooKeeper(const String & name) const
 {
     std::lock_guard lock(shared->auxiliary_zookeepers_mutex);
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index 537ddcc0ec8..e643c80183c 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -576,6 +576,7 @@ public:
 
     void initializeTestKeeperStorageDispatcher() const;
     std::shared_ptr<TestKeeperStorageDispatcher> & getTestKeeperStorageDispatcher() const;
+    void shutdownTestKeeperStorageDispatcher() const;
 
     /// Set auxiliary zookeepers configuration at server starting or configuration reloading.
     void reloadAuxiliaryZooKeepersConfigIfChanged(const ConfigurationPtr & config);
diff --git a/src/Server/TestKeeperTCPHandler.cpp b/src/Server/TestKeeperTCPHandler.cpp
index 81eaee3382c..04e5c6ece1d 100644
--- a/src/Server/TestKeeperTCPHandler.cpp
+++ b/src/Server/TestKeeperTCPHandler.cpp
@@ -227,16 +227,19 @@ TestKeeperTCPHandler::TestKeeperTCPHandler(IServer & server_, const Poco::Net::S
     , test_keeper_storage_dispatcher(global_context.getTestKeeperStorageDispatcher())
     , operation_timeout(0, global_context.getConfigRef().getUInt("test_keeper_server.operation_timeout_ms", Coordination::DEFAULT_OPERATION_TIMEOUT_MS) * 1000)
     , session_timeout(0, global_context.getConfigRef().getUInt("test_keeper_server.session_timeout_ms", Coordination::DEFAULT_SESSION_TIMEOUT_MS) * 1000)
-    , session_id(test_keeper_storage_dispatcher->getSessionID())
     , poll_wrapper(std::make_unique<SocketInterruptablePollWrapper>(socket_))
     , responses(std::make_unique<ThreadSafeResponseQueue>())
 {
 }
 
-void TestKeeperTCPHandler::sendHandshake()
+void TestKeeperTCPHandler::sendHandshake(bool is_leader)
 {
     Coordination::write(Coordination::SERVER_HANDSHAKE_LENGTH, *out);
-    Coordination::write(Coordination::ZOOKEEPER_PROTOCOL_VERSION, *out);
+    if (is_leader)
+        Coordination::write(Coordination::ZOOKEEPER_PROTOCOL_VERSION, *out);
+    else /// Specially ignore connections if we are not leader, client will throw exception
+        Coordination::write(42, *out);
+
     Coordination::write(Coordination::DEFAULT_SESSION_TIMEOUT_MS, *out);
     Coordination::write(session_id, *out);
     std::array<char, Coordination::PASSWORD_LENGTH> passwd{};
@@ -316,7 +319,17 @@ void TestKeeperTCPHandler::runImpl()
         return;
     }
 
-    sendHandshake();
+    if (test_keeper_storage_dispatcher->isLeader())
+    {
+        session_id = test_keeper_storage_dispatcher->getSessionID();
+        sendHandshake(true);
+    }
+    else
+    {
+        sendHandshake(false);
+        LOG_WARNING(log, "Ignoring connection because we are not leader");
+        return;
+    }
 
     auto response_fd = poll_wrapper->getResponseFD();
     auto response_callback = [this, response_fd] (const Coordination::ZooKeeperResponsePtr & response)
diff --git a/src/Server/TestKeeperTCPHandler.h b/src/Server/TestKeeperTCPHandler.h
index e7372e8dd82..bb74513afce 100644
--- a/src/Server/TestKeeperTCPHandler.h
+++ b/src/Server/TestKeeperTCPHandler.h
@@ -45,7 +45,7 @@ private:
 
     void runImpl();
 
-    void sendHandshake();
+    void sendHandshake(bool is_leader);
     void receiveHandshake();
 
     std::pair<Coordination::OpNum, Coordination::XID> receiveRequest();
diff --git a/tests/integration/test_testkeeper_multinode/configs/use_test_keeper.xml b/tests/integration/test_testkeeper_multinode/configs/use_test_keeper.xml
index 20d731b8553..b6139005d2f 100644
--- a/tests/integration/test_testkeeper_multinode/configs/use_test_keeper.xml
+++ b/tests/integration/test_testkeeper_multinode/configs/use_test_keeper.xml
@@ -4,5 +4,13 @@
             <host>node1</host>
             <port>9181</port>
         </node>
+        <node index="2">
+            <host>node2</host>
+            <port>9181</port>
+        </node>
+        <node index="3">
+            <host>node3</host>
+            <port>9181</port>
+        </node>
     </zookeeper>
 </yandex>

From acde56ff8a32873b589d73d21fb4ba3caa6c31de Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Wed, 27 Jan 2021 21:08:06 +0300
Subject: [PATCH 0262/1238] Add separate pool for message brokers (RabbitMQ and
 Kafka)

background_message_broker_schedule_pool_size was introduced in #13939,
but never used actually, use it for RabbitMQ and Kafka.
---
 docs/en/operations/settings/settings.md   | 15 +++++++++++++++
 src/Interpreters/Context.cpp              | 13 +++++++++++++
 src/Interpreters/Context.h                |  1 +
 src/Storages/Kafka/StorageKafka.cpp       |  2 +-
 src/Storages/RabbitMQ/StorageRabbitMQ.cpp |  6 +++---
 5 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 85a3b8bd941..85608fc0732 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -1944,6 +1944,21 @@ Possible values:
 
 Default value: 16.
 
+## background_message_broker_schedule_pool_size {#background_message_broker_schedule_pool_size}
+
+Sets the number of threads performing background tasks for message streaming. This setting is applied at the ClickHouse server start and can’t be changed in a user session.
+
+Possible values:
+
+-   Any positive integer.
+
+Default value: 16.
+
+**See Also**
+
+-   [Kafka](../../engines/table-engines/integrations/kafka.md#kafka) engine
+-   [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md#rabbitmq-engine) engine
+
 ## validate_polygons {#validate_polygons}
 
 Enables or disables throwing an exception in the [pointInPolygon](../../sql-reference/functions/geo/index.md#pointinpolygon) function, if the polygon is self-intersecting or self-tangent.
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 69e09b36e64..9be01318be9 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -342,6 +342,7 @@ struct ContextShared
     mutable std::optional<BackgroundSchedulePool> buffer_flush_schedule_pool; /// A thread pool that can do background flush for Buffer tables.
     mutable std::optional<BackgroundSchedulePool> schedule_pool;    /// A thread pool that can run different jobs in background (used in replicated tables)
     mutable std::optional<BackgroundSchedulePool> distributed_schedule_pool; /// A thread pool that can run different jobs in background (used for distributed sends)
+    mutable std::optional<BackgroundSchedulePool> message_broker_schedule_pool; /// A thread pool that can run different jobs in background (used for message brokers, like RabbitMQ and Kafka)
     MultiVersion<Macros> macros;                            /// Substitutions extracted from config.
     std::unique_ptr<DDLWorker> ddl_worker;                  /// Process ddl commands from zk.
     /// Rules for selecting the compression settings, depending on the size of the part.
@@ -440,6 +441,7 @@ struct ContextShared
         buffer_flush_schedule_pool.reset();
         schedule_pool.reset();
         distributed_schedule_pool.reset();
+        message_broker_schedule_pool.reset();
         ddl_worker.reset();
 
         /// Stop trace collector if any
@@ -1526,6 +1528,17 @@ BackgroundSchedulePool & Context::getDistributedSchedulePool() const
     return *shared->distributed_schedule_pool;
 }
 
+BackgroundSchedulePool & Context::getMessageBrokerSchedulePool() const
+{
+    auto lock = getLock();
+    if (!shared->message_broker_schedule_pool)
+        shared->message_broker_schedule_pool.emplace(
+            settings.background_message_broker_schedule_pool_size,
+            CurrentMetrics::BackgroundDistributedSchedulePoolTask,
+            "BgMsgBrkSchPool");
+    return *shared->message_broker_schedule_pool;
+}
+
 bool Context::hasDistributedDDL() const
 {
     return getConfigRef().has("distributed_ddl");
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index 8e15d0a4fed..83a46ef5a2b 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -611,6 +611,7 @@ public:
 
     BackgroundSchedulePool & getBufferFlushSchedulePool() const;
     BackgroundSchedulePool & getSchedulePool() const;
+    BackgroundSchedulePool & getMessageBrokerSchedulePool() const;
     BackgroundSchedulePool & getDistributedSchedulePool() const;
 
     /// Has distributed_ddl configuration or not.
diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp
index 388c21c6ad6..45e4ec538a1 100644
--- a/src/Storages/Kafka/StorageKafka.cpp
+++ b/src/Storages/Kafka/StorageKafka.cpp
@@ -196,7 +196,7 @@ StorageKafka::StorageKafka(
     auto task_count = thread_per_consumer ? num_consumers : 1;
     for (size_t i = 0; i < task_count; ++i)
     {
-        auto task = global_context.getSchedulePool().createTask(log->name(), [this, i]{ threadFunc(i); });
+        auto task = global_context.getMessageBrokerSchedulePool().createTask(log->name(), [this, i]{ threadFunc(i); });
         task->deactivate();
         tasks.emplace_back(std::make_shared<TaskContext>(std::move(task)));
     }
diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp
index f41c4805d24..3ee9dda2bf3 100644
--- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp
+++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp
@@ -112,13 +112,13 @@ StorageRabbitMQ::StorageRabbitMQ(
 
     /// One looping task for all consumers as they share the same connection == the same handler == the same event loop
     event_handler->updateLoopState(Loop::STOP);
-    looping_task = global_context.getSchedulePool().createTask("RabbitMQLoopingTask", [this]{ loopingFunc(); });
+    looping_task = global_context.getMessageBrokerSchedulePool().createTask("RabbitMQLoopingTask", [this]{ loopingFunc(); });
     looping_task->deactivate();
 
-    streaming_task = global_context.getSchedulePool().createTask("RabbitMQStreamingTask", [this]{ streamingToViewsFunc(); });
+    streaming_task = global_context.getMessageBrokerSchedulePool().createTask("RabbitMQStreamingTask", [this]{ streamingToViewsFunc(); });
     streaming_task->deactivate();
 
-    connection_task = global_context.getSchedulePool().createTask("RabbitMQConnectionTask", [this]{ connectionFunc(); });
+    connection_task = global_context.getMessageBrokerSchedulePool().createTask("RabbitMQConnectionTask", [this]{ connectionFunc(); });
     connection_task->deactivate();
 
     if (queue_base.empty())

From 419c916612ff3de0ca858d83ec41ad552ceb7f9c Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Wed, 27 Jan 2021 10:16:46 -0800
Subject: [PATCH 0263/1238] replace strcat with memcpy

---
 src/Functions/FunctionsCoding.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/Functions/FunctionsCoding.h b/src/Functions/FunctionsCoding.h
index b54550536ff..428f4891990 100644
--- a/src/Functions/FunctionsCoding.h
+++ b/src/Functions/FunctionsCoding.h
@@ -298,6 +298,7 @@ public:
             const ColumnString::Chars & vec_src = col_in->getChars();
             const ColumnString::Offsets & offsets_src = col_in->getOffsets();
             size_t src_offset = 0;
+            char src_ipv4_buf[sizeof("::ffff:") + IPV4_MAX_TEXT_LENGTH + 1] = "::ffff:";
 
             for (size_t out_offset = 0, i = 0; out_offset < vec_res.size(); out_offset += IPV6_BINARY_LENGTH, ++i)
             {
@@ -307,8 +308,10 @@ public:
                 /// Keeping it simple by just prefixing `::ffff:` to the IPv4 address to represent it as a valid IPv6 address.
                 if (tryParseIPv4(reinterpret_cast<const char *>(&vec_src[src_offset])))
                 {
-                    char src_ipv4_buf[sizeof("::ffff:") + IPV4_MAX_TEXT_LENGTH + 1] = "::ffff:";
-                    std::strcat(src_ipv4_buf, reinterpret_cast<const char *>(&vec_src[src_offset]));
+                    std::memcpy(
+                        src_ipv4_buf + std::strlen("::ffff:"),
+                        reinterpret_cast<const char *>(&vec_src[src_offset]),
+                        std::strlen(reinterpret_cast<const char *>(&vec_src[src_offset])));
                     parseIPv6(src_ipv4_buf, reinterpret_cast<unsigned char *>(&vec_res[out_offset]));
                 }
                 else

From 54f47c468cc3dfa67e87c3a6486294351585937d Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Wed, 27 Jan 2021 21:34:48 +0300
Subject: [PATCH 0264/1238] mark all create database tests as sequential-only

---
 tests/queries/skip_list.json | 91 +++++++++++++++++++++++++++++++++++-
 1 file changed, 89 insertions(+), 2 deletions(-)

diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json
index c01cfee5993..b48d642befd 100644
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@@ -314,11 +314,14 @@
         /// Pessimistic list of tests which work badly in parallel.
         /// Probably they need better investigation.
         "00062_replicated_merge_tree_alter_zookeeper",
+        "00080_show_tables_and_system_tables",
+        "00101_materialized_views_and_insert_without_explicit_database",
         "00109_shard_totals_after_having",
         "00110_external_sort",
         "00116_storage_set",
         "00121_drop_column_zookeeper",
         "00133_long_shard_memory_tracker_and_exception_safety",
+        "00158_buffer_and_nonexistent_table",
         "00180_attach_materialized_view",
         "00226_zookeeper_deduplication_and_unexpected_parts",
         "00236_replicated_drop_on_non_leader_zookeeper",
@@ -339,9 +342,12 @@
         "00571_non_exist_database_when_create_materializ_view",
         "00575_illegal_column_exception_when_drop_depen_column",
         "00599_create_view_with_subquery",
+        "00604_show_create_database",
         "00612_http_max_query_size",
         "00619_union_highlite",
         "00620_optimize_on_nonleader_replica_zookeeper",
+        "00623_truncate_table",
+        "00623_truncate_table_throw_exception",
         "00625_arrays_in_nested",
         "00626_replace_partition_from_table",
         "00626_replace_partition_from_table_zookeeper",
@@ -354,20 +360,28 @@
         "00699_materialized_view_mutations",
         "00701_rollup",
         "00715_fetch_merged_or_mutated_part_zookeeper",
+        "00716_allow_ddl",
+        "00719_parallel_ddl_db",
+        "00740_database_in_nested_view",
+        "00741_client_comment_multiline",
         "00751_default_databasename_for_view",
         "00753_alter_attach",
         "00754_alter_modify_column_partitions",
         "00754_alter_modify_order_by_replicated_zookeeper",
         "00763_long_lock_buffer_alter_destination_table",
+        "00800_versatile_storage_join",
         "00804_test_alter_compression_codecs",
         "00804_test_custom_compression_codecs",
         "00804_test_custom_compression_codes_log_storages",
         "00804_test_delta_codec_compression",
+        "00815_left_join_on_stepanel",
         "00834_cancel_http_readonly_queries_on_client_close",
         "00834_kill_mutation",
         "00834_kill_mutation_replicated_zookeeper",
         "00840_long_concurrent_select_and_drop_deadlock",
+        "00857_global_joinsavel_table_alias",
         "00899_long_attach_memory_limit",
+        "00910_buffer_prewhere",
         "00910_zookeeper_custom_compression_codecs_replicated",
         "00926_adaptive_index_granularity_merge_tree",
         "00926_adaptive_index_granularity_pk",
@@ -385,49 +399,94 @@
         "00988_constraints_replication_zookeeper",
         "00989_parallel_parts_loading",
         "00993_system_parts_race_condition_drop_zookeeper",
+        "01012_show_tables_limit",
         "01013_sync_replica_timeout_zookeeper",
+        "01014_lazy_database_basic",
         "01014_lazy_database_concurrent_recreate_reattach_and_show_tables",
         "01015_attach_part",
+        "01015_database_bad_tables",
         "01018_ddl_dictionaries_concurrent_requrests",
         "01018_ddl_dictionaries_create",
         "01018_ddl_dictionaries_select",
+        "01018_ddl_dictionaries_special",
+        "01018_dictionaries_from_dictionaries",
+        "01018_ip_dictionary",
         "01021_only_tuple_columns",
+        "01023_materialized_view_query_context",
         "01031_mutations_interpreter_and_context",
         "01033_dictionaries_lifetime",
         "01035_concurrent_move_partition_from_table_zookeeper",
+        "01036_no_superfluous_dict_reload_on_create_database",
+        "01036_no_superfluous_dict_reload_on_create_database_2",
+        "01037_polygon_dicts_correctness_all",
+        "01037_polygon_dicts_correctness_fast",
+        "01037_polygon_dicts_simple_functions",
+        "01038_dictionary_lifetime_min_zero_sec",
+        "01040_dictionary_invalidate_query_switchover_long",
+        "01041_create_dictionary_if_not_exists",
+        "01042_system_reload_dictionary_reloads_completely",
+        "01043_dictionary_attribute_properties_values",
+        "01045_dictionaries_restrictions",
         "01045_zookeeper_system_mutations_with_parts_names",
+        "01048_exists_query",
+        "01053_drop_database_mat_view",
         "01053_ssd_dictionary",
+        "01054_cache_dictionary_bunch_update",
+        "01054_cache_dictionary_overflow_cell",
         "01055_compact_parts_1",
+        "01056_create_table_as",
         "01060_avro",
         "01060_shutdown_table_after_detach",
+        "01069_database_memory",
         "01070_materialize_ttl",
         "01070_modify_ttl",
         "01070_mutations_with_dependencies",
         "01071_live_view_detach_dependency",
         "01071_prohibition_secondary_index_with_old_format_merge_tree",
         "01073_attach_if_not_exists",
+        "01073_show_tables_not_like",
+        "01076_cache_dictionary_datarace_exception_ptr",
         "01076_parallel_alter_replicated_zookeeper",
         "01079_parallel_alter_add_drop_column_zookeeper",
         "01079_parallel_alter_detach_table_zookeeper",
+        "01080_check_for_error_incorrect_size_of_nested_column",
         "01083_expressions_in_engine_arguments",
+        "01084_regexp_empty",
         "01085_max_distributed_connections_http",
         "01092_memory_profiler",
         "01098_temporary_and_external_tables",
+        "01103_distributed_product_mode_local_column_renames",
         "01107_atomic_db_detach_attach",
         "01108_restart_replicas_rename_deadlock_zookeeper",
+        "01109_exchange_tables",
         "01110_dictionary_layout_without_arguments",
+        "01113_local_dictionary_type_conversion",
         "01114_database_atomic",
+        "01114_mysql_database_engine_segfault",
+        "01115_join_with_dictionary",
+        "01125_dict_ddl_cannot_add_column",
         "01127_month_partitioning_consistency_select",
         "01130_in_memory_parts_partitons",
         "01135_default_and_alter_zookeeper",
         "01148_zookeeper_path_macros_unfolding",
+        "01150_ddl_guard_rwr",
+        "01185_create_or_replace_table",
         "01190_full_attach_syntax",
+        "01191_rename_dictionary",
+        "01192_rename_database_zookeeper",
         "01193_metadata_loading",
         "01200_mutations_memory_consumption",
+        "01224_no_superfluous_dict_reload",
+        "01225_drop_dictionary_as_table",
+        "01225_show_create_table_from_dictionary",
+        "01231_distributed_aggregation_memory_efficient_mix_levels",
+        "01232_extremes",
         "01238_http_memory_tracking",
         "01249_bad_arguments_for_bloom_filter",
         "01251_dict_is_in_infinite_loop",
+        "01254_dict_create_without_db",
         "01254_dict_load_after_detach_attach",
+        "01257_dictionary_mismatch_types",
         "01259_dictionary_custom_settings_ddl",
         "01267_alter_default_key_columns_zookeeper",
         "01268_dictionary_direct_layout",
@@ -441,18 +500,25 @@
         "01293_system_distribution_queue",
         "01294_lazy_database_concurrent",
         "01294_lazy_database_concurrent_recreate_reattach_and_show_tables",
+        "01294_system_distributed_on_cluster",
+        "01296_create_row_policy_in_current_database",
         "01305_replica_create_drop_zookeeper",
         "01307_multiple_leaders_zookeeper",
         "01318_long_unsuccessful_mutation_zookeeper",
         "01319_manual_write_to_replicas",
+        "01320_create_sync_race_condition_zookeeper",
         "01338_long_select_and_alter",
         "01338_long_select_and_alter_zookeeper",
         "01355_alter_column_with_order",
         "01355_ilike",
         "01357_version_collapsing_attach_detach_zookeeper",
         "01375_compact_parts_codecs",
+        "01376_GROUP_BY_injective_elimination_dictGet",
         "01378_alter_rename_with_ttl_zookeeper",
+        "01383_remote_ambiguous_column_shard",
         "01388_clear_all_columns",
+        "01391_join_on_dict_crash",
+        "01392_column_resolve",
         "01396_inactive_replica_cleanup_nodes_zookeeper",
         "01412_cache_dictionary_race",
         "01414_mutations_and_errors_zookeeper",
@@ -461,20 +527,41 @@
         "01417_freeze_partition_verbose",
         "01417_freeze_partition_verbose_zookeeper",
         "01430_modify_sample_by_zookeeper",
+        "01444_create_table_drop_database_race",
         "01454_storagememory_data_race_challenge",
+        "01455_rank_correlation_spearman",
         "01456_modify_column_type_via_add_drop_update",
         "01457_create_as_table_function_structure",
         "01459_manual_write_to_replicas",
         "01460_DistributedFilesToInsert",
         "01465_ttl_recompression",
+        "01470_show_databases_like",
         "01471_calculate_ttl_during_merge",
+        "01487_distributed_in_not_default_db",
         "01493_alter_remove_properties_zookeeper",
         "01493_storage_set_persistency",
         "01494_storage_join_persistency",
+        "01501_cache_dictionary_all_fields",
+        "01507_clickhouse_server_start_with_embedded_config",
+        "01509_dictionary_preallocate",
+        "01516_create_table_primary_key",
         "01516_drop_table_stress",
+        "01517_drop_mv_with_inner_table",
+        "01526_complex_key_dict_direct_layout",
+        "01527_clickhouse_local_optimize",
+        "01527_dist_sharding_key_dictGet_reload",
+        "01530_drop_database_atomic_sync",
         "01541_max_memory_usage_for_user",
-        "01646_system_restart_replicas_smoke", // system restart replicas is a global query
+        "01542_dictionary_load_exception_race",
+        "01575_disable_detach_table_of_dictionary",
         "01600_count_of_parts_metrics", // tests global system metrics
+        "01600_detach_permanently",
+        "01600_log_queries_with_extensive_info",
+        "01600_multiple_left_join_with_aliases",
+        "01601_detach_permanently",
+        "01602_show_create_view",
+        "01603_rename_overwrite_bug",
+        "01646_system_restart_replicas_smoke", // system restart replicas is a global query
         "attach",
         "ddl_dictionaries",
         "dictionary",
@@ -482,6 +569,6 @@
         "live_view",
         "memory_leak",
         "memory_limit",
-        "polygon_dicts" // they use an explicitly specified database
+        "polygon_dicts", // they use an explicitly specified database
     ]
 }

From f4f66e953787a0a939d304784c7fd9ca202eebb3 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Wed, 27 Jan 2021 21:37:28 +0300
Subject: [PATCH 0265/1238] try to use predictable names for stdout/err when
 possible

---
 tests/clickhouse-test | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/clickhouse-test b/tests/clickhouse-test
index 70f7fe18982..8f9aaeb6ed8 100755
--- a/tests/clickhouse-test
+++ b/tests/clickhouse-test
@@ -342,7 +342,7 @@ def run_tests_array(all_tests_with_params):
                             SERVER_DIED = True
                             break
 
-                    file_suffix = ('.' + str(os.getpid())) if is_concurrent else ''
+                    file_suffix = ('.' + str(os.getpid())) if is_concurrent and args.test_runs > 1 else ''
                     reference_file = os.path.join(suite_dir, name) + '.reference'
                     stdout_file = os.path.join(suite_tmp_dir, name) + file_suffix + '.stdout'
                     stderr_file = os.path.join(suite_tmp_dir, name) + file_suffix + '.stderr'

From c74631c650b42e4d107254a03bd82c2c2913039b Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Wed, 27 Jan 2021 21:54:05 +0300
Subject: [PATCH 0266/1238] Fix test and add logical error

---
 .../ReplicatedMergeTreePartCheckThread.cpp    |  3 +-
 tests/integration/test_check_table/test.py    | 76 ++++++++++---------
 2 files changed, 44 insertions(+), 35 deletions(-)

diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
index f08b94d21df..fc762b21046 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
@@ -18,6 +18,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int TABLE_DIFFERS_TOO_MUCH;
+    extern const int LOGICAL_ERROR;
 }
 
 static const auto PART_CHECK_ERROR_SLEEP_MS = 5 * 1000;
@@ -194,7 +195,7 @@ void ReplicatedMergeTreePartCheckThread::searchForMissingPartAndFetchIfPossible(
         if (!storage.queue.remove(zookeeper, part_name))
         {
             /// The part was not in our queue. Why did it happen?
-            LOG_ERROR(log, "Missing part {} is not in our queue.", part_name);
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Missing part {} is not in our queue.", part_name);
         }
 
         /** This situation is possible if on all the replicas where the part was, it deteriorated.
diff --git a/tests/integration/test_check_table/test.py b/tests/integration/test_check_table/test.py
index 916b2ead7f7..d204f6c5810 100644
--- a/tests/integration/test_check_table/test.py
+++ b/tests/integration/test_check_table/test.py
@@ -13,18 +13,6 @@ def started_cluster():
     try:
         cluster.start()
 
-        for node in [node1, node2]:
-            node.query('''
-            CREATE TABLE replicated_mt(date Date, id UInt32, value Int32)
-            ENGINE = ReplicatedMergeTree('/clickhouse/tables/replicated_mt', '{replica}') PARTITION BY toYYYYMM(date) ORDER BY id;
-                '''.format(replica=node.name))
-
-        node1.query('''
-            CREATE TABLE non_replicated_mt(date Date, id UInt32, value Int32)
-            ENGINE = MergeTree() PARTITION BY toYYYYMM(date) ORDER BY id
-            SETTINGS min_bytes_for_wide_part=0;
-        ''')
-
         yield cluster
 
     finally:
@@ -54,6 +42,14 @@ def remove_part_from_disk(node, table, part_name):
 
 
 def test_check_normal_table_corruption(started_cluster):
+    node1.query("DROP TABLE IF EXISTS non_replicated_mt")
+
+    node1.query('''
+        CREATE TABLE non_replicated_mt(date Date, id UInt32, value Int32)
+        ENGINE = MergeTree() PARTITION BY toYYYYMM(date) ORDER BY id
+        SETTINGS min_bytes_for_wide_part=0;
+    ''')
+
     node1.query("INSERT INTO non_replicated_mt VALUES (toDate('2019-02-01'), 1, 10), (toDate('2019-02-01'), 2, 12)")
     assert node1.query("CHECK TABLE non_replicated_mt PARTITION 201902",
                        settings={"check_query_single_value_result": 0}) == "201902_1_1_0\t1\t\n"
@@ -94,8 +90,14 @@ def test_check_normal_table_corruption(started_cluster):
 
 
 def test_check_replicated_table_simple(started_cluster):
-    node1.query("TRUNCATE TABLE replicated_mt")
-    node2.query("SYSTEM SYNC REPLICA replicated_mt")
+    for node in [node1, node2]:
+        node.query("DROP TABLE IF EXISTS replicated_mt")
+
+        node.query('''
+        CREATE TABLE replicated_mt(date Date, id UInt32, value Int32)
+        ENGINE = ReplicatedMergeTree('/clickhouse/tables/replicated_mt', '{replica}') PARTITION BY toYYYYMM(date) ORDER BY id;
+            '''.format(replica=node.name))
+
     node1.query("INSERT INTO replicated_mt VALUES (toDate('2019-02-01'), 1, 10), (toDate('2019-02-01'), 2, 12)")
     node2.query("SYSTEM SYNC REPLICA replicated_mt")
 
@@ -119,34 +121,40 @@ def test_check_replicated_table_simple(started_cluster):
 
 
 def test_check_replicated_table_corruption(started_cluster):
-    node1.query("TRUNCATE TABLE replicated_mt")
-    node2.query("SYSTEM SYNC REPLICA replicated_mt")
-    node1.query("INSERT INTO replicated_mt VALUES (toDate('2019-02-01'), 1, 10), (toDate('2019-02-01'), 2, 12)")
-    node1.query("INSERT INTO replicated_mt VALUES (toDate('2019-01-02'), 3, 10), (toDate('2019-01-02'), 4, 12)")
-    node2.query("SYSTEM SYNC REPLICA replicated_mt")
+    for node in [node1, node2]:
+        node.query("DROP TABLE IF EXISTS replicated_mt_1")
 
-    assert node1.query("SELECT count() from replicated_mt") == "4\n"
-    assert node2.query("SELECT count() from replicated_mt") == "4\n"
+        node.query('''
+        CREATE TABLE replicated_mt_1(date Date, id UInt32, value Int32)
+        ENGINE = ReplicatedMergeTree('/clickhouse/tables/replicated_mt_1', '{replica}') PARTITION BY toYYYYMM(date) ORDER BY id;
+            '''.format(replica=node.name))
+
+    node1.query("INSERT INTO replicated_mt_1 VALUES (toDate('2019-02-01'), 1, 10), (toDate('2019-02-01'), 2, 12)")
+    node1.query("INSERT INTO replicated_mt_1 VALUES (toDate('2019-01-02'), 3, 10), (toDate('2019-01-02'), 4, 12)")
+    node2.query("SYSTEM SYNC REPLICA replicated_mt_1")
+
+    assert node1.query("SELECT count() from replicated_mt_1") == "4\n"
+    assert node2.query("SELECT count() from replicated_mt_1") == "4\n"
 
     part_name = node1.query(
-        "SELECT name from system.parts where table = 'replicated_mt' and partition_id = '201901' and active = 1").strip()
+        "SELECT name from system.parts where table = 'replicated_mt_1' and partition_id = '201901' and active = 1").strip()
 
-    corrupt_data_part_on_disk(node1, "replicated_mt", part_name)
-    assert node1.query("CHECK TABLE replicated_mt PARTITION 201901", settings={
-        "check_query_single_value_result": 0}) == "{p}\t0\tPart {p} looks broken. Removing it and queueing a fetch.\n".format(
+    corrupt_data_part_on_disk(node1, "replicated_mt_1", part_name)
+    assert node1.query("CHECK TABLE replicated_mt_1 PARTITION 201901", settings={
+        "check_query_single_value_result": 0}) == "{p}\t0\tPart {p} looks broken. Removing it and will try to fetch.\n".format(
         p=part_name)
 
-    node1.query("SYSTEM SYNC REPLICA replicated_mt")
-    assert node1.query("CHECK TABLE replicated_mt PARTITION 201901",
+    node1.query("SYSTEM SYNC REPLICA replicated_mt_1")
+    assert node1.query("CHECK TABLE replicated_mt_1 PARTITION 201901",
                        settings={"check_query_single_value_result": 0}) == "{}\t1\t\n".format(part_name)
-    assert node1.query("SELECT count() from replicated_mt") == "4\n"
+    assert node1.query("SELECT count() from replicated_mt_1") == "4\n"
 
-    remove_part_from_disk(node2, "replicated_mt", part_name)
-    assert node2.query("CHECK TABLE replicated_mt PARTITION 201901", settings={
-        "check_query_single_value_result": 0}) == "{p}\t0\tPart {p} looks broken. Removing it and queueing a fetch.\n".format(
+    remove_part_from_disk(node2, "replicated_mt_1", part_name)
+    assert node2.query("CHECK TABLE replicated_mt_1 PARTITION 201901", settings={
+        "check_query_single_value_result": 0}) == "{p}\t0\tPart {p} looks broken. Removing it and will try to fetch.\n".format(
         p=part_name)
 
-    node1.query("SYSTEM SYNC REPLICA replicated_mt")
-    assert node1.query("CHECK TABLE replicated_mt PARTITION 201901",
+    node1.query("SYSTEM SYNC REPLICA replicated_mt_1")
+    assert node1.query("CHECK TABLE replicated_mt_1 PARTITION 201901",
                        settings={"check_query_single_value_result": 0}) == "{}\t1\t\n".format(part_name)
-    assert node1.query("SELECT count() from replicated_mt") == "4\n"
+    assert node1.query("SELECT count() from replicated_mt_1") == "4\n"

From b244499d6916923761b07fbe46edc700511985ee Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Wed, 27 Jan 2021 22:24:59 +0300
Subject: [PATCH 0267/1238] Fix build.

---
 src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp | 1 +
 src/Processors/QueryPlan/Optimizations/limitPushDown.cpp   | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp b/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp
index 026ceccebfb..a45bf76202d 100644
--- a/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp
+++ b/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp
@@ -79,6 +79,7 @@ bool tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes)
                                               filter_step->getFilterColumnName(), filter_step->removesFilterColumn());
 
     parent->setStepDescription(description + " [split]");
+    return true;
 }
 
 }
diff --git a/src/Processors/QueryPlan/Optimizations/limitPushDown.cpp b/src/Processors/QueryPlan/Optimizations/limitPushDown.cpp
index fd6c2e19f6b..0b2fcdfb209 100644
--- a/src/Processors/QueryPlan/Optimizations/limitPushDown.cpp
+++ b/src/Processors/QueryPlan/Optimizations/limitPushDown.cpp
@@ -108,6 +108,7 @@ bool tryPushDownLimit(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
     limit->updateInputStream(transforming->getInputStreams().front());
 
     parent.swap(child);
+    return true;
 }
 
 }

From 0cb8a8070ed54a6d7bc8ff97f1ce61bee2fb703b Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com>
Date: Wed, 27 Jan 2021 22:50:18 +0300
Subject: [PATCH 0268/1238] Update skip_list.json

---
 tests/queries/skip_list.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json
index b48d642befd..79aae32da45 100644
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@@ -569,6 +569,6 @@
         "live_view",
         "memory_leak",
         "memory_limit",
-        "polygon_dicts", // they use an explicitly specified database
+        "polygon_dicts" // they use an explicitly specified database
     ]
 }

From 3f305cecc3dd165d7c2d4d0224bbceddb0c4dfb3 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Wed, 27 Jan 2021 22:52:18 +0300
Subject: [PATCH 0269/1238] add empty line after error messages in client

---
 programs/client/Client.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp
index 06bd7d84526..ef12974adea 100644
--- a/programs/client/Client.cpp
+++ b/programs/client/Client.cpp
@@ -931,13 +931,13 @@ private:
             }
             std::cerr << "Received exception from server (version "
                 << server_version << "):" << std::endl << "Code: "
-                << server_exception->code() << ". " << text << std::endl;
+                << server_exception->code() << ". " << text << std::endl << std::endl;
         }
 
         if (client_exception)
         {
             fmt::print(stderr,
-                "Error on processing query '{}':\n{}\n",
+                "Error on processing query '{}':\n{}\n\n",
                 full_query, client_exception->message());
         }
 

From 3b7373e9928ba808632b6f9aa1b894a49e07bb84 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Wed, 27 Jan 2021 22:53:07 +0300
Subject: [PATCH 0270/1238] More comments.

---
 .../QueryPlan/Optimizations/optimizeTree.cpp  | 27 +++++++++++++------
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp
index 8ac5a4482f1..962452b24af 100644
--- a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp
+++ b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp
@@ -13,11 +13,15 @@ void optimizeTree(QueryPlan::Node & root, QueryPlan::Nodes & nodes)
         QueryPlan::Node * node;
         Frame * parent = nullptr;
 
-        /// Will update only depth_limit layers of tree (if no other optimizations happen).
-        size_t depth_limit = 0;
+        /// If not zero, traverse only traverse_depth_limit layers of tree (if no other optimizations happen).
+        /// Otherwise, traverse all children.
+        size_t traverse_depth_limit = 0;
 
+        /// Next child to process.
         size_t next_child = 0;
 
+        /// If not zero, optimizations to current node again.
+        /// Skip optimizations which read less then `read_depth_limit` layers of tree.
         size_t read_depth_limit = 0;
     };
 
@@ -28,7 +32,9 @@ void optimizeTree(QueryPlan::Node & root, QueryPlan::Nodes & nodes)
     {
         auto & frame = stack.top();
 
-        if (frame.depth_limit != 1)
+        /// If traverse_depth_limit == 0, then traverse without limit (first entrance)
+        /// If traverse_depth_limit > 1, then traverse with (limit - 1)
+        if (frame.traverse_depth_limit != 1)
         {
             /// Traverse all children first.
             if (frame.next_child < frame.node->children.size())
@@ -37,7 +43,7 @@ void optimizeTree(QueryPlan::Node & root, QueryPlan::Nodes & nodes)
                 {
                        .node = frame.node->children[frame.next_child],
                        .parent = &frame,
-                       .depth_limit = frame.depth_limit ? (frame.depth_limit - 1) : 0,
+                       .traverse_depth_limit = frame.traverse_depth_limit ? (frame.traverse_depth_limit - 1) : 0,
                 });
 
                 ++frame.next_child;
@@ -45,7 +51,9 @@ void optimizeTree(QueryPlan::Node & root, QueryPlan::Nodes & nodes)
             }
         }
 
-        if (frame.depth_limit == 0 || frame.read_depth_limit)
+        /// If frame.traverse_depth_limit == 0, apply optimizations on first entrance.
+        /// If frame.read_depth_limit, then one of children was updated, and we may need to repeat some optimizations.
+        if (frame.traverse_depth_limit == 0 || frame.read_depth_limit)
         {
             size_t max_update_depth = 0;
 
@@ -73,15 +81,18 @@ void optimizeTree(QueryPlan::Node & root, QueryPlan::Nodes & nodes)
             }
 
             /// Traverse `max_update_depth` layers of tree again.
-            frame.depth_limit = max_update_depth;
+            frame.traverse_depth_limit = max_update_depth;
             frame.next_child = 0;
 
             /// Also go to parents and tell them to apply some optimizations again.
+            /// Check: for our parent we set read_depth_limit = 1, which means it can skip optimizations
+            ///        which use ony 1 layer of tree (not read current node).
+            /// Note that frame.read_depth_limit will be zeroed.
             Frame * cur_frame = &frame;
-            for (size_t cur_depth = 0; cur_frame && cur_frame->depth_limit; ++cur_depth)
+            for (size_t cur_depth = 0; cur_frame && cur_frame->traverse_depth_limit; ++cur_depth)
             {
                 /// If cur_frame is traversed first time, all optimizations will apply anyway.
-                if (cur_frame->depth_limit == 0)
+                if (cur_frame->traverse_depth_limit == 0)
                     break;
 
                 /// Stop if limit is applied and stricter then current.

From ff371ccdb69a47d064a087c4ad6a434fbba3a85a Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Wed, 27 Jan 2021 23:22:05 +0300
Subject: [PATCH 0271/1238] Fixed data race in function DictGetNoType

---
 src/Functions/FunctionsExternalDictionaries.h | 31 ++++++++++++++++---
 1 file changed, 26 insertions(+), 5 deletions(-)

diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h
index e48265aa2b7..cf8eb2f90df 100644
--- a/src/Functions/FunctionsExternalDictionaries.h
+++ b/src/Functions/FunctionsExternalDictionaries.h
@@ -267,30 +267,54 @@ public:
         if (arguments.size() < 3)
             throw Exception{"Wrong argument count for function " + getName(), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
 
+        /// TODO: We can load only dictionary structure
+
         String dictionary_name;
+
         if (const auto * name_col = checkAndGetColumnConst<ColumnString>(arguments[0].column.get()))
             dictionary_name = name_col->getValue<String>();
         else
             throw Exception{"Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName()
                 + ", expected a const string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
 
+        String attribute_name;
+
         if (const auto * name_col = checkAndGetColumnConst<ColumnString>(arguments[1].column.get()))
             attribute_name = name_col->getValue<String>();
         else
             throw Exception{"Illegal type " + arguments[1].type->getName() + " of second argument of function " + getName()
                 + ", expected a const string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
 
-        dictionary = helper.getDictionary(dictionary_name);
+        auto dictionary = helper.getDictionary(dictionary_name);
 
         return helper.getDictionaryAttribute(dictionary, attribute_name).type;
     }
 
     ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
     {
+        std::cerr << "FunctionDictGetNoType::executeImpl " << this << std::endl;
+
         if (input_rows_count == 0)
             return result_type->createColumn();
 
-        /// TODO: Use accurateCast if argument is integer
+        String dictionary_name;
+
+        if (const auto * name_col = checkAndGetColumnConst<ColumnString>(arguments[0].column.get()))
+            dictionary_name = name_col->getValue<String>();
+        else
+            throw Exception{"Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName()
+                + ", expected a const string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
+
+        String attribute_name;
+
+        if (const auto * name_col = checkAndGetColumnConst<ColumnString>(arguments[1].column.get()))
+            attribute_name = name_col->getValue<String>();
+        else
+            throw Exception{"Illegal type " + arguments[1].type->getName() + " of second argument of function " + getName()
+                + ", expected a const string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
+
+        auto dictionary = helper.getDictionary(dictionary_name);
+
         if (!WhichDataType(arguments[2].type).isUInt64() && !isTuple(arguments[2].type))
             throw Exception{"Illegal type " + arguments[2].type->getName() + " of third argument of function "
                     + getName() + ", must be UInt64 or tuple(...).",
@@ -361,9 +385,6 @@ public:
 
 private:
     mutable FunctionDictHelper helper;
-    /// Initialized in getReturnTypeImpl
-    mutable std::shared_ptr<const IDictionaryBase> dictionary;
-    mutable String attribute_name;
 };
 
 template <typename DataType, typename Name, DictionaryGetFunctionType dictionary_get_function_type>

From e5125b8c73797ce51d61c5f3bce487af3f3bdf37 Mon Sep 17 00:00:00 2001
From: Anton Popov <pad11rus@gmail.com>
Date: Thu, 28 Jan 2021 02:20:57 +0300
Subject: [PATCH 0272/1238] add comments and test for compatibility

---
 src/DataStreams/ITTLAlgorithm.h               |  6 ++
 src/DataStreams/TTLAggregationAlgorithm.h     |  2 +
 src/DataStreams/TTLColumnAlgorithm.h          |  2 +
 src/DataStreams/TTLDeleteAlgorithm.h          |  2 +
 src/DataStreams/TTLUpdateInfoAlgorithm.h      |  1 +
 .../MergeTree/MergeTreeDataPartTTLInfo.cpp    | 13 ++--
 .../MergeTree/MergeTreeDataPartTTLInfo.h      |  3 +-
 src/Storages/System/StorageSystemParts.cpp    |  7 +-
 src/Storages/TTLDescription.cpp               | 11 ++-
 tests/integration/test_ttl_replicated/test.py | 75 +++++++++++++++++++
 10 files changed, 111 insertions(+), 11 deletions(-)

diff --git a/src/DataStreams/ITTLAlgorithm.h b/src/DataStreams/ITTLAlgorithm.h
index 429ca4bcc61..d87d43d8c0c 100644
--- a/src/DataStreams/ITTLAlgorithm.h
+++ b/src/DataStreams/ITTLAlgorithm.h
@@ -8,6 +8,10 @@
 namespace DB
 {
 
+/**
+ * Represents the actions, which are required to do
+ * with data, when TTL is expired: delete, aggregate, etc.
+ */
 class ITTLAlgorithm
 {
 public:
@@ -18,6 +22,8 @@ public:
     virtual ~ITTLAlgorithm() = default;
 
     virtual void execute(Block & block) = 0;
+
+    /// Updates TTL metadata of the data_part.
     virtual void finalize(const MutableDataPartPtr & data_part) const = 0;
 
     bool isMinTTLExpired() const { return force || isTTLExpired(old_ttl_info.min); }
diff --git a/src/DataStreams/TTLAggregationAlgorithm.h b/src/DataStreams/TTLAggregationAlgorithm.h
index 977e755ca8b..c2f40bab6b9 100644
--- a/src/DataStreams/TTLAggregationAlgorithm.h
+++ b/src/DataStreams/TTLAggregationAlgorithm.h
@@ -7,6 +7,8 @@
 namespace DB
 {
 
+/// Aggregates rows according to 'TTL expr GROUP BY key' description.
+/// Aggregation key must be the prefix of the sorting key.
 class TTLAggregationAlgorithm final : public ITTLAlgorithm
 {
 public:
diff --git a/src/DataStreams/TTLColumnAlgorithm.h b/src/DataStreams/TTLColumnAlgorithm.h
index 3b1c199292d..e09dd663af0 100644
--- a/src/DataStreams/TTLColumnAlgorithm.h
+++ b/src/DataStreams/TTLColumnAlgorithm.h
@@ -5,6 +5,8 @@
 namespace DB
 {
 
+/// Deletes (replaces to default) values in column according to column's TTL description.
+/// If all values in column are replaced with defaults, this column won't be written to part.
 class TTLColumnAlgorithm final : public ITTLAlgorithm
 {
 public:
diff --git a/src/DataStreams/TTLDeleteAlgorithm.h b/src/DataStreams/TTLDeleteAlgorithm.h
index 36da59da46e..8ab3f8b63e8 100644
--- a/src/DataStreams/TTLDeleteAlgorithm.h
+++ b/src/DataStreams/TTLDeleteAlgorithm.h
@@ -5,6 +5,8 @@
 namespace DB
 {
 
+/// Deletes rows according to table TTL description with
+/// possible optional condition in 'WHERE' clause.
 class TTLDeleteAlgorithm final : public ITTLAlgorithm
 {
 public:
diff --git a/src/DataStreams/TTLUpdateInfoAlgorithm.h b/src/DataStreams/TTLUpdateInfoAlgorithm.h
index 4a680c5bb3a..c1ef0e1c90d 100644
--- a/src/DataStreams/TTLUpdateInfoAlgorithm.h
+++ b/src/DataStreams/TTLUpdateInfoAlgorithm.h
@@ -5,6 +5,7 @@
 namespace DB
 {
 
+/// Calculates new ttl_info and does nothing with data.
 class TTLUpdateInfoAlgorithm : public ITTLAlgorithm
 {
 public:
diff --git a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp
index e9e4a2b2998..e130fbc1798 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp
@@ -70,7 +70,7 @@ void MergeTreeDataPartTTLInfos::read(ReadBuffer & in)
         updatePartMinMaxTTL(table_ttl.min, table_ttl.max);
     }
 
-    auto fill_ttl_info_map = [](const JSON & json_part, TTLInfoMap & ttl_info_map)
+    auto fill_ttl_info_map = [this](const JSON & json_part, TTLInfoMap & ttl_info_map, bool update_min_max)
     {
         for (auto elem : json_part) // NOLINT
         {
@@ -79,28 +79,31 @@ void MergeTreeDataPartTTLInfos::read(ReadBuffer & in)
             ttl_info.max = elem["max"].getUInt();
             String expression = elem["expression"].getString();
             ttl_info_map.emplace(expression, ttl_info);
+
+            if (update_min_max)
+                updatePartMinMaxTTL(ttl_info.min, ttl_info.max);
         }
     };
 
     if (json.has("moves"))
     {
         const JSON & moves = json["moves"];
-        fill_ttl_info_map(moves, moves_ttl);
+        fill_ttl_info_map(moves, moves_ttl, false);
     }
     if (json.has("recompression"))
     {
         const JSON & recompressions = json["recompression"];
-        fill_ttl_info_map(recompressions, recompression_ttl);
+        fill_ttl_info_map(recompressions, recompression_ttl, false);
     }
     if (json.has("group_by"))
     {
         const JSON & group_by = json["group_by"];
-        fill_ttl_info_map(group_by, group_by_ttl);
+        fill_ttl_info_map(group_by, group_by_ttl, true);
     }
     if (json.has("rows_where"))
     {
         const JSON & rows_where = json["rows_where"];
-        fill_ttl_info_map(rows_where, rows_where_ttl);
+        fill_ttl_info_map(rows_where, rows_where_ttl, true);
     }
 }
 
diff --git a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h
index 8b972116384..9d1606ee44a 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h
+++ b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h
@@ -71,7 +71,8 @@ struct MergeTreeDataPartTTLInfos
 
     bool empty() const
     {
-        return !part_min_ttl && moves_ttl.empty() && recompression_ttl.empty() && group_by_ttl.empty();
+        /// part_min_ttl in minimum of rows, rows_where and group_by TTLs
+        return !part_min_ttl && moves_ttl.empty() && recompression_ttl.empty();
     }
 };
 
diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp
index 8845644f5db..bc5a96c6159 100644
--- a/src/Storages/System/StorageSystemParts.cpp
+++ b/src/Storages/System/StorageSystemParts.cpp
@@ -71,7 +71,11 @@ StorageSystemParts::StorageSystemParts(const StorageID & table_id_)
 
         {"group_by_ttl_info.expression",                std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
         {"group_by_ttl_info.min",                       std::make_shared<DataTypeArray>(std::make_shared<DataTypeDateTime>())},
-        {"group_by_ttl_info.max",                       std::make_shared<DataTypeArray>(std::make_shared<DataTypeDateTime>())}
+        {"group_by_ttl_info.max",                       std::make_shared<DataTypeArray>(std::make_shared<DataTypeDateTime>())},
+
+        {"rows_where_ttl_info.expression",              std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
+        {"rows_where_ttl_info.min",                     std::make_shared<DataTypeArray>(std::make_shared<DataTypeDateTime>())},
+        {"rows_where_ttl_info.max",                     std::make_shared<DataTypeArray>(std::make_shared<DataTypeDateTime>())}
     }
     )
 {
@@ -186,6 +190,7 @@ void StorageSystemParts::processNextStorage(MutableColumns & columns_, const Sto
 
         add_ttl_info_map(part->ttl_infos.recompression_ttl);
         add_ttl_info_map(part->ttl_infos.group_by_ttl);
+        add_ttl_info_map(part->ttl_infos.rows_where_ttl);
 
         /// _state column should be the latest.
         if (has_state_column)
diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp
index 19195e6ba6d..41c20b2714b 100644
--- a/src/Storages/TTLDescription.cpp
+++ b/src/Storages/TTLDescription.cpp
@@ -230,15 +230,13 @@ TTLDescription TTLDescription::getTTLFromAST(
 
                 if (!data.has_aggregate_function)
                     throw Exception(ErrorCodes::BAD_TTL_EXPRESSION,
-                    "Invalid expression for assignment of column {}. Should be an aggregate function", assignment.column_name);
+                    "Invalid expression for assignment of column {}. Should contain an aggregate function", assignment.column_name);
 
                 expression = addTypeConversionToAST(std::move(expression), columns.getPhysical(assignment.column_name).type->getName());
                 aggregations.emplace_back(assignment.column_name, std::move(expression));
+                aggregation_columns_set.insert(assignment.column_name);
             }
 
-            for (const auto & [name, _] : aggregations)
-                aggregation_columns_set.insert(name);
-
             if (aggregation_columns_set.size() != ttl_element->group_by_assignments.size())
                 throw Exception(
                     "Multiple aggregations set for one column in TTL Expression",
@@ -247,6 +245,10 @@ TTLDescription TTLDescription::getTTLFromAST(
             result.group_by_keys = Names(pk_columns.begin(), pk_columns.begin() + ttl_element->group_by_key.size());
 
             const auto & primary_key_expressions = primary_key.expression_list_ast->children;
+
+            /// Wrap with 'any' aggregate function primary key columns,
+            /// which are not in 'GROUP BY' key and was not set explicitly.
+            /// The separate step, because not all primary key columns are ordinary columns.
             for (size_t i = ttl_element->group_by_key.size(); i < primary_key_expressions.size(); ++i)
             {
                 if (!aggregation_columns_set.count(pk_columns[i]))
@@ -257,6 +259,7 @@ TTLDescription TTLDescription::getTTLFromAST(
                 }
             }
 
+            /// Wrap with 'any' aggregate function other columns, which was not set explicitly.
             for (const auto & column : columns.getOrdinary())
             {
                 if (!aggregation_columns_set.count(column.name) && !used_primary_key_columns_set.count(column.name))
diff --git a/tests/integration/test_ttl_replicated/test.py b/tests/integration/test_ttl_replicated/test.py
index 9418aeaaf01..84b3340925c 100644
--- a/tests/integration/test_ttl_replicated/test.py
+++ b/tests/integration/test_ttl_replicated/test.py
@@ -9,6 +9,11 @@ cluster = ClickHouseCluster(__file__)
 node1 = cluster.add_instance('node1', with_zookeeper=True)
 node2 = cluster.add_instance('node2', with_zookeeper=True)
 
+node3 = cluster.add_instance('node3', with_zookeeper=True)
+node4 = cluster.add_instance('node4', with_zookeeper=True, image='yandex/clickhouse-server', tag='20.12.4.5', stay_alive=True, with_installed_binary=True)
+
+node5 = cluster.add_instance('node5', with_zookeeper=True, image='yandex/clickhouse-server', tag='20.12.4.5', stay_alive=True, with_installed_binary=True)
+node6 = cluster.add_instance('node6', with_zookeeper=True, image='yandex/clickhouse-server', tag='20.12.4.5', stay_alive=True, with_installed_binary=True)
 
 @pytest.fixture(scope="module")
 def started_cluster():
@@ -329,3 +334,73 @@ def test_ttl_empty_parts(started_cluster):
     error_msg = '<Error> default.test_ttl_empty_parts (ReplicatedMergeTreeCleanupThread)'
     assert not node1.contains_in_log(error_msg)
     assert not node2.contains_in_log(error_msg)
+
+@pytest.mark.parametrize(
+    ('node_left', 'node_right', 'num_run'),
+    [(node1, node2, 0), (node3, node4, 1), (node5, node6, 2)]
+)
+def test_ttl_compatibility(started_cluster, node_left, node_right, num_run):
+    drop_table([node_left, node_right], "test_ttl")
+    drop_table([node_left, node_right], "test_ttl_group_by")
+    drop_table([node_left, node_right], "test_ttl_where")
+
+    for node in [node_left, node_right]:
+        node.query(
+            '''
+                CREATE TABLE test_ttl(date DateTime, id UInt32)
+                ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl_{suff}', '{replica}')
+                ORDER BY id PARTITION BY toDayOfMonth(date)
+                TTL date + INTERVAL 3 SECOND
+            '''.format(suff=num_run, replica=node.name))
+
+        node.query(
+            '''
+                CREATE TABLE test_ttl_group_by(date DateTime, id UInt32, val UInt64)
+                ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl_group_by_{suff}', '{replica}')
+                ORDER BY id PARTITION BY toDayOfMonth(date)
+                TTL date + INTERVAL 3 SECOND GROUP BY id SET val = sum(val)
+            '''.format(suff=num_run, replica=node.name))
+
+        node.query(
+            '''
+                CREATE TABLE test_ttl_where(date DateTime, id UInt32)
+                ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl_where_{suff}', '{replica}')
+                ORDER BY id PARTITION BY toDayOfMonth(date)
+                TTL date + INTERVAL 3 SECOND DELETE WHERE id % 2 = 1
+            '''.format(suff=num_run, replica=node.name))
+
+    node_left.query("INSERT INTO test_ttl VALUES (now(), 1)")
+    node_left.query("INSERT INTO test_ttl VALUES (toDateTime('2100-10-11 10:00:00'), 2)")
+    node_right.query("INSERT INTO test_ttl VALUES (now(), 3)")
+    node_right.query("INSERT INTO test_ttl VALUES (toDateTime('2100-10-11 10:00:00'), 4)")
+
+    node_left.query("INSERT INTO test_ttl_group_by VALUES (now(), 0, 1)")
+    node_left.query("INSERT INTO test_ttl_group_by VALUES (now(), 0, 2)")
+    node_right.query("INSERT INTO test_ttl_group_by VALUES (now(), 0, 3)")
+    node_right.query("INSERT INTO test_ttl_group_by VALUES (now(), 0, 4)")
+
+    node_left.query("INSERT INTO test_ttl_where VALUES (now(), 1)")
+    node_left.query("INSERT INTO test_ttl_where VALUES (now(), 2)")
+    node_right.query("INSERT INTO test_ttl_where VALUES (now(), 3)")
+    node_right.query("INSERT INTO test_ttl_where VALUES (now(), 4)")
+
+    if node_left.with_installed_binary:
+        node_left.restart_with_latest_version()
+
+    if node_right.with_installed_binary:
+        node_right.restart_with_latest_version()
+    
+    time.sleep(5) # Wait for TTL
+
+    node_right.query("OPTIMIZE TABLE test_ttl FINAL")
+    node_right.query("OPTIMIZE TABLE test_ttl_group_by FINAL")
+    node_right.query("OPTIMIZE TABLE test_ttl_where FINAL")
+
+    assert node_left.query("SELECT id FROM test_ttl ORDER BY id") == "2\n4\n"
+    assert node_right.query("SELECT id FROM test_ttl ORDER BY id") == "2\n4\n"
+
+    assert node_left.query("SELECT val FROM test_ttl_group_by ORDER BY id") == "10\n"
+    assert node_right.query("SELECT val FROM test_ttl_group_by ORDER BY id") == "10\n"
+
+    assert node_left.query("SELECT id FROM test_ttl_where ORDER BY id") == "2\n4\n"
+    assert node_right.query("SELECT id FROM test_ttl_where ORDER BY id") == "2\n4\n"

From 6c7ecc1aa0bb070d56fa54de9d8f7c8d7105e466 Mon Sep 17 00:00:00 2001
From: Anton Popov <pad11rus@gmail.com>
Date: Thu, 28 Jan 2021 02:44:18 +0300
Subject: [PATCH 0273/1238] allow to run all style checks in one file

---
 utils/check-style/check-style-all | 7 +++++++
 1 file changed, 7 insertions(+)
 create mode 100755 utils/check-style/check-style-all

diff --git a/utils/check-style/check-style-all b/utils/check-style/check-style-all
new file mode 100755
index 00000000000..fa06a882794
--- /dev/null
+++ b/utils/check-style/check-style-all
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+
+./check-style -n
+./check-typos
+./check-whitespaces -n
+./check-duplicate-includes.sh
+./shellcheck-run.sh

From 9c2130cc17842c93b5af01a4e091afaec3481c8d Mon Sep 17 00:00:00 2001
From: Anton Popov <pad11rus@gmail.com>
Date: Thu, 28 Jan 2021 03:13:30 +0300
Subject: [PATCH 0274/1238] allow to run style check from other directories

---
 utils/check-style/check-style-all | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/utils/check-style/check-style-all b/utils/check-style/check-style-all
index fa06a882794..c34224e5469 100755
--- a/utils/check-style/check-style-all
+++ b/utils/check-style/check-style-all
@@ -1,7 +1,8 @@
 #!/usr/bin/env bash
 
-./check-style -n
-./check-typos
-./check-whitespaces -n
-./check-duplicate-includes.sh
-./shellcheck-run.sh
+dir=$(dirname $0)
+$dir/check-style -n
+$dir/check-typos
+$dir/check-whitespaces -n
+$dir/check-duplicate-includes.sh
+$dir/shellcheck-run.sh

From 71d3482ceaabdca0d0dc48266c7f7add16b4b3e2 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 28 Jan 2021 03:43:42 +0300
Subject: [PATCH 0275/1238] Fix potentially unsafe code in Poco

---
 contrib/poco | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/poco b/contrib/poco
index 2c32e17c7df..e11f3c97157 160000
--- a/contrib/poco
+++ b/contrib/poco
@@ -1 +1 @@
-Subproject commit 2c32e17c7dfee1f8bf24227b697cdef5fddf0823
+Subproject commit e11f3c971570cf6a31006cd21cadf41a259c360a

From d69af4333db11a52fdd5432b79d6d94daff28408 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 28 Jan 2021 03:46:12 +0300
Subject: [PATCH 0276/1238] Better asserts

---
 src/Functions/LowerUpperUTF8Impl.h | 2 +-
 src/Functions/randomStringUTF8.cpp | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Functions/LowerUpperUTF8Impl.h b/src/Functions/LowerUpperUTF8Impl.h
index fc13466f4fe..f1ec9e7da1c 100644
--- a/src/Functions/LowerUpperUTF8Impl.h
+++ b/src/Functions/LowerUpperUTF8Impl.h
@@ -144,7 +144,7 @@ struct LowerUpperUTF8Impl
                 if (dst_code_point > 0)
                 {
                     int dst_sequence_length = UTF8::convert(dst_code_point, dst, src_end - src);
-                    assert(dst_sequence_length >= 0);
+                    assert(dst_sequence_length >= 0 && dst_sequence_length <= 4);
 
                     /// We don't support cases when lowercase and uppercase characters occupy different number of bytes in UTF-8.
                     /// As an example, this happens for ß and ẞ.
diff --git a/src/Functions/randomStringUTF8.cpp b/src/Functions/randomStringUTF8.cpp
index a5954eaaf9f..ca7af32b238 100644
--- a/src/Functions/randomStringUTF8.cpp
+++ b/src/Functions/randomStringUTF8.cpp
@@ -122,8 +122,8 @@ public:
                 auto length1 = UTF8::convert(code_point1, pos, sizeof(int));
                 auto length2 = UTF8::convert(code_point2, pos, sizeof(int));
 
-                assert(length1 >= 0);
-                assert(length2 >= 0);
+                assert(length1 >= 0 && length1 <= 4);
+                assert(length2 >= 0 && length2 <= 4);
 
                 pos += length1;
                 last_writen_bytes = length2;

From 9e1c1157b50abe0ac2d09198b812ba5612849775 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 28 Jan 2021 04:08:48 +0300
Subject: [PATCH 0277/1238] Fix error

---
 src/Functions/randomStringUTF8.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/Functions/randomStringUTF8.cpp b/src/Functions/randomStringUTF8.cpp
index ca7af32b238..0813c1e6465 100644
--- a/src/Functions/randomStringUTF8.cpp
+++ b/src/Functions/randomStringUTF8.cpp
@@ -120,12 +120,11 @@ public:
 
                 /// We have padding in column buffers that we can overwrite.
                 auto length1 = UTF8::convert(code_point1, pos, sizeof(int));
-                auto length2 = UTF8::convert(code_point2, pos, sizeof(int));
-
                 assert(length1 >= 0 && length1 <= 4);
-                assert(length2 >= 0 && length2 <= 4);
-
                 pos += length1;
+
+                auto length2 = UTF8::convert(code_point2, pos, sizeof(int));
+                assert(length2 >= 0 && length2 <= 4);
                 last_writen_bytes = length2;
                 pos += last_writen_bytes;
             }

From 708e68b98313be485f3414aa1f4591dcea633592 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 28 Jan 2021 04:28:19 +0300
Subject: [PATCH 0278/1238] Fix stack overflow in coroutine

---
 src/DataTypes/DataTypeFactory.cpp                |  8 +++++++-
 .../01675_data_type_coroutine.reference          |  1 +
 .../0_stateless/01675_data_type_coroutine.sh     | 16 ++++++++++++++++
 3 files changed, 24 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/01675_data_type_coroutine.reference
 create mode 100755 tests/queries/0_stateless/01675_data_type_coroutine.sh

diff --git a/src/DataTypes/DataTypeFactory.cpp b/src/DataTypes/DataTypeFactory.cpp
index 2f100202ee9..dc3ce039dbd 100644
--- a/src/DataTypes/DataTypeFactory.cpp
+++ b/src/DataTypes/DataTypeFactory.cpp
@@ -29,8 +29,14 @@ namespace ErrorCodes
 
 DataTypePtr DataTypeFactory::get(const String & full_name) const
 {
+    /// Data type parser can be invoked from coroutines with small stack.
+    /// Value 315 is known to cause stack overflow in some test configurations (debug build, sanitizers)
+    /// let's make the threshold significantly lower.
+    /// It is impractical for user to have complex data types with this depth.
+    static constexpr size_t data_type_max_parse_depth = 200;
+
     ParserDataType parser;
-    ASTPtr ast = parseQuery(parser, full_name.data(), full_name.data() + full_name.size(), "data type", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
+    ASTPtr ast = parseQuery(parser, full_name.data(), full_name.data() + full_name.size(), "data type", 0, data_type_max_parse_depth);
     return get(ast);
 }
 
diff --git a/tests/queries/0_stateless/01675_data_type_coroutine.reference b/tests/queries/0_stateless/01675_data_type_coroutine.reference
new file mode 100644
index 00000000000..7326d960397
--- /dev/null
+++ b/tests/queries/0_stateless/01675_data_type_coroutine.reference
@@ -0,0 +1 @@
+Ok
diff --git a/tests/queries/0_stateless/01675_data_type_coroutine.sh b/tests/queries/0_stateless/01675_data_type_coroutine.sh
new file mode 100755
index 00000000000..781e43e4134
--- /dev/null
+++ b/tests/queries/0_stateless/01675_data_type_coroutine.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+I=0
+while true
+do
+    I=$((I + 1))
+    TYPE=$(perl -e "print 'Array(' x $I; print 'UInt8'; print ')' x $I")
+    ${CLICKHOUSE_CLIENT} --max_parser_depth 1000000 --query "SELECT * FROM remote('127.0.0.{1,2}', generateRandom('x $TYPE', 1, 1, 1)) LIMIT 1 FORMAT Null" 2>&1 | grep -q -F 'Maximum parse depth' && break;
+done
+
+#echo "I = ${I}"
+echo 'Ok'

From 8ffc2bb72678fb1ab1a12bcbd7ee67824bbd353b Mon Sep 17 00:00:00 2001
From: benbiti <wangshouben@hotmail.com>
Date: Thu, 28 Jan 2021 10:11:43 +0800
Subject: [PATCH 0279/1238] update aggregate-functions translations in Chinese
 Doc

---
 docs/zh/sql-reference/aggregate-functions/index.md | 14 +++++++-------
 .../sql-reference/aggregate-functions/reference.md |  4 ++--
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/docs/zh/sql-reference/aggregate-functions/index.md b/docs/zh/sql-reference/aggregate-functions/index.md
index 436a8f433ea..2344c3e6dc0 100644
--- a/docs/zh/sql-reference/aggregate-functions/index.md
+++ b/docs/zh/sql-reference/aggregate-functions/index.md
@@ -1,11 +1,12 @@
 ---
+toc_folder_title: 聚合函数
 toc_priority: 33
-toc_title: 聚合函数
+toc_title: 简介
 ---
 
 # 聚合函数 {#aggregate-functions}
 
-聚合函数在 [正常](http://www.sql-tutorial.com/sql-aggregate-functions-sql-tutorial) 方式如预期的数据库专家。
+聚合函数如数据库专家预期的方式 [正常](http://www.sql-tutorial.com/sql-aggregate-functions-sql-tutorial) 工作。
 
 ClickHouse还支持:
 
@@ -14,7 +15,7 @@ ClickHouse还支持:
 
 ## 空处理 {#null-processing}
 
-在聚合过程中，所有 `NULL`s被跳过。
+在聚合过程中，所有 `NULL` 被跳过。
 
 **例:**
 
@@ -30,7 +31,7 @@ ClickHouse还支持:
 └───┴──────┘
 ```
 
-比方说，你需要在总的值 `y` 列:
+比方说，你需要计算 `y` 列的总数:
 
 ``` sql
 SELECT sum(y) FROM t_null_big
@@ -40,9 +41,8 @@ SELECT sum(y) FROM t_null_big
     │      7 │
     └────────┘
 
-该 `sum` 函数解释 `NULL` 作为 `0`. 特别是，这意味着，如果函数接收输入的选择，其中所有的值 `NULL`，那么结果将是 `0`，不 `NULL`.
 
-现在你可以使用 `groupArray` 函数从创建一个数组 `y` 列:
+现在你可以使用 `groupArray` 函数用 `y` 列创建一个数组:
 
 ``` sql
 SELECT groupArray(y) FROM t_null_big
@@ -54,6 +54,6 @@ SELECT groupArray(y) FROM t_null_big
 └───────────────┘
 ```
 
-`groupArray` 不包括 `NULL` 在生成的数组中。
+在 `groupArray` 生成的数组中不包括 `NULL`。
 
 [原始文章](https://clickhouse.tech/docs/en/query_language/agg_functions/) <!--hide-->
diff --git a/docs/zh/sql-reference/aggregate-functions/reference.md b/docs/zh/sql-reference/aggregate-functions/reference.md
index cf7dddb9b7e..3a224886a00 100644
--- a/docs/zh/sql-reference/aggregate-functions/reference.md
+++ b/docs/zh/sql-reference/aggregate-functions/reference.md
@@ -1,9 +1,9 @@
 ---
 toc_priority: 36
-toc_title: 聚合函数
+toc_title: 参考手册
 ---
 
-# 聚合函数引用 {#aggregate-functions-reference}
+# 参考手册 {#aggregate-functions-reference}
 
 ## count {#agg_function-count}
 

From 297cfb4857836295f25cdb783d6b6afe78d7e017 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 28 Jan 2021 06:34:44 +0300
Subject: [PATCH 0280/1238] Less parser depth

---
 src/Parsers/ParserDataType.cpp | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/src/Parsers/ParserDataType.cpp b/src/Parsers/ParserDataType.cpp
index 0148f2f3bb9..3d3f393a300 100644
--- a/src/Parsers/ParserDataType.cpp
+++ b/src/Parsers/ParserDataType.cpp
@@ -14,21 +14,29 @@ namespace
 {
 
 /// Wrapper to allow mixed lists of nested and normal types.
-class ParserNestedTableOrExpression : public IParserBase
+/// Parameters are either:
+/// - Nested table elements;
+/// - Enum element in form of 'a' = 1;
+/// - literal;
+/// - another data type (or identifier)
+class ParserDataTypeArgument : public IParserBase
 {
-    private:
-        const char * getName() const override { return "data type or expression"; }
-        bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override
-        {
-            ParserNestedTable parser1;
+private:
+    const char * getName() const override { return "data type argument"; }
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override
+    {
+        ParserNestedTable nested_parser;
+        ParserDataType data_type_parser;
+        ParserLiteral literal_parser;
 
-            if (parser1.parse(pos, node, expected))
-                return true;
+        const char * operators[] = {"=", "equals", nullptr};
+        ParserLeftAssociativeBinaryOperatorList enum_parser(operators, std::make_unique<ParserLiteral>());
 
-            ParserExpression parser2;
-
-            return parser2.parse(pos, node, expected);
-        }
+        return nested_parser.parse(pos, node, expected)
+            || enum_parser.parse(pos, node, expected)
+            || literal_parser.parse(pos, node, expected)
+            || data_type_parser.parse(pos, node, expected);
+    }
 };
 
 }
@@ -104,7 +112,7 @@ bool ParserDataType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
     ++pos;
 
     /// Parse optional parameters
-    ParserList args_parser(std::make_unique<ParserNestedTableOrExpression>(), std::make_unique<ParserToken>(TokenType::Comma));
+    ParserList args_parser(std::make_unique<ParserDataTypeArgument>(), std::make_unique<ParserToken>(TokenType::Comma));
     ASTPtr expr_list_args;
 
     if (!args_parser.parse(pos, expr_list_args, expected))

From 2acff24aaf63afeb0dc57ab4206ac7bb9dec8ef0 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 28 Jan 2021 06:42:20 +0300
Subject: [PATCH 0281/1238] The test most likely would not work in Arcadia

---
 tests/queries/0_stateless/arcadia_skip_list.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/queries/0_stateless/arcadia_skip_list.txt b/tests/queries/0_stateless/arcadia_skip_list.txt
index 1b42e38a8e9..a34621fc55a 100644
--- a/tests/queries/0_stateless/arcadia_skip_list.txt
+++ b/tests/queries/0_stateless/arcadia_skip_list.txt
@@ -196,3 +196,4 @@
 01181_db_atomic_drop_on_cluster
 01658_test_base64Encode_mysql_compatibility
 01659_test_base64Decode_mysql_compatibility
+01675_data_type_coroutine

From 68c542aec681bbf841f45fc879c1e97052449555 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 28 Jan 2021 07:43:59 +0300
Subject: [PATCH 0282/1238] Update test

---
 .../0_stateless/00945_bloom_filter_index.sql  | 28 +++++++++----------
 .../01414_low_cardinality_nullable.sql        |  6 ++--
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/tests/queries/0_stateless/00945_bloom_filter_index.sql b/tests/queries/0_stateless/00945_bloom_filter_index.sql
index 82321a75c67..ad9c807fc5a 100644
--- a/tests/queries/0_stateless/00945_bloom_filter_index.sql
+++ b/tests/queries/0_stateless/00945_bloom_filter_index.sql
@@ -163,23 +163,23 @@ DROP TABLE IF EXISTS bloom_filter_lc_null_types_test;
 DROP TABLE IF EXISTS bloom_filter_array_lc_null_types_test;
 
 CREATE TABLE bloom_filter_array_lc_null_types_test (
-    order_key   Array(LowCardinality(Nullable((UInt64)))),
+    order_key   Array(LowCardinality(Nullable(UInt64))),
 
-    i8 Array(LowCardinality(Nullable((Int8)))),
-    i16 Array(LowCardinality(Nullable((Int16)))),
-    i32 Array(LowCardinality(Nullable((Int32)))),
-    i64 Array(LowCardinality(Nullable((Int64)))),
-    u8 Array(LowCardinality(Nullable((UInt8)))),
-    u16 Array(LowCardinality(Nullable((UInt16)))),
-    u32 Array(LowCardinality(Nullable((UInt32)))),
-    u64 Array(LowCardinality(Nullable((UInt64)))),
-    f32 Array(LowCardinality(Nullable((Float32)))),
-    f64 Array(LowCardinality(Nullable((Float64)))),
+    i8 Array(LowCardinality(Nullable(Int8))),
+    i16 Array(LowCardinality(Nullable(Int16))),
+    i32 Array(LowCardinality(Nullable(Int32))),
+    i64 Array(LowCardinality(Nullable(Int64))),
+    u8 Array(LowCardinality(Nullable(UInt8))),
+    u16 Array(LowCardinality(Nullable(UInt16))),
+    u32 Array(LowCardinality(Nullable(UInt32))),
+    u64 Array(LowCardinality(Nullable(UInt64))),
+    f32 Array(LowCardinality(Nullable(Float32))),
+    f64 Array(LowCardinality(Nullable(Float64))),
 
-    date Array(LowCardinality(Nullable((Date)))),
+    date Array(LowCardinality(Nullable(Date))),
     date_time Array(LowCardinality(Nullable(DateTime('Europe/Moscow')))),
 
-    str Array(LowCardinality(Nullable((String)))),
+    str Array(LowCardinality(Nullable(String))),
     fixed_string Array(LowCardinality(Nullable(FixedString(5)))),
     INDEX idx (i8, i16, i32, i64, u8, u16, u32, u64, f32, f64, date, date_time, str, fixed_string)
     TYPE bloom_filter GRANULARITY 1)
@@ -286,7 +286,7 @@ SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(fixed_string
 DROP TABLE IF EXISTS bloom_filter_array_lc_null_types_test;
 
 DROP TABLE IF EXISTS bloom_filter_array_offsets_lc_str;
-CREATE TABLE bloom_filter_array_offsets_lc_str (order_key int, str Array(LowCardinality((String))), INDEX idx str TYPE bloom_filter(1.) GRANULARITY 1024) ENGINE = MergeTree() ORDER BY order_key SETTINGS index_granularity = 1024;
+CREATE TABLE bloom_filter_array_offsets_lc_str (order_key int, str Array(LowCardinality(String)), INDEX idx str TYPE bloom_filter(1.) GRANULARITY 1024) ENGINE = MergeTree() ORDER BY order_key SETTINGS index_granularity = 1024;
 INSERT INTO bloom_filter_array_offsets_lc_str SELECT number AS i, if(i%2, ['value'], []) FROM system.numbers LIMIT 10000;
 SELECT count() FROM bloom_filter_array_offsets_lc_str WHERE has(str, 'value');
 DROP TABLE IF EXISTS bloom_filter_array_offsets_lc_str;
diff --git a/tests/queries/0_stateless/01414_low_cardinality_nullable.sql b/tests/queries/0_stateless/01414_low_cardinality_nullable.sql
index 9a554ead776..596e90adfd6 100644
--- a/tests/queries/0_stateless/01414_low_cardinality_nullable.sql
+++ b/tests/queries/0_stateless/01414_low_cardinality_nullable.sql
@@ -1,7 +1,7 @@
 DROP TABLE IF EXISTS lc_nullable;
 
 CREATE TABLE lc_nullable (
-    order_key   Array(LowCardinality(Nullable((UInt64)))),
+    order_key   Array(LowCardinality(Nullable(UInt64))),
 
     i8  Array(LowCardinality(Nullable(Int8))),
     i16 Array(LowCardinality(Nullable(Int16))),
@@ -14,10 +14,10 @@ CREATE TABLE lc_nullable (
     f32 Array(LowCardinality(Nullable(Float32))),
     f64 Array(LowCardinality(Nullable(Float64))),
 
-    date Array(LowCardinality(Nullable((Date)))),
+    date Array(LowCardinality(Nullable(Date))),
     date_time Array(LowCardinality(Nullable(DateTime('Europe/Moscow')))),
 
-    str Array(LowCardinality(Nullable((String)))),
+    str Array(LowCardinality(Nullable(String))),
     fixed_string Array(LowCardinality(Nullable(FixedString(5))))
 ) ENGINE = MergeTree() ORDER BY order_key;
 

From 579f8da573900dd51c87616a518dc10ad1c0f77d Mon Sep 17 00:00:00 2001
From: Vladimir Chebotarev <vladimir.chebotarev@gmail.com>
Date: Thu, 28 Jan 2021 09:32:41 +0300
Subject: [PATCH 0283/1238] Added SSE-C support in S3 client.

---
 .../engines/table-engines/integrations/s3.md  |  4 +-
 .../mergetree-family/mergetree.md             |  4 +-
 src/Disks/S3/registerDiskS3.cpp               |  3 +
 src/IO/S3Common.cpp                           | 70 +++++++------------
 src/IO/S3Common.h                             | 17 +----
 src/Storages/StorageS3.cpp                    |  1 +
 src/Storages/StorageS3Settings.cpp            |  3 +-
 src/Storages/StorageS3Settings.h              |  1 +
 8 files changed, 37 insertions(+), 66 deletions(-)

diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md
index d8cceb4d511..5858a0803e6 100644
--- a/docs/en/engines/table-engines/integrations/s3.md
+++ b/docs/en/engines/table-engines/integrations/s3.md
@@ -136,8 +136,7 @@ The following settings can be specified in configuration file for given endpoint
 -   `access_key_id` and `secret_access_key` — Optional. Specifies credentials to use with given endpoint.
 -   `use_environment_credentials` — Optional, default value is `false`. If set to `true`, S3 client will try to obtain credentials from environment variables and Amazon EC2 metadata for given endpoint.
 -   `header` — Optional, can be speficied multiple times. Adds specified HTTP header to a request to given endpoint.
-
-This configuration also applies to S3 disks in `MergeTree` table engine family.
+-   `server_side_encryption_customer_key_base64` — Optional. If specified, required headers for accessing S3 objects with SSE-C encryption will be set.
 
 Example:
 
@@ -149,6 +148,7 @@ Example:
         <!-- <secret_access_key>SECRET_ACCESS_KEY</secret_access_key> -->
         <!-- <use_environment_credentials>false</use_environment_credentials> -->
         <!-- <header>Authorization: Bearer SOME-TOKEN</header> -->
+        <!-- <server_side_encryption_customer_key_base64>BASE64-ENCODED-KEY</server_side_encryption_customer_key_base64> -->
     </endpoint-name>
 </s3>
 ```
diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md
index 084d05ec0a0..2626cde1cdc 100644
--- a/docs/en/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md
@@ -671,6 +671,7 @@ Configuration markup:
             <endpoint>https://storage.yandexcloud.net/my-bucket/root-path/</endpoint>
             <access_key_id>your_access_key_id</access_key_id>
             <secret_access_key>your_secret_access_key</secret_access_key>
+            <server_side_encryption_customer_key_base64>your_base64_encoded_customer_key</server_side_encryption_customer_key_base64>
             <proxy>
                 <uri>http://proxy1</uri>
                 <uri>http://proxy2</uri>
@@ -706,7 +707,8 @@ Optional parameters:
 -   `metadata_path` — Path on local FS to store metadata files for S3. Default value is `/var/lib/clickhouse/disks/<disk_name>/`. 
 -   `cache_enabled` — Allows to cache mark and index files on local FS. Default value is `true`. 
 -   `cache_path` — Path on local FS where to store cached mark and index files. Default value is `/var/lib/clickhouse/disks/<disk_name>/cache/`. 
--   `skip_access_check` — If true disk access checks will not be performed on disk start-up. Default value is `false`.
+-   `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`.
+-   `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set.
 
 
 S3 disk can be configured as `main` or `cold` storage:
diff --git a/src/Disks/S3/registerDiskS3.cpp b/src/Disks/S3/registerDiskS3.cpp
index f9eddebdf88..1878d2f8ead 100644
--- a/src/Disks/S3/registerDiskS3.cpp
+++ b/src/Disks/S3/registerDiskS3.cpp
@@ -7,6 +7,7 @@
 #include "DiskS3.h"
 #include "Disks/DiskCacheWrapper.h"
 #include "Disks/DiskFactory.h"
+#include "Storages/StorageS3Settings.h"
 #include "ProxyConfiguration.h"
 #include "ProxyListConfiguration.h"
 #include "ProxyResolverConfiguration.h"
@@ -137,6 +138,8 @@ void registerDiskS3(DiskFactory & factory)
             uri.is_virtual_hosted_style,
             config.getString(config_prefix + ".access_key_id", ""),
             config.getString(config_prefix + ".secret_access_key", ""),
+            config.getString(config_prefix + ".server_side_encryption_customer_key_base64", ""),
+            {},
             config.getBool(config_prefix + ".use_environment_credentials", config.getBool("s3.use_environment_credentials", false))
         );
 
diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp
index fbcd4ed97f1..f9962735ddc 100644
--- a/src/IO/S3Common.cpp
+++ b/src/IO/S3Common.cpp
@@ -13,6 +13,7 @@
 #    include <aws/core/platform/Environment.h>
 #    include <aws/core/utils/logging/LogMacros.h>
 #    include <aws/core/utils/logging/LogSystemInterface.h>
+#    include <aws/core/utils/HashingUtils.h>
 #    include <aws/s3/S3Client.h>
 #    include <aws/core/http/HttpClientFactory.h>
 #    include <IO/S3/PocoHTTPClientFactory.h>
@@ -273,56 +274,12 @@ namespace S3
         return ret;
     }
 
-    /// This method is not static because it requires ClientFactory to be initialized.
-    std::shared_ptr<Aws::S3::S3Client> ClientFactory::create( // NOLINT
-        const String & endpoint,
-        bool is_virtual_hosted_style,
-        const String & access_key_id,
-        const String & secret_access_key,
-        bool use_environment_credentials,
-        const RemoteHostFilter & remote_host_filter,
-        unsigned int s3_max_redirects)
-    {
-        PocoHTTPClientConfiguration client_configuration(remote_host_filter, s3_max_redirects);
-
-        if (!endpoint.empty())
-            client_configuration.endpointOverride = endpoint;
-
-        return create(client_configuration,
-            is_virtual_hosted_style,
-            access_key_id,
-            secret_access_key,
-            use_environment_credentials);
-    }
-
-    std::shared_ptr<Aws::S3::S3Client> ClientFactory::create( // NOLINT
-        const PocoHTTPClientConfiguration & cfg_,
-        bool is_virtual_hosted_style,
-        const String & access_key_id,
-        const String & secret_access_key,
-        bool use_environment_credentials)
-    {
-        Aws::Auth::AWSCredentials credentials(access_key_id, secret_access_key);
-
-        PocoHTTPClientConfiguration client_configuration = cfg_;
-        client_configuration.updateSchemeAndRegion();
-
-        return std::make_shared<Aws::S3::S3Client>(
-            std::make_shared<S3CredentialsProviderChain>(
-                client_configuration,
-                credentials,
-                use_environment_credentials), // AWS credentials provider.
-            std::move(client_configuration), // Client configuration.
-            Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, // Sign policy.
-            is_virtual_hosted_style || client_configuration.endpointOverride.empty() // Use virtual addressing if endpoint is not specified.
-        );
-    }
-
     std::shared_ptr<Aws::S3::S3Client> ClientFactory::create( // NOLINT
         const PocoHTTPClientConfiguration & cfg_,
         bool is_virtual_hosted_style,
         const String & access_key_id,
         const String & secret_access_key,
+        const String & server_side_encryption_customer_key_base64,
         HeaderCollection headers,
         bool use_environment_credentials)
     {
@@ -331,7 +288,28 @@ namespace S3
 
         Aws::Auth::AWSCredentials credentials(access_key_id, secret_access_key);
 
-        auto auth_signer = std::make_shared<S3AuthSigner>(client_configuration, std::move(credentials), std::move(headers), use_environment_credentials);
+        if (!server_side_encryption_customer_key_base64.empty())
+        {
+            /// See S3Client::GeneratePresignedUrlWithSSEC().
+
+            headers.push_back({Aws::S3::SSEHeaders::SERVER_SIDE_ENCRYPTION_CUSTOMER_ALGORITHM,
+                Aws::S3::Model::ServerSideEncryptionMapper::GetNameForServerSideEncryption(Aws::S3::Model::ServerSideEncryption::AES256)});
+
+            headers.push_back({Aws::S3::SSEHeaders::SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY,
+                server_side_encryption_customer_key_base64});
+
+            Aws::Utils::ByteBuffer buffer = Aws::Utils::HashingUtils::Base64Decode(server_side_encryption_customer_key_base64);
+            String str_buffer(reinterpret_cast<char *>(buffer.GetUnderlyingData()), buffer.GetLength());
+            headers.push_back({Aws::S3::SSEHeaders::SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY_MD5,
+                Aws::Utils::HashingUtils::Base64Encode(Aws::Utils::HashingUtils::CalculateMD5(str_buffer))});
+        }
+
+        auto auth_signer = std::make_shared<S3AuthSigner>(
+            client_configuration,
+            std::move(credentials),
+            std::move(headers),
+            use_environment_credentials);
+
         return std::make_shared<Aws::S3::S3Client>(
             std::move(auth_signer),
             std::move(client_configuration), // Client configuration.
diff --git a/src/IO/S3Common.h b/src/IO/S3Common.h
index c367444395d..b071daefee1 100644
--- a/src/IO/S3Common.h
+++ b/src/IO/S3Common.h
@@ -31,27 +31,12 @@ public:
 
     static ClientFactory & instance();
 
-    std::shared_ptr<Aws::S3::S3Client> create(
-        const String & endpoint,
-        bool is_virtual_hosted_style,
-        const String & access_key_id,
-        const String & secret_access_key,
-        bool use_environment_credentials,
-        const RemoteHostFilter & remote_host_filter,
-        unsigned int s3_max_redirects);
-
-    std::shared_ptr<Aws::S3::S3Client> create(
-        const PocoHTTPClientConfiguration & cfg,
-        bool is_virtual_hosted_style,
-        const String & access_key_id,
-        const String & secret_access_key,
-        bool use_environment_credentials);
-
     std::shared_ptr<Aws::S3::S3Client> create(
         const PocoHTTPClientConfiguration & cfg,
         bool is_virtual_hosted_style,
         const String & access_key_id,
         const String & secret_access_key,
+        const String & server_side_encryption_customer_key_base64,
         HeaderCollection headers,
         bool use_environment_credentials);
 
diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp
index 0af115dc0b5..ec83103ae41 100644
--- a/src/Storages/StorageS3.cpp
+++ b/src/Storages/StorageS3.cpp
@@ -234,6 +234,7 @@ StorageS3::StorageS3(
         uri_.is_virtual_hosted_style,
         credentials.GetAWSAccessKeyId(),
         credentials.GetAWSSecretKey(),
+        settings.server_side_encryption_customer_key_base64,
         std::move(settings.headers),
         settings.use_environment_credentials.value_or(global_context.getConfigRef().getBool("s3.use_environment_credentials", false))
     );
diff --git a/src/Storages/StorageS3Settings.cpp b/src/Storages/StorageS3Settings.cpp
index 54384ac8253..6d97e6fae95 100644
--- a/src/Storages/StorageS3Settings.cpp
+++ b/src/Storages/StorageS3Settings.cpp
@@ -30,6 +30,7 @@ void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::U
             auto endpoint = config.getString(config_elem + "." + key + ".endpoint");
             auto access_key_id = config.getString(config_elem + "." + key + ".access_key_id", "");
             auto secret_access_key = config.getString(config_elem + "." + key + ".secret_access_key", "");
+            auto server_side_encryption_customer_key_base64 = config.getString(config_elem + "." + key + ".server_side_encryption_customer_key_base64", "");
             std::optional<bool> use_environment_credentials;
             if (config.has(config_elem + "." + key + ".use_environment_credentials"))
             {
@@ -51,7 +52,7 @@ void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::U
                 }
             }
 
-            settings.emplace(endpoint, S3AuthSettings{std::move(access_key_id), std::move(secret_access_key), std::move(headers), use_environment_credentials});
+            settings.emplace(endpoint, S3AuthSettings{std::move(access_key_id), std::move(secret_access_key), std::move(server_side_encryption_customer_key_base64), std::move(headers), use_environment_credentials});
         }
     }
 }
diff --git a/src/Storages/StorageS3Settings.h b/src/Storages/StorageS3Settings.h
index 88f964774c6..59b98ebdfdd 100644
--- a/src/Storages/StorageS3Settings.h
+++ b/src/Storages/StorageS3Settings.h
@@ -27,6 +27,7 @@ struct S3AuthSettings
 {
     const String access_key_id;
     const String secret_access_key;
+    const String server_side_encryption_customer_key_base64;
 
     const HeaderCollection headers;
 

From 355c99568e360d891bc657f0bd81e2f531a1ec3f Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 28 Jan 2021 10:16:36 +0300
Subject: [PATCH 0284/1238] Fix error

---
 src/Common/StringSearcher.h | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/Common/StringSearcher.h b/src/Common/StringSearcher.h
index fb731bfcda3..c6f0cff71f1 100644
--- a/src/Common/StringSearcher.h
+++ b/src/Common/StringSearcher.h
@@ -103,8 +103,11 @@ public:
             /// Invalid UTF-8
             if (first_u32 < 0)
             {
-                l = needle[0];
-                u = needle[0];
+                /// Process it verbatim as a sequence of bytes.
+                size_t src_len = UTF8::seqLength(*needle);
+
+                memcpy(l_seq, needle, src_len);
+                memcpy(u_seq, needle, src_len);
             }
             else
             {
@@ -113,10 +116,11 @@ public:
 
                 /// lower and uppercase variants of the first octet of the first character in `needle`
                 UTF8::convert(first_l_u32, l_seq, sizeof(l_seq));
-                l = l_seq[0];
                 UTF8::convert(first_u_u32, u_seq, sizeof(u_seq));
-                u = u_seq[0];
             }
+
+            l = l_seq[0];
+            u = u_seq[0];
         }
 
 #ifdef __SSE4_1__

From 94430d917376c4df1af0fcf34e6282f4e97decb8 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 28 Jan 2021 10:21:54 +0300
Subject: [PATCH 0285/1238] Make Fuzzer more reliable

---
 docker/test/fuzzer/run-fuzzer.sh | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh
index 20cdc5bf10c..a1f84cdbbba 100755
--- a/docker/test/fuzzer/run-fuzzer.sh
+++ b/docker/test/fuzzer/run-fuzzer.sh
@@ -21,13 +21,16 @@ function clone
 
     git init
     git remote add origin https://github.com/ClickHouse/ClickHouse
-    git fetch --depth=100 origin "$SHA_TO_TEST"
-    git fetch --depth=100 origin master # Used to obtain the list of modified or added tests
+
+    # Network is unreliable. GitHub neither.
+    for _ in {1..100}; do git fetch --depth=100 origin "$SHA_TO_TEST" && break; sleep 1; done
+    # Used to obtain the list of modified or added tests
+    for _ in {1..100}; do git fetch --depth=100 origin master && break; sleep 1; done
 
     # If not master, try to fetch pull/.../{head,merge}
     if [ "$PR_TO_TEST" != "0" ]
     then
-        git fetch --depth=100 origin "refs/pull/$PR_TO_TEST/*:refs/heads/pull/$PR_TO_TEST/*"
+        for _ in {1..100}; do git fetch --depth=100 origin "refs/pull/$PR_TO_TEST/*:refs/heads/pull/$PR_TO_TEST/*" && break; sleep 1; done
     fi
 
     git checkout "$SHA_TO_TEST"

From 4c0c2d03fbd4a34bfd3bd5ef6269be0a1d070bc5 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 28 Jan 2021 10:21:54 +0300
Subject: [PATCH 0286/1238] Make Fuzzer more reliable

---
 docker/test/fuzzer/run-fuzzer.sh | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh
index b036f99e91d..9af401238a3 100755
--- a/docker/test/fuzzer/run-fuzzer.sh
+++ b/docker/test/fuzzer/run-fuzzer.sh
@@ -21,13 +21,16 @@ function clone
 
     git init
     git remote add origin https://github.com/ClickHouse/ClickHouse
-    git fetch --depth=100 origin "$SHA_TO_TEST"
-    git fetch --depth=100 origin master # Used to obtain the list of modified or added tests
+
+    # Network is unreliable. GitHub neither.
+    for _ in {1..100}; do git fetch --depth=100 origin "$SHA_TO_TEST" && break; sleep 1; done
+    # Used to obtain the list of modified or added tests
+    for _ in {1..100}; do git fetch --depth=100 origin master && break; sleep 1; done
 
     # If not master, try to fetch pull/.../{head,merge}
     if [ "$PR_TO_TEST" != "0" ]
     then
-        git fetch --depth=100 origin "refs/pull/$PR_TO_TEST/*:refs/heads/pull/$PR_TO_TEST/*"
+        for _ in {1..100}; do git fetch --depth=100 origin "refs/pull/$PR_TO_TEST/*:refs/heads/pull/$PR_TO_TEST/*" && break; sleep 1; done
     fi
 
     git checkout "$SHA_TO_TEST"

From 402e031a22682d0da294f53746c3c1259e0d8607 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Thu, 28 Jan 2021 11:07:18 +0300
Subject: [PATCH 0287/1238] Throw exception only in debug mode

---
 src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
index fc762b21046..c9175ad14db 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
@@ -195,7 +195,11 @@ void ReplicatedMergeTreePartCheckThread::searchForMissingPartAndFetchIfPossible(
         if (!storage.queue.remove(zookeeper, part_name))
         {
             /// The part was not in our queue. Why did it happen?
+#ifdef NDEBUG
+            LOG_ERROR(log, "Missing part {} is not in our queue.", part_name);
+#else
             throw Exception(ErrorCodes::LOGICAL_ERROR, "Missing part {} is not in our queue.", part_name);
+#endif
         }
 
         /** This situation is possible if on all the replicas where the part was, it deteriorated.

From b9b573976e465670990c041ec303b59df1ee3919 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 28 Jan 2021 11:26:10 +0300
Subject: [PATCH 0288/1238] Minor code improvement in JOIN

---
 src/Interpreters/HashJoin.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp
index ac2429334e4..ad1a37c2703 100644
--- a/src/Interpreters/HashJoin.cpp
+++ b/src/Interpreters/HashJoin.cpp
@@ -688,7 +688,7 @@ public:
         if constexpr (has_defaults)
             applyLazyDefaults();
 
-        for (size_t j = 0; j < right_indexes.size(); ++j)
+        for (size_t j = 0, size = right_indexes.size(); j < size; ++j)
             columns[j]->insertFrom(*block.getByPosition(right_indexes[j]).column, row_num);
     }
 
@@ -701,7 +701,7 @@ public:
     {
         if (lazy_defaults_count)
         {
-            for (size_t j = 0; j < right_indexes.size(); ++j)
+            for (size_t j = 0, size = right_indexes.size(); j < size; ++j)
                 JoinCommon::addDefaultValues(*columns[j], type_name[j].first, lazy_defaults_count);
             lazy_defaults_count = 0;
         }

From 18f1fd0caad0af4bb3d38274664babb01bf284fc Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Thu, 28 Jan 2021 11:40:12 +0300
Subject: [PATCH 0289/1238] Make integration odbc tests idempotent

---
 tests/integration/test_odbc_interaction/test.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/integration/test_odbc_interaction/test.py b/tests/integration/test_odbc_interaction/test.py
index 0ec89be9413..084fc407f39 100644
--- a/tests/integration/test_odbc_interaction/test.py
+++ b/tests/integration/test_odbc_interaction/test.py
@@ -262,18 +262,20 @@ def test_sqlite_odbc_cached_dictionary(started_cluster):
     assert_eq_with_retry(node1, "select dictGetUInt8('sqlite3_odbc_cached', 'Z', toUInt64(1))", "12")
 
 
-def test_postgres_odbc_hached_dictionary_with_schema(started_cluster):
+def test_postgres_odbc_hashed_dictionary_with_schema(started_cluster):
     conn = get_postgres_conn()
     cursor = conn.cursor()
+    cursor.execute("truncate table clickhouse.test_table")
     cursor.execute("insert into clickhouse.test_table values(1, 'hello'),(2, 'world')")
     node1.query("SYSTEM RELOAD DICTIONARY postgres_odbc_hashed")
     assert_eq_with_retry(node1, "select dictGetString('postgres_odbc_hashed', 'column2', toUInt64(1))", "hello")
     assert_eq_with_retry(node1, "select dictGetString('postgres_odbc_hashed', 'column2', toUInt64(2))", "world")
 
 
-def test_postgres_odbc_hached_dictionary_no_tty_pipe_overflow(started_cluster):
+def test_postgres_odbc_hashed_dictionary_no_tty_pipe_overflow(started_cluster):
     conn = get_postgres_conn()
     cursor = conn.cursor()
+    cursor.execute("truncate table clickhouse.test_table")
     cursor.execute("insert into clickhouse.test_table values(3, 'xxx')")
     for i in range(100):
         try:

From 879adc5437754c677abd76915de485a973298037 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Thu, 28 Jan 2021 11:55:20 +0300
Subject: [PATCH 0290/1238] Add new line to status

---
 tests/clickhouse-test | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/clickhouse-test b/tests/clickhouse-test
index e168f9372de..b2d97679b47 100755
--- a/tests/clickhouse-test
+++ b/tests/clickhouse-test
@@ -434,6 +434,9 @@ def run_tests_array(all_tests_with_params):
                                 if os.path.exists(stderr_file):
                                     os.remove(stderr_file)
 
+            if status and not status.endswith('\n'):
+                status += '\n'
+
             sys.stdout.write(status)
             sys.stdout.flush()
         except KeyboardInterrupt as e:

From 895600f67612d6db21a7aba53920e9827d93773c Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Thu, 28 Jan 2021 11:57:12 +0300
Subject: [PATCH 0291/1238] Add lsof to fasttest

---
 docker/test/fasttest/Dockerfile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile
index ac22a9dfaf0..e508c217e25 100644
--- a/docker/test/fasttest/Dockerfile
+++ b/docker/test/fasttest/Dockerfile
@@ -61,6 +61,7 @@ RUN apt-get update \
         software-properties-common \
         tzdata \
         unixodbc \
+        lsof \
        --yes --no-install-recommends
 
 RUN pip3 install numpy scipy pandas

From e1a138cda31678263848ae3e3e6bada0b715c245 Mon Sep 17 00:00:00 2001
From: tison <wander4096@gmail.com>
Date: Thu, 28 Jan 2021 16:59:42 +0800
Subject: [PATCH 0292/1238] correct style zh doc typo

---
 docs/zh/development/style.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/zh/development/style.md b/docs/zh/development/style.md
index 8f104e3a7d8..c8e883920dd 100644
--- a/docs/zh/development/style.md
+++ b/docs/zh/development/style.md
@@ -118,7 +118,7 @@ for (auto & stream : streams)
     stream.second->finalize();
 ```
 
-**18.** 行的某尾不应该包含空格。
+**18.** 行的末尾不应该包含空格。
 
 **19.** 源文件应该用 UTF-8 编码。
 

From 670d054cf3b2e161e9a79525bcae13d8f160c353 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Thu, 28 Jan 2021 12:08:48 +0300
Subject: [PATCH 0293/1238] Remove redundant lsof

---
 docker/test/fasttest/Dockerfile | 1 -
 1 file changed, 1 deletion(-)

diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile
index 46c0173c226..03b7b2fc53a 100644
--- a/docker/test/fasttest/Dockerfile
+++ b/docker/test/fasttest/Dockerfile
@@ -62,7 +62,6 @@ RUN apt-get update \
         software-properties-common \
         tzdata \
         unixodbc \
-        lsof \
        --yes --no-install-recommends
 
 RUN pip3 install numpy scipy pandas

From b5d2611aeae5972224790ed6732f52a595d9b219 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Thu, 28 Jan 2021 12:36:17 +0300
Subject: [PATCH 0294/1238] Clickhouse client query param CTE added test

---
 .../01674_clickhouse_client_query_param_cte.reference      | 1 +
 .../0_stateless/01674_clickhouse_client_query_param_cte.sh | 7 +++++++
 2 files changed, 8 insertions(+)
 create mode 100644 tests/queries/0_stateless/01674_clickhouse_client_query_param_cte.reference
 create mode 100755 tests/queries/0_stateless/01674_clickhouse_client_query_param_cte.sh

diff --git a/tests/queries/0_stateless/01674_clickhouse_client_query_param_cte.reference b/tests/queries/0_stateless/01674_clickhouse_client_query_param_cte.reference
new file mode 100644
index 00000000000..9daeafb9864
--- /dev/null
+++ b/tests/queries/0_stateless/01674_clickhouse_client_query_param_cte.reference
@@ -0,0 +1 @@
+test
diff --git a/tests/queries/0_stateless/01674_clickhouse_client_query_param_cte.sh b/tests/queries/0_stateless/01674_clickhouse_client_query_param_cte.sh
new file mode 100755
index 00000000000..ee75f675eb3
--- /dev/null
+++ b/tests/queries/0_stateless/01674_clickhouse_client_query_param_cte.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+$CLICKHOUSE_CLIENT --param_paramName="test" -q "WITH subquery AS (SELECT {paramName:String}) SELECT * FROM subquery"

From e2ade3c57463e9e971bfddf9bee25b120e9a7312 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Thu, 28 Jan 2021 12:40:08 +0300
Subject: [PATCH 0295/1238] Check where and prewhere identifiers exist.

---
 src/Interpreters/ExpressionAnalyzer.cpp | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp
index 77f3c9d7537..116c47eda55 100644
--- a/src/Interpreters/ExpressionAnalyzer.cpp
+++ b/src/Interpreters/ExpressionAnalyzer.cpp
@@ -844,7 +844,12 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere(
     step.required_output.push_back(prewhere_column_name);
     step.can_remove_required_output.push_back(true);
 
-    auto filter_type = (*step.actions()->getIndex().find(prewhere_column_name))->result_type;
+    const auto & index = step.actions()->getIndex();
+    auto it = index.find(prewhere_column_name);
+    if (it == index.end())
+        throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown identifier: '{}'", prewhere_column_name);
+
+    auto filter_type = (*it)->result_type;
     if (!filter_type->canBeUsedInBooleanContext())
         throw Exception("Invalid type for filter in PREWHERE: " + filter_type->getName(),
                         ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER);
@@ -944,7 +949,12 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain,
     step.required_output.push_back(where_column_name);
     step.can_remove_required_output = {true};
 
-    auto filter_type = (*step.actions()->getIndex().find(where_column_name))->result_type;
+    const auto & index = step.actions()->getIndex();
+    auto it = index.find(where_column_name);
+    if (it == index.end())
+        throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown identifier: '{}'", where_column_name);
+
+    auto filter_type = (*it)->result_type;
     if (!filter_type->canBeUsedInBooleanContext())
         throw Exception("Invalid type for filter in WHERE: " + filter_type->getName(),
                         ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER);

From 3d0f878535b530620e061e750178ccbb0fc18585 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Thu, 28 Jan 2021 12:44:12 +0300
Subject: [PATCH 0296/1238] Added test.

---
 .../0_stateless/01674_where_prewhere_array_crash.reference   | 0
 .../queries/0_stateless/01674_where_prewhere_array_crash.sql | 5 +++++
 2 files changed, 5 insertions(+)
 create mode 100644 tests/queries/0_stateless/01674_where_prewhere_array_crash.reference
 create mode 100644 tests/queries/0_stateless/01674_where_prewhere_array_crash.sql

diff --git a/tests/queries/0_stateless/01674_where_prewhere_array_crash.reference b/tests/queries/0_stateless/01674_where_prewhere_array_crash.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/01674_where_prewhere_array_crash.sql b/tests/queries/0_stateless/01674_where_prewhere_array_crash.sql
new file mode 100644
index 00000000000..d6eef000b36
--- /dev/null
+++ b/tests/queries/0_stateless/01674_where_prewhere_array_crash.sql
@@ -0,0 +1,5 @@
+drop table if exists tab;
+create table tab  (x UInt64, `arr.a` Array(UInt64), `arr.b` Array(UInt64)) engine = MergeTree order by x;
+select x from tab array join arr prewhere x != 0 where arr; -- { serverError 47; }
+select x from tab array join arr prewhere arr where x != 0; -- { serverError 47; }
+drop table if exists tab;

From f3dd1aadc37598babf558b2d4df56587c9349e25 Mon Sep 17 00:00:00 2001
From: Olga Revyakina <revolg@yandex-team.ru>
Date: Thu, 28 Jan 2021 13:06:44 +0300
Subject: [PATCH 0297/1238] Syntax and links

---
 docs/en/operations/system-tables/quotas.md       | 16 ++++++++--------
 docs/en/sql-reference/statements/create/quota.md | 14 +++++++++-----
 docs/ru/sql-reference/statements/create/quota.md | 13 ++++++++-----
 3 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/docs/en/operations/system-tables/quotas.md b/docs/en/operations/system-tables/quotas.md
index f4f52a4a131..3e797c9bdc6 100644
--- a/docs/en/operations/system-tables/quotas.md
+++ b/docs/en/operations/system-tables/quotas.md
@@ -7,16 +7,16 @@ Columns:
 - `id` ([UUID](../../sql-reference/data-types/uuid.md)) — Quota ID.
 - `storage`([String](../../sql-reference/data-types/string.md)) — Storage of quotas. Possible value: “users.xml” if a quota configured in the users.xml file, “disk” if a quota configured by an SQL-query.
 - `keys` ([Array](../../sql-reference/data-types/array.md)([Enum8](../../sql-reference/data-types/enum.md))) — Key specifies how the quota should be shared. If two connections use the same quota and key, they share the same amounts of resources. Values:
-- `[]` — All users share the same quota.
-- `['user_name']` — Connections with the same user name share the same quota.
-- `['ip_address']` — Connections from the same IP share the same quota.
-- `['client_key']` — Connections with the same key share the same quota. A key must be explicitly provided by a client. When using [clickhouse-client](../../interfaces/cli.md), pass a key value in the `--quota-key` parameter, or use the `quota_key` parameter in the client configuration file. When using HTTP interface, use the `X-ClickHouse-Quota` header.
-- `['user_name', 'client_key']` — Connections with the same `client_key` share the same quota. If a key isn’t provided by a client, the qouta is tracked for `user_name`.
-- `['client_key', 'ip_address']` — Connections with the same `client_key` share the same quota. If a key isn’t provided by a client, the qouta is tracked for `ip_address`.
+    - `[]` — All users share the same quota.
+    - `['user_name']` — Connections with the same user name share the same quota.
+    - `['ip_address']` — Connections from the same IP share the same quota.
+    - `['client_key']` — Connections with the same key share the same quota. A key must be explicitly provided by a client. When using [clickhouse-client](../../interfaces/cli.md), pass a key value in the `--quota-key` parameter, or use the `quota_key` parameter in the client configuration file. When using HTTP interface, use the `X-ClickHouse-Quota` header.
+    - `['user_name', 'client_key']` — Connections with the same `client_key` share the same quota. If a key isn’t provided by a client, the qouta is tracked for `user_name`.
+    - `['client_key', 'ip_address']` — Connections with the same `client_key` share the same quota. If a key isn’t provided by a client, the qouta is tracked for `ip_address`.
 - `durations` ([Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Time interval lengths in seconds.
 - `apply_to_all` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Logical value. It shows which users the quota is applied to. Values:
-- `0` — The quota applies to users specify in the `apply_to_list`.
-- `1` — The quota applies to all users except those listed in `apply_to_except`.
+    - `0` — The quota applies to users specify in the `apply_to_list`.
+    - `1` — The quota applies to all users except those listed in `apply_to_except`.
 - `apply_to_list` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — List of user names/[roles](../../operations/access-rights.md#role-management) that the quota should be applied to.
 - `apply_to_except` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — List of user names/roles that the quota should not apply to.
 
diff --git a/docs/en/sql-reference/statements/create/quota.md b/docs/en/sql-reference/statements/create/quota.md
index 8f3b89790e4..ec980af921f 100644
--- a/docs/en/sql-reference/statements/create/quota.md
+++ b/docs/en/sql-reference/statements/create/quota.md
@@ -11,13 +11,17 @@ Syntax:
 
 ``` sql
 CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
-    [KEYED BY {USER_NAME | IP_ADDRESS | CLIENT_KEY | CLIENT_KEY, USER_NAME | CLIENT_KEY, IP_ADDRESS} | NOT KEYED]
-    [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY | WEEK | MONTH | QUARTER | YEAR}
-        {MAX { {QUERIES | ERRORS | RESULT_ROWS | RESULT_BYTES | READ_ROWS | READ_BYTES | EXECUTION_TIME} = number } [,...] |
+    [KEYED BY {user_name | ip_address | client_key | client_key,user_name | client_key,ip_address} | NOT KEYED]
+    [FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year}
+        {MAX { {queries | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
          NO LIMITS | TRACKING ONLY} [,...]]
     [TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
 ```
 
+Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table.
+
+Parameters `queries`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table.
+
 `ON CLUSTER` clause allows creating quotas on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md).
 
 **Examples** 
@@ -25,11 +29,11 @@ CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
 Limit the maximum number of queries for the current user with 123 queries in 15 months constraint:
 
 ``` sql
-CREATE QUOTA qA FOR INTERVAL 15 MONTH MAX QUERIES 123 TO CURRENT_USER;
+CREATE QUOTA qA FOR INTERVAL 15 month MAX queries = 123 TO CURRENT_USER;
 ```
 
 For the default user limit the maximum execution time with half a second in 30 minutes, and limit the maximum number of queries with 321 and the maximum number of errors with 10 in 5 quaters:
 
 ``` sql
-CREATE QUOTA qB FOR INTERVAL 30 MINUTE MAX EXECUTION_TIME = 0.5, FOR INTERVAL 5 QUATER MAX QUERIES = 321, ERRORS = 10 TO default;
+CREATE QUOTA qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default;
 ```
diff --git a/docs/ru/sql-reference/statements/create/quota.md b/docs/ru/sql-reference/statements/create/quota.md
index 8ae3cc45ee1..65762071ea2 100644
--- a/docs/ru/sql-reference/statements/create/quota.md
+++ b/docs/ru/sql-reference/statements/create/quota.md
@@ -11,12 +11,15 @@ toc_title: "\u041a\u0432\u043e\u0442\u0430"
 
 ``` sql
 CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
-    [KEYED BY {USER_NAME | IP_ADDRESS | CLIENT_KEY | CLIENT_KEY, USER_NAME | CLIENT_KEY, IP_ADDRESS} | NOT KEYED]
-    [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY | WEEK | MONTH | QUARTER | YEAR}
-        {MAX { {QUERIES | ERRORS | RESULT_ROWS | RESULT_BYTES | READ_ROWS | READ_BYTES | EXECUTION_TIME} = number } [,...] |
+    [KEYED BY {user_name | ip_address | client_key | client_key, user_name | client_key, ip_address} | NOT KEYED]
+    [FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year}
+        {MAX { {queries | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
          NO LIMITS | TRACKING ONLY} [,...]]
     [TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
 ```
+Ключи `user_name`, `ip_address`, `client_key`, `client_key, user_name` и `client_key, ip_address` соответствуют полям таблицы [system.quotas](../../../operations/system-tables/quotas.md).
+
+Параметры `queries`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md).
 
 В секции `ON CLUSTER` можно указать кластеры, на которых создается квота, см. [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md).
 
@@ -25,13 +28,13 @@ CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
 Ограничить максимальное количество запросов для текущего пользователя — не более 123 запросов за каждые 15 месяцев:
 
 ``` sql
-CREATE QUOTA qA FOR INTERVAL 15 MONTH MAX QUERIES 123 TO CURRENT_USER;
+CREATE QUOTA qA FOR INTERVAL 15 month MAX queries = 123 TO CURRENT_USER;
 ```
 
 Ограничить по умолчанию максимальное время выполнения запроса — не более полсекунды за каждые 30 минут, а также максимальное число запросов — не более 321 и максимальное число ошибок — не более 10 за каждые 5 кварталов:
 
 ``` sql
-CREATE QUOTA qB FOR INTERVAL 30 MINUTE MAX EXECUTION_TIME = 0.5, FOR INTERVAL 5 QUATER MAX QUERIES = 321, ERRORS = 10 TO default;
+CREATE QUOTA qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default;
 ```
 
 [Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/create/quota) 

From 09e0fa45d297a594d34a43073649dfddf9f9d0f2 Mon Sep 17 00:00:00 2001
From: emhlbmc <lunatictwo@163.com>
Date: Thu, 28 Jan 2021 18:13:31 +0800
Subject: [PATCH 0298/1238] Update mergetree.md

---
 docs/zh/engines/table-engines/mergetree-family/mergetree.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/zh/engines/table-engines/mergetree-family/mergetree.md b/docs/zh/engines/table-engines/mergetree-family/mergetree.md
index 2fffcbe7ef3..353dd5f5bc8 100644
--- a/docs/zh/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/zh/engines/table-engines/mergetree-family/mergetree.md
@@ -401,7 +401,7 @@ TTL date_time + INTERVAL 15 HOUR
 
 ### 列 TTL {#mergetree-column-ttl}
 
-当列中的值过期时, ClickHouse会将它们替换成该列数据类型的默认值。如果数据片段中列的所有值均已过期，则ClickHouse 会从文件系统中的数据片段中此列。
+当列中的值过期时, ClickHouse会将它们替换成该列数据类型的默认值。如果数据片段中列的所有值均已过期，则ClickHouse 会从文件系统中的数据片段中删除此列。
 
 `TTL`子句不能被用于主键字段。
 

From 0749d20712707c9431015aad0e0092efc262b677 Mon Sep 17 00:00:00 2001
From: Olga Revyakina <revolg@yandex-team.ru>
Date: Thu, 28 Jan 2021 13:17:50 +0300
Subject: [PATCH 0299/1238] Same for ALTER

---
 docs/en/sql-reference/statements/alter/quota.md | 15 ++++++++++-----
 docs/ru/sql-reference/statements/alter/quota.md | 16 +++++++++++-----
 2 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/docs/en/sql-reference/statements/alter/quota.md b/docs/en/sql-reference/statements/alter/quota.md
index 18083e4a523..905c57503fc 100644
--- a/docs/en/sql-reference/statements/alter/quota.md
+++ b/docs/en/sql-reference/statements/alter/quota.md
@@ -12,23 +12,28 @@ Syntax:
 ``` sql
 ALTER QUOTA [IF EXISTS] name [ON CLUSTER cluster_name]
     [RENAME TO new_name]
-    [KEYED BY {USER_NAME | IP_ADDRESS | CLIENT_KEY | CLIENT_KEY, USER_NAME | CLIENT_KEY, IP_ADDRESS} | NOT KEYED]
-    [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY | WEEK | MONTH | QUARTER | YEAR}
-        {MAX { {QUERIES | ERRORS | RESULT_ROWS | RESULT_BYTES | READ_ROWS | READ_BYTES | EXECUTION_TIME} = number } [,...] |
+    [KEYED BY {user_name | ip_address | client_key | client_key,user_name | client_key,ip_address} | NOT KEYED]
+    [FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year}
+        {MAX { {queries | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
         NO LIMITS | TRACKING ONLY} [,...]]
     [TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
 ```
+Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table.
+
+Parameters `queries`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table.
+
+`ON CLUSTER` clause allows creating quotas on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md).
 
 **Examples**
 
 Limit the maximum number of queries for the current user with 123 queries in 15 months constraint:
 
 ``` sql
-ALTER QUOTA IF EXISTS qA FOR INTERVAL 15 MONTH MAX QUERIES 123 TO CURRENT_USER;
+ALTER QUOTA IF EXISTS qA FOR INTERVAL 15 month MAX queries = 123 TO CURRENT_USER;
 ```
 
 For the default user limit the maximum execution time with half a second in 30 minutes, and limit the maximum number of queries with 321 and the maximum number of errors with 10 in 5 quaters:
 
 ``` sql
-ALTER QUOTA IF EXISTS qB FOR INTERVAL 30 MINUTE MAX EXECUTION_TIME = 0.5, FOR INTERVAL 5 QUATER MAX QUERIES = 321, ERRORS = 10 TO default;
+ALTER QUOTA IF EXISTS qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default;
 ```
diff --git a/docs/ru/sql-reference/statements/alter/quota.md b/docs/ru/sql-reference/statements/alter/quota.md
index 1abb6336321..0bdac1381da 100644
--- a/docs/ru/sql-reference/statements/alter/quota.md
+++ b/docs/ru/sql-reference/statements/alter/quota.md
@@ -12,25 +12,31 @@ toc_title: QUOTA
 ``` sql
 ALTER QUOTA [IF EXISTS] name [ON CLUSTER cluster_name]
     [RENAME TO new_name]
-    [KEYED BY {USER_NAME | IP_ADDRESS | CLIENT_KEY | CLIENT_KEY, USER_NAME | CLIENT_KEY, IP_ADDRESS} | NOT KEYED]
-    [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY | WEEK | MONTH | QUARTER | YEAR}
-        {MAX { {QUERIES | ERRORS | RESULT_ROWS | RESULT_BYTES | READ_ROWS | READ_BYTES | EXECUTION_TIME} = number } [,...] |
+    [KEYED BY {user_name | ip_address | client_key | client_key,user_name | client_key,ip_address} | NOT KEYED]
+    [FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year}
+        {MAX { {queries | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
         NO LIMITS | TRACKING ONLY} [,...]]
     [TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
 ```
 
+Ключи `user_name`, `ip_address`, `client_key`, `client_key, user_name` и `client_key, ip_address` соответствуют полям таблицы [system.quotas](../../../operations/system-tables/quotas.md).
+
+Параметры `queries`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md).
+
+В секции `ON CLUSTER` можно указать кластеры, на которых создается квота, см. [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md).
+
 **Примеры**
 
 Ограничить для текущего пользователя максимальное число запросов — не более 123 запросов за каждые 15 месяцев:
 
 ``` sql
-ALTER QUOTA IF EXISTS qA FOR INTERVAL 15 MONTH MAX QUERIES 123 TO CURRENT_USER;
+ALTER QUOTA IF EXISTS qA FOR INTERVAL 15 month MAX queries = 123 TO CURRENT_USER;
 ```
 
 Ограничить по умолчанию максимальное время выполнения запроса — не более полсекунды за каждые 30 минут, а также максимальное число запросов — не более 321 и максимальное число ошибок — не более 10 за каждые 5 кварталов:
 
 ``` sql
-ALTER QUOTA IF EXISTS qB FOR INTERVAL 30 MINUTE MAX EXECUTION_TIME = 0.5, FOR INTERVAL 5 QUATER MAX QUERIES = 321, ERRORS = 10 TO default;
+ALTER QUOTA IF EXISTS qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default;
 ```
 
 [Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/alter/quota/) <!--hide-->

From e1765e7f882dca74c226448c5fa6fd0194f4054c Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Thu, 28 Jan 2021 14:00:24 +0300
Subject: [PATCH 0300/1238] Add method trivial to ActionsDAG

---
 src/Interpreters/ActionsDAG.cpp                            | 4 ++--
 src/Interpreters/ActionsDAG.h                              | 2 +-
 src/Processors/QueryPlan/Optimizations/Optimizations.h     | 2 +-
 src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp | 4 ++--
 src/Processors/QueryPlan/Optimizations/splitFilter.cpp     | 2 +-
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp
index 4c3a4cbe0fa..498141821ed 100644
--- a/src/Interpreters/ActionsDAG.cpp
+++ b/src/Interpreters/ActionsDAG.cpp
@@ -609,10 +609,10 @@ bool ActionsDAG::hasStatefulFunctions() const
     return false;
 }
 
-bool ActionsDAG::empty() const
+bool ActionsDAG::trivial() const
 {
     for (const auto & node : nodes)
-        if (node.type != ActionType::INPUT)
+        if (node.type == ActionType::FUNCTION || node.type == ActionType::ARRAY_JOIN)
             return false;
 
     return true;
diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h
index c82496b2a8a..b12da30e24f 100644
--- a/src/Interpreters/ActionsDAG.h
+++ b/src/Interpreters/ActionsDAG.h
@@ -223,7 +223,7 @@ public:
 
     bool hasArrayJoin() const;
     bool hasStatefulFunctions() const;
-    bool empty() const; /// If actions only contain inputs.
+    bool trivial() const; /// If actions has no functions or array join.
 
     const ActionsSettings & getSettings() const { return settings; }
 
diff --git a/src/Processors/QueryPlan/Optimizations/Optimizations.h b/src/Processors/QueryPlan/Optimizations/Optimizations.h
index 37b32d6a095..d6e9d345a23 100644
--- a/src/Processors/QueryPlan/Optimizations/Optimizations.h
+++ b/src/Processors/QueryPlan/Optimizations/Optimizations.h
@@ -45,7 +45,7 @@ inline const auto & getOptimizations()
 {
     static const std::array<Optimization, 4> optimizations =
     {{
-        {tryLiftUpArrayJoin, 2, 2},
+        {tryLiftUpArrayJoin, 2, 3},
         {tryPushDownLimit, 2, 2},
         {trySplitFilter, 1, 2},
         {tryMergeExpressions, 2, 1},
diff --git a/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp b/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp
index a45bf76202d..0dc022b14af 100644
--- a/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp
+++ b/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp
@@ -31,13 +31,13 @@ bool tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes)
     auto split_actions = expression->splitActionsBeforeArrayJoin(array_join->columns);
 
     /// No actions can be moved before ARRAY JOIN.
-    if (split_actions.first->empty())
+    if (split_actions.first->trivial())
         return false;
 
     auto description = parent->getStepDescription();
 
     /// All actions was moved before ARRAY JOIN. Swap Expression and ArrayJoin.
-    if (split_actions.second->empty())
+    if (split_actions.second->trivial())
     {
         auto expected_header = parent->getOutputStream().header;
 
diff --git a/src/Processors/QueryPlan/Optimizations/splitFilter.cpp b/src/Processors/QueryPlan/Optimizations/splitFilter.cpp
index fd82bd69a9e..09ce500ee54 100644
--- a/src/Processors/QueryPlan/Optimizations/splitFilter.cpp
+++ b/src/Processors/QueryPlan/Optimizations/splitFilter.cpp
@@ -21,7 +21,7 @@ bool trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes)
 
     auto split = expr->splitActionsForFilter(filter_step->getFilterColumnName());
 
-    if (split.second->empty())
+    if (split.second->trivial())
         return false;
 
     if (filter_step->removesFilterColumn())

From 105ecef628b8273e92096e230c767328f85476eb Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Thu, 28 Jan 2021 14:11:34 +0300
Subject: [PATCH 0301/1238] LowCardinality UUID fix

---
 src/DataTypes/DataTypeLowCardinality.cpp             | 12 +++++++-----
 src/DataTypes/DataTypeUUID.h                         |  1 +
 .../00688_low_cardinality_syntax.reference           |  3 +++
 .../0_stateless/00688_low_cardinality_syntax.sql     |  9 +++++++++
 4 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/src/DataTypes/DataTypeLowCardinality.cpp b/src/DataTypes/DataTypeLowCardinality.cpp
index a433d39c561..9614c150c7d 100644
--- a/src/DataTypes/DataTypeLowCardinality.cpp
+++ b/src/DataTypes/DataTypeLowCardinality.cpp
@@ -885,15 +885,17 @@ MutableColumnUniquePtr DataTypeLowCardinality::createColumnUniqueImpl(const IDat
     if (const auto * nullable_type = typeid_cast<const DataTypeNullable *>(&keys_type))
         type = nullable_type->getNestedType().get();
 
-    if (isString(type))
+    WhichDataType which(type);
+
+    if (which.isString())
         return creator(static_cast<ColumnString *>(nullptr));
-    if (isFixedString(type))
+    else if (which.isFixedString())
         return creator(static_cast<ColumnFixedString *>(nullptr));
-    if (typeid_cast<const DataTypeDate *>(type))
+    else if (which.isDate())
         return creator(static_cast<ColumnVector<UInt16> *>(nullptr));
-    if (typeid_cast<const DataTypeDateTime *>(type))
+    else if (which.isDateTime())
         return creator(static_cast<ColumnVector<UInt32> *>(nullptr));
-    if (isColumnedAsNumber(type))
+    else if (which.isInt() || which.isUInt() || which.isFloat())
     {
         MutableColumnUniquePtr column;
         TypeListNativeNumbers::forEach(CreateColumnVector(column, *type, creator));
diff --git a/src/DataTypes/DataTypeUUID.h b/src/DataTypes/DataTypeUUID.h
index e9f1d22325b..6290d05cc3b 100644
--- a/src/DataTypes/DataTypeUUID.h
+++ b/src/DataTypes/DataTypeUUID.h
@@ -31,6 +31,7 @@ public:
 
     bool canBeUsedInBitOperations() const override { return true; }
     bool canBeInsideNullable() const override { return true; }
+    bool canBeInsideLowCardinality() const override { return false; }
 
     bool canBePromoted() const override { return false; }
 };
diff --git a/tests/queries/0_stateless/00688_low_cardinality_syntax.reference b/tests/queries/0_stateless/00688_low_cardinality_syntax.reference
index 035402c889d..ca27069a7df 100644
--- a/tests/queries/0_stateless/00688_low_cardinality_syntax.reference
+++ b/tests/queries/0_stateless/00688_low_cardinality_syntax.reference
@@ -18,3 +18,6 @@ c
 d
 cb
 db
+-
+61f0c404-5cb3-11e7-907b-a6006ad3dba0	61f0c404-5cb3-11e7-907b-a6006ad3dba0	61f0c404-5cb3-11e7-907b-a6006ad3dba0
+\N	\N	\N
diff --git a/tests/queries/0_stateless/00688_low_cardinality_syntax.sql b/tests/queries/0_stateless/00688_low_cardinality_syntax.sql
index 98d7b7f5f8a..3ca7b482b84 100644
--- a/tests/queries/0_stateless/00688_low_cardinality_syntax.sql
+++ b/tests/queries/0_stateless/00688_low_cardinality_syntax.sql
@@ -71,3 +71,12 @@ select (toLowCardinality('a') as val) || 'b' group by val;
 select toLowCardinality(z) as val from (select arrayJoin(['c', 'd']) as z) group by val;
 select (toLowCardinality(z) as val) || 'b'  from (select arrayJoin(['c', 'd']) as z) group by val;
 
+select '-';
+drop table if exists lc_str_uuid;
+create table lc_str_uuid(str1 String, str2 LowCardinality(String), str3 StringWithDictionary) ENGINE=Memory;
+select toUUID(str1), toUUID(str2), toUUID(str3) from lc_str_uuid;
+select toUUID(str1, '', NULL), toUUID(str2, '', NULL), toUUID(str3, '', NULL) from lc_str_uuid;
+insert into lc_str_uuid values ('61f0c404-5cb3-11e7-907b-a6006ad3dba0', '61f0c404-5cb3-11e7-907b-a6006ad3dba0', '61f0c404-5cb3-11e7-907b-a6006ad3dba0');
+select toUUID(str1), toUUID(str2), toUUID(str3) from lc_str_uuid;
+select toUUID(str1, '', NULL), toUUID(str2, '', NULL), toUUID(str3, '', NULL) from lc_str_uuid;
+drop table if exists lc_str_uuid;

From 5d774c0cd90c8f872406841fb6a152237bc4b2f2 Mon Sep 17 00:00:00 2001
From: keenwolf <keenwolf@gmail.com>
Date: Thu, 28 Jan 2021 19:13:32 +0800
Subject: [PATCH 0302/1238] find method to get user_files_path

---
 .../01658_read_file_to_stringcolumn.reference        | 12 ++++++++++++
 .../0_stateless/01658_read_file_to_stringcolumn.sh   |  9 +++------
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference
index eb5f1795f18..a22076de920 100644
--- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference
+++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference
@@ -1,3 +1,15 @@
+aaaaaaaaa	bbbbbbbbb
+:0
+:0
+:0
+ccccccccc	aaaaaaaaa	bbbbbbbbb
+ccccccccc	aaaaaaaaa	bbbbbbbbb
+:0
+:107
+:79
+:35
+:35
+:35
 699415
 aaaaaaaaa	bbbbbbbbb
 ccccccccc	aaaaaaaaa	bbbbbbbbb
diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
index cc8ed3f7294..6d0f6178cba 100755
--- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
+++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
@@ -6,9 +6,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 . "$CURDIR"/../shell_config.sh
 
 # Data preparation.
-# When run with client mode on different machine to the server, the data-file creation maybe implemented in SQL. Now we just make it simple
-user_files_path=$(clickhouse-client --query "select data_path from system.databases where name='default'" | sed -En 's/data\/default/user_files/p')
-#user_files_path=$(grep user_files_path ${CLICKHOUSE_CONFIG} | awk '{match($0,"<user_files_path>(.*)</user_files_path>",path); print path[1]}')
+# Now we can get the user_files_path by use the table file function for trick. also we can get it by query as:
+# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')"
+user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 |grep Exception | awk '{match($0,"File (.*)/nonexist.txt",path); print path[1]}')
 mkdir -p ${user_files_path}/
 echo -n aaaaaaaaa > ${user_files_path}/a.txt
 echo -n bbbbbbbbb > ${user_files_path}/b.txt
@@ -16,8 +16,6 @@ echo -n ccccccccc > ${user_files_path}/c.txt
 echo -n ccccccccc > /tmp/c.txt
 mkdir -p ${user_files_path}/dir
 
-# Skip the client test part, for being unable to get the correct user_files_path
-if false; then
 
 ### 1st TEST in CLIENT mode.
 ${CLICKHOUSE_CLIENT} --query "drop table if exists data;"
@@ -43,7 +41,6 @@ echo "clickhouse-client --query "'"select file('"'/tmp/c.txt'), file('${user_fil
 echo "clickhouse-client --query "'"select file('"'${user_files_path}/../../../../tmp/c.txt'), file('b.txt')"'";echo :$?' | bash 2>/dev/null
 echo "clickhouse-client --query "'"select file('"'../../../../a.txt'), file('${user_files_path}/b.txt')"'";echo :$?' | bash 2>/dev/null
 
-fi
 
 ### 2nd TEST in LOCAL mode.
 

From 4ef56a41cb71ab0ae07dbdec50711cd8841023db Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Thu, 28 Jan 2021 14:15:45 +0300
Subject: [PATCH 0303/1238] Simplify optimizeTree

---
 .../QueryPlan/Optimizations/optimizeTree.cpp  | 78 +++++--------------
 1 file changed, 20 insertions(+), 58 deletions(-)

diff --git a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp
index 962452b24af..da031378e0c 100644
--- a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp
+++ b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp
@@ -11,18 +11,13 @@ void optimizeTree(QueryPlan::Node & root, QueryPlan::Nodes & nodes)
     struct Frame
     {
         QueryPlan::Node * node;
-        Frame * parent = nullptr;
 
-        /// If not zero, traverse only traverse_depth_limit layers of tree (if no other optimizations happen).
+        /// If not zero, traverse only depth_limit layers of tree (if no other optimizations happen).
         /// Otherwise, traverse all children.
-        size_t traverse_depth_limit = 0;
+        size_t depth_limit = 0;
 
         /// Next child to process.
         size_t next_child = 0;
-
-        /// If not zero, optimizations to current node again.
-        /// Skip optimizations which read less then `read_depth_limit` layers of tree.
-        size_t read_depth_limit = 0;
     };
 
     std::stack<Frame> stack;
@@ -34,7 +29,7 @@ void optimizeTree(QueryPlan::Node & root, QueryPlan::Nodes & nodes)
 
         /// If traverse_depth_limit == 0, then traverse without limit (first entrance)
         /// If traverse_depth_limit > 1, then traverse with (limit - 1)
-        if (frame.traverse_depth_limit != 1)
+        if (frame.depth_limit != 1)
         {
             /// Traverse all children first.
             if (frame.next_child < frame.node->children.size())
@@ -42,8 +37,7 @@ void optimizeTree(QueryPlan::Node & root, QueryPlan::Nodes & nodes)
                 stack.push(Frame
                 {
                        .node = frame.node->children[frame.next_child],
-                       .parent = &frame,
-                       .traverse_depth_limit = frame.traverse_depth_limit ? (frame.traverse_depth_limit - 1) : 0,
+                       .depth_limit = frame.depth_limit ? (frame.depth_limit - 1) : 0,
                 });
 
                 ++frame.next_child;
@@ -51,61 +45,29 @@ void optimizeTree(QueryPlan::Node & root, QueryPlan::Nodes & nodes)
             }
         }
 
-        /// If frame.traverse_depth_limit == 0, apply optimizations on first entrance.
-        /// If frame.read_depth_limit, then one of children was updated, and we may need to repeat some optimizations.
-        if (frame.traverse_depth_limit == 0 || frame.read_depth_limit)
+        size_t max_update_depth = 0;
+
+        /// Apply all optimizations.
+        for (const auto & optimization : optimizations)
         {
-            size_t max_update_depth = 0;
-
-            /// Apply all optimizations.
-            for (const auto & optimization : optimizations)
-            {
-                /// Just in case, skip optimization if it is not initialized.
-                if (!optimization.run)
-                    continue;
-
-                /// Skip optimization if read_depth_limit is applied.
-                if (frame.read_depth_limit && optimization.read_depth <= frame.read_depth_limit)
-                    continue;
-
-                /// Try to apply optimization.
-                if (optimization.run(frame.node, nodes))
-                    max_update_depth = std::max<size_t>(max_update_depth, optimization.update_depth);
-            }
-
-            /// Nothing was applied.
-            if (max_update_depth == 0)
-            {
-                stack.pop();
+            /// Just in case, skip optimization if it is not initialized.
+            if (!optimization.run)
                 continue;
-            }
 
-            /// Traverse `max_update_depth` layers of tree again.
-            frame.traverse_depth_limit = max_update_depth;
+            /// Try to apply optimization.
+            if (optimization.run(frame.node, nodes))
+                max_update_depth = std::max<size_t>(max_update_depth, optimization.update_depth);
+        }
+
+        /// Traverse `max_update_depth` layers of tree again.
+        if (max_update_depth)
+        {
+            frame.depth_limit = max_update_depth;
             frame.next_child = 0;
-
-            /// Also go to parents and tell them to apply some optimizations again.
-            /// Check: for our parent we set read_depth_limit = 1, which means it can skip optimizations
-            ///        which use ony 1 layer of tree (not read current node).
-            /// Note that frame.read_depth_limit will be zeroed.
-            Frame * cur_frame = &frame;
-            for (size_t cur_depth = 0; cur_frame && cur_frame->traverse_depth_limit; ++cur_depth)
-            {
-                /// If cur_frame is traversed first time, all optimizations will apply anyway.
-                if (cur_frame->traverse_depth_limit == 0)
-                    break;
-
-                /// Stop if limit is applied and stricter then current.
-                if (cur_frame->read_depth_limit && cur_frame->read_depth_limit <= cur_depth)
-                    break;
-
-                cur_frame->read_depth_limit = cur_depth;
-                cur_frame = cur_frame->parent;
-            }
-
             continue;
         }
 
+        /// Nothing was applied.
         stack.pop();
     }
 }

From 7d1e4b8594949c0d28da954c04ddbc075080653e Mon Sep 17 00:00:00 2001
From: George <gyuton@yandex-team.ru>
Date: Thu, 28 Jan 2021 14:38:24 +0300
Subject: [PATCH 0304/1238] First draft

---
 .../functions/other-functions.md              | 114 +++++++++++++++++-
 1 file changed, 110 insertions(+), 4 deletions(-)

diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md
index 08d34770f57..4b768f5d1dd 100644
--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@@ -182,13 +182,119 @@ If `NULL` is passed to the function as input, then it returns the `Nullable(Noth
 Gets the size of the block.
 In ClickHouse, queries are always run on blocks (sets of column parts). This function allows getting the size of the block that you called it for.
 
-## byteSize(...) {#function-bytesize}
+## byteSize {#function-bytesize}
+
+Returns estimation of uncompressed byte size of its arguments in memory.
+
+E.g. for [UInt32](/../../sql-reference/data-types/int-uint.md) argument it will return constant 4, for [String](/../../sql-reference/data-types/string.md) arguments — the string length + 9 (terminating zero + length).
 
-Get an estimate of uncompressed byte size of its arguments in memory.
-E.g. for UInt32 argument it will return constant 4, for String argument - the string length + 9 (terminating zero + length).
 The function can take multiple arguments. The typical application is byteSize(*).
 
-Use case: Suppose you have a service that stores data for multiple clients in one table. Users will pay per data volume. So, you need to implement accounting of users data volume. The function will allow to calculate the data size on per-row basis.
+Use case: suppose you have a service that stores data for multiple clients in one table. Users will pay per data volume. So, you need to implement accounting of users data volume. The function will allow to calculate the data size on per-row basis.
+
+**Syntax**
+
+```sql
+byteSize(arguments)
+```
+
+**Parameters**
+
+-   `argument` — Value.
+
+**Returned value**
+
+-   Estimation of byte size of the arguments in memory.
+
+Type: [UInt64](/../../sql-reference/data-types/int-uint.md).
+
+**Example**
+
+Query:
+
+```sql
+SELECT byteSize('string');
+```
+
+Result:
+
+┌─byteSize('string')─┐
+│                 15 │
+└────────────────────┘
+
+For this table:
+
+Query:
+
+```sql
+CREATE TABLE test
+(
+    `key` Int32,
+    `u8` UInt8,
+    `u16` UInt16,
+    `u32` UInt32,
+    `u64` UInt64,
+    `i8` Int8,
+    `i16` Int16,
+    `i32` Int32,
+    `i64` Int64,
+    `f32` Float32,
+    `f64` Float64
+)
+ENGINE = MergeTree
+ORDER BY key;
+```
+
+Insert this values:
+
+Query:
+
+```sql
+insert into test values(1, 8, 16, 32, 64,  -8, -16, -32, -64, 32.32, 64.64);
+```
+
+Query:
+
+```sql
+SELECT key, toTypeName(u8), byteSize(u8), toTypeName(u16), byteSize(u16), toTypeName(u32), byteSize(u32), toTypeName(u64), byteSize(u64) FROM test ORDER BY key ASC;
+
+SELECT key, toTypeName(i8), byteSize(i8), toTypeName(i16), byteSize(i16), toTypeName(i32), byteSize(i32), toTypeName(i64), byteSize(i64), FROM test ORDER BY key ASC;
+
+SELECT key, toTypeName(f32), byteSize(f32), toTypeName(f64), byteSize(f64) FROM test ORDER BY key ASC;
+```
+
+Result:
+
+``` text
+┌─key─┬─toTypeName(u8)─┬─byteSize(u8)─┬─toTypeName(u16)─┬─byteSize(u16)─┬─toTypeName(u32)─┬─byteSize(u32)─┬─toTypeName(u64)─┬─byteSize(u64)─┐
+│   1 │ UInt8          │            1 │ UInt16          │             2 │ UInt32          │             4 │ UInt64          │             8 │
+└─────┴────────────────┴──────────────┴─────────────────┴───────────────┴─────────────────┴───────────────┴─────────────────┴───────────────┘
+
+┌─key─┬─toTypeName(i8)─┬─byteSize(i8)─┬─toTypeName(i16)─┬─byteSize(i16)─┬─toTypeName(i32)─┬─byteSize(i32)─┬─toTypeName(i64)─┬─byteSize(i64)─┐
+│   1 │ Int8           │            1 │ Int16           │             2 │ Int32           │             4 │ Int64           │             8 │
+└─────┴────────────────┴──────────────┴─────────────────┴───────────────┴─────────────────┴───────────────┴─────────────────┴───────────────┘
+
+┌─key─┬─toTypeName(f32)─┬─byteSize(f32)─┬─toTypeName(f64)─┬─byteSize(f64)─┐
+│   1 │ Float32         │             4 │ Float64         │             8 │
+└─────┴─────────────────┴───────────────┴─────────────────┴───────────────┘
+```
+
+With multiple arguments:
+
+Query:
+
+```sql
+SELECT byteSize(NULL, 1, 0.3, '');
+```
+
+Result:
+
+```text
+
+┌─byteSize(NULL, 1, 0.3, '')─┐
+│                         19 │
+└────────────────────────────┘
+```
 
 ## materialize(x) {#materializex}
 

From 8d0d2ca8e00324975d6c743e794c4167a0e45c00 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Thu, 28 Jan 2021 15:07:26 +0300
Subject: [PATCH 0305/1238] Add some partition tests

---
 src/Coordination/NuKeeperServer.cpp           |   8 +-
 src/Coordination/NuKeeperServer.h             |   2 +-
 .../TestKeeperStorageDispatcher.cpp           |  37 +++-
 .../configs/enable_test_keeper1.xml           |   7 +-
 .../configs/enable_test_keeper2.xml           |   7 +-
 .../configs/enable_test_keeper3.xml           |   7 +-
 .../test_testkeeper_multinode/test.py         | 172 ++++++++++++++++++
 7 files changed, 224 insertions(+), 16 deletions(-)

diff --git a/src/Coordination/NuKeeperServer.cpp b/src/Coordination/NuKeeperServer.cpp
index a005febd67d..8995b51a13b 100644
--- a/src/Coordination/NuKeeperServer.cpp
+++ b/src/Coordination/NuKeeperServer.cpp
@@ -29,9 +29,9 @@ NuKeeperServer::NuKeeperServer(int server_id_, const std::string & hostname_, in
 {
 }
 
-void NuKeeperServer::addServer(int server_id_, const std::string & server_uri_, bool can_become_leader_)
+void NuKeeperServer::addServer(int server_id_, const std::string & server_uri_, bool can_become_leader_, int32_t priority)
 {
-    nuraft::srv_config config(server_id_, 0, server_uri_, "", /*FIXME follower=*/ !can_become_leader_);
+    nuraft::srv_config config(server_id_, 0, server_uri_, "", /* follower= */ !can_become_leader_, priority);
     auto ret1 = raft_instance->add_srv(config);
     if (ret1->get_result_code() != nuraft::cmd_result_code::OK)
         throw Exception(ErrorCodes::RAFT_ERROR, "Cannot add server to RAFT quorum with code {}, message '{}'", ret1->get_result_code(), ret1->get_result_str());
@@ -146,7 +146,7 @@ TestKeeperStorage::ResponsesForSessions NuKeeperServer::readZooKeeperResponses(n
 
 TestKeeperStorage::ResponsesForSessions NuKeeperServer::putRequests(const TestKeeperStorage::RequestsForSessions & requests)
 {
-    if (requests.size() == 1 && requests[0].request->isReadRequest())
+    if (isLeader() && requests.size() == 1 && requests[0].request->isReadRequest())
     {
         return state_machine->processReadRequest(requests[0]);
     }
@@ -238,7 +238,7 @@ void NuKeeperServer::waitForServers(const std::vector<int32_t> & ids) const
 
 void NuKeeperServer::waitForCatchUp() const
 {
-    while (raft_instance->is_catching_up() || raft_instance->is_receiving_snapshot())
+    while (raft_instance->is_catching_up() || raft_instance->is_receiving_snapshot() || raft_instance->is_leader())
     {
         LOG_DEBUG(&Poco::Logger::get("NuRaftInit"), "Waiting current RAFT instance to catch up");
         std::this_thread::sleep_for(std::chrono::milliseconds(100));
diff --git a/src/Coordination/NuKeeperServer.h b/src/Coordination/NuKeeperServer.h
index b9488cafc69..7fd70ac26e2 100644
--- a/src/Coordination/NuKeeperServer.h
+++ b/src/Coordination/NuKeeperServer.h
@@ -46,7 +46,7 @@ public:
 
     int64_t getSessionID();
 
-    void addServer(int server_id_, const std::string & server_uri, bool can_become_leader_);
+    void addServer(int server_id_, const std::string & server_uri, bool can_become_leader_, int32_t priority);
 
     bool isLeader() const;
 
diff --git a/src/Coordination/TestKeeperStorageDispatcher.cpp b/src/Coordination/TestKeeperStorageDispatcher.cpp
index f6ca389f2cf..685fa58f8ad 100644
--- a/src/Coordination/TestKeeperStorageDispatcher.cpp
+++ b/src/Coordination/TestKeeperStorageDispatcher.cpp
@@ -74,18 +74,43 @@ bool TestKeeperStorageDispatcher::putRequest(const Coordination::ZooKeeperReques
     return true;
 }
 
+namespace
+{
+    bool shouldBuildQuorum(int32_t myid, int32_t my_priority, bool my_can_become_leader, const std::vector<std::tuple<int, std::string, int, bool, int32_t>> & server_configs)
+    {
+        if (!my_can_become_leader)
+            return false;
+
+        int32_t minid = myid;
+        bool has_equal_priority = false;
+        for (const auto & [id, hostname, port, can_become_leader, priority] : server_configs)
+        {
+            if (my_priority < priority)
+                return false;
+            else if (my_priority == priority)
+                has_equal_priority = true;
+            minid = std::min(minid, id);
+        }
+
+        if (has_equal_priority)
+            return minid == myid;
+        else
+            return true;
+    }
+}
 
 void TestKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfiguration & config)
 {
     int myid = config.getInt("test_keeper_server.server_id");
     std::string myhostname;
     int myport;
+    int32_t my_priority = 1;
 
     Poco::Util::AbstractConfiguration::Keys keys;
     config.keys("test_keeper_server.raft_configuration", keys);
     bool my_can_become_leader = true;
 
-    std::vector<std::tuple<int, std::string, int, bool>> server_configs;
+    std::vector<std::tuple<int, std::string, int, bool, int32_t>> server_configs;
     std::vector<int32_t> ids;
     for (const auto & server_key : keys)
     {
@@ -93,28 +118,30 @@ void TestKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfigura
         std::string hostname = config.getString("test_keeper_server.raft_configuration." + server_key + ".hostname");
         int port = config.getInt("test_keeper_server.raft_configuration." + server_key + ".port");
         bool can_become_leader = config.getBool("test_keeper_server.raft_configuration." + server_key + ".can_become_leader", true);
+        int32_t priority = config.getInt("test_keeper_server.raft_configuration." + server_key + ".priority", 1);
         if (server_id == myid)
         {
             myhostname = hostname;
             myport = port;
             my_can_become_leader = can_become_leader;
+            my_priority = priority;
         }
         else
         {
-            server_configs.emplace_back(server_id, hostname, port, can_become_leader);
+            server_configs.emplace_back(server_id, hostname, port, can_become_leader, priority);
         }
         ids.push_back(server_id);
     }
 
     server = std::make_unique<NuKeeperServer>(myid, myhostname, myport);
     server->startup();
-    if (my_can_become_leader)
+    if (shouldBuildQuorum(myid, my_priority, my_can_become_leader, server_configs))
     {
-        for (const auto & [id, hostname, port, can_become_leader] : server_configs)
+        for (const auto & [id, hostname, port, can_become_leader, priority] : server_configs)
         {
             do
             {
-                server->addServer(id, hostname + ":" + std::to_string(port), can_become_leader);
+                server->addServer(id, hostname + ":" + std::to_string(port), can_become_leader, priority);
             }
             while (!server->waitForServer(id));
         }
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
index 486942aec71..81f68f50c7c 100644
--- a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
@@ -10,18 +10,21 @@
                 <hostname>node1</hostname>
                 <port>44444</port>
                 <can_become_leader>true</can_become_leader>
+                <priority>3</priority>
             </server>
             <server>
                 <id>2</id>
                 <hostname>node2</hostname>
                 <port>44444</port>
-                <can_become_leader>false</can_become_leader>
+                <can_become_leader>true</can_become_leader>
+                <priority>2</priority>
             </server>
             <server>
                 <id>3</id>
                 <hostname>node3</hostname>
                 <port>44444</port>
-                <can_become_leader>false</can_become_leader>
+                <can_become_leader>true</can_become_leader>
+                <priority>1</priority>
             </server>
         </raft_configuration>
     </test_keeper_server>
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
index 94873883943..73340973367 100644
--- a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
@@ -10,18 +10,21 @@
                 <hostname>node1</hostname>
                 <port>44444</port>
                 <can_become_leader>true</can_become_leader>
+                <priority>3</priority>
             </server>
             <server>
                 <id>2</id>
                 <hostname>node2</hostname>
                 <port>44444</port>
-                <can_become_leader>false</can_become_leader>
+                <can_become_leader>true</can_become_leader>
+                <priority>2</priority>
             </server>
             <server>
                 <id>3</id>
                 <hostname>node3</hostname>
                 <port>44444</port>
-                <can_become_leader>false</can_become_leader>
+                <can_become_leader>true</can_become_leader>
+                <priority>1</priority>
             </server>
         </raft_configuration>
     </test_keeper_server>
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
index 0219a0e5763..fbc51489d11 100644
--- a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
@@ -10,18 +10,21 @@
                 <hostname>node1</hostname>
                 <port>44444</port>
                 <can_become_leader>true</can_become_leader>
+                <priority>3</priority>
             </server>
             <server>
                 <id>2</id>
                 <hostname>node2</hostname>
                 <port>44444</port>
-                <can_become_leader>false</can_become_leader>
+                <can_become_leader>true</can_become_leader>
+                <priority>2</priority>
             </server>
             <server>
                 <id>3</id>
                 <hostname>node3</hostname>
                 <port>44444</port>
-                <can_become_leader>false</can_become_leader>
+                <can_become_leader>true</can_become_leader>
+                <priority>1</priority>
             </server>
         </raft_configuration>
     </test_keeper_server>
diff --git a/tests/integration/test_testkeeper_multinode/test.py b/tests/integration/test_testkeeper_multinode/test.py
index d76e72ee92e..8d35e30400a 100644
--- a/tests/integration/test_testkeeper_multinode/test.py
+++ b/tests/integration/test_testkeeper_multinode/test.py
@@ -5,6 +5,7 @@ import string
 import os
 import time
 from multiprocessing.dummy import Pool
+from helpers.network import PartitionManager
 
 cluster = ClickHouseCluster(__file__)
 node1 = cluster.add_instance('node1', main_configs=['configs/enable_test_keeper1.xml', 'configs/log_conf.xml', 'configs/use_test_keeper.xml'])
@@ -23,6 +24,8 @@ def started_cluster():
     finally:
         cluster.shutdown()
 
+def smaller_exception(ex):
+    return '\n'.join(str(ex).split('\n')[0:2])
 
 def test_simple_replicated_table(started_cluster):
 
@@ -37,3 +40,172 @@ def test_simple_replicated_table(started_cluster):
     assert node1.query("SELECT COUNT() FROM t") == "10\n"
     assert node2.query("SELECT COUNT() FROM t") == "10\n"
     assert node3.query("SELECT COUNT() FROM t") == "10\n"
+
+
+
+def test_blocade_leader(started_cluster):
+    for i, node in enumerate([node1, node2, node3]):
+        node.query("CREATE TABLE t1 (value UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/t1', '{}') ORDER BY tuple()".format(i + 1))
+
+    node2.query("INSERT INTO t1 SELECT number FROM numbers(10)")
+
+    node1.query("SYSTEM SYNC REPLICA t1", timeout=10)
+    node3.query("SYSTEM SYNC REPLICA t1", timeout=10)
+
+    assert node1.query("SELECT COUNT() FROM t1") == "10\n"
+    assert node2.query("SELECT COUNT() FROM t1") == "10\n"
+    assert node3.query("SELECT COUNT() FROM t1") == "10\n"
+
+    with PartitionManager() as pm:
+        pm.partition_instances(node2, node1)
+        pm.partition_instances(node3, node1)
+
+        for i in range(100):
+            try:
+                node2.query("INSERT INTO t1 SELECT rand() FROM numbers(100)")
+                break
+            except Exception as ex:
+                print("Got exception node2", smaller_exception(ex))
+                time.sleep(0.5)
+        else:
+            assert False, "Cannot insert anything node2"
+
+        for i in range(100):
+            try:
+                node3.query("INSERT INTO t1 SELECT rand() FROM numbers(100)")
+                break
+            except Exception as ex:
+                print("Got exception node3", smaller_exception(ex))
+                time.sleep(0.5)
+        else:
+            assert False, "Cannot insert anything node3"
+
+    for n, node in enumerate([node1, node2, node3]):
+        for i in range(100):
+            try:
+                node.query("SYSTEM RESTART REPLICA t1")
+                break
+            except Exception as ex:
+                print("Got exception node{}".format(n + 1), smaller_exception(ex))
+                time.sleep(0.5)
+        else:
+            assert False, "Cannot reconnect for node{}".format(n + 1)
+
+    for i in range(100):
+        try:
+            node1.query("INSERT INTO t1 SELECT rand() FROM numbers(100)")
+            break
+        except Exception as ex:
+            print("Got exception node1", smaller_exception(ex))
+            time.sleep(0.5)
+    else:
+        assert False, "Cannot insert anything node1"
+
+    for n, node in enumerate([node1, node2, node3]):
+        for i in range(100):
+            try:
+                node.query("SYSTEM SYNC REPLICA t1", timeout=10)
+                break
+            except Exception as ex:
+                print("Got exception node{}".format(n + 1), smaller_exception(ex))
+                time.sleep(0.5)
+        else:
+            assert False, "Cannot sync replica node{}".format(n+1)
+
+    assert node1.query("SELECT COUNT() FROM t1") == "310\n"
+    assert node2.query("SELECT COUNT() FROM t1") == "310\n"
+    assert node3.query("SELECT COUNT() FROM t1") == "310\n"
+
+
+def test_blocade_leader_twice(started_cluster):
+    for i, node in enumerate([node1, node2, node3]):
+        node.query("CREATE TABLE t2 (value UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/t2', '{}') ORDER BY tuple()".format(i + 1))
+
+    node2.query("INSERT INTO t2 SELECT number FROM numbers(10)")
+
+    node1.query("SYSTEM SYNC REPLICA t2", timeout=10)
+    node3.query("SYSTEM SYNC REPLICA t2", timeout=10)
+
+    assert node1.query("SELECT COUNT() FROM t2") == "10\n"
+    assert node2.query("SELECT COUNT() FROM t2") == "10\n"
+    assert node3.query("SELECT COUNT() FROM t2") == "10\n"
+
+    with PartitionManager() as pm:
+        pm.partition_instances(node2, node1)
+        pm.partition_instances(node3, node1)
+
+        for i in range(100):
+            try:
+                node2.query("INSERT INTO t2 SELECT rand() FROM numbers(100)")
+                break
+            except Exception as ex:
+                print("Got exception node2", smaller_exception(ex))
+                time.sleep(0.5)
+        else:
+            assert False, "Cannot reconnect for node2"
+
+        for i in range(100):
+            try:
+                node3.query("INSERT INTO t2 SELECT rand() FROM numbers(100)")
+                break
+            except Exception as ex:
+                print("Got exception node3", smaller_exception(ex))
+                time.sleep(0.5)
+        else:
+            assert False, "Cannot reconnect for node3"
+
+
+        # Total network partition
+        pm.partition_instances(node3, node2)
+
+        for i in range(30):
+            try:
+                node3.query("INSERT INTO t2 SELECT rand() FROM numbers(100)")
+                assert False, "Node3 became leader?"
+            except Exception as ex:
+                time.sleep(0.5)
+
+        for i in range(30):
+            try:
+                node2.query("INSERT INTO t2 SELECT rand() FROM numbers(100)")
+                assert False, "Node2 became leader?"
+            except Exception as ex:
+                time.sleep(0.5)
+
+
+    for n, node in enumerate([node1, node2, node3]):
+        for i in range(100):
+            try:
+                node.query("SYSTEM RESTART REPLICA t2")
+                break
+            except Exception as ex:
+                print("Got exception node{}".format(n + 1), smaller_exception(ex))
+                time.sleep(0.5)
+        else:
+            assert False, "Cannot reconnect for node{}".format(n + 1)
+
+    for n, node in enumerate([node1, node2, node3]):
+        for i in range(100):
+            try:
+                node.query("INSERT INTO t2 SELECT rand() FROM numbers(100)")
+                break
+            except Exception as ex:
+                print("Got exception node{}".format(n + 1), smaller_exception(ex))
+                time.sleep(0.5)
+        else:
+            assert False, "Cannot reconnect for node{}".format(n + 1)
+
+    for node in [node1, node2, node3]:
+        for i in range(100):
+            try:
+                node.query("SYSTEM SYNC REPLICA t2", timeout=10)
+                break
+            except Exception as ex:
+                print("Got exception node{}".format(n + 1), smaller_exception(ex))
+                time.sleep(0.5)
+        else:
+            assert False, "Cannot reconnect for node{}".format(n + 1)
+
+    assert node1.query("SELECT COUNT() FROM t2") == "510\n"
+    assert node2.query("SELECT COUNT() FROM t2") == "510\n"
+    assert node3.query("SELECT COUNT() FROM t2") == "510\n"

From b0d645eea87d01c21d0e1871345d42651cd728e0 Mon Sep 17 00:00:00 2001
From: yiguolei <676222867@qq.com>
Date: Thu, 28 Jan 2021 20:12:00 +0800
Subject: [PATCH 0306/1238] check active replicas and detached tables

---
 src/Interpreters/DDLWorker.cpp              | 13 ++++++++-----
 src/Storages/StorageReplicatedMergeTree.cpp |  1 -
 src/Storages/StorageReplicatedMergeTree.h   |  1 -
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index d3ebed228c7..85445eb0cff 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -865,13 +865,16 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
     while (stopwatch.elapsedSeconds() <= MAX_EXECUTION_TIMEOUT_SEC)
     {
         StorageReplicatedMergeTree::Status status;
-        replicated_storage->getStatus(status);
+        // Has to get with zk fields to get active replicas field
+        replicated_storage->getStatus(status, true);
 
-        // Should return as soon as possible if the table is shutdown by drop or other command.
-        if (status.is_partial_shutdown)
+        // Should return as soon as possible if the table is dropped.
+        bool replica_dropped = replicated_storage->is_dropped;
+        bool all_replicas_likely_detached = status.active_replicas == 0 && !DatabaseCatalog::instance().isTableExist(replicated_storage->getStorageID(), context);
+        if (replica_dropped || all_replicas_likely_detached) 
         {
-            LOG_WARNING(log, "Table is shutdown, task {} will not be executed.", task.entry_name);
-            task.execution_status = ExecutionStatus(ErrorCodes::UNFINISHED, "Cannot execute replicated DDL query, table is shutdown");
+            LOG_WARNING(log, "Table is dropped or detached permantly, task {} will not be executed.", task.entry_name);
+            task.execution_status = ExecutionStatus(ErrorCodes::UNFINISHED, "Cannot execute replicated DDL query, table is dropped or detached permantly");
             return false;
         }
 
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index f6e830f1570..70e90e9706a 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -4791,7 +4791,6 @@ void StorageReplicatedMergeTree::getStatus(Status & res, bool with_zk_fields)
     res.can_become_leader = storage_settings_ptr->replicated_can_become_leader;
     res.is_readonly = is_readonly;
     res.is_session_expired = !zookeeper || zookeeper->expired();
-    res.is_partial_shutdown = partial_shutdown_called;
 
     res.queue = queue.getStatus();
     res.absolute_delay = getAbsoluteDelay(); /// NOTE: may be slightly inconsistent with queue status.
diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h
index 549f81c10e5..cf36cf82fc9 100644
--- a/src/Storages/StorageReplicatedMergeTree.h
+++ b/src/Storages/StorageReplicatedMergeTree.h
@@ -159,7 +159,6 @@ public:
         bool can_become_leader;
         bool is_readonly;
         bool is_session_expired;
-        bool is_partial_shutdown;
         ReplicatedMergeTreeQueue::Status queue;
         UInt32 parts_to_check;
         String zookeeper_path;

From c7ec1b931376c2ed1967d229ba079ff700b1ed57 Mon Sep 17 00:00:00 2001
From: George <gyuton@yandex-team.ru>
Date: Thu, 28 Jan 2021 15:14:57 +0300
Subject: [PATCH 0307/1238] fixed links

---
 docs/en/sql-reference/functions/other-functions.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md
index 4b768f5d1dd..be16f76ff59 100644
--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@@ -186,7 +186,7 @@ In ClickHouse, queries are always run on blocks (sets of column parts). This fun
 
 Returns estimation of uncompressed byte size of its arguments in memory.
 
-E.g. for [UInt32](/../../sql-reference/data-types/int-uint.md) argument it will return constant 4, for [String](/../../sql-reference/data-types/string.md) arguments — the string length + 9 (terminating zero + length).
+E.g. for [UInt32](../../sql-reference/data-types/int-uint.md) argument it will return constant 4, for [String](../../sql-reference/data-types/string.md) arguments — the string length + 9 (terminating zero + length).
 
 The function can take multiple arguments. The typical application is byteSize(*).
 
@@ -206,7 +206,7 @@ byteSize(arguments)
 
 -   Estimation of byte size of the arguments in memory.
 
-Type: [UInt64](/../../sql-reference/data-types/int-uint.md).
+Type: [UInt64](../../sql-reference/data-types/int-uint.md).
 
 **Example**
 

From 3b50a7475904fbfcc7540d26373145b6533a048d Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Thu, 28 Jan 2021 15:32:02 +0300
Subject: [PATCH 0308/1238] Simplify optimization interface.

---
 .../QueryPlan/Optimizations/Optimizations.h   | 33 +++++++++----------
 .../Optimizations/liftUpArrayJoin.cpp         | 12 +++----
 .../QueryPlan/Optimizations/limitPushDown.cpp | 22 ++++++-------
 .../Optimizations/mergeExpressions.cpp        | 12 +++----
 .../QueryPlan/Optimizations/optimizeTree.cpp  |  6 ++--
 .../QueryPlan/Optimizations/splitFilter.cpp   | 10 +++---
 6 files changed, 46 insertions(+), 49 deletions(-)

diff --git a/src/Processors/QueryPlan/Optimizations/Optimizations.h b/src/Processors/QueryPlan/Optimizations/Optimizations.h
index d6e9d345a23..454eab9649a 100644
--- a/src/Processors/QueryPlan/Optimizations/Optimizations.h
+++ b/src/Processors/QueryPlan/Optimizations/Optimizations.h
@@ -1,3 +1,4 @@
+#pragma once
 #include <Processors/QueryPlan/QueryPlan.h>
 #include <array>
 
@@ -12,43 +13,39 @@ void optimizeTree(QueryPlan::Node & root, QueryPlan::Nodes & nodes);
 
 /// Optimization is a function applied to QueryPlan::Node.
 /// It can read and update subtree of specified node.
-/// It return true if some change of thee happened.
+/// It return the number of updated layers of subtree if some change happened.
+/// It must guarantee that the structure of tree is correct.
+///
 /// New nodes should be added to QueryPlan::Nodes list.
 /// It is not needed to remove old nodes from the list.
-///
-/// Optimization must guarantee that:
-///  * the structure of tree is correct
-///  * no more then `read_depth` layers of subtree was read
-///  * no more then `update_depth` layers of subtree was updated
 struct Optimization
 {
-    using Function = bool (*)(QueryPlan::Node *, QueryPlan::Nodes &);
-    const Function run = nullptr;
-    const size_t read_depth;
-    const size_t update_depth;
+    using Function = size_t (*)(QueryPlan::Node *, QueryPlan::Nodes &);
+    const Function apply = nullptr;
+    const char * name;
 };
 
 /// Move ARRAY JOIN up if possible.
-bool tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes);
+size_t tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes);
 
 /// Move LimitStep down if possible.
-bool tryPushDownLimit(QueryPlan::Node * parent_node, QueryPlan::Nodes &);
+size_t tryPushDownLimit(QueryPlan::Node * parent_node, QueryPlan::Nodes &);
 
 /// Split FilterStep into chain `ExpressionStep -> FilterStep`, where FilterStep contains minimal number of nodes.
-bool trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes);
+size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes);
 
 /// Replace chain `ExpressionStep -> ExpressionStep` to single ExpressionStep
 /// Replace chain `FilterStep -> ExpressionStep` to single FilterStep
-bool tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &);
+size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &);
 
 inline const auto & getOptimizations()
 {
     static const std::array<Optimization, 4> optimizations =
     {{
-        {tryLiftUpArrayJoin, 2, 3},
-        {tryPushDownLimit, 2, 2},
-        {trySplitFilter, 1, 2},
-        {tryMergeExpressions, 2, 1},
+        {tryLiftUpArrayJoin, "liftUpArrayJoin"},
+        {tryPushDownLimit, "pushDownLimit"},
+        {trySplitFilter, "splitFilter"},
+        {tryMergeExpressions, "mergeExpressions"},
      }};
 
     return optimizations;
diff --git a/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp b/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp
index 0dc022b14af..e20c5f93d6e 100644
--- a/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp
+++ b/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp
@@ -8,10 +8,10 @@
 namespace DB::QueryPlanOptimizations
 {
 
-bool tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes)
+size_t tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes)
 {
     if (parent_node->children.size() != 1)
-        return false;
+        return 0;
 
     QueryPlan::Node * child_node = parent_node->children.front();
 
@@ -22,7 +22,7 @@ bool tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes)
     auto * array_join_step = typeid_cast<ArrayJoinStep *>(child.get());
 
     if (!(expression_step || filter_step) || !array_join_step)
-        return false;
+        return 0;
 
     const auto & array_join = array_join_step->arrayJoin();
     const auto & expression = expression_step ? expression_step->getExpression()
@@ -32,7 +32,7 @@ bool tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes)
 
     /// No actions can be moved before ARRAY JOIN.
     if (split_actions.first->trivial())
-        return false;
+        return 0;
 
     auto description = parent->getStepDescription();
 
@@ -57,7 +57,7 @@ bool tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes)
         child->setStepDescription(std::move(description));
 
         array_join_step->updateInputStream(child->getOutputStream(), expected_header);
-        return false;
+        return 2;
     }
 
     /// Add new expression step before ARRAY JOIN.
@@ -79,7 +79,7 @@ bool tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes)
                                               filter_step->getFilterColumnName(), filter_step->removesFilterColumn());
 
     parent->setStepDescription(description + " [split]");
-    return true;
+    return 3;
 }
 
 }
diff --git a/src/Processors/QueryPlan/Optimizations/limitPushDown.cpp b/src/Processors/QueryPlan/Optimizations/limitPushDown.cpp
index 0b2fcdfb209..01af6a2bbde 100644
--- a/src/Processors/QueryPlan/Optimizations/limitPushDown.cpp
+++ b/src/Processors/QueryPlan/Optimizations/limitPushDown.cpp
@@ -54,10 +54,10 @@ static bool tryUpdateLimitForSortingSteps(QueryPlan::Node * node, size_t limit)
     return updated;
 }
 
-bool tryPushDownLimit(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
+size_t tryPushDownLimit(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
 {
     if (parent_node->children.size() != 1)
-        return false;
+        return 0;
 
     QueryPlan::Node * child_node = parent_node->children.front();
 
@@ -66,25 +66,25 @@ bool tryPushDownLimit(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
     auto * limit = typeid_cast<LimitStep *>(parent.get());
 
     if (!limit)
-        return false;
+        return 0;
 
     /// Skip LIMIT WITH TIES by now.
     if (limit->withTies())
-        return false;
+        return 0;
 
     const auto * transforming = dynamic_cast<const ITransformingStep *>(child.get());
 
     /// Skip everything which is not transform.
     if (!transforming)
-        return false;
+        return 0;
 
     /// Special cases for sorting steps.
     if (tryUpdateLimitForSortingSteps(child_node, limit->getLimitForSorting()))
-        return false;
+        return 0;
 
     /// Special case for TotalsHaving. Totals may be incorrect if we push down limit.
     if (typeid_cast<const TotalsHavingStep *>(child.get()))
-        return false;
+        return 0;
 
     /// Now we should decide if pushing down limit possible for this step.
 
@@ -93,22 +93,22 @@ bool tryPushDownLimit(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
 
     /// Cannot push down if child changes the number of rows.
     if (!transform_traits.preserves_number_of_rows)
-        return false;
+        return 0;
 
     /// Cannot push down if data was sorted exactly by child stream.
     if (!child->getOutputStream().sort_description.empty() && !data_stream_traits.preserves_sorting)
-        return false;
+        return 0;
 
     /// Now we push down limit only if it doesn't change any stream properties.
     /// TODO: some of them may be changed and, probably, not important for following streams. We may add such info.
     if (!limit->getOutputStream().hasEqualPropertiesWith(transforming->getOutputStream()))
-        return false;
+        return 0;
 
     /// Input stream for Limit have changed.
     limit->updateInputStream(transforming->getInputStreams().front());
 
     parent.swap(child);
-    return true;
+    return 2;
 }
 
 }
diff --git a/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp b/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp
index 4b4bf540cc5..dfd15a2a929 100644
--- a/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp
+++ b/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp
@@ -6,7 +6,7 @@
 namespace DB::QueryPlanOptimizations
 {
 
-bool tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
+size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
 {
     if (parent_node->children.size() != 1)
         return false;
@@ -29,7 +29,7 @@ bool tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
         /// Example: select rowNumberInBlock() from (select arrayJoin([1, 2]))
         /// Such a query will return two zeroes if we combine actions together.
         if (child_actions->hasArrayJoin() && parent_actions->hasStatefulFunctions())
-            return false;
+            return 0;
 
         auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions));
 
@@ -38,7 +38,7 @@ bool tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
 
         parent_node->step = std::move(expr);
         parent_node->children.swap(child_node->children);
-        return true;
+        return 1;
     }
     else if (parent_filter && child_expr)
     {
@@ -46,7 +46,7 @@ bool tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
         const auto & parent_actions = parent_filter->getExpression();
 
         if (child_actions->hasArrayJoin() && parent_actions->hasStatefulFunctions())
-            return false;
+            return 0;
 
         auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions));
 
@@ -56,10 +56,10 @@ bool tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
 
         parent_node->step = std::move(filter);
         parent_node->children.swap(child_node->children);
-        return true;
+        return 1;
     }
 
-    return false;
+    return 0;
 }
 
 }
diff --git a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp
index da031378e0c..e5ccc173ed8 100644
--- a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp
+++ b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp
@@ -51,12 +51,12 @@ void optimizeTree(QueryPlan::Node & root, QueryPlan::Nodes & nodes)
         for (const auto & optimization : optimizations)
         {
             /// Just in case, skip optimization if it is not initialized.
-            if (!optimization.run)
+            if (!optimization.apply)
                 continue;
 
             /// Try to apply optimization.
-            if (optimization.run(frame.node, nodes))
-                max_update_depth = std::max<size_t>(max_update_depth, optimization.update_depth);
+            auto update_depth = optimization.apply(frame.node, nodes);
+            max_update_depth = std::max<size_t>(max_update_depth, update_depth);
         }
 
         /// Traverse `max_update_depth` layers of tree again.
diff --git a/src/Processors/QueryPlan/Optimizations/splitFilter.cpp b/src/Processors/QueryPlan/Optimizations/splitFilter.cpp
index 09ce500ee54..38ba8f25b24 100644
--- a/src/Processors/QueryPlan/Optimizations/splitFilter.cpp
+++ b/src/Processors/QueryPlan/Optimizations/splitFilter.cpp
@@ -7,22 +7,22 @@ namespace DB::QueryPlanOptimizations
 {
 
 /// Split FilterStep into chain `ExpressionStep -> FilterStep`, where FilterStep contains minimal number of nodes.
-bool trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes)
+size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes)
 {
     auto * filter_step = typeid_cast<FilterStep *>(node->step.get());
     if (!filter_step)
-        return false;
+        return 0;
 
     const auto & expr = filter_step->getExpression();
 
     /// Do not split if there are function like runningDifference.
     if (expr->hasStatefulFunctions())
-        return false;
+        return 0;
 
     auto split = expr->splitActionsForFilter(filter_step->getFilterColumnName());
 
     if (split.second->trivial())
-        return false;
+        return 0;
 
     if (filter_step->removesFilterColumn())
         split.second->removeUnusedInput(filter_step->getFilterColumnName());
@@ -44,7 +44,7 @@ bool trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes)
     filter_node.step->setStepDescription("(" + description + ")[split]");
     node->step->setStepDescription(description);
 
-    return true;
+    return 2;
 }
 
 }

From 1510e3147df939f3b9a3bff8c874fd4648af91f3 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Thu, 28 Jan 2021 16:08:07 +0300
Subject: [PATCH 0309/1238] Different ports for nodes

---
 .../test_testkeeper_multinode/configs/enable_test_keeper1.xml | 4 ++--
 .../test_testkeeper_multinode/configs/enable_test_keeper2.xml | 4 ++--
 .../test_testkeeper_multinode/configs/enable_test_keeper3.xml | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
index 81f68f50c7c..7fcd76ea57a 100644
--- a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
@@ -15,14 +15,14 @@
             <server>
                 <id>2</id>
                 <hostname>node2</hostname>
-                <port>44444</port>
+                <port>44445</port>
                 <can_become_leader>true</can_become_leader>
                 <priority>2</priority>
             </server>
             <server>
                 <id>3</id>
                 <hostname>node3</hostname>
-                <port>44444</port>
+                <port>44446</port>
                 <can_become_leader>true</can_become_leader>
                 <priority>1</priority>
             </server>
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
index 73340973367..f9d6dcad1d6 100644
--- a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
@@ -15,14 +15,14 @@
             <server>
                 <id>2</id>
                 <hostname>node2</hostname>
-                <port>44444</port>
+                <port>44445</port>
                 <can_become_leader>true</can_become_leader>
                 <priority>2</priority>
             </server>
             <server>
                 <id>3</id>
                 <hostname>node3</hostname>
-                <port>44444</port>
+                <port>44446</port>
                 <can_become_leader>true</can_become_leader>
                 <priority>1</priority>
             </server>
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
index fbc51489d11..7d71fd3a20d 100644
--- a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
@@ -15,14 +15,14 @@
             <server>
                 <id>2</id>
                 <hostname>node2</hostname>
-                <port>44444</port>
+                <port>44445</port>
                 <can_become_leader>true</can_become_leader>
                 <priority>2</priority>
             </server>
             <server>
                 <id>3</id>
                 <hostname>node3</hostname>
-                <port>44444</port>
+                <port>44446</port>
                 <can_become_leader>true</can_become_leader>
                 <priority>1</priority>
             </server>

From d3763e735b5a0f31f707d3efee05041cac95632d Mon Sep 17 00:00:00 2001
From: keenwolf <keenwolf@gmail.com>
Date: Thu, 28 Jan 2021 21:18:31 +0800
Subject: [PATCH 0310/1238] replace mawk with gawk

---
 tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
index 6d0f6178cba..6376040fcc5 100755
--- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
+++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
@@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # Data preparation.
 # Now we can get the user_files_path by use the table file function for trick. also we can get it by query as:
 # "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')"
-user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 |grep Exception | awk '{match($0,"File (.*)/nonexist.txt",path); print path[1]}')
+user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 |grep Exception | /usr/bin/gawk '{match($0,"File (.*)/nonexist.txt",path); print path[1]}')
 mkdir -p ${user_files_path}/
 echo -n aaaaaaaaa > ${user_files_path}/a.txt
 echo -n bbbbbbbbb > ${user_files_path}/b.txt

From 629cb44d4caea6d66a8fced5185f62c6f7cfa621 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Thu, 28 Jan 2021 16:42:18 +0300
Subject: [PATCH 0311/1238] everything was wrong

---
 src/Parsers/ExpressionElementParsers.cpp      |   2 +-
 src/Processors/Transforms/WindowTransform.cpp | 340 +++++++-----------
 src/Processors/Transforms/WindowTransform.h   |  37 +-
 3 files changed, 142 insertions(+), 237 deletions(-)

diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp
index 11369e3495f..123e68e32ed 100644
--- a/src/Parsers/ExpressionElementParsers.cpp
+++ b/src/Parsers/ExpressionElementParsers.cpp
@@ -577,7 +577,7 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p
         }
     }
 
-    if (node->frame != WindowFrame{})
+    if (!(node->frame == WindowFrame{}))
     {
         node->frame.is_default = false;
     }
diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index bd2a26a907c..7cb7e6e68e7 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -165,97 +165,103 @@ void WindowTransform::advancePartitionEnd()
     partition_etalon = RowNumber{block_number, block_rows - 1};
 }
 
-void WindowTransform::advanceGroupEnd()
-{
-    if (group_ended)
-    {
-        return;
-    }
-
-    switch (window_description.frame.type)
-    {
-        case WindowFrame::FrameType::Range:
-        case WindowFrame::FrameType::Groups:
-            advanceGroupEndOrderBy();
-            break;
-        case WindowFrame::FrameType::Rows:
-            advanceGroupEndTrivial();
-            break;
-    }
-}
-
-void WindowTransform::advanceGroupEndTrivial()
-{
-    // ROWS mode, peer groups always contains only the current row.
-    // We cannot advance the groups if the group start is already beyond the
-    // end of partition.
-    assert(group_start < partition_end);
-    group_end = group_start;
-    advanceRowNumber(group_end);
-    group_ended = true;
-}
-
-void WindowTransform::advanceGroupEndOrderBy()
-{
-    const size_t n = order_by_indices.size();
-    if (n == 0)
-    {
-        // No ORDER BY, so all rows are the same group. The group will end
-        // with the partition.
-        group_end = partition_end;
-        group_ended = partition_ended;
-    }
-
-    // `partition_end` is either end of partition or end of data.
-    for (; group_end < partition_end; advanceRowNumber(group_end))
-    {
-        // Check for group end.
-        size_t i = 0;
-        for (; i < n; i++)
-        {
-            const auto * ref = inputAt(group_start)[order_by_indices[i]].get();
-            const auto * c = inputAt(group_end)[order_by_indices[i]].get();
-            if (c->compareAt(group_end.row, group_start.row, *ref,
-                    1 /* nan_direction_hint */) != 0)
-            {
-                break;
-            }
-        }
-
-        if (i < n)
-        {
-            group_ended = true;
-            return;
-        }
-    }
-
-    assert(group_end == partition_end);
-    if (partition_ended)
-    {
-        // A corner case -- the ORDER BY columns were the same, but the group
-        // still ended because the partition has ended.
-        group_ended = true;
-    }
-}
-
 void WindowTransform::advanceFrameStart()
 {
     // Frame start is always UNBOUNDED PRECEDING for now, so we don't have to
     // move it. It is initialized when the new partition starts.
 }
 
+bool WindowTransform::arePeers(const RowNumber & x, const RowNumber & y) const
+{
+    if (x == y)
+    {
+        // For convenience, a row is always its own peer.
+        return true;
+    }
+
+    if (window_description.frame.type == WindowFrame::FrameType::Rows)
+    {
+        // For ROWS frame, row is only peers with itself (checked above);
+        return false;
+    }
+
+    // For RANGE frame, rows that compare equal w/ORDER BY are peers.
+    assert(window_description.frame.type == WindowFrame::FrameType::Range);
+    const size_t n = order_by_indices.size();
+    if (n == 0)
+    {
+        // No ORDER BY, so all rows are peers.
+        return true;
+    }
+
+    size_t i = 0;
+    for (; i < n; i++)
+    {
+        const auto * column_x = inputAt(x)[order_by_indices[i]].get();
+        const auto * column_y = inputAt(y)[order_by_indices[i]].get();
+        if (column_x->compareAt(x.row, y.row, *column_y,
+                1 /* nan_direction_hint */) != 0)
+        {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+void WindowTransform::advanceFrameEndCurrentRow()
+{
+    // We only process one block here, and frame_end must be already in it: if
+    // we didn't find the end in the previous block, frame_end is now the first
+    // row of the current block. We need this knowledge to write a simpler loop
+    // (only loop over rows and not over blocks), that should hopefully be more
+    // efficient.
+    // partition_end is either in this new block or past-the-end.
+    assert(frame_end.block  == partition_end.block
+        || frame_end.block + 1 == partition_end.block);
+
+    if (frame_end == partition_end)
+    {
+        // The case when we get a new block and find out that the partition has
+        // ended.
+        assert(partition_ended);
+        frame_ended = partition_ended;
+        return;
+    }
+
+    const auto block_rows = blockRowsNumber(frame_end);
+    // We could retreat the frame_end here, but for some reason I am reluctant
+    // to do this... It would have better data locality.
+    auto reference = current_row;
+    for (; frame_end.row < block_rows; ++frame_end.row)
+    {
+        if (!arePeers(reference, frame_end))
+        {
+            //fmt::print(stderr, "{} and {} don't match\n", reference, frame_end);
+            frame_ended = true;
+            return;
+        }
+        reference = frame_end;
+    }
+
+    // Got to the end of current block, have to properly update the row number.
+    ++frame_end.block;
+    frame_end.row = 0;
+
+    // Got to the end of partition (frame ended as well then) or end of data.
+    assert(frame_end == partition_end);
+    frame_ended = partition_ended;
+}
+
 void WindowTransform::advanceFrameEnd()
 {
-    // This should be called when we know the boundaries of the group (probably
-    // not a fundamental requirement, but currently it's written this way).
-    assert(group_ended);
+    // No reason for this function to be called again after it succeeded.
+    assert(!frame_ended);
 
     const auto frame_end_before = frame_end;
 
-    // Frame end is always the current group end, for now.
-    // In ROWS mode the group is going to contain only the current row.
-    frame_end = group_end;
-    frame_ended = group_ended;
+    // The only frame end we have for now is CURRENT ROW.
+    advanceFrameEndCurrentRow();
 
     // Add the columns over which we advanced the frame to the aggregate function
     // states.
@@ -321,13 +327,10 @@ void WindowTransform::advanceFrameEnd()
     }
 }
 
-void WindowTransform::writeOutGroup()
+void WindowTransform::writeOutCurrentRow()
 {
-//    fmt::print(stderr, "write out group [{}..{})\n",
-//        group_start, group_end);
-
-    // Empty groups don't make sense.
-    assert(group_start < group_end);
+    assert(current_row < partition_end);
+    assert(current_row.block >= first_block_number);
 
     for (size_t wi = 0; wi < workspaces.size(); ++wi)
     {
@@ -336,93 +339,11 @@ void WindowTransform::writeOutGroup()
         const auto * a = f.aggregate_function.get();
         auto * buf = ws.aggregate_function_state.data();
 
-        // We'll calculate the value once for the first row in the group, and
-        // insert its copy for each other row in the group.
-        IColumn * reference_column = outputAt(group_start)[wi].get();
-        const size_t reference_row = group_start.row;
+        IColumn * result_column = outputAt(current_row)[wi].get();
         // FIXME does it also allocate the result on the arena?
         // We'll have to pass it out with blocks then...
-        a->insertResultInto(buf, *reference_column, arena.get());
-        // The row we just added to the end of the column must correspond to the
-        // first row of the group.
-        assert(reference_column->size() == reference_row + 1);
-
-//        fmt::print(stderr, "calculated value of function {} is '{}'\n",
-//            wi, toString((*reference_column)[reference_row]));
-
-        // Now duplicate the calculated value into all other rows.
-        auto first_row_to_copy_to = group_start;
-        advanceRowNumber(first_row_to_copy_to);
-
-
-        // We use two explicit loops here instead of using advanceRowNumber(),
-        // because we want to batch the inserts per-block.
-        // Unfortunately this leads to tricky loop conditions, because the
-        // frame_end might be either a past-the-end block, or a valid block, in
-        // which case we also have to process its head. We have to avoid stepping
-        // into the past-the-end block because it might not be valid.
-        // Moreover, the past-the-end row is not in the past-the-end block, but
-        // in the block before it.
-        // And we also have to remember to reset the row number when moving to
-        // the next block.
-        uint64_t past_the_end_block;
-        uint64_t past_the_end_row;
-        if (group_end.row == 0)
-        {
-            // group_end might not be valid.
-            past_the_end_block = group_end.block;
-
-            // Otherwise a group would end at the start of data, this is not
-            // possible.
-            assert(group_end.block > 0);
-
-            const size_t first_valid_block = group_end.block - 1;
-            assert(first_valid_block >= first_block_number);
-
-            past_the_end_row = blocks[first_valid_block - first_block_number]
-                .input_columns[0]->size();
-        }
-        else
-        {
-            past_the_end_block = group_end.block + 1;
-            past_the_end_row = group_end.row;
-        }
-
-        for (auto block_index = first_row_to_copy_to.block;
-            block_index < past_the_end_block;
-            ++block_index)
-        {
-            const auto & block = blocks[block_index - first_block_number];
-
-            // We process tail of the first block, all rows of intermediate
-            // blocks, and the head of the last block.
-            const auto block_first_row
-                = (block_index == first_row_to_copy_to.block)
-                    ? first_row_to_copy_to.row : 0;
-            const auto block_last_row = ((block_index + 1) == past_the_end_block)
-                ? past_the_end_row : block.numRows();
-
-//            fmt::print(stderr,
-//                "group rest [{}, {}), pteb {}, pter {}, cur {}, fr {}, lr {}\n",
-//                group_start, group_end, past_the_end_block, group_end.row,
-//                block_index, block_first_row, block_last_row);
-            // The number of the elements left to insert may be zero, but we must
-            // notice it on the first block. Other blocks shouldn't be empty,
-            // because we don't generally have empty block, and advanceRowNumber()
-            // doesn't generate past-the-end row numbers, so we wouldn't get into
-            // a block we don't want to process.
-            if (block_first_row == block_last_row)
-            {
-                assert(block_index == first_row_to_copy_to.block);
-                break;
-            }
-
-            block.output_columns[wi]->insertManyFrom(*reference_column,
-                reference_row, block_last_row - block_first_row);
-        }
+        a->insertResultInto(buf, *result_column, arena.get());
     }
-
-    first_not_ready_row = group_end;
 }
 
 void WindowTransform::appendChunk(Chunk & chunk)
@@ -434,6 +355,7 @@ void WindowTransform::appendChunk(Chunk & chunk)
     // have it if it's end of data, though.
     if (!input_is_finished)
     {
+        assert(chunk.hasRows());
         blocks.push_back({});
         auto & block = blocks.back();
         block.input_columns = chunk.detachColumns();
@@ -470,25 +392,11 @@ void WindowTransform::appendChunk(Chunk & chunk)
             assert(input_is_finished);
         }
 
-        // After that, advance the peer groups. We can advance peer groups until
-        // the end of partition or current end of data, which is precisely the
-        // description of `partition_end`.
-        while (group_start < partition_end)
+        // After that, try to calculate window functions for each next row.
+        // We can continue until the end of partition or current end of data,
+        // which is precisely the definition of `partition_end`.
+        while (current_row < partition_end)
         {
-            advanceGroupEnd();
-
-//            fmt::print(stderr, "group [{}, {}), {}\n", group_start, group_end,
-//                group_ended);
-
-            if (!group_ended)
-            {
-                // Wait for more input data to find the end of group.
-                assert(!input_is_finished);
-                assert(!partition_ended);
-                return;
-            }
-
-            // The group ended.
             // Advance the frame start, updating the state of the aggregate
             // functions.
             advanceFrameStart();
@@ -496,6 +404,9 @@ void WindowTransform::appendChunk(Chunk & chunk)
             // functions.
             advanceFrameEnd();
 
+//            fmt::print(stderr, "row {} frame [{}, {}) {}\n",
+//                current_row, frame_start, frame_end, frame_ended);
+
             if (!frame_ended)
             {
                 // Wait for more input data to find the end of frame.
@@ -504,16 +415,16 @@ void WindowTransform::appendChunk(Chunk & chunk)
                 return;
             }
 
-            // Write out the aggregation results
-            writeOutGroup();
+            // The frame shouldn't be empty (probably?).
+            assert(frame_start < frame_end);
 
-            // Move to the next group.
-            // The frame will have to be recalculated.
+            // Write out the aggregation results.
+            writeOutCurrentRow();
+
+            // Move to the next row. The frame will have to be recalculated.
+            advanceRowNumber(current_row);
+            first_not_ready_row = current_row;
             frame_ended = false;
-
-            // Move to the next group.
-            group_ended = false;
-            group_start = group_end;
         }
 
         if (input_is_finished)
@@ -543,10 +454,7 @@ void WindowTransform::appendChunk(Chunk & chunk)
         // for now.
         frame_start = new_partition_start;
         frame_end = new_partition_start;
-        group_start = new_partition_start;
-        group_end = new_partition_start;
-        // The group pointers are already reset to the partition start, see the
-        // above loop.
+        assert(current_row == new_partition_start);
 
 //        fmt::print(stderr, "reinitialize agg data at start of {}\n",
 //            new_partition_start);
@@ -653,6 +561,17 @@ IProcessor::Status WindowTransform::prepare()
     if (!has_input && input.hasData())
     {
         input_data = input.pullData(true /* set_not_needed */);
+
+        // If we got an exception from input, just return it and mark that we're
+        // finished.
+        if (input_data.exception)
+        {
+            output.pushData(std::move(input_data));
+            output.finish();
+
+            return Status::PortFull;
+        }
+
         has_input = true;
 
         // Now we have new input and can try to generate more output in work().
@@ -678,14 +597,8 @@ IProcessor::Status WindowTransform::prepare()
 
 void WindowTransform::work()
 {
-    if (input_data.exception)
-    {
-        /// Skip transform in case of exception.
-        output_data = std::move(input_data);
-        has_input = false;
-        has_output = true;
-        return;
-    }
+    // Exceptions should be skipped in prepare().
+    assert(!input_data.exception);
 
     assert(has_input || input_is_finished);
 
@@ -697,7 +610,6 @@ void WindowTransform::work()
     catch (DB::Exception &)
     {
         output_data.exception = std::current_exception();
-        has_output = true;
         has_input = false;
         return;
     }
@@ -705,12 +617,12 @@ void WindowTransform::work()
     // We don't really have to keep the entire partition, and it can be big, so
     // we want to drop the starting blocks to save memory.
     // We can drop the old blocks if we already returned them as output, and the
-    // frame, group and the partition etalon are already past them. Note that the
-    // frame start can be further than group start for some frame specs (e.g.
-    // EXCLUDE CURRENT ROW), so we have to check both.
+    // frame, the current row and the partition etalon are already past them.
+    // Note that the frame start can be further than current row for some frame
+    // specs (e.g. EXCLUDE CURRENT ROW), so we have to check both.
     const auto first_used_block = std::min(next_output_block_number,
         std::min(frame_start.block,
-            std::min(group_start.block,
+            std::min(current_row.block,
                 partition_etalon.block)));
     if (first_block_number < first_used_block)
     {
@@ -723,7 +635,7 @@ void WindowTransform::work()
 
         assert(next_output_block_number >= first_block_number);
         assert(frame_start.block >= first_block_number);
-        assert(group_start.block >= first_block_number);
+        assert(current_row.block >= first_block_number);
     }
 }
 
diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h
index d18c9c727d2..0699f7cde6e 100644
--- a/src/Processors/Transforms/WindowTransform.h
+++ b/src/Processors/Transforms/WindowTransform.h
@@ -61,17 +61,14 @@ struct RowNumber
  * be sorted by PARTITION BY (in any order), then by ORDER BY.
  * We need to track the following pointers:
  * 1) boundaries of partition -- rows that compare equal w/PARTITION BY.
- * 2) boundaries of peer group -- rows that compare equal w/ORDER BY (empty
- *    ORDER BY means all rows are peers).
- * 3) boundaries of the frame.
+ * 2) current row for which we will compute the window functions.
+ * 3) boundaries of the frame for this row.
  * Both the peer group and the frame are inside the partition, but can have any
  * position relative to each other.
- * All pointers only move forward. For partition and group boundaries, this is
- * ensured by the order of input data. This property also trivially holds for
- * the ROWS and GROUPS frames. For the RANGE frame, the proof requires the
- * additional fact that the ranges are specified in terms of (the single)
- * ORDER BY column.
- * The value of the window function is the same for all rows of the peer group.
+ * All pointers only move forward. For partition boundaries, this is ensured by
+ * the order of input data. This property also trivially holds for the ROWS and
+ * GROUPS frames. For the RANGE frame, the proof requires the additional fact
+ * that the ranges are specified in terms of (the single) ORDER BY column.
  */
 class WindowTransform : public IProcessor /* public ISimpleTransform */
 {
@@ -105,13 +102,11 @@ public:
 
 private:
     void advancePartitionEnd();
-    void advanceGroupEnd();
-    void advanceGroupEndOrderBy();
-    void advanceGroupEndTrivial();
-    void advanceGroupEndRange();
     void advanceFrameStart();
     void advanceFrameEnd();
-    void writeOutGroup();
+    void advanceFrameEndCurrentRow();
+    bool arePeers(const RowNumber & x, const RowNumber & y) const;
+    void writeOutCurrentRow();
 
     Columns & inputAt(const RowNumber & x)
     {
@@ -179,7 +174,8 @@ private:
 
 public:
     /*
-     * Data (formerly) inherited from ISimpleTransform.
+     * Data (formerly) inherited from ISimpleTransform, needed for the
+     * implementation of the IProcessor interface.
      */
     InputPort & input;
     OutputPort & output;
@@ -231,21 +227,18 @@ public:
     RowNumber partition_end;
     bool partition_ended = false;
 
-    // Current peer group is [group_start, group_end) if group_ended,
-    // [group_start, ?) otherwise.
-    RowNumber group_start;
-    RowNumber group_end;
-    bool group_ended = false;
+    // This is the row for which we are computing the window functions now.
+    RowNumber current_row;
 
     // The frame is [frame_start, frame_end) if frame_ended, and unknown
-    // otherwise. Note that when we move to the next peer group, both the
+    // otherwise. Note that when we move to the next row, both the
     // frame_start and the frame_end may jump forward by an unknown amount of
     // blocks, e.g. if we use a RANGE frame. This means that sometimes we don't
     // know neither frame_end nor frame_start.
     // We update the states of the window functions as we track the frame
     // boundaries.
     // After we have found the final boundaries of the frame, we can immediately
-    // output the result for the current group, w/o waiting for more data.
+    // output the result for the current row, w/o waiting for more data.
     RowNumber frame_start;
     RowNumber frame_end;
     bool frame_ended = false;

From e553eb112f78513192423cffea3d548a311d9594 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Thu, 28 Jan 2021 16:47:06 +0300
Subject: [PATCH 0312/1238] cleanup

---
 src/Processors/Transforms/WindowTransform.cpp | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index 7cb7e6e68e7..2ed622937c3 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -272,6 +272,8 @@ void WindowTransform::advanceFrameEnd()
         const auto * a = f.aggregate_function.get();
         auto * buf = ws.aggregate_function_state.data();
 
+        // FIXME we don't need these complex loops, because frame_end advances
+        // by one block at most.
         // We use two explicit loops here instead of using advanceRowNumber(),
         // because we want to cache the argument columns array per block. Later
         // we also use batch add.
@@ -502,13 +504,10 @@ IProcessor::Status WindowTransform::prepare()
         return Status::Finished;
     }
 
-//    // Technically the past-the-end next_output_block_number is also valid if
-//    // we haven't yet received the corresponding input block.
-//    assert(next_output_block_number < first_block_number + blocks.size()
-//        || blocks.empty());
-
     assert(first_not_ready_row.block >= first_block_number);
-    // Might be past-the-end, so equality also valid.
+    // The first_not_ready_row might be past-the-end if we have already
+    // calculated the window functions for all input rows. That's why the
+    // equality is also valid here.
     assert(first_not_ready_row.block <= first_block_number + blocks.size());
     assert(next_output_block_number >= first_block_number);
 

From 52e5c0aad748b6ee55a97380abddf0ceb12aa864 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Thu, 28 Jan 2021 16:48:17 +0300
Subject: [PATCH 0313/1238] fix thread status

---
 src/Common/CurrentThread.h                  |  7 +++---
 src/Common/ThreadStatus.cpp                 |  3 +++
 src/Common/ThreadStatus.h                   |  2 +-
 src/Interpreters/DDLWorker.cpp              | 24 +++++----------------
 src/Interpreters/DDLWorker.h                |  3 ---
 src/Interpreters/InterpreterCreateQuery.cpp |  3 ++-
 src/Interpreters/ThreadStatusExt.cpp        |  2 ++
 src/Interpreters/executeQuery.cpp           |  9 ++------
 src/Server/MySQLHandler.cpp                 |  6 +++++-
 src/Server/PostgreSQLHandler.cpp            |  7 +++++-
 src/Storages/StorageReplicatedMergeTree.cpp |  2 +-
 11 files changed, 31 insertions(+), 37 deletions(-)

diff --git a/src/Common/CurrentThread.h b/src/Common/CurrentThread.h
index 876cbd8a66b..7ab57ea7fab 100644
--- a/src/Common/CurrentThread.h
+++ b/src/Common/CurrentThread.h
@@ -63,9 +63,6 @@ public:
     /// Call from master thread as soon as possible (e.g. when thread accepted connection)
     static void initializeQuery();
 
-    /// Sets query_context for current thread group
-    static void attachQueryContext(Context & query_context);
-
     /// You must call one of these methods when create a query child thread:
     /// Add current thread to a group associated with the thread group
     static void attachTo(const ThreadGroupStatusPtr & thread_group);
@@ -99,6 +96,10 @@ public:
 
 private:
     static void defaultThreadDeleter();
+
+    /// Sets query_context for current thread group
+    /// Can by used only through QueryScope
+    static void attachQueryContext(Context & query_context);
 };
 
 }
diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp
index 5105fff03b2..f2256fbf192 100644
--- a/src/Common/ThreadStatus.cpp
+++ b/src/Common/ThreadStatus.cpp
@@ -99,6 +99,9 @@ ThreadStatus::~ThreadStatus()
         /// We've already allocated a little bit more than the limit and cannot track it in the thread memory tracker or its parent.
     }
 
+    /// It may cause segfault if query_context was destroyed, but was not detached
+    assert((!query_context && query_id.empty()) || (query_id == query_context->getCurrentQueryId()));
+
     if (deleter)
         deleter();
     current_thread = nullptr;
diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h
index 1be1f2cd4df..dc5f09c5f3d 100644
--- a/src/Common/ThreadStatus.h
+++ b/src/Common/ThreadStatus.h
@@ -201,7 +201,7 @@ public:
     void setFatalErrorCallback(std::function<void()> callback);
     void onFatalError();
 
-    /// Sets query context for current thread and its thread group
+    /// Sets query context for current master thread and its thread group
     /// NOTE: query_context have to be alive until detachQuery() is called
     void attachQueryContext(Context & query_context);
 
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index cb38c733582..83412ab8fb7 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -202,11 +202,12 @@ void DDLWorker::shutdown()
     queue_updated_event->set();
     cleanup_event->set();
 
-    worker_pool.reset();
     if (main_thread.joinable())
         main_thread.join();
     if (cleanup_thread.joinable())
         cleanup_thread.join();
+
+    worker_pool.reset();
 }
 
 DDLWorker::~DDLWorker()
@@ -355,8 +356,6 @@ void DDLWorker::scheduleTasks()
         if (!task)
         {
             LOG_DEBUG(log, "Will not execute task {}: {}", entry_name, reason);
-            //task->was_executed = true;
-            //saveTask(std::move(task));
             continue;
         }
 
@@ -379,7 +378,7 @@ void DDLWorker::scheduleTasks()
 
 DDLTaskBase & DDLWorker::saveTask(DDLTaskPtr && task)
 {
-    std::remove_if(current_tasks.begin(), current_tasks.end(), [](const DDLTaskPtr & t) { return t->completely_processed.load(); });
+    current_tasks.remove_if([](const DDLTaskPtr & t) { return t->completely_processed.load(); });
     assert(current_tasks.size() <= pool_size);
     current_tasks.emplace_back(std::move(task));
     return *current_tasks.back();
@@ -394,10 +393,12 @@ bool DDLWorker::tryExecuteQuery(const String & query, DDLTaskBase & task)
     ReadBufferFromString istr(query_to_execute);
     String dummy_string;
     WriteBufferFromString ostr(dummy_string);
+    std::optional<CurrentThread::QueryScope> query_scope;
 
     try
     {
         auto query_context = task.makeQueryContext(context);
+        query_scope.emplace(*query_context);
         executeQuery(istr, ostr, false, *query_context, {});
     }
     catch (const DB::Exception & e)
@@ -433,20 +434,6 @@ bool DDLWorker::tryExecuteQuery(const String & query, DDLTaskBase & task)
     return true;
 }
 
-void DDLWorker::attachToThreadGroup()
-{
-    if (thread_group)
-    {
-        /// Put all threads to one thread pool
-        CurrentThread::attachToIfDetached(thread_group);
-    }
-    else
-    {
-        CurrentThread::initializeQuery();
-        thread_group = CurrentThread::getGroup();
-    }
-}
-
 void DDLWorker::processTask(DDLTaskBase & task)
 {
     auto zookeeper = tryGetZooKeeper();
@@ -909,7 +896,6 @@ void DDLWorker::runMainThread()
     };
 
     setThreadName("DDLWorker");
-    attachToThreadGroup();
     LOG_DEBUG(log, "Starting DDLWorker thread");
 
     while (!stop_flag)
diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h
index c0194c4f252..1b7ebfb5796 100644
--- a/src/Interpreters/DDLWorker.h
+++ b/src/Interpreters/DDLWorker.h
@@ -102,8 +102,6 @@ protected:
     void runMainThread();
     void runCleanupThread();
 
-    void attachToThreadGroup();
-
 protected:
     Context context;
     Poco::Logger * log;
@@ -138,7 +136,6 @@ protected:
     /// How many tasks could be in the queue
     size_t max_tasks_in_queue = 1000;
 
-    ThreadGroupStatusPtr thread_group;
     std::atomic<UInt64> max_id = 0;
 };
 
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index b66af77930c..5292ef57d7a 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -929,7 +929,8 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
                 drop_ast->table = create.table;
                 drop_ast->no_ddl_lock = true;
 
-                InterpreterDropQuery interpreter(drop_ast, context);
+                Context drop_context = context;
+                InterpreterDropQuery interpreter(drop_ast, drop_context);
                 interpreter.execute();
             }
             else
diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp
index 61322cabfb3..8a979721290 100644
--- a/src/Interpreters/ThreadStatusExt.cpp
+++ b/src/Interpreters/ThreadStatusExt.cpp
@@ -500,6 +500,8 @@ CurrentThread::QueryScope::QueryScope(Context & query_context)
 {
     CurrentThread::initializeQuery();
     CurrentThread::attachQueryContext(query_context);
+    if (!query_context.hasQueryContext())
+        query_context.makeQueryContext();
 }
 
 void CurrentThread::QueryScope::logPeakMemoryUsage()
diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp
index 7003e6f5ee9..770e6e65d24 100644
--- a/src/Interpreters/executeQuery.cpp
+++ b/src/Interpreters/executeQuery.cpp
@@ -326,13 +326,8 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
 {
     const auto current_time = std::chrono::system_clock::now();
 
-    /// If we already executing query and it requires to execute internal query, than
-    /// don't replace thread context with given (it can be temporary). Otherwise, attach context to thread.
-    if (!internal)
-    {
-        context.makeQueryContext();
-        CurrentThread::attachQueryContext(context);
-    }
+    assert(internal || CurrentThread::get().getQueryContext());
+    assert(internal || CurrentThread::get().getQueryContext()->getCurrentQueryId() == CurrentThread::getQueryId());
 
     const Settings & settings = context.getSettingsRef();
 
diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp
index 63a48fde1a7..f660d97cdc6 100644
--- a/src/Server/MySQLHandler.cpp
+++ b/src/Server/MySQLHandler.cpp
@@ -24,6 +24,7 @@
 #include <regex>
 #include <Access/User.h>
 #include <Access/AccessControlManager.h>
+#include <Common/setThreadName.h>
 
 #if !defined(ARCADIA_BUILD)
 #    include <Common/config_version.h>
@@ -86,6 +87,8 @@ MySQLHandler::MySQLHandler(IServer & server_, const Poco::Net::StreamSocket & so
 
 void MySQLHandler::run()
 {
+    setThreadName("MySQLHandler");
+    ThreadStatus thread_status;
     connection_context.makeSessionContext();
     connection_context.getClientInfo().interface = ClientInfo::Interface::MYSQL;
     connection_context.setDefaultFormat("MySQLWire");
@@ -339,8 +342,9 @@ void MySQLHandler::comQuery(ReadBuffer & payload)
 
             affected_rows += progress.written_rows;
         });
+        CurrentThread::QueryScope query_scope{query_context};
 
-        executeQuery(should_replace ? replacement : payload, *out, true, query_context,
+        executeQuery(should_replace ? replacement : payload, *out, false, query_context,
             [&with_output](const String &, const String &, const String &, const String &)
             {
                 with_output = true;
diff --git a/src/Server/PostgreSQLHandler.cpp b/src/Server/PostgreSQLHandler.cpp
index 2bce5abcd11..b3a3bbf2aaa 100644
--- a/src/Server/PostgreSQLHandler.cpp
+++ b/src/Server/PostgreSQLHandler.cpp
@@ -5,6 +5,7 @@
 #include <Interpreters/executeQuery.h>
 #include "PostgreSQLHandler.h"
 #include <Parsers/parseQuery.h>
+#include <Common/setThreadName.h>
 #include <random>
 
 #if !defined(ARCADIA_BUILD)
@@ -49,6 +50,8 @@ void PostgreSQLHandler::changeIO(Poco::Net::StreamSocket & socket)
 
 void PostgreSQLHandler::run()
 {
+    setThreadName("PostgresHandler");
+    ThreadStatus thread_status;
     connection_context.makeSessionContext();
     connection_context.getClientInfo().interface = ClientInfo::Interface::POSTGRESQL;
     connection_context.setDefaultFormat("PostgreSQLWire");
@@ -273,8 +276,10 @@ void PostgreSQLHandler::processQuery()
 
         for (const auto & spl_query : queries)
         {
+            /// FIXME why do we execute all queries in a single connection context?
+            CurrentThread::QueryScope query_scope{connection_context};
             ReadBufferFromString read_buf(spl_query);
-            executeQuery(read_buf, *out, true, connection_context, {});
+            executeQuery(read_buf, *out, false, connection_context, {});
 
             PostgreSQLProtocol::Messaging::CommandComplete::Command command =
                 PostgreSQLProtocol::Messaging::CommandComplete::classifyQuery(spl_query);
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index 30b08cdea1e..951ce63944b 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -3682,7 +3682,7 @@ void StorageReplicatedMergeTree::shutdown()
 
     /// We clear all old parts after stopping all background operations. It's
     /// important, because background operations can produce temporary parts
-    /// which will remove themselves in their descrutors. If so, we may have
+    /// which will remove themselves in their destrutors. If so, we may have
     /// race condition between our remove call and background process.
     clearOldPartsFromFilesystem(true);
 }

From ffaa8e34a666e7036e458605e1fd59c9e96ec54e Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Thu, 28 Jan 2021 16:57:36 +0300
Subject: [PATCH 0314/1238] minor code improvements around ThreadStatus

---
 src/Common/CurrentThread.h                  |  7 ++++---
 src/Common/ThreadStatus.cpp                 |  3 +++
 src/Common/ThreadStatus.h                   |  2 +-
 src/Interpreters/DDLWorker.cpp              | 18 ++----------------
 src/Interpreters/DDLWorker.h                |  4 ----
 src/Interpreters/ThreadStatusExt.cpp        |  2 ++
 src/Interpreters/executeQuery.cpp           |  9 ++-------
 src/Server/MySQLHandler.cpp                 |  6 +++++-
 src/Server/PostgreSQLHandler.cpp            |  7 ++++++-
 src/Storages/StorageReplicatedMergeTree.cpp |  2 +-
 10 files changed, 26 insertions(+), 34 deletions(-)

diff --git a/src/Common/CurrentThread.h b/src/Common/CurrentThread.h
index 876cbd8a66b..7ab57ea7fab 100644
--- a/src/Common/CurrentThread.h
+++ b/src/Common/CurrentThread.h
@@ -63,9 +63,6 @@ public:
     /// Call from master thread as soon as possible (e.g. when thread accepted connection)
     static void initializeQuery();
 
-    /// Sets query_context for current thread group
-    static void attachQueryContext(Context & query_context);
-
     /// You must call one of these methods when create a query child thread:
     /// Add current thread to a group associated with the thread group
     static void attachTo(const ThreadGroupStatusPtr & thread_group);
@@ -99,6 +96,10 @@ public:
 
 private:
     static void defaultThreadDeleter();
+
+    /// Sets query_context for current thread group
+    /// Can by used only through QueryScope
+    static void attachQueryContext(Context & query_context);
 };
 
 }
diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp
index 5105fff03b2..f2256fbf192 100644
--- a/src/Common/ThreadStatus.cpp
+++ b/src/Common/ThreadStatus.cpp
@@ -99,6 +99,9 @@ ThreadStatus::~ThreadStatus()
         /// We've already allocated a little bit more than the limit and cannot track it in the thread memory tracker or its parent.
     }
 
+    /// It may cause segfault if query_context was destroyed, but was not detached
+    assert((!query_context && query_id.empty()) || (query_id == query_context->getCurrentQueryId()));
+
     if (deleter)
         deleter();
     current_thread = nullptr;
diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h
index 1be1f2cd4df..dc5f09c5f3d 100644
--- a/src/Common/ThreadStatus.h
+++ b/src/Common/ThreadStatus.h
@@ -201,7 +201,7 @@ public:
     void setFatalErrorCallback(std::function<void()> callback);
     void onFatalError();
 
-    /// Sets query context for current thread and its thread group
+    /// Sets query context for current master thread and its thread group
     /// NOTE: query_context have to be alive until detachQuery() is called
     void attachQueryContext(Context & query_context);
 
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 6dab60f57d1..d99e49dce2d 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -609,12 +609,14 @@ bool DDLWorker::tryExecuteQuery(const String & query, const DDLTask & task, Exec
     ReadBufferFromString istr(query_to_execute);
     String dummy_string;
     WriteBufferFromString ostr(dummy_string);
+    std::optional<CurrentThread::QueryScope> query_scope;
 
     try
     {
         auto current_context = std::make_unique<Context>(context);
         current_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY;
         current_context->setCurrentQueryId(""); // generate random query_id
+        query_scope.emplace(*current_context);
         executeQuery(istr, ostr, false, *current_context, {});
     }
     catch (...)
@@ -631,20 +633,6 @@ bool DDLWorker::tryExecuteQuery(const String & query, const DDLTask & task, Exec
     return true;
 }
 
-void DDLWorker::attachToThreadGroup()
-{
-    if (thread_group)
-    {
-        /// Put all threads to one thread pool
-        CurrentThread::attachToIfDetached(thread_group);
-    }
-    else
-    {
-        CurrentThread::initializeQuery();
-        thread_group = CurrentThread::getGroup();
-    }
-}
-
 
 void DDLWorker::enqueueTask(DDLTaskPtr task_ptr)
 {
@@ -1124,8 +1112,6 @@ void DDLWorker::runMainThread()
     {
         try
         {
-            attachToThreadGroup();
-
             cleanup_event->set();
             scheduleTasks();
 
diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h
index 9a4e55dcfc4..afc4a4f9794 100644
--- a/src/Interpreters/DDLWorker.h
+++ b/src/Interpreters/DDLWorker.h
@@ -161,8 +161,6 @@ private:
     void runMainThread();
     void runCleanupThread();
 
-    void attachToThreadGroup();
-
 private:
     Context context;
     Poco::Logger * log;
@@ -195,8 +193,6 @@ private:
     /// How many tasks could be in the queue
     size_t max_tasks_in_queue = 1000;
 
-    ThreadGroupStatusPtr thread_group;
-
     std::atomic<UInt64> max_id = 0;
 
     friend class DDLQueryStatusInputStream;
diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp
index 61322cabfb3..8a979721290 100644
--- a/src/Interpreters/ThreadStatusExt.cpp
+++ b/src/Interpreters/ThreadStatusExt.cpp
@@ -500,6 +500,8 @@ CurrentThread::QueryScope::QueryScope(Context & query_context)
 {
     CurrentThread::initializeQuery();
     CurrentThread::attachQueryContext(query_context);
+    if (!query_context.hasQueryContext())
+        query_context.makeQueryContext();
 }
 
 void CurrentThread::QueryScope::logPeakMemoryUsage()
diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp
index 7003e6f5ee9..770e6e65d24 100644
--- a/src/Interpreters/executeQuery.cpp
+++ b/src/Interpreters/executeQuery.cpp
@@ -326,13 +326,8 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
 {
     const auto current_time = std::chrono::system_clock::now();
 
-    /// If we already executing query and it requires to execute internal query, than
-    /// don't replace thread context with given (it can be temporary). Otherwise, attach context to thread.
-    if (!internal)
-    {
-        context.makeQueryContext();
-        CurrentThread::attachQueryContext(context);
-    }
+    assert(internal || CurrentThread::get().getQueryContext());
+    assert(internal || CurrentThread::get().getQueryContext()->getCurrentQueryId() == CurrentThread::getQueryId());
 
     const Settings & settings = context.getSettingsRef();
 
diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp
index 63a48fde1a7..f660d97cdc6 100644
--- a/src/Server/MySQLHandler.cpp
+++ b/src/Server/MySQLHandler.cpp
@@ -24,6 +24,7 @@
 #include <regex>
 #include <Access/User.h>
 #include <Access/AccessControlManager.h>
+#include <Common/setThreadName.h>
 
 #if !defined(ARCADIA_BUILD)
 #    include <Common/config_version.h>
@@ -86,6 +87,8 @@ MySQLHandler::MySQLHandler(IServer & server_, const Poco::Net::StreamSocket & so
 
 void MySQLHandler::run()
 {
+    setThreadName("MySQLHandler");
+    ThreadStatus thread_status;
     connection_context.makeSessionContext();
     connection_context.getClientInfo().interface = ClientInfo::Interface::MYSQL;
     connection_context.setDefaultFormat("MySQLWire");
@@ -339,8 +342,9 @@ void MySQLHandler::comQuery(ReadBuffer & payload)
 
             affected_rows += progress.written_rows;
         });
+        CurrentThread::QueryScope query_scope{query_context};
 
-        executeQuery(should_replace ? replacement : payload, *out, true, query_context,
+        executeQuery(should_replace ? replacement : payload, *out, false, query_context,
             [&with_output](const String &, const String &, const String &, const String &)
             {
                 with_output = true;
diff --git a/src/Server/PostgreSQLHandler.cpp b/src/Server/PostgreSQLHandler.cpp
index 2bce5abcd11..b3a3bbf2aaa 100644
--- a/src/Server/PostgreSQLHandler.cpp
+++ b/src/Server/PostgreSQLHandler.cpp
@@ -5,6 +5,7 @@
 #include <Interpreters/executeQuery.h>
 #include "PostgreSQLHandler.h"
 #include <Parsers/parseQuery.h>
+#include <Common/setThreadName.h>
 #include <random>
 
 #if !defined(ARCADIA_BUILD)
@@ -49,6 +50,8 @@ void PostgreSQLHandler::changeIO(Poco::Net::StreamSocket & socket)
 
 void PostgreSQLHandler::run()
 {
+    setThreadName("PostgresHandler");
+    ThreadStatus thread_status;
     connection_context.makeSessionContext();
     connection_context.getClientInfo().interface = ClientInfo::Interface::POSTGRESQL;
     connection_context.setDefaultFormat("PostgreSQLWire");
@@ -273,8 +276,10 @@ void PostgreSQLHandler::processQuery()
 
         for (const auto & spl_query : queries)
         {
+            /// FIXME why do we execute all queries in a single connection context?
+            CurrentThread::QueryScope query_scope{connection_context};
             ReadBufferFromString read_buf(spl_query);
-            executeQuery(read_buf, *out, true, connection_context, {});
+            executeQuery(read_buf, *out, false, connection_context, {});
 
             PostgreSQLProtocol::Messaging::CommandComplete::Command command =
                 PostgreSQLProtocol::Messaging::CommandComplete::classifyQuery(spl_query);
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index 70e90e9706a..baa50804db3 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -3680,7 +3680,7 @@ void StorageReplicatedMergeTree::shutdown()
 
     /// We clear all old parts after stopping all background operations. It's
     /// important, because background operations can produce temporary parts
-    /// which will remove themselves in their descrutors. If so, we may have
+    /// which will remove themselves in their destrutors. If so, we may have
     /// race condition between our remove call and background process.
     clearOldPartsFromFilesystem(true);
 }

From e612ad2babcb91e3f3069b1f3cbfce1dc5ae1b20 Mon Sep 17 00:00:00 2001
From: George <gyuton@yandex-team.ru>
Date: Thu, 28 Jan 2021 17:13:41 +0300
Subject: [PATCH 0315/1238] minor fixes

---
 docs/en/sql-reference/functions/other-functions.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md
index be16f76ff59..c8f2c0ad495 100644
--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@@ -195,7 +195,7 @@ Use case: suppose you have a service that stores data for multiple clients in on
 **Syntax**
 
 ```sql
-byteSize(arguments)
+byteSize(argument [, ...])
 ```
 
 **Parameters**
@@ -290,7 +290,6 @@ SELECT byteSize(NULL, 1, 0.3, '');
 Result:
 
 ```text
-
 ┌─byteSize(NULL, 1, 0.3, '')─┐
 │                         19 │
 └────────────────────────────┘

From 7db0766192ced901b343224282f65156647f1443 Mon Sep 17 00:00:00 2001
From: Anton Popov <pad11rus@gmail.com>
Date: Thu, 28 Jan 2021 17:40:50 +0300
Subject: [PATCH 0316/1238] fix flaky checks

---
 tests/integration/test_ttl_replicated/test.py | 20 +++++++++----------
 .../0_stateless/01280_ttl_where_group_by.sh   |  2 +-
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/tests/integration/test_ttl_replicated/test.py b/tests/integration/test_ttl_replicated/test.py
index 84b3340925c..389e249790f 100644
--- a/tests/integration/test_ttl_replicated/test.py
+++ b/tests/integration/test_ttl_replicated/test.py
@@ -340,15 +340,15 @@ def test_ttl_empty_parts(started_cluster):
     [(node1, node2, 0), (node3, node4, 1), (node5, node6, 2)]
 )
 def test_ttl_compatibility(started_cluster, node_left, node_right, num_run):
-    drop_table([node_left, node_right], "test_ttl")
+    drop_table([node_left, node_right], "test_ttl_delete")
     drop_table([node_left, node_right], "test_ttl_group_by")
     drop_table([node_left, node_right], "test_ttl_where")
 
     for node in [node_left, node_right]:
         node.query(
             '''
-                CREATE TABLE test_ttl(date DateTime, id UInt32)
-                ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl_{suff}', '{replica}')
+                CREATE TABLE test_ttl_delete(date DateTime, id UInt32)
+                ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl_delete_{suff}', '{replica}')
                 ORDER BY id PARTITION BY toDayOfMonth(date)
                 TTL date + INTERVAL 3 SECOND
             '''.format(suff=num_run, replica=node.name))
@@ -369,10 +369,10 @@ def test_ttl_compatibility(started_cluster, node_left, node_right, num_run):
                 TTL date + INTERVAL 3 SECOND DELETE WHERE id % 2 = 1
             '''.format(suff=num_run, replica=node.name))
 
-    node_left.query("INSERT INTO test_ttl VALUES (now(), 1)")
-    node_left.query("INSERT INTO test_ttl VALUES (toDateTime('2100-10-11 10:00:00'), 2)")
-    node_right.query("INSERT INTO test_ttl VALUES (now(), 3)")
-    node_right.query("INSERT INTO test_ttl VALUES (toDateTime('2100-10-11 10:00:00'), 4)")
+    node_left.query("INSERT INTO test_ttl_delete VALUES (now(), 1)")
+    node_left.query("INSERT INTO test_ttl_delete VALUES (toDateTime('2100-10-11 10:00:00'), 2)")
+    node_right.query("INSERT INTO test_ttl_delete VALUES (now(), 3)")
+    node_right.query("INSERT INTO test_ttl_delete VALUES (toDateTime('2100-10-11 10:00:00'), 4)")
 
     node_left.query("INSERT INTO test_ttl_group_by VALUES (now(), 0, 1)")
     node_left.query("INSERT INTO test_ttl_group_by VALUES (now(), 0, 2)")
@@ -392,12 +392,12 @@ def test_ttl_compatibility(started_cluster, node_left, node_right, num_run):
     
     time.sleep(5) # Wait for TTL
 
-    node_right.query("OPTIMIZE TABLE test_ttl FINAL")
+    node_right.query("OPTIMIZE TABLE test_ttl_delete FINAL")
     node_right.query("OPTIMIZE TABLE test_ttl_group_by FINAL")
     node_right.query("OPTIMIZE TABLE test_ttl_where FINAL")
 
-    assert node_left.query("SELECT id FROM test_ttl ORDER BY id") == "2\n4\n"
-    assert node_right.query("SELECT id FROM test_ttl ORDER BY id") == "2\n4\n"
+    assert node_left.query("SELECT id FROM test_ttl_delete ORDER BY id") == "2\n4\n"
+    assert node_right.query("SELECT id FROM test_ttl_delete ORDER BY id") == "2\n4\n"
 
     assert node_left.query("SELECT val FROM test_ttl_group_by ORDER BY id") == "10\n"
     assert node_right.query("SELECT val FROM test_ttl_group_by ORDER BY id") == "10\n"
diff --git a/tests/queries/0_stateless/01280_ttl_where_group_by.sh b/tests/queries/0_stateless/01280_ttl_where_group_by.sh
index a83956b7c3d..9f30c7c5872 100755
--- a/tests/queries/0_stateless/01280_ttl_where_group_by.sh
+++ b/tests/queries/0_stateless/01280_ttl_where_group_by.sh
@@ -52,7 +52,7 @@ $CLICKHOUSE_CLIENT --query "drop table if exists ttl_01280_3"
 
 echo "ttl_01280_3"
 $CLICKHOUSE_CLIENT -n --query "
-create table ttl_01280_3 (a Int, b Int, x Int64, y Int, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by a set x = argMax(x, d), y = argMax(y, d), d = max(d);
+create table ttl_01280_3 (a Int, b Int, x Int64, y Int, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by a set b = min(b), x = argMax(x, d), y = argMax(y, d), d = max(d);
 insert into ttl_01280_3 values (1, 1, 0, 4, now() + 10);
 insert into ttl_01280_3 values (1, 1, 10, 6, now() + 1);
 insert into ttl_01280_3 values (1, 2, 3, 7, now());

From 10460313ad5406e3c49d05d7e5b4cb6281c96e90 Mon Sep 17 00:00:00 2001
From: tavplubix <avtokmakov@yandex-team.ru>
Date: Thu, 28 Jan 2021 17:48:09 +0300
Subject: [PATCH 0317/1238] Update StorageReplicatedMergeTree.cpp

---
 src/Storages/StorageReplicatedMergeTree.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index baa50804db3..49d90e026cb 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -3680,7 +3680,7 @@ void StorageReplicatedMergeTree::shutdown()
 
     /// We clear all old parts after stopping all background operations. It's
     /// important, because background operations can produce temporary parts
-    /// which will remove themselves in their destrutors. If so, we may have
+    /// which will remove themselves in their destructors. If so, we may have
     /// race condition between our remove call and background process.
     clearOldPartsFromFilesystem(true);
 }

From 70963e439666be6ecd8a55d3399e344671abc721 Mon Sep 17 00:00:00 2001
From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com>
Date: Thu, 28 Jan 2021 17:57:57 +0300
Subject: [PATCH 0318/1238] Fixed test_kafka_flush_by_time test.

---
 .../configs/kafka_macros.xml                  | 13 ----------
 tests/integration/test_storage_kafka/test.py  | 26 ++++++++++++++-----
 2 files changed, 19 insertions(+), 20 deletions(-)
 delete mode 100644 tests/integration/test_storage_kafka/configs/kafka_macros.xml

diff --git a/tests/integration/test_storage_kafka/configs/kafka_macros.xml b/tests/integration/test_storage_kafka/configs/kafka_macros.xml
deleted file mode 100644
index 7f6cfb5eb1f..00000000000
--- a/tests/integration/test_storage_kafka/configs/kafka_macros.xml
+++ /dev/null
@@ -1,13 +0,0 @@
-<?xml version="1.0" ?>
-<yandex>
-	<macros>
-        <kafka_broker>kafka1</kafka_broker>
-        <kafka_topic_old>old</kafka_topic_old>
-        <kafka_group_name_old>old</kafka_group_name_old>
-
-        <kafka_topic_new>new</kafka_topic_new>
-        <kafka_group_name_new>new</kafka_group_name_new>
-        <kafka_client_id>instance</kafka_client_id>
-        <kafka_format_json_each_row>JSONEachRow</kafka_format_json_each_row>
-	</macros>
-</yandex>
\ No newline at end of file
diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py
index 1f31cbdbbc7..5f2726832cc 100644
--- a/tests/integration/test_storage_kafka/test.py
+++ b/tests/integration/test_storage_kafka/test.py
@@ -39,9 +39,16 @@ from . import social_pb2
 
 cluster = ClickHouseCluster(__file__)
 instance = cluster.add_instance('instance',
-                                main_configs=['configs/kafka.xml', 'configs/log_conf.xml', 'configs/kafka_macros.xml'],
+                                main_configs=['configs/kafka.xml', 'configs/log_conf.xml'],
                                 with_kafka=True,
                                 with_zookeeper=True,
+                                macros={"kafka_broker":"kafka1",
+                                        "kafka_topic_old":"old",
+                                        "kafka_group_name_old":"old",
+                                        "kafka_topic_new":"new",
+                                        "kafka_group_name_new":"new",
+                                        "kafka_client_id":"instance",
+                                        "kafka_format_json_each_row":"JSONEachRow"},
                                 clickhouse_path_dir='clickhouse_path')
 kafka_id = ''
 
@@ -1732,6 +1739,11 @@ def test_kafka_produce_key_timestamp(kafka_cluster):
 
 @pytest.mark.timeout(600)
 def test_kafka_flush_by_time(kafka_cluster):
+    admin_client = KafkaAdminClient(bootstrap_servers="localhost:9092")
+    topic_list = []
+    topic_list.append(NewTopic(name="flush_by_time", num_partitions=1, replication_factor=1))
+    admin_client.create_topics(new_topics=topic_list, validate_only=False)
+
     instance.query('''
         DROP TABLE IF EXISTS test.view;
         DROP TABLE IF EXISTS test.consumer;
@@ -1771,7 +1783,7 @@ def test_kafka_flush_by_time(kafka_cluster):
 
     time.sleep(18)
 
-    result = instance.query('SELECT uniqExact(ts) = 2, count() > 15 FROM test.view')
+    result = instance.query('SELECT uniqExact(ts) = 2, count() >= 15 FROM test.view')
 
     cancel.set()
     kafka_thread.join()
@@ -2357,9 +2369,9 @@ def test_premature_flush_on_eof(kafka_cluster):
     ''')
 
     # messages created here will be consumed immedeately after MV creation
-    # reaching topic EOF. 
+    # reaching topic EOF.
     # But we should not do flush immedeately after reaching EOF, because
-    # next poll can return more data, and we should respect kafka_flush_interval_ms 
+    # next poll can return more data, and we should respect kafka_flush_interval_ms
     # and try to form bigger block
     messages = [json.dumps({'key': j + 1, 'value': j + 1}) for j in range(1)]
     kafka_produce('premature_flush_on_eof', messages)
@@ -2379,11 +2391,11 @@ def test_premature_flush_on_eof(kafka_cluster):
 
     # all subscriptions/assignments done during select, so it start sending data to test.destination
     # immediately after creation of MV
-    
+
     time.sleep(1.5) # that sleep is needed to ensure that first poll finished, and at least one 'empty' polls happened.
                   # Empty poll before the fix were leading to premature flush.
-                  # TODO: wait for messages in log: "Polled batch of 1 messages", followed by "Stalled"  
-    
+                  # TODO: wait for messages in log: "Polled batch of 1 messages", followed by "Stalled"
+
     # produce more messages after delay
     kafka_produce('premature_flush_on_eof', messages)
 

From c1d18fc8a258ba41f6abc67c23fdbbcfb2ec40a1 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Thu, 28 Jan 2021 18:10:53 +0300
Subject: [PATCH 0319/1238] Fix very rare case

---
 .../ReplicatedMergeTreePartCheckThread.cpp    | 40 +++++++++++++------
 .../ReplicatedMergeTreePartCheckThread.h      |  5 ++-
 2 files changed, 32 insertions(+), 13 deletions(-)

diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
index c9175ad14db..44ee3a789e0 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
@@ -165,14 +165,13 @@ ReplicatedMergeTreePartCheckThread::MissingPartSearchResult ReplicatedMergeTreeP
     return MissingPartSearchResult::LostForever;
 }
 
-void ReplicatedMergeTreePartCheckThread::searchForMissingPartAndFetchIfPossible(const String & part_name)
+void ReplicatedMergeTreePartCheckThread::searchForMissingPartAndFetchIfPossible(const String & part_name, bool exists_in_zookeeper)
 {
     auto zookeeper = storage.getZooKeeper();
-    String part_path = storage.replica_path + "/parts/" + part_name;
-
     auto missing_part_search_result = searchForMissingPartOnOtherReplicas(part_name);
+
     /// If the part is in ZooKeeper, remove it from there and add the task to download it to the queue.
-    if (zookeeper->exists(part_path))
+    if (exists_in_zookeeper)
     {
         /// If part found on some other replica
         if (missing_part_search_result == MissingPartSearchResult::FoundAndNeedFetch)
@@ -210,11 +209,18 @@ void ReplicatedMergeTreePartCheckThread::searchForMissingPartAndFetchIfPossible(
     }
 }
 
-
-CheckResult ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name)
+std::pair<bool, MergeTreeDataPartPtr> ReplicatedMergeTreePartCheckThread::findLocalPart(const String & part_name)
 {
-    LOG_WARNING(log, "Checking part {}", part_name);
-    ProfileEvents::increment(ProfileEvents::ReplicatedPartChecks);
+    auto zookeeper = storage.getZooKeeper();
+    String part_path = storage.replica_path + "/parts/" + part_name;
+
+    /// It's important to check zookeeper first and after that check local storage,
+    /// because our checks of local storage and zookeeper are not consistent.
+    /// If part exists in zookeeper and doesn't exists in local storage definetely require
+    /// to fetch this part. But if we check local storage first and than check zookeeper
+    /// some background process can successfuly commit part between this checks (both to the local stoarge and zookeeper),
+    /// but checker thread will remove part from zookeeper and queue fetch.
+    bool exists_in_zookeeper = zookeeper->exists(part_path);
 
     /// If the part is still in the PreCommitted -> Committed transition, it is not lost
     /// and there is no need to go searching for it on other replicas. To definitely find the needed part
@@ -223,17 +229,27 @@ CheckResult ReplicatedMergeTreePartCheckThread::checkPart(const String & part_na
     if (!part)
         part = storage.getActiveContainingPart(part_name);
 
+    return std::make_pair(exists_in_zookeeper, part);
+}
+
+CheckResult ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name)
+{
+    LOG_WARNING(log, "Checking part {}", part_name);
+    ProfileEvents::increment(ProfileEvents::ReplicatedPartChecks);
+
+    auto [exists_in_zookeeper, part] = findLocalPart(part_name);
+
     /// We do not have this or a covering part.
     if (!part)
     {
-        searchForMissingPartAndFetchIfPossible(part_name);
+        searchForMissingPartAndFetchIfPossible(part_name, exists_in_zookeeper);
         return {part_name, false, "Part is missing, will search for it"};
     }
+
     /// We have this part, and it's active. We will check whether we need this part and whether it has the right data.
-    else if (part->name == part_name)
+    if (part->name == part_name)
     {
         auto zookeeper = storage.getZooKeeper();
-
         auto table_lock = storage.lockForShare(RWLockImpl::NO_QUERY, storage.getSettings()->lock_acquire_timeout_for_background_operations);
 
         auto local_part_header = ReplicatedMergeTreePartHeader::fromColumnsAndChecksums(
@@ -291,7 +307,7 @@ CheckResult ReplicatedMergeTreePartCheckThread::checkPart(const String & part_na
                 LOG_ERROR(log, message);
 
                 /// Part is broken, let's try to find it and fetch.
-                searchForMissingPartAndFetchIfPossible(part_name);
+                searchForMissingPartAndFetchIfPossible(part_name, exists_in_zookeeper);
 
                 /// Delete part locally.
                 storage.forgetPartAndMoveToDetached(part, "broken");
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h
index d43a9a02237..8257898fe3f 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h
+++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h
@@ -12,6 +12,7 @@
 #include <common/logger_useful.h>
 #include <Core/BackgroundSchedulePool.h>
 #include <Storages/CheckResults.h>
+#include <Storages/MergeTree/IMergeTreeDataPart.h>
 
 namespace DB
 {
@@ -75,7 +76,9 @@ private:
 
     /// Search for missing part and queue fetch if possible. Otherwise
     /// remove part from zookeeper and queue.
-    void searchForMissingPartAndFetchIfPossible(const String & part_name);
+    void searchForMissingPartAndFetchIfPossible(const String & part_name, bool exists_in_zookeeper);
+
+    std::pair<bool, MergeTreeDataPartPtr> findLocalPart(const String & part_name);
 
     enum MissingPartSearchResult
     {

From 817df99d119b827be73d4cfc2bfea8cc8716fc68 Mon Sep 17 00:00:00 2001
From: tavplubix <avtokmakov@yandex-team.ru>
Date: Thu, 28 Jan 2021 18:29:21 +0300
Subject: [PATCH 0320/1238] Update FieldVisitorsAccurateComparison.h

---
 src/Common/FieldVisitorsAccurateComparison.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Common/FieldVisitorsAccurateComparison.h b/src/Common/FieldVisitorsAccurateComparison.h
index 91fa4bf28de..ca2f5a62fae 100644
--- a/src/Common/FieldVisitorsAccurateComparison.h
+++ b/src/Common/FieldVisitorsAccurateComparison.h
@@ -53,7 +53,7 @@ public:
                 if constexpr (std::is_arithmetic_v<U>)
                 {
                     ReadBufferFromString in(l);
-                    T parsed;
+                    U parsed;
                     readText(parsed, in);
                     return operator()(parsed, r);
                 }

From c0ac1444cb8c9c4b22663b5aac8da2215bb396b5 Mon Sep 17 00:00:00 2001
From: keenwolf <keenwolf@gmail.com>
Date: Thu, 28 Jan 2021 23:33:17 +0800
Subject: [PATCH 0321/1238] adapting to mawk

---
 tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
index 6376040fcc5..3aca8a9980a 100755
--- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
+++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
@@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # Data preparation.
 # Now we can get the user_files_path by use the table file function for trick. also we can get it by query as:
 # "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')"
-user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 |grep Exception | /usr/bin/gawk '{match($0,"File (.*)/nonexist.txt",path); print path[1]}')
+user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 |grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
 mkdir -p ${user_files_path}/
 echo -n aaaaaaaaa > ${user_files_path}/a.txt
 echo -n bbbbbbbbb > ${user_files_path}/b.txt

From 37455304e7c5b4f802ea62ad786b4c70a2e6b3b4 Mon Sep 17 00:00:00 2001
From: tavplubix <avtokmakov@yandex-team.ru>
Date: Thu, 28 Jan 2021 18:51:01 +0300
Subject: [PATCH 0322/1238] Update FieldVisitorsAccurateComparison.h

---
 src/Common/FieldVisitorsAccurateComparison.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Common/FieldVisitorsAccurateComparison.h b/src/Common/FieldVisitorsAccurateComparison.h
index ca2f5a62fae..84099eafb0f 100644
--- a/src/Common/FieldVisitorsAccurateComparison.h
+++ b/src/Common/FieldVisitorsAccurateComparison.h
@@ -113,7 +113,7 @@ public:
                 if constexpr (std::is_arithmetic_v<U>)
                 {
                     ReadBufferFromString in(l);
-                    T parsed;
+                    U parsed;
                     readText(parsed, in);
                     return operator()(parsed, r);
                 }

From be1104d6c1b4122b87268bed50270ef7e473f034 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Thu, 28 Jan 2021 19:07:47 +0300
Subject: [PATCH 0323/1238] Fix typos

---
 src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
index 44ee3a789e0..a306547d843 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
@@ -216,9 +216,9 @@ std::pair<bool, MergeTreeDataPartPtr> ReplicatedMergeTreePartCheckThread::findLo
 
     /// It's important to check zookeeper first and after that check local storage,
     /// because our checks of local storage and zookeeper are not consistent.
-    /// If part exists in zookeeper and doesn't exists in local storage definetely require
+    /// If part exists in zookeeper and doesn't exists in local storage definitely require
     /// to fetch this part. But if we check local storage first and than check zookeeper
-    /// some background process can successfuly commit part between this checks (both to the local stoarge and zookeeper),
+    /// some background process can successfully commit part between this checks (both to the local stoarge and zookeeper),
     /// but checker thread will remove part from zookeeper and queue fetch.
     bool exists_in_zookeeper = zookeeper->exists(part_path);
 

From 8b3e0e5493ff5347ba7d51a71a5ae36ca4ba2d40 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com>
Date: Thu, 28 Jan 2021 19:15:39 +0300
Subject: [PATCH 0324/1238] Update 00459_group_array_insert_at.sql

---
 tests/queries/0_stateless/00459_group_array_insert_at.sql | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/queries/0_stateless/00459_group_array_insert_at.sql b/tests/queries/0_stateless/00459_group_array_insert_at.sql
index 8bc329d9103..59ecfc05045 100644
--- a/tests/queries/0_stateless/00459_group_array_insert_at.sql
+++ b/tests/queries/0_stateless/00459_group_array_insert_at.sql
@@ -1,4 +1,3 @@
--- remove this comment before merge
 SELECT groupArrayInsertAt(toString(number), number * 2) FROM (SELECT * FROM system.numbers LIMIT 10);
 SELECT groupArrayInsertAt('-')(toString(number), number * 2) FROM (SELECT * FROM system.numbers LIMIT 10);
 SELECT groupArrayInsertAt([123])(range(number), number * 2) FROM (SELECT * FROM system.numbers LIMIT 10);

From e9b570aceef54b0b169ae60a1984ba98c6c16d5c Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com>
Date: Thu, 28 Jan 2021 19:15:56 +0300
Subject: [PATCH 0325/1238] Update 01014_lazy_database_basic.sh

---
 tests/queries/0_stateless/01014_lazy_database_basic.sh | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/queries/0_stateless/01014_lazy_database_basic.sh b/tests/queries/0_stateless/01014_lazy_database_basic.sh
index 76cbcea6d97..11d698e764e 100755
--- a/tests/queries/0_stateless/01014_lazy_database_basic.sh
+++ b/tests/queries/0_stateless/01014_lazy_database_basic.sh
@@ -4,8 +4,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
 
-# remove this comment before merge
-
 ${CLICKHOUSE_CLIENT} -n -q "DROP DATABASE IF EXISTS testlazy"
 
 ${CLICKHOUSE_CLIENT} -n -q "

From f6218714653c30d8e48e067fd16ea330fd7b4af0 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Thu, 28 Jan 2021 19:17:46 +0300
Subject: [PATCH 0326/1238] Fixed race in tests

---
 src/Functions/FunctionsExternalDictionaries.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h
index cf8eb2f90df..3c955ffbcf0 100644
--- a/src/Functions/FunctionsExternalDictionaries.h
+++ b/src/Functions/FunctionsExternalDictionaries.h
@@ -292,8 +292,6 @@ public:
 
     ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
     {
-        std::cerr << "FunctionDictGetNoType::executeImpl " << this << std::endl;
-
         if (input_rows_count == 0)
             return result_type->createColumn();
 

From 3ace39fbf7b22132219d238908744790e758a941 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Thu, 28 Jan 2021 20:05:01 +0300
Subject: [PATCH 0327/1238] simplify a loop and make RANGE frame the default

---
 src/Interpreters/AggregateDescription.h       |  4 +-
 src/Parsers/ExpressionElementParsers.cpp      |  1 -
 src/Processors/Transforms/WindowTransform.cpp | 86 +++++++---------
 src/Processors/Transforms/WindowTransform.h   |  6 +-
 .../01591_window_functions.reference          | 98 ++++++++++---------
 .../0_stateless/01591_window_functions.sql    | 84 ++++++++--------
 6 files changed, 127 insertions(+), 152 deletions(-)

diff --git a/src/Interpreters/AggregateDescription.h b/src/Interpreters/AggregateDescription.h
index 7f286b9c763..ce05a6600bd 100644
--- a/src/Interpreters/AggregateDescription.h
+++ b/src/Interpreters/AggregateDescription.h
@@ -46,10 +46,10 @@ struct WindowFrame
 
     // This flag signifies that the frame properties were not set explicitly by
     // user, but the fields of this structure still have to contain proper values
-    // for the default frame of ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW.
+    // for the default frame of RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW.
     bool is_default = true;
 
-    FrameType type = FrameType::Rows;
+    FrameType type = FrameType::Range;
 
     /*
      * We don't need these yet.
diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp
index 123e68e32ed..c337b4bd80c 100644
--- a/src/Parsers/ExpressionElementParsers.cpp
+++ b/src/Parsers/ExpressionElementParsers.cpp
@@ -561,7 +561,6 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p
         {
             return false;
         }
-
     }
     else
     {
diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index 2ed622937c3..6ddecd0b30c 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -263,68 +263,48 @@ void WindowTransform::advanceFrameEnd()
     // The only frame end we have for now is CURRENT ROW.
     advanceFrameEndCurrentRow();
 
+    // We might not have advanced the frame end if we found out we reached the
+    // end of input or the partition, or if we still don't know the frame start.
+    if (frame_end_before == frame_end)
+    {
+        return;
+    }
+
     // Add the columns over which we advanced the frame to the aggregate function
     // states.
-    std::vector<const IColumn *> argument_columns;
+    // We could have advanced over at most the entire last block.
+    uint64_t last_row = frame_end.row;
+    if (frame_end.row == 0)
+    {
+        assert(frame_end == blocksEnd());
+        last_row = blockRowsNumber(frame_end_before);
+    }
+    else
+    {
+        assert(frame_end_before.block == frame_end.block);
+    }
+    assert(frame_end_before.row < last_row);
+
     for (auto & ws : workspaces)
     {
-        const auto & f = ws.window_function;
-        const auto * a = f.aggregate_function.get();
-        auto * buf = ws.aggregate_function_state.data();
-
-        // FIXME we don't need these complex loops, because frame_end advances
-        // by one block at most.
-        // We use two explicit loops here instead of using advanceRowNumber(),
-        // because we want to cache the argument columns array per block. Later
-        // we also use batch add.
-        // Unfortunately this leads to tricky loop conditions, because the
-        // frame_end might be either a past-the-end block, or a valid block, in
-        // which case we also have to process its head.
-        // And we also have to remember to reset the row number when moving to
-        // the next block.
-
-        uint64_t past_the_end_block;
-        // Note that the past-the-end row is not in the past-the-end block, but
-        // in the block before it.
-        uint64_t past_the_end_row;
-
-        if (frame_end.block < first_block_number + blocks.size())
+        if (frame_end_before.block != ws.cached_block_number)
         {
-            // The past-the-end row is in some valid block.
-            past_the_end_block = frame_end.block + 1;
-            past_the_end_row = frame_end.row;
-        }
-        else
-        {
-            // The past-the-end row is at the total end of data.
-            past_the_end_block = first_block_number + blocks.size();
-            // It's in the previous block!
-            past_the_end_row = blocks.back().numRows();
-        }
-        for (auto r = frame_end_before;
-            r.block < past_the_end_block;
-            ++r.block, r.row = 0)
-        {
-            const auto & block = blocks[r.block - first_block_number];
-
-            argument_columns.clear();
+            const auto & block
+                = blocks[frame_end_before.block - first_block_number];
+            ws.argument_columns.clear();
             for (const auto i : ws.argument_column_indices)
             {
-                argument_columns.push_back(block.input_columns[i].get());
+                ws.argument_columns.push_back(block.input_columns[i].get());
             }
+            ws.cached_block_number = frame_end_before.block;
+        }
 
-            // We process all rows of intermediate blocks, and the head of the
-            // last block.
-            const auto end = ((r.block + 1) == past_the_end_block)
-                ? past_the_end_row
-                : block.numRows();
-            for (; r.row < end; ++r.row)
-            {
-                a->add(buf,
-                    argument_columns.data(),
-                    r.row,
-                    arena.get());
-            }
+        const auto * a = ws.window_function.aggregate_function.get();
+        auto * buf = ws.aggregate_function_state.data();
+        auto * columns = ws.argument_columns.data();
+        for (auto row = frame_end_before.row; row < last_row; ++row)
+        {
+            a->add(buf, columns, row, arena.get());
         }
     }
 }
diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h
index 0699f7cde6e..ab9b548dbc9 100644
--- a/src/Processors/Transforms/WindowTransform.h
+++ b/src/Processors/Transforms/WindowTransform.h
@@ -22,11 +22,9 @@ struct WindowFunctionWorkspace
     AlignedBuffer aggregate_function_state;
     std::vector<size_t> argument_column_indices;
 
-    /*
-    // Argument and result columns. Be careful, they are per-chunk.
+    // Argument columns. Be careful, this is a per-block cache.
     std::vector<const IColumn *> argument_columns;
-    MutableColumnPtr result_column;
-    */
+    uint64_t cached_block_number = std::numeric_limits<uint64_t>::max();
 };
 
 struct WindowTransformBlock
diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference
index 8d6adad5e3d..7283f5246da 100644
--- a/tests/queries/0_stateless/01591_window_functions.reference
+++ b/tests/queries/0_stateless/01591_window_functions.reference
@@ -2,7 +2,7 @@
 
 set allow_experimental_window_functions = 1;
 -- just something basic
-select number, count() over (partition by intDiv(number, 3) order by number) from numbers(10);
+select number, count() over (partition by intDiv(number, 3) order by number rows unbounded preceding) from numbers(10);
 0	1
 1	2
 2	3
@@ -14,7 +14,7 @@ select number, count() over (partition by intDiv(number, 3) order by number) fro
 8	3
 9	1
 -- proper calculation across blocks
-select number, max(number) over (partition by intDiv(number, 3) order by number desc) from numbers(10) settings max_block_size = 2;
+select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) from numbers(10) settings max_block_size = 2;
 2	2
 1	2
 0	2
@@ -26,9 +26,9 @@ select number, max(number) over (partition by intDiv(number, 3) order by number
 6	8
 9	9
 -- not a window function
-select number, abs(number) over (partition by toString(intDiv(number, 3))) from numbers(10); -- { serverError 63 }
+select number, abs(number) over (partition by toString(intDiv(number, 3)) rows unbounded preceding) from numbers(10); -- { serverError 63 }
 -- no partition by
-select number, avg(number) over (order by number) from numbers(10);
+select number, avg(number) over (order by number rows unbounded preceding) from numbers(10);
 0	0
 1	0.5
 2	1
@@ -40,7 +40,7 @@ select number, avg(number) over (order by number) from numbers(10);
 8	4
 9	4.5
 -- no order by
-select number, quantileExact(number) over (partition by intDiv(number, 3)) from numbers(10);
+select number, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) from numbers(10);
 0	0
 1	1
 2	1
@@ -52,7 +52,7 @@ select number, quantileExact(number) over (partition by intDiv(number, 3)) from
 8	7
 9	9
 -- can add an alias after window spec
-select number, quantileExact(number) over (partition by intDiv(number, 3)) q from numbers(10);
+select number, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) q from numbers(10);
 0	0
 1	1
 2	1
@@ -65,14 +65,14 @@ select number, quantileExact(number) over (partition by intDiv(number, 3)) q fro
 9	9
 -- can't reference it yet -- the window functions are calculated at the
 -- last stage of select, after all other functions.
-select q * 10, quantileExact(number) over (partition by intDiv(number, 3)) q from numbers(10); -- { serverError 47 }
+select q * 10, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) q from numbers(10); -- { serverError 47 }
 -- must work in WHERE if you wrap it in a subquery
-select * from (select count(*) over () c from numbers(3)) where c > 0;
+select * from (select count(*) over (rows unbounded preceding) c from numbers(3)) where c > 0;
 1
 2
 3
 -- should work in ORDER BY
-select number, max(number) over (partition by intDiv(number, 3) order by number desc) m from numbers(10) order by m desc, number;
+select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) m from numbers(10) order by m desc, number;
 9	9
 6	8
 7	8
@@ -84,14 +84,14 @@ select number, max(number) over (partition by intDiv(number, 3) order by number
 1	2
 2	2
 -- also works in ORDER BY if you wrap it in a subquery
-select * from (select count(*) over () c from numbers(3)) order by c;
+select * from (select count(*) over (rows unbounded preceding) c from numbers(3)) order by c;
 1
 2
 3
 -- Example with window function only in ORDER BY. Here we make a rank of all
 -- numbers sorted descending, and then sort by this rank descending, and must get
 -- the ascending order.
-select * from (select * from numbers(5) order by rand()) order by count() over (order by number desc) desc;
+select * from (select * from numbers(5) order by rand()) order by count() over (order by number desc rows unbounded preceding) desc;
 0
 1
 2
@@ -100,23 +100,23 @@ select * from (select * from numbers(5) order by rand()) order by count() over (
 -- Aggregate functions as window function arguments. This query is semantically
 -- the same as the above one, only we replace `number` with
 -- `any(number) group by number` and so on.
-select * from (select * from numbers(5) order by rand()) group by number order by sum(any(number + 1)) over (order by min(number) desc) desc;
+select * from (select * from numbers(5) order by rand()) group by number order by sum(any(number + 1)) over (order by min(number) desc rows unbounded preceding) desc;
 0
 1
 2
 3
 4
 -- some more simple cases w/aggregate functions
-select sum(any(number)) over () from numbers(1);
+select sum(any(number)) over (rows unbounded preceding) from numbers(1);
 0
-select sum(any(number) + 1) over () from numbers(1);
+select sum(any(number) + 1) over (rows unbounded preceding) from numbers(1);
 1
-select sum(any(number + 1)) over () from numbers(1);
+select sum(any(number + 1)) over (rows unbounded preceding) from numbers(1);
 1
 -- different windows
 -- an explain test would also be helpful, but it's too immature now and I don't
 -- want to change reference all the time
-select number, max(number) over (partition by intDiv(number, 3) order by number desc), count(number) over (partition by intDiv(number, 5) order by number) as m from numbers(31) order by number settings max_block_size = 2;
+select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding), count(number) over (partition by intDiv(number, 5) order by number rows unbounded preceding) as m from numbers(31) order by number settings max_block_size = 2;
 0	2	1
 1	2	2
 2	2	3
@@ -151,7 +151,7 @@ select number, max(number) over (partition by intDiv(number, 3) order by number
 -- two functions over the same window
 -- an explain test would also be helpful, but it's too immature now and I don't
 -- want to change reference all the time
-select number, max(number) over (partition by intDiv(number, 3) order by number desc), count(number) over (partition by intDiv(number, 3) order by number desc) as m from numbers(7) order by number settings max_block_size = 2;
+select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding), count(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) as m from numbers(7) order by number settings max_block_size = 2;
 0	2	3
 1	2	2
 2	2	1
@@ -163,22 +163,26 @@ select number, max(number) over (partition by intDiv(number, 3) order by number
 select median(x) over (partition by x) from (select 1 x);
 1
 -- an empty window definition is valid as well
-select groupArray(number) over () from numbers(3);
+select groupArray(number) over (rows unbounded preceding) from numbers(3);
 [0]
 [0,1]
 [0,1,2]
+select groupArray(number) over () from numbers(3);
+[0,1,2]
+[0,1,2]
+[0,1,2]
 -- This one tests we properly process the window  function arguments.
 -- Seen errors like 'column `1` not found' from count(1).
-select count(1) over (), max(number + 1) over () from numbers(3);
+select count(1) over (rows unbounded preceding), max(number + 1) over () from numbers(3);
 1	3
 -- Should work in DISTINCT
-select distinct sum(0) over () from numbers(2);
+select distinct sum(0) over (rows unbounded preceding) from numbers(2);
 0
-select distinct any(number) over () from numbers(2);
+select distinct any(number) over (rows unbounded preceding) from numbers(2);
 0
 -- Various kinds of aliases are properly substituted into various parts of window
 -- function definition.
-with number + 1 as x select intDiv(number, 3) as y, sum(x + y) over (partition by y order by x) from numbers(7);
+with number + 1 as x select intDiv(number, 3) as y, sum(x + y) over (partition by y order by x rows unbounded preceding) from numbers(7);
 0	1
 0	3
 0	6
@@ -192,8 +196,8 @@ select 1 window w1 as ();
 select sum(number) over w1, sum(number) over w2
 from numbers(10)
 window
-    w1 as (),
-    w2 as (partition by intDiv(number, 3))
+    w1 as (rows unbounded preceding),
+    w2 as (partition by intDiv(number, 3) rows unbounded preceding)
 ;
 0	0
 1	1
@@ -205,12 +209,14 @@ window
 28	13
 36	21
 45	9
+-- FIXME both functions should use the same window, but they don't. Add an
+-- EXPLAIN test for this.
 select
     sum(number) over w1,
-    sum(number) over (partition by intDiv(number, 3))
+    sum(number) over (partition by intDiv(number, 3) rows unbounded preceding)
 from numbers(10)
 window
-    w1 as (partition by intDiv(number, 3))
+    w1 as (partition by intDiv(number, 3) rows unbounded preceding)
 ;
 0	0
 1	1
@@ -222,25 +228,17 @@ window
 13	13
 21	21
 9	9
--- ROWS frame
-select
-    sum(number)
-        over (order by number rows between unbounded preceding and current row)
-from numbers(3);
-0
-1
-3
---select
---    sum(number)
---        over (order by number groups between unbounded preceding and current row)
---from numbers(3);
-
 -- RANGE frame
+-- It's the default
+select sum(number) over () from numbers(3);
+3
+3
+3
 -- Try some mutually prime sizes of partition, group and block, for the number
--- of rows that is their least common multiple so that we see all the interesting
--- corner cases.
+-- of rows that is their least common multiple + 1, so that we see all the
+-- interesting corner cases.
 select number, intDiv(number, 3) p, mod(number, 2) o, count(number) over w as c
-from numbers(30)
+from numbers(31)
 window w as (partition by p order by o range unbounded preceding)
 order by number
 settings max_block_size = 5
@@ -275,8 +273,9 @@ settings max_block_size = 5
 27	9	1	3
 28	9	0	1
 29	9	1	3
+30	10	0	1
 select number, intDiv(number, 5) p, mod(number, 3) o, count(number) over w as c
-from numbers(30)
+from numbers(31)
 window w as (partition by p order by o range unbounded preceding)
 order by number
 settings max_block_size = 2
@@ -311,8 +310,9 @@ settings max_block_size = 2
 27	5	0	1
 28	5	1	3
 29	5	2	5
+30	6	0	1
 select number, intDiv(number, 5) p, mod(number, 2) o, count(number) over w as c
-from numbers(30)
+from numbers(31)
 window w as (partition by p order by o range unbounded preceding)
 order by number
 settings max_block_size = 3
@@ -347,8 +347,9 @@ settings max_block_size = 3
 27	5	1	5
 28	5	0	2
 29	5	1	5
+30	6	0	1
 select number, intDiv(number, 3) p, mod(number, 5) o, count(number) over w as c
-from numbers(30)
+from numbers(31)
 window w as (partition by p order by o range unbounded preceding)
 order by number
 settings max_block_size = 2
@@ -383,8 +384,9 @@ settings max_block_size = 2
 27	9	2	1
 28	9	3	2
 29	9	4	3
+30	10	0	1
 select number, intDiv(number, 2) p, mod(number, 5) o, count(number) over w as c
-from numbers(30)
+from numbers(31)
 window w as (partition by p order by o range unbounded preceding)
 order by number
 settings max_block_size = 3
@@ -419,8 +421,9 @@ settings max_block_size = 3
 27	13	2	2
 28	14	3	1
 29	14	4	2
+30	15	0	1
 select number, intDiv(number, 2) p, mod(number, 3) o, count(number) over w as c
-from numbers(30)
+from numbers(31)
 window w as (partition by p order by o range unbounded preceding)
 order by number
 settings max_block_size = 5
@@ -455,3 +458,4 @@ settings max_block_size = 5
 27	13	0	1
 28	14	1	1
 29	14	2	2
+30	15	0	1
diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql
index 2c0a978d07b..0baac535144 100644
--- a/tests/queries/0_stateless/01591_window_functions.sql
+++ b/tests/queries/0_stateless/01591_window_functions.sql
@@ -3,77 +3,78 @@
 set allow_experimental_window_functions = 1;
 
 -- just something basic
-select number, count() over (partition by intDiv(number, 3) order by number) from numbers(10);
+select number, count() over (partition by intDiv(number, 3) order by number rows unbounded preceding) from numbers(10);
 
 -- proper calculation across blocks
-select number, max(number) over (partition by intDiv(number, 3) order by number desc) from numbers(10) settings max_block_size = 2;
+select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) from numbers(10) settings max_block_size = 2;
 
 -- not a window function
-select number, abs(number) over (partition by toString(intDiv(number, 3))) from numbers(10); -- { serverError 63 }
+select number, abs(number) over (partition by toString(intDiv(number, 3)) rows unbounded preceding) from numbers(10); -- { serverError 63 }
 
 -- no partition by
-select number, avg(number) over (order by number) from numbers(10);
+select number, avg(number) over (order by number rows unbounded preceding) from numbers(10);
 
 -- no order by
-select number, quantileExact(number) over (partition by intDiv(number, 3)) from numbers(10);
+select number, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) from numbers(10);
 
 -- can add an alias after window spec
-select number, quantileExact(number) over (partition by intDiv(number, 3)) q from numbers(10);
+select number, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) q from numbers(10);
 
 -- can't reference it yet -- the window functions are calculated at the
 -- last stage of select, after all other functions.
-select q * 10, quantileExact(number) over (partition by intDiv(number, 3)) q from numbers(10); -- { serverError 47 }
+select q * 10, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) q from numbers(10); -- { serverError 47 }
 
 -- must work in WHERE if you wrap it in a subquery
-select * from (select count(*) over () c from numbers(3)) where c > 0;
+select * from (select count(*) over (rows unbounded preceding) c from numbers(3)) where c > 0;
 
 -- should work in ORDER BY
-select number, max(number) over (partition by intDiv(number, 3) order by number desc) m from numbers(10) order by m desc, number;
+select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) m from numbers(10) order by m desc, number;
 
 -- also works in ORDER BY if you wrap it in a subquery
-select * from (select count(*) over () c from numbers(3)) order by c;
+select * from (select count(*) over (rows unbounded preceding) c from numbers(3)) order by c;
 
 -- Example with window function only in ORDER BY. Here we make a rank of all
 -- numbers sorted descending, and then sort by this rank descending, and must get
 -- the ascending order.
-select * from (select * from numbers(5) order by rand()) order by count() over (order by number desc) desc;
+select * from (select * from numbers(5) order by rand()) order by count() over (order by number desc rows unbounded preceding) desc;
 
 -- Aggregate functions as window function arguments. This query is semantically
 -- the same as the above one, only we replace `number` with
 -- `any(number) group by number` and so on.
-select * from (select * from numbers(5) order by rand()) group by number order by sum(any(number + 1)) over (order by min(number) desc) desc;
+select * from (select * from numbers(5) order by rand()) group by number order by sum(any(number + 1)) over (order by min(number) desc rows unbounded preceding) desc;
 -- some more simple cases w/aggregate functions
-select sum(any(number)) over () from numbers(1);
-select sum(any(number) + 1) over () from numbers(1);
-select sum(any(number + 1)) over () from numbers(1);
+select sum(any(number)) over (rows unbounded preceding) from numbers(1);
+select sum(any(number) + 1) over (rows unbounded preceding) from numbers(1);
+select sum(any(number + 1)) over (rows unbounded preceding) from numbers(1);
 
 -- different windows
 -- an explain test would also be helpful, but it's too immature now and I don't
 -- want to change reference all the time
-select number, max(number) over (partition by intDiv(number, 3) order by number desc), count(number) over (partition by intDiv(number, 5) order by number) as m from numbers(31) order by number settings max_block_size = 2;
+select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding), count(number) over (partition by intDiv(number, 5) order by number rows unbounded preceding) as m from numbers(31) order by number settings max_block_size = 2;
 
 -- two functions over the same window
 -- an explain test would also be helpful, but it's too immature now and I don't
 -- want to change reference all the time
-select number, max(number) over (partition by intDiv(number, 3) order by number desc), count(number) over (partition by intDiv(number, 3) order by number desc) as m from numbers(7) order by number settings max_block_size = 2;
+select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding), count(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) as m from numbers(7) order by number settings max_block_size = 2;
 
 -- check that we can work with constant columns
 select median(x) over (partition by x) from (select 1 x);
 
 -- an empty window definition is valid as well
+select groupArray(number) over (rows unbounded preceding) from numbers(3);
 select groupArray(number) over () from numbers(3);
 
 -- This one tests we properly process the window  function arguments.
 -- Seen errors like 'column `1` not found' from count(1).
-select count(1) over (), max(number + 1) over () from numbers(3);
+select count(1) over (rows unbounded preceding), max(number + 1) over () from numbers(3);
 
 -- Should work in DISTINCT
-select distinct sum(0) over () from numbers(2);
-select distinct any(number) over () from numbers(2);
+select distinct sum(0) over (rows unbounded preceding) from numbers(2);
+select distinct any(number) over (rows unbounded preceding) from numbers(2);
 
 -- Various kinds of aliases are properly substituted into various parts of window
 -- function definition.
-with number + 1 as x select intDiv(number, 3) as y, sum(x + y) over (partition by y order by x) from numbers(7);
+with number + 1 as x select intDiv(number, 3) as y, sum(x + y) over (partition by y order by x rows unbounded preceding) from numbers(7);
 
 -- WINDOW clause
 select 1 window w1 as ();
@@ -81,71 +82,64 @@ select 1 window w1 as ();
 select sum(number) over w1, sum(number) over w2
 from numbers(10)
 window
-    w1 as (),
-    w2 as (partition by intDiv(number, 3))
+    w1 as (rows unbounded preceding),
+    w2 as (partition by intDiv(number, 3) rows unbounded preceding)
 ;
 
+-- FIXME both functions should use the same window, but they don't. Add an
+-- EXPLAIN test for this.
 select
     sum(number) over w1,
-    sum(number) over (partition by intDiv(number, 3))
+    sum(number) over (partition by intDiv(number, 3) rows unbounded preceding)
 from numbers(10)
 window
-    w1 as (partition by intDiv(number, 3))
+    w1 as (partition by intDiv(number, 3) rows unbounded preceding)
 ;
 
--- ROWS frame
-select
-    sum(number)
-        over (order by number rows between unbounded preceding and current row)
-from numbers(3);
-
-
---select
---    sum(number)
---        over (order by number groups between unbounded preceding and current row)
---from numbers(3);
-
 -- RANGE frame
+-- It's the default
+select sum(number) over () from numbers(3);
+
 -- Try some mutually prime sizes of partition, group and block, for the number
--- of rows that is their least common multiple so that we see all the interesting
--- corner cases.
+-- of rows that is their least common multiple + 1, so that we see all the
+-- interesting corner cases.
 select number, intDiv(number, 3) p, mod(number, 2) o, count(number) over w as c
-from numbers(30)
+from numbers(31)
 window w as (partition by p order by o range unbounded preceding)
 order by number
 settings max_block_size = 5
 ;
 
 select number, intDiv(number, 5) p, mod(number, 3) o, count(number) over w as c
-from numbers(30)
+from numbers(31)
 window w as (partition by p order by o range unbounded preceding)
 order by number
 settings max_block_size = 2
 ;
 
 select number, intDiv(number, 5) p, mod(number, 2) o, count(number) over w as c
-from numbers(30)
+from numbers(31)
 window w as (partition by p order by o range unbounded preceding)
 order by number
 settings max_block_size = 3
 ;
 
 select number, intDiv(number, 3) p, mod(number, 5) o, count(number) over w as c
-from numbers(30)
+from numbers(31)
 window w as (partition by p order by o range unbounded preceding)
 order by number
 settings max_block_size = 2
 ;
 
 select number, intDiv(number, 2) p, mod(number, 5) o, count(number) over w as c
-from numbers(30)
+from numbers(31)
 window w as (partition by p order by o range unbounded preceding)
 order by number
 settings max_block_size = 3
 ;
 
 select number, intDiv(number, 2) p, mod(number, 3) o, count(number) over w as c
-from numbers(30)
+from numbers(31)
 window w as (partition by p order by o range unbounded preceding)
 order by number
 settings max_block_size = 5

From a121a5250b9e25988dd416741ff4de5ad0dfea5f Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 28 Jan 2021 20:31:34 +0300
Subject: [PATCH 0328/1238] Add log comment when running .sh tests

---
 tests/clickhouse-test         | 3 +++
 tests/queries/shell_config.sh | 5 ++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/tests/clickhouse-test b/tests/clickhouse-test
index d6c2287ad46..0c6154f1662 100755
--- a/tests/clickhouse-test
+++ b/tests/clickhouse-test
@@ -135,6 +135,9 @@ def run_single_test(args, ext, server_logs_level, client_options, case_file, std
 
         os.environ["CLICKHOUSE_DATABASE"] = database
 
+    # This is for .sh tests
+    os.environ["CLICKHOUSE_LOG_COMMENT"] = test
+
     params = {
         'client': args.client + ' --database=' + database,
         'logs_level': server_logs_level,
diff --git a/tests/queries/shell_config.sh b/tests/queries/shell_config.sh
index 0ca2cee3c77..6130bec52f1 100644
--- a/tests/queries/shell_config.sh
+++ b/tests/queries/shell_config.sh
@@ -10,7 +10,9 @@ export CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL
 [ -v CLICKHOUSE_PORT_TCP ] && CLICKHOUSE_CLIENT_OPT0+=" --port=${CLICKHOUSE_PORT_TCP} "
 [ -v CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL ] && CLICKHOUSE_CLIENT_OPT0+=" --send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL} "
 [ -v CLICKHOUSE_DATABASE ] && CLICKHOUSE_CLIENT_OPT0+=" --database=${CLICKHOUSE_DATABASE} "
+[ -v CLICKHOUSE_LOG_COMMENT ] && CLICKHOUSE_CLIENT_OPT0+=" --log_comment=${CLICKHOUSE_LOG_COMMENT} "
 [ -v CLICKHOUSE_DATABASE ] && CLICKHOUSE_BENCHMARK_OPT0+=" --database=${CLICKHOUSE_DATABASE} "
+[ -v CLICKHOUSE_LOG_COMMENT ] && CLICKHOUSE_BENCHMARK_OPT0+=" --log_comment=${CLICKHOUSE_LOG_COMMENT} "
 
 export CLICKHOUSE_BINARY=${CLICKHOUSE_BINARY:="clickhouse"}
 [ -x "$CLICKHOUSE_BINARY-client" ] && CLICKHOUSE_CLIENT_BINARY=${CLICKHOUSE_CLIENT_BINARY:=$CLICKHOUSE_BINARY-client}
@@ -52,13 +54,14 @@ export CLICKHOUSE_PORT_HTTPS=${CLICKHOUSE_PORT_HTTPS:=$(${CLICKHOUSE_EXTRACT_CON
 export CLICKHOUSE_PORT_HTTPS=${CLICKHOUSE_PORT_HTTPS:="8443"}
 export CLICKHOUSE_PORT_HTTP_PROTO=${CLICKHOUSE_PORT_HTTP_PROTO:="http"}
 
-# Add database to url params
+# Add database and log comment to url params
 if [ -v CLICKHOUSE_URL_PARAMS ]
 then
   export CLICKHOUSE_URL_PARAMS="${CLICKHOUSE_URL_PARAMS}&database=${CLICKHOUSE_DATABASE}"
 else
   export CLICKHOUSE_URL_PARAMS="database=${CLICKHOUSE_DATABASE}"
 fi
+[ -v CLICKHOUSE_LOG_COMMENT ] && export CLICKHOUSE_URL_PARAMS="${CLICKHOUSE_URL_PARAMS}&log_comment=${CLICKHOUSE_LOG_COMMENT}"
 
 export CLICKHOUSE_URL=${CLICKHOUSE_URL:="${CLICKHOUSE_PORT_HTTP_PROTO}://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTP}/"}
 export CLICKHOUSE_URL_HTTPS=${CLICKHOUSE_URL_HTTPS:="https://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTPS}/"}

From b508fab8ca5f3fb5327a9d6f15b9e1ed0d8f5edf Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Thu, 28 Jan 2021 20:39:32 +0300
Subject: [PATCH 0329/1238] some docs

---
 .../sql-reference/window-functions/index.md   | 38 +++++++++++++++++++
 1 file changed, 38 insertions(+)
 create mode 100644 docs/en/sql-reference/window-functions/index.md

diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md
new file mode 100644
index 00000000000..a79328ade32
--- /dev/null
+++ b/docs/en/sql-reference/window-functions/index.md
@@ -0,0 +1,38 @@
+# [development] Window Functions
+
+!!! warning "Warning"
+This is an experimental feature that is currently in development and is not ready
+for general use. It will change in unpredictable backwards-incompatible ways in
+the future releases.
+
+ClickHouse currently supports calculation of aggregate functions over a window.
+Pure window functions such as `rank`, `lag`, `lead` and so on are not yet supported.
+
+The window can be specified either with an `OVER` clause or with a separate
+`WINDOW` clause.
+
+Only two variants of frame are supported, `ROWS` and `RANGE`. The only supported
+frame boundaries are `ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW`.
+
+
+## References
+
+### GitHub Issues
+The roadmap for the initial support of window functions is [in this issue](https://github.com/ClickHouse/ClickHouse/issues/18097).
+
+All GitHub issues related to window funtions have the [comp-window-functions](https://github.com/ClickHouse/ClickHouse/labels/comp-window-functions) tag.
+
+### Tests
+These tests contain the examples of the currently supported grammar:
+https://github.com/ClickHouse/ClickHouse/blob/master/tests/performance/window_functions.xml
+https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/01591_window_functions.sql
+
+### Postgres Docs
+https://www.postgresql.org/docs/devel/sql-expressions.html#SYNTAX-WINDOW-FUNCTIONS
+https://www.postgresql.org/docs/devel/functions-window.html
+https://www.postgresql.org/docs/devel/tutorial-window.html
+
+### MySQL Docs
+https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html
+https://dev.mysql.com/doc/refman/8.0/en/window-functions-usage.html
+https://dev.mysql.com/doc/refman/8.0/en/window-functions-frames.html

From 1b891e042d6d76445d5be0e6f127dc932a828920 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Thu, 28 Jan 2021 21:18:16 +0300
Subject: [PATCH 0330/1238] fix a bug

---
 src/Processors/Transforms/WindowTransform.cpp | 47 +++++++++++++++----
 .../01591_window_functions.reference          | 13 +++++
 .../0_stateless/01591_window_functions.sql    |  4 ++
 3 files changed, 54 insertions(+), 10 deletions(-)

diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index 6ddecd0b30c..a4a0c0cd27b 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -211,6 +211,8 @@ bool WindowTransform::arePeers(const RowNumber & x, const RowNumber & y) const
 
 void WindowTransform::advanceFrameEndCurrentRow()
 {
+//    fmt::print(stderr, "starting from frame_end {}\n", frame_end);
+
     // We only process one block here, and frame_end must be already in it: if
     // we didn't find the end in the previous block, frame_end is now the first
     // row of the current block. We need this knowledge to write a simpler loop
@@ -229,24 +231,46 @@ void WindowTransform::advanceFrameEndCurrentRow()
         return;
     }
 
-    const auto block_rows = blockRowsNumber(frame_end);
+    // We advance until the partition end. It's either in the current block or
+    // in the next one, which is also the past-the-end block. Figure out how
+    // many rows we have to process.
+    uint64_t rows_end;
+    if (partition_end.row == 0)
+    {
+        assert(partition_end == blocksEnd());
+        rows_end = blockRowsNumber(frame_end);
+    }
+    else
+    {
+        assert(frame_end.block == partition_end.block);
+        rows_end = partition_end.row;
+    }
+    // Equality would mean "no data to process", for which we checked above.
+    assert(frame_end.row < rows_end);
+
+//    fmt::print(stderr, "first row {} last {}\n", frame_end.row, rows_end);
+
     // We could retreat the frame_end here, but for some reason I am reluctant
     // to do this... It would have better data locality.
     auto reference = current_row;
-    for (; frame_end.row < block_rows; ++frame_end.row)
+    for (; frame_end.row < rows_end; ++frame_end.row)
     {
         if (!arePeers(reference, frame_end))
         {
-            //fmt::print(stderr, "{} and {} don't match\n", reference, frame_end);
+//            fmt::print(stderr, "{} and {} don't match\n", reference, frame_end);
             frame_ended = true;
             return;
         }
         reference = frame_end;
     }
 
-    // Got to the end of current block, have to properly update the row number.
-    ++frame_end.block;
-    frame_end.row = 0;
+    // Might have gotten to the end of the current block, have to properly
+    // update the row number.
+    if (frame_end.row == blockRowsNumber(frame_end))
+    {
+        ++frame_end.block;
+        frame_end.row = 0;
+    }
 
     // Got to the end of partition (frame ended as well then) or end of data.
     assert(frame_end == partition_end);
@@ -263,6 +287,8 @@ void WindowTransform::advanceFrameEnd()
     // The only frame end we have for now is CURRENT ROW.
     advanceFrameEndCurrentRow();
 
+//    fmt::print(stderr, "frame_end {} -> {}\n", frame_end_before, frame_end);
+
     // We might not have advanced the frame end if we found out we reached the
     // end of input or the partition, or if we still don't know the frame start.
     if (frame_end_before == frame_end)
@@ -273,17 +299,18 @@ void WindowTransform::advanceFrameEnd()
     // Add the columns over which we advanced the frame to the aggregate function
     // states.
     // We could have advanced over at most the entire last block.
-    uint64_t last_row = frame_end.row;
+    uint64_t rows_end = frame_end.row;
     if (frame_end.row == 0)
     {
         assert(frame_end == blocksEnd());
-        last_row = blockRowsNumber(frame_end_before);
+        rows_end = blockRowsNumber(frame_end_before);
     }
     else
     {
         assert(frame_end_before.block == frame_end.block);
     }
-    assert(frame_end_before.row < last_row);
+    // Equality would mean "no data to process", for which we checked above.
+    assert(frame_end_before.row < rows_end);
 
     for (auto & ws : workspaces)
     {
@@ -302,7 +329,7 @@ void WindowTransform::advanceFrameEnd()
         const auto * a = ws.window_function.aggregate_function.get();
         auto * buf = ws.aggregate_function_state.data();
         auto * columns = ws.argument_columns.data();
-        for (auto row = frame_end_before.row; row < last_row; ++row)
+        for (auto row = frame_end_before.row; row < rows_end; ++row)
         {
             a->add(buf, columns, row, arena.get());
         }
diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference
index 7283f5246da..e6b49b5207a 100644
--- a/tests/queries/0_stateless/01591_window_functions.reference
+++ b/tests/queries/0_stateless/01591_window_functions.reference
@@ -459,3 +459,16 @@ settings max_block_size = 5
 28	14	1	1
 29	14	2	2
 30	15	0	1
+-- A case where the partition end is in the current block, and the frame end
+-- is triggered by the partition end.
+select min(number) over (partition by p)  from (select number, intDiv(number, 3) p from numbers(10));
+0
+0
+0
+3
+3
+3
+6
+6
+6
+9
diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql
index 0baac535144..e56fe9cb315 100644
--- a/tests/queries/0_stateless/01591_window_functions.sql
+++ b/tests/queries/0_stateless/01591_window_functions.sql
@@ -144,3 +144,7 @@ window w as (partition by p order by o range unbounded preceding)
 order by number
 settings max_block_size = 5
 ;
+
+-- A case where the partition end is in the current block, and the frame end
+-- is triggered by the partition end.
+select min(number) over (partition by p)  from (select number, intDiv(number, 3) p from numbers(10));

From a57456a3fd21829d22635df01404f7383ece545d Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Thu, 28 Jan 2021 22:02:39 +0300
Subject: [PATCH 0331/1238] fix

---
 src/Interpreters/DDLTask.h                  | 1 +
 src/Interpreters/DDLWorker.cpp              | 6 ++++++
 src/Interpreters/InterpreterCreateQuery.cpp | 6 +++++-
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/Interpreters/DDLTask.h b/src/Interpreters/DDLTask.h
index a12676ab8a3..5b50413b975 100644
--- a/src/Interpreters/DDLTask.h
+++ b/src/Interpreters/DDLTask.h
@@ -158,6 +158,7 @@ struct MetadataTransaction
     void addOps(Coordination::Requests & other_ops)
     {
         std::move(ops.begin(), ops.end(), std::back_inserter(other_ops));
+        ops.clear();
     }
 
     void commit();
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 83412ab8fb7..7b9d3ef8f5b 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -400,6 +400,12 @@ bool DDLWorker::tryExecuteQuery(const String & query, DDLTaskBase & task)
         auto query_context = task.makeQueryContext(context);
         query_scope.emplace(*query_context);
         executeQuery(istr, ostr, false, *query_context, {});
+
+        if (auto txn = query_context->getMetadataTransaction())
+        {
+            if (txn->state == MetadataTransaction::CREATED)
+                txn->commit();
+        }
     }
     catch (const DB::Exception & e)
     {
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index 5292ef57d7a..926737ef888 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -800,11 +800,11 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
 
     String current_database = context.getCurrentDatabase();
     auto database_name = create.database.empty() ? current_database : create.database;
-    auto database = DatabaseCatalog::instance().getDatabase(database_name);
 
     // If this is a stub ATTACH query, read the query definition from the database
     if (create.attach && !create.storage && !create.columns_list)
     {
+        auto database = DatabaseCatalog::instance().getDatabase(database_name);
         bool if_not_exists = create.if_not_exists;
 
         // Table SQL definition is available even if the table is detached (even permanently)
@@ -869,7 +869,11 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
     }
 
     //TODO make code better if possible
+    DatabasePtr database;
     bool need_add_to_database = !create.temporary;
+    if (need_add_to_database)
+        database = DatabaseCatalog::instance().getDatabase(database_name);
+
     if (need_add_to_database && database->getEngineName() == "Replicated")
     {
         auto guard = DatabaseCatalog::instance().getDDLGuard(create.database, create.table);

From c6beaba12a1fab9b870e12d37e4917601da41b49 Mon Sep 17 00:00:00 2001
From: Dmitriy <sevirov@yandex-team.ru>
Date: Thu, 28 Jan 2021 22:18:49 +0300
Subject: [PATCH 0332/1238] Edit and translate into Russian
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Выполнил перевод на русский язык, немного поправив английскую версию.
---
 .../sql-reference/statements/insert-into.md   |   2 +-
 .../sql-reference/statements/select/index.md  |   3 +-
 .../sql-reference/statements/insert-into.md   |   2 +-
 .../sql-reference/statements/select/index.md  | 109 +++++++++++++++++-
 4 files changed, 110 insertions(+), 6 deletions(-)

diff --git a/docs/en/sql-reference/statements/insert-into.md b/docs/en/sql-reference/statements/insert-into.md
index 7acf4018812..61373b1c72f 100644
--- a/docs/en/sql-reference/statements/insert-into.md
+++ b/docs/en/sql-reference/statements/insert-into.md
@@ -13,7 +13,7 @@ Basic query format:
 INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
 ```
 
-You can specify a list of columns to insert using  the `(c1, c2, c3)`. You can also use an expression with column [matcher](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#apply-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier). 
+You can specify a list of columns to insert using  the `(c1, c2, c3)`. You can also use an expression with column [matcher](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#except-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier).
 
 For example, consider the table:
 
diff --git a/docs/en/sql-reference/statements/select/index.md b/docs/en/sql-reference/statements/select/index.md
index 7c13772ffdf..e99ebef838c 100644
--- a/docs/en/sql-reference/statements/select/index.md
+++ b/docs/en/sql-reference/statements/select/index.md
@@ -278,5 +278,4 @@ Other ways to make settings see [here](../../../operations/settings/index.md).
 SELECT * FROM some_table SETTINGS optimize_read_in_order=1, cast_keep_nullable=1;
 ```
 
-[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/)
-<!--hide-->
+[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/)<!--hide-->
diff --git a/docs/ru/sql-reference/statements/insert-into.md b/docs/ru/sql-reference/statements/insert-into.md
index e3cea4aecc5..ba35f792e49 100644
--- a/docs/ru/sql-reference/statements/insert-into.md
+++ b/docs/ru/sql-reference/statements/insert-into.md
@@ -13,7 +13,7 @@ toc_title: INSERT INTO
 INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
 ```
 
-Вы можете указать список столбцов для вставки, используя синтаксис `(c1, c2, c3)`. Также можно использовать выражение cо [звездочкой](../../sql-reference/statements/select/index.md#asterisk) и/или модификаторами, такими как `APPLY`, `EXCEPT`, `REPLACE`. 
+Вы можете указать список столбцов для вставки, используя синтаксис `(c1, c2, c3)`. Также можно использовать выражение cо [звездочкой](../../sql-reference/statements/select/index.md#asterisk) и/или модификаторами, такими как [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#except-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier).
 
 В качестве примера рассмотрим таблицу:
 
diff --git a/docs/ru/sql-reference/statements/select/index.md b/docs/ru/sql-reference/statements/select/index.md
index c37e82ae0be..536511ed5e8 100644
--- a/docs/ru/sql-reference/statements/select/index.md
+++ b/docs/ru/sql-reference/statements/select/index.md
@@ -162,6 +162,112 @@ Code: 42. DB::Exception: Received from localhost:9000. DB::Exception: Number of
 
 Подробнее смотрите в разделе «Настройки». Присутствует возможность использовать внешнюю сортировку (с сохранением временных данных на диск) и внешнюю агрегацию.
 
+## Модификаторы запроса SELECT {#select-modifiers}
+
+Вы можете использовать следующие модификаторы в запросах `SELECT`.
+
+### APPLY {#apply-modifier}
+
+Вызывает некоторую функцию для каждой строки, которая возвращает внешнее табличное выражение запроса. 
+
+**Синтаксис:**
+
+``` sql
+SELECT <expr> APPLY( <func> ) FROM [db.]table_name
+```
+
+**Пример:** 
+
+``` sql
+CREATE TABLE columns_transformers (i Int64, j Int16, k Int64) ENGINE = MergeTree ORDER by (i);
+INSERT INTO columns_transformers VALUES (100, 10, 324), (120, 8, 23);
+SELECT * APPLY(sum) FROM columns_transformers;
+```
+
+```
+┌─sum(i)─┬─sum(j)─┬─sum(k)─┐
+│    220 │     18 │    347 │
+└────────┴────────┴────────┘
+```
+
+### EXCEPT {#except-modifier}
+
+Указывает имена одного или нескольких столбцов для исключения из результата.
+
+**Синтаксис:**
+
+``` sql
+SELECT <expr> EXCEPT ( col_name1 [, col_name2, col_name3, ...] ) FROM [db.]table_name
+```
+
+**Пример:**
+
+``` sql
+SELECT * EXCEPT (i) from columns_transformers;
+```
+
+```
+┌──j─┬───k─┐
+│ 10 │ 324 │
+│  8 │  23 │
+└────┴─────┘
+```
+
+### REPLACE {#replace-modifier}
+
+Указывает одно или несколько [выражений алиасов](../../../sql-reference/syntax.md#syntax-expression_aliases). Каждый алиас должен соответствовать имени столбца из запроса `SELECT *`. В списке столбцов из результата запроса имя столбца, соответствующее алиасу, заменяется выражением в модификаторе `REPLACE`.
+
+Этот модификатор не изменяет имена или порядок столбцов. Однако он может изменить значение и тип значения.
+
+**Синтаксис:**
+
+``` sql
+SELECT <expr> REPLACE( <expr> AS col_name) from [db.]table_name
+```
+
+**Пример:**
+
+``` sql
+SELECT * REPLACE(i + 1 AS i) from columns_transformers;
+```
+
+```
+┌───i─┬──j─┬───k─┐
+│ 101 │ 10 │ 324 │
+│ 121 │  8 │  23 │
+└─────┴────┴─────┘
+```
+
+### Комбинации модификаторов {#modifier-combinations}
+
+Вы можете использовать каждый модификатор отдельно или комбинировать их.
+
+**Примеры:**
+
+Использование одного и того же модификатора несколько раз.
+
+``` sql
+SELECT COLUMNS('[jk]') APPLY(toString) APPLY(length) APPLY(max) from columns_transformers;
+```
+
+```
+┌─max(length(toString(j)))─┬─max(length(toString(k)))─┐
+│                        2 │                        3 │
+└──────────────────────────┴──────────────────────────┘
+```
+
+Использование нескольких модификаторов в одном запросе.
+
+``` sql
+SELECT * REPLACE(i + 1 AS i) EXCEPT (j) APPLY(sum) from columns_transformers;
+```
+
+```
+┌─sum(plus(i, 1))─┬─sum(k)─┐
+│             222 │    347 │
+└─────────────────┴────────┘
+```
+
 ## SETTINGS в запросе SELECT {#settings-in-select}
 
 Вы можете задать значения необходимых настроек непосредственно в запросе `SELECT` в секции `SETTINGS`. Эти настройки действуют только в рамках данного запроса, а после его выполнения сбрасываются до предыдущего значения или значения по умолчанию. 
@@ -174,5 +280,4 @@ Code: 42. DB::Exception: Received from localhost:9000. DB::Exception: Number of
 SELECT * FROM some_table SETTINGS optimize_read_in_order=1, cast_keep_nullable=1;
 ```
 
-[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/select/)
-<!--hide-->
+[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/select/)<!--hide-->

From b57452446b36ae1aa024d5c9813716e4e4c33ad0 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Mon, 25 Jan 2021 22:42:57 +0300
Subject: [PATCH 0333/1238] client/suggest: add error code names

---
 programs/client/Suggest.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/programs/client/Suggest.cpp b/programs/client/Suggest.cpp
index 87083c2c27b..5e917422e7c 100644
--- a/programs/client/Suggest.cpp
+++ b/programs/client/Suggest.cpp
@@ -104,6 +104,8 @@ void Suggest::loadImpl(Connection & connection, const ConnectionTimeouts & timeo
         " UNION ALL "
         "SELECT cluster FROM system.clusters"
         " UNION ALL "
+        "SELECT name FROM system.errors"
+        " UNION ALL "
         "SELECT concat(func.name, comb.name) FROM system.functions AS func CROSS JOIN system.aggregate_function_combinators AS comb WHERE is_aggregate";
 
     /// The user may disable loading of databases, tables, columns by setting suggestion_limit to zero.

From a2bab85b789884f3bdce4863a9948df89846d000 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Mon, 25 Jan 2021 23:05:41 +0300
Subject: [PATCH 0334/1238] client/suggest: set system_events_show_zero_values
 for query

---
 programs/client/Suggest.cpp | 11 ++++++++---
 programs/client/Suggest.h   |  2 +-
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/programs/client/Suggest.cpp b/programs/client/Suggest.cpp
index 5e917422e7c..e243cf6e6f1 100644
--- a/programs/client/Suggest.cpp
+++ b/programs/client/Suggest.cpp
@@ -1,5 +1,6 @@
 #include "Suggest.h"
 
+#include <Core/Settings.h>
 #include <Columns/ColumnString.h>
 #include <Common/typeid_cast.h>
 
@@ -125,12 +126,16 @@ void Suggest::loadImpl(Connection & connection, const ConnectionTimeouts & timeo
 
     query << ") WHERE notEmpty(res)";
 
-    fetch(connection, timeouts, query.str());
+    Settings settings;
+    /// To show all rows from:
+    /// - system.errors
+    settings.system_events_show_zero_values = true;
+    fetch(connection, timeouts, query.str(), settings);
 }
 
-void Suggest::fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query)
+void Suggest::fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query, Settings & settings)
 {
-    connection.sendQuery(timeouts, query);
+    connection.sendQuery(timeouts, query, "" /* query_id */, QueryProcessingStage::Complete, &settings);
 
     while (true)
     {
diff --git a/programs/client/Suggest.h b/programs/client/Suggest.h
index 03332088cbe..0049bc08ebf 100644
--- a/programs/client/Suggest.h
+++ b/programs/client/Suggest.h
@@ -33,7 +33,7 @@ public:
 private:
 
     void loadImpl(Connection & connection, const ConnectionTimeouts & timeouts, size_t suggestion_limit);
-    void fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query);
+    void fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query, Settings & settings);
     void fillWordsFromBlock(const Block & block);
 
     /// Words are fetched asynchronously.

From 1c68bf8545e8ecf801722edc51b6e94ce0cef9a7 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Mon, 25 Jan 2021 23:10:34 +0300
Subject: [PATCH 0335/1238] client/suggest: add other metrics

---
 programs/client/Suggest.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/programs/client/Suggest.cpp b/programs/client/Suggest.cpp
index e243cf6e6f1..28cad80ffb6 100644
--- a/programs/client/Suggest.cpp
+++ b/programs/client/Suggest.cpp
@@ -107,6 +107,12 @@ void Suggest::loadImpl(Connection & connection, const ConnectionTimeouts & timeo
         " UNION ALL "
         "SELECT name FROM system.errors"
         " UNION ALL "
+        "SELECT event FROM system.events"
+        " UNION ALL "
+        "SELECT metric FROM system.asynchronous_metrics"
+        " UNION ALL "
+        "SELECT metric FROM system.metrics"
+        " UNION ALL "
         "SELECT concat(func.name, comb.name) FROM system.functions AS func CROSS JOIN system.aggregate_function_combinators AS comb WHERE is_aggregate";
 
     /// The user may disable loading of databases, tables, columns by setting suggestion_limit to zero.
@@ -129,6 +135,7 @@ void Suggest::loadImpl(Connection & connection, const ConnectionTimeouts & timeo
     Settings settings;
     /// To show all rows from:
     /// - system.errors
+    /// - system.events
     settings.system_events_show_zero_values = true;
     fetch(connection, timeouts, query.str(), settings);
 }

From 212d5082b4feab509de7ac7b078b0bff532c5c0d Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Mon, 25 Jan 2021 23:15:47 +0300
Subject: [PATCH 0336/1238] client/suggest: add macros

---
 programs/client/Suggest.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/programs/client/Suggest.cpp b/programs/client/Suggest.cpp
index 28cad80ffb6..72f5a325b98 100644
--- a/programs/client/Suggest.cpp
+++ b/programs/client/Suggest.cpp
@@ -113,6 +113,8 @@ void Suggest::loadImpl(Connection & connection, const ConnectionTimeouts & timeo
         " UNION ALL "
         "SELECT metric FROM system.metrics"
         " UNION ALL "
+        "SELECT macro FROM system.macros"
+        " UNION ALL "
         "SELECT concat(func.name, comb.name) FROM system.functions AS func CROSS JOIN system.aggregate_function_combinators AS comb WHERE is_aggregate";
 
     /// The user may disable loading of databases, tables, columns by setting suggestion_limit to zero.

From 86f80105cb2a7a37e014da76c23902e93017015d Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Mon, 25 Jan 2021 23:15:59 +0300
Subject: [PATCH 0337/1238] client/suggest: add policy

---
 programs/client/Suggest.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/programs/client/Suggest.cpp b/programs/client/Suggest.cpp
index 72f5a325b98..a32e019e89b 100644
--- a/programs/client/Suggest.cpp
+++ b/programs/client/Suggest.cpp
@@ -115,6 +115,8 @@ void Suggest::loadImpl(Connection & connection, const ConnectionTimeouts & timeo
         " UNION ALL "
         "SELECT macro FROM system.macros"
         " UNION ALL "
+        "SELECT policy_name FROM system.storage_policies"
+        " UNION ALL "
         "SELECT concat(func.name, comb.name) FROM system.functions AS func CROSS JOIN system.aggregate_function_combinators AS comb WHERE is_aggregate";
 
     /// The user may disable loading of databases, tables, columns by setting suggestion_limit to zero.

From d72b302b9790b56036201a6a424660b713b67828 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com>
Date: Thu, 28 Jan 2021 23:16:35 +0300
Subject: [PATCH 0338/1238] Update Client.cpp

---
 programs/client/Client.cpp | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp
index ef12974adea..9a8b580407a 100644
--- a/programs/client/Client.cpp
+++ b/programs/client/Client.cpp
@@ -931,14 +931,22 @@ private:
             }
             std::cerr << "Received exception from server (version "
                 << server_version << "):" << std::endl << "Code: "
-                << server_exception->code() << ". " << text << std::endl << std::endl;
+                << server_exception->code() << ". " << text << std::endl;
+            if (is_interactive)
+            {
+                std::cerr << std::endl;
+            }
         }
 
         if (client_exception)
         {
             fmt::print(stderr,
-                "Error on processing query '{}':\n{}\n\n",
+                "Error on processing query '{}':\n{}\n",
                 full_query, client_exception->message());
+            if (is_interactive)
+            {
+                fmt::print(stderr, "\n");
+            }
         }
 
         // A debug check -- at least some exception must be set, if the error

From 09e192395a72f667398da0d9087fd250884c9cdb Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Thu, 28 Jan 2021 23:39:27 +0300
Subject: [PATCH 0339/1238] update comments and apply the setNeeded() advice
 from Nikolai

---
 src/Processors/Transforms/WindowTransform.cpp | 36 ++++++++++---------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index a4a0c0cd27b..7c5c1e6c88d 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -81,22 +81,24 @@ void WindowTransform::advancePartitionEnd()
 
 //    fmt::print(stderr, "end {}, partition_end {}\n", end, partition_end);
 
-    // If we're at the total end of data, we must end the partition. This is the
-    // only place in calculations where we need special handling for end of data,
-    // other places will work as usual based on `partition_ended` = true, because
-    // end of data is logically the same as any other end of partition.
+    // If we're at the total end of data, we must end the partition. This is one
+    // of the few places in calculations where we need special handling for end
+    // of data, other places will work as usual based on
+    // `partition_ended` = true, because end of data is logically the same as
+    // any other end of partition.
     // We must check this first, because other calculations might not be valid
     // when we're at the end of data.
-    // FIXME not true, we also handle it elsewhere
     if (input_is_finished)
     {
         partition_ended = true;
-        partition_end = end;
+        // We receive empty chunk at the end of data, so the partition_end must
+        // be already at the end of data.
+        assert(partition_end == end);
         return;
     }
 
-    // If we got to the end of the block already, but expect more data, wait for
-    // it.
+    // If we got to the end of the block already, but we are going to get more
+    // input data, wait for it.
     if (partition_end == end)
     {
         return;
@@ -296,9 +298,8 @@ void WindowTransform::advanceFrameEnd()
         return;
     }
 
-    // Add the columns over which we advanced the frame to the aggregate function
-    // states.
-    // We could have advanced over at most the entire last block.
+    // Add the rows over which we advanced the frame to the aggregate function
+    // states. We could have advanced over at most the entire last block.
     uint64_t rows_end = frame_end.row;
     if (frame_end.row == 0)
     {
@@ -539,13 +540,9 @@ IProcessor::Status WindowTransform::prepare()
 
             output.pushData(std::move(output_data));
         }
-        else
-        {
-            // Not sure what this branch means. The output port is full and we
-            // apply backoff pressure on the input?
-            input.setNotNeeded();
-        }
 
+        // We don't need input.setNotNeeded() here, because we already pull with
+        // the set_not_needed flag.
         return Status::PortFull;
     }
 
@@ -566,6 +563,11 @@ IProcessor::Status WindowTransform::prepare()
     // Consume input data if we have any ready.
     if (!has_input && input.hasData())
     {
+        // Pulling with set_not_needed = true and using an explicit setNeeded()
+        // later is somewhat more efficient, because after the setNeeded(), the
+        // required input block will be generated in the same thread and passed
+        // to our prepare() + work() methods in the same thread right away, so
+        // hopefully we will work on hot (cached) data.
         input_data = input.pullData(true /* set_not_needed */);
 
         // If we got an exception from input, just return it and mark that we're

From 2dd5062cc0f9d7e6b5b4783773dd468bc8e2bfe0 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Thu, 28 Jan 2021 23:46:22 +0300
Subject: [PATCH 0340/1238] use current row as partition etalon

---
 src/Processors/Transforms/WindowTransform.cpp | 34 +++++++------------
 src/Processors/Transforms/WindowTransform.h   |  3 --
 2 files changed, 12 insertions(+), 25 deletions(-)

diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index 7c5c1e6c88d..8b4d77d4d72 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -125,20 +125,19 @@ void WindowTransform::advancePartitionEnd()
     // The partition ends when the PARTITION BY columns change. We need
     // some reference columns for comparison. We might have already
     // dropped the blocks where the partition starts, but any row in the
-    // partition will do. We use a special partition_etalon pointer for this.
-    // It might be the same as the partition_end if we're at the first row of the
-    // first partition, so we compare it to itself, but it still works correctly.
-    const auto block_number = partition_end.block;
+    // partition will do. We use the current_row for this. It might be the same
+    // as the partition_end if we're at the first row of the first partition, so
+    // we will compare it to itself, but it still works correctly.
     const auto block_rows = blockRowsNumber(partition_end);
     for (; partition_end.row < block_rows; ++partition_end.row)
     {
         size_t i = 0;
         for (; i < n; i++)
         {
-            const auto * ref = inputAt(partition_etalon)[partition_by_indices[i]].get();
+            const auto * ref = inputAt(current_row)[partition_by_indices[i]].get();
             const auto * c = inputAt(partition_end)[partition_by_indices[i]].get();
             if (c->compareAt(partition_end.row,
-                    partition_etalon.row, *ref,
+                    current_row.row, *ref,
                     1 /* nan_direction_hint */) != 0)
             {
                 break;
@@ -159,12 +158,6 @@ void WindowTransform::advancePartitionEnd()
 
     // Went until the end of data and didn't find the new partition.
     assert(!partition_ended && partition_end == blocksEnd());
-
-    // Advance the partition etalon so that we can drop the old blocks.
-    // We can use the last valid row of the block as the partition etalon.
-    // Shouldn't have empty blocks here (what would it mean?).
-    assert(block_rows > 0);
-    partition_etalon = RowNumber{block_number, block_rows - 1};
 }
 
 void WindowTransform::advanceFrameStart()
@@ -389,10 +382,9 @@ void WindowTransform::appendChunk(Chunk & chunk)
     // Start the calculations. First, advance the partition end.
     for (;;)
     {
-//        const auto old_etalon = partition_etalon;
         advancePartitionEnd();
-//        fmt::print(stderr, "partition [?, {}), {}, etalon old {} new {}\n",
-//            partition_end, partition_ended, old_etalon, partition_etalon);
+//        fmt::print(stderr, "partition [?, {}), {}\n",
+//            partition_end, partition_ended);
 
         // Either we ran out of data or we found the end of partition (maybe
         // both, but this only happens at the total end of data).
@@ -458,7 +450,6 @@ void WindowTransform::appendChunk(Chunk & chunk)
         const auto new_partition_start = partition_end;
         advanceRowNumber(partition_end);
         partition_ended = false;
-        partition_etalon = new_partition_start;
         // We have to reset the frame when the new partition starts. This is not a
         // generally correct way to do so, but we don't really support moving frame
         // for now.
@@ -625,13 +616,12 @@ void WindowTransform::work()
     // We don't really have to keep the entire partition, and it can be big, so
     // we want to drop the starting blocks to save memory.
     // We can drop the old blocks if we already returned them as output, and the
-    // frame, the current row and the partition etalon are already past them.
-    // Note that the frame start can be further than current row for some frame
-    // specs (e.g. EXCLUDE CURRENT ROW), so we have to check both.
+    // frame and the current row are already past them. Note that the frame
+    // start can be further than current row for some frame specs (e.g. EXCLUDE
+    // CURRENT ROW), so we have to check both.
     const auto first_used_block = std::min(next_output_block_number,
-        std::min(frame_start.block,
-            std::min(current_row.block,
-                partition_etalon.block)));
+        std::min(frame_start.block, current_row.block));
+
     if (first_block_number < first_used_block)
     {
 //        fmt::print(stderr, "will drop blocks from {} to {}\n", first_block_number,
diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h
index ab9b548dbc9..f1dd9682d00 100644
--- a/src/Processors/Transforms/WindowTransform.h
+++ b/src/Processors/Transforms/WindowTransform.h
@@ -219,9 +219,6 @@ public:
     // need it, and we want to be able to drop the starting blocks to save memory.
     // The `partition_end` is past-the-end, as usual. When partition_ended = false,
     // it still haven't ended, and partition_end is the next row to check.
-    // We still need to keep some not-too-far-away row in the partition, to use
-    // it as an etalon for PARTITION BY comparison.
-    RowNumber partition_etalon;
     RowNumber partition_end;
     bool partition_ended = false;
 

From e7d8aa250cc9440e0b334aff68327b3eb2778be1 Mon Sep 17 00:00:00 2001
From: Dmitriy <sevirov@yandex-team.ru>
Date: Thu, 28 Jan 2021 23:57:04 +0300
Subject: [PATCH 0341/1238] Add links
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Добавил ссылки на модификаторы.
---
 docs/en/sql-reference/statements/insert-into.md | 2 +-
 docs/ru/sql-reference/statements/insert-into.md | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/en/sql-reference/statements/insert-into.md b/docs/en/sql-reference/statements/insert-into.md
index 2928e50224d..facc1b19dad 100644
--- a/docs/en/sql-reference/statements/insert-into.md
+++ b/docs/en/sql-reference/statements/insert-into.md
@@ -13,7 +13,7 @@ Basic query format:
 INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
 ```
 
-You can specify a list of columns to insert using  the `(c1, c2, c3)`. You can also use an expression with column [matcher](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#apply-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier).
+You can specify a list of columns to insert using  the `(c1, c2, c3)`. You can also use an expression with column [matcher](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#except-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier).
 
 For example, consider the table:
 
diff --git a/docs/ru/sql-reference/statements/insert-into.md b/docs/ru/sql-reference/statements/insert-into.md
index d83f6691f6b..61dc1170883 100644
--- a/docs/ru/sql-reference/statements/insert-into.md
+++ b/docs/ru/sql-reference/statements/insert-into.md
@@ -13,7 +13,7 @@ toc_title: INSERT INTO
 INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
 ```
 
-Вы можете указать список столбцов для вставки, используя синтаксис `(c1, c2, c3)`. Также можно использовать выражение cо [звездочкой](../../sql-reference/statements/select/index.md#asterisk) и/или модификаторами, такими как `APPLY`, `EXCEPT`, `REPLACE`.
+Вы можете указать список столбцов для вставки, используя синтаксис `(c1, c2, c3)`. Также можно использовать выражение cо [звездочкой](../../sql-reference/statements/select/index.md#asterisk) и/или модификаторами, такими как [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#except-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier).
 
 В качестве примера рассмотрим таблицу:
 

From 5e3fce1509e9532e91a06afd0ec8ce2a12720d49 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Fri, 29 Jan 2021 00:29:51 +0300
Subject: [PATCH 0342/1238] Added test to ANTLR skip_list.json

---
 tests/queries/skip_list.json | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json
index 8ed1e890cf1..95163bae9f8 100644
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@@ -310,7 +310,8 @@
         "01642_if_nullable_regression",
         "01643_system_suspend",
         "01655_plan_optimizations",
-        "01475_read_subcolumns_storages"
+        "01475_read_subcolumns_storages",
+        "01674_clickhouse_client_query_param_cte"
     ],
     "parallel":
     [

From a12c666b40c8dcbbb5a4fc1ac4b01dfb01c81654 Mon Sep 17 00:00:00 2001
From: George <gyuton@yandex-team.ru>
Date: Fri, 29 Jan 2021 01:54:21 +0300
Subject: [PATCH 0343/1238] Documented array function

---
 .../functions/array-functions.md              | 186 ++++++++++++++++--
 1 file changed, 169 insertions(+), 17 deletions(-)

diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md
index dc7727bdfd8..1b50591f835 100644
--- a/docs/en/sql-reference/functions/array-functions.md
+++ b/docs/en/sql-reference/functions/array-functions.md
@@ -1288,73 +1288,225 @@ Returns the index of the first element in the `arr1` array for which `func` retu
 
 Note that the `arrayFirstIndex` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted.
 
-## arrayMin(\[func,\] arr1, …) {#array-min}
+## arrayMin {#array-min}
 
-Returns the min of the `func` values. If the function is omitted, it just returns the min of the array elements.
+Returns the minimum of elements in the source array. 
+
+If the `func` function is specified, returns the miminum of elements converted by this function.
 
 Note that the `arrayMin` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument.
 
-Examples:
+**Syntax**
+
 ```sql
-SELECT arrayMin([1, 2, 4]) AS res
+arrayMin([func,] arr1)
+```
+
+**Parameters**
+
+-   `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md).
+-   `arr` — Array. [Array](../../sql-reference/data-types/array.md).
+
+**Returned value**
+
+-   The minimum of function values (or the array minimum). 
+
+Type: matches the array type. 
+
+**Examples**
+
+Query:
+
+```sql
+SELECT arrayMin([1, 2, 4]) AS res;
+```
+
+Result:
+
+```text
 ┌─res─┐
 │   1 │
 └─────┘
+```
 
+Query:
 
-SELECT arrayMin(x -> (-x), [1, 2, 4]) AS res
+```
+SELECT arrayMin(x -> (-x), [1, 2, 4]) AS res;
+```
+
+Result:
+
+```text
 ┌─res─┐
 │  -4 │
 └─────┘
 ```
 
-## arrayMax(\[func,\] arr1, …) {#array-max}
+## arrayMax {#array-max}
 
-Returns the max of the `func` values. If the function is omitted, it just returns the max of the array elements.
+Returns the maximum of elements in the source array. 
+
+If the `func` function is specified, returns the maximum of elements converted by this function.
 
 Note that the `arrayMax` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument.
 
-Examples:
+**Syntax**
+
 ```sql
-SELECT arrayMax([1, 2, 4]) AS res
+arrayMax([func,] arr)
+```
+
+**Parameters**
+
+-   `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md).
+-   `arr` — Array. [Array](../../sql-reference/data-types/array.md).
+
+**Returned value**
+
+-   The maximum of function values (or the array maximum). 
+
+Type: matches the array type. 
+
+**Examples**
+
+Query:
+
+```sql
+SELECT arrayMax([1, 2, 4]) AS res;
+```
+Result:
+
+```text
 ┌─res─┐
 │   4 │
 └─────┘
+```
 
+Query:
 
-SELECT arrayMax(x -> (-x), [1, 2, 4]) AS res
+```sql
+SELECT arrayMax(x -> (-x), [1, 2, 4]) AS res;
+```
+
+Result:
+
+```text
 ┌─res─┐
 │  -1 │
 └─────┘
 ```
 
-## arraySum(\[func,\] arr1, …) {#array-sum}
+## arraySum {#array-sum}
 
-Returns the sum of the `func` values. If the function is omitted, it just returns the sum of the array elements.
+Returns the sum of the elements in the source array. 
+
+If the `func` function is specified, returns the sum of elements converted by this function.
 
 Note that the `arraySum` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument.
 
-Examples:
+**Syntax**
+
 ```sql
-SELECT arraySum([2,3]) AS res
+arraySum([func, ] arr)
+```
+
+**Parameters**
+
+-   `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md).
+-   `arr` — Array. [Array](../../sql-reference/data-types/array.md).   
+
+**Returned value**
+
+-   The sum of the function values (or the array sum).
+
+Type: matches the array type. 
+
+**Examples**
+
+Query:
+
+```sql
+SELECT arraySum([2,3]) AS res;
+```
+
+Result:
+
+```text
 ┌─res─┐
 │   5 │
 └─────┘
+```
 
+Query:
 
-SELECT arraySum(x -> x*x, [2, 3]) AS res
+```sql
+SELECT arraySum(x -> x*x, [2, 3]) AS res;
+```
+
+Result:
+
+```text
 ┌─res─┐
 │  13 │
 └─────┘
 ```
 
+## arrayAvg {#array-avg}
 
-## arrayAvg(\[func,\] arr1, …) {#array-avg}
+Returns the average of the elements in the source array. 
 
-Returns the average of the `func` values. If the function is omitted, it just returns the average of the array elements.
+If the `func` function is specified, returns the average of elements converted by this function.
 
 Note that the `arrayAvg` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument.
 
+**Syntax**
+
+```sql
+arrayAvg([func,] arr)
+```
+
+**Parameters**
+
+-   `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md).
+-   `arr` — Array. [Array](../../sql-reference/data-types/array.md).   
+
+**Returned value**
+
+-   The average of the function values (or the array average).
+
+Type: [Float64](../../sql-reference/data-types/float.md).
+
+**Examples**
+
+Query:
+
+```sql
+SELECT arrayAvg([1, 2, 4]) AS res;
+```
+
+Result:
+
+```text
+┌────────────────res─┐
+│ 2.3333333333333335 │
+└────────────────────┘
+```
+
+Query:
+
+```sql
+SELECT arrayAvg(x -> (x * x), [2, 4]) AS res;
+```
+
+Result:
+
+```text
+┌─res─┐
+│  10 │
+└─────┘
+```
+
 ## arrayCumSum(\[func,\] arr1, …) {#arraycumsumfunc-arr1}
 
 Returns an array of partial sums of elements in the source array (a running sum). If the `func` function is specified, then the values of the array elements are converted by this function before summing.

From d6e34deeee7d4e007da090200296b88eef236802 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Fri, 29 Jan 2021 03:53:21 +0300
Subject: [PATCH 0344/1238] Fix Python

---
 tests/clickhouse-test | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/clickhouse-test b/tests/clickhouse-test
index 0c6154f1662..6981494b64e 100755
--- a/tests/clickhouse-test
+++ b/tests/clickhouse-test
@@ -136,7 +136,7 @@ def run_single_test(args, ext, server_logs_level, client_options, case_file, std
         os.environ["CLICKHOUSE_DATABASE"] = database
 
     # This is for .sh tests
-    os.environ["CLICKHOUSE_LOG_COMMENT"] = test
+    os.environ.setdefault["CLICKHOUSE_LOG_COMMENT"] = test
 
     params = {
         'client': args.client + ' --database=' + database,

From 9c111a7bdb553e784f32f501f9f2cbc9be396a28 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Fri, 29 Jan 2021 03:53:54 +0300
Subject: [PATCH 0345/1238] Translate comment in test

---
 tests/queries/1_stateful/00139_like.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/1_stateful/00139_like.sql b/tests/queries/1_stateful/00139_like.sql
index ccc195bc81d..8cb84558407 100644
--- a/tests/queries/1_stateful/00139_like.sql
+++ b/tests/queries/1_stateful/00139_like.sql
@@ -1,4 +1,4 @@
-/* Заметим, что запросы написаны так, как будто пользователь не понимает смысл символа _ в LIKE выражении. */
+/* Note that queries are written as the user doesn't really understand that the symbol _ has special meaning in LIKE pattern. */
 SELECT count() FROM test.hits WHERE URL LIKE '%/avtomobili_s_probegom/_%__%__%__%';
 SELECT count() FROM test.hits WHERE URL LIKE '/avtomobili_s_probegom/_%__%__%__%';
 SELECT count() FROM test.hits WHERE URL LIKE '%_/avtomobili_s_probegom/_%__%__%__%';

From eb0473c3f92371fde097e2e5be40ecbb1d2029f8 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Fri, 29 Jan 2021 04:03:38 +0300
Subject: [PATCH 0346/1238] Fix UBSan report in "round"

---
 base/common/defines.h                                     | 2 ++
 src/Functions/FunctionsRound.h                            | 3 ++-
 tests/queries/0_stateless/01676_round_int_ubsan.reference | 1 +
 tests/queries/0_stateless/01676_round_int_ubsan.sql       | 6 ++++++
 4 files changed, 11 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/01676_round_int_ubsan.reference
 create mode 100644 tests/queries/0_stateless/01676_round_int_ubsan.sql

diff --git a/base/common/defines.h b/base/common/defines.h
index 39df4698b88..845a53179ef 100644
--- a/base/common/defines.h
+++ b/base/common/defines.h
@@ -84,10 +84,12 @@
 #    define NO_SANITIZE_UNDEFINED __attribute__((__no_sanitize__("undefined")))
 #    define NO_SANITIZE_ADDRESS __attribute__((__no_sanitize__("address")))
 #    define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread")))
+#    define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED __attribute__((__always_inline__, __no_sanitize__("undefined")))
 #else  /// It does not work in GCC. GCC 7 cannot recognize this attribute and GCC 8 simply ignores it.
 #    define NO_SANITIZE_UNDEFINED
 #    define NO_SANITIZE_ADDRESS
 #    define NO_SANITIZE_THREAD
+#    define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED ALWAYS_INLINE
 #endif
 
 /// A template function for suppressing warnings about unused variables or function results.
diff --git a/src/Functions/FunctionsRound.h b/src/Functions/FunctionsRound.h
index f942b894052..b510f62662e 100644
--- a/src/Functions/FunctionsRound.h
+++ b/src/Functions/FunctionsRound.h
@@ -101,7 +101,8 @@ struct IntegerRoundingComputation
         return scale;
     }
 
-    static ALWAYS_INLINE T computeImpl(T x, T scale)
+    /// Integer overflow is Ok.
+    static ALWAYS_INLINE_NO_SANITIZE_UNDEFINED T computeImpl(T x, T scale)
     {
         switch (rounding_mode)
         {
diff --git a/tests/queries/0_stateless/01676_round_int_ubsan.reference b/tests/queries/0_stateless/01676_round_int_ubsan.reference
new file mode 100644
index 00000000000..cec4825deb9
--- /dev/null
+++ b/tests/queries/0_stateless/01676_round_int_ubsan.reference
@@ -0,0 +1 @@
+9223372036854775700
diff --git a/tests/queries/0_stateless/01676_round_int_ubsan.sql b/tests/queries/0_stateless/01676_round_int_ubsan.sql
new file mode 100644
index 00000000000..45aa5706a05
--- /dev/null
+++ b/tests/queries/0_stateless/01676_round_int_ubsan.sql
@@ -0,0 +1,6 @@
+-- Overflow during integer rounding is implementation specific behaviour.
+-- This test allows to be aware if the impkementation changes.
+-- Changing the implementation specific behaviour is Ok.
+-- and should not be treat as incompatibility (simply update test result then).
+
+SELECT round(-9223372036854775808, -2);

From b3944006cb9de882051fea2b4572a4c04c71c1b4 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Fri, 29 Jan 2021 04:41:55 +0300
Subject: [PATCH 0347/1238] Fix trivial bug in arrayEnumerateUniq

---
 src/Functions/array/arrayEnumerateExtended.h        |  3 +++
 .../0_stateless/01677_array_enumerate_bug.reference |  2 ++
 .../0_stateless/01677_array_enumerate_bug.sql       | 13 +++++++++++++
 3 files changed, 18 insertions(+)
 create mode 100644 tests/queries/0_stateless/01677_array_enumerate_bug.reference
 create mode 100644 tests/queries/0_stateless/01677_array_enumerate_bug.sql

diff --git a/src/Functions/array/arrayEnumerateExtended.h b/src/Functions/array/arrayEnumerateExtended.h
index 412fe9e7858..39bdd2a515e 100644
--- a/src/Functions/array/arrayEnumerateExtended.h
+++ b/src/Functions/array/arrayEnumerateExtended.h
@@ -353,6 +353,9 @@ bool FunctionArrayEnumerateExtended<Derived>::execute128bit(
         keys_bytes += key_sizes[j];
     }
 
+    if (keys_bytes > 16)
+        return false;
+
     executeMethod<MethodFixed>(offsets, columns, key_sizes, nullptr, res_values);
     return true;
 }
diff --git a/tests/queries/0_stateless/01677_array_enumerate_bug.reference b/tests/queries/0_stateless/01677_array_enumerate_bug.reference
new file mode 100644
index 00000000000..9c0e526801f
--- /dev/null
+++ b/tests/queries/0_stateless/01677_array_enumerate_bug.reference
@@ -0,0 +1,2 @@
+[1,1,2]
+[1,1,1]
diff --git a/tests/queries/0_stateless/01677_array_enumerate_bug.sql b/tests/queries/0_stateless/01677_array_enumerate_bug.sql
new file mode 100644
index 00000000000..0db0c51fe5b
--- /dev/null
+++ b/tests/queries/0_stateless/01677_array_enumerate_bug.sql
@@ -0,0 +1,13 @@
+-- there was a bug - missing check of the total size of keys for the case with hash table with 128bit key.
+
+SELECT arrayEnumerateUniq(arrayEnumerateUniq([toInt256(10), toInt256(100), toInt256(2)]), [toInt256(123), toInt256(1023), toInt256(123)]);
+
+SELECT arrayEnumerateUniq(
+    [111111, 222222, 333333],
+    [444444, 555555, 666666],
+    [111111, 222222, 333333],
+    [444444, 555555, 666666],
+    [111111, 222222, 333333],
+    [444444, 555555, 666666],
+    [111111, 222222, 333333],
+    [444444, 555555, 666666]);

From 93f58257e482be996296c6260487b3fea43d5e5a Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Fri, 29 Jan 2021 04:45:24 +0300
Subject: [PATCH 0348/1238] Fix mistake

---
 tests/clickhouse-test | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/clickhouse-test b/tests/clickhouse-test
index 6981494b64e..319c2f523e6 100755
--- a/tests/clickhouse-test
+++ b/tests/clickhouse-test
@@ -136,7 +136,7 @@ def run_single_test(args, ext, server_logs_level, client_options, case_file, std
         os.environ["CLICKHOUSE_DATABASE"] = database
 
     # This is for .sh tests
-    os.environ.setdefault["CLICKHOUSE_LOG_COMMENT"] = test
+    os.environ.setdefault["CLICKHOUSE_LOG_COMMENT"] = case_file
 
     params = {
         'client': args.client + ' --database=' + database,

From 35f8a0810ac945403e40a4ff261f3c7304e96c1d Mon Sep 17 00:00:00 2001
From: taiyang-li <taiyang_li@163.com>
Date: Fri, 29 Jan 2021 09:59:26 +0800
Subject: [PATCH 0349/1238] fix client bug while executing select

---
 src/Parsers/parseQuery.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Parsers/parseQuery.cpp b/src/Parsers/parseQuery.cpp
index 48a92534e74..650c0e40c8c 100644
--- a/src/Parsers/parseQuery.cpp
+++ b/src/Parsers/parseQuery.cpp
@@ -79,7 +79,7 @@ void writeQueryWithHighlightedErrorPositions(
     {
         const char * current_position_to_hilite = positions_to_hilite[position_to_hilite_idx].begin;
 
-        assert(current_position_to_hilite < end);
+        assert(current_position_to_hilite <= end);
         assert(current_position_to_hilite >= begin);
 
         out.write(pos, current_position_to_hilite - pos);

From f4a05bd773c4294de73369aa586c65a297baa085 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Fri, 29 Jan 2021 05:05:46 +0300
Subject: [PATCH 0350/1238] Fix mistake in greatCircleAngle

---
 src/Functions/greatCircleDistance.cpp                       | 4 ++--
 .../queries/0_stateless/01678_great_circle_angle.reference  | 5 +++++
 tests/queries/0_stateless/01678_great_circle_angle.sql      | 6 ++++++
 3 files changed, 13 insertions(+), 2 deletions(-)
 create mode 100644 tests/queries/0_stateless/01678_great_circle_angle.reference
 create mode 100644 tests/queries/0_stateless/01678_great_circle_angle.sql

diff --git a/src/Functions/greatCircleDistance.cpp b/src/Functions/greatCircleDistance.cpp
index 8b9df91e117..ac66c0d42fe 100644
--- a/src/Functions/greatCircleDistance.cpp
+++ b/src/Functions/greatCircleDistance.cpp
@@ -95,7 +95,7 @@ void geodistInit()
 
         sphere_metric_meters_lut[i] = static_cast<float>(sqr((EARTH_DIAMETER * PI / 360) * cos(latitude)));
 
-        sphere_metric_lut[i] = cosf(latitude);
+        sphere_metric_lut[i] = sqr(cosf(latitude));
     }
 }
 
@@ -182,7 +182,7 @@ float distance(float lon1deg, float lat1deg, float lon2deg, float lat2deg)
         ///  (Remember how a plane flies from Moscow to New York)
         /// But if longitude is close but latitude is different enough, there is no difference between meridian and great circle line.
 
-        float latitude_midpoint = (lat1deg + lat2deg + 180) * METRIC_LUT_SIZE / 360; // [-90, 90] degrees -> [0, KTABLE] indexes
+        float latitude_midpoint = (lat1deg + lat2deg + 180) * METRIC_LUT_SIZE / 360; // [-90, 90] degrees -> [0, METRIC_LUT_SIZE] indexes
         size_t latitude_midpoint_index = floatToIndex(latitude_midpoint) & (METRIC_LUT_SIZE - 1);
 
         /// This is linear interpolation between two table items at index "latitude_midpoint_index" and "latitude_midpoint_index + 1".
diff --git a/tests/queries/0_stateless/01678_great_circle_angle.reference b/tests/queries/0_stateless/01678_great_circle_angle.reference
new file mode 100644
index 00000000000..0373970e3bd
--- /dev/null
+++ b/tests/queries/0_stateless/01678_great_circle_angle.reference
@@ -0,0 +1,5 @@
+0.1224
+0.7071
+0.7135
+10007554
+10007554
diff --git a/tests/queries/0_stateless/01678_great_circle_angle.sql b/tests/queries/0_stateless/01678_great_circle_angle.sql
new file mode 100644
index 00000000000..124c7bfadf2
--- /dev/null
+++ b/tests/queries/0_stateless/01678_great_circle_angle.sql
@@ -0,0 +1,6 @@
+SELECT round(greatCircleAngle(0, 45, 0.1, 45.1), 4);
+SELECT round(greatCircleAngle(0, 45, 1, 45), 4);
+SELECT round(greatCircleAngle(0, 45, 1, 45.1), 4);
+
+SELECT round(greatCircleDistance(0, 0, 0, 90), 4);
+SELECT round(greatCircleDistance(0, 0, 90, 0), 4);

From 2b77488036777a67270ce05f7a299e0298736e22 Mon Sep 17 00:00:00 2001
From: George <gyuton@yandex-team.ru>
Date: Fri, 29 Jan 2021 05:08:22 +0300
Subject: [PATCH 0351/1238] Minor fixes

---
 docs/en/sql-reference/functions/array-functions.md | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md
index 1b50591f835..402362c0601 100644
--- a/docs/en/sql-reference/functions/array-functions.md
+++ b/docs/en/sql-reference/functions/array-functions.md
@@ -1292,7 +1292,7 @@ Note that the `arrayFirstIndex` is a [higher-order function](../../sql-reference
 
 Returns the minimum of elements in the source array. 
 
-If the `func` function is specified, returns the miminum of elements converted by this function.
+If the `func` function is specified, returns the mininum of elements converted by this function.
 
 Note that the `arrayMin` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument.
 
@@ -1366,7 +1366,7 @@ arrayMax([func,] arr)
 
 -   The maximum of function values (or the array maximum). 
 
-Type: matches the array type. 
+Type: matches the array type (or type of function result). 
 
 **Examples**
 
@@ -1375,6 +1375,7 @@ Query:
 ```sql
 SELECT arrayMax([1, 2, 4]) AS res;
 ```
+
 Result:
 
 ```text
@@ -1399,7 +1400,7 @@ Result:
 
 ## arraySum {#array-sum}
 
-Returns the sum of the elements in the source array. 
+Returns the sum of elements in the source array. 
 
 If the `func` function is specified, returns the sum of elements converted by this function.
 
@@ -1454,7 +1455,7 @@ Result:
 
 ## arrayAvg {#array-avg}
 
-Returns the average of the elements in the source array. 
+Returns the average of elements in the source array. 
 
 If the `func` function is specified, returns the average of elements converted by this function.
 
@@ -1473,7 +1474,7 @@ arrayAvg([func,] arr)
 
 **Returned value**
 
--   The average of the function values (or the array average).
+-   The average of function values (or the array average).
 
 Type: [Float64](../../sql-reference/data-types/float.md).
 

From 643b1da999e060d4c226c2cce65fb21e9a408bac Mon Sep 17 00:00:00 2001
From: keenwolf <keenwolf@gmail.com>
Date: Fri, 29 Jan 2021 10:14:10 +0800
Subject: [PATCH 0352/1238] just restart the CI test

---
 tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
index 3aca8a9980a..02b0beee550 100755
--- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
+++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
@@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 
 # Data preparation.
 # Now we can get the user_files_path by use the table file function for trick. also we can get it by query as:
-# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')"
+#  "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')"
 user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 |grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
 mkdir -p ${user_files_path}/
 echo -n aaaaaaaaa > ${user_files_path}/a.txt

From eee84eec791f4fc4a0a1277ea9bafaf14188c4a7 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Fri, 29 Jan 2021 05:14:15 +0300
Subject: [PATCH 0353/1238] formatReadableTimeDelta: correctly output infinite
 values

---
 src/Functions/formatReadableTimeDelta.cpp     | 39 ++++++++++++-------
 ...9_format_readable_time_delta_inf.reference |  3 ++
 .../01679_format_readable_time_delta_inf.sql  |  1 +
 3 files changed, 28 insertions(+), 15 deletions(-)
 create mode 100644 tests/queries/0_stateless/01679_format_readable_time_delta_inf.reference
 create mode 100644 tests/queries/0_stateless/01679_format_readable_time_delta_inf.sql

diff --git a/src/Functions/formatReadableTimeDelta.cpp b/src/Functions/formatReadableTimeDelta.cpp
index 8600e1b1095..9ffdb8464e0 100644
--- a/src/Functions/formatReadableTimeDelta.cpp
+++ b/src/Functions/formatReadableTimeDelta.cpp
@@ -3,6 +3,7 @@
 #include <Functions/FunctionHelpers.h>
 #include <Columns/ColumnString.h>
 #include <Columns/ColumnVector.h>
+#include <Common/NaNUtils.h>
 #include <DataTypes/DataTypeString.h>
 #include <IO/WriteBufferFromVector.h>
 #include <IO/WriteHelpers.h>
@@ -134,24 +135,32 @@ public:
             /// Virtual call is Ok (negligible comparing to the rest of calculations).
             Float64 value = arguments[0].column->getFloat64(i);
 
-            bool is_negative = value < 0;
-            if (is_negative)
+            if (!isFinite(value))
             {
-                writeChar('-', buf_to);
-                value = -value;
+                /// Cannot decide what unit it is (years, month), just simply write inf or nan.
+                writeFloatText(value, buf_to);
             }
-
-            /// To output separators between parts: ", " and " and ".
-            bool has_output = false;
-
-            switch (max_unit) /// A kind of Duff Device.
+            else
             {
-                case Years:     processUnit(365 * 24 * 3600, " year", 5, value, buf_to, has_output); [[fallthrough]];
-                case Months:    processUnit(30.5 * 24 * 3600, " month", 6, value, buf_to, has_output); [[fallthrough]];
-                case Days:      processUnit(24 * 3600, " day", 4, value, buf_to, has_output); [[fallthrough]];
-                case Hours:     processUnit(3600, " hour", 5, value, buf_to, has_output); [[fallthrough]];
-                case Minutes:   processUnit(60, " minute", 7, value, buf_to, has_output); [[fallthrough]];
-                case Seconds:   processUnit(1, " second", 7, value, buf_to, has_output);
+                bool is_negative = value < 0;
+                if (is_negative)
+                {
+                    writeChar('-', buf_to);
+                    value = -value;
+                }
+
+                /// To output separators between parts: ", " and " and ".
+                bool has_output = false;
+
+                switch (max_unit) /// A kind of Duff Device.
+                {
+                    case Years:     processUnit(365 * 24 * 3600, " year", 5, value, buf_to, has_output); [[fallthrough]];
+                    case Months:    processUnit(30.5 * 24 * 3600, " month", 6, value, buf_to, has_output); [[fallthrough]];
+                    case Days:      processUnit(24 * 3600, " day", 4, value, buf_to, has_output); [[fallthrough]];
+                    case Hours:     processUnit(3600, " hour", 5, value, buf_to, has_output); [[fallthrough]];
+                    case Minutes:   processUnit(60, " minute", 7, value, buf_to, has_output); [[fallthrough]];
+                    case Seconds:   processUnit(1, " second", 7, value, buf_to, has_output);
+                }
             }
 
             writeChar(0, buf_to);
diff --git a/tests/queries/0_stateless/01679_format_readable_time_delta_inf.reference b/tests/queries/0_stateless/01679_format_readable_time_delta_inf.reference
new file mode 100644
index 00000000000..5446cd475b0
--- /dev/null
+++ b/tests/queries/0_stateless/01679_format_readable_time_delta_inf.reference
@@ -0,0 +1,3 @@
+inf
+-inf
+nan
diff --git a/tests/queries/0_stateless/01679_format_readable_time_delta_inf.sql b/tests/queries/0_stateless/01679_format_readable_time_delta_inf.sql
new file mode 100644
index 00000000000..ac92dec2bee
--- /dev/null
+++ b/tests/queries/0_stateless/01679_format_readable_time_delta_inf.sql
@@ -0,0 +1 @@
+SELECT formatReadableTimeDelta(arrayJoin([inf, -inf, nan]));

From 95a375b4cc444128befa6690fb290690d2d842c8 Mon Sep 17 00:00:00 2001
From: George <gyuton@yandex-team.ru>
Date: Fri, 29 Jan 2021 05:41:03 +0300
Subject: [PATCH 0354/1238] Minor fixes

---
 docs/en/sql-reference/functions/array-functions.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md
index 402362c0601..48ef103368b 100644
--- a/docs/en/sql-reference/functions/array-functions.md
+++ b/docs/en/sql-reference/functions/array-functions.md
@@ -1299,7 +1299,7 @@ Note that the `arrayMin` is a [higher-order function](../../sql-reference/functi
 **Syntax**
 
 ```sql
-arrayMin([func,] arr1)
+arrayMin([func,] arr)
 ```
 
 **Parameters**
@@ -1409,7 +1409,7 @@ Note that the `arraySum` is a [higher-order function](../../sql-reference/functi
 **Syntax**
 
 ```sql
-arraySum([func, ] arr)
+arraySum([func,] arr)
 ```
 
 **Parameters**
@@ -1428,7 +1428,7 @@ Type: matches the array type.
 Query:
 
 ```sql
-SELECT arraySum([2,3]) AS res;
+SELECT arraySum([2, 3]) AS res;
 ```
 
 Result:

From b7f1a953d504f8114563068c866db72e4282ad4c Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Fri, 29 Jan 2021 06:24:59 +0300
Subject: [PATCH 0355/1238] Fix test

---
 .../0_stateless/01044_great_circle_angle.reference        | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/queries/0_stateless/01044_great_circle_angle.reference b/tests/queries/0_stateless/01044_great_circle_angle.reference
index 60a616c7187..ebdeaa10067 100644
--- a/tests/queries/0_stateless/01044_great_circle_angle.reference
+++ b/tests/queries/0_stateless/01044_great_circle_angle.reference
@@ -17,11 +17,11 @@
 ██████████▎
 ████████████▍
 ██████████████▍
-████████████████▌
+████████████████▍
 ██████████████████▌
-████████████████████▋
-██████████████████████▋
-████████████████████████▋
+████████████████████▌
+██████████████████████▌
+████████████████████████▌
 ██████████████████████████▌
 ████████████████████████████▍
 ██████████████████████████████▍

From feeb1243ec2c143a8d13d2ed167cddb87dbe9dcb Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Fri, 29 Jan 2021 06:26:53 +0300
Subject: [PATCH 0356/1238] Fix Python

---
 tests/clickhouse-test | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/clickhouse-test b/tests/clickhouse-test
index 319c2f523e6..0c49a3670a0 100755
--- a/tests/clickhouse-test
+++ b/tests/clickhouse-test
@@ -136,7 +136,7 @@ def run_single_test(args, ext, server_logs_level, client_options, case_file, std
         os.environ["CLICKHOUSE_DATABASE"] = database
 
     # This is for .sh tests
-    os.environ.setdefault["CLICKHOUSE_LOG_COMMENT"] = case_file
+    os.environ.setdefault("CLICKHOUSE_LOG_COMMENT", case_file)
 
     params = {
         'client': args.client + ' --database=' + database,

From 585874bf46651890ee2c5f498dacfebe3b7d8c19 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Wed, 27 Jan 2021 22:44:22 +0300
Subject: [PATCH 0357/1238] tests: add a test for clickhouse-client
 autocompletion

v2: Increase timeout for 01676_clickhouse_client_autocomplete
    https://github.com/ClickHouse/ClickHouse/pull/19584#discussion_r565727175

v3: Disable 01676_clickhouse_client_autocomplete in unbundled build (arcadia)

    autocomplete does not have to work fully unbundled build (since it lack
    of replxx).

    Similar to bd523a0aff1e1355300febcddadced05f393a15f

v4: set expect timeout back to 1 and increase total timeout to 20 sec
v4: set expect timeout back to 3 and increase total timeout to 22 (3*X+1) sec
---
 programs/client/Suggest.cpp                   |   5 +
 ...6_clickhouse_client_autocomplete.reference |   0
 .../01676_clickhouse_client_autocomplete.sh   | 114 ++++++++++++++++++
 .../queries/0_stateless/arcadia_skip_list.txt |   1 +
 tests/queries/skip_list.json                  |   1 +
 5 files changed, 121 insertions(+)
 create mode 100644 tests/queries/0_stateless/01676_clickhouse_client_autocomplete.reference
 create mode 100755 tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh

diff --git a/programs/client/Suggest.cpp b/programs/client/Suggest.cpp
index a32e019e89b..297b371d987 100644
--- a/programs/client/Suggest.cpp
+++ b/programs/client/Suggest.cpp
@@ -87,6 +87,11 @@ Suggest::Suggest()
 
 void Suggest::loadImpl(Connection & connection, const ConnectionTimeouts & timeouts, size_t suggestion_limit)
 {
+    ///
+    /// NOTE: Once you will update the completion list,
+    /// do not forget to update 01676_clickhouse_client_autocomplete.sh
+    ///
+
     std::stringstream query;        // STYLE_CHECK_ALLOW_STD_STRING_STREAM
     query << "SELECT DISTINCT arrayJoin(extractAll(name, '[\\\\w_]{2,}')) AS res FROM ("
         "SELECT name FROM system.functions"
diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.reference b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh
new file mode 100755
index 00000000000..08e07044841
--- /dev/null
+++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh
@@ -0,0 +1,114 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+function test_completion_word()
+{
+    local w=$1 && shift
+
+    local w_len=${#w}
+    local compword_begin=${w:0:$((w_len-3))}
+    local compword_end=${w:$((w_len-3))}
+
+    # NOTE: here and below you should escape variables of the expect.
+    timeout 22s expect << EOF
+log_user 0
+set timeout 3
+match_max 100000
+# A default timeout action is to do nothing, change it to fail
+expect_after {
+    timeout {
+        exit 1
+    }
+}
+
+spawn bash -c "$CLICKHOUSE_CLIENT_BINARY $CLICKHOUSE_CLIENT_OPT"
+expect ":) "
+
+# Make a query
+send -- "SET $compword_begin"
+expect "SET $compword_begin"
+
+# Wait for suggestions to load, they are loaded in background
+set is_done 0
+while {\$is_done == 0} {
+    send -- "\\t"
+    expect {
+        "$compword_begin$compword_end" {
+            set is_done 1
+        }
+        default {
+            sleep 1
+        }
+    }
+}
+
+send -- "\\3\\4"
+expect eof
+EOF
+}
+
+# last 3 bytes will be completed,
+# so take this in mind when you will update the list.
+compwords_positive=(
+    # system.functions
+    concatAssumeInjective
+    # system.table_engines
+    ReplacingMergeTree
+    # system.formats
+    JSONEachRow
+    # system.table_functions
+    clusterAllReplicas
+    # system.data_type_families
+    SimpleAggregateFunction
+    # system.merge_tree_settings
+    write_ahead_log_interval_ms_to_fsync
+    # system.settings
+    max_concurrent_queries_for_all_users
+    # system.clusters
+    test_shard_localhost
+    # system.errors, also it is very rare to cover system_events_show_zero_values
+    CONDITIONAL_TREE_PARENT_NOT_FOUND
+    # system.events, also it is very rare to cover system_events_show_zero_values
+    WriteBufferFromFileDescriptorWriteFailed
+    # system.asynchronous_metrics, also this metric has zero value
+    #
+    # NOTE: that there is no ability to complete metrics like
+    # jemalloc.background_thread.num_runs, due to "." is used as a word breaker
+    # (and this cannot be changed -- db.table)
+    ReplicasMaxAbsoluteDelay
+    # system.metrics
+    PartsPreCommitted
+    # system.macros
+    default_path_test
+    # system.storage_policies, egh not uniq
+    default
+    # system.aggregate_function_combinators
+    uniqCombined64ForEach
+
+    # FIXME: one may add separate case for suggestion_limit
+    # system.databases
+    system
+    # system.tables
+    aggregate_function_combinators
+    # system.columns
+    primary_key_bytes_in_memory_allocated
+    # system.dictionaries
+    # FIXME: none
+)
+for w in "${compwords_positive[@]}"; do
+    test_completion_word "$w" || echo "[FAIL] $w (positive)"
+done
+
+# One negative is enough
+compwords_negative=(
+    # system.clusters
+    test_shard_localhost_no_such_cluster
+)
+for w in "${compwords_negative[@]}"; do
+    test_completion_word "$w" && echo "[FAIL] $w (negative)"
+done
+
+exit 0
diff --git a/tests/queries/0_stateless/arcadia_skip_list.txt b/tests/queries/0_stateless/arcadia_skip_list.txt
index 253eab720af..269d4ed72b1 100644
--- a/tests/queries/0_stateless/arcadia_skip_list.txt
+++ b/tests/queries/0_stateless/arcadia_skip_list.txt
@@ -198,3 +198,4 @@
 01659_test_base64Decode_mysql_compatibility
 01675_data_type_coroutine
 01671_aggregate_function_group_bitmap_data
+01676_clickhouse_client_autocomplete
diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json
index 96da47ee9c2..727cb314748 100644
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@@ -90,6 +90,7 @@
         "01300_client_save_history_when_terminated",
         "orc_output",
         "01370_client_autocomplete_word_break_characters",
+        "01676_clickhouse_client_autocomplete",
         "01193_metadata_loading",
         "01455_time_zones"
     ],

From 1e2669fd3c5d4d15bc7f3f4f9cbb8a1cf45ee75b Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Fri, 29 Jan 2021 07:54:46 +0300
Subject: [PATCH 0358/1238] Fix error

---
 src/Common/StringSearcher.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Common/StringSearcher.h b/src/Common/StringSearcher.h
index c6f0cff71f1..7028ae8eae1 100644
--- a/src/Common/StringSearcher.h
+++ b/src/Common/StringSearcher.h
@@ -142,7 +142,7 @@ public:
                 continue;
             }
 
-            size_t src_len = UTF8::seqLength(*needle_pos);
+            size_t src_len = std::min<size_t>(needle_end - needle_pos, UTF8::seqLength(*needle_pos));
             int c_u32 = UTF8::convert(needle_pos);  /// This assumes valid UTF-8 or zero byte after needle.
 
             if (c_u32 >= 0)

From d729aacb0924350f347588377d929e67e7e91df5 Mon Sep 17 00:00:00 2001
From: Vladimir Chebotarev <vladimir.chebotarev@gmail.com>
Date: Fri, 29 Jan 2021 07:54:52 +0300
Subject: [PATCH 0359/1238] Fixed table function S3 `auto` compression mode.

---
 src/Storages/StorageS3.cpp                |  4 +--
 tests/integration/test_storage_s3/test.py | 34 +++++++++++++++--------
 2 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp
index 0af115dc0b5..cafd552978e 100644
--- a/src/Storages/StorageS3.cpp
+++ b/src/Storages/StorageS3.cpp
@@ -329,7 +329,7 @@ Pipe StorageS3::read(
             context,
             metadata_snapshot->getColumns(),
             max_block_size,
-            chooseCompressionMethod(uri.endpoint, compression_method),
+            chooseCompressionMethod(uri.key, compression_method),
             client,
             uri.bucket,
             key));
@@ -347,7 +347,7 @@ BlockOutputStreamPtr StorageS3::write(const ASTPtr & /*query*/, const StorageMet
         format_name,
         metadata_snapshot->getSampleBlock(),
         global_context,
-        chooseCompressionMethod(uri.endpoint, compression_method),
+        chooseCompressionMethod(uri.key, compression_method),
         client,
         uri.bucket,
         uri.key,
diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py
index a97493b5def..1f445feb5a0 100644
--- a/tests/integration/test_storage_s3/test.py
+++ b/tests/integration/test_storage_s3/test.py
@@ -443,10 +443,14 @@ def test_infinite_redirect(cluster):
         assert exception_raised
 
 
-def test_storage_s3_get_gzip(cluster):
+@pytest.mark.parametrize("extension,method", [
+    ("bin", "gzip"),
+    ("gz", "auto")
+])
+def test_storage_s3_get_gzip(cluster, extension, method):
     bucket = cluster.minio_bucket
     instance = cluster.instances["dummy"]
-    filename = "test_get_gzip.bin"
+    filename = f"test_get_gzip.{extension}"
     name = "test_get_gzip"
     data = [
         "Sophia Intrieri,55",
@@ -473,13 +477,15 @@ def test_storage_s3_get_gzip(cluster):
     put_s3_file_content(cluster, bucket, filename, buf.getvalue())
 
     try:
-        run_query(instance, "CREATE TABLE {} (name String, id UInt32) ENGINE = S3('http://{}:{}/{}/{}', 'CSV', 'gzip')".format(
-            name, cluster.minio_host, cluster.minio_port, bucket, filename))
+        run_query(instance, f"""CREATE TABLE {name} (name String, id UInt32) ENGINE = S3(
+                                    'http://{cluster.minio_host}:{cluster.minio_port}/{bucket}/{filename}',
+                                    'CSV',
+                                    '{method}')""")
 
         run_query(instance, "SELECT sum(id) FROM {}".format(name)).splitlines() == ["565"]
 
     finally:
-        run_query(instance, "DROP TABLE {}".format(name))
+        run_query(instance, f"DROP TABLE {name}")
 
 
 def test_storage_s3_put_uncompressed(cluster):
@@ -515,13 +521,17 @@ def test_storage_s3_put_uncompressed(cluster):
         uncompressed_content = get_s3_file_content(cluster, bucket, filename)
         assert sum([ int(i.split(',')[1]) for i in uncompressed_content.splitlines() ]) == 753
     finally:
-        run_query(instance, "DROP TABLE {}".format(name))
+        run_query(instance, f"DROP TABLE {name}")
 
 
-def test_storage_s3_put_gzip(cluster):
+@pytest.mark.parametrize("extension,method", [
+    ("bin", "gzip"),
+    ("gz", "auto")
+])
+def test_storage_s3_put_gzip(cluster, extension, method):
     bucket = cluster.minio_bucket
     instance = cluster.instances["dummy"]
-    filename = "test_put_gzip.bin"
+    filename = f"test_put_gzip.{extension}"
     name = "test_put_gzip"
     data = [
         "'Joseph Tomlinson',5",
@@ -541,8 +551,10 @@ def test_storage_s3_put_gzip(cluster):
         "'Yolanda Joseph',89"
     ]
     try:
-        run_query(instance, "CREATE TABLE {} (name String, id UInt32) ENGINE = S3('http://{}:{}/{}/{}', 'CSV', 'gzip')".format(
-            name, cluster.minio_host, cluster.minio_port, bucket, filename))
+        run_query(instance, f"""CREATE TABLE {name} (name String, id UInt32) ENGINE = S3(
+                                    'http://{cluster.minio_host}:{cluster.minio_port}/{bucket}/{filename}',
+                                    'CSV',
+                                    '{method}')""")
 
         run_query(instance, "INSERT INTO {} VALUES ({})".format(name, "),(".join(data)))
 
@@ -553,4 +565,4 @@ def test_storage_s3_put_gzip(cluster):
         uncompressed_content = f.read().decode()
         assert sum([ int(i.split(',')[1]) for i in uncompressed_content.splitlines() ]) == 708
     finally:
-        run_query(instance, "DROP TABLE {}".format(name))
+        run_query(instance, f"DROP TABLE {name}")

From 9477f8a8b11bc42a0609140d9c85ab9aa9487e1b Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Fri, 29 Jan 2021 08:27:58 +0300
Subject: [PATCH 0360/1238] Revert "Remove old non-automated test"

This reverts commit 217d05443a654bb35b760c2d144aa3516d30fd97.
---
 src/Client/tests/CMakeLists.txt   |  2 ++
 src/Client/tests/test_connect.cpp | 59 +++++++++++++++++++++++++++++++
 2 files changed, 61 insertions(+)
 create mode 100644 src/Client/tests/test_connect.cpp

diff --git a/src/Client/tests/CMakeLists.txt b/src/Client/tests/CMakeLists.txt
index e69de29bb2d..d952c006bb5 100644
--- a/src/Client/tests/CMakeLists.txt
+++ b/src/Client/tests/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_executable(test-connect test_connect.cpp)
+target_link_libraries (test-connect PRIVATE dbms)
diff --git a/src/Client/tests/test_connect.cpp b/src/Client/tests/test_connect.cpp
new file mode 100644
index 00000000000..50075cc24a6
--- /dev/null
+++ b/src/Client/tests/test_connect.cpp
@@ -0,0 +1,59 @@
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#include <iostream>
+#include <Poco/Net/StreamSocket.h>
+#include <Common/Exception.h>
+#include <IO/ReadHelpers.h>
+
+
+/** In a loop it connects to the server and immediately breaks the connection.
+  * Using the SO_LINGER option, we ensure that the connection is terminated by sending a RST packet (not FIN).
+  * This behavior causes a bug in the TCPServer implementation in the Poco library.
+  */
+int main(int argc, char ** argv)
+try
+{
+    for (size_t i = 0, num_iters = argc >= 2 ? DB::parse<size_t>(argv[1]) : 1; i < num_iters; ++i)
+    {
+        std::cerr << ".";
+
+        Poco::Net::SocketAddress address("localhost", 9000);
+
+        int fd = socket(PF_INET, SOCK_STREAM, IPPROTO_IP);
+
+        if (fd < 0)
+            DB::throwFromErrno("Cannot create socket", 0);
+
+        linger linger_value;
+        linger_value.l_onoff = 1;
+        linger_value.l_linger = 0;
+
+        if (0 != setsockopt(fd, SOL_SOCKET, SO_LINGER, &linger_value, sizeof(linger_value)))
+            DB::throwFromErrno("Cannot set linger", 0);
+
+        try
+        {
+            int res = connect(fd, address.addr(), address.length());
+
+            if (res != 0 && errno != EINPROGRESS && errno != EWOULDBLOCK)
+            {
+                close(fd);
+                DB::throwFromErrno("Cannot connect", 0);
+            }
+
+            close(fd);
+        }
+        catch (const Poco::Exception & e)
+        {
+            std::cerr << e.displayText() << "\n";
+        }
+    }
+
+    std::cerr << "\n";
+}
+catch (const Poco::Exception & e)
+{
+    std::cerr << e.displayText() << "\n";
+}

From 76894ccb9feaeab127521b3d9a120f8289c65c0a Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Fri, 29 Jan 2021 08:52:52 +0300
Subject: [PATCH 0361/1238] Update greatCircleDistance.cpp

---
 src/Functions/greatCircleDistance.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Functions/greatCircleDistance.cpp b/src/Functions/greatCircleDistance.cpp
index ac66c0d42fe..801f8b3da7f 100644
--- a/src/Functions/greatCircleDistance.cpp
+++ b/src/Functions/greatCircleDistance.cpp
@@ -95,7 +95,7 @@ void geodistInit()
 
         sphere_metric_meters_lut[i] = static_cast<float>(sqr((EARTH_DIAMETER * PI / 360) * cos(latitude)));
 
-        sphere_metric_lut[i] = sqr(cosf(latitude));
+        sphere_metric_lut[i] = sqrf(cosf(latitude));
     }
 }
 

From 96d45ca8f89a7bfea26d73067e6ba1497f57858e Mon Sep 17 00:00:00 2001
From: George <gyuton@yandex-team.ru>
Date: Fri, 29 Jan 2021 09:11:43 +0300
Subject: [PATCH 0362/1238] Minor update

---
 docs/en/sql-reference/functions/array-functions.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md
index 48ef103368b..c453acfa39e 100644
--- a/docs/en/sql-reference/functions/array-functions.md
+++ b/docs/en/sql-reference/functions/array-functions.md
@@ -1311,7 +1311,7 @@ arrayMin([func,] arr)
 
 -   The minimum of function values (or the array minimum). 
 
-Type: matches the array type. 
+Type: matches the array elements type. 
 
 **Examples**
 
@@ -1366,7 +1366,7 @@ arrayMax([func,] arr)
 
 -   The maximum of function values (or the array maximum). 
 
-Type: matches the array type (or type of function result). 
+Type: matches the array elements type. 
 
 **Examples**
 
@@ -1421,7 +1421,7 @@ arraySum([func,] arr)
 
 -   The sum of the function values (or the array sum).
 
-Type: matches the array type. 
+Type: for decimal numbers in source array — [Decimal128](../../sql-reference/data-types/decimal.md), for floating point numbers — [Float64](../../sql-reference/data-types/float.md), for numeric unsigned — [Int64](sql-reference/data-types/int-uint.md), and for numeric signed — [UInt64](sql-reference/data-types/int-uint.md).
 
 **Examples**
 

From bfcb12c2e9bcde8fd52b24534c6a160359137382 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Fri, 29 Jan 2021 09:13:43 +0300
Subject: [PATCH 0363/1238] Add test-connect tool

---
 src/Client/tests/test_connect.cpp | 102 +++++++++++++++++++++---------
 src/Interpreters/executeQuery.cpp |   1 -
 2 files changed, 71 insertions(+), 32 deletions(-)

diff --git a/src/Client/tests/test_connect.cpp b/src/Client/tests/test_connect.cpp
index 50075cc24a6..1259980f9a6 100644
--- a/src/Client/tests/test_connect.cpp
+++ b/src/Client/tests/test_connect.cpp
@@ -3,54 +3,94 @@
 #include <unistd.h>
 
 #include <iostream>
+#include <thread>
+#include <atomic>
 #include <Poco/Net/StreamSocket.h>
 #include <Common/Exception.h>
+#include <Common/Stopwatch.h>
 #include <IO/ReadHelpers.h>
 
 
 /** In a loop it connects to the server and immediately breaks the connection.
   * Using the SO_LINGER option, we ensure that the connection is terminated by sending a RST packet (not FIN).
-  * This behavior causes a bug in the TCPServer implementation in the Poco library.
+  * Long time ago this behavior caused a bug in the TCPServer implementation in the Poco library.
   */
 int main(int argc, char ** argv)
 try
 {
-    for (size_t i = 0, num_iters = argc >= 2 ? DB::parse<size_t>(argv[1]) : 1; i < num_iters; ++i)
+    size_t num_iterations = 1;
+    size_t num_threads = 1;
+    std::string host = "localhost";
+    uint16_t port = 9000;
+
+    if (argc >= 2)
+        num_iterations = DB::parse<size_t>(argv[1]);
+
+    if (argc >= 3)
+        num_threads = DB::parse<size_t>(argv[2]);
+
+    if (argc >= 4)
+        host = argv[3];
+
+    if (argc >= 5)
+        port = DB::parse<uint16_t>(argv[4]);
+
+    std::atomic_bool cancel{false};
+    std::vector<std::thread> threads(num_threads);
+    for (auto & thread : threads)
     {
-        std::cerr << ".";
-
-        Poco::Net::SocketAddress address("localhost", 9000);
-
-        int fd = socket(PF_INET, SOCK_STREAM, IPPROTO_IP);
-
-        if (fd < 0)
-            DB::throwFromErrno("Cannot create socket", 0);
-
-        linger linger_value;
-        linger_value.l_onoff = 1;
-        linger_value.l_linger = 0;
-
-        if (0 != setsockopt(fd, SOL_SOCKET, SO_LINGER, &linger_value, sizeof(linger_value)))
-            DB::throwFromErrno("Cannot set linger", 0);
-
-        try
+        thread = std::thread([&]
         {
-            int res = connect(fd, address.addr(), address.length());
-
-            if (res != 0 && errno != EINPROGRESS && errno != EWOULDBLOCK)
+            for (size_t i = 0; i < num_iterations && !cancel.load(std::memory_order_relaxed); ++i)
             {
-                close(fd);
-                DB::throwFromErrno("Cannot connect", 0);
-            }
+                std::cerr << ".";
 
-            close(fd);
-        }
-        catch (const Poco::Exception & e)
-        {
-            std::cerr << e.displayText() << "\n";
-        }
+                Poco::Net::SocketAddress address(host, port);
+
+                int fd = socket(PF_INET, SOCK_STREAM, IPPROTO_IP);
+
+                if (fd < 0)
+                    DB::throwFromErrno("Cannot create socket", 0);
+
+                linger linger_value;
+                linger_value.l_onoff = 1;
+                linger_value.l_linger = 0;
+
+                if (0 != setsockopt(fd, SOL_SOCKET, SO_LINGER, &linger_value, sizeof(linger_value)))
+                    DB::throwFromErrno("Cannot set linger", 0);
+
+                try
+                {
+                    Stopwatch watch;
+
+                    int res = connect(fd, address.addr(), address.length());
+
+                    if (res != 0 && errno != EINPROGRESS && errno != EWOULDBLOCK)
+                    {
+                        close(fd);
+                        DB::throwFromErrno("Cannot connect", 0);
+                    }
+
+                    close(fd);
+
+                    if (watch.elapsedSeconds() > 0.1)
+                    {
+                        std::cerr << watch.elapsedSeconds() << "\n";
+                        cancel = true;
+                        break;
+                    }
+                }
+                catch (const Poco::Exception & e)
+                {
+                    std::cerr << e.displayText() << "\n";
+                }
+            }
+        });
     }
 
+    for (auto & thread : threads)
+        thread.join();
+
     std::cerr << "\n";
 }
 catch (const Poco::Exception & e)
diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp
index 020e5af365a..50e891a3524 100644
--- a/src/Interpreters/executeQuery.cpp
+++ b/src/Interpreters/executeQuery.cpp
@@ -51,7 +51,6 @@
 #include <Interpreters/Context.h>
 #include <Common/ProfileEvents.h>
 
-#include <Interpreters/DNSCacheUpdater.h>
 #include <Common/SensitiveDataMasker.h>
 
 #include <Processors/Transforms/LimitsCheckingTransform.h>

From 9f71596809b5e045c3cbba6a412a64e2a24d0c96 Mon Sep 17 00:00:00 2001
From: George <gyuton@yandex-team.ru>
Date: Fri, 29 Jan 2021 09:48:53 +0300
Subject: [PATCH 0364/1238] Added translation

---
 .../functions/array-functions.md              |   2 +-
 .../functions/array-functions.md              | 220 +++++++++++++++++-
 2 files changed, 218 insertions(+), 4 deletions(-)

diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md
index c453acfa39e..ffbdbec535c 100644
--- a/docs/en/sql-reference/functions/array-functions.md
+++ b/docs/en/sql-reference/functions/array-functions.md
@@ -1421,7 +1421,7 @@ arraySum([func,] arr)
 
 -   The sum of the function values (or the array sum).
 
-Type: for decimal numbers in source array — [Decimal128](../../sql-reference/data-types/decimal.md), for floating point numbers — [Float64](../../sql-reference/data-types/float.md), for numeric unsigned — [Int64](sql-reference/data-types/int-uint.md), and for numeric signed — [UInt64](sql-reference/data-types/int-uint.md).
+Type: for decimal numbers in source array — [Decimal128](../../sql-reference/data-types/decimal.md), for floating point numbers — [Float64](../../sql-reference/data-types/float.md), for numeric unsigned — [UInt64](sql-reference/data-types/int-uint.md), and for numeric signed — [Int64](sql-reference/data-types/int-uint.md).
 
 **Examples**
 
diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md
index 015d14b9de5..ad706e33b1d 100644
--- a/docs/ru/sql-reference/functions/array-functions.md
+++ b/docs/ru/sql-reference/functions/array-functions.md
@@ -1135,11 +1135,225 @@ SELECT
 
 Функция `arrayFirstIndex` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен.
 
-## arraySum(\[func,\] arr1, …) {#array-sum}
+## arrayMin {#array-min}
 
-Возвращает сумму значений функции `func`. Если функция не указана - просто возвращает сумму элементов массива.
+Возвращает значение минимального элемента в исходном массиве. 
 
-Функция `arraySum` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) - в качестве первого аргумента ей можно передать лямбда-функцию.
+Если передана функция `func`, возвращается минимум из элементов массива, преобразованных этой функцией.
+
+Функция `arrayMin` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей можно передать лямбда-функцию.
+
+**Синтаксис**
+
+```sql
+arrayMin([func,] arr)
+```
+
+**Parameters**
+
+-   `func` — функция. [Expression](../../sql-reference/data-types/special-data-types/expression.md).
+-   `arr` — массив. [Array](../../sql-reference/data-types/array.md).
+
+**Возвращаемое значение**
+
+-   Минимальное значение функции (или минимальный элемент массива).
+
+Тип: соответствует типу элементов массива.
+
+**Примеры**
+
+Запрос:
+
+```sql
+SELECT arrayMin([1, 2, 4]) AS res;
+```
+
+Результат:
+
+```text
+┌─res─┐
+│   1 │
+└─────┘
+```
+
+Запрос:
+
+```
+SELECT arrayMin(x -> (-x), [1, 2, 4]) AS res;
+```
+
+Результат:
+
+```text
+┌─res─┐
+│  -4 │
+└─────┘
+```
+
+## arrayMax {#array-max}
+
+Возвращает значение максимального элемента в исходном массиве. 
+
+Если передана функция `func`, возвращается максимум из элементов массива, преобразованных этой функцией.
+
+Функция `arrayMax` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей можно передать лямбда-функцию.
+
+**Синтаксис**
+
+```sql
+arrayMax([func,] arr)
+```
+
+**Параметры**
+
+-   `func` — функция. [Expression](../../sql-reference/data-types/special-data-types/expression.md).
+-   `arr` — массив. [Array](../../sql-reference/data-types/array.md).
+
+**Возвращаемое значение**
+
+-   Максимальное значение функции (или максимальный элемент массива).
+
+Тип: соответствует типу элементов массива.
+
+**Примеры**
+
+Запрос:
+
+```sql
+SELECT arrayMax([1, 2, 4]) AS res;
+```
+
+Результат:
+
+```text
+┌─res─┐
+│   4 │
+└─────┘
+```
+
+Запрос:
+
+```sql
+SELECT arrayMax(x -> (-x), [1, 2, 4]) AS res;
+```
+
+Результат:
+
+```text
+┌─res─┐
+│  -1 │
+└─────┘
+```
+
+## arraySum {#array-sum}
+
+Возвращает сумму элементов в исходном массиве. 
+
+Если передана функция `func`, возвращается сумма элементов массива, преобразованных этой функцией.
+
+Функция `arraySum` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей можно передать лямбда-функцию.
+
+**Синтаксис**
+
+```sql
+arraySum([func,] arr)
+```
+
+**Параметры**
+
+-   `func` — функция. [Expression](../../sql-reference/data-types/special-data-types/expression.md).
+-   `arr` — массив. [Array](../../sql-reference/data-types/array.md).   
+
+**Возвращаемое значение**
+
+-   Сумма значений функции (или сумма элементов массива).
+
+Тип: для Decimal чисел в исходном массиве — [Decimal128](../../sql-reference/data-types/decimal.md), для чисел с плавающей точкой — [Float64](../../sql-reference/data-types/float.md), для беззнаковых целых чисел — [UInt64](sql-reference/data-types/int-uint.md), для целых чисел со знаком — [Int64](sql-reference/data-types/int-uint.md).
+
+**Примеры**
+
+Запрос:
+
+```sql
+SELECT arraySum([2, 3]) AS res;
+```
+
+Результат:
+
+```text
+┌─res─┐
+│   5 │
+└─────┘
+```
+
+Запрос:
+
+```sql
+SELECT arraySum(x -> x*x, [2, 3]) AS res;
+```
+
+Результат:
+
+```text
+┌─res─┐
+│  13 │
+└─────┘
+```
+
+## arrayAvg {#array-avg}
+
+Возвращает среднее значение элементов в исходном массиве. 
+
+Если передана функция `func`, возвращается среднее значение элементов массива, преобразованных этой функцией.
+
+Функция `arrayAvg` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей можно передать лямбда-функцию.
+
+**Синтаксис**
+
+```sql
+arrayAvg([func,] arr)
+```
+
+**Параметры**
+
+-   `func` — функция. [Expression](../../sql-reference/data-types/special-data-types/expression.md).
+-   `arr` — массив. [Array](../../sql-reference/data-types/array.md).   
+
+**Возвращаемое значение**
+
+-   Среднее значение функции (или среднее значение элементов массива).
+
+Тип: [Float64](../../sql-reference/data-types/float.md).
+
+**Примеры**
+
+Запрос:
+
+```sql
+SELECT arrayAvg([1, 2, 4]) AS res;
+```
+
+Результат:
+
+```text
+┌────────────────res─┐
+│ 2.3333333333333335 │
+└────────────────────┘
+```
+
+Запрос:
+
+```sql
+SELECT arrayAvg(x -> (x * x), [2, 4]) AS res;
+```
+
+Результат:
+
+```text
+┌─res─┐
+│  10 │
+└─────┘
+```
 
 ## arrayCumSum(\[func,\] arr1, …) {#arraycumsumfunc-arr1}
 

From e89c98044e1b1ff069d9dce20ac405b4b163ae9f Mon Sep 17 00:00:00 2001
From: George <gyuton@yandex-team.ru>
Date: Fri, 29 Jan 2021 09:58:31 +0300
Subject: [PATCH 0365/1238] fixed links

---
 docs/en/sql-reference/functions/array-functions.md | 2 +-
 docs/ru/sql-reference/functions/array-functions.md | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md
index ffbdbec535c..57f2f4e95ed 100644
--- a/docs/en/sql-reference/functions/array-functions.md
+++ b/docs/en/sql-reference/functions/array-functions.md
@@ -1421,7 +1421,7 @@ arraySum([func,] arr)
 
 -   The sum of the function values (or the array sum).
 
-Type: for decimal numbers in source array — [Decimal128](../../sql-reference/data-types/decimal.md), for floating point numbers — [Float64](../../sql-reference/data-types/float.md), for numeric unsigned — [UInt64](sql-reference/data-types/int-uint.md), and for numeric signed — [Int64](sql-reference/data-types/int-uint.md).
+Type: for decimal numbers in source array — [Decimal128](../../sql-reference/data-types/decimal.md), for floating point numbers — [Float64](../../sql-reference/data-types/float.md), for numeric unsigned — [UInt64](../../sql-reference/data-types/int-uint.md), and for numeric signed — [Int64](../../sql-reference/data-types/int-uint.md).
 
 **Examples**
 
diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md
index ad706e33b1d..2525787e7bf 100644
--- a/docs/ru/sql-reference/functions/array-functions.md
+++ b/docs/ru/sql-reference/functions/array-functions.md
@@ -1268,7 +1268,7 @@ arraySum([func,] arr)
 
 -   Сумма значений функции (или сумма элементов массива).
 
-Тип: для Decimal чисел в исходном массиве — [Decimal128](../../sql-reference/data-types/decimal.md), для чисел с плавающей точкой — [Float64](../../sql-reference/data-types/float.md), для беззнаковых целых чисел — [UInt64](sql-reference/data-types/int-uint.md), для целых чисел со знаком — [Int64](sql-reference/data-types/int-uint.md).
+Тип: для Decimal чисел в исходном массиве — [Decimal128](../../sql-reference/data-types/decimal.md), для чисел с плавающей точкой — [Float64](../../sql-reference/data-types/float.md), для беззнаковых целых чисел — [UInt64](../../sql-reference/data-types/int-uint.md), для целых чисел со знаком — [Int64](../../sql-reference/data-types/int-uint.md).
 
 **Примеры**
 

From 1bd95e474ca781a587513eda6f61d276391377f5 Mon Sep 17 00:00:00 2001
From: George <gyuton@yandex-team.ru>
Date: Fri, 29 Jan 2021 10:19:01 +0300
Subject: [PATCH 0366/1238] Minor fix

---
 docs/en/sql-reference/functions/array-functions.md | 2 +-
 docs/ru/sql-reference/functions/array-functions.md | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md
index 57f2f4e95ed..be6440bbe9c 100644
--- a/docs/en/sql-reference/functions/array-functions.md
+++ b/docs/en/sql-reference/functions/array-functions.md
@@ -1331,7 +1331,7 @@ Result:
 
 Query:
 
-```
+```sql
 SELECT arrayMin(x -> (-x), [1, 2, 4]) AS res;
 ```
 
diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md
index 2525787e7bf..3bba6f799c3 100644
--- a/docs/ru/sql-reference/functions/array-functions.md
+++ b/docs/ru/sql-reference/functions/array-functions.md
@@ -1178,7 +1178,7 @@ SELECT arrayMin([1, 2, 4]) AS res;
 
 Запрос:
 
-```
+```sql
 SELECT arrayMin(x -> (-x), [1, 2, 4]) AS res;
 ```
 

From 5a92e633a12177168cee7e559ac9e82bec2dca90 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Fri, 29 Jan 2021 10:37:46 +0300
Subject: [PATCH 0367/1238] Remove useless headers

---
 src/Core/MySQL/MySQLClient.cpp                | 2 ++
 src/Core/MySQL/MySQLClient.h                  | 1 -
 src/Interpreters/Context.cpp                  | 1 -
 src/Storages/System/StorageSystemClusters.cpp | 1 -
 4 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/Core/MySQL/MySQLClient.cpp b/src/Core/MySQL/MySQLClient.cpp
index f65fbe62274..e41b4128738 100644
--- a/src/Core/MySQL/MySQLClient.cpp
+++ b/src/Core/MySQL/MySQLClient.cpp
@@ -6,8 +6,10 @@
 #include <Core/MySQL/PacketsProtocolText.h>
 #include <Core/MySQL/PacketsReplication.h>
 #include <Core/MySQL/MySQLReplication.h>
+#include <Common/DNSResolver.h>
 #include <Poco/String.h>
 
+
 namespace DB
 {
 using namespace Generic;
diff --git a/src/Core/MySQL/MySQLClient.h b/src/Core/MySQL/MySQLClient.h
index 5835e980149..e503c985584 100644
--- a/src/Core/MySQL/MySQLClient.h
+++ b/src/Core/MySQL/MySQLClient.h
@@ -7,7 +7,6 @@
 #include <IO/WriteHelpers.h>
 #include <Poco/Net/NetException.h>
 #include <Poco/Net/StreamSocket.h>
-#include <Common/DNSResolver.h>
 #include <Common/Exception.h>
 #include <Common/NetException.h>
 #include <Core/MySQL/IMySQLWritePacket.h>
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 69e09b36e64..d7fa4da44cd 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -50,7 +50,6 @@
 #include <Interpreters/SystemLog.h>
 #include <Interpreters/Context.h>
 #include <Interpreters/DDLWorker.h>
-#include <Common/DNSResolver.h>
 #include <IO/ReadBufferFromFile.h>
 #include <IO/UncompressedCache.h>
 #include <Parsers/ASTCreateQuery.h>
diff --git a/src/Storages/System/StorageSystemClusters.cpp b/src/Storages/System/StorageSystemClusters.cpp
index 83d165f54f7..ae8bcca2804 100644
--- a/src/Storages/System/StorageSystemClusters.cpp
+++ b/src/Storages/System/StorageSystemClusters.cpp
@@ -1,4 +1,3 @@
-#include <Common/DNSResolver.h>
 #include <DataTypes/DataTypeString.h>
 #include <DataTypes/DataTypesNumber.h>
 #include <Interpreters/Cluster.h>

From 031132038b1e89eeb42e1dfb80cb4e4b8a00e6a5 Mon Sep 17 00:00:00 2001
From: Anton Popov <pad11rus@gmail.com>
Date: Fri, 29 Jan 2021 10:37:57 +0300
Subject: [PATCH 0368/1238] fix filtering by uint8 greater than 127

---
 src/Columns/ColumnFixedString.cpp             |  3 ++-
 src/Columns/ColumnVector.cpp                  |  3 ++-
 src/Columns/ColumnsCommon.cpp                 | 25 +++++++++++--------
 src/Common/memcmpSmall.h                      |  3 ++-
 .../MergeTree/MergeTreeRangeReader.cpp        |  9 ++++---
 .../01674_filter_by_uint8.reference           |  8 ++++++
 .../0_stateless/01674_filter_by_uint8.sql     | 14 +++++++++++
 7 files changed, 48 insertions(+), 17 deletions(-)
 create mode 100644 tests/queries/0_stateless/01674_filter_by_uint8.reference
 create mode 100644 tests/queries/0_stateless/01674_filter_by_uint8.sql

diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp
index a20e5d3ca0d..55e387ff2ee 100644
--- a/src/Columns/ColumnFixedString.cpp
+++ b/src/Columns/ColumnFixedString.cpp
@@ -289,7 +289,8 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result
 
     while (filt_pos < filt_end_sse)
     {
-        int mask = _mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
+        UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
+        mask = ~mask;
 
         if (0 == mask)
         {
diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp
index fcbcc63731a..a075c10a8a9 100644
--- a/src/Columns/ColumnVector.cpp
+++ b/src/Columns/ColumnVector.cpp
@@ -356,7 +356,8 @@ ColumnPtr ColumnVector<T>::filter(const IColumn::Filter & filt, ssize_t result_s
 
     while (filt_pos < filt_end_sse)
     {
-        int mask = _mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
+        UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
+        mask = ~mask;
 
         if (0 == mask)
         {
diff --git a/src/Columns/ColumnsCommon.cpp b/src/Columns/ColumnsCommon.cpp
index f3f10a25df3..3c356afa4da 100644
--- a/src/Columns/ColumnsCommon.cpp
+++ b/src/Columns/ColumnsCommon.cpp
@@ -17,13 +17,17 @@ namespace DB
 static UInt64 toBits64(const Int8 * bytes64)
 {
     static const __m128i zero16 = _mm_setzero_si128();
-    return static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64)), zero16)))
-        | (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 16)), zero16)))
-           << 16)
-        | (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 32)), zero16)))
-           << 32)
-        | (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 48)), zero16)))
-           << 48);
+    UInt64 res =
+        static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
+            _mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64)), zero16)))
+        | (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
+            _mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 16)), zero16))) << 16)
+        | (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
+            _mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 32)), zero16))) << 32)
+        | (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
+            _mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 48)), zero16))) << 48);
+
+    return ~res;
 }
 #endif
 
@@ -49,7 +53,7 @@ size_t countBytesInFilter(const UInt8 * filt, size_t sz)
 #endif
 
     for (; pos < end; ++pos)
-        count += *pos > 0;
+        count += *pos != 0;
 
     return count;
 }
@@ -82,7 +86,7 @@ size_t countBytesInFilterWithNull(const IColumn::Filter & filt, const UInt8 * nu
 #endif
 
     for (; pos < end; ++pos)
-        count += (*pos & ~*pos2) > 0;
+        count += (*pos & ~*pos2) != 0;
 
     return count;
 }
@@ -232,9 +236,10 @@ namespace
 
         while (filt_pos < filt_end_aligned)
         {
-            const auto mask = _mm_movemask_epi8(_mm_cmpgt_epi8(
+            UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(
                 _mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)),
                 zero_vec));
+            mask = ~mask;
 
             if (mask == 0)
             {
diff --git a/src/Common/memcmpSmall.h b/src/Common/memcmpSmall.h
index bafc08a9cbe..db8641cb44d 100644
--- a/src/Common/memcmpSmall.h
+++ b/src/Common/memcmpSmall.h
@@ -120,9 +120,10 @@ inline int memcmpSmallLikeZeroPaddedAllowOverflow15(const Char * a, size_t a_siz
 
     for (size_t offset = min_size; offset < max_size; offset += 16)
     {
-        uint16_t mask = _mm_movemask_epi8(_mm_cmpgt_epi8(
+        uint16_t mask = _mm_movemask_epi8(_mm_cmpeq_epi8(
             _mm_loadu_si128(reinterpret_cast<const __m128i *>(longest + offset)),
             zero16));
+        mask = ~mask;
 
         if (mask)
         {
diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp
index c13146bd35c..4e5e7e6f946 100644
--- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp
+++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp
@@ -414,18 +414,19 @@ size_t MergeTreeRangeReader::ReadResult::numZerosInTail(const UInt8 * begin, con
         end -= 64;
         const auto * pos = end;
         UInt64 val =
-                static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
+                static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
                         _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos)),
                         zero16)))
-                | (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
+                | (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
                         _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos + 16)),
                         zero16))) << 16u)
-                | (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
+                | (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
                         _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos + 32)),
                         zero16))) << 32u)
-                | (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
+                | (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
                         _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos + 48)),
                         zero16))) << 48u);
+        val = ~val;
         if (val == 0)
             count += 64;
         else
diff --git a/tests/queries/0_stateless/01674_filter_by_uint8.reference b/tests/queries/0_stateless/01674_filter_by_uint8.reference
new file mode 100644
index 00000000000..6b522898280
--- /dev/null
+++ b/tests/queries/0_stateless/01674_filter_by_uint8.reference
@@ -0,0 +1,8 @@
+0
+0
+255
+1	['foo','bar']	1	1
+2	['foo','bar']	2	1
+3	['foo','bar']	3	1
+4	['foo','bar']	4	1
+5	['foo','bar']	5	1
diff --git a/tests/queries/0_stateless/01674_filter_by_uint8.sql b/tests/queries/0_stateless/01674_filter_by_uint8.sql
new file mode 100644
index 00000000000..960153d9c5a
--- /dev/null
+++ b/tests/queries/0_stateless/01674_filter_by_uint8.sql
@@ -0,0 +1,14 @@
+-- ORDER BY is to trigger comparison at uninitialized memory after bad filtering.
+SELECT ignore(number) FROM numbers(256) ORDER BY arrayFilter(x -> materialize(255), materialize([257])) LIMIT 1;
+SELECT ignore(number) FROM numbers(256) ORDER BY arrayFilter(x -> materialize(255), materialize(['257'])) LIMIT 1;
+
+SELECT count() FROM numbers(256) WHERE toUInt8(number);
+
+DROP TABLE IF EXISTS t_filter;
+CREATE TABLE t_filter(s String, a Array(FixedString(3)), u UInt64, f UInt8)
+ENGINE = MergeTree ORDER BY u;
+
+INSERT INTO t_filter SELECT toString(number), ['foo', 'bar'], number, toUInt8(number) FROM numbers(1000);
+SELECT * FROM t_filter WHERE f LIMIT 5;
+
+DROP TABLE IF EXISTS t_filter;

From f7fffe173c3487cba20f19f67434d99309a38037 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Fri, 29 Jan 2021 10:57:22 +0300
Subject: [PATCH 0369/1238] Set charset to utf8mb4 in mariadb-connector

---
 base/mysqlxx/Connection.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/base/mysqlxx/Connection.cpp b/base/mysqlxx/Connection.cpp
index 55757008562..8a15115cb06 100644
--- a/base/mysqlxx/Connection.cpp
+++ b/base/mysqlxx/Connection.cpp
@@ -116,8 +116,8 @@ void Connection::connect(const char* db,
     if (!mysql_real_connect(driver.get(), server, user, password, db, port, ifNotEmpty(socket), driver->client_flag))
         throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get()));
 
-    /// Sets UTF-8 as default encoding.
-    if (mysql_set_character_set(driver.get(), "UTF8"))
+    /// Sets UTF-8 as default encoding. See https://mariadb.com/kb/en/mysql_set_character_set/
+    if (mysql_set_character_set(driver.get(), "utf8mb4"))
         throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get()));
 
     is_connected = true;

From c4c9ab5d7d372acef2d09fdb3eedec794b6017d0 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Fri, 29 Jan 2021 12:27:35 +0300
Subject: [PATCH 0370/1238] Fix bad test

---
 .../0_stateless/01593_concurrent_alter_mutations_kill.sh        | 2 +-
 .../01593_concurrent_alter_mutations_kill_many_replicas.sh      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill.sh b/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill.sh
index 7f111538a06..6ae103bdf6e 100755
--- a/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill.sh
+++ b/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill.sh
@@ -28,7 +28,7 @@ function kill_mutation_thread
         # find any mutation and kill it
         mutation_id=$($CLICKHOUSE_CLIENT --query "SELECT mutation_id FROM system.mutations WHERE is_done=0 and database='${CLICKHOUSE_DATABASE}' and table='concurrent_mutate_kill' LIMIT 1")
         if [ ! -z "$mutation_id" ]; then
-            $CLICKHOUSE_CLIENT --query "KILL MUTATION WHERE mutation_id='$mutation_id'" 1> /dev/null
+            $CLICKHOUSE_CLIENT --query "KILL MUTATION WHERE mutation_id='$mutation_id' and table='concurrent_mutate_kill' and database='${CLICKHOUSE_DATABASE}'" 1> /dev/null
             sleep 1
         fi
     done
diff --git a/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill_many_replicas.sh b/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill_many_replicas.sh
index 60e2adb4204..bfa68328c06 100755
--- a/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill_many_replicas.sh
+++ b/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill_many_replicas.sh
@@ -40,7 +40,7 @@ function kill_mutation_thread
         # find any mutation and kill it
         mutation_id=$($CLICKHOUSE_CLIENT --query "SELECT mutation_id FROM system.mutations WHERE is_done = 0 and table like 'concurrent_kill_%' and database='${CLICKHOUSE_DATABASE}' LIMIT 1")
         if [ ! -z "$mutation_id" ]; then
-            $CLICKHOUSE_CLIENT --query "KILL MUTATION WHERE mutation_id='$mutation_id'" 1> /dev/null
+            $CLICKHOUSE_CLIENT --query "KILL MUTATION WHERE mutation_id='$mutation_id' and table like 'concurrent_kill_%' and database='${CLICKHOUSE_DATABASE}'" 1> /dev/null
             sleep 1
         fi
     done

From b1a02481dbcf7de420eab47cf738cfc15476cd35 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Fri, 29 Jan 2021 12:51:47 +0300
Subject: [PATCH 0371/1238] add test

---
 .../0_stateless/00945_bloom_filter_index.reference     |  8 ++++++++
 tests/queries/0_stateless/00945_bloom_filter_index.sql | 10 ++++++++++
 2 files changed, 18 insertions(+)

diff --git a/tests/queries/0_stateless/00945_bloom_filter_index.reference b/tests/queries/0_stateless/00945_bloom_filter_index.reference
index 184aafdd568..c0c2254648e 100644
--- a/tests/queries/0_stateless/00945_bloom_filter_index.reference
+++ b/tests/queries/0_stateless/00945_bloom_filter_index.reference
@@ -211,6 +211,14 @@
 2
 1
 1
+2
+2
+2
+2
+1
+2
+1
+2
 1	value1
 1	value2
 2	value3
diff --git a/tests/queries/0_stateless/00945_bloom_filter_index.sql b/tests/queries/0_stateless/00945_bloom_filter_index.sql
index ad9c807fc5a..f45c4c04290 100644
--- a/tests/queries/0_stateless/00945_bloom_filter_index.sql
+++ b/tests/queries/0_stateless/00945_bloom_filter_index.sql
@@ -348,6 +348,16 @@ SELECT id FROM test_bf_indexOf WHERE 1 <= indexOf(ary, 'value1') ORDER BY id FOR
 SELECT id FROM test_bf_indexOf WHERE indexOf(ary, 'value1') >= 2 ORDER BY id FORMAT TSV;
 SELECT id FROM test_bf_indexOf WHERE 2 <= indexOf(ary, 'value1') ORDER BY id FORMAT TSV;
 
+SELECT id FROM test_bf_indexOf WHERE indexOf(ary, 'value1') = toDecimal32(0, 2) ORDER BY id FORMAT TSV;
+SELECT id FROM test_bf_indexOf WHERE toDecimal128(0, 2) = indexOf(ary, 'value1') ORDER BY id FORMAT TSV;
+SELECT id FROM test_bf_indexOf WHERE indexOf(ary, 'value1') = '0' ORDER BY id FORMAT TSV;
+SELECT id FROM test_bf_indexOf WHERE '0' = indexOf(ary, 'value1') ORDER BY id FORMAT TSV;
+
+SELECT id FROM test_bf_indexOf WHERE indexOf(ary, 'value1') > toDecimal32(0, 2) ORDER BY id FORMAT TSV;
+SELECT id FROM test_bf_indexOf WHERE indexOf(ary, 'value1') < toDecimal128(1, 2) ORDER BY id FORMAT TSV;
+SELECT id FROM test_bf_indexOf WHERE indexOf(ary, 'value1') > '0' ORDER BY id FORMAT TSV;
+SELECT id FROM test_bf_indexOf WHERE indexOf(ary, 'value1') < '1' ORDER BY id FORMAT TSV;
+
 SELECT id, ary[indexOf(ary, 'value1')] FROM test_bf_indexOf WHERE ary[indexOf(ary, 'value1')] = 'value1' ORDER BY id FORMAT TSV;
 SELECT id, ary[indexOf(ary, 'value2')] FROM test_bf_indexOf WHERE ary[indexOf(ary, 'value2')] = 'value2' ORDER BY id FORMAT TSV;
 SELECT id, ary[indexOf(ary, 'value3')] FROM test_bf_indexOf WHERE ary[indexOf(ary, 'value3')] = 'value3' ORDER BY id FORMAT TSV;

From 9f9b7fc3e26da5118a74aaddc7dcae038b7e9d46 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Fri, 29 Jan 2021 13:05:21 +0300
Subject: [PATCH 0372/1238] Update ya.make

---
 src/Processors/ya.make | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/Processors/ya.make b/src/Processors/ya.make
index f05a9f0bdba..1c6f0cf1f7c 100644
--- a/src/Processors/ya.make
+++ b/src/Processors/ya.make
@@ -119,6 +119,7 @@ SRCS(
     QueryPlan/Optimizations/liftUpArrayJoin.cpp
     QueryPlan/Optimizations/limitPushDown.cpp
     QueryPlan/Optimizations/mergeExpressions.cpp
+    QueryPlan/Optimizations/optimizeTree.cpp
     QueryPlan/Optimizations/splitFilter.cpp
     QueryPlan/PartialSortingStep.cpp
     QueryPlan/QueryPlan.cpp

From 0ee05d34fc487d044339337a6c96a8440be7ce97 Mon Sep 17 00:00:00 2001
From: Alexander Gololobov <agololobov@gmail.com>
Date: Fri, 29 Jan 2021 14:54:54 +0300
Subject: [PATCH 0373/1238] Fix dependency on ODBC for Yandex internal build

---
 src/Dictionaries/XDBCDictionarySource.cpp | 2 +-
 src/Dictionaries/ya.make                  | 5 ++++-
 src/Dictionaries/ya.make.in               | 5 ++++-
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/Dictionaries/XDBCDictionarySource.cpp b/src/Dictionaries/XDBCDictionarySource.cpp
index aa27f4efc25..3615f72605f 100644
--- a/src/Dictionaries/XDBCDictionarySource.cpp
+++ b/src/Dictionaries/XDBCDictionarySource.cpp
@@ -21,7 +21,7 @@
 #include "registerDictionaries.h"
 
 #if USE_ODBC
-#    include <Poco/Data/ODBC/Connector.h>
+#    include <Poco/Data/ODBC/Connector.h> // Y_IGNORE
 #endif
 
 namespace DB
diff --git a/src/Dictionaries/ya.make b/src/Dictionaries/ya.make
index 19a0f5008b8..1e658430e27 100644
--- a/src/Dictionaries/ya.make
+++ b/src/Dictionaries/ya.make
@@ -6,12 +6,15 @@ LIBRARY()
 PEERDIR(
     clickhouse/src/Common
     contrib/libs/poco/Data
-    contrib/libs/poco/Data/ODBC
     contrib/libs/poco/MongoDB
     contrib/libs/poco/Redis
     contrib/libs/sparsehash
 )
 
+IF (USE_ODBC)
+    PEERDIR(contrib/libs/poco/Data/ODBC)
+ENDIF ()
+
 NO_COMPILER_WARNINGS()
 
 
diff --git a/src/Dictionaries/ya.make.in b/src/Dictionaries/ya.make.in
index 5df5803e7f4..e52b106d034 100644
--- a/src/Dictionaries/ya.make.in
+++ b/src/Dictionaries/ya.make.in
@@ -5,12 +5,15 @@ LIBRARY()
 PEERDIR(
     clickhouse/src/Common
     contrib/libs/poco/Data
-    contrib/libs/poco/Data/ODBC
     contrib/libs/poco/MongoDB
     contrib/libs/poco/Redis
     contrib/libs/sparsehash
 )
 
+IF (USE_ODBC)
+    PEERDIR(contrib/libs/poco/Data/ODBC)
+ENDIF ()
+
 NO_COMPILER_WARNINGS()
 
 
From b602f259f5e2c5e9c08de8ab02a677eecb3fa1cb Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Fri, 29 Jan 2021 15:24:52 +0300
Subject: [PATCH 0374/1238] Fix race in NuRaft

---
 .gitmodules    | 2 +-
 contrib/NuRaft | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.gitmodules b/.gitmodules
index ecccf0633e2..ecefbc32ae6 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -220,4 +220,4 @@
 	url = https://github.com/ClickHouse-Extras/boringssl.git
 [submodule "contrib/NuRaft"]
 	path = contrib/NuRaft
-	url = https://github.com/eBay/NuRaft.git
+	url = https://github.com/ClickHouse-Extras/NuRaft.git
diff --git a/contrib/NuRaft b/contrib/NuRaft
index 410bd149da8..6b6aedebcf1 160000
--- a/contrib/NuRaft
+++ b/contrib/NuRaft
@@ -1 +1 @@
-Subproject commit 410bd149da84cdde60b4436b02b738749f4e87e1
+Subproject commit 6b6aedebcf15ec362c4b6a1390c0b0802bb3e2c2

From 6781c9f61da6b601969bf059162e623b07324b09 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Fri, 29 Jan 2021 15:34:53 +0300
Subject: [PATCH 0375/1238] One more fix

---
 contrib/NuRaft | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/NuRaft b/contrib/NuRaft
index 6b6aedebcf1..644c264252a 160000
--- a/contrib/NuRaft
+++ b/contrib/NuRaft
@@ -1 +1 @@
-Subproject commit 6b6aedebcf15ec362c4b6a1390c0b0802bb3e2c2
+Subproject commit 644c264252aae91d9ad58366b086641bf8314008

From bac8cc55d2c48404a4b6b85ca09d15114620ef52 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Fri, 29 Jan 2021 15:39:04 +0300
Subject: [PATCH 0376/1238] Now we answer from follower nodes

---
 src/Coordination/NuKeeperServer.cpp            |  3 ++-
 .../TestKeeperStorageDispatcher.cpp            |  9 +++------
 src/Server/TestKeeperTCPHandler.cpp            | 18 +++++++-----------
 src/Server/TestKeeperTCPHandler.h              |  2 +-
 4 files changed, 13 insertions(+), 19 deletions(-)

diff --git a/src/Coordination/NuKeeperServer.cpp b/src/Coordination/NuKeeperServer.cpp
index 8995b51a13b..bcc348d1be3 100644
--- a/src/Coordination/NuKeeperServer.cpp
+++ b/src/Coordination/NuKeeperServer.cpp
@@ -47,6 +47,7 @@ void NuKeeperServer::startup()
     params.reserved_log_items_ = 5000;
     params.snapshot_distance_ = 5000;
     params.client_req_timeout_ = 10000;
+    params.auto_forwarding_ = true;
     params.return_method_ = nuraft::raft_params::blocking;
 
     raft_instance = launcher.init(
@@ -146,7 +147,7 @@ TestKeeperStorage::ResponsesForSessions NuKeeperServer::readZooKeeperResponses(n
 
 TestKeeperStorage::ResponsesForSessions NuKeeperServer::putRequests(const TestKeeperStorage::RequestsForSessions & requests)
 {
-    if (isLeader() && requests.size() == 1 && requests[0].request->isReadRequest())
+    if (raft_instance->is_leader_alive() && requests.size() == 1 && requests[0].request->isReadRequest())
     {
         return state_machine->processReadRequest(requests[0]);
     }
diff --git a/src/Coordination/TestKeeperStorageDispatcher.cpp b/src/Coordination/TestKeeperStorageDispatcher.cpp
index 685fa58f8ad..d5682e1688b 100644
--- a/src/Coordination/TestKeeperStorageDispatcher.cpp
+++ b/src/Coordination/TestKeeperStorageDispatcher.cpp
@@ -175,12 +175,9 @@ void TestKeeperStorageDispatcher::shutdown()
         if (server)
         {
             TestKeeperStorage::RequestsForSessions expired_requests;
-            if (server->isLeader())
-            {
-                TestKeeperStorage::RequestForSession request;
-                while (requests_queue.tryPop(request))
-                    expired_requests.push_back(TestKeeperStorage::RequestForSession{request});
-            }
+            TestKeeperStorage::RequestForSession request;
+            while (requests_queue.tryPop(request))
+                expired_requests.push_back(TestKeeperStorage::RequestForSession{request});
 
             auto expired_responses = server->shutdown(expired_requests);
 
diff --git a/src/Server/TestKeeperTCPHandler.cpp b/src/Server/TestKeeperTCPHandler.cpp
index 04e5c6ece1d..5e5ba19f1a6 100644
--- a/src/Server/TestKeeperTCPHandler.cpp
+++ b/src/Server/TestKeeperTCPHandler.cpp
@@ -232,14 +232,10 @@ TestKeeperTCPHandler::TestKeeperTCPHandler(IServer & server_, const Poco::Net::S
 {
 }
 
-void TestKeeperTCPHandler::sendHandshake(bool is_leader)
+void TestKeeperTCPHandler::sendHandshake()
 {
     Coordination::write(Coordination::SERVER_HANDSHAKE_LENGTH, *out);
-    if (is_leader)
-        Coordination::write(Coordination::ZOOKEEPER_PROTOCOL_VERSION, *out);
-    else /// Specially ignore connections if we are not leader, client will throw exception
-        Coordination::write(42, *out);
-
+    Coordination::write(Coordination::ZOOKEEPER_PROTOCOL_VERSION, *out);
     Coordination::write(Coordination::DEFAULT_SESSION_TIMEOUT_MS, *out);
     Coordination::write(session_id, *out);
     std::array<char, Coordination::PASSWORD_LENGTH> passwd{};
@@ -319,18 +315,18 @@ void TestKeeperTCPHandler::runImpl()
         return;
     }
 
-    if (test_keeper_storage_dispatcher->isLeader())
+    try
     {
         session_id = test_keeper_storage_dispatcher->getSessionID();
-        sendHandshake(true);
     }
-    else
+    catch (const Exception & e)
     {
-        sendHandshake(false);
-        LOG_WARNING(log, "Ignoring connection because we are not leader");
+        LOG_WARNING(log, "Cannot receive session id {}", e.displayText());
         return;
     }
 
+    sendHandshake();
+
     auto response_fd = poll_wrapper->getResponseFD();
     auto response_callback = [this, response_fd] (const Coordination::ZooKeeperResponsePtr & response)
     {
diff --git a/src/Server/TestKeeperTCPHandler.h b/src/Server/TestKeeperTCPHandler.h
index bb74513afce..e7372e8dd82 100644
--- a/src/Server/TestKeeperTCPHandler.h
+++ b/src/Server/TestKeeperTCPHandler.h
@@ -45,7 +45,7 @@ private:
 
     void runImpl();
 
-    void sendHandshake(bool is_leader);
+    void sendHandshake();
     void receiveHandshake();
 
     std::pair<Coordination::OpNum, Coordination::XID> receiveRequest();

From 0943ecb37b94df3c1c90cb1bb977ba5c84eab144 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Fri, 29 Jan 2021 15:46:14 +0300
Subject: [PATCH 0377/1238] More sequential tests

---
 tests/queries/skip_list.json | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json
index 96da47ee9c2..b25b96b6af9 100644
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@@ -557,6 +557,8 @@
         "01541_max_memory_usage_for_user",
         "01542_dictionary_load_exception_race",
         "01575_disable_detach_table_of_dictionary",
+        "01593_concurrent_alter_mutations_kill",
+        "01593_concurrent_alter_mutations_kill_many_replicas",
         "01600_count_of_parts_metrics", // tests global system metrics
         "01600_detach_permanently",
         "01600_log_queries_with_extensive_info",

From 626a23afc7ff71eabfb89750d2c77307040d2e23 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Fri, 29 Jan 2021 15:51:10 +0300
Subject: [PATCH 0378/1238] Fix option

---
 docker/test/stateless/run.sh | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh
index fb510a87fcd..575be721a54 100755
--- a/docker/test/stateless/run.sh
+++ b/docker/test/stateless/run.sh
@@ -53,10 +53,12 @@ function run_tests()
     if [ "$NUM_TRIES" -gt "1" ]; then
         ADDITIONAL_OPTIONS+=('--skip')
         ADDITIONAL_OPTIONS+=('00000_no_tests_to_skip')
+        ADDITIONAL_OPTIONS+=('--jobs')
+        ADDITIONAL_OPTIONS+=('4')
     fi
 
     clickhouse-test --testname --shard --zookeeper --hung-check --print-time \
-            --test-runs "$NUM_TRIES" --jobs 4 \
+            --test-runs "$NUM_TRIES" \
             "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
         | ts '%Y-%m-%d %H:%M:%S' \
         | tee -a test_output/test_result.txt

From 941f60a7b50973fb2a773b51c586bd521429d38f Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Fri, 29 Jan 2021 16:17:17 +0300
Subject: [PATCH 0379/1238] Temporary retrun dicttoxml to integration tests
 runner

---
 docker/test/integration/runner/Dockerfile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile
index 56abf1122b2..f353931f0a0 100644
--- a/docker/test/integration/runner/Dockerfile
+++ b/docker/test/integration/runner/Dockerfile
@@ -63,6 +63,7 @@ RUN python3 -m pip install \
     cassandra-driver \
     confluent-kafka \
     dict2xml \
+    dicttoxml \
     docker \
     docker-compose==1.22.0 \
     grpcio \

From 71f4acd48b6a66aaf5ca1f7191ad5af3513aab66 Mon Sep 17 00:00:00 2001
From: Pavel Kruglov <avogar@sandbox-633380738>
Date: Fri, 29 Jan 2021 17:30:14 +0300
Subject: [PATCH 0380/1238] Use one pool for lonely parts, update tests

---
 .../MergeTree/MergeTreeDataSelectExecutor.cpp | 91 +++++++++++++------
 .../optimized_select_final_one_part.xml       |  2 +-
 ...e_across_partitions_select_final.reference |  1 +
 ...t_merge_across_partitions_select_final.sql |  1 +
 4 files changed, 67 insertions(+), 28 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index e2aa33d2be3..cf8799052df 100644
--- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -1335,11 +1335,12 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
         data_settings->index_granularity,
         index_granularity_bytes);
 
-    const size_t min_marks_for_concurrent_read = roundRowsOrBytesToMarks(
+    const size_t min_marks_for_concurrent_read = minMarksForConcurrentRead(
         settings.merge_tree_min_rows_for_concurrent_read,
         settings.merge_tree_min_bytes_for_concurrent_read,
         data_settings->index_granularity,
-        index_granularity_bytes);
+        index_granularity_bytes,
+        sum_marks);
 
     if (sum_marks > max_marks_to_use_cache)
         use_uncompressed_cache = false;
@@ -1376,6 +1377,11 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
 
     std::vector<QueryPlanPtr> partition_plans;
 
+    /// If do_not_merge_across_partitions_select_final is true and num_streams > 1
+    /// we will store lonely parts with level > 0 to use parallel select on them.
+    std::vector<RangesInDataPart> lonely_parts;
+    size_t total_rows_in_lonely_parts = 0;
+
     for (size_t range_index = 0; range_index < parts_to_merge_ranges.size() - 1; ++range_index)
     {
         QueryPlanPtr plan;
@@ -1385,35 +1391,15 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
 
             /// If do_not_merge_across_partitions_select_final is true and there is only one part in partition
             /// with level > 0 then we won't postprocess this part and if num_streams > 1 we
-            /// can use parallel select on this part.
+            /// can use parallel select on such parts. We save such parts in one vector and then use
+            /// MergeTreeReadPool and MergeTreeThreadSelectBlockInputProcessor for parallel select.
             if (num_streams > 1 && settings.do_not_merge_across_partitions_select_final &&
                 std::distance(parts_to_merge_ranges[range_index], parts_to_merge_ranges[range_index + 1]) == 1 &&
                 parts_to_merge_ranges[range_index]->data_part->info.level > 0)
             {
-                MergeTreeReadPoolPtr pool = std::make_shared<MergeTreeReadPool>(
-                    num_streams,
-                    sum_marks,
-                    min_marks_for_concurrent_read,
-                    std::vector{*std::move(parts_to_merge_ranges[range_index])},
-                    data,
-                    metadata_snapshot,
-                    query_info.prewhere_info,
-                    true,
-                    column_names,
-                    MergeTreeReadPool::BackoffSettings(settings),
-                    settings.preferred_block_size_bytes,
-                    false);
-
-                for (size_t i = 0; i < num_streams; ++i)
-                {
-                    auto source = std::make_shared<MergeTreeThreadSelectBlockInputProcessor>(
-                        i, pool, min_marks_for_concurrent_read, max_block_size,
-                        settings.preferred_block_size_bytes, settings.preferred_max_column_in_block_size_bytes,
-                        data, metadata_snapshot, use_uncompressed_cache,
-                        query_info.prewhere_info, reader_settings, virt_columns);
-
-                    pipes.emplace_back(std::move(source));
-                }
+                total_rows_in_lonely_parts += parts_to_merge_ranges[range_index]->getRowsCount();
+                lonely_parts.push_back(std::move(*parts_to_merge_ranges[range_index]));
+                continue;
             }
             else
             {
@@ -1493,6 +1479,57 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
         partition_plans.emplace_back(std::move(plan));
     }
 
+    if (!lonely_parts.empty())
+    {
+        Pipes pipes;
+
+        size_t num_streams_for_lonely_parts = num_streams * lonely_parts.size();
+
+        MergeTreeReadPoolPtr pool = std::make_shared<MergeTreeReadPool>(
+            num_streams_for_lonely_parts,
+            sum_marks,
+            min_marks_for_concurrent_read,
+            std::move(lonely_parts),
+            data,
+            metadata_snapshot,
+            query_info.prewhere_info,
+            true,
+            column_names,
+            MergeTreeReadPool::BackoffSettings(settings),
+            settings.preferred_block_size_bytes,
+            false);
+
+        LOG_TRACE(log, "Reading approx. {} rows with {} streams", total_rows_in_lonely_parts, num_streams_for_lonely_parts);
+
+        for (size_t i = 0; i < num_streams_for_lonely_parts; ++i)
+        {
+            auto source = std::make_shared<MergeTreeThreadSelectBlockInputProcessor>(
+                i, pool, min_marks_for_concurrent_read, max_block_size,
+                settings.preferred_block_size_bytes, settings.preferred_max_column_in_block_size_bytes,
+                data, metadata_snapshot, use_uncompressed_cache,
+                query_info.prewhere_info, reader_settings, virt_columns);
+
+            pipes.emplace_back(std::move(source));
+        }
+
+        auto pipe = Pipe::unitePipes(std::move(pipes));
+
+        /// Drop temporary columns, added by 'sorting_key_expr'
+        if (!out_projection)
+            out_projection = createProjection(pipe.getHeader());
+
+        QueryPlanPtr plan = createPlanFromPipe(std::move(pipe), "with final");
+
+        auto expression_step = std::make_unique<ExpressionStep>(
+            plan->getCurrentDataStream(),
+            metadata_snapshot->getSortingKey().expression->getActionsDAG().clone());
+
+        expression_step->setStepDescription("Calculate sorting key expression");
+        plan->addStep(std::move(expression_step));
+
+        partition_plans.emplace_back(std::move(plan));
+    }
+
     if (partition_plans.empty())
         return {};
 
diff --git a/tests/performance/optimized_select_final_one_part.xml b/tests/performance/optimized_select_final_one_part.xml
index c7d505c89cb..92c8eed859a 100644
--- a/tests/performance/optimized_select_final_one_part.xml
+++ b/tests/performance/optimized_select_final_one_part.xml
@@ -9,7 +9,7 @@
         PARTITION BY toYYYYMM(t) ORDER BY x
     </create_query>
 
-    <fill_query>INSERT INTO optimized_select_final SELECT toDate('2020-01-01'), number, 'string' FROM numbers(100000000)</fill_query>
+    <fill_query>INSERT INTO optimized_select_final SELECT toDate('2020-01-01'), number, 'string' FROM numbers(50000000)</fill_query>
 
     <fill_query>OPTIMIZE TABLE optimized_select_final FINAL</fill_query>
 
diff --git a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.reference b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.reference
index facdf3dab26..963359440c6 100644
--- a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.reference
+++ b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.reference
@@ -5,3 +5,4 @@
 2000-01-01 00:00:00	2	
 2020-01-01 00:00:00	2	
 1
+16
diff --git a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql
index c24990b598a..0e0f0325c7b 100644
--- a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql
+++ b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql
@@ -22,6 +22,7 @@ INSERT INTO select_final SELECT toDate('2020-01-01'), number, '' FROM numbers(2)
 INSERT INTO select_final SELECT toDate('2020-01-01'), number, 'updated' FROM numbers(2);
 
 SELECT max(x) FROM select_final FINAL where string = 'updated' SETTINGS do_not_merge_across_partitions_select_final = 1;
+SELECT arrayUniq(thread_ids) FROM system.query_log ORDER BY event_time LIMIT 1;
 
 DROP TABLE select_final;
 

From 004517009e2117ba41aa1d412237902483505772 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Fri, 29 Jan 2021 18:11:44 +0300
Subject: [PATCH 0381/1238] fix

---
 src/Common/ThreadStatus.cpp       | 4 +++-
 src/Interpreters/Context.cpp      | 6 ------
 src/Interpreters/Context.h        | 2 +-
 src/Interpreters/executeQuery.cpp | 2 ++
 src/Server/GRPCServer.cpp         | 3 ++-
 5 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp
index f2256fbf192..8c01ed2d46f 100644
--- a/src/Common/ThreadStatus.cpp
+++ b/src/Common/ThreadStatus.cpp
@@ -99,8 +99,10 @@ ThreadStatus::~ThreadStatus()
         /// We've already allocated a little bit more than the limit and cannot track it in the thread memory tracker or its parent.
     }
 
+#if !defined(ARCADIA_BUILD)
     /// It may cause segfault if query_context was destroyed, but was not detached
-    assert((!query_context && query_id.empty()) || (query_id == query_context->getCurrentQueryId()));
+    assert((!query_context && query_id.empty()) || (query_context && query_id == query_context->getCurrentQueryId()));
+#endif
 
     if (deleter)
         deleter();
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 69e09b36e64..065c68113e6 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -1136,12 +1136,6 @@ String Context::getCurrentDatabase() const
 }
 
 
-String Context::getCurrentQueryId() const
-{
-    return client_info.current_query_id;
-}
-
-
 String Context::getInitialQueryId() const
 {
     return client_info.initial_query_id;
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index 8e15d0a4fed..f4716890b68 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -436,7 +436,7 @@ public:
     StoragePtr getViewSource();
 
     String getCurrentDatabase() const;
-    String getCurrentQueryId() const;
+    String getCurrentQueryId() const { return client_info.current_query_id; }
 
     /// Id of initiating query for distributed queries; or current query id if it's not a distributed query.
     String getInitialQueryId() const;
diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp
index 770e6e65d24..5297f625186 100644
--- a/src/Interpreters/executeQuery.cpp
+++ b/src/Interpreters/executeQuery.cpp
@@ -326,8 +326,10 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
 {
     const auto current_time = std::chrono::system_clock::now();
 
+#if !defined(ARCADIA_BUILD)
     assert(internal || CurrentThread::get().getQueryContext());
     assert(internal || CurrentThread::get().getQueryContext()->getCurrentQueryId() == CurrentThread::getQueryId());
+#endif
 
     const Settings & settings = context.getSettingsRef();
 
diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp
index 475bfc81801..1654cf48ba4 100644
--- a/src/Server/GRPCServer.cpp
+++ b/src/Server/GRPCServer.cpp
@@ -652,7 +652,6 @@ namespace
 
         /// Create context.
         query_context.emplace(iserver.context());
-        query_scope.emplace(*query_context);
 
         /// Authentication.
         query_context->setUser(user, password, user_address);
@@ -670,6 +669,8 @@ namespace
             query_context->setSessionContext(session->context);
         }
 
+        query_scope.emplace(*query_context);
+
         /// Set client info.
         ClientInfo & client_info = query_context->getClientInfo();
         client_info.query_kind = ClientInfo::QueryKind::INITIAL_QUERY;

From 8763b8bc59077b6a6f3040906aa7022e3cd0d80e Mon Sep 17 00:00:00 2001
From: Vasily Nemkov <V.Nemkov@gmail.com>
Date: Fri, 29 Jan 2021 17:14:56 +0200
Subject: [PATCH 0382/1238] Updated docs on encrypt/decrypt functions

---
 .../functions/encryption-functions.md         | 292 ++++++++----------
 1 file changed, 135 insertions(+), 157 deletions(-)

diff --git a/docs/en/sql-reference/functions/encryption-functions.md b/docs/en/sql-reference/functions/encryption-functions.md
index bef2f8137d0..9e360abfe26 100644
--- a/docs/en/sql-reference/functions/encryption-functions.md
+++ b/docs/en/sql-reference/functions/encryption-functions.md
@@ -11,7 +11,7 @@ Key length depends on encryption mode. It is 16, 24, and 32 bytes long for `-128
 
 Initialization vector length is always 16 bytes (bytes in excess of 16 are ignored). 
 
-Note that these functions work slowly.
+Note that these functions work slowly until ClickHouse 21.1.
 
 ## encrypt {#encrypt}
 
@@ -41,7 +41,7 @@ encrypt('mode', 'plaintext', 'key' [, iv, aad])
 
 **Returned value**
 
--   Ciphered String. [String](../../sql-reference/data-types/string.md#string).
+-   Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string).
 
 **Examples**
 
@@ -52,57 +52,38 @@ Query:
 ``` sql
 CREATE TABLE encryption_test
 (
-    input String,
-    key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
-    iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
-    key32 String DEFAULT substring(key, 1, 32),
-    key24 String DEFAULT substring(key, 1, 24),
-    key16 String DEFAULT substring(key, 1, 16)
-) Engine = Memory;
+    `comment` String,
+    `secret` String
+)
+ENGINE = Memory
 ```
 
-Insert this data:
+Insert some data (please avoid storing the keys/ivs in the database as this undermines the whole concept of encryption), also storing 'hints' is unsafe too and used only for illustrative purposes:
 
 Query:
 
 ``` sql
-INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
+INSERT INTO encryption_test VALUES('aes-256-cfb128 no IV', encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212')),\
+('aes-256-cfb128 no IV, different key', encrypt('aes-256-cfb128', 'Secret', 'keykeykeykeykeykeykeykeykeykeyke')),\
+('aes-256-cfb128 with IV', encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv')),\
+('aes-256-cbc no IV', encrypt('aes-256-cbc', 'Secret', '12345678910121314151617181920212'));
 ```
 
-Example without `iv`:
-
 Query:
 
 ``` sql
-SELECT 'aes-128-ecb' AS mode, hex(encrypt(mode, input, key16)) FROM encryption_test;
+SELECT comment, hex(secret) FROM encryption_test;
 ```
 
 Result:
 
 ``` text
-┌─mode────────┬─hex(encrypt('aes-128-ecb', input, key16))────────────────────────┐
-│ aes-128-ecb │ 4603E6862B0D94BBEC68E0B0DF51D60F                                 │
-│ aes-128-ecb │ 3004851B86D3F3950672DE7085D27C03                                 │
-│ aes-128-ecb │ E807F8C8D40A11F65076361AFC7D8B68D8658C5FAA6457985CAA380F16B3F7E4 │
-└─────────────┴──────────────────────────────────────────────────────────────────┘
-```
-
-Example with `iv`:
-
-Query:
-
-``` sql
-SELECT 'aes-256-ctr' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test;
-```
-
-Result:
-
-``` text
-┌─mode────────┬─hex(encrypt('aes-256-ctr', input, key32, iv))─┐
-│ aes-256-ctr │                                               │
-│ aes-256-ctr │ 7FB039F7                                      │
-│ aes-256-ctr │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2B325949        │
-└─────────────┴───────────────────────────────────────────────┘
+┌─comment─────────────────────────────┬─hex(secret)──────────────────────┐
+│ aes-256-cfb128 no IV                │ B4972BDC4459                     │
+│ aes-256-cfb128 no IV, different key │ 2FF57C092DC9                     │
+│ aes-256-cfb128 with IV              │ 5E6CB398F653                     │
+│ aes-256-cbc no IV                   │ 1BC0629A92450D9E73A00E7D02CF4142 │
+└─────────────────────────────────────┴──────────────────────────────────┘
 ```
 
 Example with `-gcm`:
@@ -110,40 +91,26 @@ Example with `-gcm`:
 Query:
 
 ``` sql
-SELECT 'aes-256-gcm' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test;
+INSERT INTO encryption_test VALUES('aes-256-gcm', encrypt('aes-256-gcm', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv')), \
+('aes-256-gcm with AAD', encrypt('aes-256-gcm', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv', 'aad'));
+
+SELECT comment, hex(secret) FROM encryption_test WHERE comment LIKE '%gcm%';
 ```
 
 Result:
 
 ``` text
-┌─mode────────┬─hex(encrypt('aes-256-gcm', input, key32, iv))──────────────────────────┐
-│ aes-256-gcm │ E99DBEBC01F021758352D7FBD9039EFA                                       │
-│ aes-256-gcm │ 8742CE3A7B0595B281C712600D274CA881F47414                               │
-│ aes-256-gcm │ A44FD73ACEB1A64BDE2D03808A2576EDBB60764CC6982DB9AF2C33C893D91B00C60DC5 │
-└─────────────┴────────────────────────────────────────────────────────────────────────┘
-```
-
-Example with `-gcm` mode and with `aad`:
-
-Query:
-
-``` sql
-SELECT 'aes-192-gcm' AS mode, hex(encrypt(mode, input, key24, iv, 'AAD')) FROM encryption_test;
-```
-
-Result:
-
-``` text
-┌─mode────────┬─hex(encrypt('aes-192-gcm', input, key24, iv, 'AAD'))───────────────────┐
-│ aes-192-gcm │ 04C13E4B1D62481ED22B3644595CB5DB                                       │
-│ aes-192-gcm │ 9A6CF0FD2B329B04EAD18301818F016DF8F77447                               │
-│ aes-192-gcm │ B961E9FD9B940EBAD7ADDA75C9F198A40797A5EA1722D542890CC976E21113BBB8A7AA │
-└─────────────┴────────────────────────────────────────────────────────────────────────┘
+┌─comment──────────────┬─hex(secret)──────────────────────────────────┐
+│ aes-256-gcm          │ A8A3CCBC6426CFEEB60E4EAE03D3E94204C1B09E0254 │
+│ aes-256-gcm with AAD │ A8A3CCBC6426D9A1017A0A932322F1852260A4AD6837 │
+└──────────────────────┴──────────────────────────────────────────────┘
 ```
 
 ## aes_encrypt_mysql {#aes_encrypt_mysql}
 
-Compatible with mysql encryption and can be decrypted with [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt) function.
+Compatible with mysql encryption and resulting ciphertext can be decrypted with [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt) function.
+
+Will produce same ciphertext as `encrypt` on equal inputs. But when `key` or `iv` are longer than they should normally be, `aes_encrypt_mysql` will stick to what MySQL's `aes_encrypt` does: 'fold' `key` and ignore excess bits of `IV`.
 
 Supported encryption modes:
 
@@ -156,7 +123,7 @@ Supported encryption modes:
 
 **Syntax**
 
-```sql
+``` sql
 aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv])
 ```
 
@@ -164,78 +131,98 @@ aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv])
 
 -   `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string).
 -   `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string).
--   `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string).
--   `iv` — Initialization vector. Optinal. [String](../../sql-reference/data-types/string.md#string).
+-   `key` — Encryption key. If key is longer than required by mode, MySQL-specific key folding is performed. [String](../../sql-reference/data-types/string.md#string).
+-   `iv` — Initialization vector. Optinal, only first 16 bytes are taken into account [String](../../sql-reference/data-types/string.md#string).
 
 **Returned value**
 
--   Ciphered String. [String](../../sql-reference/data-types/string.md#string).
+- Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string).
+
 
 **Examples**
 
-Create this table:
+Given equal input `encrypt` and `aes_encrypt_mysql` produce the same ciphertext:
 
 Query:
 
 ``` sql
-CREATE TABLE encryption_test
-(
-    input String,
-    key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
-    iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
-    key32 String DEFAULT substring(key, 1, 32),
-    key24 String DEFAULT substring(key, 1, 24),
-    key16 String DEFAULT substring(key, 1, 16)
-) Engine = Memory;
+SELECT encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv') = aes_encrypt_mysql('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv') AS ciphertexts_equal;
 ```
 
-Insert this data:
+Result:
 
-Query:
-
-``` sql
-INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
+```
+┌─ciphertexts_equal─┐
+│                 1 │
+└───────────────────┘
 ```
 
-Example without `iv`:
+
+But `encrypt` fails when `key` or `iv` is longer than expected:
 
 Query:
 
 ``` sql
-SELECT 'aes-128-cbc' AS mode, hex(aes_encrypt_mysql(mode, input, key32)) FROM encryption_test;
+SELECT encrypt('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123');
 ```
 
 Result:
 
 ``` text
-┌─mode────────┬─hex(aes_encrypt_mysql('aes-128-cbc', input, key32))──────────────┐
-│ aes-128-cbc │ FEA8CFDE6EE2C6E7A2CC6ADDC9F62C83                                 │
-│ aes-128-cbc │ 78B16CD4BE107660156124C5FEE6454A                                 │
-│ aes-128-cbc │ 67C0B119D96F18E2823968D42871B3D179221B1E7EE642D628341C2B29BA2E18 │
-└─────────────┴──────────────────────────────────────────────────────────────────┘
+Received exception from server (version 21.1.2):
+Code: 36. DB::Exception: Received from localhost:9000. DB::Exception: Invalid key size: 33 expected 32: While processing encrypt('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123'). 
 ```
 
-Example with `iv`:
+While `aes_encrypt_mysql` produces MySQL-compatitalbe output:
 
 Query:
 
 ``` sql
-SELECT 'aes-256-cfb128' AS mode, hex(aes_encrypt_mysql(mode, input, key32, iv)) FROM encryption_test;
+SELECT hex(aes_encrypt_mysql('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123')) AS ciphertext;
+```
+
+Result:
+
+```text
+┌─ciphertext───┐
+│ 24E9E4966469 │
+└──────────────┘
+```
+
+Notice how supplying even longer `IV` produces the same result
+
+Query:
+
+``` sql
+SELECT hex(aes_encrypt_mysql('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456')) AS ciphertext
 ```
 
 Result:
 
 ``` text
-┌─mode───────────┬─hex(aes_encrypt_mysql('aes-256-cfb128', input, key32, iv))─┐
-│ aes-256-cfb128 │                                                            │
-│ aes-256-cfb128 │ 7FB039F7                                                   │
-│ aes-256-cfb128 │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2BB5174F                     │
-└────────────────┴────────────────────────────────────────────────────────────┘
+┌─ciphertext───┐
+│ 24E9E4966469 │
+└──────────────┘
+```
+
+Which is binary equal to what MySQL produces on same inputs:
+
+``` sql
+mysql> SET  block_encryption_mode='aes-256-cfb128';
+Query OK, 0 rows affected (0.00 sec)
+
+mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456') as ciphertext;
++------------------------+
+| ciphertext             |
++------------------------+
+| 0x24E9E4966469         |
++------------------------+
+1 row in set (0.00 sec)
 ```
 
 ## decrypt {#decrypt}
 
-This function decrypts data using these modes:
+This function decrypts ciphertext into a plaintext using these modes:
 
 -   aes-128-ecb, aes-192-ecb, aes-256-ecb
 -   aes-128-cbc, aes-192-cbc, aes-256-cbc
@@ -247,7 +234,7 @@ This function decrypts data using these modes:
 
 **Syntax**
 
-```sql
+``` sql
 decrypt('mode', 'ciphertext', 'key' [, iv, aad])
 ```
 
@@ -265,51 +252,56 @@ decrypt('mode', 'ciphertext', 'key' [, iv, aad])
 
 **Examples**
 
-Create this table:
+Re-using table from [encrypt](./encryption-functions.md#encrypt).
 
 Query:
 
 ``` sql
-CREATE TABLE encryption_test
-(
-    input String,
-    key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
-    iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
-    key32 String DEFAULT substring(key, 1, 32),
-    key24 String DEFAULT substring(key, 1, 24),
-    key16 String DEFAULT substring(key, 1, 16)
-) Engine = Memory;
-```
-
-Insert this data:
-
-Query:
-
-``` sql
-INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
-```
-
-Query:
-
-``` sql
-
-SELECT 'aes-128-ecb' AS mode, decrypt(mode, encrypt(mode, input, key16), key16) FROM encryption_test;
+SELECT comment, hex(secret) FROM encryption_test;
 ```
 
 Result:
 
-```text
-┌─mode────────┬─decrypt('aes-128-ecb', encrypt('aes-128-ecb', input, key16), key16)─┐
-│ aes-128-ecb │                                                                     │
-│ aes-128-ecb │ text                                                                │
-│ aes-128-ecb │ What Is ClickHouse?                                                 │
-└─────────────┴─────────────────────────────────────────────────────────────────────┘
+``` text
+┌─comment──────────────┬─hex(secret)──────────────────────────────────┐
+│ aes-256-gcm          │ A8A3CCBC6426CFEEB60E4EAE03D3E94204C1B09E0254 │
+│ aes-256-gcm with AAD │ A8A3CCBC6426D9A1017A0A932322F1852260A4AD6837 │
+└──────────────────────┴──────────────────────────────────────────────┘
+┌─comment─────────────────────────────┬─hex(secret)──────────────────────┐
+│ aes-256-cfb128 no IV                │ B4972BDC4459                     │
+│ aes-256-cfb128 no IV, different key │ 2FF57C092DC9                     │
+│ aes-256-cfb128 with IV              │ 5E6CB398F653                     │
+│ aes-256-cbc no IV                   │ 1BC0629A92450D9E73A00E7D02CF4142 │
+└─────────────────────────────────────┴──────────────────────────────────┘
 ```
 
+Now let's try to decrypt all that data.
+
+Query:
+
+``` sql
+SELECT comment, decrypt('aes-256-cfb128', secret, '12345678910121314151617181920212') as plaintext FROM encryption_test
+```
+
+Result:
+``` text
+┌─comment─────────────────────────────┬─plaintext─┐
+│ aes-256-cfb128 no IV                │ Secret    │
+│ aes-256-cfb128 no IV, different key │ �4�
+                                           �         │
+│ aes-256-cfb128 with IV              │ ���6�~        │
+ │aes-256-cbc no IV                   │ �2*4�h3c�4w��@
+└─────────────────────────────────────┴───────────┘
+```
+
+Notice how only portion of the data was properly decrypted, and the rest is gibberish since either `mode`, `key`, or `iv` were different upon encryption.
+
 ## aes_decrypt_mysql {#aes_decrypt_mysql}
 
 Compatible with mysql encryption and decrypts data encrypted with [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt) function.
 
+Will produce same plaintext as `decrypt` on equal inputs. But when `key` or `iv` are longer than they should normally be, `aes_decrypt_mysql` will stick to what MySQL's `aes_decrypt` does: 'fold' `key` and ignore excess bits of `IV`.
+
 Supported decryption modes:
 
 -   aes-128-ecb, aes-192-ecb, aes-256-ecb
@@ -321,7 +313,7 @@ Supported decryption modes:
 
 **Syntax**
 
-```sql
+``` sql
 aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])
 ```
 
@@ -338,44 +330,30 @@ aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])
 
 **Examples**
 
-Create this table:
-
-Query:
-
+Let's decrypt data we've previously encrypted with MySQL:
 ``` sql
-CREATE TABLE encryption_test
-(
-    input String,
-    key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
-    iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
-    key32 String DEFAULT substring(key, 1, 32),
-    key24 String DEFAULT substring(key, 1, 24),
-    key16 String DEFAULT substring(key, 1, 16)
-) Engine = Memory;
-```
+mysql> SET  block_encryption_mode='aes-256-cfb128';
+Query OK, 0 rows affected (0.00 sec)
 
-Insert this data:
-
-Query:
-
-``` sql
-INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
+mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456') as ciphertext;
++------------------------+
+| ciphertext             |
++------------------------+
+| 0x24E9E4966469         |
++------------------------+
+1 row in set (0.00 sec)
 ```
 
 Query:
-
 ``` sql
-SELECT 'aes-128-cbc' AS mode, aes_decrypt_mysql(mode, aes_encrypt_mysql(mode, input, key), key) FROM encryption_test;
+SELECT aes_decrypt_mysql('aes-256-cfb128', unhex('24E9E4966469'), '123456789101213141516171819202122', 'iviviviviviviviv123456') AS plaintext
 ```
 
 Result:
-
 ``` text
-┌─mode────────┬─aes_decrypt_mysql('aes-128-cbc', aes_encrypt_mysql('aes-128-cbc', input, key), key)─┐
-│ aes-128-cbc │                                                                                     │
-│ aes-128-cbc │ text                                                                                │
-│ aes-128-cbc │ What Is ClickHouse?                                                                 │
-└─────────────┴─────────────────────────────────────────────────────────────────────────────────────┘
+┌─plaintext─┐
+│ Secret    │
+└───────────┘
 ```
 
 [Original article](https://clickhouse.tech/docs/en/sql-reference/functions/encryption_functions/) <!--hide-->

From c373d92a80a0a04d689ded6c3959484eca5c922d Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Fri, 29 Jan 2021 18:50:08 +0300
Subject: [PATCH 0383/1238] Less strict check

---
 .../MergeTree/ReplicatedMergeTreePartCheckThread.cpp      | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
index a306547d843..51bd373102f 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
@@ -193,12 +193,8 @@ void ReplicatedMergeTreePartCheckThread::searchForMissingPartAndFetchIfPossible(
         /// Is it in the replication queue? If there is - delete, because the task can not be processed.
         if (!storage.queue.remove(zookeeper, part_name))
         {
-            /// The part was not in our queue. Why did it happen?
-#ifdef NDEBUG
-            LOG_ERROR(log, "Missing part {} is not in our queue.", part_name);
-#else
-            throw Exception(ErrorCodes::LOGICAL_ERROR, "Missing part {} is not in our queue.", part_name);
-#endif
+            /// The part was not in our queue.
+            LOG_WARNING(log, "Missing part {} is not in our queue, this can happen rarely.", part_name);
         }
 
         /** This situation is possible if on all the replicas where the part was, it deteriorated.

From f8ae63995e7e456c11fdd8c7c1808fb4dfeb605f Mon Sep 17 00:00:00 2001
From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com>
Date: Fri, 29 Jan 2021 19:11:50 +0300
Subject: [PATCH 0384/1238] Fix msan warnings

---
 .gitmodules                                  | 2 +-
 base/glibc-compatibility/musl/sched_getcpu.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.gitmodules b/.gitmodules
index ecccf0633e2..519ba082304 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -184,7 +184,7 @@
 	url = https://github.com/ClickHouse-Extras/krb5
 [submodule "contrib/cyrus-sasl"]
 	path = contrib/cyrus-sasl
-	url = https://github.com/cyrusimap/cyrus-sasl
+	url = https://github.com/ClickHouse-Extras/cyrus-sasl
 	branch = cyrus-sasl-2.1
 [submodule "contrib/croaring"]
 	path = contrib/croaring
diff --git a/base/glibc-compatibility/musl/sched_getcpu.c b/base/glibc-compatibility/musl/sched_getcpu.c
index 57b8b416043..f290f01d153 100644
--- a/base/glibc-compatibility/musl/sched_getcpu.c
+++ b/base/glibc-compatibility/musl/sched_getcpu.c
@@ -31,7 +31,7 @@ static void *volatile vdso_func = (void *)getcpu_init;
 int sched_getcpu(void)
 {
 	int r;
-	unsigned cpu;
+	unsigned cpu = 0;
 
 #ifdef VDSO_GETCPU_SYM
 	getcpu_f f = (getcpu_f)vdso_func;

From 5f6a4ad60da2582948b86eb0dfa1485156171cfa Mon Sep 17 00:00:00 2001
From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com>
Date: Wed, 27 Jan 2021 14:26:49 +0300
Subject: [PATCH 0385/1238] Remove --project-directory for docker-compose. Fix
 logs formatting from docker container.

---
 tests/integration/helpers/cluster.py | 77 ++++++++++++----------------
 1 file changed, 32 insertions(+), 45 deletions(-)

diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py
index ee4ea8c94d5..8d68951be71 100644
--- a/tests/integration/helpers/cluster.py
+++ b/tests/integration/helpers/cluster.py
@@ -127,7 +127,6 @@ class ClickHouseCluster:
         if custom_dockerd_host:
             self.base_cmd += ['--host', custom_dockerd_host]
 
-        self.base_cmd += ['--project-directory', self.base_dir, '--project-name', self.project_name]
         self.base_zookeeper_cmd = None
         self.base_mysql_cmd = []
         self.base_kafka_cmd = []
@@ -260,25 +259,23 @@ class ClickHouseCluster:
             self.with_zookeeper = True
             self.zookeeper_use_tmpfs = zookeeper_use_tmpfs
             self.base_cmd.extend(['--file', zookeeper_docker_compose_path])
-            self.base_zookeeper_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
-                                       self.project_name, '--file', zookeeper_docker_compose_path]
+            self.base_zookeeper_cmd = ['docker-compose', '--project-name', self.project_name,
+                                       '--file', zookeeper_docker_compose_path]
             cmds.append(self.base_zookeeper_cmd)
 
         if with_mysql and not self.with_mysql:
             self.with_mysql = True
             self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_mysql.yml')])
-            self.base_mysql_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
-                                   self.project_name, '--file',
-                                   p.join(docker_compose_yml_dir, 'docker_compose_mysql.yml')]
+            self.base_mysql_cmd = ['docker-compose', '--project-name', self.project_name,
+                                   '--file', p.join(docker_compose_yml_dir, 'docker_compose_mysql.yml')]
 
             cmds.append(self.base_mysql_cmd)
 
         if with_postgres and not self.with_postgres:
             self.with_postgres = True
             self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_postgres.yml')])
-            self.base_postgres_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
-                                      self.project_name, '--file',
-                                      p.join(docker_compose_yml_dir, 'docker_compose_postgres.yml')]
+            self.base_postgres_cmd = ['docker-compose', '--project-name', self.project_name,
+                                      '--file', p.join(docker_compose_yml_dir, 'docker_compose_postgres.yml')]
             cmds.append(self.base_postgres_cmd)
 
         if with_odbc_drivers and not self.with_odbc_drivers:
@@ -286,64 +283,57 @@ class ClickHouseCluster:
             if not self.with_mysql:
                 self.with_mysql = True
                 self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_mysql.yml')])
-                self.base_mysql_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
-                                       self.project_name, '--file',
-                                       p.join(docker_compose_yml_dir, 'docker_compose_mysql.yml')]
+                self.base_mysql_cmd = ['docker-compose', '--project-name', self.project_name,
+                                       '--file', p.join(docker_compose_yml_dir, 'docker_compose_mysql.yml')]
                 cmds.append(self.base_mysql_cmd)
 
             if not self.with_postgres:
                 self.with_postgres = True
                 self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_postgres.yml')])
-                self.base_postgres_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
-                                          self.project_name, '--file',
-                                          p.join(docker_compose_yml_dir, 'docker_compose_postgres.yml')]
+                self.base_postgres_cmd = ['docker-compose', '--project-name', self.project_name,
+                                          '--file', p.join(docker_compose_yml_dir, 'docker_compose_postgres.yml')]
                 cmds.append(self.base_postgres_cmd)
 
         if with_kafka and not self.with_kafka:
             self.with_kafka = True
             self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_kafka.yml')])
-            self.base_kafka_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
-                                   self.project_name, '--file',
-                                   p.join(docker_compose_yml_dir, 'docker_compose_kafka.yml')]
+            self.base_kafka_cmd = ['docker-compose', '--project-name', self.project_name,
+                                   '--file', p.join(docker_compose_yml_dir, 'docker_compose_kafka.yml')]
             cmds.append(self.base_kafka_cmd)
 
         if with_kerberized_kafka and not self.with_kerberized_kafka:
             self.with_kerberized_kafka = True
             self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_kerberized_kafka.yml')])
-            self.base_kerberized_kafka_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
-                                   self.project_name, '--file', p.join(docker_compose_yml_dir, 'docker_compose_kerberized_kafka.yml')]
+            self.base_kerberized_kafka_cmd = ['docker-compose','--project-name', self.project_name,
+                                              '--file', p.join(docker_compose_yml_dir, 'docker_compose_kerberized_kafka.yml')]
             cmds.append(self.base_kerberized_kafka_cmd)
 
         if with_rabbitmq and not self.with_rabbitmq:
             self.with_rabbitmq = True
             self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_rabbitmq.yml')])
-            self.base_rabbitmq_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
-                                      self.project_name, '--file',
-                                      p.join(docker_compose_yml_dir, 'docker_compose_rabbitmq.yml')]
+            self.base_rabbitmq_cmd = ['docker-compose', '--project-name', self.project_name,
+                                      '--file', p.join(docker_compose_yml_dir, 'docker_compose_rabbitmq.yml')]
             cmds.append(self.base_rabbitmq_cmd)
 
         if with_hdfs and not self.with_hdfs:
             self.with_hdfs = True
             self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_hdfs.yml')])
-            self.base_hdfs_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
-                                  self.project_name, '--file',
-                                  p.join(docker_compose_yml_dir, 'docker_compose_hdfs.yml')]
+            self.base_hdfs_cmd = ['docker-compose', '--project-name', self.project_name,
+                                  '--file', p.join(docker_compose_yml_dir, 'docker_compose_hdfs.yml')]
             cmds.append(self.base_hdfs_cmd)
 
         if with_kerberized_hdfs and not self.with_kerberized_hdfs:
             self.with_kerberized_hdfs = True
             self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_kerberized_hdfs.yml')])
-            self.base_kerberized_hdfs_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
-                                             self.project_name, '--file',
-                                             p.join(docker_compose_yml_dir, 'docker_compose_kerberized_hdfs.yml')]
+            self.base_kerberized_hdfs_cmd = ['docker-compose', '--project-name', self.project_name,
+                                             '--file', p.join(docker_compose_yml_dir, 'docker_compose_kerberized_hdfs.yml')]
             cmds.append(self.base_kerberized_hdfs_cmd)
 
         if with_mongo and not self.with_mongo:
             self.with_mongo = True
             self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_mongo.yml')])
-            self.base_mongo_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
-                                   self.project_name, '--file',
-                                   p.join(docker_compose_yml_dir, 'docker_compose_mongo.yml')]
+            self.base_mongo_cmd = ['docker-compose', '--project-name', self.project_name,
+                                   '--file', p.join(docker_compose_yml_dir, 'docker_compose_mongo.yml')]
             cmds.append(self.base_mongo_cmd)
 
         if self.with_net_trics:
@@ -353,25 +343,22 @@ class ClickHouseCluster:
         if with_redis and not self.with_redis:
             self.with_redis = True
             self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_redis.yml')])
-            self.base_redis_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
-                                   self.project_name, '--file',
-                                   p.join(docker_compose_yml_dir, 'docker_compose_redis.yml')]
+            self.base_redis_cmd = ['docker-compose', '--project-name', self.project_name,
+                                   '--file', p.join(docker_compose_yml_dir, 'docker_compose_redis.yml')]
 
         if with_minio and not self.with_minio:
             self.with_minio = True
             self.minio_certs_dir = minio_certs_dir
             self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_minio.yml')])
-            self.base_minio_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
-                                   self.project_name, '--file',
-                                   p.join(docker_compose_yml_dir, 'docker_compose_minio.yml')]
+            self.base_minio_cmd = ['docker-compose', '--project-name', self.project_name,
+                                   '--file', p.join(docker_compose_yml_dir, 'docker_compose_minio.yml')]
             cmds.append(self.base_minio_cmd)
 
         if with_cassandra and not self.with_cassandra:
             self.with_cassandra = True
             self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_cassandra.yml')])
-            self.base_cassandra_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
-                                       self.project_name, '--file',
-                                       p.join(docker_compose_yml_dir, 'docker_compose_cassandra.yml')]
+            self.base_cassandra_cmd = ['docker-compose', '--project-name', self.project_name,
+                                       '--file', p.join(docker_compose_yml_dir, 'docker_compose_cassandra.yml')]
 
         return instance
 
@@ -936,7 +923,7 @@ class ClickHouseInstance:
         self.with_cassandra = with_cassandra
 
         self.path = p.join(self.cluster.instances_dir, name)
-        self.docker_compose_path = p.join(self.path, 'docker_compose.yml')
+        self.docker_compose_path = p.join(self.path, 'docker-compose.yml')
         self.env_variables = env_variables or {}
         if with_odbc_drivers:
             self.odbc_ini_path = self.path + "/odbc.ini:/etc/odbc.ini"
@@ -1162,15 +1149,15 @@ class ClickHouseInstance:
             status = handle.status
             if status == 'exited':
                 raise Exception(
-                    "Instance `{}' failed to start. Container status: {}, logs: {}".format(self.name, status,
-                                                                                           handle.logs()))
+                    "Instance `{}' failed to start. Container status: {}, logs: {!s}".format(self.name, status,
+                                                                                           handle.logs().decode('utf-8')))
 
             current_time = time.time()
             time_left = deadline - current_time
             if deadline is not None and current_time >= deadline:
                 raise Exception("Timed out while waiting for instance `{}' with ip address {} to start. "
                                 "Container status: {}, logs: {}".format(self.name, self.ip_address, status,
-                                                                        handle.logs()))
+                                                                        handle.logs().decode('utf-8')))
 
             # Repeatedly poll the instance address until there is something that listens there.
             # Usually it means that ClickHouse is ready to accept queries.

From 01c9b90144a10de8bcfa01f031cc0dd806ea9df4 Mon Sep 17 00:00:00 2001
From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com>
Date: Wed, 27 Jan 2021 19:21:54 +0300
Subject: [PATCH 0386/1238] fix project name for clickhouse instance

---
 tests/integration/helpers/cluster.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py
index 8d68951be71..64f7e5dd889 100644
--- a/tests/integration/helpers/cluster.py
+++ b/tests/integration/helpers/cluster.py
@@ -113,9 +113,9 @@ class ClickHouseCluster:
         self.zookeeper_config_path = p.join(self.base_dir, zookeeper_config_path) if zookeeper_config_path else p.join(
             HELPERS_DIR, 'zookeeper_config.xml')
 
-        self.project_name = pwd.getpwuid(os.getuid()).pw_name + p.basename(self.base_dir) + self.name
+        project_name = pwd.getpwuid(os.getuid()).pw_name + p.basename(self.base_dir) + self.name
         # docker-compose removes everything non-alphanumeric from project names so we do it too.
-        self.project_name = re.sub(r'[^a-z0-9]', '', self.project_name.lower())
+        self.project_name = re.sub(r'[^a-z0-9]', '', project_name.lower())
         self.instances_dir = p.join(self.base_dir, '_instances' + ('' if not self.name else '_' + self.name))
         self.docker_logs_path = p.join(self.instances_dir, 'docker.log')
 
@@ -126,6 +126,7 @@ class ClickHouseCluster:
         self.base_cmd = ['docker-compose']
         if custom_dockerd_host:
             self.base_cmd += ['--host', custom_dockerd_host]
+        self.base_cmd += ['--project-name', self.project_name]
 
         self.base_zookeeper_cmd = None
         self.base_mysql_cmd = []
@@ -360,6 +361,8 @@ class ClickHouseCluster:
             self.base_cassandra_cmd = ['docker-compose', '--project-name', self.project_name,
                                        '--file', p.join(docker_compose_yml_dir, 'docker_compose_cassandra.yml')]
 
+        print("Cluster name:{} project_name:{}. Added instance name:{} tag:{} base_cmd:{} docker_compose_yml_dir:{}".format(
+            self.name, self.project_name, name, tag, self.base_cmd, docker_compose_yml_dir))
         return instance
 
     def get_instance_docker_id(self, instance_name):
@@ -394,7 +397,10 @@ class ClickHouseCluster:
         return node
 
     def get_instance_ip(self, instance_name):
+        print("get_instance_ip instance_name={}".format(instance_name))
         docker_id = self.get_instance_docker_id(instance_name)
+        # for cont in self.docker_client.containers.list():
+        #     print("CONTAINERS LIST: ID={} NAME={} STATUS={}".format(cont.id, cont.name, cont.status))
         handle = self.docker_client.containers.get(docker_id)
         return list(handle.attrs['NetworkSettings']['Networks'].values())[0]['IPAddress']
 
@@ -1149,7 +1155,7 @@ class ClickHouseInstance:
             status = handle.status
             if status == 'exited':
                 raise Exception(
-                    "Instance `{}' failed to start. Container status: {}, logs: {!s}".format(self.name, status,
+                    "Instance `{}' failed to start. Container status: {}, logs: {}".format(self.name, status,
                                                                                            handle.logs().decode('utf-8')))
 
             current_time = time.time()

From ac168fe4e309d7af4a219c24970380dba233c90a Mon Sep 17 00:00:00 2001
From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com>
Date: Fri, 29 Jan 2021 19:09:48 +0300
Subject: [PATCH 0387/1238] add sleep

---
 tests/integration/test_storage_kerberized_kafka/test.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/integration/test_storage_kerberized_kafka/test.py b/tests/integration/test_storage_kerberized_kafka/test.py
index 59fb043b546..865afc8b162 100644
--- a/tests/integration/test_storage_kerberized_kafka/test.py
+++ b/tests/integration/test_storage_kerberized_kafka/test.py
@@ -105,6 +105,8 @@ def test_kafka_json_as_string(kafka_cluster):
                      kafka_flush_interval_ms=1000;
         ''')
 
+    time.sleep(3)
+
     result = instance.query('SELECT * FROM test.kafka;')
     expected = '''\
 {"t": 123, "e": {"x": "woof"} }

From c07fb8a6fee447534e866b8e407489735f71c448 Mon Sep 17 00:00:00 2001
From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com>
Date: Fri, 29 Jan 2021 19:11:50 +0300
Subject: [PATCH 0388/1238] Fix msan warnings

---
 .gitmodules                                  | 2 +-
 base/glibc-compatibility/musl/sched_getcpu.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.gitmodules b/.gitmodules
index ecccf0633e2..519ba082304 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -184,7 +184,7 @@
 	url = https://github.com/ClickHouse-Extras/krb5
 [submodule "contrib/cyrus-sasl"]
 	path = contrib/cyrus-sasl
-	url = https://github.com/cyrusimap/cyrus-sasl
+	url = https://github.com/ClickHouse-Extras/cyrus-sasl
 	branch = cyrus-sasl-2.1
 [submodule "contrib/croaring"]
 	path = contrib/croaring
diff --git a/base/glibc-compatibility/musl/sched_getcpu.c b/base/glibc-compatibility/musl/sched_getcpu.c
index 57b8b416043..f290f01d153 100644
--- a/base/glibc-compatibility/musl/sched_getcpu.c
+++ b/base/glibc-compatibility/musl/sched_getcpu.c
@@ -31,7 +31,7 @@ static void *volatile vdso_func = (void *)getcpu_init;
 int sched_getcpu(void)
 {
 	int r;
-	unsigned cpu;
+	unsigned cpu = 0;
 
 #ifdef VDSO_GETCPU_SYM
 	getcpu_f f = (getcpu_f)vdso_func;

From ab8db8a2398ca9153c917ac0d2b7d27c644cd16c Mon Sep 17 00:00:00 2001
From: Mikhail Filimonov <mfilimonov@altinity.com>
Date: Fri, 29 Jan 2021 17:57:52 +0100
Subject: [PATCH 0389/1238] Try to make test_dir.tar smaller

---
 .../test_limited_replicated_fetches/test.py   |  3 ++
 tests/integration/test_multiple_disks/test.py | 29 ++++++++++++++++++-
 .../test_system_replicated_fetches/test.py    |  3 ++
 3 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/tests/integration/test_limited_replicated_fetches/test.py b/tests/integration/test_limited_replicated_fetches/test.py
index 2091c65857e..9b9b8befd67 100644
--- a/tests/integration/test_limited_replicated_fetches/test.py
+++ b/tests/integration/test_limited_replicated_fetches/test.py
@@ -69,3 +69,6 @@ def test_limited_fetches(started_cluster):
 
     assert max([len(parts) for parts in fetches_result]) == 3, "Strange, but we don't utilize max concurrent threads for fetches"
     assert(max(background_fetches_metric)) == 3, "Just checking metric consistent with table"
+
+    node1.query("DROP TABLE IF EXISTS t SYNC")
+    node2.query("DROP TABLE IF EXISTS t SYNC")
\ No newline at end of file
diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py
index 24ee6c0493b..6877a9fc357 100644
--- a/tests/integration/test_multiple_disks/test.py
+++ b/tests/integration/test_multiple_disks/test.py
@@ -287,6 +287,8 @@ def test_query_parser(start_cluster):
                 "ALTER TABLE table_with_normal_policy MODIFY SETTING storage_policy='moving_jbod_with_external'")
     finally:
         node1.query("DROP TABLE IF EXISTS table_with_normal_policy SYNC")
+        node1.query("DROP TABLE IF EXISTS table_with_absent_policy SYNC")
+        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 @pytest.mark.parametrize("name,engine", [
@@ -328,6 +330,7 @@ def test_alter_policy(start_cluster, name, engine):
 
     finally:
         node1.query(f"DROP TABLE IF EXISTS {name} SYNC")
+        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 def get_random_string(length):
@@ -397,6 +400,7 @@ def test_round_robin(start_cluster, name, engine):
         assert used_disks[2] == used_disks[0]
     finally:
         node1.query(f"DROP TABLE IF EXISTS {name} SYNC")
+        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 @pytest.mark.parametrize("name,engine", [
@@ -424,6 +428,7 @@ def test_max_data_part_size(start_cluster, name, engine):
         assert used_disks[0] == 'external'
     finally:
         node1.query(f"DROP TABLE IF EXISTS {name} SYNC")
+        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 @pytest.mark.parametrize("name,engine", [
@@ -477,6 +482,8 @@ def test_jbod_overflow(start_cluster, name, engine):
 
     finally:
         node1.query(f"DROP TABLE IF EXISTS {name} SYNC")
+        node.query("DROP TABLE IF EXISTS system.part_log SYNC")
+
 
 
 @pytest.mark.parametrize("name,engine", [
@@ -527,6 +534,7 @@ def test_background_move(start_cluster, name, engine):
 
     finally:
         node1.query(f"DROP TABLE IF EXISTS {name} SYNC")
+        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 @pytest.mark.parametrize("name,engine", [
@@ -614,6 +622,7 @@ def test_start_stop_moves(start_cluster, name, engine):
 
     finally:
         node1.query(f"DROP TABLE IF EXISTS {name} SYNC")
+        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 def get_path_for_part_from_part_log(node, table, part_name):
@@ -702,7 +711,7 @@ def test_alter_move(start_cluster, name, engine):
 
     finally:
         node1.query(f"DROP TABLE IF EXISTS {name} SYNC")
-
+        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 @pytest.mark.parametrize("volume_or_disk", [
     "DISK",
@@ -751,6 +760,7 @@ def test_alter_move_half_of_partition(start_cluster, volume_or_disk):
 
     finally:
         node1.query(f"DROP TABLE IF EXISTS {name} SYNC")
+        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 @pytest.mark.parametrize("volume_or_disk", [
@@ -795,6 +805,7 @@ def test_alter_double_move_partition(start_cluster, volume_or_disk):
 
     finally:
         node1.query(f"DROP TABLE IF EXISTS {name} SYNC")
+        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 def produce_alter_move(node, name):
@@ -879,6 +890,7 @@ def test_concurrent_alter_move(start_cluster, name, engine):
         assert node1.query("SELECT COUNT() FROM {}".format(name)) == "500\n"
     finally:
         node1.query(f"DROP TABLE IF EXISTS {name} SYNC")
+        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 @pytest.mark.parametrize("name,engine", [
@@ -932,6 +944,7 @@ def test_concurrent_alter_move_and_drop(start_cluster, name, engine):
 
     finally:
         node1.query(f"DROP TABLE IF EXISTS {name} SYNC")
+        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 @pytest.mark.parametrize("name,engine", [
@@ -963,6 +976,7 @@ def test_detach_attach(start_cluster, name, engine):
 
     finally:
         node1.query(f"DROP TABLE IF EXISTS {name} SYNC")
+        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 @pytest.mark.parametrize("name,engine", [
@@ -1009,6 +1023,7 @@ def test_mutate_to_another_disk(start_cluster, name, engine):
 
     finally:
         node1.query(f"DROP TABLE IF EXISTS {name} SYNC")
+        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 @pytest.mark.parametrize("name,engine", [
@@ -1067,6 +1082,7 @@ def test_concurrent_alter_modify(start_cluster, name, engine):
 
     finally:
         node1.query(f"DROP TABLE IF EXISTS {name} SYNC")
+        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 def test_simple_replication_and_moves(start_cluster):
@@ -1134,6 +1150,7 @@ def test_simple_replication_and_moves(start_cluster):
     finally:
         for node in [node1, node2]:
             node.query("DROP TABLE IF EXISTS replicated_table_for_moves SYNC")
+            node.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 def test_download_appropriate_disk(start_cluster):
@@ -1168,6 +1185,7 @@ def test_download_appropriate_disk(start_cluster):
     finally:
         for node in [node1, node2]:
             node.query("DROP TABLE IF EXISTS replicated_table_for_download SYNC")
+            node.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 def test_rename(start_cluster):
@@ -1207,6 +1225,7 @@ def test_rename(start_cluster):
         node1.query("DROP TABLE IF EXISTS default.renaming_table SYNC")
         node1.query("DROP TABLE IF EXISTS default.renaming_table1 SYNC")
         node1.query("DROP TABLE IF EXISTS test.renaming_table2 SYNC")
+        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 def test_freeze(start_cluster):
@@ -1242,6 +1261,7 @@ def test_freeze(start_cluster):
     finally:
         node1.query("DROP TABLE IF EXISTS default.freezing_table SYNC")
         node1.exec_in_container(["rm", "-rf", "/jbod1/shadow", "/external/shadow"])
+        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 def test_kill_while_insert(start_cluster):
@@ -1285,6 +1305,7 @@ def test_kill_while_insert(start_cluster):
     finally:
         try:
             node1.query(f"DROP TABLE IF EXISTS {name} SYNC")
+            node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
         except:
             """ClickHouse may be inactive at this moment and we don't want to mask a meaningful exception."""
 
@@ -1346,6 +1367,7 @@ def test_move_while_merge(start_cluster):
 
     finally:
         node1.query(f"DROP TABLE IF EXISTS {name} SYNC")
+        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 def test_move_across_policies_does_not_work(start_cluster):
@@ -1388,6 +1410,7 @@ def test_move_across_policies_does_not_work(start_cluster):
     finally:
         node1.query(f"DROP TABLE IF EXISTS {name} SYNC")
         node1.query(f"DROP TABLE IF EXISTS {name}2 SYNC")
+        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 def _insert_merge_execute(node, name, policy, parts, cmds, parts_before_cmds, parts_after_cmds):
@@ -1487,6 +1510,7 @@ def test_no_merges_in_configuration_allow_from_query_without_reload(start_cluste
 
     finally:
         node1.query("SYSTEM STOP MERGES ON VOLUME {}.external".format(policy))
+        node.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 def test_no_merges_in_configuration_allow_from_query_with_reload(start_cluster):
@@ -1506,6 +1530,7 @@ def test_no_merges_in_configuration_allow_from_query_with_reload(start_cluster):
 
     finally:
         node1.query("SYSTEM STOP MERGES ON VOLUME {}.external".format(policy))
+        node.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 def test_yes_merges_in_configuration_disallow_from_query_without_reload(start_cluster):
@@ -1525,6 +1550,7 @@ def test_yes_merges_in_configuration_disallow_from_query_without_reload(start_cl
 
     finally:
         node1.query("SYSTEM START MERGES ON VOLUME {}.external".format(policy))
+        node.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 def test_yes_merges_in_configuration_disallow_from_query_with_reload(start_cluster):
@@ -1545,3 +1571,4 @@ def test_yes_merges_in_configuration_disallow_from_query_with_reload(start_clust
 
     finally:
         node1.query("SYSTEM START MERGES ON VOLUME {}.external".format(policy))
+        node.query("DROP TABLE IF EXISTS system.part_log SYNC")
diff --git a/tests/integration/test_system_replicated_fetches/test.py b/tests/integration/test_system_replicated_fetches/test.py
index cefb3256893..fcbdd4addd9 100644
--- a/tests/integration/test_system_replicated_fetches/test.py
+++ b/tests/integration/test_system_replicated_fetches/test.py
@@ -91,3 +91,6 @@ def test_system_replicated_fetches(started_cluster):
     for elem in fetches_result:
         assert elem['elapsed'] >= prev_elapsed, "Elapsed time decreasing prev {}, next {}? It's a bug".format(prev_elapsed, elem['elapsed'])
         prev_elapsed = elem['elapsed']
+
+    node1.query("DROP TABLE IF EXISTS t SYNC")
+    node2.query("DROP TABLE IF EXISTS t SYNC")

From f4236fd7656ab3ee742ae54a022799de3ca05965 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Fri, 29 Jan 2021 20:12:53 +0300
Subject: [PATCH 0390/1238] Fix style

---
 src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
index 51bd373102f..b2a144ca748 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
@@ -18,7 +18,6 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int TABLE_DIFFERS_TOO_MUCH;
-    extern const int LOGICAL_ERROR;
 }
 
 static const auto PART_CHECK_ERROR_SLEEP_MS = 5 * 1000;

From 78371e15dcc9b86f2fdf4b30637393dd22863be8 Mon Sep 17 00:00:00 2001
From: Pavel Kruglov <avogar@sandbox-633380738>
Date: Fri, 29 Jan 2021 21:00:08 +0300
Subject: [PATCH 0391/1238] Update test, reduce num_threads_for_lonely_parts if
 data is small

---
 .../MergeTree/MergeTreeDataSelectExecutor.cpp | 23 ++++++++++++-------
 ...t_merge_across_partitions_select_final.sql | 20 ++++++++++++----
 2 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index cf8799052df..8b1e5989de5 100644
--- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -1335,13 +1335,6 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
         data_settings->index_granularity,
         index_granularity_bytes);
 
-    const size_t min_marks_for_concurrent_read = minMarksForConcurrentRead(
-        settings.merge_tree_min_rows_for_concurrent_read,
-        settings.merge_tree_min_bytes_for_concurrent_read,
-        data_settings->index_granularity,
-        index_granularity_bytes,
-        sum_marks);
-
     if (sum_marks > max_marks_to_use_cache)
         use_uncompressed_cache = false;
 
@@ -1381,6 +1374,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
     /// we will store lonely parts with level > 0 to use parallel select on them.
     std::vector<RangesInDataPart> lonely_parts;
     size_t total_rows_in_lonely_parts = 0;
+    size_t sum_marks_in_lonely_parts = 0;
 
     for (size_t range_index = 0; range_index < parts_to_merge_ranges.size() - 1; ++range_index)
     {
@@ -1398,6 +1392,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
                 parts_to_merge_ranges[range_index]->data_part->info.level > 0)
             {
                 total_rows_in_lonely_parts += parts_to_merge_ranges[range_index]->getRowsCount();
+                sum_marks_in_lonely_parts += parts_to_merge_ranges[range_index]->getMarksCount();
                 lonely_parts.push_back(std::move(*parts_to_merge_ranges[range_index]));
                 continue;
             }
@@ -1485,9 +1480,21 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
 
         size_t num_streams_for_lonely_parts = num_streams * lonely_parts.size();
 
+        const size_t min_marks_for_concurrent_read = minMarksForConcurrentRead(
+            settings.merge_tree_min_rows_for_concurrent_read,
+            settings.merge_tree_min_bytes_for_concurrent_read,
+            data_settings->index_granularity,
+            index_granularity_bytes,
+            sum_marks_in_lonely_parts);
+
+        /// Reduce the number of num_streams_for_lonely_parts if the data is small.
+        if (sum_marks_in_lonely_parts < num_streams_for_lonely_parts * min_marks_for_concurrent_read && lonely_parts.size() < num_streams_for_lonely_parts)
+            num_streams_for_lonely_parts = std::max((sum_marks_in_lonely_parts + min_marks_for_concurrent_read - 1) / min_marks_for_concurrent_read, lonely_parts.size());
+
+
         MergeTreeReadPoolPtr pool = std::make_shared<MergeTreeReadPool>(
             num_streams_for_lonely_parts,
-            sum_marks,
+            sum_marks_in_lonely_parts,
             min_marks_for_concurrent_read,
             std::move(lonely_parts),
             data,
diff --git a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql
index 0e0f0325c7b..b50e47daa0c 100644
--- a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql
+++ b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql
@@ -1,5 +1,7 @@
 DROP TABLE IF EXISTS select_final;
 
+SET do_not_merge_across_partitions_select_final = 1;
+
 CREATE TABLE select_final (t DateTime, x Int32, string String) ENGINE = ReplacingMergeTree() PARTITION BY toYYYYMM(t) ORDER BY (x, t); 
 
 INSERT INTO select_final SELECT toDate('2000-01-01'), number, '' FROM numbers(2);
@@ -9,7 +11,7 @@ INSERT INTO select_final SELECT toDate('2020-01-01'), number, '' FROM numbers(2)
 INSERT INTO select_final SELECT toDate('2020-01-01'), number + 1, '' FROM numbers(2);
 
 
-SELECT * FROM select_final FINAL ORDER BY x SETTINGS do_not_merge_across_partitions_select_final = 1;
+SELECT * FROM select_final FINAL ORDER BY x;
 
 TRUNCATE TABLE select_final;
 
@@ -21,8 +23,18 @@ OPTIMIZE TABLE select_final FINAL;
 INSERT INTO select_final SELECT toDate('2020-01-01'), number, '' FROM numbers(2);
 INSERT INTO select_final SELECT toDate('2020-01-01'), number, 'updated' FROM numbers(2);
 
-SELECT max(x) FROM select_final FINAL where string = 'updated' SETTINGS do_not_merge_across_partitions_select_final = 1;
-SELECT arrayUniq(thread_ids) FROM system.query_log ORDER BY event_time LIMIT 1;
+SELECT max(x) FROM select_final FINAL where string = 'updated';
+
+TRUNCATE TABLE select_final;
+
+INSERT INTO select_final SELECT toDate('2000-01-01'), number, '' FROM numbers(500000);
+
+OPTIMIZE TABLE select_final FINAL;
+
+SELECT max(x) FROM select_final FINAL;
+
+SYSTEM FLUSH LOGS;
+
+SELECT length(thread_ids) > 1 FROM system.query_log WHERE query='SELECT max(x) FROM select_final FINAL;' AND type='QueryFinish' ORDER BY event_time DESC LIMIT 1;
 
 DROP TABLE select_final;
-

From 89c4055202b9d08459f90ee5791d4e3017b82fbf Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Fri, 29 Jan 2021 21:37:57 +0300
Subject: [PATCH 0392/1238] Lock MEMORY_LIMIT_EXCEEDED error from
 tryLogCurrentException()

This will avoid hiding some exceptions in logs, when the server is under
high memory pressure (i.e. when any new allocation will lead to
MEMORY_LIMIT_EXCEEDED error).

This became more relevent after all memory allocations was tracked with
MemoryTracker, by falling back to total_memory_tracking, in #16121
---
 src/Common/Exception.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp
index 231b45a49c6..f5a40a11d9c 100644
--- a/src/Common/Exception.cpp
+++ b/src/Common/Exception.cpp
@@ -119,6 +119,13 @@ void tryLogCurrentException(const char * log_name, const std::string & start_of_
 
 void tryLogCurrentException(Poco::Logger * logger, const std::string & start_of_message)
 {
+    /// Under high memory pressure, any new allocation will definitelly lead
+    /// to MEMORY_LIMIT_EXCEEDED exception.
+    ///
+    /// And in this case the exception will not be logged, so let's block the
+    /// MemoryTracker until the exception will be logged.
+    MemoryTracker::LockExceptionInThread lock_memory_tracker;
+
     try
     {
         if (start_of_message.empty())

From 1fe003b85e6af635fd3359c16c77355ad1c71803 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Fri, 29 Jan 2021 22:09:34 +0300
Subject: [PATCH 0393/1238] Make system.asynchronous_metrics available just
 after start

---
 src/Interpreters/AsynchronousMetrics.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/Interpreters/AsynchronousMetrics.h b/src/Interpreters/AsynchronousMetrics.h
index 88c2221be76..f727b1d6b48 100644
--- a/src/Interpreters/AsynchronousMetrics.h
+++ b/src/Interpreters/AsynchronousMetrics.h
@@ -58,6 +58,9 @@ public:
     /// Separate method allows to initialize the `servers` variable beforehand.
     void start()
     {
+        /// Update once right now, to make metrics available just after server start
+        /// (without waiting for asynchronous_metrics_update_period_s).
+        update();
         thread = std::make_unique<ThreadFromGlobalPool>([this] { run(); });
     }
 

From 3001608f2c64db055948355696f8efa7e1f7b540 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Fri, 29 Jan 2021 22:22:38 +0300
Subject: [PATCH 0394/1238] check-style: fix typo (s/pytest/pylint) in docker
 image

---
 docker/test/style/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile
index 7047007d2fc..f1f40fa3090 100644
--- a/docker/test/style/Dockerfile
+++ b/docker/test/style/Dockerfile
@@ -1,7 +1,7 @@
 # docker build -t yandex/clickhouse-style-test .
 FROM ubuntu:20.04
 
-RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes shellcheck libxml2-utils git python3-pip python3-pytest && pip3 install codespell
+RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes shellcheck libxml2-utils git python3-pip pylint && pip3 install codespell
 
 
 CMD cd /ClickHouse/utils/check-style && \

From 2430551f1c1d2558aac4af405141694825d00057 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Fri, 29 Jan 2021 22:30:13 +0300
Subject: [PATCH 0395/1238] style-check: count stderr as an error too

Since there were precedents
---
 docker/test/style/Dockerfile | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile
index f1f40fa3090..74af8eafc17 100644
--- a/docker/test/style/Dockerfile
+++ b/docker/test/style/Dockerfile
@@ -4,9 +4,13 @@ FROM ubuntu:20.04
 RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes shellcheck libxml2-utils git python3-pip pylint && pip3 install codespell
 
 
+# For |& syntax
+SHELL ["bash", "-c"]
+
 CMD cd /ClickHouse/utils/check-style && \
-    ./check-style -n | tee /test_output/style_output.txt && \
-    ./check-typos | tee /test_output/typos_output.txt && \
-    ./check-whitespaces -n | tee /test_output/whitespaces_output.txt && \
-    ./check-duplicate-includes.sh | tee /test_output/duplicate_output.txt && \
-    ./shellcheck-run.sh | tee /test_output/shellcheck_output.txt
+    ./check-style -n              |& tee /test_output/style_output.txt && \
+    ./check-typos                 |& tee /test_output/typos_output.txt && \
+    ./check-whitespaces -n        |& tee /test_output/whitespaces_output.txt && \
+    ./check-duplicate-includes.sh |& tee /test_output/duplicate_output.txt && \
+    ./shellcheck-run.sh           |& tee /test_output/shellcheck_output.txt && \
+    true

From 480c75bacf67820b9e8aa07a547bfb37d3a0efd1 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Fri, 29 Jan 2021 23:11:53 +0300
Subject: [PATCH 0396/1238] check-style: pass path to rc file for pylint

---
 utils/check-style/check-style | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils/check-style/check-style b/utils/check-style/check-style
index 9e2b5fc6fef..8f0901e84b6 100755
--- a/utils/check-style/check-style
+++ b/utils/check-style/check-style
@@ -70,7 +70,7 @@ find $ROOT_PATH/{src,base,programs,utils} -name '*.xml' |
     xargs xmllint --noout --nonet
 
 # FIXME: for now only clickhouse-test
-pylint --score=n $ROOT_PATH/tests/clickhouse-test
+pylint --rcfile=$ROOT_PATH/.pylintrc --score=n $ROOT_PATH/tests/clickhouse-test
 
 # Machine translation to Russian is strictly prohibited
 find $ROOT_PATH/docs/ru -name '*.md' |

From d3fe53a44fa4bc181bd853b63814dfff8b1841bc Mon Sep 17 00:00:00 2001
From: Pavel Kruglov <avogar@sandbox-633380738>
Date: Fri, 29 Jan 2021 23:15:26 +0300
Subject: [PATCH 0397/1238] Update reference

---
 ...01524_do_not_merge_across_partitions_select_final.reference | 3 ++-
 .../01524_do_not_merge_across_partitions_select_final.sql      | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.reference b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.reference
index 963359440c6..87eb40c57b0 100644
--- a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.reference
+++ b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.reference
@@ -5,4 +5,5 @@
 2000-01-01 00:00:00	2	
 2020-01-01 00:00:00	2	
 1
-16
+499999
+1
diff --git a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql
index b50e47daa0c..5d20330014a 100644
--- a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql
+++ b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql
@@ -35,6 +35,6 @@ SELECT max(x) FROM select_final FINAL;
 
 SYSTEM FLUSH LOGS;
 
-SELECT length(thread_ids) > 1 FROM system.query_log WHERE query='SELECT max(x) FROM select_final FINAL;' AND type='QueryFinish' ORDER BY event_time DESC LIMIT 1;
+SELECT length(thread_ids) > 1 FROM system.query_log WHERE query='SELECT max(x) FROM select_final FINAL;' AND type='QueryFinish' AND current_database = currentDatabase() ORDER BY event_time DESC LIMIT 1;
 
 DROP TABLE select_final;

From 7d9c892e60580f94f8cde37277d8c793df023469 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Fri, 29 Jan 2021 23:20:57 +0300
Subject: [PATCH 0398/1238] check-style: fix "fatal: not a git repository:
 /place/sandbox-data/tasks/0/2/882869720/ClickHouse/.git/modules/contrib/AMQP-CPP"
 error

check-style uses "git status" to see changed ya.make files.

However it seems that that the sources was cloned some ancient git
version, likely <2.8, since there was a bug, that has been fixed in 2.8
[1]:

    " * A partial rewrite of "git submodule" in the 2.7 timeframe changed
        the way the gitdir: pointer in the submodules point at the real
        repository location to use absolute paths by accident.  This has
        been corrected."

  [1]: https://github.com/git/git/blob/cf11a67975b057a144618badf16dc4e3d25b9407/Documentation/RelNotes/2.8.3.txt#L33-L36
---
 utils/check-style/check-style | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/utils/check-style/check-style b/utils/check-style/check-style
index 8f0901e84b6..f62c7ca5849 100755
--- a/utils/check-style/check-style
+++ b/utils/check-style/check-style
@@ -107,7 +107,23 @@ find $ROOT_PATH -not -path $ROOT_PATH'/docker*' -not -path $ROOT_PATH'/contrib*'
 
 # Check that ya.make files are auto-generated
 "$ROOT_PATH"/utils/generate-ya-make/generate-ya-make.sh
-git status -uno | grep ya.make && echo "ya.make files should be generated with utils/generate-ya-make/generate-ya-make.sh"
+# FIXME: apparently sandbox (don't confuse it with docker) cloning sources
+# using some ancient git version, <2.8, that contains one bug for submodules
+# initialization [1]:
+#
+#    " * A partial rewrite of "git submodule" in the 2.7 timeframe changed
+#        the way the gitdir: pointer in the submodules point at the real
+#        repository location to use absolute paths by accident.  This has
+#        been corrected."
+#
+#  [1]: https://github.com/git/git/blob/cf11a67975b057a144618badf16dc4e3d25b9407/Documentation/RelNotes/2.8.3.txt#L33-L36
+#
+# Due to which "git status" will report the following error:
+#
+#     fatal: not a git repository: /place/sandbox-data/tasks/0/2/882869720/ClickHouse/.git/modules/contrib/AMQP-CPP
+#
+# Anyway this check does not requires any submodule traverse, so it is fine to ignore those errors.
+git status -uno 2> >(grep "fatal: not a git repository: /place/sandbox-data/tasks/.*/ClickHouse/\\.git/modules/contrib") | grep ya.make && echo "ya.make files should be generated with utils/generate-ya-make/generate-ya-make.sh"
 
 # Check that every header file has #pragma once in first line
 find $ROOT_PATH/{src,programs,utils} -name '*.h' |

From 2a3aaed562281ba40cb7290fb15aa12c4368f664 Mon Sep 17 00:00:00 2001
From: Pavel Kruglov <avogar@sandbox-633380738>
Date: Sat, 30 Jan 2021 00:41:44 +0300
Subject: [PATCH 0399/1238] Update test

---
 .../01524_do_not_merge_across_partitions_select_final.reference | 2 +-
 .../01524_do_not_merge_across_partitions_select_final.sql       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.reference b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.reference
index 87eb40c57b0..a3f2106cd5f 100644
--- a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.reference
+++ b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.reference
@@ -6,4 +6,4 @@
 2020-01-01 00:00:00	2	
 1
 499999
-1
+5
diff --git a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql
index 5d20330014a..25c47c008bd 100644
--- a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql
+++ b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql
@@ -35,6 +35,6 @@ SELECT max(x) FROM select_final FINAL;
 
 SYSTEM FLUSH LOGS;
 
-SELECT length(thread_ids) > 1 FROM system.query_log WHERE query='SELECT max(x) FROM select_final FINAL;' AND type='QueryFinish' AND current_database = currentDatabase() ORDER BY event_time DESC LIMIT 1;
+SELECT length(thread_ids) FROM system.query_log WHERE query='SELECT max(x) FROM select_final FINAL;' AND type='QueryFinish' AND current_database = currentDatabase() ORDER BY event_time DESC LIMIT 1;
 
 DROP TABLE select_final;

From c53c414b99043ef41c7a08eb39e0e333b6b0c856 Mon Sep 17 00:00:00 2001
From: Pavel Kruglov <avogar@sandbox-633380738>
Date: Sat, 30 Jan 2021 01:20:05 +0300
Subject: [PATCH 0400/1238] Add test for incorrect data on insert into
 CollapsingMegeTree

---
 ...9_incorrect_data_on_insert_collapsing.reference |  1 +
 .../01679_incorrect_data_on_insert_collapsing.sh   | 14 ++++++++++++++
 2 files changed, 15 insertions(+)
 create mode 100644 tests/queries/0_stateless/01679_incorrect_data_on_insert_collapsing.reference
 create mode 100755 tests/queries/0_stateless/01679_incorrect_data_on_insert_collapsing.sh

diff --git a/tests/queries/0_stateless/01679_incorrect_data_on_insert_collapsing.reference b/tests/queries/0_stateless/01679_incorrect_data_on_insert_collapsing.reference
new file mode 100644
index 00000000000..d86bac9de59
--- /dev/null
+++ b/tests/queries/0_stateless/01679_incorrect_data_on_insert_collapsing.reference
@@ -0,0 +1 @@
+OK
diff --git a/tests/queries/0_stateless/01679_incorrect_data_on_insert_collapsing.sh b/tests/queries/0_stateless/01679_incorrect_data_on_insert_collapsing.sh
new file mode 100755
index 00000000000..e8d89c2e45a
--- /dev/null
+++ b/tests/queries/0_stateless/01679_incorrect_data_on_insert_collapsing.sh
@@ -0,0 +1,14 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS collapsing_merge_tree"
+
+${CLICKHOUSE_CLIENT} --query "CREATE TABLE collapsing_merge_tree (key UInt32, sign Int8, date Datetime) ENGINE=CollapsingMergeTree(sign) PARTITION BY date ORDER BY key"
+
+${CLICKHOUSE_CLIENT} --query "INSERT INTO collapsing_merge_tree VALUES (1, -117, '2020-01-01')" 2>&1 | grep -q 'Incorrect data: Sign = -117' && echo 'OK' || echo 'FAIL'; 
+
+${CLICKHOUSE_CLIENT} --query "DROP TABLE collapsing_merge_tree;"
+

From a9d9a8f3a98b1803c41e3643364b90d4b13c6d41 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Sat, 30 Jan 2021 01:21:51 +0300
Subject: [PATCH 0401/1238] Update Suggest.cpp

---
 programs/client/Suggest.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/programs/client/Suggest.cpp b/programs/client/Suggest.cpp
index 297b371d987..dfa7048349e 100644
--- a/programs/client/Suggest.cpp
+++ b/programs/client/Suggest.cpp
@@ -87,10 +87,8 @@ Suggest::Suggest()
 
 void Suggest::loadImpl(Connection & connection, const ConnectionTimeouts & timeouts, size_t suggestion_limit)
 {
-    ///
     /// NOTE: Once you will update the completion list,
     /// do not forget to update 01676_clickhouse_client_autocomplete.sh
-    ///
 
     std::stringstream query;        // STYLE_CHECK_ALLOW_STD_STRING_STREAM
     query << "SELECT DISTINCT arrayJoin(extractAll(name, '[\\\\w_]{2,}')) AS res FROM ("

From 8545ce11115f2fd2a311aa185ad37c6533bd11df Mon Sep 17 00:00:00 2001
From: Vitaliy Zakaznikov <vzakaznikov@protonmail.com>
Date: Fri, 29 Jan 2021 17:55:08 -0500
Subject: [PATCH 0402/1238] Adding support for retrying docker-compose start,
 stop and restart commands. Fixing ldap tests to use large tail value to
 accomodate changes in exception length.

---
 tests/testflows/helpers/cluster.py            | 43 +++++++++++++------
 .../ldap/authentication/tests/common.py       |  4 +-
 .../authentication/tests/server_config.py     |  2 +-
 .../ldap/authentication/tests/user_config.py  |  4 +-
 .../external_user_directory/tests/common.py   |  2 +-
 .../tests/server_config.py                    | 10 ++---
 .../ldap/role_mapping/tests/server_config.py  |  4 +-
 7 files changed, 43 insertions(+), 26 deletions(-)

diff --git a/tests/testflows/helpers/cluster.py b/tests/testflows/helpers/cluster.py
index 3be79132ec3..5e8717e7a8e 100755
--- a/tests/testflows/helpers/cluster.py
+++ b/tests/testflows/helpers/cluster.py
@@ -26,7 +26,7 @@ class Node(object):
     def repr(self):
         return f"Node(name='{self.name}')"
 
-    def restart(self, timeout=300, safe=True):
+    def restart(self, timeout=300, retries=5):
         """Restart node.
         """
         with self.cluster.lock:
@@ -35,15 +35,20 @@ class Node(object):
                     shell = self.cluster._bash.pop(key)
                     shell.__exit__(None, None, None)
 
-        self.cluster.command(None, f'{self.cluster.docker_compose} restart {self.name}', timeout=timeout)
+        for retry in range(retries):
+            r = self.cluster.command(None, f'{self.cluster.docker_compose} restart {self.name}', timeout=timeout)
+            if r.exitcode == 0:
+                break
 
-    def start(self, timeout=300, safe=True):
+    def start(self, timeout=300, retries=5):
         """Start node.
         """
-        self.cluster.command(None, f'{self.cluster.docker_compose} start {self.name}', timeout=timeout)
+        for retry in range(retries):
+            r = self.cluster.command(None, f'{self.cluster.docker_compose} start {self.name}', timeout=timeout)
+            if r.exitcode == 0:
+                break
 
-
-    def stop(self, timeout=300, safe=True):
+    def stop(self, timeout=300, retries=5):
         """Stop node.
         """
         with self.cluster.lock:
@@ -52,7 +57,10 @@ class Node(object):
                     shell = self.cluster._bash.pop(key)
                     shell.__exit__(None, None, None)
 
-        self.cluster.command(None, f'{self.cluster.docker_compose} stop {self.name}', timeout=timeout)
+        for retry in range(retries):
+            r = self.cluster.command(None, f'{self.cluster.docker_compose} stop {self.name}', timeout=timeout)
+            if r.exitcode == 0:
+                break
 
     def command(self, *args, **kwargs):
         return self.cluster.command(self.name, *args, **kwargs)
@@ -71,7 +79,7 @@ class ClickHouseNode(Node):
                     continue
                 assert False, "container is not healthy"
 
-    def stop(self, timeout=300, safe=True):
+    def stop(self, timeout=300, safe=True, retries=5):
         """Stop node.
         """
         if safe:
@@ -89,17 +97,23 @@ class ClickHouseNode(Node):
                     shell = self.cluster._bash.pop(key)
                     shell.__exit__(None, None, None)
 
-        self.cluster.command(None, f'{self.cluster.docker_compose} stop {self.name}', timeout=timeout)
+        for retry in range(retries):
+            r = self.cluster.command(None, f'{self.cluster.docker_compose} stop {self.name}', timeout=timeout)
+            if r.exitcode == 0:
+                break
 
-    def start(self, timeout=300, wait_healthy=True):
+    def start(self, timeout=300, wait_healthy=True, retries=5):
         """Start node.
         """
-        self.cluster.command(None, f'{self.cluster.docker_compose} start {self.name}', timeout=timeout)
+        for retry in range(retries):
+            r = self.cluster.command(None, f'{self.cluster.docker_compose} start {self.name}', timeout=timeout)
+            if r.exitcode == 0:
+                break
 
         if wait_healthy:
             self.wait_healthy(timeout)
 
-    def restart(self, timeout=300, safe=True, wait_healthy=True):
+    def restart(self, timeout=300, safe=True, wait_healthy=True, retries=5):
         """Restart node.
         """
         if safe:
@@ -117,7 +131,10 @@ class ClickHouseNode(Node):
                     shell = self.cluster._bash.pop(key)
                     shell.__exit__(None, None, None)
 
-        self.cluster.command(None, f'{self.cluster.docker_compose} restart {self.name}', timeout=timeout)
+        for retry in range(retries):
+            r = self.cluster.command(None, f'{self.cluster.docker_compose} restart {self.name}', timeout=timeout)
+            if r.exitcode == 0:
+                break
 
         if wait_healthy:
             self.wait_healthy(timeout)
diff --git a/tests/testflows/ldap/authentication/tests/common.py b/tests/testflows/ldap/authentication/tests/common.py
index 8efb389a23f..7f9f16e827c 100644
--- a/tests/testflows/ldap/authentication/tests/common.py
+++ b/tests/testflows/ldap/authentication/tests/common.py
@@ -270,7 +270,7 @@ def ldap_authenticated_users(*users, config_d_dir="/etc/clickhouse-server/users.
             config = create_ldap_users_config_content(*users, config_d_dir=config_d_dir, config_file=config_file)
         return add_config(config, restart=restart)
 
-def invalid_server_config(servers, message=None, tail=13, timeout=60):
+def invalid_server_config(servers, message=None, tail=30, timeout=60):
     """Check that ClickHouse errors when trying to load invalid LDAP servers configuration file.
     """
     node = current().context.node
@@ -299,7 +299,7 @@ def invalid_server_config(servers, message=None, tail=13, timeout=60):
             with By("removing the config file", description=config.path):
                 node.command(f"rm -rf {config.path}", exitcode=0)
 
-def invalid_user_config(servers, config, message=None, tail=13, timeout=60):
+def invalid_user_config(servers, config, message=None, tail=30, timeout=60):
     """Check that ClickHouse errors when trying to load invalid LDAP users configuration file.
     """
     node = current().context.node
diff --git a/tests/testflows/ldap/authentication/tests/server_config.py b/tests/testflows/ldap/authentication/tests/server_config.py
index 38ec859226b..4053b5f61ed 100644
--- a/tests/testflows/ldap/authentication/tests/server_config.py
+++ b/tests/testflows/ldap/authentication/tests/server_config.py
@@ -245,7 +245,7 @@ def invalid_verification_cooldown_value(self, invalid_value, timeout=20):
         }}
 
     with When("I try to use this configuration then it should not work"):
-        invalid_server_config(servers, message=error_message, tail=17, timeout=timeout)
+        invalid_server_config(servers, message=error_message, tail=30, timeout=timeout)
 
 @TestScenario
 @Requirements(
diff --git a/tests/testflows/ldap/authentication/tests/user_config.py b/tests/testflows/ldap/authentication/tests/user_config.py
index 36ed33ed17a..0f296ea31c6 100644
--- a/tests/testflows/ldap/authentication/tests/user_config.py
+++ b/tests/testflows/ldap/authentication/tests/user_config.py
@@ -39,7 +39,7 @@ def empty_server_name(self, timeout=20):
         "message": "DB::Exception: user1: Authentication failed: password is incorrect or there is no user with such name"
     }]
     config = create_ldap_users_config_content(*users)
-    invalid_user_config(servers, config, message=message, tail=15, timeout=timeout)
+    invalid_user_config(servers, config, message=message, tail=30, timeout=timeout)
 
 @TestScenario
 @Requirements(
@@ -147,7 +147,7 @@ def ldap_and_password(self):
     error_message = "DB::Exception: More than one field of 'password'"
 
     with Then("I expect an error when I try to load the configuration file", description=error_message):
-        invalid_user_config(servers, new_config, message=error_message, tail=16)
+        invalid_user_config(servers, new_config, message=error_message, tail=30)
 
 @TestFeature
 @Name("user config")
diff --git a/tests/testflows/ldap/external_user_directory/tests/common.py b/tests/testflows/ldap/external_user_directory/tests/common.py
index e5980640721..23a8d68be0d 100644
--- a/tests/testflows/ldap/external_user_directory/tests/common.py
+++ b/tests/testflows/ldap/external_user_directory/tests/common.py
@@ -133,7 +133,7 @@ def create_entries_ldap_external_user_directory_config_content(entries, config_d
 
     return Config(content, path, name, uid, "config.xml")
 
-def invalid_ldap_external_user_directory_config(server, roles, message, tail=20, timeout=60, config=None):
+def invalid_ldap_external_user_directory_config(server, roles, message, tail=30, timeout=60, config=None):
     """Check that ClickHouse errors when trying to load invalid LDAP external user directory
     configuration file.
     """
diff --git a/tests/testflows/ldap/external_user_directory/tests/server_config.py b/tests/testflows/ldap/external_user_directory/tests/server_config.py
index 4e2e586f77c..8d0d1db976a 100644
--- a/tests/testflows/ldap/external_user_directory/tests/server_config.py
+++ b/tests/testflows/ldap/external_user_directory/tests/server_config.py
@@ -41,7 +41,7 @@ def invalid_host(self):
     RQ_SRS_009_LDAP_ExternalUserDirectory_Configuration_Server_Invalid("1.0"),
     RQ_SRS_009_LDAP_ExternalUserDirectory_Configuration_Server_Host("1.0")
 )
-def empty_host(self, tail=20, timeout=60):
+def empty_host(self, tail=30, timeout=60):
     """Check that server returns an error when LDAP server
     host value is empty.
     """
@@ -50,14 +50,14 @@ def empty_host(self, tail=20, timeout=60):
 
     servers = {"foo": {"host": "", "port": "389", "enable_tls": "no"}}
 
-    invalid_server_config(servers, message=message, tail=16, timeout=timeout)
+    invalid_server_config(servers, message=message, tail=30, timeout=timeout)
 
 @TestScenario
 @Requirements(
     RQ_SRS_009_LDAP_ExternalUserDirectory_Configuration_Server_Invalid("1.0"),
     RQ_SRS_009_LDAP_ExternalUserDirectory_Configuration_Server_Host("1.0")
 )
-def missing_host(self, tail=20, timeout=60):
+def missing_host(self, tail=30, timeout=60):
     """Check that server returns an error when LDAP server
     host is missing.
     """
@@ -148,7 +148,7 @@ def invalid_enable_tls_value(self, timeout=60):
     servers = {"openldap1": {"host": "openldap1", "port": "389", "enable_tls": "foo",
         "auth_dn_prefix": "cn=", "auth_dn_suffix": ",ou=users,dc=company,dc=com"
     }}
-    invalid_server_config(servers, message=message, tail=18, timeout=timeout)
+    invalid_server_config(servers, message=message, tail=30, timeout=timeout)
 
 @TestScenario
 @Requirements(
@@ -259,7 +259,7 @@ def invalid_verification_cooldown_value(self, invalid_value, timeout=20):
         }}
 
     with When("I try to use this configuration then it should not work"):
-        invalid_server_config(servers, message=error_message, tail=17, timeout=timeout)
+        invalid_server_config(servers, message=error_message, tail=30, timeout=timeout)
 
 @TestScenario
 @Requirements(
diff --git a/tests/testflows/ldap/role_mapping/tests/server_config.py b/tests/testflows/ldap/role_mapping/tests/server_config.py
index 85fe33f4388..8008d9003d7 100644
--- a/tests/testflows/ldap/role_mapping/tests/server_config.py
+++ b/tests/testflows/ldap/role_mapping/tests/server_config.py
@@ -65,7 +65,7 @@ def bind_dn_conflict_with_auth_dn(self, timeout=60):
         }
     }
 
-    invalid_server_config(servers, message=message, tail=18, timeout=timeout)
+    invalid_server_config(servers, message=message, tail=30, timeout=timeout)
 
 
 @TestFeature
@@ -75,4 +75,4 @@ def feature(self, node="clickhouse1"):
     """
     self.context.node = self.context.cluster.node(node)
     for scenario in loads(current_module(), Scenario):
-        scenario()
\ No newline at end of file
+        scenario()

From eebff7155ca036e46786c55c3d66b7b4518f77cc Mon Sep 17 00:00:00 2001
From: Vitaliy Zakaznikov <vzakaznikov@protonmail.com>
Date: Fri, 29 Jan 2021 19:43:33 -0500
Subject: [PATCH 0403/1238] Adding SRS source for LDAP role mapping.

---
 .../role_mapping/requirements/requirements.md | 504 ++++++++++++++++++
 .../role_mapping/requirements/requirements.py |  58 +-
 2 files changed, 524 insertions(+), 38 deletions(-)
 create mode 100644 tests/testflows/ldap/role_mapping/requirements/requirements.md

diff --git a/tests/testflows/ldap/role_mapping/requirements/requirements.md b/tests/testflows/ldap/role_mapping/requirements/requirements.md
new file mode 100644
index 00000000000..e79baa9cd7c
--- /dev/null
+++ b/tests/testflows/ldap/role_mapping/requirements/requirements.md
@@ -0,0 +1,504 @@
+# SRS-014 ClickHouse LDAP Role Mapping
+# Software Requirements Specification
+
+## Table of Contents
+
+* 1 [Revision History](#revision-history)
+* 2 [Introduction](#introduction)
+* 3 [Terminology](#terminology)
+  * 3.1 [LDAP](#ldap)
+* 4 [Requirements](#requirements)
+  * 4.1 [General](#general)
+    * 4.1.1 [RQ.SRS-014.LDAP.RoleMapping](#rqsrs-014ldaprolemapping)
+    * 4.1.2 [RQ.SRS-014.LDAP.RoleMapping.WithFixedRoles](#rqsrs-014ldaprolemappingwithfixedroles)
+    * 4.1.3 [RQ.SRS-014.LDAP.RoleMapping.Search](#rqsrs-014ldaprolemappingsearch)
+  * 4.2 [Mapped Role Names](#mapped-role-names)
+    * 4.2.1 [RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.WithUTF8Characters](#rqsrs-014ldaprolemappingmaprolenamewithutf8characters)
+    * 4.2.2 [RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.Long](#rqsrs-014ldaprolemappingmaprolenamelong)
+    * 4.2.3 [RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.WithSpecialXMLCharacters](#rqsrs-014ldaprolemappingmaprolenamewithspecialxmlcharacters)
+    * 4.2.4 [RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.WithSpecialRegexCharacters](#rqsrs-014ldaprolemappingmaprolenamewithspecialregexcharacters)
+  * 4.3 [Multiple Roles](#multiple-roles)
+    * 4.3.1 [RQ.SRS-014.LDAP.RoleMapping.Map.MultipleRoles](#rqsrs-014ldaprolemappingmapmultipleroles)
+  * 4.4 [LDAP Groups](#ldap-groups)
+    * 4.4.1 [RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.Removed](#rqsrs-014ldaprolemappingldapgroupremoved)
+    * 4.4.2 [RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.RemovedAndAdded.Parallel](#rqsrs-014ldaprolemappingldapgroupremovedandaddedparallel)
+    * 4.4.3 [RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.UserRemoved](#rqsrs-014ldaprolemappingldapgroupuserremoved)
+    * 4.4.4 [RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.UserRemovedAndAdded.Parallel](#rqsrs-014ldaprolemappingldapgroupuserremovedandaddedparallel)
+  * 4.5 [RBAC Roles](#rbac-roles)
+    * 4.5.1 [RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.NotPresent](#rqsrs-014ldaprolemappingrbacrolenotpresent)
+    * 4.5.2 [RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.Added](#rqsrs-014ldaprolemappingrbacroleadded)
+    * 4.5.3 [RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.Removed](#rqsrs-014ldaprolemappingrbacroleremoved)
+    * 4.5.4 [RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.Readded](#rqsrs-014ldaprolemappingrbacrolereadded)
+    * 4.5.5 [RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.RemovedAndAdded.Parallel](#rqsrs-014ldaprolemappingrbacroleremovedandaddedparallel)
+    * 4.5.6 [RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.New](#rqsrs-014ldaprolemappingrbacrolenew)
+    * 4.5.7 [RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.NewPrivilege](#rqsrs-014ldaprolemappingrbacrolenewprivilege)
+    * 4.5.8 [RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.RemovedPrivilege](#rqsrs-014ldaprolemappingrbacroleremovedprivilege)
+  * 4.6 [Authentication](#authentication)
+    * 4.6.1 [RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel](#rqsrs-014ldaprolemappingauthenticationparallel)
+    * 4.6.2 [RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.ValidAndInvalid](#rqsrs-014ldaprolemappingauthenticationparallelvalidandinvalid)
+    * 4.6.3 [RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.MultipleServers](#rqsrs-014ldaprolemappingauthenticationparallelmultipleservers)
+    * 4.6.4 [RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.LocalOnly](#rqsrs-014ldaprolemappingauthenticationparallellocalonly)
+    * 4.6.5 [RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.LocalAndMultipleLDAP](#rqsrs-014ldaprolemappingauthenticationparallellocalandmultipleldap)
+    * 4.6.6 [RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.SameUser](#rqsrs-014ldaprolemappingauthenticationparallelsameuser)
+  * 4.7 [Server Configuration](#server-configuration)
+    * 4.7.1 [BindDN Parameter](#binddn-parameter)
+      * 4.7.1.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.BindDN](#rqsrs-014ldaprolemappingconfigurationserverbinddn)
+      * 4.7.1.2 [RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.BindDN.ConflictWith.AuthDN](#rqsrs-014ldaprolemappingconfigurationserverbinddnconflictwithauthdn)
+  * 4.8 [External User Directory Configuration](#external-user-directory-configuration)
+    * 4.8.1 [Syntax](#syntax)
+      * 4.8.1.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Syntax](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingsyntax)
+    * 4.8.2 [Special Characters Escaping](#special-characters-escaping)
+      * 4.8.2.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.SpecialCharactersEscaping](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingspecialcharactersescaping)
+    * 4.8.3 [Multiple Sections](#multiple-sections)
+      * 4.8.3.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.MultipleSections](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingmultiplesections)
+      * 4.8.3.2 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.MultipleSections.IdenticalParameters](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingmultiplesectionsidenticalparameters)
+    * 4.8.4 [BaseDN Parameter](#basedn-parameter)
+      * 4.8.4.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.BaseDN](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingbasedn)
+    * 4.8.5 [Attribute Parameter](#attribute-parameter)
+      * 4.8.5.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Attribute](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingattribute)
+    * 4.8.6 [Scope Parameter](#scope-parameter)
+      * 4.8.6.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingscope)
+      * 4.8.6.2 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Base](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingscopevaluebase)
+      * 4.8.6.3 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.OneLevel](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingscopevalueonelevel)
+      * 4.8.6.4 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Children](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingscopevaluechildren)
+      * 4.8.6.5 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Subtree](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingscopevaluesubtree)
+      * 4.8.6.6 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Default](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingscopevaluedefault)
+    * 4.8.7 [Search Filter Parameter](#search-filter-parameter)
+      * 4.8.7.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.SearchFilter](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingsearchfilter)
+    * 4.8.8 [Prefix Parameter](#prefix-parameter)
+      * 4.8.8.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingprefix)
+      * 4.8.8.2 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.Default](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingprefixdefault)
+      * 4.8.8.3 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.WithUTF8Characters](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingprefixwithutf8characters)
+      * 4.8.8.4 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.WithSpecialXMLCharacters](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingprefixwithspecialxmlcharacters)
+      * 4.8.8.5 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.WithSpecialRegexCharacters](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingprefixwithspecialregexcharacters)
+* 5 [References](#references)
+
+## Revision History
+
+This document is stored in an electronic form using [Git] source control management software
+hosted in a [GitHub Repository].
+All the updates are tracked using the [Revision History].
+
+## Introduction
+
+The [SRS-007 ClickHouse Authentication of Users via LDAP] added support for authenticating
+users using an [LDAP] server and the [SRS-009 ClickHouse LDAP External User Directory] added
+support for authenticating users using an [LDAP] external user directory. 
+
+This requirements specification adds additional functionality for mapping [LDAP] groups to 
+the corresponding [ClickHouse] [RBAC] roles when [LDAP] external user directory is configured.
+This functionality will enable easier access management for [LDAP] authenticated users
+as the privileges granted by the roles can be granted or revoked by granting or revoking
+a corresponding [LDAP] group to one or more [LDAP] users.
+
+For the use case when only [LDAP] user authentication is used, the roles can be
+managed using [RBAC] in the same way as for non-[LDAP] authenticated users.
+
+## Terminology
+
+### LDAP
+
+* Lightweight Directory Access Protocol
+
+## Requirements
+
+### General
+
+#### RQ.SRS-014.LDAP.RoleMapping
+version: 1.0
+
+[ClickHouse] SHALL support mapping of [LDAP] groups to [RBAC] roles
+for users authenticated using [LDAP] external user directory.
+
+#### RQ.SRS-014.LDAP.RoleMapping.WithFixedRoles
+version: 1.0
+
+[ClickHouse] SHALL support mapping of [LDAP] groups to [RBAC] roles
+for users authenticated using [LDAP] external user directory when
+one or more roles are specified in the `<roles>` section.
+
+#### RQ.SRS-014.LDAP.RoleMapping.Search
+version: 1.0
+
+[ClickHouse] SHALL perform search on the [LDAP] server and map the results to [RBAC] role names 
+when authenticating users using the [LDAP] external user directory if the `<role_mapping>` section is configured
+as part of the [LDAP] external user directory. The matched roles SHALL be assigned to the user.
+
+### Mapped Role Names
+
+#### RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.WithUTF8Characters
+version: 1.0
+
+[ClickHouse] SHALL support mapping [LDAP] search results for users authenticated using [LDAP] external user directory
+to an [RBAC] role that contains UTF-8 characters.
+
+#### RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.Long
+version: 1.0
+
+[ClickHouse] SHALL support mapping [LDAP] search results for users authenticated using [LDAP] external user directory
+to an [RBAC] role that has a name with more than 128 characters.
+
+#### RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.WithSpecialXMLCharacters
+version: 1.0
+
+[ClickHouse] SHALL support mapping [LDAP] search results for users authenticated using [LDAP] external user directory
+to an [RBAC] role that has a name that contains special characters that need to be escaped in XML.
+
+#### RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.WithSpecialRegexCharacters
+version: 1.0
+
+[ClickHouse] SHALL support mapping [LDAP] search results for users authenticated using [LDAP] external user directory
+to an [RBAC] role that has a name that contains special characters that need to be escaped in regex.
+
+### Multiple Roles
+
+#### RQ.SRS-014.LDAP.RoleMapping.Map.MultipleRoles
+version: 1.0
+
+[ClickHouse] SHALL support mapping one or more [LDAP] search results for users authenticated using 
+[LDAP] external user directory to one or more [RBAC] role.
+
+### LDAP Groups
+
+#### RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.Removed
+version: 1.0
+
+[ClickHouse] SHALL not assign [RBAC] role(s) for any users authenticated using [LDAP] external user directory
+if the corresponding [LDAP] group(s) that map those role(s) are removed. Any users that have active sessions SHALL still
+have privileges provided by the role(s) until the next time they are authenticated.
+
+#### RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.RemovedAndAdded.Parallel
+version: 1.0
+
+[ClickHouse] SHALL support authenticating users using [LDAP] external user directory 
+when [LDAP] groups are removed and added 
+at the same time as [LDAP] user authentications are performed in parallel.
+
+#### RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.UserRemoved
+version: 1.0
+
+[ClickHouse] SHALL not assign [RBAC] role(s) for the user authenticated using [LDAP] external user directory
+if the user has been removed from the corresponding [LDAP] group(s) that map those role(s). 
+Any active user sessions SHALL have privileges provided by the role(s) until the next time the user is authenticated.
+
+#### RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.UserRemovedAndAdded.Parallel
+version: 1.0
+
+[ClickHouse] SHALL support authenticating users using [LDAP] external user directory
+when [LDAP] users are added and removed from [LDAP] groups used to map to [RBAC] roles
+at the same time as [LDAP] user authentications are performed in parallel.
+
+### RBAC Roles
+
+#### RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.NotPresent
+version: 1.0
+
+[ClickHouse] SHALL not reject authentication attempt using [LDAP] external user directory if any of the roles that are 
+are mapped from [LDAP] but are not present locally.
+
+#### RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.Added
+version: 1.0
+
+[ClickHouse] SHALL add the privileges provided by the [LDAP] mapped role when the
+role is not present during user authentication using [LDAP] external user directory
+as soon as the role is added.
+
+#### RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.Removed
+version: 1.0
+
+[ClickHouse] SHALL remove the privileges provided by the role from all the
+users authenticated using [LDAP] external user directory if the [RBAC] role that was mapped
+as a result of [LDAP] search is removed.
+
+#### RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.Readded
+version: 1.0
+
+[ClickHouse] SHALL reassign the [RBAC] role and add all the privileges provided by the role
+when it is re-added after removal for all [LDAP] users authenticated using external user directory
+for any role that was mapped as a result of [LDAP] search.
+
+#### RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.RemovedAndAdded.Parallel
+version: 1.0
+
+[ClickHouse] SHALL support authenticating users using [LDAP] external user directory
+when [RBAC] roles that are mapped by [LDAP] groups
+are added and removed at the same time as [LDAP] user authentications are performed in parallel.
+
+#### RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.New
+version: 1.0
+
+[ClickHouse] SHALL not allow any new roles to be assigned to any
+users authenticated using [LDAP] external user directory unless the role is specified
+in the configuration of the external user directory or was mapped as a result of [LDAP] search.
+
+#### RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.NewPrivilege
+version: 1.0
+
+[ClickHouse] SHALL add new privilege to all the users authenticated using [LDAP] external user directory
+when new privilege is added to one of the roles that were mapped as a result of [LDAP] search.
+
+#### RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.RemovedPrivilege
+version: 1.0
+
+[ClickHouse] SHALL remove privilege from all the users authenticated using [LDAP] external user directory
+when the privilege that was provided by the mapped role is removed from all the roles 
+that were mapped as a result of [LDAP] search.
+
+### Authentication
+
+#### RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel
+version: 1.0
+
+[ClickHouse] SHALL support parallel authentication of users using [LDAP] server
+when using [LDAP] external user directory that has role mapping enabled.
+
+#### RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.ValidAndInvalid
+version: 1.0
+
+[ClickHouse] SHALL support authentication of valid users and
+prohibit authentication of invalid users using [LDAP] server
+in parallel without having invalid attempts affecting valid authentications
+when using [LDAP] external user directory that has role mapping enabled.
+
+#### RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.MultipleServers
+version: 1.0
+
+[ClickHouse] SHALL support parallel authentication of external [LDAP] users
+authenticated using multiple [LDAP] external user directories that have
+role mapping enabled.
+
+#### RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.LocalOnly
+version: 1.0
+
+[ClickHouse] SHALL support parallel authentication of users defined only locally
+when one or more [LDAP] external user directories with role mapping
+are specified in the configuration file.
+
+#### RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.LocalAndMultipleLDAP
+version: 1.0
+
+[ClickHouse] SHALL support parallel authentication of local and external [LDAP] users
+authenticated using multiple [LDAP] external user directories with role mapping enabled.
+
+#### RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.SameUser
+version: 1.0
+
+[ClickHouse] SHALL support parallel authentication of the same external [LDAP] user
+authenticated using the same [LDAP] external user directory with role mapping enabled.
+
+### Server Configuration
+
+#### BindDN Parameter
+
+##### RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.BindDN
+version: 1.0
+
+[ClickHouse] SHALL support the `<bind_dn>` parameter in the `<ldap_servers><server_name>` section
+of the `config.xml` that SHALL be used to construct the `DN` to bind to.
+The resulting `DN` SHALL be constructed by replacing all `{user_name}` substrings of the template 
+with the actual user name during each authentication attempt.
+
+For example, 
+
+```xml
+<yandex>
+    <ldap_servers>
+        <my_ldap_server>
+            <!-- ... -->
+            <bind_dn>uid={user_name},ou=users,dc=example,dc=com</bind_dn>
+            <!-- ... -->
+        </my_ldap_server>
+    </ldap_servers>
+</yandex>
+```
+
+##### RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.BindDN.ConflictWith.AuthDN
+version: 1.0
+
+[ClickHouse] SHALL return an error if both `<bind_dn>` and `<auth_dn_prefix>` or `<auth_dn_suffix>` parameters
+are specified as part of [LDAP] server description in the `<ldap_servers>` section of the `config.xml`.
+
+### External User Directory Configuration
+
+#### Syntax
+
+##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Syntax
+version: 1.0
+
+[ClickHouse] SHALL support the `role_mapping` sub-section in the `<user_directories><ldap>` section
+of the `config.xml`.
+
+For example,
+
+```xml
+<yandex>
+    <user_directories>
+        <ldap>
+            <!-- ... -->
+            <role_mapping>
+                <base_dn>ou=groups,dc=example,dc=com</base_dn>
+                <attribute>cn</attribute>
+                <scope>subtree</scope>
+                <search_filter>(&amp;(objectClass=groupOfNames)(member={bind_dn}))</search_filter>
+                <prefix>clickhouse_</prefix>
+            </role_mapping>
+        </ldap>
+    </user_directories>
+</yandex>
+```
+
+#### Special Characters Escaping
+
+##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.SpecialCharactersEscaping
+version: 1.0
+
+[ClickHouse] SHALL support properly escaped special XML characters that can be present
+as part of the values for different configuration parameters inside the
+`<user_directories><ldap><role_mapping>` section of the `config.xml` such as
+
+* `<search_filter>` parameter
+* `<prefix>` parameter
+
+#### Multiple Sections
+
+##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.MultipleSections
+version: 1.0
+
+[ClickHouse] SHALL support multiple `<role_mapping>` sections defined inside the same `<user_directories><ldap>` section 
+of the `config.xml` and all of the `<role_mapping>` sections SHALL be applied.
+
+##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.MultipleSections.IdenticalParameters
+version: 1.0
+
+[ClickHouse] SHALL not duplicate mapped roles when multiple `<role_mapping>` sections 
+with identical parameters are defined inside the `<user_directories><ldap>` section 
+of the `config.xml`.
+
+#### BaseDN Parameter
+
+##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.BaseDN
+version: 1.0
+
+[ClickHouse] SHALL support the `<base_dn>` parameter in the `<user_directories><ldap><role_mapping>` section 
+of the `config.xml` that SHALL specify the template to be used to construct the base `DN` for the [LDAP] search.
+
+The resulting `DN` SHALL be constructed by replacing all the `{user_name}` and `{bind_dn}` substrings of 
+the template with the actual user name and bind `DN` during each [LDAP] search.
+
+#### Attribute Parameter
+
+##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Attribute
+version: 1.0
+
+[ClickHouse] SHALL support the `<attribute>` parameter in the `<user_directories><ldap><role_mapping>` section of 
+the `config.xml` that SHALL specify the name of the attribute whose values SHALL be returned by the [LDAP] search.
+
+#### Scope Parameter
+
+##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope
+version: 1.0
+
+[ClickHouse] SHALL support the `<scope>` parameter in the `<user_directories><ldap><role_mapping>` section of 
+the `config.xml` that SHALL define the scope of the LDAP search as defined 
+by the https://ldapwiki.com/wiki/LDAP%20Search%20Scopes.
+
+##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Base
+version: 1.0
+
+[ClickHouse] SHALL support the `base` value for the the `<scope>` parameter in the 
+`<user_directories><ldap><role_mapping>` section of the `config.xml` that SHALL
+limit the scope as specified by the https://ldapwiki.com/wiki/BaseObject.
+
+##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.OneLevel
+version: 1.0
+
+[ClickHouse] SHALL support the `one_level` value for the the `<scope>` parameter in the 
+`<user_directories><ldap><role_mapping>` section of the `config.xml` that SHALL
+limit the scope as specified by the https://ldapwiki.com/wiki/SingleLevel.
+
+##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Children
+version: 1.0
+
+[ClickHouse] SHALL support the `children` value for the the `<scope>` parameter in the 
+`<user_directories><ldap><role_mapping>` section of the `config.xml` that SHALL
+limit the scope as specified by the https://ldapwiki.com/wiki/SubordinateSubtree.
+
+##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Subtree
+version: 1.0
+
+[ClickHouse] SHALL support the `children` value for the the `<scope>` parameter in the 
+`<user_directories><ldap><role_mapping>` section of the `config.xml` that SHALL
+limit the scope as specified by the https://ldapwiki.com/wiki/WholeSubtree.
+
+##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Default
+version: 1.0
+
+[ClickHouse] SHALL support the `subtree` as the default value for the the `<scope>` parameter in the 
+`<user_directories><ldap><role_mapping>` section of the `config.xml` when the `<scope>` parameter is not specified.
+
+#### Search Filter Parameter
+
+##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.SearchFilter
+version: 1.0
+
+[ClickHouse] SHALL support the `<search_filter>` parameter in the `<user_directories><ldap><role_mapping>`
+section of the `config.xml` that SHALL specify the template used to construct 
+the [LDAP filter](https://ldap.com/ldap-filters/) for the search.
+
+The resulting filter SHALL be constructed by replacing all `{user_name}`, `{bind_dn}`, and `{base_dn}` substrings 
+of the template with the actual user name, bind `DN`, and base `DN` during each the [LDAP] search.
+ 
+#### Prefix Parameter
+
+##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix
+version: 1.0
+
+[ClickHouse] SHALL support the `<prefix>` parameter in the `<user directories><ldap><role_mapping>`
+section of the `config.xml` that SHALL be expected to be in front of each string in 
+the original list of strings returned by the [LDAP] search. 
+Prefix SHALL be removed from the original strings and resulting strings SHALL be treated as [RBAC] role names. 
+
+##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.Default
+version: 1.0
+
+[ClickHouse] SHALL support empty string as the default value of the `<prefix>` parameter in 
+the `<user directories><ldap><role_mapping>` section of the `config.xml`.
+
+##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.WithUTF8Characters
+version: 1.0
+
+[ClickHouse] SHALL support UTF8 characters as the value of the `<prefix>` parameter in
+the `<user directories><ldap><role_mapping>` section of the `config.xml`.
+
+##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.WithSpecialXMLCharacters
+version: 1.0
+
+[ClickHouse] SHALL support XML special characters as the value of the `<prefix>` parameter in
+the `<user directories><ldap><role_mapping>` section of the `config.xml`.
+
+##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.WithSpecialRegexCharacters
+version: 1.0
+
+[ClickHouse] SHALL support regex special characters as the value of the `<prefix>` parameter in
+the `<user directories><ldap><role_mapping>` section of the `config.xml`.
+
+## References
+
+* **Access Control and Account Management**: https://clickhouse.tech/docs/en/operations/access-rights/
+* **LDAP**: https://en.wikipedia.org/wiki/Lightweight_Directory_Access_Protocol
+* **ClickHouse:** https://clickhouse.tech
+* **GitHub Repository**: https://github.com/ClickHouse/ClickHouse/blob/master/tests/testflows/ldap/role_mapping/requirements/requirements.md
+* **Revision History**: https://github.com/ClickHouse/ClickHouse/commits/master/tests/testflows/ldap/role_mapping/requirements/requirements.md 
+* **Git:** https://git-scm.com/
+
+[RBAC]: https://clickhouse.tech/docs/en/operations/access-rights/
+[SRS]: #srs
+[Access Control and Account Management]: https://clickhouse.tech/docs/en/operations/access-rights/
+[SRS-009 ClickHouse LDAP External User Directory]: https://github.com/ClickHouse/ClickHouse/blob/master/tests/testflows/ldap/external_user_directory/requirements/requirements.md
+[SRS-007 ClickHouse Authentication of Users via LDAP]: https://github.com/ClickHouse/ClickHouse/blob/master/tests/testflows/ldap/authentication/requirements/requirements.md
+[LDAP]: https://en.wikipedia.org/wiki/Lightweight_Directory_Access_Protocol
+[ClickHouse]: https://clickhouse.tech
+[GitHub Repository]: https://github.com/ClickHouse/ClickHouse/blob/master/tests/testflows/ldap/role_mapping/requirements/requirements.md
+[Revision History]: https://github.com/ClickHouse/ClickHouse/commits/master/tests/testflows/ldap/role_mapping/requirements/requirements.md
+[Git]: https://git-scm.com/
+[GitHub]: https://github.com
diff --git a/tests/testflows/ldap/role_mapping/requirements/requirements.py b/tests/testflows/ldap/role_mapping/requirements/requirements.py
index ca7192e9dad..b2748762e03 100644
--- a/tests/testflows/ldap/role_mapping/requirements/requirements.py
+++ b/tests/testflows/ldap/role_mapping/requirements/requirements.py
@@ -1,6 +1,6 @@
 # These requirements were auto generated
 # from software requirements specification (SRS)
-# document by TestFlows v1.6.210101.1235930.
+# document by TestFlows v1.6.210129.1222545.
 # Do not edit by hand but re-generate instead
 # using 'tfs requirements generate' command.
 from testflows.core import Specification
@@ -814,15 +814,15 @@ RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Prefix_WithS
     level=4,
     num='4.8.8.5')
 
-QA_SRS014_ClickHouse_LDAP_Role_Mapping = Specification(
-    name='QA-SRS014 ClickHouse LDAP Role Mapping', 
+SRS_014_ClickHouse_LDAP_Role_Mapping = Specification(
+    name='SRS-014 ClickHouse LDAP Role Mapping', 
     description=None,
-    author='vzakaznikov',
-    date='December 4, 2020', 
-    status='-', 
-    approved_by='-',
-    approved_date='-',
-    approved_version='-',
+    author=None,
+    date=None, 
+    status=None, 
+    approved_by=None,
+    approved_date=None,
+    approved_version=None,
     version=None,
     group=None,
     type=None,
@@ -950,27 +950,9 @@ QA_SRS014_ClickHouse_LDAP_Role_Mapping = Specification(
         RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Prefix_WithSpecialRegexCharacters,
         ),
     content='''
-# QA-SRS014 ClickHouse LDAP Role Mapping
+# SRS-014 ClickHouse LDAP Role Mapping
 # Software Requirements Specification
 
-(c) 2020 Altinity LTD. All Rights Reserved.
-
-**Document status:** Confidential
-
-**Author:** vzakaznikov
-
-**Date:** December 4, 2020
-
-## Approval
-
-**Status:** -
-
-**Version:** -
-
-**Approved by:** -
-
-**Date:** -
-
 ## Table of Contents
 
 * 1 [Revision History](#revision-history)
@@ -1046,13 +1028,13 @@ QA_SRS014_ClickHouse_LDAP_Role_Mapping = Specification(
 ## Revision History
 
 This document is stored in an electronic form using [Git] source control management software
-hosted in a [GitLab Repository].
+hosted in a [GitHub Repository].
 All the updates are tracked using the [Revision History].
 
 ## Introduction
 
-The [QA-SRS007 ClickHouse Authentication of Users via LDAP] added support for authenticating
-users using an [LDAP] server and the [QA-SRS009 ClickHouse LDAP External User Directory] added
+The [SRS-007 ClickHouse Authentication of Users via LDAP] added support for authenticating
+users using an [LDAP] server and the [SRS-009 ClickHouse LDAP External User Directory] added
 support for authenticating users using an [LDAP] external user directory. 
 
 This requirements specification adds additional functionality for mapping [LDAP] groups to 
@@ -1457,19 +1439,19 @@ the `<user directories><ldap><role_mapping>` section of the `config.xml`.
 * **Access Control and Account Management**: https://clickhouse.tech/docs/en/operations/access-rights/
 * **LDAP**: https://en.wikipedia.org/wiki/Lightweight_Directory_Access_Protocol
 * **ClickHouse:** https://clickhouse.tech
-* **GitLab Repository**: https://gitlab.com/altinity-qa/documents/qa-srs014-clickhouse-ldap-role-mapping/-/blob/master/QA_SRS014_ClickHouse_LDAP_Role_Mapping.md
-* **Revision History**: https://gitlab.com/altinity-qa/documents/qa-srs014-clickhouse-ldap-role-mapping/-/commits/master/QA_SRS014_ClickHouse_LDAP_Role_Mapping.md
+* **GitHub Repository**: https://github.com/ClickHouse/ClickHouse/blob/master/tests/testflows/ldap/role_mapping/requirements/requirements.md
+* **Revision History**: https://github.com/ClickHouse/ClickHouse/commits/master/tests/testflows/ldap/role_mapping/requirements/requirements.md 
 * **Git:** https://git-scm.com/
 
 [RBAC]: https://clickhouse.tech/docs/en/operations/access-rights/
 [SRS]: #srs
 [Access Control and Account Management]: https://clickhouse.tech/docs/en/operations/access-rights/
-[QA-SRS009 ClickHouse LDAP External User Directory]: https://gitlab.com/altinity-qa/documents/qa-srs009-clickhouse-ldap-external-user-directory/-/blob/master/QA_SRS009_ClickHouse_LDAP_External_User_Directory.md
-[QA-SRS007 ClickHouse Authentication of Users via LDAP]: https://gitlab.com/altinity-qa/documents/qa-srs007-clickhouse-athentication-of-users-via-ldap/-/blob/master/QA_SRS007_ClickHouse_Authentication_of_Users_via_LDAP.md
+[SRS-009 ClickHouse LDAP External User Directory]: https://github.com/ClickHouse/ClickHouse/blob/master/tests/testflows/ldap/external_user_directory/requirements/requirements.md
+[SRS-007 ClickHouse Authentication of Users via LDAP]: https://github.com/ClickHouse/ClickHouse/blob/master/tests/testflows/ldap/authentication/requirements/requirements.md
 [LDAP]: https://en.wikipedia.org/wiki/Lightweight_Directory_Access_Protocol
 [ClickHouse]: https://clickhouse.tech
-[GitLab Repository]: https://gitlab.com/altinity-qa/documents/qa-srs014-clickhouse-ldap-role-mapping/-/blob/master/QA_SRS014_ClickHouse_LDAP_Role_Mapping.md
-[Revision History]: https://gitlab.com/altinity-qa/documents/qa-srs014-clickhouse-ldap-role-mapping/-/commits/master/QA_SRS014_ClickHouse_LDAP_Role_Mapping.md
+[GitHub Repository]: https://github.com/ClickHouse/ClickHouse/blob/master/tests/testflows/ldap/role_mapping/requirements/requirements.md
+[Revision History]: https://github.com/ClickHouse/ClickHouse/commits/master/tests/testflows/ldap/role_mapping/requirements/requirements.md
 [Git]: https://git-scm.com/
-[GitLab]: https://gitlab.com
+[GitHub]: https://github.com
 ''')

From 5519e4c1347b4bcc69252fccf89df8eae2ec8b54 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Sat, 30 Jan 2021 04:16:44 +0300
Subject: [PATCH 0404/1238] unbounded following frame end

---
 src/Interpreters/AggregateDescription.cpp     |  28 ----
 src/Interpreters/AggregateDescription.h       |  74 +--------
 src/Interpreters/ExpressionAnalyzer.h         |   1 +
 src/Interpreters/WindowDescription.cpp        |  36 +++++
 src/Interpreters/WindowDescription.h          | 140 ++++++++++++++++++
 src/Parsers/ASTWindowDefinition.cpp           |  35 ++++-
 src/Parsers/ASTWindowDefinition.h             |   2 +-
 src/Parsers/ExpressionElementParsers.cpp      |  85 +++++++++--
 src/Processors/QueryPlan/WindowStep.h         |   2 +-
 src/Processors/Transforms/WindowTransform.cpp |  37 ++++-
 src/Processors/Transforms/WindowTransform.h   |   6 +-
 .../01591_window_functions.reference          |  44 ++++++
 .../0_stateless/01591_window_functions.sql    |  15 ++
 13 files changed, 376 insertions(+), 129 deletions(-)
 create mode 100644 src/Interpreters/WindowDescription.cpp
 create mode 100644 src/Interpreters/WindowDescription.h

diff --git a/src/Interpreters/AggregateDescription.cpp b/src/Interpreters/AggregateDescription.cpp
index 2748a2abe9d..e483eb1b7a1 100644
--- a/src/Interpreters/AggregateDescription.cpp
+++ b/src/Interpreters/AggregateDescription.cpp
@@ -1,7 +1,6 @@
 #include <Interpreters/AggregateDescription.h>
 #include <Common/FieldVisitors.h>
 #include <IO/Operators.h>
-#include <Parsers/ASTFunction.h>
 
 namespace DB
 {
@@ -100,31 +99,4 @@ void AggregateDescription::explain(WriteBuffer & out, size_t indent) const
     }
 }
 
-std::string WindowFunctionDescription::dump() const
-{
-    WriteBufferFromOwnString ss;
-
-    ss << "window function '" << column_name << "\n";
-    ss << "function node " << function_node->dumpTree() << "\n";
-    ss << "aggregate function '" << aggregate_function->getName() << "'\n";
-    if (!function_parameters.empty())
-    {
-        ss << "parameters " << toString(function_parameters) << "\n";
-    }
-
-    return ss.str();
-}
-
-std::string WindowDescription::dump() const
-{
-    WriteBufferFromOwnString ss;
-
-    ss << "window '" << window_name << "'\n";
-    ss << "partition_by " << dumpSortDescription(partition_by) << "\n";
-    ss << "order_by " << dumpSortDescription(order_by) << "\n";
-    ss << "full_sort_description " << dumpSortDescription(full_sort_description) << "\n";
-
-    return ss.str();
-}
-
 }
diff --git a/src/Interpreters/AggregateDescription.h b/src/Interpreters/AggregateDescription.h
index ce05a6600bd..3af0dc38586 100644
--- a/src/Interpreters/AggregateDescription.h
+++ b/src/Interpreters/AggregateDescription.h
@@ -1,18 +1,14 @@
 #pragma once
 
 #include <AggregateFunctions/IAggregateFunction.h>
-#include <DataTypes/IDataType.h>
 #include <Core/ColumnNumbers.h>
 #include <Core/Names.h>
-#include <Core/SortDescription.h>
-#include <Parsers/IAST_fwd.h>
+#include <Core/Types.h>
 
 
 namespace DB
 {
 
-class ASTFunction;
-
 struct AggregateDescription
 {
     AggregateFunctionPtr function;
@@ -26,72 +22,4 @@ struct AggregateDescription
 
 using AggregateDescriptions = std::vector<AggregateDescription>;
 
-
-struct WindowFunctionDescription
-{
-    std::string column_name;
-    const ASTFunction * function_node;
-    AggregateFunctionPtr aggregate_function;
-    Array function_parameters;
-    DataTypes argument_types;
-    Names argument_names;
-
-    std::string dump() const;
-};
-
-struct WindowFrame
-{
-    enum class FrameType { Rows, Groups, Range };
-    enum class OffsetType { Unbounded, Current, Offset };
-
-    // This flag signifies that the frame properties were not set explicitly by
-    // user, but the fields of this structure still have to contain proper values
-    // for the default frame of RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW.
-    bool is_default = true;
-
-    FrameType type = FrameType::Range;
-
-    /*
-     * We don't need these yet.
-     * OffsetType begin_offset = Unbounded;
-
-     * OffsetType end_offset = Current;
-     */
-
-
-    bool operator == (const WindowFrame & other) const
-    {
-        // We don't compare is_default because it's not a real property of the
-        // frame, and only influences how we display it.
-        return other.type == type;
-    }
-};
-
-struct WindowDescription
-{
-    std::string window_name;
-
-    // We don't care about the particular order of keys for PARTITION BY, only
-    // that they are sorted. For now we always require ASC, but we could be more
-    // flexible and match any direction, or even different order of columns.
-    SortDescription partition_by;
-
-    SortDescription order_by;
-
-    // To calculate the window function, we sort input data first by PARTITION BY,
-    // then by ORDER BY. This field holds this combined sort order.
-    SortDescription full_sort_description;
-
-    WindowFrame frame;
-
-    // The window functions that are calculated for this window.
-    std::vector<WindowFunctionDescription> window_functions;
-
-    std::string dump() const;
-};
-
-using WindowFunctionDescriptions = std::vector<WindowFunctionDescription>;
-
-using WindowDescriptions = std::unordered_map<std::string, WindowDescription>;
-
 }
diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h
index 71301ad64a2..319be9c1409 100644
--- a/src/Interpreters/ExpressionAnalyzer.h
+++ b/src/Interpreters/ExpressionAnalyzer.h
@@ -3,6 +3,7 @@
 #include <DataStreams/IBlockStream_fwd.h>
 #include <Columns/FilterDescription.h>
 #include <Interpreters/AggregateDescription.h>
+#include <Interpreters/WindowDescription.h>
 #include <Interpreters/TreeRewriter.h>
 #include <Interpreters/SubqueryForSet.h>
 #include <Parsers/IAST_fwd.h>
diff --git a/src/Interpreters/WindowDescription.cpp b/src/Interpreters/WindowDescription.cpp
new file mode 100644
index 00000000000..bfb53ebb79f
--- /dev/null
+++ b/src/Interpreters/WindowDescription.cpp
@@ -0,0 +1,36 @@
+#include <Interpreters/WindowDescription.h>
+
+#include <IO/Operators.h>
+#include <Parsers/ASTFunction.h>
+
+namespace DB
+{
+
+std::string WindowFunctionDescription::dump() const
+{
+    WriteBufferFromOwnString ss;
+
+    ss << "window function '" << column_name << "\n";
+    ss << "function node " << function_node->dumpTree() << "\n";
+    ss << "aggregate function '" << aggregate_function->getName() << "'\n";
+    if (!function_parameters.empty())
+    {
+        ss << "parameters " << toString(function_parameters) << "\n";
+    }
+
+    return ss.str();
+}
+
+std::string WindowDescription::dump() const
+{
+    WriteBufferFromOwnString ss;
+
+    ss << "window '" << window_name << "'\n";
+    ss << "partition_by " << dumpSortDescription(partition_by) << "\n";
+    ss << "order_by " << dumpSortDescription(order_by) << "\n";
+    ss << "full_sort_description " << dumpSortDescription(full_sort_description) << "\n";
+
+    return ss.str();
+}
+
+}
diff --git a/src/Interpreters/WindowDescription.h b/src/Interpreters/WindowDescription.h
new file mode 100644
index 00000000000..cbdfc56ba4e
--- /dev/null
+++ b/src/Interpreters/WindowDescription.h
@@ -0,0 +1,140 @@
+#pragma once
+
+// ASTLiteral.h
+#include <Core/Field.h>
+#include <Parsers/ASTWithAlias.h>
+#include <Parsers/TokenIterator.h>
+#include <Common/FieldVisitors.h>
+
+#include <optional>
+
+// ASTLiteral.cpp
+#include <Common/SipHash.h>
+#include <Common/FieldVisitors.h>
+#include <Parsers/ASTLiteral.h>
+#include <IO/WriteHelpers.h>
+#include <IO/WriteBufferFromString.h>
+#include <IO/Operators.h>
+
+
+// The really needed includes follow
+#include <Common/FieldVisitors.h>
+#include <Core/Field.h>
+#include <Parsers/IAST_fwd.h>
+#include <AggregateFunctions/IAggregateFunction.h>
+#include <Core/SortDescription.h>
+#include <DataTypes/IDataType.h>
+#include <Core/Names.h>
+#include <Core/Types.h>
+
+namespace DB
+{
+
+class ASTFunction;
+
+struct WindowFunctionDescription
+{
+    std::string column_name;
+    const ASTFunction * function_node;
+    AggregateFunctionPtr aggregate_function;
+    Array function_parameters;
+    DataTypes argument_types;
+    Names argument_names;
+
+    std::string dump() const;
+};
+
+struct WindowFrame
+{
+    enum class FrameType { Rows, Groups, Range };
+    enum class BoundaryType { Unbounded, Current, Offset };
+
+    // This flag signifies that the frame properties were not set explicitly by
+    // user, but the fields of this structure still have to contain proper values
+    // for the default frame of RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW.
+    bool is_default = true;
+
+    FrameType type = FrameType::Range;
+
+    // UNBOUNDED FOLLOWING for the frame end doesn't make much sense, so
+    // Unbounded here means UNBOUNDED PRECEDING.
+    // Offset might be both preceding and following, preceding is negative
+    // (be careful, this is not symmetric w/the frame end unlike in the grammar,
+    // so a positive literal in PRECEDING will give a negative number here).
+    BoundaryType begin_type = BoundaryType::Unbounded;
+    // This should have been a Field but I'm getting some crazy linker errors.
+    int64_t begin_offset = 0;
+
+    // Here as well, Unbounded is UNBOUNDED FOLLOWING, and positive Offset is
+    // following.
+    BoundaryType end_type = BoundaryType::Current;
+    int64_t end_offset = 0;
+
+
+    bool operator == (const WindowFrame & other) const
+    {
+        // We don't compare is_default because it's not a real property of the
+        // frame, and only influences how we display it.
+        return other.type == type
+            && other.begin_type == begin_type
+            && other.begin_offset == begin_offset
+            && other.end_type == end_type
+            && other.end_offset == end_offset
+            ;
+    }
+
+    static std::string toString(FrameType type)
+    {
+        switch (type)
+        {
+            case FrameType::Rows:
+                return "ROWS";
+            case FrameType::Groups:
+                return "GROUPS";
+            case FrameType::Range:
+                return "RANGE";
+        }
+    }
+
+    static std::string toString(BoundaryType type)
+    {
+        switch (type)
+        {
+            case BoundaryType::Unbounded:
+                return "UNBOUNDED";
+            case BoundaryType::Offset:
+                return "OFFSET";
+            case BoundaryType::Current:
+                return "CURRENT ROW";
+        }
+    }
+};
+
+struct WindowDescription
+{
+    std::string window_name;
+
+    // We don't care about the particular order of keys for PARTITION BY, only
+    // that they are sorted. For now we always require ASC, but we could be more
+    // flexible and match any direction, or even different order of columns.
+    SortDescription partition_by;
+
+    SortDescription order_by;
+
+    // To calculate the window function, we sort input data first by PARTITION BY,
+    // then by ORDER BY. This field holds this combined sort order.
+    SortDescription full_sort_description;
+
+    WindowFrame frame;
+
+    // The window functions that are calculated for this window.
+    std::vector<WindowFunctionDescription> window_functions;
+
+    std::string dump() const;
+};
+
+using WindowFunctionDescriptions = std::vector<WindowFunctionDescription>;
+
+using WindowDescriptions = std::unordered_map<std::string, WindowDescription>;
+
+}
diff --git a/src/Parsers/ASTWindowDefinition.cpp b/src/Parsers/ASTWindowDefinition.cpp
index af2c49d4e4c..e141ba2ff4e 100644
--- a/src/Parsers/ASTWindowDefinition.cpp
+++ b/src/Parsers/ASTWindowDefinition.cpp
@@ -59,11 +59,36 @@ void ASTWindowDefinition::formatImpl(const FormatSettings & settings,
 
     if (!frame.is_default)
     {
-        const auto * name = frame.type == WindowFrame::FrameType::Rows
-            ? "ROWS" : frame.type == WindowFrame::FrameType::Groups
-                ? "GROUPS" : "RANGE";
-
-        settings.ostr << name << " UNBOUNDED PRECEDING";
+        settings.ostr << WindowFrame::toString(frame.type) << " BETWEEN ";
+        if (frame.begin_type == WindowFrame::BoundaryType::Current)
+        {
+            settings.ostr << "CURRENT ROW";
+        }
+        else if (frame.begin_type == WindowFrame::BoundaryType::Unbounded)
+        {
+            settings.ostr << "UNBOUNDED PRECEDING";
+        }
+        else
+        {
+            settings.ostr << abs(frame.begin_offset);
+            settings.ostr << " "
+                << (frame.begin_offset > 0 ? "FOLLOWING" : "PRECEDING");
+        }
+        settings.ostr << " AND ";
+        if (frame.end_type == WindowFrame::BoundaryType::Current)
+        {
+            settings.ostr << "CURRENT ROW";
+        }
+        else if (frame.end_type == WindowFrame::BoundaryType::Unbounded)
+        {
+            settings.ostr << "UNBOUNDED PRECEDING";
+        }
+        else
+        {
+            settings.ostr << abs(frame.end_offset);
+            settings.ostr << " "
+                << (frame.end_offset > 0 ? "FOLLOWING" : "PRECEDING");
+        }
     }
 }
 
diff --git a/src/Parsers/ASTWindowDefinition.h b/src/Parsers/ASTWindowDefinition.h
index fa20b74b0fc..b57c1094e42 100644
--- a/src/Parsers/ASTWindowDefinition.h
+++ b/src/Parsers/ASTWindowDefinition.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include <Interpreters/AggregateDescription.h>
+#include <Interpreters/WindowDescription.h>
 
 #include <Parsers/IAST.h>
 
diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp
index c337b4bd80c..664cf30425c 100644
--- a/src/Parsers/ExpressionElementParsers.cpp
+++ b/src/Parsers/ExpressionElementParsers.cpp
@@ -532,6 +532,7 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p
     ParserKeyword keyword_between("BETWEEN");
     ParserKeyword keyword_unbounded("UNBOUNDED");
     ParserKeyword keyword_preceding("PRECEDING");
+    ParserKeyword keyword_following("FOLLOWING");
     ParserKeyword keyword_and("AND");
     ParserKeyword keyword_current_row("CURRENT ROW");
 
@@ -539,40 +540,92 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p
     // 1) ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
     // 2) ROWS UNBOUNDED PRECEDING
     // When the frame end is not specified (2), it defaults to CURRENT ROW.
-    if (keyword_between.ignore(pos, expected))
+    const bool has_frame_end = keyword_between.ignore(pos, expected);
+
+    if (keyword_current_row.ignore(pos, expected))
     {
-        // 1) ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
-        if (!keyword_unbounded.ignore(pos, expected))
+        node->frame.begin_type = WindowFrame::BoundaryType::Current;
+    }
+    else
+    {
+        ParserLiteral parser_literal;
+        ASTPtr ast_literal;
+        if (keyword_unbounded.ignore(pos, expected))
+        {
+            node->frame.begin_type = WindowFrame::BoundaryType::Unbounded;
+        }
+        else if (parser_literal.parse(pos, ast_literal, expected))
+        {
+            node->frame.begin_offset = ast_literal->as<ASTLiteral &>().value.safeGet<Int64>();
+        }
+        else
         {
             return false;
         }
 
-        if (!keyword_preceding.ignore(pos, expected))
+        if (keyword_preceding.ignore(pos, expected))
+        {
+            node->frame.begin_offset = - node->frame.begin_offset;
+        }
+        else if (keyword_following.ignore(pos, expected))
+        {
+            if (node->frame.begin_type == WindowFrame::BoundaryType::Unbounded)
+            {
+                throw Exception(ErrorCodes::NOT_IMPLEMENTED,
+                    "Frame start UNBOUNDED FOLLOWING is not implemented");
+            }
+        }
+        else
         {
             return false;
         }
+    }
 
+    if (has_frame_end)
+    {
         if (!keyword_and.ignore(pos, expected))
         {
             return false;
         }
 
-        if (!keyword_current_row.ignore(pos, expected))
+        if (keyword_current_row.ignore(pos, expected))
         {
-            return false;
+            node->frame.end_type = WindowFrame::BoundaryType::Current;
         }
-    }
-    else
-    {
-        // 2) ROWS UNBOUNDED PRECEDING
-        if (!keyword_unbounded.ignore(pos, expected))
+        else
         {
-            return false;
-        }
+            ParserLiteral parser_literal;
+            ASTPtr ast_literal;
+            if (keyword_unbounded.ignore(pos, expected))
+            {
+                node->frame.end_type = WindowFrame::BoundaryType::Unbounded;
+            }
+            else if (parser_literal.parse(pos, ast_literal, expected))
+            {
+                node->frame.end_offset = ast_literal->as<ASTLiteral &>().value.safeGet<Int64>();
+            }
+            else
+            {
+                return false;
+            }
 
-        if (!keyword_preceding.ignore(pos, expected))
-        {
-            return false;
+            if (keyword_preceding.ignore(pos, expected))
+            {
+                if (node->frame.end_type == WindowFrame::BoundaryType::Unbounded)
+                {
+                    throw Exception(ErrorCodes::NOT_IMPLEMENTED,
+                        "Frame end UNBOUNDED PRECEDING is not implemented");
+                }
+
+                node->frame.end_offset = -node->frame.end_offset;
+            }
+            else if (keyword_following.ignore(pos, expected))
+            {
+            }
+            else
+            {
+                return false;
+            }
         }
     }
 
diff --git a/src/Processors/QueryPlan/WindowStep.h b/src/Processors/QueryPlan/WindowStep.h
index 069d02c655c..ffd5e78df67 100644
--- a/src/Processors/QueryPlan/WindowStep.h
+++ b/src/Processors/QueryPlan/WindowStep.h
@@ -1,7 +1,7 @@
 #pragma once
 #include <Processors/QueryPlan/ITransformingStep.h>
 
-#include <Interpreters/AggregateDescription.h>
+#include <Interpreters/WindowDescription.h>
 
 namespace DB
 {
diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index 8b4d77d4d72..172c2c89578 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -7,6 +7,11 @@
 namespace DB
 {
 
+namespace ErrorCodes
+{
+    const extern int NOT_IMPLEMENTED;
+}
+
 WindowTransform::WindowTransform(const Block & input_header_,
         const Block & output_header_,
         const WindowDescription & window_description_,
@@ -164,6 +169,13 @@ void WindowTransform::advanceFrameStart()
 {
     // Frame start is always UNBOUNDED PRECEDING for now, so we don't have to
     // move it. It is initialized when the new partition starts.
+    if (window_description.frame.begin_type
+        != WindowFrame::BoundaryType::Unbounded)
+    {
+        throw Exception(ErrorCodes::NOT_IMPLEMENTED,
+            "Frame start type '{}' is not implemented",
+            WindowFrame::toString(window_description.frame.begin_type));
+    }
 }
 
 bool WindowTransform::arePeers(const RowNumber & x, const RowNumber & y) const
@@ -180,7 +192,7 @@ bool WindowTransform::arePeers(const RowNumber & x, const RowNumber & y) const
         return false;
     }
 
-    // For RANGE frame, rows that compare equal w/ORDER BY are peers.
+    // For RANGE and GROUPS frames, rows that compare equal w/ORDER BY are peers.
     assert(window_description.frame.type == WindowFrame::FrameType::Range);
     const size_t n = order_by_indices.size();
     if (n == 0)
@@ -272,6 +284,13 @@ void WindowTransform::advanceFrameEndCurrentRow()
     frame_ended = partition_ended;
 }
 
+void WindowTransform::advanceFrameEndUnbounded()
+{
+    // The UNBOUNDED FOLLOWING frame ends when the partition ends.
+    frame_end = partition_end;
+    frame_ended = partition_ended;
+}
+
 void WindowTransform::advanceFrameEnd()
 {
     // No reason for this function to be called again after it succeeded.
@@ -279,8 +298,20 @@ void WindowTransform::advanceFrameEnd()
 
     const auto frame_end_before = frame_end;
 
-    // The only frame end we have for now is CURRENT ROW.
-    advanceFrameEndCurrentRow();
+    switch (window_description.frame.end_type)
+    {
+        case WindowFrame::BoundaryType::Current:
+            // The only frame end we have for now is CURRENT ROW.
+            advanceFrameEndCurrentRow();
+            break;
+        case WindowFrame::BoundaryType::Unbounded:
+            advanceFrameEndUnbounded();
+            break;
+        case WindowFrame::BoundaryType::Offset:
+            throw Exception(ErrorCodes::NOT_IMPLEMENTED,
+                "The frame end type '{}' is not implemented",
+                WindowFrame::toString(window_description.frame.end_type));
+    }
 
 //    fmt::print(stderr, "frame_end {} -> {}\n", frame_end_before, frame_end);
 
diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h
index f1dd9682d00..34f4e9b6e9f 100644
--- a/src/Processors/Transforms/WindowTransform.h
+++ b/src/Processors/Transforms/WindowTransform.h
@@ -1,7 +1,8 @@
 #pragma once
-#include <Processors/ISimpleTransform.h>
 
-#include <Interpreters/AggregateDescription.h>
+#include <Interpreters/WindowDescription.h>
+
+#include <Processors/IProcessor.h>
 
 #include <Common/AlignedBuffer.h>
 
@@ -103,6 +104,7 @@ private:
     void advanceFrameStart();
     void advanceFrameEnd();
     void advanceFrameEndCurrentRow();
+    void advanceFrameEndUnbounded();
     bool arePeers(const RowNumber & x, const RowNumber & y) const;
     void writeOutCurrentRow();
 
diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference
index e6b49b5207a..1e9b83b9983 100644
--- a/tests/queries/0_stateless/01591_window_functions.reference
+++ b/tests/queries/0_stateless/01591_window_functions.reference
@@ -472,3 +472,47 @@ select min(number) over (partition by p)  from (select number, intDiv(number, 3)
 6
 6
 9
+-- UNBOUNDED FOLLOWING frame end
+select
+    min(number) over wa, min(number) over wo,
+    max(number) over wa, max(number) over wo
+from
+    (select number, intDiv(number, 3) p, mod(number, 5) o
+        from numbers(31))
+window
+    wa as (partition by p order by o
+        range between unbounded preceding and unbounded following),
+    wo as (partition by p order by o
+        rows between unbounded preceding and unbounded following)
+settings max_block_size = 2;
+0	0	2	2
+0	0	2	2
+0	0	2	2
+3	3	5	5
+3	3	5	5
+3	3	5	5
+6	6	8	8
+6	6	8	8
+6	6	8	8
+9	9	11	11
+9	9	11	11
+9	9	11	11
+12	12	14	14
+12	12	14	14
+12	12	14	14
+15	15	17	17
+15	15	17	17
+15	15	17	17
+18	18	20	20
+18	18	20	20
+18	18	20	20
+21	21	23	23
+21	21	23	23
+21	21	23	23
+24	24	26	26
+24	24	26	26
+24	24	26	26
+27	27	29	29
+27	27	29	29
+27	27	29	29
+30	30	30	30
diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql
index e56fe9cb315..a5b30fb884a 100644
--- a/tests/queries/0_stateless/01591_window_functions.sql
+++ b/tests/queries/0_stateless/01591_window_functions.sql
@@ -148,3 +148,18 @@ settings max_block_size = 5
 -- A case where the partition end is in the current block, and the frame end
 -- is triggered by the partition end.
 select min(number) over (partition by p)  from (select number, intDiv(number, 3) p from numbers(10));
+
+-- UNBOUNDED FOLLOWING frame end
+select
+    min(number) over wa, min(number) over wo,
+    max(number) over wa, max(number) over wo
+from
+    (select number, intDiv(number, 3) p, mod(number, 5) o
+        from numbers(31))
+window
+    wa as (partition by p order by o
+        range between unbounded preceding and unbounded following),
+    wo as (partition by p order by o
+        rows between unbounded preceding and unbounded following)
+settings max_block_size = 2;
+

From 7da40832376aa66f270f4398bb1b77c471f8d225 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sat, 30 Jan 2021 07:13:49 +0300
Subject: [PATCH 0405/1238] Forbid bit functions for floats

This will fix the following UB report [1]:

  "../src/Functions/bitAnd.cpp:24:61: runtime error: nan is outside the
range of representable values of type 'long' Received signal -3 Received
signal Unknown signal (-3)"

  [1]: https://clickhouse-test-reports.s3.yandex.net/19824/89c4055202b9d08459f90ee5791d4e3017b82fbf/fuzzer_ubsan/report.html#fail1
---
 src/Functions/FunctionBinaryArithmetic.h      | 62 +++++++++++++++----
 src/Functions/bitAnd.cpp                      |  2 +-
 src/Functions/bitOr.cpp                       |  2 +-
 src/Functions/bitRotateLeft.cpp               |  2 +-
 src/Functions/bitRotateRight.cpp              |  2 +-
 src/Functions/bitShiftLeft.cpp                |  2 +-
 src/Functions/bitShiftRight.cpp               |  2 +-
 src/Functions/bitTest.cpp                     |  2 +-
 src/Functions/bitXor.cpp                      |  2 +-
 .../01576_if_null_external_aggregation.sql    |  2 +-
 .../0_stateless/01677_bit_float.reference     |  0
 tests/queries/0_stateless/01677_bit_float.sql |  9 +++
 12 files changed, 68 insertions(+), 21 deletions(-)
 create mode 100644 tests/queries/0_stateless/01677_bit_float.reference
 create mode 100644 tests/queries/0_stateless/01677_bit_float.sql

diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h
index c10ff581b0e..f61c9c91d00 100644
--- a/src/Functions/FunctionBinaryArithmetic.h
+++ b/src/Functions/FunctionBinaryArithmetic.h
@@ -504,7 +504,7 @@ private:
 using namespace traits_;
 using namespace impl_;
 
-template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true>
+template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true, bool valid_on_float_arguments = true>
 class FunctionBinaryArithmetic : public IFunction
 {
     static constexpr const bool is_plus = IsOperation<Op>::plus;
@@ -542,16 +542,54 @@ class FunctionBinaryArithmetic : public IFunction
         >(type, std::forward<F>(f));
     }
 
+    template <typename F>
+    static bool castTypeNoFloats(const IDataType * type, F && f)
+    {
+        return castTypeToEither<
+            DataTypeUInt8,
+            DataTypeUInt16,
+            DataTypeUInt32,
+            DataTypeUInt64,
+            DataTypeUInt256,
+            DataTypeInt8,
+            DataTypeInt16,
+            DataTypeInt32,
+            DataTypeInt64,
+            DataTypeInt128,
+            DataTypeInt256,
+            DataTypeDate,
+            DataTypeDateTime,
+            DataTypeDecimal<Decimal32>,
+            DataTypeDecimal<Decimal64>,
+            DataTypeDecimal<Decimal128>,
+            DataTypeDecimal<Decimal256>,
+            DataTypeFixedString
+        >(type, std::forward<F>(f));
+    }
+
     template <typename F>
     static bool castBothTypes(const IDataType * left, const IDataType * right, F && f)
     {
-        return castType(left, [&](const auto & left_)
+        if constexpr (valid_on_float_arguments)
         {
-            return castType(right, [&](const auto & right_)
+            return castType(left, [&](const auto & left_)
             {
-                return f(left_, right_);
+                return castType(right, [&](const auto & right_)
+                {
+                    return f(left_, right_);
+                });
             });
-        });
+        }
+        else
+        {
+            return castTypeNoFloats(left, [&](const auto & left_)
+            {
+                return castTypeNoFloats(right, [&](const auto & right_)
+                {
+                    return f(left_, right_);
+                });
+            });
+        }
     }
 
     static FunctionOverloadResolverPtr
@@ -1319,11 +1357,11 @@ public:
 };
 
 
-template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true>
-class FunctionBinaryArithmeticWithConstants : public FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments>
+template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true, bool valid_on_float_arguments = true>
+class FunctionBinaryArithmeticWithConstants : public FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments, valid_on_float_arguments>
 {
 public:
-    using Base = FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments>;
+    using Base = FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments, valid_on_float_arguments>;
     using Monotonicity = typename Base::Monotonicity;
 
     static FunctionPtr create(
@@ -1488,7 +1526,7 @@ private:
     DataTypePtr return_type;
 };
 
-template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true>
+template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true, bool valid_on_float_arguments = true>
 class BinaryArithmeticOverloadResolver : public IFunctionOverloadResolverImpl
 {
 public:
@@ -1512,14 +1550,14 @@ public:
                 || (arguments[1].column && isColumnConst(*arguments[1].column))))
         {
             return std::make_unique<DefaultFunction>(
-                FunctionBinaryArithmeticWithConstants<Op, Name, valid_on_default_arguments>::create(
+                FunctionBinaryArithmeticWithConstants<Op, Name, valid_on_default_arguments, valid_on_float_arguments>::create(
                     arguments[0], arguments[1], return_type, context),
                 ext::map<DataTypes>(arguments, [](const auto & elem) { return elem.type; }),
                 return_type);
         }
 
         return std::make_unique<DefaultFunction>(
-            FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments>::create(context),
+            FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments, valid_on_float_arguments>::create(context),
             ext::map<DataTypes>(arguments, [](const auto & elem) { return elem.type; }),
             return_type);
     }
@@ -1530,7 +1568,7 @@ public:
             throw Exception(
                 "Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) + ", should be 2",
                 ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
-        return FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments>::getReturnTypeImplStatic(arguments, context);
+        return FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments, valid_on_float_arguments>::getReturnTypeImplStatic(arguments, context);
     }
 
 private:
diff --git a/src/Functions/bitAnd.cpp b/src/Functions/bitAnd.cpp
index 08fc4699c36..aa4b7593c79 100644
--- a/src/Functions/bitAnd.cpp
+++ b/src/Functions/bitAnd.cpp
@@ -37,7 +37,7 @@ struct BitAndImpl
 };
 
 struct NameBitAnd { static constexpr auto name = "bitAnd"; };
-using FunctionBitAnd = BinaryArithmeticOverloadResolver<BitAndImpl, NameBitAnd, true>;
+using FunctionBitAnd = BinaryArithmeticOverloadResolver<BitAndImpl, NameBitAnd, true, false>;
 
 }
 
diff --git a/src/Functions/bitOr.cpp b/src/Functions/bitOr.cpp
index 400c478fcbf..adf2244bc0a 100644
--- a/src/Functions/bitOr.cpp
+++ b/src/Functions/bitOr.cpp
@@ -36,7 +36,7 @@ struct BitOrImpl
 };
 
 struct NameBitOr { static constexpr auto name = "bitOr"; };
-using FunctionBitOr = BinaryArithmeticOverloadResolver<BitOrImpl, NameBitOr, true>;
+using FunctionBitOr = BinaryArithmeticOverloadResolver<BitOrImpl, NameBitOr, true, false>;
 
 }
 
diff --git a/src/Functions/bitRotateLeft.cpp b/src/Functions/bitRotateLeft.cpp
index aac0197f2c5..9962ac174fd 100644
--- a/src/Functions/bitRotateLeft.cpp
+++ b/src/Functions/bitRotateLeft.cpp
@@ -43,7 +43,7 @@ struct BitRotateLeftImpl
 };
 
 struct NameBitRotateLeft { static constexpr auto name = "bitRotateLeft"; };
-using FunctionBitRotateLeft = BinaryArithmeticOverloadResolver<BitRotateLeftImpl, NameBitRotateLeft>;
+using FunctionBitRotateLeft = BinaryArithmeticOverloadResolver<BitRotateLeftImpl, NameBitRotateLeft, true, false>;
 
 }
 
diff --git a/src/Functions/bitRotateRight.cpp b/src/Functions/bitRotateRight.cpp
index e8932eccaa3..8352f2f92bc 100644
--- a/src/Functions/bitRotateRight.cpp
+++ b/src/Functions/bitRotateRight.cpp
@@ -42,7 +42,7 @@ struct BitRotateRightImpl
 };
 
 struct NameBitRotateRight { static constexpr auto name = "bitRotateRight"; };
-using FunctionBitRotateRight = BinaryArithmeticOverloadResolver<BitRotateRightImpl, NameBitRotateRight>;
+using FunctionBitRotateRight = BinaryArithmeticOverloadResolver<BitRotateRightImpl, NameBitRotateRight, true, false>;
 
 }
 
diff --git a/src/Functions/bitShiftLeft.cpp b/src/Functions/bitShiftLeft.cpp
index 9f8adf39741..96e0fbe6d6f 100644
--- a/src/Functions/bitShiftLeft.cpp
+++ b/src/Functions/bitShiftLeft.cpp
@@ -42,7 +42,7 @@ struct BitShiftLeftImpl
 };
 
 struct NameBitShiftLeft { static constexpr auto name = "bitShiftLeft"; };
-using FunctionBitShiftLeft = BinaryArithmeticOverloadResolver<BitShiftLeftImpl, NameBitShiftLeft>;
+using FunctionBitShiftLeft = BinaryArithmeticOverloadResolver<BitShiftLeftImpl, NameBitShiftLeft, true, false>;
 
 }
 
diff --git a/src/Functions/bitShiftRight.cpp b/src/Functions/bitShiftRight.cpp
index bc54d3c8b5d..37c3908f8de 100644
--- a/src/Functions/bitShiftRight.cpp
+++ b/src/Functions/bitShiftRight.cpp
@@ -42,7 +42,7 @@ struct BitShiftRightImpl
 };
 
 struct NameBitShiftRight { static constexpr auto name = "bitShiftRight"; };
-using FunctionBitShiftRight = BinaryArithmeticOverloadResolver<BitShiftRightImpl, NameBitShiftRight>;
+using FunctionBitShiftRight = BinaryArithmeticOverloadResolver<BitShiftRightImpl, NameBitShiftRight, true, false>;
 
 }
 
diff --git a/src/Functions/bitTest.cpp b/src/Functions/bitTest.cpp
index 54c932d9311..b7e9a84c6c1 100644
--- a/src/Functions/bitTest.cpp
+++ b/src/Functions/bitTest.cpp
@@ -34,7 +34,7 @@ struct BitTestImpl
 };
 
 struct NameBitTest { static constexpr auto name = "bitTest"; };
-using FunctionBitTest = BinaryArithmeticOverloadResolver<BitTestImpl, NameBitTest>;
+using FunctionBitTest = BinaryArithmeticOverloadResolver<BitTestImpl, NameBitTest, true, false>;
 
 }
 
diff --git a/src/Functions/bitXor.cpp b/src/Functions/bitXor.cpp
index c5867c8d2ca..d8901c6c9ff 100644
--- a/src/Functions/bitXor.cpp
+++ b/src/Functions/bitXor.cpp
@@ -36,7 +36,7 @@ struct BitXorImpl
 };
 
 struct NameBitXor { static constexpr auto name = "bitXor"; };
-using FunctionBitXor = BinaryArithmeticOverloadResolver<BitXorImpl, NameBitXor, true>;
+using FunctionBitXor = BinaryArithmeticOverloadResolver<BitXorImpl, NameBitXor, true, false>;
 
 }
 
diff --git a/tests/queries/0_stateless/01576_if_null_external_aggregation.sql b/tests/queries/0_stateless/01576_if_null_external_aggregation.sql
index b9c36a9cecc..cffeb73b1ca 100644
--- a/tests/queries/0_stateless/01576_if_null_external_aggregation.sql
+++ b/tests/queries/0_stateless/01576_if_null_external_aggregation.sql
@@ -3,5 +3,5 @@ SET max_bytes_before_external_group_by = 200000000;
 SET max_memory_usage = 1500000000;
 SET max_threads = 12;
 
-SELECT bitAnd(number, pow(2, 20) - 1) as k, argMaxIf(k, number % 2 = 0 ? number : Null, number > 42),  uniq(number) AS u FROM numbers(1000000) GROUP BY k format Null;
+SELECT bitAnd(number, toUInt64(pow(2, 20) - 1)) as k, argMaxIf(k, number % 2 = 0 ? number : Null, number > 42),  uniq(number) AS u FROM numbers(1000000) GROUP BY k format Null;
 
diff --git a/tests/queries/0_stateless/01677_bit_float.reference b/tests/queries/0_stateless/01677_bit_float.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/01677_bit_float.sql b/tests/queries/0_stateless/01677_bit_float.sql
new file mode 100644
index 00000000000..3692d8ac6a5
--- /dev/null
+++ b/tests/queries/0_stateless/01677_bit_float.sql
@@ -0,0 +1,9 @@
+SELECT bitAnd(0, inf); -- { serverError 43 }
+SELECT bitXor(0, inf); -- { serverError 43 }
+SELECT bitOr(0, inf); -- { serverError 43 }
+SELECT bitTest(inf, 0); -- { serverError 43 }
+SELECT bitTest(0, inf); -- { serverError 43 }
+SELECT bitRotateLeft(inf, 0); -- { serverError 43 }
+SELECT bitRotateRight(inf, 0); -- { serverError 43 }
+SELECT bitShiftLeft(inf, 0); -- { serverError 43 }
+SELECT bitShiftRight(inf, 0); -- { serverError 43 }

From 502d981ab450711f12e8e7cb1e0940144622cc0e Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sat, 30 Jan 2021 07:57:14 +0300
Subject: [PATCH 0406/1238] Use
 BinaryArithmeticOverloadResolver::valid_on_float_arguments for lcm/gcd

---
 src/Functions/GCDLCMImpl.h                    |  4 +--
 src/Functions/gcd.cpp                         |  2 +-
 src/Functions/lcm.cpp                         |  2 +-
 tests/queries/0_stateless/00515_gcd_lcm.sql   | 28 +++++++++----------
 .../0_stateless/01666_gcd_ubsan.reference     |  2 +-
 tests/queries/0_stateless/01666_gcd_ubsan.sql |  2 +-
 .../0_stateless/01666_lcm_ubsan.reference     |  2 +-
 tests/queries/0_stateless/01666_lcm_ubsan.sql |  2 +-
 8 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/src/Functions/GCDLCMImpl.h b/src/Functions/GCDLCMImpl.h
index dffd91f8d6a..b83fe37abb6 100644
--- a/src/Functions/GCDLCMImpl.h
+++ b/src/Functions/GCDLCMImpl.h
@@ -21,7 +21,7 @@ namespace ErrorCodes
 }
 
 template <class T>
-inline constexpr bool is_gcd_lcm_implemeted = !(is_big_int_v<T> || std::is_floating_point_v<T>);
+inline constexpr bool is_gcd_lcm_implemeted = !is_big_int_v<T>;
 
 template <typename A, typename B, typename Impl, typename Name>
 struct GCDLCMImpl
@@ -33,7 +33,7 @@ struct GCDLCMImpl
     static inline std::enable_if_t<!is_gcd_lcm_implemeted<Result>, Result>
     apply(A, B)
     {
-        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not implemented for big integers and floats", Name::name);
+        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not implemented for big integers", Name::name);
     }
 
     template <typename Result = ResultType>
diff --git a/src/Functions/gcd.cpp b/src/Functions/gcd.cpp
index 9cb53212c7f..52addfc66ed 100644
--- a/src/Functions/gcd.cpp
+++ b/src/Functions/gcd.cpp
@@ -23,7 +23,7 @@ struct GCDImpl : public GCDLCMImpl<A, B, GCDImpl<A, B>, NameGCD>
     }
 };
 
-using FunctionGCD = BinaryArithmeticOverloadResolver<GCDImpl, NameGCD, false>;
+using FunctionGCD = BinaryArithmeticOverloadResolver<GCDImpl, NameGCD, false, false>;
 
 }
 
diff --git a/src/Functions/lcm.cpp b/src/Functions/lcm.cpp
index 5155a80e6cd..f46e11dfa17 100644
--- a/src/Functions/lcm.cpp
+++ b/src/Functions/lcm.cpp
@@ -54,7 +54,7 @@ struct LCMImpl : public GCDLCMImpl<A, B, LCMImpl<A, B>, NameLCM>
     }
 };
 
-using FunctionLCM = BinaryArithmeticOverloadResolver<LCMImpl, NameLCM, false>;
+using FunctionLCM = BinaryArithmeticOverloadResolver<LCMImpl, NameLCM, false, false>;
 
 }
 
diff --git a/tests/queries/0_stateless/00515_gcd_lcm.sql b/tests/queries/0_stateless/00515_gcd_lcm.sql
index c3bf3275bb8..67fab1c9d59 100644
--- a/tests/queries/0_stateless/00515_gcd_lcm.sql
+++ b/tests/queries/0_stateless/00515_gcd_lcm.sql
@@ -24,18 +24,18 @@ select lcm(2147483647, 2147483646);
 select lcm(4611686011984936962, 2147483647);
 select lcm(-2147483648, 1);
 -- test gcd float
-select gcd(1280.1, 1024.1); -- { serverError 48 }
-select gcd(11.1, 121.1); -- { serverError 48 }
-select gcd(-256.1, 64.1); -- { serverError 48 }
-select gcd(1.1, 1.1); -- { serverError 48 }
-select gcd(4.1, 2.1); -- { serverError 48 }
-select gcd(15.1, 49.1); -- { serverError 48 }
-select gcd(255.1, 254.1); -- { serverError 48 }
+select gcd(1280.1, 1024.1); -- { serverError 43 }
+select gcd(11.1, 121.1); -- { serverError 43 }
+select gcd(-256.1, 64.1); -- { serverError 43 }
+select gcd(1.1, 1.1); -- { serverError 43 }
+select gcd(4.1, 2.1); -- { serverError 43 }
+select gcd(15.1, 49.1); -- { serverError 43 }
+select gcd(255.1, 254.1); -- { serverError 43 }
 -- test lcm float
-select lcm(1280.1, 1024.1); -- { serverError 48 }
-select lcm(11.1, 121.1); -- { serverError 48 }
-select lcm(-256.1, 64.1); -- { serverError 48 }
-select lcm(1.1, 1.1); -- { serverError 48 }
-select lcm(4.1, 2.1); -- { serverError 48 }
-select lcm(15.1, 49.1); -- { serverError 48 }
-select lcm(255.1, 254.1); -- { serverError 48 }
+select lcm(1280.1, 1024.1); -- { serverError 43 }
+select lcm(11.1, 121.1); -- { serverError 43 }
+select lcm(-256.1, 64.1); -- { serverError 43 }
+select lcm(1.1, 1.1); -- { serverError 43 }
+select lcm(4.1, 2.1); -- { serverError 43 }
+select lcm(15.1, 49.1); -- { serverError 43 }
+select lcm(255.1, 254.1); -- { serverError 43 }
diff --git a/tests/queries/0_stateless/01666_gcd_ubsan.reference b/tests/queries/0_stateless/01666_gcd_ubsan.reference
index 2500ef1deae..af041bdcbde 100644
--- a/tests/queries/0_stateless/01666_gcd_ubsan.reference
+++ b/tests/queries/0_stateless/01666_gcd_ubsan.reference
@@ -4,7 +4,7 @@ SELECT gcd(9223372036854775808, -9223372036854775807); -- { serverError 407 }
 SELECT gcd(-9223372036854775808, 9223372036854775807); -- { serverError 407 }
 SELECT gcd(-9223372036854775807, 9223372036854775808); -- { serverError 407 }
 SELECT gcd(9223372036854775808, -1); -- { serverError 407 }
-SELECT lcm(-170141183460469231731687303715884105728, -170141183460469231731687303715884105728); -- { serverError 48 }
+SELECT lcm(-170141183460469231731687303715884105728, -170141183460469231731687303715884105728); -- { serverError 43 }
 SELECT lcm(toInt128(-170141183460469231731687303715884105728), toInt128(-170141183460469231731687303715884105728)); -- { serverError 407 }
 SELECT lcm(toInt128(-170141183460469231731687303715884105720), toInt128(-170141183460469231731687303715884105720)); -- { serverError 407 }
 SELECT lcm(toInt128('-170141183460469231731687303715884105720'), toInt128('-170141183460469231731687303715884105720'));
diff --git a/tests/queries/0_stateless/01666_gcd_ubsan.sql b/tests/queries/0_stateless/01666_gcd_ubsan.sql
index bde2b624cc0..a1f501cda0b 100644
--- a/tests/queries/0_stateless/01666_gcd_ubsan.sql
+++ b/tests/queries/0_stateless/01666_gcd_ubsan.sql
@@ -4,7 +4,7 @@ SELECT gcd(9223372036854775808, -9223372036854775807); -- { serverError 407 }
 SELECT gcd(-9223372036854775808, 9223372036854775807); -- { serverError 407 }
 SELECT gcd(-9223372036854775807, 9223372036854775808); -- { serverError 407 }
 SELECT gcd(9223372036854775808, -1); -- { serverError 407 }
-SELECT lcm(-170141183460469231731687303715884105728, -170141183460469231731687303715884105728); -- { serverError 48 }
+SELECT lcm(-170141183460469231731687303715884105728, -170141183460469231731687303715884105728); -- { serverError 43 }
 SELECT lcm(toInt128(-170141183460469231731687303715884105728), toInt128(-170141183460469231731687303715884105728)); -- { serverError 407 }
 SELECT lcm(toInt128(-170141183460469231731687303715884105720), toInt128(-170141183460469231731687303715884105720)); -- { serverError 407 }
 SELECT lcm(toInt128('-170141183460469231731687303715884105720'), toInt128('-170141183460469231731687303715884105720'));
diff --git a/tests/queries/0_stateless/01666_lcm_ubsan.reference b/tests/queries/0_stateless/01666_lcm_ubsan.reference
index ed9a6aed42b..d8ed13191d3 100644
--- a/tests/queries/0_stateless/01666_lcm_ubsan.reference
+++ b/tests/queries/0_stateless/01666_lcm_ubsan.reference
@@ -4,7 +4,7 @@ SELECT lcm(9223372036854775808, -9223372036854775807); -- { serverError 407 }
 SELECT lcm(-9223372036854775808, 9223372036854775807); -- { serverError 407 }
 SELECT lcm(-9223372036854775807, 9223372036854775808); -- { serverError 407 }
 SELECT lcm(9223372036854775808, -1); -- { serverError 407 }
-SELECT lcm(-170141183460469231731687303715884105728, -170141183460469231731687303715884105728); -- { serverError 48 }
+SELECT lcm(-170141183460469231731687303715884105728, -170141183460469231731687303715884105728); -- { serverError 43 }
 SELECT lcm(toInt128(-170141183460469231731687303715884105728), toInt128(-170141183460469231731687303715884105728)); -- { serverError 407 }
 SELECT lcm(toInt128(-170141183460469231731687303715884105720), toInt128(-170141183460469231731687303715884105720)); -- { serverError 407 }
 SELECT lcm(toInt128('-170141183460469231731687303715884105720'), toInt128('-170141183460469231731687303715884105720'));
diff --git a/tests/queries/0_stateless/01666_lcm_ubsan.sql b/tests/queries/0_stateless/01666_lcm_ubsan.sql
index 5cc3546e941..b3b869c80ed 100644
--- a/tests/queries/0_stateless/01666_lcm_ubsan.sql
+++ b/tests/queries/0_stateless/01666_lcm_ubsan.sql
@@ -4,7 +4,7 @@ SELECT lcm(9223372036854775808, -9223372036854775807); -- { serverError 407 }
 SELECT lcm(-9223372036854775808, 9223372036854775807); -- { serverError 407 }
 SELECT lcm(-9223372036854775807, 9223372036854775808); -- { serverError 407 }
 SELECT lcm(9223372036854775808, -1); -- { serverError 407 }
-SELECT lcm(-170141183460469231731687303715884105728, -170141183460469231731687303715884105728); -- { serverError 48 }
+SELECT lcm(-170141183460469231731687303715884105728, -170141183460469231731687303715884105728); -- { serverError 43 }
 SELECT lcm(toInt128(-170141183460469231731687303715884105728), toInt128(-170141183460469231731687303715884105728)); -- { serverError 407 }
 SELECT lcm(toInt128(-170141183460469231731687303715884105720), toInt128(-170141183460469231731687303715884105720)); -- { serverError 407 }
 SELECT lcm(toInt128('-170141183460469231731687303715884105720'), toInt128('-170141183460469231731687303715884105720'));

From 39f75ccd195f0f2319ec39e3b7e57c3edcef5301 Mon Sep 17 00:00:00 2001
From: Mikhail Filimonov <mfilimonov@altinity.com>
Date: Sat, 30 Jan 2021 11:26:31 +0100
Subject: [PATCH 0407/1238] Revert changes in test_multiple_disks

---
 tests/integration/test_multiple_disks/test.py | 29 +------------------
 1 file changed, 1 insertion(+), 28 deletions(-)

diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py
index 6877a9fc357..24ee6c0493b 100644
--- a/tests/integration/test_multiple_disks/test.py
+++ b/tests/integration/test_multiple_disks/test.py
@@ -287,8 +287,6 @@ def test_query_parser(start_cluster):
                 "ALTER TABLE table_with_normal_policy MODIFY SETTING storage_policy='moving_jbod_with_external'")
     finally:
         node1.query("DROP TABLE IF EXISTS table_with_normal_policy SYNC")
-        node1.query("DROP TABLE IF EXISTS table_with_absent_policy SYNC")
-        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 @pytest.mark.parametrize("name,engine", [
@@ -330,7 +328,6 @@ def test_alter_policy(start_cluster, name, engine):
 
     finally:
         node1.query(f"DROP TABLE IF EXISTS {name} SYNC")
-        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 def get_random_string(length):
@@ -400,7 +397,6 @@ def test_round_robin(start_cluster, name, engine):
         assert used_disks[2] == used_disks[0]
     finally:
         node1.query(f"DROP TABLE IF EXISTS {name} SYNC")
-        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 @pytest.mark.parametrize("name,engine", [
@@ -428,7 +424,6 @@ def test_max_data_part_size(start_cluster, name, engine):
         assert used_disks[0] == 'external'
     finally:
         node1.query(f"DROP TABLE IF EXISTS {name} SYNC")
-        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 @pytest.mark.parametrize("name,engine", [
@@ -482,8 +477,6 @@ def test_jbod_overflow(start_cluster, name, engine):
 
     finally:
         node1.query(f"DROP TABLE IF EXISTS {name} SYNC")
-        node.query("DROP TABLE IF EXISTS system.part_log SYNC")
-
 
 
 @pytest.mark.parametrize("name,engine", [
@@ -534,7 +527,6 @@ def test_background_move(start_cluster, name, engine):
 
     finally:
         node1.query(f"DROP TABLE IF EXISTS {name} SYNC")
-        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 @pytest.mark.parametrize("name,engine", [
@@ -622,7 +614,6 @@ def test_start_stop_moves(start_cluster, name, engine):
 
     finally:
         node1.query(f"DROP TABLE IF EXISTS {name} SYNC")
-        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 def get_path_for_part_from_part_log(node, table, part_name):
@@ -711,7 +702,7 @@ def test_alter_move(start_cluster, name, engine):
 
     finally:
         node1.query(f"DROP TABLE IF EXISTS {name} SYNC")
-        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
+
 
 @pytest.mark.parametrize("volume_or_disk", [
     "DISK",
@@ -760,7 +751,6 @@ def test_alter_move_half_of_partition(start_cluster, volume_or_disk):
 
     finally:
         node1.query(f"DROP TABLE IF EXISTS {name} SYNC")
-        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 @pytest.mark.parametrize("volume_or_disk", [
@@ -805,7 +795,6 @@ def test_alter_double_move_partition(start_cluster, volume_or_disk):
 
     finally:
         node1.query(f"DROP TABLE IF EXISTS {name} SYNC")
-        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 def produce_alter_move(node, name):
@@ -890,7 +879,6 @@ def test_concurrent_alter_move(start_cluster, name, engine):
         assert node1.query("SELECT COUNT() FROM {}".format(name)) == "500\n"
     finally:
         node1.query(f"DROP TABLE IF EXISTS {name} SYNC")
-        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 @pytest.mark.parametrize("name,engine", [
@@ -944,7 +932,6 @@ def test_concurrent_alter_move_and_drop(start_cluster, name, engine):
 
     finally:
         node1.query(f"DROP TABLE IF EXISTS {name} SYNC")
-        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 @pytest.mark.parametrize("name,engine", [
@@ -976,7 +963,6 @@ def test_detach_attach(start_cluster, name, engine):
 
     finally:
         node1.query(f"DROP TABLE IF EXISTS {name} SYNC")
-        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 @pytest.mark.parametrize("name,engine", [
@@ -1023,7 +1009,6 @@ def test_mutate_to_another_disk(start_cluster, name, engine):
 
     finally:
         node1.query(f"DROP TABLE IF EXISTS {name} SYNC")
-        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 @pytest.mark.parametrize("name,engine", [
@@ -1082,7 +1067,6 @@ def test_concurrent_alter_modify(start_cluster, name, engine):
 
     finally:
         node1.query(f"DROP TABLE IF EXISTS {name} SYNC")
-        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 def test_simple_replication_and_moves(start_cluster):
@@ -1150,7 +1134,6 @@ def test_simple_replication_and_moves(start_cluster):
     finally:
         for node in [node1, node2]:
             node.query("DROP TABLE IF EXISTS replicated_table_for_moves SYNC")
-            node.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 def test_download_appropriate_disk(start_cluster):
@@ -1185,7 +1168,6 @@ def test_download_appropriate_disk(start_cluster):
     finally:
         for node in [node1, node2]:
             node.query("DROP TABLE IF EXISTS replicated_table_for_download SYNC")
-            node.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 def test_rename(start_cluster):
@@ -1225,7 +1207,6 @@ def test_rename(start_cluster):
         node1.query("DROP TABLE IF EXISTS default.renaming_table SYNC")
         node1.query("DROP TABLE IF EXISTS default.renaming_table1 SYNC")
         node1.query("DROP TABLE IF EXISTS test.renaming_table2 SYNC")
-        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 def test_freeze(start_cluster):
@@ -1261,7 +1242,6 @@ def test_freeze(start_cluster):
     finally:
         node1.query("DROP TABLE IF EXISTS default.freezing_table SYNC")
         node1.exec_in_container(["rm", "-rf", "/jbod1/shadow", "/external/shadow"])
-        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 def test_kill_while_insert(start_cluster):
@@ -1305,7 +1285,6 @@ def test_kill_while_insert(start_cluster):
     finally:
         try:
             node1.query(f"DROP TABLE IF EXISTS {name} SYNC")
-            node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
         except:
             """ClickHouse may be inactive at this moment and we don't want to mask a meaningful exception."""
 
@@ -1367,7 +1346,6 @@ def test_move_while_merge(start_cluster):
 
     finally:
         node1.query(f"DROP TABLE IF EXISTS {name} SYNC")
-        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 def test_move_across_policies_does_not_work(start_cluster):
@@ -1410,7 +1388,6 @@ def test_move_across_policies_does_not_work(start_cluster):
     finally:
         node1.query(f"DROP TABLE IF EXISTS {name} SYNC")
         node1.query(f"DROP TABLE IF EXISTS {name}2 SYNC")
-        node1.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 def _insert_merge_execute(node, name, policy, parts, cmds, parts_before_cmds, parts_after_cmds):
@@ -1510,7 +1487,6 @@ def test_no_merges_in_configuration_allow_from_query_without_reload(start_cluste
 
     finally:
         node1.query("SYSTEM STOP MERGES ON VOLUME {}.external".format(policy))
-        node.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 def test_no_merges_in_configuration_allow_from_query_with_reload(start_cluster):
@@ -1530,7 +1506,6 @@ def test_no_merges_in_configuration_allow_from_query_with_reload(start_cluster):
 
     finally:
         node1.query("SYSTEM STOP MERGES ON VOLUME {}.external".format(policy))
-        node.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 def test_yes_merges_in_configuration_disallow_from_query_without_reload(start_cluster):
@@ -1550,7 +1525,6 @@ def test_yes_merges_in_configuration_disallow_from_query_without_reload(start_cl
 
     finally:
         node1.query("SYSTEM START MERGES ON VOLUME {}.external".format(policy))
-        node.query("DROP TABLE IF EXISTS system.part_log SYNC")
 
 
 def test_yes_merges_in_configuration_disallow_from_query_with_reload(start_cluster):
@@ -1571,4 +1545,3 @@ def test_yes_merges_in_configuration_disallow_from_query_with_reload(start_clust
 
     finally:
         node1.query("SYSTEM START MERGES ON VOLUME {}.external".format(policy))
-        node.query("DROP TABLE IF EXISTS system.part_log SYNC")

From c7fa78be1e590c8ecf0cfb56d151b8740a3f94ac Mon Sep 17 00:00:00 2001
From: sevirov <72220289+sevirov@users.noreply.github.com>
Date: Sat, 30 Jan 2021 16:32:07 +0300
Subject: [PATCH 0408/1238] Update
 docs/ru/sql-reference/statements/select/index.md

Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com>
---
 docs/ru/sql-reference/statements/select/index.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/sql-reference/statements/select/index.md b/docs/ru/sql-reference/statements/select/index.md
index 536511ed5e8..ab9d538f7fd 100644
--- a/docs/ru/sql-reference/statements/select/index.md
+++ b/docs/ru/sql-reference/statements/select/index.md
@@ -168,7 +168,7 @@ Code: 42. DB::Exception: Received from localhost:9000. DB::Exception: Number of
 
 ### APPLY {#apply-modifier}
 
-Вызывает некоторую функцию для каждой строки, которая возвращает внешнее табличное выражение запроса. 
+Вызывает указанную функцию для каждой строки, возвращаемой внешним табличным выражением запроса. 
 
 **Синтаксис:**
 

From a46c8e0c73e07637144582b18d747271d5ba1703 Mon Sep 17 00:00:00 2001
From: sevirov <72220289+sevirov@users.noreply.github.com>
Date: Sat, 30 Jan 2021 16:33:49 +0300
Subject: [PATCH 0409/1238] Update
 docs/ru/sql-reference/statements/select/index.md

Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com>
---
 docs/ru/sql-reference/statements/select/index.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/sql-reference/statements/select/index.md b/docs/ru/sql-reference/statements/select/index.md
index ab9d538f7fd..57fcc0244cc 100644
--- a/docs/ru/sql-reference/statements/select/index.md
+++ b/docs/ru/sql-reference/statements/select/index.md
@@ -192,7 +192,7 @@ SELECT * APPLY(sum) FROM columns_transformers;
 
 ### EXCEPT {#except-modifier}
 
-Указывает имена одного или нескольких столбцов для исключения из результата.
+Исключает из результата запроса один или несколько столбцов.
 
 **Синтаксис:**
 

From df28433457947ec02f39aeccede9d0ee58008a9c Mon Sep 17 00:00:00 2001
From: sevirov <72220289+sevirov@users.noreply.github.com>
Date: Sat, 30 Jan 2021 16:34:39 +0300
Subject: [PATCH 0410/1238] Update
 docs/ru/sql-reference/statements/select/index.md

Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com>
---
 docs/ru/sql-reference/statements/select/index.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/sql-reference/statements/select/index.md b/docs/ru/sql-reference/statements/select/index.md
index 57fcc0244cc..b0b6e80d7be 100644
--- a/docs/ru/sql-reference/statements/select/index.md
+++ b/docs/ru/sql-reference/statements/select/index.md
@@ -215,7 +215,7 @@ SELECT * EXCEPT (i) from columns_transformers;
 
 ### REPLACE {#replace-modifier}
 
-Указывает одно или несколько [выражений алиасов](../../../sql-reference/syntax.md#syntax-expression_aliases). Каждый алиас должен соответствовать имени столбца из запроса `SELECT *`. В списке столбцов из результата запроса имя столбца, соответствующее алиасу, заменяется выражением в модификаторе `REPLACE`.
+Определяет одно или несколько [выражений алиасов](../../../sql-reference/syntax.md#syntax-expression_aliases). Каждый алиас должен соответствовать имени столбца из запроса `SELECT *`. В списке столбцов результата запроса имя столбца, соответствующее алиасу, заменяется выражением в модификаторе `REPLACE`.
 
 Этот модификатор не изменяет имена или порядок столбцов. Однако он может изменить значение и тип значения.
 

From dc9c3c8514402a56e2761cf1f460e9ff87646614 Mon Sep 17 00:00:00 2001
From: Vitaliy Zakaznikov <vzakaznikov@protonmail.com>
Date: Sat, 30 Jan 2021 09:00:42 -0500
Subject: [PATCH 0411/1238] Fixing styling errors. Adding simple tests.

---
 src/Parsers/ASTCreateQuery.cpp                |  2 +
 src/Storages/LiveView/StorageLiveView.cpp     |  7 +--
 .../00962_live_view_periodic_refresh.py       | 41 +++++++++++++++
 ...00962_live_view_periodic_refresh.reference |  0
 ..._live_view_periodic_refresh_and_timeout.py | 52 +++++++++++++++++++
 ...iew_periodic_refresh_and_timeout.reference |  0
 6 files changed, 99 insertions(+), 3 deletions(-)
 create mode 100755 tests/queries/0_stateless/00962_live_view_periodic_refresh.py
 create mode 100644 tests/queries/0_stateless/00962_live_view_periodic_refresh.reference
 create mode 100755 tests/queries/0_stateless/00962_live_view_periodic_refresh_and_timeout.py
 create mode 100644 tests/queries/0_stateless/00962_live_view_periodic_refresh_and_timeout.reference

diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp
index 15f361c0b8f..2af0d2d4a45 100644
--- a/src/Parsers/ASTCreateQuery.cpp
+++ b/src/Parsers/ASTCreateQuery.cpp
@@ -274,6 +274,8 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat
         {
             if (live_view_timeout)
                 settings.ostr << (settings.hilite ? hilite_keyword : "") << " AND" << (settings.hilite ? hilite_none : "");
+            else
+                settings.ostr << (settings.hilite ? hilite_keyword : "") << " WITH" << (settings.hilite ? hilite_none : "");
 
             settings.ostr << (settings.hilite ? hilite_keyword : "") << " PERIODIC REFRESH " << (settings.hilite ? hilite_none : "")
                 << *live_view_periodic_refresh;
diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp
index d6ed1a3401a..cd96ab4ad40 100644
--- a/src/Storages/LiveView/StorageLiveView.cpp
+++ b/src/Storages/LiveView/StorageLiveView.cpp
@@ -388,7 +388,8 @@ bool StorageLiveView::getNewBlocks()
 
             updated = true;
         }
-        else {
+        else
+        {
             new_blocks_metadata->hash = getBlocksHashKey();
             new_blocks_metadata->version = getBlocksVersion();
             new_blocks_metadata->time = std::chrono::system_clock::now();
@@ -449,7 +450,7 @@ void StorageLiveView::scheduleNextPeriodicRefresh()
     Seconds current_time = std::chrono::duration_cast<Seconds> (std::chrono::system_clock::now().time_since_epoch());
     Seconds blocks_time = std::chrono::duration_cast<Seconds> (getBlocksTime().time_since_epoch());
 
-    if ( (current_time - periodic_live_view_refresh) >= blocks_time )
+    if ((current_time - periodic_live_view_refresh) >= blocks_time)
     {
         refresh(false);
         blocks_time = std::chrono::duration_cast<Seconds> (getBlocksTime().time_since_epoch());
@@ -514,7 +515,7 @@ Pipe StorageLiveView::read(
         Seconds current_time = std::chrono::duration_cast<Seconds> (std::chrono::system_clock::now().time_since_epoch());
         Seconds blocks_time = std::chrono::duration_cast<Seconds> (getBlocksTime().time_since_epoch());
 
-        if ( (current_time - periodic_live_view_refresh) >= blocks_time )
+        if ((current_time - periodic_live_view_refresh) >= blocks_time)
             refresh(false);
     }
 
diff --git a/tests/queries/0_stateless/00962_live_view_periodic_refresh.py b/tests/queries/0_stateless/00962_live_view_periodic_refresh.py
new file mode 100755
index 00000000000..6d3bddcb88b
--- /dev/null
+++ b/tests/queries/0_stateless/00962_live_view_periodic_refresh.py
@@ -0,0 +1,41 @@
+#!/usr/bin/env python3
+import os
+import sys
+import signal
+
+CURDIR = os.path.dirname(os.path.realpath(__file__))
+sys.path.insert(0, os.path.join(CURDIR, 'helpers'))
+
+from client import client, prompt, end_of_block
+
+log = None
+# uncomment the line below for debugging
+log=sys.stdout
+
+with client(name='client1>', log=log) as client1, client(name='client2>', log=log) as client2:
+    client1.expect(prompt)
+    client2.expect(prompt)
+
+    client1.send('SET allow_experimental_live_view = 1')
+    client1.expect(prompt)
+    client2.send('SET allow_experimental_live_view = 1')
+    client2.expect(prompt)
+
+    client1.send('DROP TABLE IF EXISTS test.lv')
+    client1.expect(prompt)
+    client1.send("CREATE LIVE VIEW test.lv WITH REFRESH 1"
+                 " AS SELECT value FROM system.events WHERE event = 'OSCPUVirtualTimeMicroseconds'")
+    client1.expect(prompt)
+    client1.send('WATCH test.lv FORMAT JSONEachRow')
+    client1.expect(r'"_version":' + end_of_block)
+    client1.expect(r'"_version":' + end_of_block)
+    client1.expect(r'"_version":' + end_of_block)
+    # send Ctrl-C
+    client1.send('\x03', eol='')
+    match = client1.expect('(%s)|([#\$] )' % prompt)
+    if match.groups()[1]:
+        client1.send(client1.command)
+        client1.expect(prompt)
+    client1.send('DROP TABLE test.lv')
+    client1.expect(prompt)
+
diff --git a/tests/queries/0_stateless/00962_live_view_periodic_refresh.reference b/tests/queries/0_stateless/00962_live_view_periodic_refresh.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/00962_live_view_periodic_refresh_and_timeout.py b/tests/queries/0_stateless/00962_live_view_periodic_refresh_and_timeout.py
new file mode 100755
index 00000000000..76b9980d1f5
--- /dev/null
+++ b/tests/queries/0_stateless/00962_live_view_periodic_refresh_and_timeout.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python3
+import os
+import sys
+import time
+import signal
+
+CURDIR = os.path.dirname(os.path.realpath(__file__))
+sys.path.insert(0, os.path.join(CURDIR, 'helpers'))
+
+from client import client, prompt, end_of_block
+
+log = None
+# uncomment the line below for debugging
+#log=sys.stdout
+
+with client(name='client1>', log=log) as client1, client(name='client2>', log=log) as client2:
+    client1.expect(prompt)
+    client2.expect(prompt)
+
+    client1.send('SET allow_experimental_live_view = 1')
+    client1.expect(prompt)
+    client2.send('SET allow_experimental_live_view = 1')
+    client2.expect(prompt)
+
+    client1.send('DROP TABLE IF EXISTS test.lv')
+    client1.expect(prompt)
+    client1.send("CREATE LIVE VIEW test.lv WITH TIMEOUT 60 AND REFRESH 1"
+                 " AS SELECT value FROM system.events WHERE event = 'OSCPUVirtualTimeMicroseconds'")
+    client1.expect(prompt)
+    client1.send('WATCH test.lv FORMAT JSONEachRow')
+    client1.expect(r'"_version":' + end_of_block)
+    client1.expect(r'"_version":' + end_of_block)
+    client1.expect(r'"_version":' + end_of_block)
+    # send Ctrl-C
+    client1.send('\x03', eol='')
+    match = client1.expect('(%s)|([#\$] )' % prompt)
+    if match.groups()[1]:
+        client1.send(client1.command)
+        client1.expect(prompt)
+    # poll until live view table is dropped
+    start_time = time.time()
+    while True:
+        client1.send('SELECT * FROM test.lv FORMAT JSONEachRow')
+        client1.expect(prompt)
+        if 'Table test.lv doesn\'t exist' in client1.before:
+            break
+        if time.time() - start_time > 90:
+            break
+    # check table is dropped
+    client1.send('DROP TABLE test.lv')
+    client1.expect('Table test.lv doesn\'t exist')
+    client1.expect(prompt)
diff --git a/tests/queries/0_stateless/00962_live_view_periodic_refresh_and_timeout.reference b/tests/queries/0_stateless/00962_live_view_periodic_refresh_and_timeout.reference
new file mode 100644
index 00000000000..e69de29bb2d

From 124bd1ea19b8b5ebb0a209fedda84c954cb24b1a Mon Sep 17 00:00:00 2001
From: Vitaliy Zakaznikov <vzakaznikov@protonmail.com>
Date: Sat, 30 Jan 2021 10:56:02 -0500
Subject: [PATCH 0412/1238] Fixing live view refresh test. Adding live view
 refresh with dictionary test.

---
 .../00962_live_view_periodic_refresh.py       |  2 +-
 ...2_live_view_periodic_refresh_dictionary.py | 68 +++++++++++++++++++
 ...view_periodic_refresh_dictionary.reference |  0
 3 files changed, 69 insertions(+), 1 deletion(-)
 create mode 100755 tests/queries/0_stateless/00962_live_view_periodic_refresh_dictionary.py
 create mode 100644 tests/queries/0_stateless/00962_live_view_periodic_refresh_dictionary.reference

diff --git a/tests/queries/0_stateless/00962_live_view_periodic_refresh.py b/tests/queries/0_stateless/00962_live_view_periodic_refresh.py
index 6d3bddcb88b..27308548452 100755
--- a/tests/queries/0_stateless/00962_live_view_periodic_refresh.py
+++ b/tests/queries/0_stateless/00962_live_view_periodic_refresh.py
@@ -10,7 +10,7 @@ from client import client, prompt, end_of_block
 
 log = None
 # uncomment the line below for debugging
-log=sys.stdout
+#log=sys.stdout
 
 with client(name='client1>', log=log) as client1, client(name='client2>', log=log) as client2:
     client1.expect(prompt)
diff --git a/tests/queries/0_stateless/00962_live_view_periodic_refresh_dictionary.py b/tests/queries/0_stateless/00962_live_view_periodic_refresh_dictionary.py
new file mode 100755
index 00000000000..34d5db676f4
--- /dev/null
+++ b/tests/queries/0_stateless/00962_live_view_periodic_refresh_dictionary.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+import os
+import sys
+import signal
+
+CURDIR = os.path.dirname(os.path.realpath(__file__))
+sys.path.insert(0, os.path.join(CURDIR, 'helpers'))
+
+from client import client, prompt, end_of_block
+
+log = None
+# uncomment the line below for debugging
+#log=sys.stdout
+
+with client(name='client1>', log=log) as client1, client(name='client2>', log=log) as client2:
+    client1.expect(prompt)
+    client2.expect(prompt)
+
+    client1.send('SET allow_experimental_live_view = 1')
+    client1.expect(prompt)
+    client2.send('SET allow_experimental_live_view = 1')
+    client2.expect(prompt)
+
+    client1.send('DROP TABLE IF EXISTS test.lv')
+    client1.expect(prompt)
+    client1.send('DROP TABLE IF EXISTS test.mt')
+    client1.expect(prompt)
+    client1.send('DROP DICTIONARY IF EXITS test.dict')
+    client1.expect(prompt)
+    
+    client1.send("CREATE TABLE test.mt (a Int32, b Int32) Engine=MergeTree order by tuple()")
+    client1.expect(prompt)
+    client1.send("CREATE DICTIONARY test.dict(a Int32, b Int32) PRIMARY KEY a LAYOUT(FLAT()) " + \
+                 "SOURCE(CLICKHOUSE(db 'test' table 'mt')) LIFETIME(1)")
+    client1.expect(prompt)   
+    client1.send("CREATE LIVE VIEW test.lv WITH REFRESH 1 AS SELECT * FROM test.dict")
+    client1.expect(prompt)
+
+    client2.send("INSERT INTO test.mt VALUES (1,2)")
+    client2.expect(prompt) 
+
+    client1.send('WATCH test.lv FORMAT JSONEachRow')
+    client1.expect(r'"_version":"1"')
+    
+    client2.send("INSERT INTO test.mt VALUES (2,2)")
+    client2.expect(prompt) 
+    client1.expect(r'"_version":"2"')
+    
+    client2.send("INSERT INTO test.mt VALUES (3,2)")
+    client2.expect(prompt)    
+    client1.expect(r'"_version":"3"')
+    
+    # send Ctrl-C
+    client1.send('\x03', eol='')
+    match = client1.expect('(%s)|([#\$] )' % prompt)
+    if match.groups()[1]:
+        client1.send(client1.command)
+        client1.expect(prompt)
+
+    client1.send('DROP TABLE IF EXISTS test.lv')
+    client1.expect(prompt)
+    client1.send('DROP DICTIONARY IF EXISTS test.dict')
+    client1.expect(prompt)
+    client1.send('DROP TABLE IF EXISTS test.mt')
+    client1.expect(prompt)
+    
+    
+
diff --git a/tests/queries/0_stateless/00962_live_view_periodic_refresh_dictionary.reference b/tests/queries/0_stateless/00962_live_view_periodic_refresh_dictionary.reference
new file mode 100644
index 00000000000..e69de29bb2d

From 7c8deae0e1eac1abdee2a266a86b5d7b7daf7685 Mon Sep 17 00:00:00 2001
From: Vitaly Baranov <vitbar@yandex-team.ru>
Date: Fri, 29 Jan 2021 15:44:56 +0300
Subject: [PATCH 0413/1238] Fix starting the server with tables having columns'
 default expressions containing dictGet(). Allow getting return type of
 dictGet() without loading dictionary.

---
 src/Databases/DatabaseOrdinary.cpp            | 33 +++++++++++-------
 src/Dictionaries/DictionaryStructure.cpp      | 11 ++++--
 src/Dictionaries/DictionaryStructure.h        |  3 +-
 src/Functions/FunctionsExternalDictionaries.h | 34 ++++++++-----------
 ...76_dictget_in_default_expression.reference | 11 ++++++
 .../01676_dictget_in_default_expression.sql   | 31 +++++++++++++++++
 6 files changed, 87 insertions(+), 36 deletions(-)
 create mode 100644 tests/queries/0_stateless/01676_dictget_in_default_expression.reference
 create mode 100644 tests/queries/0_stateless/01676_dictget_in_default_expression.sql

diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp
index 58b4ad72a1b..df30f9e6306 100644
--- a/src/Databases/DatabaseOrdinary.cpp
+++ b/src/Databases/DatabaseOrdinary.cpp
@@ -172,6 +172,26 @@ void DatabaseOrdinary::loadStoredObjects(Context & context, bool has_force_resto
 
     ThreadPool pool;
 
+    /// We must attach dictionaries before attaching tables
+    /// because while we're attaching tables we may need to have some dictionaries attached
+    /// (for example, dictionaries can be used in the default expressions for some tables).
+    /// On the other hand we can attach any dictionary (even sourced from ClickHouse table)
+    /// without having any tables attached. It is so because attaching of a dictionary means
+    /// loading of its config only, it doesn't involve loading the dictionary itself.
+
+    /// Attach dictionaries.
+    for (const auto & [name, query] : file_names)
+    {
+        auto create_query = query->as<const ASTCreateQuery &>();
+        if (create_query.is_dictionary)
+        {
+            tryAttachDictionary(query, *this, getMetadataPath() + name, context);
+
+            /// Messages, so that it's not boring to wait for the server to load for a long time.
+            logAboutProgress(log, ++dictionaries_processed, total_dictionaries, watch);
+        }
+    }
+
     /// Attach tables.
     for (const auto & name_with_query : file_names)
     {
@@ -196,19 +216,6 @@ void DatabaseOrdinary::loadStoredObjects(Context & context, bool has_force_resto
 
     /// After all tables was basically initialized, startup them.
     startupTables(pool);
-
-    /// Attach dictionaries.
-    for (const auto & [name, query] : file_names)
-    {
-        auto create_query = query->as<const ASTCreateQuery &>();
-        if (create_query.is_dictionary)
-        {
-            tryAttachDictionary(query, *this, getMetadataPath() + name, context);
-
-            /// Messages, so that it's not boring to wait for the server to load for a long time.
-            logAboutProgress(log, ++dictionaries_processed, total_dictionaries, watch);
-        }
-    }
 }
 
 
diff --git a/src/Dictionaries/DictionaryStructure.cpp b/src/Dictionaries/DictionaryStructure.cpp
index 95c2e0a3e09..4a446b6d1af 100644
--- a/src/Dictionaries/DictionaryStructure.cpp
+++ b/src/Dictionaries/DictionaryStructure.cpp
@@ -218,7 +218,7 @@ void DictionaryStructure::validateKeyTypes(const DataTypes & key_types) const
     }
 }
 
-const DictionaryAttribute & DictionaryStructure::getAttribute(const String& attribute_name, const DataTypePtr & type) const
+const DictionaryAttribute & DictionaryStructure::getAttribute(const String & attribute_name) const
 {
     auto find_iter
         = std::find_if(attributes.begin(), attributes.end(), [&](const auto & attribute) { return attribute.name == attribute_name; });
@@ -226,13 +226,18 @@ const DictionaryAttribute & DictionaryStructure::getAttribute(const String& attr
     if (find_iter == attributes.end())
         throw Exception{"No such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS};
 
-    const auto & attribute = *find_iter;
+    return *find_iter;
+}
+
+const DictionaryAttribute & DictionaryStructure::getAttribute(const String& attribute_name, const DataTypePtr & type) const
+{
+    const auto & attribute = getAttribute(attribute_name);
 
     if (!areTypesEqual(attribute.type, type))
         throw Exception{"Attribute type does not match, expected " + attribute.type->getName() + ", found " + type->getName(),
             ErrorCodes::TYPE_MISMATCH};
 
-    return *find_iter;
+    return attribute;
 }
 
 std::string DictionaryStructure::getKeyDescription() const
diff --git a/src/Dictionaries/DictionaryStructure.h b/src/Dictionaries/DictionaryStructure.h
index 945e1c55494..b996c3a199a 100644
--- a/src/Dictionaries/DictionaryStructure.h
+++ b/src/Dictionaries/DictionaryStructure.h
@@ -154,7 +154,8 @@ struct DictionaryStructure final
     DictionaryStructure(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix);
 
     void validateKeyTypes(const DataTypes & key_types) const;
-    const DictionaryAttribute &getAttribute(const String& attribute_name, const DataTypePtr & type) const;
+    const DictionaryAttribute & getAttribute(const String & attribute_name) const;
+    const DictionaryAttribute & getAttribute(const String & attribute_name, const DataTypePtr & type) const;
     std::string getKeyDescription() const;
     bool isKeySizeFixed() const;
     size_t getKeySize() const;
diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h
index 3c955ffbcf0..ac59775a755 100644
--- a/src/Functions/FunctionsExternalDictionaries.h
+++ b/src/Functions/FunctionsExternalDictionaries.h
@@ -121,25 +121,26 @@ public:
         return getDictionary(dict_name_col->getValue<String>())->isInjective(attr_name_col->getValue<String>());
     }
 
-    DictionaryAttribute getDictionaryAttribute(std::shared_ptr<const IDictionaryBase> dictionary, const String& attribute_name) const
+    DictionaryStructure getDictionaryStructure(const String & dictionary_name) const
     {
-        const DictionaryStructure & structure = dictionary->getStructure();
-
-        auto find_iter = std::find_if(structure.attributes.begin(), structure.attributes.end(), [&](const auto &attribute)
-        {
-            return attribute.name == attribute_name;
-        });
-
-        if (find_iter == structure.attributes.end())
-            throw Exception{"No such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS};
-
-        return *find_iter;
+        String resolved_name = DatabaseCatalog::instance().resolveDictionaryName(dictionary_name);
+        auto load_result = external_loader.getLoadResult(resolved_name);
+        if (!load_result.config)
+            throw Exception("Dictionary " + backQuote(dictionary_name) + " not found", ErrorCodes::BAD_ARGUMENTS);
+        return ExternalDictionariesLoader::getDictionaryStructure(*load_result.config);
     }
+
 private:
     const Context & context;
     const ExternalDictionariesLoader & external_loader;
     /// Access cannot be not granted, since in this case checkAccess() will throw and access_checked will not be updated.
     std::atomic<bool> access_checked = false;
+
+    /// We must not cache dictionary or dictionary's structure here, because there are places
+    /// where ExpressionActionsPtr is cached (StorageDistributed caching it for sharding_key_expr and
+    /// optimize_skip_unused_shards), and if the dictionary will be cached within "query" then
+    /// cached ExpressionActionsPtr will always have first version of the query and the dictionary
+    /// will not be updated after reload (see https://github.com/ClickHouse/ClickHouse/pull/16205)
 };
 
 
@@ -267,10 +268,7 @@ public:
         if (arguments.size() < 3)
             throw Exception{"Wrong argument count for function " + getName(), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
 
-        /// TODO: We can load only dictionary structure
-
         String dictionary_name;
-
         if (const auto * name_col = checkAndGetColumnConst<ColumnString>(arguments[0].column.get()))
             dictionary_name = name_col->getValue<String>();
         else
@@ -278,16 +276,14 @@ public:
                 + ", expected a const string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
 
         String attribute_name;
-
         if (const auto * name_col = checkAndGetColumnConst<ColumnString>(arguments[1].column.get()))
             attribute_name = name_col->getValue<String>();
         else
             throw Exception{"Illegal type " + arguments[1].type->getName() + " of second argument of function " + getName()
                 + ", expected a const string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
 
-        auto dictionary = helper.getDictionary(dictionary_name);
-
-        return helper.getDictionaryAttribute(dictionary, attribute_name).type;
+        /// We're extracting the return type from the dictionary's config, without loading the dictionary.
+        return helper.getDictionaryStructure(dictionary_name).getAttribute(attribute_name).type;
     }
 
     ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
diff --git a/tests/queries/0_stateless/01676_dictget_in_default_expression.reference b/tests/queries/0_stateless/01676_dictget_in_default_expression.reference
new file mode 100644
index 00000000000..608f9904d26
--- /dev/null
+++ b/tests/queries/0_stateless/01676_dictget_in_default_expression.reference
@@ -0,0 +1,11 @@
+2	20
+3	15
+status:
+LOADED
+status_after_detach_and_attach:
+NOT_LOADED
+2	20
+3	15
+4	40
+status:
+LOADED
diff --git a/tests/queries/0_stateless/01676_dictget_in_default_expression.sql b/tests/queries/0_stateless/01676_dictget_in_default_expression.sql
new file mode 100644
index 00000000000..cd4feaf5709
--- /dev/null
+++ b/tests/queries/0_stateless/01676_dictget_in_default_expression.sql
@@ -0,0 +1,31 @@
+DROP DATABASE IF EXISTS test_01676 SYNC;
+
+CREATE DATABASE test_01676;
+
+CREATE TABLE test_01676.dict_data (key UInt64, value UInt64) ENGINE=MergeTree ORDER BY tuple();
+INSERT INTO test_01676.dict_data VALUES (2,20), (3,30), (4,40), (5,50);
+
+CREATE DICTIONARY test_01676.dict (key UInt64, value UInt64) PRIMARY KEY key SOURCE(CLICKHOUSE(DB 'test_01676' TABLE 'dict_data' HOST '127.0.0.1' PORT tcpPort())) LIFETIME(0) LAYOUT(HASHED());
+
+CREATE TABLE test_01676.table (x UInt64, y UInt64 DEFAULT dictGet('test_01676.dict', 'value', x)) ENGINE=MergeTree ORDER BY tuple();
+INSERT INTO test_01676.table (x) VALUES (2);
+INSERT INTO test_01676.table VALUES (toUInt64(3), toUInt64(15));
+
+SELECT * FROM test_01676.table ORDER BY x;
+
+SELECT 'status:';
+SELECT status FROM system.dictionaries WHERE database='test_01676' AND name='dict';
+
+DETACH DATABASE test_01676;
+ATTACH DATABASE test_01676;
+
+SELECT 'status_after_detach_and_attach:';
+SELECT status FROM system.dictionaries WHERE database='test_01676' AND name='dict';
+
+INSERT INTO test_01676.table (x) VALUES (toInt64(4));
+SELECT * FROM test_01676.table ORDER BY x;
+
+SELECT 'status:';
+SELECT status FROM system.dictionaries WHERE database='test_01676' AND name='dict';
+
+DROP DATABASE test_01676;

From 95177dc0dec4abbee4ee2e2fee6b5badc9c0d9a4 Mon Sep 17 00:00:00 2001
From: Vitaly Baranov <vitbar@yandex-team.ru>
Date: Sat, 30 Jan 2021 02:25:28 +0300
Subject: [PATCH 0414/1238] Fix access to the key as an attribute in
 IPAddressDictionary.

---
 src/Dictionaries/DictionaryFactory.cpp        |  2 +-
 src/Dictionaries/DictionaryStructure.cpp      | 37 ++++++++-----
 src/Dictionaries/DictionaryStructure.h        |  1 +
 src/Dictionaries/IPAddressDictionary.cpp      | 52 ++++++++-----------
 src/Dictionaries/IPAddressDictionary.h        |  6 +--
 .../ExternalDictionariesLoader.cpp            |  2 +-
 6 files changed, 50 insertions(+), 50 deletions(-)

diff --git a/src/Dictionaries/DictionaryFactory.cpp b/src/Dictionaries/DictionaryFactory.cpp
index a889b63107f..0ab7d199186 100644
--- a/src/Dictionaries/DictionaryFactory.cpp
+++ b/src/Dictionaries/DictionaryFactory.cpp
@@ -41,7 +41,7 @@ DictionaryPtr DictionaryFactory::create(
         throw Exception{name + ": element dictionary.layout should have exactly one child element",
                         ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG};
 
-    const DictionaryStructure dict_struct{config, config_prefix + ".structure"};
+    const DictionaryStructure dict_struct{config, config_prefix};
 
     DictionarySourcePtr source_ptr = DictionarySourceFactory::instance().create(
         name, config, config_prefix + ".source", dict_struct, context, config.getString(config_prefix + ".database", ""), check_source_config);
diff --git a/src/Dictionaries/DictionaryStructure.cpp b/src/Dictionaries/DictionaryStructure.cpp
index 4a446b6d1af..3b2afdb1bea 100644
--- a/src/Dictionaries/DictionaryStructure.cpp
+++ b/src/Dictionaries/DictionaryStructure.cpp
@@ -135,17 +135,19 @@ DictionarySpecialAttribute::DictionarySpecialAttribute(const Poco::Util::Abstrac
 
 DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix)
 {
-    const auto has_id = config.has(config_prefix + ".id");
-    const auto has_key = config.has(config_prefix + ".key");
+    std::string structure_prefix = config_prefix + ".structure";
+
+    const auto has_id = config.has(structure_prefix + ".id");
+    const auto has_key = config.has(structure_prefix + ".key");
 
     if (has_key && has_id)
         throw Exception{"Only one of 'id' and 'key' should be specified", ErrorCodes::BAD_ARGUMENTS};
 
     if (has_id)
-        id.emplace(config, config_prefix + ".id");
+        id.emplace(config, structure_prefix + ".id");
     else if (has_key)
     {
-        key.emplace(getAttributes(config, config_prefix + ".key", false, false));
+        key.emplace(getAttributes(config, structure_prefix + ".key", false, false));
         if (key->empty())
             throw Exception{"Empty 'key' supplied", ErrorCodes::BAD_ARGUMENTS};
     }
@@ -158,11 +160,11 @@ DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration
             throw Exception{"'id' cannot be empty", ErrorCodes::BAD_ARGUMENTS};
 
         const char * range_default_type = "Date";
-        if (config.has(config_prefix + ".range_min"))
-            range_min.emplace(makeDictionaryTypedSpecialAttribute(config, config_prefix + ".range_min", range_default_type));
+        if (config.has(structure_prefix + ".range_min"))
+            range_min.emplace(makeDictionaryTypedSpecialAttribute(config, structure_prefix + ".range_min", range_default_type));
 
-        if (config.has(config_prefix + ".range_max"))
-            range_max.emplace(makeDictionaryTypedSpecialAttribute(config, config_prefix + ".range_max", range_default_type));
+        if (config.has(structure_prefix + ".range_max"))
+            range_max.emplace(makeDictionaryTypedSpecialAttribute(config, structure_prefix + ".range_max", range_default_type));
 
         if (range_min.has_value() != range_max.has_value())
         {
@@ -194,10 +196,13 @@ DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration
             has_expressions = true;
     }
 
-    attributes = getAttributes(config, config_prefix);
+    attributes = getAttributes(config, structure_prefix);
 
     if (attributes.empty())
         throw Exception{"Dictionary has no attributes defined", ErrorCodes::BAD_ARGUMENTS};
+
+    if (config.getBool(config_prefix + ".layout.ip_trie.access_to_key_from_attributes", false))
+        access_to_key_from_attributes = true;
 }
 
 
@@ -222,14 +227,20 @@ const DictionaryAttribute & DictionaryStructure::getAttribute(const String & att
 {
     auto find_iter
         = std::find_if(attributes.begin(), attributes.end(), [&](const auto & attribute) { return attribute.name == attribute_name; });
+    if (find_iter != attributes.end())
+        return *find_iter;
 
-    if (find_iter == attributes.end())
-        throw Exception{"No such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS};
+    if (key && access_to_key_from_attributes)
+    {
+        find_iter = std::find_if(key->begin(), key->end(), [&](const auto & attribute) { return attribute.name == attribute_name; });
+        if (find_iter != key->end())
+            return *find_iter;
+    }
 
-    return *find_iter;
+    throw Exception{"No such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS};
 }
 
-const DictionaryAttribute & DictionaryStructure::getAttribute(const String& attribute_name, const DataTypePtr & type) const
+const DictionaryAttribute & DictionaryStructure::getAttribute(const String & attribute_name, const DataTypePtr & type) const
 {
     const auto & attribute = getAttribute(attribute_name);
 
diff --git a/src/Dictionaries/DictionaryStructure.h b/src/Dictionaries/DictionaryStructure.h
index b996c3a199a..ad3333792aa 100644
--- a/src/Dictionaries/DictionaryStructure.h
+++ b/src/Dictionaries/DictionaryStructure.h
@@ -150,6 +150,7 @@ struct DictionaryStructure final
     std::optional<DictionaryTypedSpecialAttribute> range_min;
     std::optional<DictionaryTypedSpecialAttribute> range_max;
     bool has_expressions = false;
+    bool access_to_key_from_attributes = false;
 
     DictionaryStructure(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix);
 
diff --git a/src/Dictionaries/IPAddressDictionary.cpp b/src/Dictionaries/IPAddressDictionary.cpp
index fa9f8899142..6447c76ee73 100644
--- a/src/Dictionaries/IPAddressDictionary.cpp
+++ b/src/Dictionaries/IPAddressDictionary.cpp
@@ -247,21 +247,15 @@ IPAddressDictionary::IPAddressDictionary(
     const DictionaryStructure & dict_struct_,
     DictionarySourcePtr source_ptr_,
     const DictionaryLifetime dict_lifetime_,
-    bool require_nonempty_,
-    bool access_to_key_from_attributes_)
+    bool require_nonempty_)
     : IDictionaryBase(dict_id_)
     , dict_struct(dict_struct_)
     , source_ptr{std::move(source_ptr_)}
     , dict_lifetime(dict_lifetime_)
     , require_nonempty(require_nonempty_)
-    , access_to_key_from_attributes(access_to_key_from_attributes_)
+    , access_to_key_from_attributes(dict_struct_.access_to_key_from_attributes)
     , logger(&Poco::Logger::get("IPAddressDictionary"))
 {
-    if (access_to_key_from_attributes)
-    {
-        dict_struct.attributes.emplace_back(dict_struct.key->front());
-    }
-
     createAttributes();
 
     loadData();
@@ -367,18 +361,23 @@ ColumnUInt8::Ptr IPAddressDictionary::hasKeys(const Columns & key_columns, const
 
 void IPAddressDictionary::createAttributes()
 {
-    const auto size = dict_struct.attributes.size();
-    attributes.reserve(size);
-
-    for (const auto & attribute : dict_struct.attributes)
+    auto create_attributes_from_dictionary_attributes = [this](const std::vector<DictionaryAttribute> & dict_attrs)
     {
-        attribute_index_by_name.emplace(attribute.name, attributes.size());
-        attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));
+        attributes.reserve(attributes.size() + dict_attrs.size());
+        for (const auto & attribute : dict_attrs)
+        {
+            attribute_index_by_name.emplace(attribute.name, attributes.size());
+            attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));
 
-        if (attribute.hierarchical)
-            throw Exception{full_name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(),
-                            ErrorCodes::TYPE_MISMATCH};
-    }
+            if (attribute.hierarchical)
+                throw Exception{full_name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(),
+                                ErrorCodes::TYPE_MISMATCH};
+        }
+    };
+
+    create_attributes_from_dictionary_attributes(dict_struct.attributes);
+    if (access_to_key_from_attributes)
+        create_attributes_from_dictionary_attributes(*dict_struct.key);
 }
 
 void IPAddressDictionary::loadData()
@@ -396,19 +395,13 @@ void IPAddressDictionary::loadData()
         element_count += rows;
 
         const ColumnPtr key_column_ptr = block.safeGetByPosition(0).column;
-
-        size_t attributes_size = dict_struct.attributes.size();
-        if (access_to_key_from_attributes)
-        {
-            /// last attribute contains key and will be filled in code below
-            attributes_size--;
-        }
-        const auto attribute_column_ptrs = ext::map<Columns>(ext::range(0, attributes_size),
+        const auto attribute_column_ptrs = ext::map<Columns>(
+            ext::range(0, dict_struct.attributes.size()),
             [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx + 1).column; });
 
         for (const auto row : ext::range(0, rows))
         {
-            for (const auto attribute_idx : ext::range(0, attribute_column_ptrs.size()))
+            for (const auto attribute_idx : ext::range(0, dict_struct.attributes.size()))
             {
                 const auto & attribute_column = *attribute_column_ptrs[attribute_idx];
                 auto & attribute = attributes[attribute_idx];
@@ -991,11 +984,8 @@ void registerDictionaryTrie(DictionaryFactory & factory)
         const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"};
         const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false);
 
-        const auto & layout_prefix = config_prefix + ".layout.ip_trie";
-        const bool access_to_key_from_attributes = config.getBool(layout_prefix + ".access_to_key_from_attributes", false);
         // This is specialised dictionary for storing IPv4 and IPv6 prefixes.
-        return std::make_unique<IPAddressDictionary>(dict_id, dict_struct, std::move(source_ptr), dict_lifetime,
-                                                     require_nonempty, access_to_key_from_attributes);
+        return std::make_unique<IPAddressDictionary>(dict_id, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty);
     };
     factory.registerLayout("ip_trie", create_layout, true);
 }
diff --git a/src/Dictionaries/IPAddressDictionary.h b/src/Dictionaries/IPAddressDictionary.h
index 629a850267c..6c5cfa765e8 100644
--- a/src/Dictionaries/IPAddressDictionary.h
+++ b/src/Dictionaries/IPAddressDictionary.h
@@ -28,8 +28,7 @@ public:
         const DictionaryStructure & dict_struct_,
         DictionarySourcePtr source_ptr_,
         const DictionaryLifetime dict_lifetime_,
-        bool require_nonempty_,
-        bool access_to_key_from_attributes_);
+        bool require_nonempty_);
 
     std::string getKeyDescription() const { return key_description; }
 
@@ -47,8 +46,7 @@ public:
 
     std::shared_ptr<const IExternalLoadable> clone() const override
     {
-        return std::make_shared<IPAddressDictionary>(getDictionaryID(), dict_struct, source_ptr->clone(), dict_lifetime,
-                                                     require_nonempty, access_to_key_from_attributes);
+        return std::make_shared<IPAddressDictionary>(getDictionaryID(), dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty);
     }
 
     const IDictionarySource * getSource() const override { return source_ptr.get(); }
diff --git a/src/Interpreters/ExternalDictionariesLoader.cpp b/src/Interpreters/ExternalDictionariesLoader.cpp
index c735dd76911..4df4e5f8c1b 100644
--- a/src/Interpreters/ExternalDictionariesLoader.cpp
+++ b/src/Interpreters/ExternalDictionariesLoader.cpp
@@ -38,7 +38,7 @@ ExternalLoader::LoadablePtr ExternalDictionariesLoader::create(
 DictionaryStructure
 ExternalDictionariesLoader::getDictionaryStructure(const Poco::Util::AbstractConfiguration & config, const std::string & key_in_config)
 {
-    return {config, key_in_config + ".structure"};
+    return {config, key_in_config};
 }
 
 DictionaryStructure ExternalDictionariesLoader::getDictionaryStructure(const ObjectConfig & config)

From 5cb99ae699863ea6f896ea16770e7cdeb33921d4 Mon Sep 17 00:00:00 2001
From: vdimir <vdimir@yandex-team.ru>
Date: Sat, 30 Jan 2021 20:20:11 +0300
Subject: [PATCH 0415/1238] Use offsets in FunctionIPv6StringToNum for copying
 ipv4 to buffer

---
 src/Functions/FunctionsCoding.h                             | 2 +-
 tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference | 3 +++
 tests/queries/0_stateless/00725_ipv4_ipv6_domains.sql       | 2 +-
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/Functions/FunctionsCoding.h b/src/Functions/FunctionsCoding.h
index 428f4891990..a311c6938d6 100644
--- a/src/Functions/FunctionsCoding.h
+++ b/src/Functions/FunctionsCoding.h
@@ -311,7 +311,7 @@ public:
                     std::memcpy(
                         src_ipv4_buf + std::strlen("::ffff:"),
                         reinterpret_cast<const char *>(&vec_src[src_offset]),
-                        std::strlen(reinterpret_cast<const char *>(&vec_src[src_offset])));
+                        std::min(offsets_src[i] - src_offset, IPV4_MAX_TEXT_LENGTH + 1));
                     parseIPv6(src_ipv4_buf, reinterpret_cast<unsigned char *>(&vec_res[out_offset]));
                 }
                 else
diff --git a/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference b/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference
index bddd10ba865..e0922ad435d 100644
--- a/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference
+++ b/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference
@@ -62,3 +62,6 @@ FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF is ipv6 string: 	1
 ::ffff:127.0.0.7
 ::ffff:127.0.0.8
 ::ffff:127.0.0.9
+::ffff:127.0.0.10
+::ffff:127.0.0.11
+::ffff:127.0.0.12
diff --git a/tests/queries/0_stateless/00725_ipv4_ipv6_domains.sql b/tests/queries/0_stateless/00725_ipv4_ipv6_domains.sql
index 39f50781a7e..5815afb1605 100644
--- a/tests/queries/0_stateless/00725_ipv4_ipv6_domains.sql
+++ b/tests/queries/0_stateless/00725_ipv4_ipv6_domains.sql
@@ -89,4 +89,4 @@ SELECT '2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D is ipv6 string: ', isIPv6String(
 SELECT toIPv6('0.0.0.0');
 SELECT toIPv6('127.0.0.1');
 SELECT cutIPv6(IPv6StringToNum('127.0.0.1'), 0, 0);
-SELECT toIPv6('127.0.0.' || toString(number)) FROM numbers(10);
+SELECT toIPv6('127.0.0.' || toString(number)) FROM numbers(13);

From 155f6c0eb5194d09b4d9817b2f0ce46c1cb986d0 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Sat, 30 Jan 2021 22:56:23 +0300
Subject: [PATCH 0416/1238] Function reinterpretAs updated to support big
 integers

---
 .../functions/type-conversion-functions.md    |  86 +---
 src/Core/callOnTypeIndex.h                    |  18 +
 .../registerFunctionsReinterpret.cpp          |   4 -
 src/Functions/reinterpretAs.cpp               | 466 +++++++++++++-----
 src/Functions/reinterpretAsFixedString.cpp    |  96 ----
 src/Functions/reinterpretAsString.cpp         |  92 ----
 src/Functions/ya.make                         |   2 -
 7 files changed, 386 insertions(+), 378 deletions(-)
 delete mode 100644 src/Functions/reinterpretAsFixedString.cpp
 delete mode 100644 src/Functions/reinterpretAsString.cpp

diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md
index 6237cd6a976..f650df79271 100644
--- a/docs/en/sql-reference/functions/type-conversion-functions.md
+++ b/docs/en/sql-reference/functions/type-conversion-functions.md
@@ -303,9 +303,31 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut
 └────────────┴───────┘
 ```
 
-## reinterpretAsUInt(8\|16\|32\|64) {#reinterpretasuint8163264}
+## reinterpretAs(x, T) {#type_conversion_function-cast}
 
-## reinterpretAsInt(8\|16\|32\|64) {#reinterpretasint8163264}
+Performs byte reinterpretation of ‘x’ as ‘t’ data type.
+
+Following reinterpretations are allowed:
+1. Any type that has fixed size and value of that type can be represented continuously into FixedString. Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long.
+2. Any type that if value of that type can be represented continuously into String. Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long.
+3. Types that can be interpreted as numeric (Integers, Float, Date, DateTime, UUID) into FixedString,
+String, and types that can be interpreted as numeric (Integers, Float, Date, DateTime, UUID).
+
+``` sql
+SELECT reinterpretAs(toInt8(-1), 'UInt8') as int_to_uint,
+    reinterpretAs(toInt8(1), 'Float32') as int_to_float,
+    reinterpretAs('1', 'UInt32') as string_to_int;
+```
+
+``` text
+┌─int_to_uint─┬─int_to_float─┬─string_to_int─┐
+│         255 │        1e-45 │            49 │
+└─────────────┴──────────────┴───────────────┘
+```
+
+## reinterpretAsUInt(8\|16\|32\|64\|256) {#reinterpretasuint8163264256}
+
+## reinterpretAsInt(8\|16\|32\|64\|128\|256) {#reinterpretasint8163264128256}
 
 ## reinterpretAsFloat(32\|64) {#reinterpretasfloat3264}
 
@@ -313,71 +335,13 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut
 
 ## reinterpretAsDateTime {#reinterpretasdatetime}
 
-These functions accept a string and interpret the bytes placed at the beginning of the string as a number in host order (little endian). If the string isn’t long enough, the functions work as if the string is padded with the necessary number of null bytes. If the string is longer than needed, the extra bytes are ignored. A date is interpreted as the number of days since the beginning of the Unix Epoch, and a date with time is interpreted as the number of seconds since the beginning of the Unix Epoch.
-
 ## reinterpretAsString {#type_conversion_functions-reinterpretAsString}
 
-This function accepts a number or date or date with time, and returns a string containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long.
-
 ## reinterpretAsFixedString {#reinterpretasfixedstring}
 
-This function accepts a number or date or date with time, and returns a FixedString containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a FixedString that is one byte long.
-
 ## reinterpretAsUUID {#reinterpretasuuid}
 
-This function accepts 16 bytes string, and returns UUID containing bytes representing the corresponding value in network byte order (big-endian). If the string isn't long enough, the functions work as if the string is padded with the necessary number of null bytes to the end. If the string longer than 16 bytes, the extra bytes at the end are ignored. 
-
-**Syntax**
-
-``` sql
-reinterpretAsUUID(fixed_string)
-```
-
-**Parameters**
-
--   `fixed_string` — Big-endian byte string. [FixedString](../../sql-reference/data-types/fixedstring.md#fixedstring).
-
-**Returned value**
-
--   The UUID type value. [UUID](../../sql-reference/data-types/uuid.md#uuid-data-type).
-
-**Examples**
-
-String to UUID.
-
-Query:
-
-``` sql
-SELECT reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f')))
-```
-
-Result:
-
-``` text
-┌─reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f')))─┐
-│                                  08090a0b-0c0d-0e0f-0001-020304050607 │
-└───────────────────────────────────────────────────────────────────────┘
-```
-
-Going back and forth from String to UUID.
-
-Query:
-
-``` sql
-WITH
-    generateUUIDv4() AS uuid,
-    identity(lower(hex(reverse(reinterpretAsString(uuid))))) AS str,
-    reinterpretAsUUID(reverse(unhex(str))) AS uuid2
-SELECT uuid = uuid2;
-```
-
-Result:
-
-``` text
-┌─equals(uuid, uuid2)─┐
-│                   1 │
-└─────────────────────┘
-```
+These functions are aliases for `reinterpretAs`function.
 
 ## CAST(x, T) {#type_conversion_function-cast}
 
diff --git a/src/Core/callOnTypeIndex.h b/src/Core/callOnTypeIndex.h
index ccbccd7105a..2d6134ab9fc 100644
--- a/src/Core/callOnTypeIndex.h
+++ b/src/Core/callOnTypeIndex.h
@@ -207,4 +207,22 @@ bool callOnIndexAndDataType(TypeIndex number, F && f, ExtraArgs && ... args)
     return false;
 }
 
+template <typename F>
+static bool callOnTwoTypeIndexes(TypeIndex left_type, TypeIndex right_type, F && func)
+{
+    return callOnIndexAndDataType<void>(left_type, [&](const auto & left_types) -> bool
+    {
+        using LeftTypes = std::decay_t<decltype(left_types)>;
+        using LeftType = typename LeftTypes::LeftType;
+
+        return callOnIndexAndDataType<void>(right_type, [&](const auto & right_types) -> bool
+        {
+            using RightTypes = std::decay_t<decltype(right_types)>;
+            using RightType = typename RightTypes::LeftType;
+
+            return std::forward<F>(func)(TypePair<LeftType, RightType>());
+        });
+    });
+}
+
 }
diff --git a/src/Functions/registerFunctionsReinterpret.cpp b/src/Functions/registerFunctionsReinterpret.cpp
index d82274ce9ed..e6fa0402071 100644
--- a/src/Functions/registerFunctionsReinterpret.cpp
+++ b/src/Functions/registerFunctionsReinterpret.cpp
@@ -4,14 +4,10 @@ namespace DB
 class FunctionFactory;
 
 void registerFunctionsReinterpretAs(FunctionFactory & factory);
-void registerFunctionReinterpretAsString(FunctionFactory & factory);
-void registerFunctionReinterpretAsFixedString(FunctionFactory & factory);
 
 void registerFunctionsReinterpret(FunctionFactory & factory)
 {
     registerFunctionsReinterpretAs(factory);
-    registerFunctionReinterpretAsString(factory);
-    registerFunctionReinterpretAsFixedString(factory);
 }
 
 }
diff --git a/src/Functions/reinterpretAs.cpp b/src/Functions/reinterpretAs.cpp
index 64facaf0840..356ce9d60ad 100644
--- a/src/Functions/reinterpretAs.cpp
+++ b/src/Functions/reinterpretAs.cpp
@@ -1,5 +1,8 @@
 #include <Functions/FunctionFactory.h>
 #include <Functions/castTypeToEither.h>
+#include <Functions/FunctionHelpers.h>
+
+#include <Core/callOnTypeIndex.h>
 
 #include <DataTypes/DataTypesNumber.h>
 #include <DataTypes/DataTypeString.h>
@@ -7,6 +10,7 @@
 #include <DataTypes/DataTypeDate.h>
 #include <DataTypes/DataTypeDateTime.h>
 #include <DataTypes/DataTypeUUID.h>
+#include <DataTypes/DataTypeFactory.h>
 #include <Columns/ColumnString.h>
 #include <Columns/ColumnFixedString.h>
 #include <Columns/ColumnConst.h>
@@ -21,178 +25,385 @@ namespace DB
 {
 namespace ErrorCodes
 {
-    extern const int ILLEGAL_COLUMN;
     extern const int ILLEGAL_TYPE_OF_ARGUMENT;
 }
 
 namespace
 {
-template <typename ToDataType, typename Name, bool support_between_float_integer>
+
+/** Performs byte reinterpretation similar to reinterpret_cast.
+ *
+ * Following reinterpretations are allowed:
+ * 1. Any type that isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion into FixedString.
+ * 2. Any type that isValueUnambiguouslyRepresentedInContiguousMemoryRegion into String.
+ * 3. Types that can be interpreted as numeric (Integers, Float, Date, DateTime, UUID) into FixedString,
+ * String, and types that can be interpreted as numeric (Integers, Float, Date, DateTime, UUID).
+ */
 class FunctionReinterpretAs : public IFunction
 {
-    template <typename F>
-    static bool castType(const IDataType * type, F && f)
+public:
+    static constexpr auto name = "reinterpretAs";
+
+    static FunctionPtr create(const Context &) { return std::make_shared<FunctionReinterpretAs>(); }
+
+    String getName() const override { return name; }
+
+    size_t getNumberOfArguments() const override { return 2; }
+
+    bool useDefaultImplementationForConstants() const override { return true; }
+
+    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
+
+    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
     {
-        return castTypeToEither<DataTypeUInt32, DataTypeInt32, DataTypeUInt64, DataTypeInt64, DataTypeFloat32, DataTypeFloat64>(
-            type, std::forward<F>(f));
+        const auto & column = arguments.back().column;
+
+        DataTypePtr from_type = arguments[0].type;
+
+        const auto * type_col = checkAndGetColumnConst<ColumnString>(column.get());
+        if (!type_col)
+            throw Exception("Second argument to " + getName() + " must be a constant string describing type."
+                " Instead there is non-constant column of type " + arguments.back().type->getName(),
+                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+
+        DataTypePtr to_type = DataTypeFactory::instance().get(type_col->getValue<String>());
+
+        WhichDataType result_reinterpret_type(to_type);
+
+        if (result_reinterpret_type.isFixedString())
+        {
+            if (!from_type->isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion())
+                throw Exception("Cannot reinterpret " + from_type->getName() +
+                    " as FixedString because it is not fixed size and contiguous in memory",
+                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+        }
+        else if (result_reinterpret_type.isString())
+        {
+            if (!from_type->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
+                throw Exception("Cannot reinterpret " + from_type->getName() +
+                    " as String because it is not contiguous in memory",
+                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+        }
+        else if (canBeReinterpretedAsNumeric(result_reinterpret_type))
+        {
+            WhichDataType from_data_type(from_type);
+
+            if (!canBeReinterpretedAsNumeric(from_data_type) && !from_data_type.isStringOrFixedString())
+                throw Exception("Cannot reinterpret " + from_type->getName() + " as " + to_type->getName()
+                    + " because only Numeric, String or FixedString can be reinterpreted in Numeric",
+                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+        }
+
+        return to_type;
+    }
+
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
+    {
+        auto from_type = arguments[0].type;
+
+        ColumnPtr result;
+
+        if (!callOnTwoTypeIndexes(from_type->getTypeId(), result_type->getTypeId(), [&](const auto & types)
+        {
+            using Types = std::decay_t<decltype(types)>;
+            using FromType = typename Types::LeftType;
+            using ToType = typename Types::RightType;
+
+            /// Place this check before std::is_same_v<FromType, ToType> because same FixedString
+            /// types does not necessary have the same byte size fixed value.
+            if constexpr (std::is_same_v<ToType, DataTypeFixedString>)
+            {
+                const IColumn & src = *arguments[0].column;
+                MutableColumnPtr dst = result_type->createColumn();
+
+                ColumnFixedString * dst_concrete = assert_cast<ColumnFixedString *>(dst.get());
+
+                if (src.isFixedAndContiguous() && src.sizeOfValueIfFixed() == dst_concrete->getN())
+                    executeContiguousToFixedString(src, *dst_concrete, dst_concrete->getN());
+                else
+                    executeToFixedString(src, *dst_concrete, dst_concrete->getN());
+
+                result = std::move(dst);
+
+                return true;
+            }
+            else if constexpr (std::is_same_v<FromType, ToType>)
+            {
+                result = arguments[0].column;
+
+                return true;
+            }
+            else if constexpr (std::is_same_v<ToType, DataTypeString>)
+            {
+                const IColumn & src = *arguments[0].column;
+                MutableColumnPtr dst = result_type->createColumn();
+
+                ColumnString * dst_concrete = assert_cast<ColumnString *>(dst.get());
+                executeToString(src, *dst_concrete);
+
+                result = std::move(dst);
+
+                return true;
+            }
+            else if constexpr (CanBeReinterpretedAsNumeric<ToType>)
+            {
+                using ToColumnType = typename ToType::ColumnType;
+                using ToFieldType = typename ToType::FieldType;
+
+                if constexpr (std::is_same_v<FromType, DataTypeString>)
+                {
+                    const auto * col_from = assert_cast<const ColumnString *>(arguments[0].column.get());
+
+                    auto col_res = ToColumnType::create();
+
+                    const ColumnString::Chars & data_from = col_from->getChars();
+                    const ColumnString::Offsets & offsets_from = col_from->getOffsets();
+                    size_t size = offsets_from.size();
+                    typename ToColumnType::Container & vec_res = col_res->getData();
+                    vec_res.resize(size);
+
+                    size_t offset = 0;
+                    for (size_t i = 0; i < size; ++i)
+                    {
+                        ToFieldType value{};
+                        memcpy(&value,
+                            &data_from[offset],
+                            std::min(static_cast<UInt64>(sizeof(ToFieldType)), offsets_from[i] - offset - 1));
+                        vec_res[i] = value;
+                        offset = offsets_from[i];
+                    }
+
+                    result = std::move(col_res);
+
+                    return true;
+                }
+                else if constexpr (std::is_same_v<FromType, DataTypeFixedString>)
+                {
+                    const auto * col_from_fixed = assert_cast<const ColumnFixedString *>(arguments[0].column.get());
+
+                    auto col_res = ToColumnType::create();
+
+                    const ColumnString::Chars & data_from = col_from_fixed->getChars();
+                    size_t step = col_from_fixed->getN();
+                    size_t size = data_from.size() / step;
+                    typename ToColumnType::Container & vec_res = col_res->getData();
+                    vec_res.resize(size);
+
+                    size_t offset = 0;
+                    size_t copy_size = std::min(step, sizeof(ToFieldType));
+                    for (size_t i = 0; i < size; ++i)
+                    {
+                        ToFieldType value{};
+                        memcpy(&value, &data_from[offset], copy_size);
+                        vec_res[i] = value;
+                        offset += step;
+                    }
+
+                    result = std::move(col_res);
+
+                    return true;
+                }
+                else if constexpr (CanBeReinterpretedAsNumeric<FromType>)
+                {
+                    using FromTypeFieldType = typename FromType::FieldType;
+                    const auto * col = assert_cast<const ColumnVector<FromTypeFieldType>*>(arguments[0].column.get());
+
+                    auto col_res = ToColumnType::create();
+                    reinterpretImpl(col->getData(), col_res->getData());
+                    result = std::move(col_res);
+
+                    return true;
+                }
+            }
+
+            return false;
+        }))
+        {
+            throw Exception("Cannot reinterpret " + from_type->getName() + " as " + result_type->getName(),
+                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+        }
+
+        return result;
+    }
+private:
+    template <typename T>
+    static constexpr auto CanBeReinterpretedAsNumeric =
+        IsDataTypeNumber<T> ||
+        std::is_same_v<T, DataTypeDate> ||
+        std::is_same_v<T, DataTypeDateTime> ||
+        std::is_same_v<T, DataTypeUUID>;
+
+    static bool canBeReinterpretedAsNumeric(const WhichDataType & type)
+    {
+        return type.isUInt() ||
+            type.isInt() ||
+            type.isDateOrDateTime() ||
+            type.isFloat() ||
+            type.isUUID();
+    }
+
+    static void NO_INLINE executeToFixedString(const IColumn & src, ColumnFixedString & dst, size_t n)
+    {
+        size_t rows = src.size();
+        ColumnFixedString::Chars & data_to = dst.getChars();
+        data_to.resize_fill(n * rows);
+
+        ColumnFixedString::Offset offset = 0;
+        for (size_t i = 0; i < rows; ++i)
+        {
+            StringRef data = src.getDataAt(i);
+
+            std::memcpy(&data_to[offset], data.data, std::min(n, data.size));
+            offset += n;
+        }
+    }
+
+    static void NO_INLINE executeContiguousToFixedString(const IColumn & src, ColumnFixedString & dst, size_t n)
+    {
+        size_t rows = src.size();
+        ColumnFixedString::Chars & data_to = dst.getChars();
+        data_to.resize(n * rows);
+
+        memcpy(data_to.data(), src.getRawData().data, data_to.size());
+    }
+
+    static void NO_INLINE executeToString(const IColumn & src, ColumnString & dst)
+    {
+        size_t rows = src.size();
+        ColumnString::Chars & data_to = dst.getChars();
+        ColumnString::Offsets & offsets_to = dst.getOffsets();
+        offsets_to.resize(rows);
+
+        ColumnString::Offset offset = 0;
+        for (size_t i = 0; i < rows; ++i)
+        {
+            StringRef data = src.getDataAt(i);
+
+            /// Cut trailing zero bytes.
+            while (data.size && data.data[data.size - 1] == 0)
+                --data.size;
+
+            data_to.resize(offset + data.size + 1);
+            memcpy(&data_to[offset], data.data, data.size);
+            offset += data.size;
+            data_to[offset] = 0;
+            ++offset;
+            offsets_to[i] = offset;
+        }
     }
 
     template <typename From, typename To>
     static void reinterpretImpl(const PaddedPODArray<From> & from, PaddedPODArray<To> & to)
     {
         size_t size = from.size();
-        to.resize(size);
+        to.resize_fill(size);
+
         for (size_t i = 0; i < size; ++i)
         {
-            to[i] = unalignedLoad<To>(&(from.data()[i]));
+            memcpy(static_cast<void*>(&to[i]),
+                static_cast<const void*>(&from[i]),
+                std::min(sizeof(From), sizeof(To)));
         }
     }
+};
 
+template <typename ToDataType, typename Name>
+class FunctionReinterpretAsTyped : public IFunction
+{
 public:
     static constexpr auto name = Name::name;
-    static FunctionPtr create(const Context &) { return std::make_shared<FunctionReinterpretAs>(); }
+    static FunctionPtr create(const Context &) { return std::make_shared<FunctionReinterpretAsTyped>(); }
 
-    using ToFieldType = typename ToDataType::FieldType;
-    using ColumnType = typename ToDataType::ColumnType;
+    explicit FunctionReinterpretAsTyped() {}
 
     String getName() const override { return name; }
 
     size_t getNumberOfArguments() const override { return 1; }
 
-    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
-    {
-        const IDataType & type = *arguments[0];
-        if constexpr (support_between_float_integer)
-        {
-            if (!isStringOrFixedString(type) && !isNumber(type))
-                throw Exception(
-                    "Cannot reinterpret " + type.getName() + " as " + ToDataType().getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
-
-            if (isNumber(type))
-            {
-                if (type.getSizeOfValueInMemory() != ToDataType{}.getSizeOfValueInMemory())
-                    throw Exception(
-                        "Cannot reinterpret " + type.getName() + " as " + ToDataType().getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
-            }
-        }
-        else
-        {
-            if (!isStringOrFixedString(type))
-                throw Exception(
-                    "Cannot reinterpret " + type.getName() + " as " + ToDataType().getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
-        }
-        return std::make_shared<ToDataType>();
-    }
-
     bool useDefaultImplementationForConstants() const override { return true; }
 
-    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
+    static ColumnsWithTypeAndName addTypeColumnToArguments(const ColumnsWithTypeAndName & arguments)
     {
-        if (const ColumnString * col_from = typeid_cast<const ColumnString *>(arguments[0].column.get()))
+        auto & argument = arguments[0];
+
+        DataTypePtr data_type;
+
+        if constexpr (std::is_same_v<ToDataType, DataTypeFixedString>)
         {
-            auto col_res = ColumnType::create();
-
-            const ColumnString::Chars & data_from = col_from->getChars();
-            const ColumnString::Offsets & offsets_from = col_from->getOffsets();
-            size_t size = offsets_from.size();
-            typename ColumnType::Container & vec_res = col_res->getData();
-            vec_res.resize(size);
-
-            size_t offset = 0;
-            for (size_t i = 0; i < size; ++i)
-            {
-                ToFieldType value{};
-                memcpy(&value, &data_from[offset], std::min(static_cast<UInt64>(sizeof(ToFieldType)), offsets_from[i] - offset - 1));
-                vec_res[i] = value;
-                offset = offsets_from[i];
-            }
-
-            return col_res;
-        }
-        else if (const ColumnFixedString * col_from_fixed = typeid_cast<const ColumnFixedString *>(arguments[0].column.get()))
-        {
-            auto col_res = ColumnVector<ToFieldType>::create();
-
-            const ColumnString::Chars & data_from = col_from_fixed->getChars();
-            size_t step = col_from_fixed->getN();
-            size_t size = data_from.size() / step;
-            typename ColumnVector<ToFieldType>::Container & vec_res = col_res->getData();
-            vec_res.resize(size);
-
-            size_t offset = 0;
-            size_t copy_size = std::min(step, sizeof(ToFieldType));
-            for (size_t i = 0; i < size; ++i)
-            {
-                ToFieldType value{};
-                memcpy(&value, &data_from[offset], copy_size);
-                vec_res[i] = value;
-                offset += step;
-            }
-
-            return col_res;
-        }
-        else if constexpr (support_between_float_integer)
-        {
-            ColumnPtr res;
-            if (castType(arguments[0].type.get(), [&](const auto & type)
-                {
-                    using DataType = std::decay_t<decltype(type)>;
-                    using T = typename DataType::FieldType;
-
-                    const ColumnVector<T> * col = checkAndGetColumn<ColumnVector<T>>(arguments[0].column.get());
-                    auto col_res = ColumnType::create();
-                    reinterpretImpl(col->getData(), col_res->getData());
-                    res = std::move(col_res);
-
-                    return true;
-                }))
-            {
-                return res;
-            }
-            else
-            {
-                throw Exception(
-                    "Illegal column " + arguments[0].column->getName() + " of argument of function " + getName(),
-                    ErrorCodes::ILLEGAL_COLUMN);
-            }
+            auto & type = argument.type;
+            size_t type_value_size_in_memory = type->getSizeOfValueInMemory();
+            data_type = std::make_shared<DataTypeFixedString>(type_value_size_in_memory);
         }
         else
-        {
-            throw Exception(
-                "Illegal column " + arguments[0].column->getName() + " of argument of function " + getName(),
-                ErrorCodes::ILLEGAL_COLUMN);
-        }
-    }
-};
+            data_type = std::make_shared<ToDataType>();
 
+        auto type_name_column = DataTypeString().createColumnConst(1, data_type->getName());
+        ColumnWithTypeAndName type_column(type_name_column, std::make_shared<DataTypeString>(), "");
+
+        ColumnsWithTypeAndName arguments_with_type
+        {
+            argument,
+            type_column
+        };
+
+        return arguments_with_type;
+    }
+
+    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
+    {
+        auto arguments_with_type = addTypeColumnToArguments(arguments);
+        return impl.getReturnTypeImpl(arguments_with_type);
+    }
+
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type, size_t input_rows_count) const override
+    {
+        auto arguments_with_type = addTypeColumnToArguments(arguments);
+        return impl.executeImpl(arguments_with_type, return_type, input_rows_count);
+    }
+
+    const FunctionReinterpretAs impl;
+};
 
 struct NameReinterpretAsUInt8       { static constexpr auto name = "reinterpretAsUInt8"; };
 struct NameReinterpretAsUInt16      { static constexpr auto name = "reinterpretAsUInt16"; };
 struct NameReinterpretAsUInt32      { static constexpr auto name = "reinterpretAsUInt32"; };
 struct NameReinterpretAsUInt64      { static constexpr auto name = "reinterpretAsUInt64"; };
+struct NameReinterpretAsUInt256     { static constexpr auto name = "reinterpretAsUInt256"; };
 struct NameReinterpretAsInt8        { static constexpr auto name = "reinterpretAsInt8"; };
 struct NameReinterpretAsInt16       { static constexpr auto name = "reinterpretAsInt16"; };
 struct NameReinterpretAsInt32       { static constexpr auto name = "reinterpretAsInt32"; };
 struct NameReinterpretAsInt64       { static constexpr auto name = "reinterpretAsInt64"; };
+struct NameReinterpretAsInt128      { static constexpr auto name = "reinterpretAsInt128"; };
+struct NameReinterpretAsInt256      { static constexpr auto name = "reinterpretAsInt256"; };
 struct NameReinterpretAsFloat32     { static constexpr auto name = "reinterpretAsFloat32"; };
 struct NameReinterpretAsFloat64     { static constexpr auto name = "reinterpretAsFloat64"; };
 struct NameReinterpretAsDate        { static constexpr auto name = "reinterpretAsDate"; };
 struct NameReinterpretAsDateTime    { static constexpr auto name = "reinterpretAsDateTime"; };
 struct NameReinterpretAsUUID        { static constexpr auto name = "reinterpretAsUUID"; };
+struct NameReinterpretAsString      { static constexpr auto name = "reinterpretAsString"; };
+struct NameReinterpretAsFixedString { static constexpr auto name = "reinterpretAsFixedString"; };
+
+using FunctionReinterpretAsUInt8 = FunctionReinterpretAsTyped<DataTypeUInt8, NameReinterpretAsUInt8>;
+using FunctionReinterpretAsUInt16 = FunctionReinterpretAsTyped<DataTypeUInt16, NameReinterpretAsUInt16>;
+using FunctionReinterpretAsUInt32 = FunctionReinterpretAsTyped<DataTypeUInt32, NameReinterpretAsUInt32>;
+using FunctionReinterpretAsUInt64 = FunctionReinterpretAsTyped<DataTypeUInt64, NameReinterpretAsUInt64>;
+using FunctionReinterpretAsUInt256 = FunctionReinterpretAsTyped<DataTypeUInt256, NameReinterpretAsUInt256>;
+using FunctionReinterpretAsInt8 = FunctionReinterpretAsTyped<DataTypeInt8, NameReinterpretAsInt8>;
+using FunctionReinterpretAsInt16 = FunctionReinterpretAsTyped<DataTypeInt16, NameReinterpretAsInt16>;
+using FunctionReinterpretAsInt32 = FunctionReinterpretAsTyped<DataTypeInt32, NameReinterpretAsInt32>;
+using FunctionReinterpretAsInt64 = FunctionReinterpretAsTyped<DataTypeInt64, NameReinterpretAsInt64>;
+using FunctionReinterpretAsInt128 = FunctionReinterpretAsTyped<DataTypeInt128, NameReinterpretAsInt128>;
+using FunctionReinterpretAsInt256 = FunctionReinterpretAsTyped<DataTypeInt256, NameReinterpretAsInt256>;
+using FunctionReinterpretAsFloat32 = FunctionReinterpretAsTyped<DataTypeFloat32, NameReinterpretAsFloat32>;
+using FunctionReinterpretAsFloat64 = FunctionReinterpretAsTyped<DataTypeFloat64, NameReinterpretAsFloat64>;
+using FunctionReinterpretAsDate = FunctionReinterpretAsTyped<DataTypeDate, NameReinterpretAsDate>;
+using FunctionReinterpretAsDateTime = FunctionReinterpretAsTyped<DataTypeDateTime, NameReinterpretAsDateTime>;
+using FunctionReinterpretAsUUID = FunctionReinterpretAsTyped<DataTypeUUID, NameReinterpretAsUUID>;
+
+using FunctionReinterpretAsString = FunctionReinterpretAsTyped<DataTypeString, NameReinterpretAsString>;
+
+using FunctionReinterpretAsFixedString = FunctionReinterpretAsTyped<DataTypeFixedString, NameReinterpretAsFixedString>;
 
-using FunctionReinterpretAsUInt8 = FunctionReinterpretAs<DataTypeUInt8, NameReinterpretAsUInt8, false>;
-using FunctionReinterpretAsUInt16 = FunctionReinterpretAs<DataTypeUInt16, NameReinterpretAsUInt16, false>;
-using FunctionReinterpretAsUInt32 = FunctionReinterpretAs<DataTypeUInt32, NameReinterpretAsUInt32, true>;
-using FunctionReinterpretAsUInt64 = FunctionReinterpretAs<DataTypeUInt64, NameReinterpretAsUInt64, true>;
-using FunctionReinterpretAsInt8 = FunctionReinterpretAs<DataTypeInt8, NameReinterpretAsInt8, false>;
-using FunctionReinterpretAsInt16 = FunctionReinterpretAs<DataTypeInt16, NameReinterpretAsInt16, false>;
-using FunctionReinterpretAsInt32 = FunctionReinterpretAs<DataTypeInt32, NameReinterpretAsInt32, true>;
-using FunctionReinterpretAsInt64 = FunctionReinterpretAs<DataTypeInt64, NameReinterpretAsInt64, true>;
-using FunctionReinterpretAsFloat32 = FunctionReinterpretAs<DataTypeFloat32, NameReinterpretAsFloat32, true>;
-using FunctionReinterpretAsFloat64 = FunctionReinterpretAs<DataTypeFloat64, NameReinterpretAsFloat64, true>;
-using FunctionReinterpretAsDate = FunctionReinterpretAs<DataTypeDate, NameReinterpretAsDate, false>;
-using FunctionReinterpretAsDateTime = FunctionReinterpretAs<DataTypeDateTime, NameReinterpretAsDateTime, false>;
-using FunctionReinterpretAsUUID = FunctionReinterpretAs<DataTypeUUID, NameReinterpretAsUUID, false>;
 }
 
 void registerFunctionsReinterpretAs(FunctionFactory & factory)
@@ -201,15 +412,24 @@ void registerFunctionsReinterpretAs(FunctionFactory & factory)
     factory.registerFunction<FunctionReinterpretAsUInt16>();
     factory.registerFunction<FunctionReinterpretAsUInt32>();
     factory.registerFunction<FunctionReinterpretAsUInt64>();
+    factory.registerFunction<FunctionReinterpretAsUInt256>();
     factory.registerFunction<FunctionReinterpretAsInt8>();
     factory.registerFunction<FunctionReinterpretAsInt16>();
     factory.registerFunction<FunctionReinterpretAsInt32>();
     factory.registerFunction<FunctionReinterpretAsInt64>();
+    factory.registerFunction<FunctionReinterpretAsInt128>();
+    factory.registerFunction<FunctionReinterpretAsInt256>();
     factory.registerFunction<FunctionReinterpretAsFloat32>();
     factory.registerFunction<FunctionReinterpretAsFloat64>();
     factory.registerFunction<FunctionReinterpretAsDate>();
     factory.registerFunction<FunctionReinterpretAsDateTime>();
     factory.registerFunction<FunctionReinterpretAsUUID>();
+
+    factory.registerFunction<FunctionReinterpretAsString>();
+
+    factory.registerFunction<FunctionReinterpretAsFixedString>();
+
+    factory.registerFunction<FunctionReinterpretAs>();
 }
 
 }
diff --git a/src/Functions/reinterpretAsFixedString.cpp b/src/Functions/reinterpretAsFixedString.cpp
deleted file mode 100644
index 465e7dffc6a..00000000000
--- a/src/Functions/reinterpretAsFixedString.cpp
+++ /dev/null
@@ -1,96 +0,0 @@
-#include <Functions/FunctionFactory.h>
-
-#include <DataTypes/DataTypeFixedString.h>
-#include <Columns/ColumnFixedString.h>
-#include <Common/typeid_cast.h>
-#include <Common/memcpySmall.h>
-
-
-namespace DB
-{
-namespace ErrorCodes
-{
-    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
-    extern const int ILLEGAL_COLUMN;
-}
-
-namespace
-{
-
-class FunctionReinterpretAsFixedString : public IFunction
-{
-public:
-    static FunctionPtr create(const Context &) { return std::make_shared<FunctionReinterpretAsFixedString>(); }
-
-    static constexpr auto name = "reinterpretAsFixedString";
-
-    String getName() const override
-    {
-        return name;
-    }
-
-    size_t getNumberOfArguments() const override { return 1; }
-
-    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
-    {
-        const IDataType & type = *arguments[0];
-
-        if (type.isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion())
-            return std::make_shared<DataTypeFixedString>(type.getSizeOfValueInMemory());
-        throw Exception("Cannot reinterpret " + type.getName() + " as FixedString because it is not fixed size and contiguous in memory", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
-    }
-
-    static void NO_INLINE executeToFixedString(const IColumn & src, ColumnFixedString & dst, size_t n)
-    {
-        size_t rows = src.size();
-        ColumnFixedString::Chars & data_to = dst.getChars();
-        data_to.resize(n * rows);
-
-        ColumnFixedString::Offset offset = 0;
-        for (size_t i = 0; i < rows; ++i)
-        {
-            StringRef data = src.getDataAt(i);
-            memcpySmallAllowReadWriteOverflow15(&data_to[offset], data.data, n);
-            offset += n;
-        }
-    }
-
-    static void NO_INLINE executeContiguousToFixedString(const IColumn & src, ColumnFixedString & dst, size_t n)
-    {
-        size_t rows = src.size();
-        ColumnFixedString::Chars & data_to = dst.getChars();
-        data_to.resize(n * rows);
-
-        memcpy(data_to.data(), src.getRawData().data, data_to.size());
-    }
-
-    bool useDefaultImplementationForConstants() const override { return true; }
-
-    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
-    {
-        const IColumn & src = *arguments[0].column;
-        MutableColumnPtr dst = result_type->createColumn();
-
-        if (ColumnFixedString * dst_concrete = typeid_cast<ColumnFixedString *>(dst.get()))
-        {
-            if (src.isFixedAndContiguous() && src.sizeOfValueIfFixed() == dst_concrete->getN())
-                executeContiguousToFixedString(src, *dst_concrete, dst_concrete->getN());
-            else
-                executeToFixedString(src, *dst_concrete, dst_concrete->getN());
-        }
-        else
-            throw Exception("Illegal column " + src.getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN);
-
-        return dst;
-    }
-};
-
-}
-
-void registerFunctionReinterpretAsFixedString(FunctionFactory & factory)
-{
-    factory.registerFunction<FunctionReinterpretAsFixedString>();
-}
-
-}
-
diff --git a/src/Functions/reinterpretAsString.cpp b/src/Functions/reinterpretAsString.cpp
deleted file mode 100644
index 70db8f315bd..00000000000
--- a/src/Functions/reinterpretAsString.cpp
+++ /dev/null
@@ -1,92 +0,0 @@
-#include <Functions/FunctionFactory.h>
-
-#include <DataTypes/DataTypeString.h>
-#include <Columns/ColumnString.h>
-#include <Common/typeid_cast.h>
-#include <Common/memcpySmall.h>
-
-
-namespace DB
-{
-namespace ErrorCodes
-{
-    extern const int ILLEGAL_COLUMN;
-    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
-}
-
-namespace
-{
-
-/** Function for transforming numbers and dates to strings that contain the same set of bytes in the machine representation. */
-class FunctionReinterpretAsString : public IFunction
-{
-public:
-    static FunctionPtr create(const Context &) { return std::make_shared<FunctionReinterpretAsString>(); }
-
-    static constexpr auto name = "reinterpretAsString";
-
-    String getName() const override
-    {
-        return name;
-    }
-
-    size_t getNumberOfArguments() const override { return 1; }
-
-    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
-    {
-        const IDataType & type = *arguments[0];
-
-        if (type.isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
-            return std::make_shared<DataTypeString>();
-        throw Exception("Cannot reinterpret " + type.getName() + " as String because it is not contiguous in memory", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
-    }
-
-    static void executeToString(const IColumn & src, ColumnString & dst)
-    {
-        size_t rows = src.size();
-        ColumnString::Chars & data_to = dst.getChars();
-        ColumnString::Offsets & offsets_to = dst.getOffsets();
-        offsets_to.resize(rows);
-
-        ColumnString::Offset offset = 0;
-        for (size_t i = 0; i < rows; ++i)
-        {
-            StringRef data = src.getDataAt(i);
-
-            /// Cut trailing zero bytes.
-            while (data.size && data.data[data.size - 1] == 0)
-                --data.size;
-
-            data_to.resize(offset + data.size + 1);
-            memcpySmallAllowReadWriteOverflow15(&data_to[offset], data.data, data.size);
-            offset += data.size;
-            data_to[offset] = 0;
-            ++offset;
-            offsets_to[i] = offset;
-        }
-    }
-
-    bool useDefaultImplementationForConstants() const override { return true; }
-
-    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
-    {
-        const IColumn & src = *arguments[0].column;
-        MutableColumnPtr dst = result_type->createColumn();
-
-        if (ColumnString * dst_concrete = typeid_cast<ColumnString *>(dst.get()))
-            executeToString(src, *dst_concrete);
-        else
-            throw Exception("Illegal column " + src.getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN);
-
-        return dst;
-    }
-};
-
-}
-
-void registerFunctionReinterpretAsString(FunctionFactory & factory)
-{
-    factory.registerFunction<FunctionReinterpretAsString>();
-}
-
-}
diff --git a/src/Functions/ya.make b/src/Functions/ya.make
index 9488c9d7d4e..9032a80dfd6 100644
--- a/src/Functions/ya.make
+++ b/src/Functions/ya.make
@@ -409,8 +409,6 @@ SRCS(
     registerFunctionsUnixTimestamp64.cpp
     registerFunctionsVisitParam.cpp
     reinterpretAs.cpp
-    reinterpretAsFixedString.cpp
-    reinterpretAsString.cpp
     repeat.cpp
     replaceAll.cpp
     replaceOne.cpp

From e967ba5f3b40c7f905fdee3c7d5c4f77bbaf2a29 Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Sat, 30 Jan 2021 12:12:38 -0800
Subject: [PATCH 0417/1238] try fix clang-11-darwin build

---
 src/Functions/FunctionsCoding.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Functions/FunctionsCoding.h b/src/Functions/FunctionsCoding.h
index a311c6938d6..abfb1e83a77 100644
--- a/src/Functions/FunctionsCoding.h
+++ b/src/Functions/FunctionsCoding.h
@@ -311,7 +311,7 @@ public:
                     std::memcpy(
                         src_ipv4_buf + std::strlen("::ffff:"),
                         reinterpret_cast<const char *>(&vec_src[src_offset]),
-                        std::min(offsets_src[i] - src_offset, IPV4_MAX_TEXT_LENGTH + 1));
+                        std::min<UInt64>(offsets_src[i] - src_offset, IPV4_MAX_TEXT_LENGTH + 1));
                     parseIPv6(src_ipv4_buf, reinterpret_cast<unsigned char *>(&vec_res[out_offset]));
                 }
                 else

From 64261c772085f1fd5d4adc8987ee06a17bc732d0 Mon Sep 17 00:00:00 2001
From: Vitaliy Zakaznikov <vzakaznikov@protonmail.com>
Date: Sat, 30 Jan 2021 15:24:20 -0500
Subject: [PATCH 0418/1238] Fixing reference to LDAP role mapping specification

---
 tests/testflows/ldap/role_mapping/regression.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/testflows/ldap/role_mapping/regression.py b/tests/testflows/ldap/role_mapping/regression.py
index fff1e72a945..7afb6c98713 100755
--- a/tests/testflows/ldap/role_mapping/regression.py
+++ b/tests/testflows/ldap/role_mapping/regression.py
@@ -18,7 +18,7 @@ xfails = {
 @Name("role mapping")
 @ArgumentParser(argparser)
 @Specifications(
-    QA_SRS014_ClickHouse_LDAP_Role_Mapping
+    SRS_014_ClickHouse_LDAP_Role_Mapping
 )
 @Requirements(
     RQ_SRS_014_LDAP_RoleMapping("1.0")

From 282409e8a92fc224541f07af9e89cec978b5ffe6 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 31 Jan 2021 01:37:12 +0300
Subject: [PATCH 0419/1238] Add a patch from Stig Bakken

---
 .../materialize_with_ddl.py                     | 17 ++++++++++++++---
 .../test_materialize_mysql_database/test.py     |  5 +++++
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py b/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py
index 8bb1fdb84e0..11600359b66 100644
--- a/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py
+++ b/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py
@@ -79,9 +79,9 @@ def dml_with_materialize_mysql_database(clickhouse_node, mysql_node, service_nam
 
     check_query(clickhouse_node, """
         SELECT key, unsigned_tiny_int, tiny_int, unsigned_small_int,
-         small_int, unsigned_medium_int, medium_int, unsigned_int, _int, unsigned_integer, _integer, 
+         small_int, unsigned_medium_int, medium_int, unsigned_int, _int, unsigned_integer, _integer,
          unsigned_bigint, _bigint, unsigned_float, _float, unsigned_double, _double, _varchar, _char, binary_col,
-         _date, _datetime, /* exclude it, because ON UPDATE CURRENT_TIMESTAMP _timestamp, */ 
+         _date, _datetime, /* exclude it, because ON UPDATE CURRENT_TIMESTAMP _timestamp, */
          _bool FROM test_database.test_table_1 ORDER BY key FORMAT TSV
         """,
         "1\t2\t-1\t2\t-2\t3\t-3\t4\t-4\t5\t-5\t6\t-6\t3.2\t-3.2\t3.4\t-3.4\tvarchar\tchar\tbinary\\0\\0\t2020-01-01\t"
@@ -720,7 +720,7 @@ def clickhouse_killed_while_insert(clickhouse_node, mysql_node, service_name):
 
     t = threading.Thread(target=insert, args=(1000,))
     t.start()
-    
+
     # TODO: add clickhouse_node.restart_clickhouse(20, kill=False) test
     clickhouse_node.restart_clickhouse(20, kill=True)
     t.join()
@@ -732,3 +732,14 @@ def clickhouse_killed_while_insert(clickhouse_node, mysql_node, service_name):
 
     mysql_node.query("DROP DATABASE kill_clickhouse_while_insert")
     clickhouse_node.query("DROP DATABASE kill_clickhouse_while_insert")
+
+
+def utf8mb4_test(clickhouse_node, mysql_node, service_name):
+    mysql_node.query("DROP DATABASE IF EXISTS utf8mb4_test")
+    clickhouse_node.query("DROP DATABASE IF EXISTS utf8mb4_test")
+    mysql_node.query("CREATE DATABASE utf8mb4_test")
+    mysql_node.query("CREATE TABLE utf8mb4_test.test (id INT(11) NOT NULL PRIMARY KEY, name VARCHAR(255)) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4")
+    mysql_node.query("INSERT INTO utf8mb4_test.test VALUES(1, '🦄'),(2, '\u2601')")
+    clickhouse_node.query("CREATE DATABASE utf8mb4_test ENGINE = MaterializeMySQL('{}:3306', 'utf8mb4_test', 'root', 'clickhouse')".format(service_name))
+    check_query(clickhouse_node, "SELECT id, name FROM utf8mb4_test.test ORDER BY id", "1\t\U0001F984\n2\t\u2601\n")
+
diff --git a/tests/integration/test_materialize_mysql_database/test.py b/tests/integration/test_materialize_mysql_database/test.py
index dbd6e894987..22322671f13 100644
--- a/tests/integration/test_materialize_mysql_database/test.py
+++ b/tests/integration/test_materialize_mysql_database/test.py
@@ -228,3 +228,8 @@ def test_clickhouse_killed_while_insert_5_7(started_cluster, started_mysql_5_7,
 @pytest.mark.parametrize(('clickhouse_node'), [node_db_ordinary, node_db_atomic])
 def test_clickhouse_killed_while_insert_8_0(started_cluster, started_mysql_8_0, clickhouse_node):
     materialize_with_ddl.clickhouse_killed_while_insert(clickhouse_node, started_mysql_8_0, "mysql8_0")
+
+@pytest.mark.parametrize(('clickhouse_node'), [node_db_ordinary, node_db_ordinary])
+def test_utf8mb4(started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node):
+    materialize_with_ddl.utf8mb4_test(clickhouse_node, started_mysql_5_7, "mysql1")
+    materialize_with_ddl.utf8mb4_test(clickhouse_node, started_mysql_8_0, "mysql8_0")

From 895be295a18a846148f52edb1006c0536bc97be5 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 31 Jan 2021 02:22:55 +0300
Subject: [PATCH 0420/1238] Fix test

---
 tests/queries/0_stateless/00474_readonly_settings.sh       | 2 ++
 tests/queries/0_stateless/01187_set_profile_as_setting.sh  | 2 ++
 .../queries/0_stateless/01455_opentelemetry_distributed.sh | 2 ++
 tests/queries/0_stateless/01526_initial_query_id.sh        | 2 ++
 tests/queries/shell_config.sh                              | 7 ++++---
 5 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/tests/queries/0_stateless/00474_readonly_settings.sh b/tests/queries/0_stateless/00474_readonly_settings.sh
index 0edde9f12ed..013d7f23756 100755
--- a/tests/queries/0_stateless/00474_readonly_settings.sh
+++ b/tests/queries/0_stateless/00474_readonly_settings.sh
@@ -1,5 +1,7 @@
 #!/usr/bin/env bash
 
+CLICKHOUSE_LOG_COMMENT=''
+
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
diff --git a/tests/queries/0_stateless/01187_set_profile_as_setting.sh b/tests/queries/0_stateless/01187_set_profile_as_setting.sh
index db9d095fe92..e7dfb83fe76 100755
--- a/tests/queries/0_stateless/01187_set_profile_as_setting.sh
+++ b/tests/queries/0_stateless/01187_set_profile_as_setting.sh
@@ -1,5 +1,7 @@
 #!/usr/bin/env bash
 
+CLICKHOUSE_LOG_COMMENT=''
+
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
diff --git a/tests/queries/0_stateless/01455_opentelemetry_distributed.sh b/tests/queries/0_stateless/01455_opentelemetry_distributed.sh
index 24667f26363..403a4b1b6b3 100755
--- a/tests/queries/0_stateless/01455_opentelemetry_distributed.sh
+++ b/tests/queries/0_stateless/01455_opentelemetry_distributed.sh
@@ -1,6 +1,8 @@
 #!/usr/bin/env bash
 set -ue
 
+CLICKHOUSE_LOG_COMMENT=''
+
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
diff --git a/tests/queries/0_stateless/01526_initial_query_id.sh b/tests/queries/0_stateless/01526_initial_query_id.sh
index e77764ee34e..beb1f14bda4 100755
--- a/tests/queries/0_stateless/01526_initial_query_id.sh
+++ b/tests/queries/0_stateless/01526_initial_query_id.sh
@@ -1,6 +1,8 @@
 #!/usr/bin/env bash
 set -ue
 
+CLICKHOUSE_LOG_COMMENT=''
+
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
diff --git a/tests/queries/shell_config.sh b/tests/queries/shell_config.sh
index 6130bec52f1..361f8c444f9 100644
--- a/tests/queries/shell_config.sh
+++ b/tests/queries/shell_config.sh
@@ -10,9 +10,9 @@ export CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL
 [ -v CLICKHOUSE_PORT_TCP ] && CLICKHOUSE_CLIENT_OPT0+=" --port=${CLICKHOUSE_PORT_TCP} "
 [ -v CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL ] && CLICKHOUSE_CLIENT_OPT0+=" --send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL} "
 [ -v CLICKHOUSE_DATABASE ] && CLICKHOUSE_CLIENT_OPT0+=" --database=${CLICKHOUSE_DATABASE} "
-[ -v CLICKHOUSE_LOG_COMMENT ] && CLICKHOUSE_CLIENT_OPT0+=" --log_comment=${CLICKHOUSE_LOG_COMMENT} "
+[ -n "$CLICKHOUSE_LOG_COMMENT" ] && CLICKHOUSE_CLIENT_OPT0+=" --log_comment='${CLICKHOUSE_LOG_COMMENT}' "
 [ -v CLICKHOUSE_DATABASE ] && CLICKHOUSE_BENCHMARK_OPT0+=" --database=${CLICKHOUSE_DATABASE} "
-[ -v CLICKHOUSE_LOG_COMMENT ] && CLICKHOUSE_BENCHMARK_OPT0+=" --log_comment=${CLICKHOUSE_LOG_COMMENT} "
+[ -n "$CLICKHOUSE_LOG_COMMENT" ] && CLICKHOUSE_BENCHMARK_OPT0+=" --log_comment='${CLICKHOUSE_LOG_COMMENT}' "
 
 export CLICKHOUSE_BINARY=${CLICKHOUSE_BINARY:="clickhouse"}
 [ -x "$CLICKHOUSE_BINARY-client" ] && CLICKHOUSE_CLIENT_BINARY=${CLICKHOUSE_CLIENT_BINARY:=$CLICKHOUSE_BINARY-client}
@@ -61,7 +61,8 @@ then
 else
   export CLICKHOUSE_URL_PARAMS="database=${CLICKHOUSE_DATABASE}"
 fi
-[ -v CLICKHOUSE_LOG_COMMENT ] && export CLICKHOUSE_URL_PARAMS="${CLICKHOUSE_URL_PARAMS}&log_comment=${CLICKHOUSE_LOG_COMMENT}"
+# Note: missing url encoding of the log comment.
+[ -n "$CLICKHOUSE_LOG_COMMENT" ] && export CLICKHOUSE_URL_PARAMS="${CLICKHOUSE_URL_PARAMS}&log_comment=${CLICKHOUSE_LOG_COMMENT}"
 
 export CLICKHOUSE_URL=${CLICKHOUSE_URL:="${CLICKHOUSE_PORT_HTTP_PROTO}://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTP}/"}
 export CLICKHOUSE_URL_HTTPS=${CLICKHOUSE_URL_HTTPS:="https://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTPS}/"}

From cbd4bace6f265561902455aa18a9a894c43e6acc Mon Sep 17 00:00:00 2001
From: Vitaly Baranov <vitbar@yandex-team.ru>
Date: Sat, 30 Jan 2021 21:26:08 +0300
Subject: [PATCH 0421/1238] Mark that test 01676_dictget_in_default_expression
 should not be run in parallel.

---
 tests/queries/skip_list.json | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json
index 0ec0f760d8d..600a2fd6028 100644
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@@ -566,6 +566,7 @@
         "01602_show_create_view",
         "01603_rename_overwrite_bug",
         "01646_system_restart_replicas_smoke", // system restart replicas is a global query
+        "01676_dictget_in_default_expression",
         "attach",
         "ddl_dictionaries",
         "dictionary",

From 0fa5142715f096c3a304f0e7e51435fcc3c116f0 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 31 Jan 2021 05:36:52 +0300
Subject: [PATCH 0422/1238] Remove tons of garbage

---
 src/Common/StringSearcher.h        |  61 ++---
 src/Common/UTF8Helpers.h           |  22 +-
 src/Common/Volnitsky.h             | 398 +++++++++++++++--------------
 src/Functions/LowerUpperUTF8Impl.h |  16 +-
 src/Functions/randomStringUTF8.cpp |   8 +-
 5 files changed, 258 insertions(+), 247 deletions(-)

diff --git a/src/Common/StringSearcher.h b/src/Common/StringSearcher.h
index 7028ae8eae1..0d2ee26889f 100644
--- a/src/Common/StringSearcher.h
+++ b/src/Common/StringSearcher.h
@@ -98,10 +98,10 @@ public:
         }
         else
         {
-            int first_u32 = UTF8::convert(needle);
+            auto first_u32 = UTF8::convertUTF8ToCodePoint(needle, needle_size);
 
             /// Invalid UTF-8
-            if (first_u32 < 0)
+            if (!first_u32)
             {
                 /// Process it verbatim as a sequence of bytes.
                 size_t src_len = UTF8::seqLength(*needle);
@@ -111,12 +111,15 @@ public:
             }
             else
             {
-                int first_l_u32 = Poco::Unicode::toLower(first_u32);
-                int first_u_u32 = Poco::Unicode::toUpper(first_u32);
+                uint32_t first_l_u32 = Poco::Unicode::toLower(*first_u32);
+                uint32_t first_u_u32 = Poco::Unicode::toUpper(*first_u32);
 
                 /// lower and uppercase variants of the first octet of the first character in `needle`
-                UTF8::convert(first_l_u32, l_seq, sizeof(l_seq));
-                UTF8::convert(first_u_u32, u_seq, sizeof(u_seq));
+                size_t length_l = UTF8::convertCodePointToUTF8(first_l_u32, l_seq, sizeof(l_seq));
+                size_t length_r = UTF8::convertCodePointToUTF8(first_u_u32, u_seq, sizeof(u_seq));
+
+                if (length_l != length_r)
+                    throw Exception{"UTF8 sequences with different lowercase and uppercase lengths are not supported", ErrorCodes::UNSUPPORTED_PARAMETER};
             }
 
             l = l_seq[0];
@@ -143,15 +146,15 @@ public:
             }
 
             size_t src_len = std::min<size_t>(needle_end - needle_pos, UTF8::seqLength(*needle_pos));
-            int c_u32 = UTF8::convert(needle_pos);  /// This assumes valid UTF-8 or zero byte after needle.
+            auto c_u32 = UTF8::convertUTF8ToCodePoint(needle_pos, src_len);
 
-            if (c_u32 >= 0)
+            if (c_u32)
             {
-                int c_l_u32 = Poco::Unicode::toLower(c_u32);
-                int c_u_u32 = Poco::Unicode::toUpper(c_u32);
+                int c_l_u32 = Poco::Unicode::toLower(*c_u32);
+                int c_u_u32 = Poco::Unicode::toUpper(*c_u32);
 
-                uint8_t dst_l_len = static_cast<uint8_t>(UTF8::convert(c_l_u32, l_seq, sizeof(l_seq)));
-                uint8_t dst_u_len = static_cast<uint8_t>(UTF8::convert(c_u_u32, u_seq, sizeof(u_seq)));
+                uint8_t dst_l_len = static_cast<uint8_t>(UTF8::convertCodePointToUTF8(c_l_u32, l_seq, sizeof(l_seq)));
+                uint8_t dst_u_len = static_cast<uint8_t>(UTF8::convertCodePointToUTF8(c_u_u32, u_seq, sizeof(u_seq)));
 
                 /// @note Unicode standard states it is a rare but possible occasion
                 if (!(dst_l_len == dst_u_len && dst_u_len == src_len))
@@ -181,7 +184,7 @@ public:
     }
 
     template <typename CharT, typename = std::enable_if_t<sizeof(CharT) == 1>>
-    ALWAYS_INLINE bool compare(const CharT * /*haystack*/, const CharT * /*haystack_end*/, const CharT * pos) const
+    ALWAYS_INLINE bool compare(const CharT * /*haystack*/, const CharT * haystack_end, const CharT * pos) const
     {
 
 #ifdef __SSE4_1__
@@ -202,15 +205,15 @@ public:
 
                     while (needle_pos < needle_end)
                     {
-                        int haystack_code_point = UTF8::convert(pos);
-                        int needle_code_point = UTF8::convert(needle_pos);
+                        auto haystack_code_point = UTF8::convertUTF8ToCodePoint(pos, haystack_end - pos);
+                        auto needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos);
 
                         /// Invalid UTF-8, should not compare equals
-                        if (haystack_code_point < 0 || needle_code_point < 0)
+                        if (!haystack_code_point || !needle_code_point)
                             break;
 
                         /// Not equals case insensitive.
-                        if (Poco::Unicode::toLower(haystack_code_point) !=  Poco::Unicode::toLower(needle_code_point))
+                        if (Poco::Unicode::toLower(*haystack_code_point) != Poco::Unicode::toLower(*needle_code_point))
                             break;
 
                         /// @note assuming sequences for lowercase and uppercase have exact same length (that is not always true)
@@ -237,15 +240,15 @@ public:
 
             while (needle_pos < needle_end)
             {
-                int haystack_code_point = UTF8::convert(pos);
-                int needle_code_point = UTF8::convert(needle_pos);
+                auto haystack_code_point = UTF8::convertUTF8ToCodePoint(pos, haystack_end - pos);
+                auto needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos);
 
                 /// Invalid UTF-8, should not compare equals
-                if (haystack_code_point < 0 || needle_code_point < 0)
+                if (!haystack_code_point || !needle_code_point)
                     break;
 
                 /// Not equals case insensitive.
-                if (Poco::Unicode::toLower(haystack_code_point) !=  Poco::Unicode::toLower(needle_code_point))
+                if (Poco::Unicode::toLower(*haystack_code_point) != Poco::Unicode::toLower(*needle_code_point))
                     break;
 
                 const auto len = UTF8::seqLength(*pos);
@@ -307,15 +310,15 @@ public:
 
                             while (haystack_pos < haystack_end && needle_pos < needle_end)
                             {
-                                int haystack_code_point = UTF8::convert(haystack_pos);
-                                int needle_code_point = UTF8::convert(needle_pos);
+                                auto haystack_code_point = UTF8::convertUTF8ToCodePoint(haystack_pos, haystack_end - haystack_pos);
+                                auto needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos);
 
                                 /// Invalid UTF-8, should not compare equals
-                                if (haystack_code_point < 0 || needle_code_point < 0)
+                                if (!haystack_code_point || !needle_code_point)
                                     break;
 
                                 /// Not equals case insensitive.
-                                if (Poco::Unicode::toLower(haystack_code_point) !=  Poco::Unicode::toLower(needle_code_point))
+                                if (Poco::Unicode::toLower(*haystack_code_point) != Poco::Unicode::toLower(*needle_code_point))
                                     break;
 
                                 /// @note assuming sequences for lowercase and uppercase have exact same length (that is not always true)
@@ -348,15 +351,15 @@ public:
 
                 while (haystack_pos < haystack_end && needle_pos < needle_end)
                 {
-                    int haystack_code_point = UTF8::convert(haystack_pos);
-                    int needle_code_point = UTF8::convert(needle_pos);
+                    auto haystack_code_point = UTF8::convertUTF8ToCodePoint(haystack_pos, haystack_end - haystack_pos);
+                    auto needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos);
 
                     /// Invalid UTF-8, should not compare equals
-                    if (haystack_code_point < 0 || needle_code_point < 0)
+                    if (!haystack_code_point || !needle_code_point)
                         break;
 
                     /// Not equals case insensitive.
-                    if (Poco::Unicode::toLower(haystack_code_point) !=  Poco::Unicode::toLower(needle_code_point))
+                    if (Poco::Unicode::toLower(*haystack_code_point) != Poco::Unicode::toLower(*needle_code_point))
                         break;
 
                     const auto len = UTF8::seqLength(*haystack_pos);
diff --git a/src/Common/UTF8Helpers.h b/src/Common/UTF8Helpers.h
index e795b6846b2..f25ed55a6af 100644
--- a/src/Common/UTF8Helpers.h
+++ b/src/Common/UTF8Helpers.h
@@ -1,5 +1,6 @@
 #pragma once
 
+#include <optional>
 #include <common/types.h>
 #include <Common/BitHelpers.h>
 #include <Poco/UTF8Encoding.h>
@@ -73,26 +74,27 @@ inline size_t countCodePoints(const UInt8 * data, size_t size)
     return res;
 }
 
+
 template <typename CharT, typename = std::enable_if_t<sizeof(CharT) == 1>>
-int convert(const CharT * bytes)
+size_t convertCodePointToUTF8(uint32_t code_point, CharT * out_bytes, size_t out_length)
 {
     static const Poco::UTF8Encoding utf8;
-    return utf8.convert(reinterpret_cast<const uint8_t *>(bytes));
+    int res = utf8.convert(code_point, reinterpret_cast<uint8_t *>(out_bytes), out_length);
+    assert(res >= 0);
+    return res;
 }
 
 template <typename CharT, typename = std::enable_if_t<sizeof(CharT) == 1>>
-int convert(int ch, CharT * bytes, int length)
+std::optional<uint32_t> convertUTF8ToCodePoint(const CharT * in_bytes, size_t in_length)
 {
     static const Poco::UTF8Encoding utf8;
-    return utf8.convert(ch, reinterpret_cast<uint8_t *>(bytes), length);
+    int res = utf8.queryConvert(reinterpret_cast<const uint8_t *>(in_bytes), in_length);
+
+    if (res >= 0)
+        return res;
+    return {};
 }
 
-template <typename CharT, typename = std::enable_if_t<sizeof(CharT) == 1>>
-int queryConvert(const CharT * bytes, int length)
-{
-    static const Poco::UTF8Encoding utf8;
-    return utf8.queryConvert(reinterpret_cast<const uint8_t *>(bytes), length);
-}
 
 /// returns UTF-8 wcswidth. Invalid sequence is treated as zero width character.
 /// `prefix` is used to compute the `\t` width which extends the string before
diff --git a/src/Common/Volnitsky.h b/src/Common/Volnitsky.h
index 09317f1fbbf..d4b514022ed 100644
--- a/src/Common/Volnitsky.h
+++ b/src/Common/Volnitsky.h
@@ -60,7 +60,7 @@ namespace VolnitskyTraits
     static inline Ngram toNGram(const UInt8 * const pos) { return unalignedLoad<Ngram>(pos); }
 
     template <typename Callback>
-    static inline void putNGramASCIICaseInsensitive(const UInt8 * const pos, const int offset, const Callback & putNGramBase)
+    static inline void putNGramASCIICaseInsensitive(const UInt8 * pos, int offset, Callback && putNGramBase)
     {
         struct Chars
         {
@@ -109,231 +109,234 @@ namespace VolnitskyTraits
             putNGramBase(n, offset);
     }
 
-    template <bool CaseSensitive, bool ASCII, typename Callback>
-    static inline void putNGram(const UInt8 * const pos, const int offset, [[maybe_unused]] const UInt8 * const begin, const Callback & putNGramBase)
+    template <typename Callback>
+    static inline void putNGramUTF8CaseInsensitive(
+        const UInt8 * pos, int offset, const UInt8 * begin, size_t size, Callback && putNGramBase)
     {
-        if constexpr (CaseSensitive)
+        const UInt8 * end = begin + size;
+
+        struct Chars
         {
-            putNGramBase(toNGram(pos), offset);
+            UInt8 c0;
+            UInt8 c1;
+        };
+
+        union
+        {
+            VolnitskyTraits::Ngram n;
+            Chars chars;
+        };
+
+        n = toNGram(pos);
+
+        if (isascii(chars.c0) && isascii(chars.c1))
+        {
+            putNGramASCIICaseInsensitive(pos, offset, putNGramBase);
         }
         else
         {
-            if constexpr (ASCII)
+            /** n-gram (in the case of n = 2)
+                *  can be entirely located within one code point,
+                *  or intersect with two code points.
+                *
+                * In the first case, you need to consider up to two alternatives - this code point in upper and lower case,
+                *  and in the second case - up to four alternatives - fragments of two code points in all combinations of cases.
+                *
+                * It does not take into account the dependence of the case-transformation from the locale (for example - Turkish `Ii`)
+                *  as well as composition / decomposition and other features.
+                *
+                * It also does not work if characters with lower and upper cases are represented by different number of bytes or code points.
+                */
+
+            using Seq = UInt8[6];
+
+            if (UTF8::isContinuationOctet(chars.c1))
             {
-                putNGramASCIICaseInsensitive(pos, offset, putNGramBase);
-            }
-            else
-            {
-                struct Chars
+                /// ngram is inside a sequence
+                auto seq_pos = pos;
+                UTF8::syncBackward(seq_pos, begin);
+
+                auto u32 = UTF8::convertUTF8ToCodePoint(seq_pos, end - seq_pos);
+                /// Invalid UTF-8
+                if (!u32)
                 {
-                    UInt8 c0;
-                    UInt8 c1;
-                };
-
-                union
-                {
-                    VolnitskyTraits::Ngram n;
-                    Chars chars;
-                };
-
-                n = toNGram(pos);
-
-                if (isascii(chars.c0) && isascii(chars.c1))
-                    putNGramASCIICaseInsensitive(pos, offset, putNGramBase);
+                    putNGramBase(n, offset);
+                }
                 else
                 {
-                    /** n-gram (in the case of n = 2)
-                      *  can be entirely located within one code point,
-                      *  or intersect with two code points.
-                      *
-                      * In the first case, you need to consider up to two alternatives - this code point in upper and lower case,
-                      *  and in the second case - up to four alternatives - fragments of two code points in all combinations of cases.
-                      *
-                      * It does not take into account the dependence of the case-transformation from the locale (for example - Turkish `Ii`)
-                      *  as well as composition / decomposition and other features.
-                      *
-                      * It also does not work if characters with lower and upper cases are represented by different number of bytes or code points.
-                      */
+                    int l_u32 = Poco::Unicode::toLower(*u32);
+                    int u_u32 = Poco::Unicode::toUpper(*u32);
 
-                    using Seq = UInt8[6];
-
-                    if (UTF8::isContinuationOctet(chars.c1))
+                    /// symbol is case-independent
+                    if (l_u32 == u_u32)
                     {
-                        /// ngram is inside a sequence
-                        auto seq_pos = pos;
-                        UTF8::syncBackward(seq_pos, begin);
-
-                        int u32 = UTF8::convert(seq_pos);
-                        /// Invalid UTF-8
-                        if (u32 < 0)
-                        {
-                            putNGramBase(n, offset);
-                        }
-                        else
-                        {
-                            int l_u32 = Poco::Unicode::toLower(u32);
-                            int u_u32 = Poco::Unicode::toUpper(u32);
-
-                            /// symbol is case-independent
-                            if (l_u32 == u_u32)
-                            {
-                                putNGramBase(n, offset);
-                            }
-                            else
-                            {
-                                /// where is the given ngram in respect to the start of UTF-8 sequence?
-                                const auto seq_ngram_offset = pos - seq_pos;
-
-                                Seq seq;
-
-                                /// put ngram for lowercase
-                                UTF8::convert(l_u32, seq, sizeof(seq));
-                                chars.c0 = seq[seq_ngram_offset];
-                                chars.c1 = seq[seq_ngram_offset + 1];
-                                putNGramBase(n, offset);
-
-                                /// put ngram for uppercase
-                                UTF8::convert(u_u32, seq, sizeof(seq));
-                                chars.c0 = seq[seq_ngram_offset]; //-V519
-                                chars.c1 = seq[seq_ngram_offset + 1]; //-V519
-                                putNGramBase(n, offset);
-                            }
-                        }
+                        putNGramBase(n, offset);
                     }
                     else
                     {
-                        /// ngram is on the boundary of two sequences
-                        /// first sequence may start before u_pos if it is not ASCII
-                        auto first_seq_pos = pos;
-                        UTF8::syncBackward(first_seq_pos, begin);
-                        /// where is the given ngram in respect to the start of first UTF-8 sequence?
-                        const auto seq_ngram_offset = pos - first_seq_pos;
+                        /// where is the given ngram in respect to the start of UTF-8 sequence?
+                        const auto seq_ngram_offset = pos - seq_pos;
 
-                        int first_u32 = UTF8::convert(first_seq_pos);
-                        int first_l_u32;
-                        int first_u_u32;
+                        Seq seq;
 
-                        if (first_u32 < 0)
-                        {
-                            first_l_u32 = first_u32;
-                            first_u_u32 = first_u32;
-                        }
-                        else
-                        {
-                            first_l_u32 = Poco::Unicode::toLower(first_u32);
-                            first_u_u32 = Poco::Unicode::toUpper(first_u32);
-                        }
+                        /// put ngram for lowercase
+                        size_t length_l [[maybe_unused]] = UTF8::convertCodePointToUTF8(l_u32, seq, sizeof(seq));
+                        assert(length_l >= 2);
+                        chars.c0 = seq[seq_ngram_offset];
+                        chars.c1 = seq[seq_ngram_offset + 1];
+                        putNGramBase(n, offset);
 
-                        /// second sequence always start immediately after u_pos
-                        auto second_seq_pos = pos + 1;
+                        /// put ngram for uppercase
+                        size_t length_r [[maybe_unused]] = UTF8::convertCodePointToUTF8(u_u32, seq, sizeof(seq));
+                        assert(length_r >= 2);
+                        chars.c0 = seq[seq_ngram_offset]; //-V519
+                        chars.c1 = seq[seq_ngram_offset + 1]; //-V519
+                        putNGramBase(n, offset);
+                    }
+                }
+            }
+            else
+            {
+                /// ngram is on the boundary of two sequences
+                /// first sequence may start before u_pos if it is not ASCII
+                auto first_seq_pos = pos;
+                UTF8::syncBackward(first_seq_pos, begin);
+                /// where is the given ngram in respect to the start of first UTF-8 sequence?
+                const auto seq_ngram_offset = pos - first_seq_pos;
 
-                        int second_u32 = UTF8::convert(second_seq_pos); /// This assumes valid UTF-8 or zero byte after needle.
-                        int second_l_u32;
-                        int second_u_u32;
+                auto first_u32 = UTF8::convertUTF8ToCodePoint(first_seq_pos, end - first_seq_pos);
+                int first_l_u32 = 0;
+                int first_u_u32 = 0;
 
-                        if (second_u32 < 0)
-                        {
-                            second_l_u32 = second_u32;
-                            second_u_u32 = second_u32;
-                        }
-                        else
-                        {
-                            second_l_u32 = Poco::Unicode::toLower(second_u32);
-                            second_u_u32 = Poco::Unicode::toUpper(second_u32);
-                        }
+                if (first_u32)
+                {
+                    first_l_u32 = Poco::Unicode::toLower(*first_u32);
+                    first_u_u32 = Poco::Unicode::toUpper(*first_u32);
+                }
 
-                        /// both symbols are case-independent
-                        if (first_l_u32 == first_u_u32 && second_l_u32 == second_u_u32)
-                        {
-                            putNGramBase(n, offset);
-                        }
-                        else if (first_l_u32 == first_u_u32)
-                        {
-                            /// first symbol is case-independent
-                            Seq seq;
+                /// second sequence always start immediately after u_pos
+                auto second_seq_pos = pos + 1;
 
-                            /// put ngram for lowercase
-                            UTF8::convert(second_l_u32, seq, sizeof(seq));
-                            chars.c1 = seq[0];
-                            putNGramBase(n, offset);
+                auto second_u32 = UTF8::convertUTF8ToCodePoint(second_seq_pos, end - second_seq_pos);
+                int second_l_u32 = 0;
+                int second_u_u32 = 0;
 
-                            /// put ngram from uppercase, if it is different
-                            UTF8::convert(second_u_u32, seq, sizeof(seq));
-                            if (chars.c1 != seq[0])
-                            {
-                                chars.c1 = seq[0];
-                                putNGramBase(n, offset);
-                            }
-                        }
-                        else if (second_l_u32 == second_u_u32)
-                        {
-                            /// second symbol is case-independent
-                            Seq seq;
+                if (second_u32)
+                {
+                    second_l_u32 = Poco::Unicode::toLower(*second_u32);
+                    second_u_u32 = Poco::Unicode::toUpper(*second_u32);
+                }
 
-                            /// put ngram for lowercase
-                            UTF8::convert(first_l_u32, seq, sizeof(seq));
-                            chars.c0 = seq[seq_ngram_offset];
-                            putNGramBase(n, offset);
+                /// both symbols are case-independent
+                if (first_l_u32 == first_u_u32 && second_l_u32 == second_u_u32)
+                {
+                    putNGramBase(n, offset);
+                }
+                else if (first_l_u32 == first_u_u32)
+                {
+                    /// first symbol is case-independent
+                    Seq seq;
 
-                            /// put ngram for uppercase, if it is different
-                            UTF8::convert(first_u_u32, seq, sizeof(seq));
-                            if (chars.c0 != seq[seq_ngram_offset])
-                            {
-                                chars.c0 = seq[seq_ngram_offset];
-                                putNGramBase(n, offset);
-                            }
-                        }
-                        else
-                        {
-                            Seq first_l_seq;
-                            Seq first_u_seq;
-                            Seq second_l_seq;
-                            Seq second_u_seq;
+                    /// put ngram for lowercase
+                    size_t size_l [[maybe_unused]] = UTF8::convertCodePointToUTF8(second_l_u32, seq, sizeof(seq));
+                    assert(size_l >= 1);
+                    chars.c1 = seq[0];
+                    putNGramBase(n, offset);
 
-                            UTF8::convert(first_l_u32, first_l_seq, sizeof(first_l_seq));
-                            UTF8::convert(first_u_u32, first_u_seq, sizeof(first_u_seq));
-                            UTF8::convert(second_l_u32, second_l_seq, sizeof(second_l_seq));
-                            UTF8::convert(second_u_u32, second_u_seq, sizeof(second_u_seq));
+                    /// put ngram from uppercase, if it is different
+                    size_t size_u [[maybe_unused]] = UTF8::convertCodePointToUTF8(second_u_u32, seq, sizeof(seq));
+                    assert(size_u >= 1);
+                    if (chars.c1 != seq[0])
+                    {
+                        chars.c1 = seq[0];
+                        putNGramBase(n, offset);
+                    }
+                }
+                else if (second_l_u32 == second_u_u32)
+                {
+                    /// second symbol is case-independent
+                    Seq seq;
 
-                            auto c0l = first_l_seq[seq_ngram_offset];
-                            auto c0u = first_u_seq[seq_ngram_offset];
-                            auto c1l = second_l_seq[0];
-                            auto c1u = second_u_seq[0];
+                    /// put ngram for lowercase
+                    size_t size_l [[maybe_unused]] = UTF8::convertCodePointToUTF8(first_l_u32, seq, sizeof(seq));
+                    assert(size_l > seq_ngram_offset);
+                    chars.c0 = seq[seq_ngram_offset];
+                    putNGramBase(n, offset);
 
-                            /// ngram for ll
-                            chars.c0 = c0l;
-                            chars.c1 = c1l;
-                            putNGramBase(n, offset);
+                    /// put ngram for uppercase, if it is different
+                    size_t size_u [[maybe_unused]] = UTF8::convertCodePointToUTF8(first_u_u32, seq, sizeof(seq));
+                    assert(size_u > seq_ngram_offset);
+                    if (chars.c0 != seq[seq_ngram_offset])
+                    {
+                        chars.c0 = seq[seq_ngram_offset];
+                        putNGramBase(n, offset);
+                    }
+                }
+                else
+                {
+                    Seq first_l_seq;
+                    Seq first_u_seq;
+                    Seq second_l_seq;
+                    Seq second_u_seq;
 
-                            if (c0l != c0u)
-                            {
-                                /// ngram for Ul
-                                chars.c0 = c0u;
-                                chars.c1 = c1l;
-                                putNGramBase(n, offset);
-                            }
+                    size_t size_first_l [[maybe_unused]] = UTF8::convertCodePointToUTF8(first_l_u32, first_l_seq, sizeof(first_l_seq));
+                    size_t size_first_u [[maybe_unused]] = UTF8::convertCodePointToUTF8(first_u_u32, first_u_seq, sizeof(first_u_seq));
+                    size_t size_second_l [[maybe_unused]] = UTF8::convertCodePointToUTF8(second_l_u32, second_l_seq, sizeof(second_l_seq));
+                    size_t size_second_u [[maybe_unused]] = UTF8::convertCodePointToUTF8(second_u_u32, second_u_seq, sizeof(second_u_seq));
 
-                            if (c1l != c1u)
-                            {
-                                /// ngram for lU
-                                chars.c0 = c0l;
-                                chars.c1 = c1u;
-                                putNGramBase(n, offset);
-                            }
+                    assert(size_first_l > seq_ngram_offset);
+                    assert(size_first_u > seq_ngram_offset);
+                    assert(size_second_l > 0);
+                    assert(size_second_u > 0);
 
-                            if (c0l != c0u && c1l != c1u)
-                            {
-                                /// ngram for UU
-                                chars.c0 = c0u;
-                                chars.c1 = c1u;
-                                putNGramBase(n, offset);
-                            }
-                        }
+                    auto c0l = first_l_seq[seq_ngram_offset];
+                    auto c0u = first_u_seq[seq_ngram_offset];
+                    auto c1l = second_l_seq[0];
+                    auto c1u = second_u_seq[0];
+
+                    /// ngram for ll
+                    chars.c0 = c0l;
+                    chars.c1 = c1l;
+                    putNGramBase(n, offset);
+
+                    if (c0l != c0u)
+                    {
+                        /// ngram for Ul
+                        chars.c0 = c0u;
+                        chars.c1 = c1l;
+                        putNGramBase(n, offset);
+                    }
+
+                    if (c1l != c1u)
+                    {
+                        /// ngram for lU
+                        chars.c0 = c0l;
+                        chars.c1 = c1u;
+                        putNGramBase(n, offset);
+                    }
+
+                    if (c0l != c0u && c1l != c1u)
+                    {
+                        /// ngram for UU
+                        chars.c0 = c0u;
+                        chars.c1 = c1u;
+                        putNGramBase(n, offset);
                     }
                 }
             }
         }
     }
+
+    template <bool CaseSensitive, bool ASCII, typename Callback>
+    static inline void putNGram(const UInt8 * pos, int offset, [[maybe_unused]] const UInt8 * begin, size_t size, Callback && putNGramBase)
+    {
+        if constexpr (CaseSensitive)
+            putNGramBase(toNGram(pos), offset);
+        else if constexpr (ASCII)
+            putNGramASCIICaseInsensitive(pos, offset, std::forward<Callback>(putNGramBase));
+        else
+            putNGramUTF8CaseInsensitive(pos, offset, begin, size, std::forward<Callback>(putNGramBase));
+    }
 }
 
 
@@ -342,17 +345,17 @@ template <bool CaseSensitive, bool ASCII, typename FallbackSearcher>
 class VolnitskyBase
 {
 protected:
-    const UInt8 * const needle;
-    const size_t needle_size;
-    const UInt8 * const needle_end = needle + needle_size;
+    const UInt8 * needle;
+    size_t needle_size;
+    const UInt8 * needle_end = needle + needle_size;
     /// For how long we move, if the n-gram from haystack is not found in the hash table.
-    const size_t step = needle_size - sizeof(VolnitskyTraits::Ngram) + 1;
+    size_t step = needle_size - sizeof(VolnitskyTraits::Ngram) + 1;
 
     /** max needle length is 255, max distinct ngrams for case-sensitive is (255 - 1), case-insensitive is 4 * (255 - 1)
       *  storage of 64K ngrams (n = 2, 128 KB) should be large enough for both cases */
     std::unique_ptr<VolnitskyTraits::Offset[]> hash; /// Hash table.
 
-    const bool fallback; /// Do we need to use the fallback algorithm.
+    bool fallback; /// Do we need to use the fallback algorithm.
 
     FallbackSearcher fallback_searcher;
 
@@ -378,7 +381,7 @@ public:
         /// ssize_t is used here because unsigned can't be used with condition like `i >= 0`, unsigned always >= 0
         /// And also adding from the end guarantees that we will find first occurrence because we will lookup bigger offsets first.
         for (auto i = static_cast<ssize_t>(needle_size - sizeof(VolnitskyTraits::Ngram)); i >= 0; --i)
-            VolnitskyTraits::putNGram<CaseSensitive, ASCII>(this->needle + i, i + 1, this->needle, callback);
+            VolnitskyTraits::putNGram<CaseSensitive, ASCII>(needle + i, i + 1, needle, needle_size, callback);
     }
 
 
@@ -525,6 +528,7 @@ public:
                         reinterpret_cast<const UInt8 *>(cur_needle_data) + i,
                         i + 1,
                         reinterpret_cast<const UInt8 *>(cur_needle_data),
+                        cur_needle_size,
                         callback);
                 }
             }
diff --git a/src/Functions/LowerUpperUTF8Impl.h b/src/Functions/LowerUpperUTF8Impl.h
index f1ec9e7da1c..4c155034b3d 100644
--- a/src/Functions/LowerUpperUTF8Impl.h
+++ b/src/Functions/LowerUpperUTF8Impl.h
@@ -135,16 +135,16 @@ struct LowerUpperUTF8Impl
         {
             static const Poco::UTF8Encoding utf8;
 
-            int src_sequence_length = UTF8::seqLength(*src);
+            size_t src_sequence_length = UTF8::seqLength(*src);
 
-            int src_code_point = UTF8::queryConvert(src, src_end - src);
-            if (src_code_point > 0)
+            auto src_code_point = UTF8::convertUTF8ToCodePoint(src, src_end - src);
+            if (src_code_point)
             {
-                int dst_code_point = to_case(src_code_point);
+                int dst_code_point = to_case(*src_code_point);
                 if (dst_code_point > 0)
                 {
-                    int dst_sequence_length = UTF8::convert(dst_code_point, dst, src_end - src);
-                    assert(dst_sequence_length >= 0 && dst_sequence_length <= 4);
+                    size_t dst_sequence_length = UTF8::convertCodePointToUTF8(dst_code_point, dst, src_end - src);
+                    assert(dst_sequence_length <= 4);
 
                     /// We don't support cases when lowercase and uppercase characters occupy different number of bytes in UTF-8.
                     /// As an example, this happens for ß and ẞ.
@@ -157,7 +157,9 @@ struct LowerUpperUTF8Impl
                 }
             }
 
-            *dst++ = *src++;
+            *dst = *src;
+            ++dst;
+            ++src;
         }
     }
 
diff --git a/src/Functions/randomStringUTF8.cpp b/src/Functions/randomStringUTF8.cpp
index 0813c1e6465..163092226f2 100644
--- a/src/Functions/randomStringUTF8.cpp
+++ b/src/Functions/randomStringUTF8.cpp
@@ -119,12 +119,12 @@ public:
                 UInt32 code_point2 = generate_code_point(rand >> 32);
 
                 /// We have padding in column buffers that we can overwrite.
-                auto length1 = UTF8::convert(code_point1, pos, sizeof(int));
-                assert(length1 >= 0 && length1 <= 4);
+                size_t length1 = UTF8::convertCodePointToUTF8(code_point1, pos, sizeof(int));
+                assert(length1 <= 4);
                 pos += length1;
 
-                auto length2 = UTF8::convert(code_point2, pos, sizeof(int));
-                assert(length2 >= 0 && length2 <= 4);
+                size_t length2 = UTF8::convertCodePointToUTF8(code_point2, pos, sizeof(int));
+                assert(length2 <= 4);
                 last_writen_bytes = length2;
                 pos += last_writen_bytes;
             }

From 921ee52ebcf09e214b7a78034abd8382d0180d71 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 31 Jan 2021 05:47:59 +0300
Subject: [PATCH 0423/1238] More tests

---
 tests/queries/0_stateless/01674_unicode_asan.reference | 1 +
 tests/queries/0_stateless/01674_unicode_asan.sql       | 1 +
 2 files changed, 2 insertions(+)

diff --git a/tests/queries/0_stateless/01674_unicode_asan.reference b/tests/queries/0_stateless/01674_unicode_asan.reference
index 573541ac970..aa47d0d46d4 100644
--- a/tests/queries/0_stateless/01674_unicode_asan.reference
+++ b/tests/queries/0_stateless/01674_unicode_asan.reference
@@ -1 +1,2 @@
 0
+0
diff --git a/tests/queries/0_stateless/01674_unicode_asan.sql b/tests/queries/0_stateless/01674_unicode_asan.sql
index d34ff23be48..2ba34b46f93 100644
--- a/tests/queries/0_stateless/01674_unicode_asan.sql
+++ b/tests/queries/0_stateless/01674_unicode_asan.sql
@@ -1,2 +1,3 @@
 SELECT positionCaseInsensitiveUTF8('иголка.ру', 'иголка.р�\0') AS res;
 SELECT positionCaseInsensitiveUTF8('иголка.ру', randomString(rand() % 100)) FROM system.numbers; -- { serverError 2 }
+SELECT sum(ignore(positionCaseInsensitiveUTF8('иголка.ру', randomString(rand() % 2)))) FROM numbers(1000000);

From 0128d185eaa9fc16c4067a59c534680f62084285 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 31 Jan 2021 06:49:33 +0300
Subject: [PATCH 0424/1238] Fix UBSan report in DateTimeAddInterval

---
 .../FunctionDateOrDateTimeAddInterval.h        | 18 ++++++------------
 .../01680_date_time_add_ubsan.reference        |  0
 .../0_stateless/01680_date_time_add_ubsan.sql  |  1 +
 3 files changed, 7 insertions(+), 12 deletions(-)
 create mode 100644 tests/queries/0_stateless/01680_date_time_add_ubsan.reference
 create mode 100644 tests/queries/0_stateless/01680_date_time_add_ubsan.sql

diff --git a/src/Functions/FunctionDateOrDateTimeAddInterval.h b/src/Functions/FunctionDateOrDateTimeAddInterval.h
index b1d04fd60f0..5f964b899b4 100644
--- a/src/Functions/FunctionDateOrDateTimeAddInterval.h
+++ b/src/Functions/FunctionDateOrDateTimeAddInterval.h
@@ -68,12 +68,12 @@ struct AddSecondsImpl : public AddOnDateTime64DefaultImpl<AddSecondsImpl>
 
     static constexpr auto name = "addSeconds";
 
-    static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &)
+    static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &)
     {
         return t + delta;
     }
 
-    static inline UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
+    static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
     {
         return time_zone.fromDayNum(DayNum(d)) + delta;
     }
@@ -92,7 +92,7 @@ struct AddMinutesImpl : public AddOnDateTime64DefaultImpl<AddMinutesImpl>
         return t + delta * 60;
     }
 
-    static inline UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
+    static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
     {
         return time_zone.fromDayNum(DayNum(d)) + delta * 60;
     }
@@ -111,7 +111,7 @@ struct AddHoursImpl : public AddOnDateTime64DefaultImpl<AddHoursImpl>
         return t + delta * 3600;
     }
 
-    static inline UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
+    static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
     {
         return time_zone.fromDayNum(DayNum(d)) + delta * 3600;
     }
@@ -125,18 +125,12 @@ struct AddDaysImpl : public AddOnDateTime64DefaultImpl<AddDaysImpl>
 
     static constexpr auto name = "addDays";
 
-//    static inline UInt32 execute(UInt64 t, Int64 delta, const DateLUTImpl & time_zone)
-//    {
-//        // TODO (nemkov): LUT does not support out-of range date values for now.
-//        return time_zone.addDays(t, delta);
-//    }
-
     static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone)
     {
         return time_zone.addDays(t, delta);
     }
 
-    static inline UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl &)
+    static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl &)
     {
         return d + delta;
     }
@@ -155,7 +149,7 @@ struct AddWeeksImpl : public AddOnDateTime64DefaultImpl<AddWeeksImpl>
         return time_zone.addWeeks(t, delta);
     }
 
-    static inline UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl &)
+    static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl &)
     {
         return d + delta * 7;
     }
diff --git a/tests/queries/0_stateless/01680_date_time_add_ubsan.reference b/tests/queries/0_stateless/01680_date_time_add_ubsan.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/01680_date_time_add_ubsan.sql b/tests/queries/0_stateless/01680_date_time_add_ubsan.sql
new file mode 100644
index 00000000000..2ded70e29f8
--- /dev/null
+++ b/tests/queries/0_stateless/01680_date_time_add_ubsan.sql
@@ -0,0 +1 @@
+SELECT DISTINCT result FROM (SELECT toStartOfFifteenMinutes(toDateTime(toStartOfFifteenMinutes(toDateTime(1000.0001220703125) + (number * 65536))) + (number * 9223372036854775807)) AS result FROM system.numbers LIMIT 1048576) ORDER BY result DESC NULLS FIRST FORMAT Null;

From ae65219525e2889dba2fad24d449b16ffa3f0adc Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 31 Jan 2021 06:56:39 +0300
Subject: [PATCH 0425/1238] More tests

---
 tests/queries/0_stateless/01680_date_time_add_ubsan.reference | 1 +
 tests/queries/0_stateless/01680_date_time_add_ubsan.sql       | 1 +
 2 files changed, 2 insertions(+)

diff --git a/tests/queries/0_stateless/01680_date_time_add_ubsan.reference b/tests/queries/0_stateless/01680_date_time_add_ubsan.reference
index e69de29bb2d..dec7d2fabd2 100644
--- a/tests/queries/0_stateless/01680_date_time_add_ubsan.reference
+++ b/tests/queries/0_stateless/01680_date_time_add_ubsan.reference
@@ -0,0 +1 @@
+\N
diff --git a/tests/queries/0_stateless/01680_date_time_add_ubsan.sql b/tests/queries/0_stateless/01680_date_time_add_ubsan.sql
index 2ded70e29f8..f4690116e1a 100644
--- a/tests/queries/0_stateless/01680_date_time_add_ubsan.sql
+++ b/tests/queries/0_stateless/01680_date_time_add_ubsan.sql
@@ -1 +1,2 @@
 SELECT DISTINCT result FROM (SELECT toStartOfFifteenMinutes(toDateTime(toStartOfFifteenMinutes(toDateTime(1000.0001220703125) + (number * 65536))) + (number * 9223372036854775807)) AS result FROM system.numbers LIMIT 1048576) ORDER BY result DESC NULLS FIRST FORMAT Null;
+SELECT round(round(round(round(round(100)), round(round(round(round(NULL), round(65535)), toTypeName(now() + 9223372036854775807) LIKE 'DateTime%DateTime%DateTime%DateTime%', round(-2)), 255), round(NULL))));

From e5736c4c8debf91541919c27eef6c5f4fc442728 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 31 Jan 2021 07:23:07 +0300
Subject: [PATCH 0426/1238] Fix debug assertion in Hyperscan

---
 contrib/hyperscan                                           | 2 +-
 src/Functions/Regexps.h                                     | 1 -
 .../0_stateless/01681_hyperscan_debug_assertion.reference   | 0
 .../queries/0_stateless/01681_hyperscan_debug_assertion.sql | 6 ++++++
 4 files changed, 7 insertions(+), 2 deletions(-)
 create mode 100644 tests/queries/0_stateless/01681_hyperscan_debug_assertion.reference
 create mode 100644 tests/queries/0_stateless/01681_hyperscan_debug_assertion.sql

diff --git a/contrib/hyperscan b/contrib/hyperscan
index 3907fd00ee8..e9f08df0213 160000
--- a/contrib/hyperscan
+++ b/contrib/hyperscan
@@ -1 +1 @@
-Subproject commit 3907fd00ee8b2538739768fa9533f8635a276531
+Subproject commit e9f08df0213fc637aac0a5bbde9beeaeba2fe9fa
diff --git a/src/Functions/Regexps.h b/src/Functions/Regexps.h
index d9df4218056..11f3e31e22e 100644
--- a/src/Functions/Regexps.h
+++ b/src/Functions/Regexps.h
@@ -168,7 +168,6 @@ namespace MultiRegexps
         hs_database_t * db = nullptr;
         hs_compile_error_t * compile_error;
 
-
         std::unique_ptr<unsigned int[]> ids;
 
         /// We mark the patterns to provide the callback results.
diff --git a/tests/queries/0_stateless/01681_hyperscan_debug_assertion.reference b/tests/queries/0_stateless/01681_hyperscan_debug_assertion.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/01681_hyperscan_debug_assertion.sql b/tests/queries/0_stateless/01681_hyperscan_debug_assertion.sql
new file mode 100644
index 00000000000..ac88c58b30f
--- /dev/null
+++ b/tests/queries/0_stateless/01681_hyperscan_debug_assertion.sql
@@ -0,0 +1,6 @@
+-- We throw our own exception from operator new.
+-- In previous versions of Hyperscan it triggered debug assertion as it only expected std::bad_alloc.
+
+SET allow_hyperscan = 1;
+SET max_memory_usage = 4000000;
+SELECT [1, 2, 3, 11] = arraySort(multiMatchAllIndices('фабрикант', ['', 'рикан', 'а', 'f[ae]b[ei]rl', 'ф[иаэе]б[еэи][рпл]', 'афиукд', 'a[ft],th', '^ф[аиеэ]?б?[еэи]?$', 'берлик', 'fab', 'фа[беьв]+е?[рлко]'])); -- { serverError 241 }

From 40a356db743a040b0877eb5ee895e0c58f5ba677 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 31 Jan 2021 07:40:51 +0300
Subject: [PATCH 0427/1238] Fix warning

---
 src/Common/Volnitsky.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Common/Volnitsky.h b/src/Common/Volnitsky.h
index d4b514022ed..c674015fba9 100644
--- a/src/Common/Volnitsky.h
+++ b/src/Common/Volnitsky.h
@@ -175,7 +175,7 @@ namespace VolnitskyTraits
                     else
                     {
                         /// where is the given ngram in respect to the start of UTF-8 sequence?
-                        const auto seq_ngram_offset = pos - seq_pos;
+                        size_t seq_ngram_offset = pos - seq_pos;
 
                         Seq seq;
 
@@ -202,7 +202,7 @@ namespace VolnitskyTraits
                 auto first_seq_pos = pos;
                 UTF8::syncBackward(first_seq_pos, begin);
                 /// where is the given ngram in respect to the start of first UTF-8 sequence?
-                const auto seq_ngram_offset = pos - first_seq_pos;
+                size_t seq_ngram_offset = pos - first_seq_pos;
 
                 auto first_u32 = UTF8::convertUTF8ToCodePoint(first_seq_pos, end - first_seq_pos);
                 int first_l_u32 = 0;

From ddd4a7b1b50eba54fc24514b1eb8ec608137e152 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 31 Jan 2021 08:33:41 +0300
Subject: [PATCH 0428/1238] Add exception for fast test

---
 docker/test/fasttest/run.sh | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh
index 7555b5591d0..17cec7ae286 100755
--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@@ -251,8 +251,12 @@ function run_tests
         00701_rollup
         00834_cancel_http_readonly_queries_on_client_close
         00911_tautological_compare
+
+        # Hyperscan
         00926_multimatch
         00929_multi_match_edit_distance
+        01681_hyperscan_debug_assertion
+
         01031_mutations_interpreter_and_context
         01053_ssd_dictionary # this test mistakenly requires acces to /var/lib/clickhouse -- can't run this locally, disabled
         01083_expressions_in_engine_arguments

From 248540e58f2a1b5a71cc9bd955a8f94969b50c94 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 31 Jan 2021 10:10:37 +0300
Subject: [PATCH 0429/1238] Fix UBSan report in GatherUtils

---
 src/Functions/GatherUtils/Algorithms.h                       | 4 ++--
 tests/queries/0_stateless/01682_gather_utils_ubsan.reference | 0
 tests/queries/0_stateless/01682_gather_utils_ubsan.sql       | 1 +
 3 files changed, 3 insertions(+), 2 deletions(-)
 create mode 100644 tests/queries/0_stateless/01682_gather_utils_ubsan.reference
 create mode 100644 tests/queries/0_stateless/01682_gather_utils_ubsan.sql

diff --git a/src/Functions/GatherUtils/Algorithms.h b/src/Functions/GatherUtils/Algorithms.h
index 616257493eb..de0b1763afd 100644
--- a/src/Functions/GatherUtils/Algorithms.h
+++ b/src/Functions/GatherUtils/Algorithms.h
@@ -711,7 +711,7 @@ void resizeDynamicSize(ArraySource && array_source, ValueSource && value_source,
             }
             else
             {
-                auto length = static_cast<size_t>(-size);
+                auto length = -static_cast<size_t>(size);
                 if (length > MAX_ARRAY_SIZE)
                     throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size: {}, maximum: {}",
                         length, MAX_ARRAY_SIZE);
@@ -760,7 +760,7 @@ void resizeConstantSize(ArraySource && array_source, ValueSource && value_source
         }
         else
         {
-            auto length = static_cast<size_t>(-size);
+            auto length = -static_cast<size_t>(size);
             if (length > MAX_ARRAY_SIZE)
                 throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size: {}, maximum: {}",
                     length, MAX_ARRAY_SIZE);
diff --git a/tests/queries/0_stateless/01682_gather_utils_ubsan.reference b/tests/queries/0_stateless/01682_gather_utils_ubsan.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/01682_gather_utils_ubsan.sql b/tests/queries/0_stateless/01682_gather_utils_ubsan.sql
new file mode 100644
index 00000000000..2388586e8fe
--- /dev/null
+++ b/tests/queries/0_stateless/01682_gather_utils_ubsan.sql
@@ -0,0 +1 @@
+SELECT arrayResize([1, 2, 3], -9223372036854775808); -- { serverError 128 }

From e23dc60e31f5856b4b8f910fdbaba6b885b715d1 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 31 Jan 2021 10:12:17 +0300
Subject: [PATCH 0430/1238] Fix UBSan report in GatherUtils

---
 src/Functions/GatherUtils/Algorithms.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/Functions/GatherUtils/Algorithms.h b/src/Functions/GatherUtils/Algorithms.h
index de0b1763afd..e174261d76e 100644
--- a/src/Functions/GatherUtils/Algorithms.h
+++ b/src/Functions/GatherUtils/Algorithms.h
@@ -465,7 +465,7 @@ std::vector<size_t> buildKMPPrefixFunction(const SliceType & pattern, const Equa
     for (size_t i = 1; i < pattern.size; ++i)
     {
         result[i] = 0;
-        for (auto length = i; length > 0;)
+        for (size_t length = i; length > 0;)
         {
             length = result[length - 1];
             if (isEqualFunc(pattern, i, length))
@@ -695,7 +695,7 @@ void resizeDynamicSize(ArraySource && array_source, ValueSource && value_source,
 
             if (size >= 0)
             {
-                auto length = static_cast<size_t>(size);
+                size_t length = static_cast<size_t>(size);
                 if (length > MAX_ARRAY_SIZE)
                     throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size: {}, maximum: {}",
                         length, MAX_ARRAY_SIZE);
@@ -711,7 +711,7 @@ void resizeDynamicSize(ArraySource && array_source, ValueSource && value_source,
             }
             else
             {
-                auto length = -static_cast<size_t>(size);
+                size_t length = -static_cast<size_t>(size);
                 if (length > MAX_ARRAY_SIZE)
                     throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size: {}, maximum: {}",
                         length, MAX_ARRAY_SIZE);
@@ -744,7 +744,7 @@ void resizeConstantSize(ArraySource && array_source, ValueSource && value_source
 
         if (size >= 0)
         {
-            auto length = static_cast<size_t>(size);
+            size_t length = static_cast<size_t>(size);
             if (length > MAX_ARRAY_SIZE)
                 throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size: {}, maximum: {}",
                     length, MAX_ARRAY_SIZE);
@@ -760,7 +760,7 @@ void resizeConstantSize(ArraySource && array_source, ValueSource && value_source
         }
         else
         {
-            auto length = -static_cast<size_t>(size);
+            size_t length = -static_cast<size_t>(size);
             if (length > MAX_ARRAY_SIZE)
                 throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size: {}, maximum: {}",
                     length, MAX_ARRAY_SIZE);

From d604cbc9863d87f59ae25abc2c0a6773bc599b89 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 31 Jan 2021 10:41:24 +0300
Subject: [PATCH 0431/1238] Maybe improve server latency

---
 src/Server/TCPHandler.h        | 5 +++--
 src/Server/TCPHandlerFactory.h | 4 +++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h
index e12e9bcf4d0..0d3109a6591 100644
--- a/src/Server/TCPHandler.h
+++ b/src/Server/TCPHandler.h
@@ -109,15 +109,16 @@ public:
       *  because it allows to check the IP ranges of the trusted proxy.
       * Proxy-forwarded (original client) IP address is used for quota accounting if quota is keyed by forwarded IP.
       */
-    TCPHandler(IServer & server_, const Poco::Net::StreamSocket & socket_, bool parse_proxy_protocol_)
+    TCPHandler(IServer & server_, const Poco::Net::StreamSocket & socket_, bool parse_proxy_protocol_,
+        std::string server_display_name_)
         : Poco::Net::TCPServerConnection(socket_)
         , server(server_)
         , parse_proxy_protocol(parse_proxy_protocol_)
         , log(&Poco::Logger::get("TCPHandler"))
         , connection_context(server.context())
         , query_context(server.context())
+        , server_display_name(std::move(server_display_name_))
     {
-        server_display_name = server.config().getString("display_name", getFQDNOrHostName());
     }
 
     void run() override;
diff --git a/src/Server/TCPHandlerFactory.h b/src/Server/TCPHandlerFactory.h
index 73318fea9da..fb50333193c 100644
--- a/src/Server/TCPHandlerFactory.h
+++ b/src/Server/TCPHandlerFactory.h
@@ -17,6 +17,7 @@ private:
     IServer & server;
     bool parse_proxy_protocol = false;
     Poco::Logger * log;
+    std::string server_display_name;
 
     class DummyTCPHandler : public Poco::Net::TCPServerConnection
     {
@@ -34,6 +35,7 @@ public:
         : server(server_), parse_proxy_protocol(parse_proxy_protocol_)
         , log(&Poco::Logger::get(std::string("TCP") + (secure_ ? "S" : "") + "HandlerFactory"))
     {
+        server_display_name = server.config().getString("display_name", getFQDNOrHostName());
     }
 
     Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket) override
@@ -42,7 +44,7 @@ public:
         {
             LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString());
 
-            return new TCPHandler(server, socket, parse_proxy_protocol);
+            return new TCPHandler(server, socket, parse_proxy_protocol, server_display_name);
         }
         catch (const Poco::Net::NetException &)
         {

From 01a703ae5028d966e7af8d5cbee35255ee99d55e Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 31 Jan 2021 10:48:18 +0300
Subject: [PATCH 0432/1238] Update test

---
 src/Client/tests/test_connect.cpp | 58 ++++++++++++++-----------------
 1 file changed, 26 insertions(+), 32 deletions(-)

diff --git a/src/Client/tests/test_connect.cpp b/src/Client/tests/test_connect.cpp
index 1259980f9a6..1b98b936a52 100644
--- a/src/Client/tests/test_connect.cpp
+++ b/src/Client/tests/test_connect.cpp
@@ -7,8 +7,10 @@
 #include <atomic>
 #include <Poco/Net/StreamSocket.h>
 #include <Common/Exception.h>
-#include <Common/Stopwatch.h>
+#include <Common/ShellCommand.h>
 #include <IO/ReadHelpers.h>
+#include <IO/WriteBufferFromFileDescriptor.h>
+#include <IO/copyData.h>
 
 
 /** In a loop it connects to the server and immediately breaks the connection.
@@ -18,22 +20,26 @@
 int main(int argc, char ** argv)
 try
 {
+    using namespace DB;
+
     size_t num_iterations = 1;
     size_t num_threads = 1;
     std::string host = "localhost";
     uint16_t port = 9000;
 
     if (argc >= 2)
-        num_iterations = DB::parse<size_t>(argv[1]);
+        num_iterations = parse<size_t>(argv[1]);
 
     if (argc >= 3)
-        num_threads = DB::parse<size_t>(argv[2]);
+        num_threads = parse<size_t>(argv[2]);
 
     if (argc >= 4)
         host = argv[3];
 
     if (argc >= 5)
-        port = DB::parse<uint16_t>(argv[4]);
+        port = parse<uint16_t>(argv[4]);
+
+    WriteBufferFromFileDescriptor out(STDERR_FILENO);
 
     std::atomic_bool cancel{false};
     std::vector<std::thread> threads(num_threads);
@@ -45,44 +51,32 @@ try
             {
                 std::cerr << ".";
 
-                Poco::Net::SocketAddress address(host, port);
-
-                int fd = socket(PF_INET, SOCK_STREAM, IPPROTO_IP);
-
-                if (fd < 0)
-                    DB::throwFromErrno("Cannot create socket", 0);
-
-                linger linger_value;
-                linger_value.l_onoff = 1;
-                linger_value.l_linger = 0;
-
-                if (0 != setsockopt(fd, SOL_SOCKET, SO_LINGER, &linger_value, sizeof(linger_value)))
-                    DB::throwFromErrno("Cannot set linger", 0);
-
                 try
                 {
-                    Stopwatch watch;
+                    Poco::Net::SocketAddress address(host, port);
+                    Poco::Net::StreamSocket socket;
+                    //socket.setLinger(1, 0);
 
-                    int res = connect(fd, address.addr(), address.length());
-
-                    if (res != 0 && errno != EINPROGRESS && errno != EWOULDBLOCK)
+                    socket.connectNB(address);
+                    if (!socket.poll(Poco::Timespan(1000000),
+                        Poco::Net::Socket::SELECT_READ | Poco::Net::Socket::SELECT_WRITE | Poco::Net::Socket::SELECT_ERROR))
                     {
-                        close(fd);
-                        DB::throwFromErrno("Cannot connect", 0);
-                    }
+                        /// Allow to debug the server.
+/*                        auto command = ShellCommand::execute("kill -STOP $(pidof clickhouse-server)");
+                        copyData(command->err, out);
+                        copyData(command->out, out);
+                        command->wait();*/
 
-                    close(fd);
-
-                    if (watch.elapsedSeconds() > 0.1)
-                    {
-                        std::cerr << watch.elapsedSeconds() << "\n";
-                        cancel = true;
-                        break;
+                        std::cerr << "Timeout\n";
+/*                        cancel = true;
+                        break;*/
                     }
                 }
                 catch (const Poco::Exception & e)
                 {
                     std::cerr << e.displayText() << "\n";
+                    cancel = true;
+                    break;
                 }
             }
         });

From 246ed24801cf9b06bda21ffac83835cbda26d6b9 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 31 Jan 2021 11:23:55 +0300
Subject: [PATCH 0433/1238] Update test

---
 .../01681_hyperscan_debug_assertion.reference |  1 +
 .../01681_hyperscan_debug_assertion.sh        | 23 +++++++++++++++++++
 .../01681_hyperscan_debug_assertion.sql       |  6 -----
 3 files changed, 24 insertions(+), 6 deletions(-)
 create mode 100755 tests/queries/0_stateless/01681_hyperscan_debug_assertion.sh
 delete mode 100644 tests/queries/0_stateless/01681_hyperscan_debug_assertion.sql

diff --git a/tests/queries/0_stateless/01681_hyperscan_debug_assertion.reference b/tests/queries/0_stateless/01681_hyperscan_debug_assertion.reference
index e69de29bb2d..7326d960397 100644
--- a/tests/queries/0_stateless/01681_hyperscan_debug_assertion.reference
+++ b/tests/queries/0_stateless/01681_hyperscan_debug_assertion.reference
@@ -0,0 +1 @@
+Ok
diff --git a/tests/queries/0_stateless/01681_hyperscan_debug_assertion.sh b/tests/queries/0_stateless/01681_hyperscan_debug_assertion.sh
new file mode 100755
index 00000000000..0bbf8942c1a
--- /dev/null
+++ b/tests/queries/0_stateless/01681_hyperscan_debug_assertion.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+
+CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=fatal
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+# We throw our own exception from operator new.
+# In previous versions of Hyperscan it triggered debug assertion as it only expected std::bad_alloc.
+
+M=1000000
+
+while true
+do
+    $CLICKHOUSE_CLIENT --allow_hyperscan 1 --max_memory_usage $M --format Null --query "
+        SELECT [1, 2, 3, 11] = arraySort(multiMatchAllIndices('фабрикант', ['', 'рикан', 'а', 'f[a${RANDOM}e]b[ei]rl', 'ф[иа${RANDOM}эе]б[еэи][рпл]', 'афиукд', 'a[f${RANDOM}t],th', '^ф[аие${RANDOM}э]?б?[еэи]?$', 'бе${RANDOM}рлик', 'fa${RANDOM}b', 'фа[беьв]+е?[рл${RANDOM}ко]']))
+    " 2>&1 | grep -q 'Memory limit' || break;
+
+    M=$((M + 100000))
+done
+
+echo 'Ok'
diff --git a/tests/queries/0_stateless/01681_hyperscan_debug_assertion.sql b/tests/queries/0_stateless/01681_hyperscan_debug_assertion.sql
deleted file mode 100644
index ac88c58b30f..00000000000
--- a/tests/queries/0_stateless/01681_hyperscan_debug_assertion.sql
+++ /dev/null
@@ -1,6 +0,0 @@
--- We throw our own exception from operator new.
--- In previous versions of Hyperscan it triggered debug assertion as it only expected std::bad_alloc.
-
-SET allow_hyperscan = 1;
-SET max_memory_usage = 4000000;
-SELECT [1, 2, 3, 11] = arraySort(multiMatchAllIndices('фабрикант', ['', 'рикан', 'а', 'f[ae]b[ei]rl', 'ф[иаэе]б[еэи][рпл]', 'афиукд', 'a[ft],th', '^ф[аиеэ]?б?[еэи]?$', 'берлик', 'fab', 'фа[беьв]+е?[рлко]'])); -- { serverError 241 }

From 5b7c437e6d800fd1297d161a923e8a8efc6aaa6d Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 31 Jan 2021 11:24:37 +0300
Subject: [PATCH 0434/1238] Minor modification

---
 src/Parsers/ASTSelectQuery.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp
index 42bea0107d1..aa5508bf190 100644
--- a/src/Parsers/ASTSelectQuery.cpp
+++ b/src/Parsers/ASTSelectQuery.cpp
@@ -307,10 +307,11 @@ bool ASTSelectQuery::final() const
 
 bool ASTSelectQuery::withFill() const
 {
-    if (!orderBy())
+    const ASTPtr order_by = orderBy();
+    if (!order_by)
         return false;
 
-    for (const auto & order_expression_element : orderBy()->children)
+    for (const auto & order_expression_element : order_by->children)
         if (order_expression_element->as<ASTOrderByElement &>().with_fill)
             return true;
 

From c39ee3f58c85de0f2a7b609377e2f138305dc745 Mon Sep 17 00:00:00 2001
From: Amos Bird <amosbird@gmail.com>
Date: Sun, 31 Jan 2021 16:27:46 +0800
Subject: [PATCH 0435/1238] Fix argMinMaxIf crash

---
 src/AggregateFunctions/IAggregateFunction.h   | 29 ++++++++++++-------
 .../01681_arg_min_max_if_fix.reference        |  1 +
 .../0_stateless/01681_arg_min_max_if_fix.sql  |  1 +
 3 files changed, 20 insertions(+), 11 deletions(-)
 create mode 100644 tests/queries/0_stateless/01681_arg_min_max_if_fix.reference
 create mode 100644 tests/queries/0_stateless/01681_arg_min_max_if_fix.sql

diff --git a/src/AggregateFunctions/IAggregateFunction.h b/src/AggregateFunctions/IAggregateFunction.h
index 4c76ccf7720..c84be1a6b5b 100644
--- a/src/AggregateFunctions/IAggregateFunction.h
+++ b/src/AggregateFunctions/IAggregateFunction.h
@@ -513,7 +513,7 @@ private:
     }
 
 protected:
-    void extractColumns(const IColumn ** columns, const IColumn ** aggr_columns) const
+    ssize_t extractColumns(const IColumn ** columns, const IColumn ** aggr_columns, ssize_t if_argument_pos) const
     {
         if (tuple_argument)
         {
@@ -526,6 +526,13 @@ protected:
             for (size_t i = 0; i < args_count; ++i)
                 columns[i] = aggr_columns[i];
         }
+        if (if_argument_pos >= 0)
+        {
+            columns[args_count] = aggr_columns[if_argument_pos];
+            return args_count;
+        }
+        else
+            return -1;
     }
 
     bool tuple_argument;
@@ -551,8 +558,8 @@ public:
         Arena * arena,
         ssize_t if_argument_pos = -1) const override
     {
-        const IColumn * ex_columns[args_count];
-        extractColumns(ex_columns, columns);
+        const IColumn * ex_columns[args_count + (if_argument_pos >= 0)];
+        if_argument_pos = extractColumns(ex_columns, columns, if_argument_pos);
 
         Base::addBatch(batch_size, places, place_offset, ex_columns, arena, if_argument_pos);
     }
@@ -560,8 +567,8 @@ public:
     void addBatchSinglePlace(
         size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos = -1) const override
     {
-        const IColumn * ex_columns[args_count];
-        extractColumns(ex_columns, columns);
+        const IColumn * ex_columns[args_count + (if_argument_pos >= 0)];
+        if_argument_pos = extractColumns(ex_columns, columns, if_argument_pos);
 
         Base::addBatchSinglePlace(batch_size, place, ex_columns, arena, if_argument_pos);
     }
@@ -574,8 +581,8 @@ public:
         Arena * arena,
         ssize_t if_argument_pos = -1) const override
     {
-        const IColumn * ex_columns[args_count];
-        extractColumns(ex_columns, columns);
+        const IColumn * ex_columns[args_count + (if_argument_pos >= 0)];
+        if_argument_pos = extractColumns(ex_columns, columns, if_argument_pos);
 
         Base::addBatchSinglePlaceNotNull(batch_size, place, ex_columns, null_map, arena, if_argument_pos);
     }
@@ -584,8 +591,8 @@ public:
         size_t batch_begin, size_t batch_end, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos = -1)
         const override
     {
-        const IColumn * ex_columns[args_count];
-        extractColumns(ex_columns, columns);
+        const IColumn * ex_columns[args_count + (if_argument_pos >= 0)];
+        if_argument_pos = extractColumns(ex_columns, columns, if_argument_pos);
 
         Base::addBatchSinglePlaceFromInterval(batch_begin, batch_end, place, ex_columns, arena, if_argument_pos);
     }
@@ -595,7 +602,7 @@ public:
         const override
     {
         const IColumn * ex_columns[args_count];
-        extractColumns(ex_columns, columns);
+        extractColumns(ex_columns, columns, -1);
 
         Base::addBatchArray(batch_size, places, place_offset, ex_columns, offsets, arena);
     }
@@ -610,7 +617,7 @@ public:
         Arena * arena) const override
     {
         const IColumn * ex_columns[args_count];
-        extractColumns(ex_columns, columns);
+        extractColumns(ex_columns, columns, -1);
 
         Base::addBatchLookupTable8(batch_size, map, place_offset, init, key, ex_columns, arena);
     }
diff --git a/tests/queries/0_stateless/01681_arg_min_max_if_fix.reference b/tests/queries/0_stateless/01681_arg_min_max_if_fix.reference
new file mode 100644
index 00000000000..75a0b4104b3
--- /dev/null
+++ b/tests/queries/0_stateless/01681_arg_min_max_if_fix.reference
@@ -0,0 +1 @@
+0	0	2
diff --git a/tests/queries/0_stateless/01681_arg_min_max_if_fix.sql b/tests/queries/0_stateless/01681_arg_min_max_if_fix.sql
new file mode 100644
index 00000000000..b0aab898536
--- /dev/null
+++ b/tests/queries/0_stateless/01681_arg_min_max_if_fix.sql
@@ -0,0 +1 @@
+SELECT bitAnd(number, toUInt64(pow(257, 20) - 1048576)) AS k, argMaxIf(k, if((number % 255) = 256, toInt256(65535), number), number > 42), uniq(number) AS u FROM numbers(2) GROUP BY toInt256(-2, NULL), k;

From 18f3df31d55b4a0c1ba6162d2ecbc39cc412b215 Mon Sep 17 00:00:00 2001
From: Amos Bird <amosbird@gmail.com>
Date: Sun, 31 Jan 2021 14:22:57 +0800
Subject: [PATCH 0436/1238] Fix crash when pushing down predicates to union
 distinct subquery

---
 src/Interpreters/PredicateRewriteVisitor.cpp    | 17 ++++++++++++-----
 ...e_pushdown_union_distinct_subquery.reference |  1 +
 ...edicate_pushdown_union_distinct_subquery.sql |  1 +
 3 files changed, 14 insertions(+), 5 deletions(-)
 create mode 100644 tests/queries/0_stateless/01680_predicate_pushdown_union_distinct_subquery.reference
 create mode 100644 tests/queries/0_stateless/01680_predicate_pushdown_union_distinct_subquery.sql

diff --git a/src/Interpreters/PredicateRewriteVisitor.cpp b/src/Interpreters/PredicateRewriteVisitor.cpp
index 5773629d0d1..c567e769005 100644
--- a/src/Interpreters/PredicateRewriteVisitor.cpp
+++ b/src/Interpreters/PredicateRewriteVisitor.cpp
@@ -26,11 +26,18 @@ void PredicateRewriteVisitorData::visit(ASTSelectWithUnionQuery & union_select_q
 {
     auto & internal_select_list = union_select_query.list_of_selects->children;
 
-    if (!internal_select_list.empty())
-        visitFirstInternalSelect(*internal_select_list[0]->as<ASTSelectQuery>(), internal_select_list[0]);
-
-    for (size_t index = 1; index < internal_select_list.size(); ++index)
-        visitOtherInternalSelect(*internal_select_list[index]->as<ASTSelectQuery>(), internal_select_list[index]);
+    for (size_t index = 0; index < internal_select_list.size(); ++index)
+    {
+        if (auto child_union = internal_select_list[index]->as<ASTSelectWithUnionQuery>())
+            visit(*child_union, internal_select_list[index]);
+        else
+        {
+            if (index == 0)
+                visitFirstInternalSelect(*internal_select_list[0]->as<ASTSelectQuery>(), internal_select_list[0]);
+            else
+                visitOtherInternalSelect(*internal_select_list[index]->as<ASTSelectQuery>(), internal_select_list[index]);
+        }
+    }
 }
 
 void PredicateRewriteVisitorData::visitFirstInternalSelect(ASTSelectQuery & select_query, ASTPtr &)
diff --git a/tests/queries/0_stateless/01680_predicate_pushdown_union_distinct_subquery.reference b/tests/queries/0_stateless/01680_predicate_pushdown_union_distinct_subquery.reference
new file mode 100644
index 00000000000..573541ac970
--- /dev/null
+++ b/tests/queries/0_stateless/01680_predicate_pushdown_union_distinct_subquery.reference
@@ -0,0 +1 @@
+0
diff --git a/tests/queries/0_stateless/01680_predicate_pushdown_union_distinct_subquery.sql b/tests/queries/0_stateless/01680_predicate_pushdown_union_distinct_subquery.sql
new file mode 100644
index 00000000000..181e7109cd4
--- /dev/null
+++ b/tests/queries/0_stateless/01680_predicate_pushdown_union_distinct_subquery.sql
@@ -0,0 +1 @@
+SELECT count() FROM (SELECT 2000 AS d_year UNION DISTINCT SELECT 2000 AS d_year) WHERE d_year = 2002

From 978f23e3437d0d0eb64727e87ffe1b80289c9fc2 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sun, 31 Jan 2021 11:23:52 +0300
Subject: [PATCH 0437/1238] Fix SIGSEGV due to accessing
 GRPCServer::currentConnections() before initialization

This PR makes asynchronous metrics available just after start, and this
breaks GRPCServer, since it requires the server be started before
accessing currentConnections().

However it is possible to trigger the same SIGSEGV even without that
patch, with some timeouts during previous server initilizations and
small asynchronous_metrics_update_period_s.

Fix this by creating GRPCServer::Runner in the ctor.

Stacktrace:
    26.842505 [ 7 ] {} <Information> Application: Listening for MySQL compatibility protocol: 0.0.0.0:9004
    26.842562 [ 7 ] {} <Information> Application: Listening for gRPC protocol: 0.0.0.0:9100
    26.842600 [ 7 ] {} <Debug> AsynchronousMetrics: MemoryTracking: was 350.72 KiB, peak 5.25 MiB, will set to 232.63 MiB (RSS), difference: 232.29 MiB
    26.842834 [ 8 ] {} <Trace> BaseDaemon: Received signal 11
    26.843014 [ 39 ] {} <Fatal> BaseDaemon: ########################################
    26.843055 [ 39 ] {} <Fatal> BaseDaemon: (version 21.2.1.5858, build id: B27D5550AC34F9091BC4437D8021B752EDB34FBB) (from thread 7) (no query) Received signal Segmentation fault (11)
    26.843085 [ 39 ] {} <Fatal> BaseDaemon: Address: 0x78 Access: read. Address not mapped to object.
    26.843104 [ 39 ] {} <Fatal> BaseDaemon: Stack trace: 0x7fe8c4e6afc4 0x13683cf1 0xf52efe5 0xe7c9137 0x85596e1 0x8553635 0x11b7a313 0x8545ebc 0x8544b25 0x84e1cbe 0x7fe8c4c940b3 0x84ac22e
    26.843136 [ 39 ] {} <Fatal> BaseDaemon: 2. pthread_mutex_lock @ 0xbfc4 in /usr/lib/x86_64-linux-gnu/libpthread-2.31.so
    26.843162 [ 39 ] {} <Fatal> BaseDaemon: 3. std::__1::mutex::lock() @ 0x13683cf1 in ?
    26.843188 [ 39 ] {} <Fatal> BaseDaemon: 4. DB::GRPCServer::currentConnections() const @ 0xf52efe5 in /usr/bin/clickhouse
    26.843204 [ 39 ] {} <Fatal> BaseDaemon: 5. DB::AsynchronousMetrics::update() @ 0xe7c9137 in /usr/bin/clickhouse
    26.843228 [ 39 ] {} <Fatal> BaseDaemon: 6. DB::AsynchronousMetrics::start() @ 0x85596e1 in /usr/bin/clickhouse
    26.843248 [ 39 ] {} <Fatal> BaseDaemon: 7. DB::Server::main(std::__1::vector<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > > const&) @ 0x8553635 in /usr/bin/clickhouse
    26.843270 [ 39 ] {} <Fatal> BaseDaemon: 8. Poco::Util::Application::run() @ 0x11b7a313 in /usr/bin/clickhouse
    26.843284 [ 39 ] {} <Fatal> BaseDaemon: 9. DB::Server::run() @ 0x8545ebc in /usr/bin/clickhouse
    26.843299 [ 39 ] {} <Fatal> BaseDaemon: 10. mainEntryClickHouseServer(int, char**) @ 0x8544b25 in /usr/bin/clickhouse
    26.843313 [ 39 ] {} <Fatal> BaseDaemon: 11. main @ 0x84e1cbe in /usr/bin/clickhouse
    26.843331 [ 39 ] {} <Fatal> BaseDaemon: 12. __libc_start_main @ 0x270b3 in /usr/lib/x86_64-linux-gnu/libc-2.31.so
    26.843346 [ 39 ] {} <Fatal> BaseDaemon: 13. _start @ 0x84ac22e in /usr/bin/clickhouse
---
 src/Server/GRPCServer.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp
index 475bfc81801..c3492e9ea8a 100644
--- a/src/Server/GRPCServer.cpp
+++ b/src/Server/GRPCServer.cpp
@@ -1613,7 +1613,10 @@ private:
 
 
 GRPCServer::GRPCServer(IServer & iserver_, const Poco::Net::SocketAddress & address_to_listen_)
-    : iserver(iserver_), address_to_listen(address_to_listen_), log(&Poco::Logger::get("GRPCServer"))
+    : iserver(iserver_)
+    , address_to_listen(address_to_listen_)
+    , log(&Poco::Logger::get("GRPCServer"))
+    , runner(std::make_unique<Runner>(*this))
 {}
 
 GRPCServer::~GRPCServer()
@@ -1644,7 +1647,6 @@ void GRPCServer::start()
 
     queue = builder.AddCompletionQueue();
     grpc_server = builder.BuildAndStart();
-    runner = std::make_unique<Runner>(*this);
     runner->start();
 }
 

From 37f88a1468a62525bef67849cc82bf883b7fd67d Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 31 Jan 2021 12:02:54 +0300
Subject: [PATCH 0438/1238] Whitespace

---
 src/Functions/Regexps.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Functions/Regexps.h b/src/Functions/Regexps.h
index d9df4218056..11f3e31e22e 100644
--- a/src/Functions/Regexps.h
+++ b/src/Functions/Regexps.h
@@ -168,7 +168,6 @@ namespace MultiRegexps
         hs_database_t * db = nullptr;
         hs_compile_error_t * compile_error;
 
-
         std::unique_ptr<unsigned int[]> ids;
 
         /// We mark the patterns to provide the callback results.

From b74b76f681408b0a7a86d6e3c2c3699c80f65b5f Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 31 Jan 2021 12:36:55 +0300
Subject: [PATCH 0439/1238] clickhouse-benchmark: add --reconnect option

---
 programs/benchmark/Benchmark.cpp | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp
index ae1d16ce402..a0e2ea155ba 100644
--- a/programs/benchmark/Benchmark.cpp
+++ b/programs/benchmark/Benchmark.cpp
@@ -62,12 +62,12 @@ public:
             bool randomize_, size_t max_iterations_, double max_time_,
             const String & json_path_, size_t confidence_,
             const String & query_id_, const String & query_to_execute_, bool continue_on_errors_,
-            bool print_stacktrace_, const Settings & settings_)
+            bool reconnect_, bool print_stacktrace_, const Settings & settings_)
         :
         concurrency(concurrency_), delay(delay_), queue(concurrency), randomize(randomize_),
         cumulative(cumulative_), max_iterations(max_iterations_), max_time(max_time_),
         json_path(json_path_), confidence(confidence_), query_id(query_id_),
-        query_to_execute(query_to_execute_), continue_on_errors(continue_on_errors_),
+        query_to_execute(query_to_execute_), continue_on_errors(continue_on_errors_), reconnect(reconnect_),
         print_stacktrace(print_stacktrace_), settings(settings_),
         shared_context(Context::createShared()), global_context(Context::createGlobal(shared_context.get())),
         pool(concurrency)
@@ -155,6 +155,7 @@ private:
     String query_id;
     String query_to_execute;
     bool continue_on_errors;
+    bool reconnect;
     bool print_stacktrace;
     const Settings & settings;
     SharedContextHolder shared_context;
@@ -404,9 +405,14 @@ private:
     void execute(EntryPtrs & connection_entries, Query & query, size_t connection_index)
     {
         Stopwatch watch;
+
+        Connection & connection = **connection_entries[connection_index];
+
+        if (reconnect)
+            connection.disconnect();
+
         RemoteBlockInputStream stream(
-            *(*connection_entries[connection_index]),
-            query, {}, global_context, nullptr, Scalars(), Tables(), query_processing_stage);
+            connection, query, {}, global_context, nullptr, Scalars(), Tables(), query_processing_stage);
         if (!query_id.empty())
             stream.setQueryId(query_id);
 
@@ -589,6 +595,7 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv)
             ("confidence",    value<size_t>()->default_value(5), "set the level of confidence for T-test [0=80%, 1=90%, 2=95%, 3=98%, 4=99%, 5=99.5%(default)")
             ("query_id",      value<std::string>()->default_value(""),         "")
             ("continue_on_errors", "continue testing even if a query fails")
+            ("reconnect", "establish new connection for every query")
         ;
 
         Settings settings;
@@ -638,7 +645,8 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv)
             options["confidence"].as<size_t>(),
             options["query_id"].as<std::string>(),
             options["query"].as<std::string>(),
-            options.count("continue_on_errors") > 0,
+            options.count("continue_on_errors"),
+            options.count("reconnect"),
             print_stacktrace,
             settings);
         return benchmark.run();

From 11f144f5890984fac18d571fe9f3da6e70039214 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 31 Jan 2021 13:02:35 +0300
Subject: [PATCH 0440/1238] Fix deadlock in system.text_log

---
 src/Interpreters/SystemLog.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h
index 6c56565a152..101bc752f43 100644
--- a/src/Interpreters/SystemLog.h
+++ b/src/Interpreters/SystemLog.h
@@ -8,6 +8,7 @@
 #include <condition_variable>
 #include <boost/noncopyable.hpp>
 #include <common/logger_useful.h>
+#include <ext/scope_guard.h>
 #include <common/types.h>
 #include <Core/Defines.h>
 #include <Storages/IStorage.h>
@@ -229,9 +230,18 @@ void SystemLog<LogElement>::startup()
 }
 
 
+static thread_local bool recursive_add_call = false;
+
 template <typename LogElement>
 void SystemLog<LogElement>::add(const LogElement & element)
 {
+    /// It is possible that the method will be called recursively.
+    /// Better to drop these events to avoid complications.
+    if (recursive_add_call)
+        return;
+    recursive_add_call = true;
+    SCOPE_EXIT({ recursive_add_call = false; });
+
     /// Memory can be allocated while resizing on queue.push_back.
     /// The size of allocation can be in order of a few megabytes.
     /// But this should not be accounted for query memory usage.

From 051e4832b998909ef47b6649a8548954583de3da Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 31 Jan 2021 13:05:00 +0300
Subject: [PATCH 0441/1238] Add a test

---
 .../queries/0_stateless/01683_text_log_deadlock.reference  | 1 +
 tests/queries/0_stateless/01683_text_log_deadlock.sh       | 7 +++++++
 2 files changed, 8 insertions(+)
 create mode 100644 tests/queries/0_stateless/01683_text_log_deadlock.reference
 create mode 100755 tests/queries/0_stateless/01683_text_log_deadlock.sh

diff --git a/tests/queries/0_stateless/01683_text_log_deadlock.reference b/tests/queries/0_stateless/01683_text_log_deadlock.reference
new file mode 100644
index 00000000000..b358a63ce24
--- /dev/null
+++ b/tests/queries/0_stateless/01683_text_log_deadlock.reference
@@ -0,0 +1 @@
+queries 100000
diff --git a/tests/queries/0_stateless/01683_text_log_deadlock.sh b/tests/queries/0_stateless/01683_text_log_deadlock.sh
new file mode 100755
index 00000000000..ddb10c2d7f3
--- /dev/null
+++ b/tests/queries/0_stateless/01683_text_log_deadlock.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+$CLICKHOUSE_BENCHMARK --secure -i 100000 -c 32 --query 'SELECT 1' 2>&1 | grep -oF 'queries 100000'

From ddff28e594d92ee6b8d0a495649720556232a785 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 31 Jan 2021 13:23:50 +0300
Subject: [PATCH 0442/1238] Fix UBSan report in intDiv

---
 src/Functions/DivisionUtils.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/Functions/DivisionUtils.h b/src/Functions/DivisionUtils.h
index 9f9cfc1e72c..97f5e81337c 100644
--- a/src/Functions/DivisionUtils.h
+++ b/src/Functions/DivisionUtils.h
@@ -3,8 +3,10 @@
 #include <cmath>
 #include <type_traits>
 #include <Common/Exception.h>
+#include <Common/NaNUtils.h>
 #include <DataTypes/NumberTraits.h>
 
+
 #if !defined(ARCADIA_BUILD)
 #    include <Common/config.h>
 #endif
@@ -87,7 +89,12 @@ struct DivideIntegralImpl
             return static_cast<Result>(checkedDivision(static_cast<SignedCastA>(a), static_cast<SignedCastB>(b)));
         }
         else
+        {
+            if (!isFinite(a) || !isFinite(b))
+                throw Exception("Cannot perform integer division on infinite floating point numbers", ErrorCodes::ILLEGAL_DIVISION);
+
             return static_cast<Result>(checkedDivision(CastA(a), CastB(b)));
+        }
     }
 
 #if USE_EMBEDDED_COMPILER
@@ -114,6 +121,9 @@ struct ModuloImpl
         }
         else
         {
+            if (!isFinite(a) || !isFinite(b))
+                throw Exception("Cannot perform integer division on infinite floating point numbers", ErrorCodes::ILLEGAL_DIVISION);
+
             throwIfDivisionLeadsToFPE(IntegerAType(a), IntegerBType(b));
 
             if constexpr (is_big_int_v<IntegerAType> || is_big_int_v<IntegerBType>)

From 4eb21ca39671b49d27b1a2c87387722d498fc510 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 31 Jan 2021 13:25:31 +0300
Subject: [PATCH 0443/1238] Add a test

---
 tests/queries/0_stateless/01683_intdiv_ubsan.reference | 0
 tests/queries/0_stateless/01683_intdiv_ubsan.sql       | 1 +
 2 files changed, 1 insertion(+)
 create mode 100644 tests/queries/0_stateless/01683_intdiv_ubsan.reference
 create mode 100644 tests/queries/0_stateless/01683_intdiv_ubsan.sql

diff --git a/tests/queries/0_stateless/01683_intdiv_ubsan.reference b/tests/queries/0_stateless/01683_intdiv_ubsan.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/01683_intdiv_ubsan.sql b/tests/queries/0_stateless/01683_intdiv_ubsan.sql
new file mode 100644
index 00000000000..adac2505745
--- /dev/null
+++ b/tests/queries/0_stateless/01683_intdiv_ubsan.sql
@@ -0,0 +1 @@
+SELECT DISTINCT intDiv(number, nan) FROM numbers(10); -- { serverError 153 }

From cad9f2da6cedcb5273e4585890bf9c71211f6690 Mon Sep 17 00:00:00 2001
From: Dmitriy <sevirov@yandex-team.ru>
Date: Sun, 31 Jan 2021 14:46:44 +0300
Subject: [PATCH 0444/1238] Update the system.distributed_ddl_queue system
 table
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Поправил английскую версию и выполнил перевод на русский язык.
---
 .../system-tables/distributed_ddl_queue.md    | 28 ++++----
 .../system-tables/distributed_ddl_queue.md    | 65 +++++++++++++++++++
 2 files changed, 78 insertions(+), 15 deletions(-)
 create mode 100644 docs/ru/operations/system-tables/distributed_ddl_queue.md

diff --git a/docs/en/operations/system-tables/distributed_ddl_queue.md b/docs/en/operations/system-tables/distributed_ddl_queue.md
index 643bdee6def..c252458af8a 100644
--- a/docs/en/operations/system-tables/distributed_ddl_queue.md
+++ b/docs/en/operations/system-tables/distributed_ddl_queue.md
@@ -1,22 +1,21 @@
 # system.distributed_ddl_queue {#system_tables-distributed_ddl_queue}
 
-Contains information about distributed ddl queries (ON CLUSTER queries) that were executed on a cluster.
+Contains information about [distributed ddl queries (ON CLUSTER clause)](../../sql-reference/distributed-ddl.md) that were executed on a cluster.
 
 Columns:
 
--   `entry`  ([String](../../sql-reference/data-types/string.md)) - Query id.
--   `host_name`  ([String](../../sql-reference/data-types/string.md)) - Hostname.
--   `host_address`  ([String](../../sql-reference/data-types/string.md)) - IP address that the Hostname resolves to.
--   `port`  ([UInt16](../../sql-reference/data-types/int-uint.md)) - Host Port.
--   `status`  ([Enum](../../sql-reference/data-types/enum.md)) - Stats of the query.
--   `cluster`  ([String](../../sql-reference/data-types/string.md)) - Cluster name.
--   `query`  ([String](../../sql-reference/data-types/string.md)) - Query executed.
--   `initiator`  ([String](../../sql-reference/data-types/string.md)) - Nod that executed the query.
--   `query_start_time` ([Date](../../sql-reference/data-types/date.md)) — Query start time.
--   `query_finish_time` ([Date](../../sql-reference/data-types/date.md)) — Query finish time.
--   `query_duration_ms` ([UInt64](../../sql-reference/data-types/datetime64.md)) — Duration of query execution in milliseconds.
--   `exception_code`  ([Enum](../../sql-reference/data-types/enum.md)) - Exception code from ZooKeeper.
-
+-   `entry` ([String](../../sql-reference/data-types/string.md)) — Query id.
+-   `host_name` ([String](../../sql-reference/data-types/string.md)) — Hostname.
+-   `host_address` ([String](../../sql-reference/data-types/string.md)) — IP address that the Hostname resolves to.
+-   `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — Host Port.
+-   `status` ([Enum8](../../sql-reference/data-types/enum.md)) — Status of the query.
+-   `cluster` ([String](../../sql-reference/data-types/string.md)) — Cluster name.
+-   `query` ([String](../../sql-reference/data-types/string.md)) — Query executed.
+-   `initiator` ([String](../../sql-reference/data-types/string.md)) — Node that executed the query.
+-   `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query start time.
+-   `query_finish_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query finish time.
+-   `query_duration_ms` ([UInt64](../../sql-reference/data-types/datetime64.md)) — Duration of query execution (in milliseconds).
+-   `exception_code` ([Enum8](../../sql-reference/data-types/enum.md)) — Exception code from [ZooKeeper](../../operations/tips.md#zookeeper).
 
 **Example**
 
@@ -62,6 +61,5 @@ exception_code:    ZOK
 2 rows in set. Elapsed: 0.025 sec.
 ```
 
-
 [Original article](https://clickhouse.tech/docs/en/operations/system_tables/distributed_ddl_queuedistributed_ddl_queue.md) <!--hide-->
  
\ No newline at end of file
diff --git a/docs/ru/operations/system-tables/distributed_ddl_queue.md b/docs/ru/operations/system-tables/distributed_ddl_queue.md
new file mode 100644
index 00000000000..058ed06f639
--- /dev/null
+++ b/docs/ru/operations/system-tables/distributed_ddl_queue.md
@@ -0,0 +1,65 @@
+# system.distributed_ddl_queue {#system_tables-distributed_ddl_queue}
+
+Содержит информацию о [распределенных ddl запросах (секция ON CLUSTER)](../../sql-reference/distributed-ddl.md), которые были выполнены на кластере.
+
+Столбцы:
+
+-   `entry` ([String](../../sql-reference/data-types/string.md)) — идентификатор запроса.
+-   `host_name` ([String](../../sql-reference/data-types/string.md)) — имя хоста.
+-   `host_address` ([String](../../sql-reference/data-types/string.md)) — IP-адрес хоста.
+-   `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — порт для соединения с сервером.
+-   `status` ([Enum8](../../sql-reference/data-types/enum.md)) — состояние запроса.
+-   `cluster` ([String](../../sql-reference/data-types/string.md)) — имя кластера.
+-   `query` ([String](../../sql-reference/data-types/string.md)) — выполненный запрос.
+-   `initiator` ([String](../../sql-reference/data-types/string.md)) — узел, выполнивший запрос.
+-   `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время начала запроса.
+-   `query_finish_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время окончания запроса.
+-   `query_duration_ms` ([UInt64](../../sql-reference/data-types/datetime64.md)) — продолжительность выполнения запроса (в миллисекундах).
+-   `exception_code` ([Enum8](../../sql-reference/data-types/enum.md)) — код исключения из [ZooKeeper](../../operations/tips.md#zookeeper).
+
+**Пример**
+
+``` sql
+SELECT *
+FROM system.distributed_ddl_queue
+WHERE cluster = 'test_cluster'
+LIMIT 2
+FORMAT Vertical
+
+Query id: f544e72a-6641-43f1-836b-24baa1c9632a
+
+Row 1:
+──────
+entry:             query-0000000000
+host_name:         clickhouse01
+host_address:      172.23.0.11
+port:              9000
+status:            Finished
+cluster:           test_cluster
+query:             CREATE DATABASE test_db UUID '4a82697e-c85e-4e5b-a01e-a36f2a758456' ON CLUSTER test_cluster
+initiator:         clickhouse01:9000
+query_start_time:  2020-12-30 13:07:51
+query_finish_time: 2020-12-30 13:07:51
+query_duration_ms: 6
+exception_code:    ZOK
+
+Row 2:
+──────
+entry:             query-0000000000
+host_name:         clickhouse02
+host_address:      172.23.0.12
+port:              9000
+status:            Finished
+cluster:           test_cluster
+query:             CREATE DATABASE test_db UUID '4a82697e-c85e-4e5b-a01e-a36f2a758456' ON CLUSTER test_cluster
+initiator:         clickhouse01:9000
+query_start_time:  2020-12-30 13:07:51
+query_finish_time: 2020-12-30 13:07:51
+query_duration_ms: 6
+exception_code:    ZOK
+
+2 rows in set. Elapsed: 0.025 sec.
+```
+
+[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/system_tables/distributed_ddl_queuedistributed_ddl_queue.md) <!--hide-->
+ 
\ No newline at end of file

From a4812aa97b1d1c71910c934c93466400a79e98f0 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Sun, 31 Jan 2021 18:32:47 +0300
Subject: [PATCH 0445/1238] Updated tests

---
 .../functions/type-conversion-functions.md    |  5 ++--
 src/Functions/reinterpretAs.cpp               |  8 ++---
 .../01676_reinterpret_as.reference            | 30 +++++++++++++++++++
 .../0_stateless/01676_reinterpret_as.sql      | 30 +++++++++++++++++++
 4 files changed, 65 insertions(+), 8 deletions(-)
 create mode 100644 tests/queries/0_stateless/01676_reinterpret_as.reference
 create mode 100644 tests/queries/0_stateless/01676_reinterpret_as.sql

diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md
index f650df79271..3ca36f41c78 100644
--- a/docs/en/sql-reference/functions/type-conversion-functions.md
+++ b/docs/en/sql-reference/functions/type-conversion-functions.md
@@ -308,10 +308,9 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut
 Performs byte reinterpretation of ‘x’ as ‘t’ data type.
 
 Following reinterpretations are allowed:
-1. Any type that has fixed size and value of that type can be represented continuously into FixedString. Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long.
+1. Any type that has fixed size and value of that type can be represented continuously into FixedString.
 2. Any type that if value of that type can be represented continuously into String. Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long.
-3. Types that can be interpreted as numeric (Integers, Float, Date, DateTime, UUID) into FixedString,
-String, and types that can be interpreted as numeric (Integers, Float, Date, DateTime, UUID).
+3. FixedString, String, types that can be interpreted as numeric (Integers, Float, Date, DateTime, UUID) into types that can be interpreted as numeric (Integers, Float, Date, DateTime, UUID) into FixedString,
 
 ``` sql
 SELECT reinterpretAs(toInt8(-1), 'UInt8') as int_to_uint,
diff --git a/src/Functions/reinterpretAs.cpp b/src/Functions/reinterpretAs.cpp
index 356ce9d60ad..6cf75467501 100644
--- a/src/Functions/reinterpretAs.cpp
+++ b/src/Functions/reinterpretAs.cpp
@@ -314,8 +314,6 @@ public:
     static constexpr auto name = Name::name;
     static FunctionPtr create(const Context &) { return std::make_shared<FunctionReinterpretAsTyped>(); }
 
-    explicit FunctionReinterpretAsTyped() {}
-
     String getName() const override { return name; }
 
     size_t getNumberOfArguments() const override { return 1; }
@@ -324,13 +322,13 @@ public:
 
     static ColumnsWithTypeAndName addTypeColumnToArguments(const ColumnsWithTypeAndName & arguments)
     {
-        auto & argument = arguments[0];
+        const auto & argument = arguments[0];
 
         DataTypePtr data_type;
 
         if constexpr (std::is_same_v<ToDataType, DataTypeFixedString>)
         {
-            auto & type = argument.type;
+            const auto & type = argument.type;
             size_t type_value_size_in_memory = type->getSizeOfValueInMemory();
             data_type = std::make_shared<DataTypeFixedString>(type_value_size_in_memory);
         }
@@ -361,7 +359,7 @@ public:
         return impl.executeImpl(arguments_with_type, return_type, input_rows_count);
     }
 
-    const FunctionReinterpretAs impl;
+    FunctionReinterpretAs impl;
 };
 
 struct NameReinterpretAsUInt8       { static constexpr auto name = "reinterpretAsUInt8"; };
diff --git a/tests/queries/0_stateless/01676_reinterpret_as.reference b/tests/queries/0_stateless/01676_reinterpret_as.reference
new file mode 100644
index 00000000000..bbde2d5ed57
--- /dev/null
+++ b/tests/queries/0_stateless/01676_reinterpret_as.reference
@@ -0,0 +1,30 @@
+Into String
+1
+Into FixedString
+1
+1\0
+1\0\0
+1\0\0\0
+1
+Into Numeric Representable
+Integer and Integer types
+1	1
+1	1
+257	257
+257	257
+257	257
+257	257
+257	257
+257	257
+257	257
+257	257
+257	257
+Integer and Float types
+1045220557	1045220557
+4596373779694328218	4596373779694328218
+0.2	1045220557
+0.2	4596373779694328218
+Integer and String types
+1	49
+1	49
+11	12593
diff --git a/tests/queries/0_stateless/01676_reinterpret_as.sql b/tests/queries/0_stateless/01676_reinterpret_as.sql
new file mode 100644
index 00000000000..88dc6437043
--- /dev/null
+++ b/tests/queries/0_stateless/01676_reinterpret_as.sql
@@ -0,0 +1,30 @@
+SELECT 'Into String';
+SELECT reinterpretAs(49, 'String');
+SELECT 'Into FixedString';
+SELECT reinterpretAs(49, 'FixedString(1)');
+SELECT reinterpretAs(49, 'FixedString(2)');
+SELECT reinterpretAs(49, 'FixedString(3)');
+SELECT reinterpretAs(49, 'FixedString(4)');
+SELECT reinterpretAsFixedString(49);
+SELECT 'Into Numeric Representable';
+SELECT 'Integer and Integer types';
+SELECT reinterpretAs(257, 'UInt8'), reinterpretAsUInt8(257);
+SELECT reinterpretAs(257, 'Int8'), reinterpretAsInt8(257);
+SELECT reinterpretAs(257, 'UInt16'), reinterpretAsUInt16(257);
+SELECT reinterpretAs(257, 'Int16'), reinterpretAsInt16(257);
+SELECT reinterpretAs(257, 'UInt32'), reinterpretAsUInt32(257);
+SELECT reinterpretAs(257, 'Int32'), reinterpretAsInt32(257);
+SELECT reinterpretAs(257, 'UInt64'), reinterpretAsUInt64(257);
+SELECT reinterpretAs(257, 'Int64'), reinterpretAsInt64(257);
+SELECT reinterpretAs(257, 'Int128'), reinterpretAsInt128(257);
+SELECT reinterpretAs(257, 'UInt256'), reinterpretAsUInt256(257);
+SELECT reinterpretAs(257, 'Int256'), reinterpretAsInt256(257);
+SELECT 'Integer and Float types';
+SELECT reinterpretAs(toFloat32(0.2), 'UInt32'), reinterpretAsUInt32(toFloat32(0.2));
+SELECT reinterpretAs(toFloat64(0.2), 'UInt64'), reinterpretAsUInt64(toFloat64(0.2));
+SELECT reinterpretAsFloat32(a), reinterpretAsUInt32(toFloat32(0.2)) as a;
+SELECT reinterpretAsFloat64(a), reinterpretAsUInt64(toFloat64(0.2)) as a;
+SELECT 'Integer and String types';
+SELECT reinterpretAsString(a), reinterpretAsUInt8('1') as a;
+SELECT reinterpretAsString(a), reinterpretAsUInt8('11') as a;
+SELECT reinterpretAsString(a), reinterpretAsUInt16('11') as a;

From ecab446721e94d9833b94e5a4d7b945727651886 Mon Sep 17 00:00:00 2001
From: George <gyuton@yandex-team.ru>
Date: Sun, 31 Jan 2021 21:23:57 +0300
Subject: [PATCH 0446/1238] updated and added translation

---
 .../functions/other-functions.md              |  60 +++++-----
 .../functions/other-functions.md              | 111 ++++++++++++++++++
 2 files changed, 140 insertions(+), 31 deletions(-)

diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md
index c8f2c0ad495..73b7f6a1078 100644
--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@@ -186,10 +186,6 @@ In ClickHouse, queries are always run on blocks (sets of column parts). This fun
 
 Returns estimation of uncompressed byte size of its arguments in memory.
 
-E.g. for [UInt32](../../sql-reference/data-types/int-uint.md) argument it will return constant 4, for [String](../../sql-reference/data-types/string.md) arguments — the string length + 9 (terminating zero + length).
-
-The function can take multiple arguments. The typical application is byteSize(*).
-
 Use case: suppose you have a service that stores data for multiple clients in one table. Users will pay per data volume. So, you need to implement accounting of users data volume. The function will allow to calculate the data size on per-row basis.
 
 **Syntax**
@@ -208,7 +204,9 @@ byteSize(argument [, ...])
 
 Type: [UInt64](../../sql-reference/data-types/int-uint.md).
 
-**Example**
+**Examples**
+
+For [String](../../sql-reference/data-types/string.md) arguments the funtion returns the string length + 9 (terminating zero + length).
 
 Query:
 
@@ -218,11 +216,11 @@ SELECT byteSize('string');
 
 Result:
 
+```text
 ┌─byteSize('string')─┐
 │                 15 │
 └────────────────────┘
-
-For this table:
+```
 
 Query:
 
@@ -243,43 +241,43 @@ CREATE TABLE test
 )
 ENGINE = MergeTree
 ORDER BY key;
-```
 
-Insert this values:
+INSERT INTO test VALUES(1, 8, 16, 32, 64,  -8, -16, -32, -64, 32.32, 64.64);
 
-Query:
+SELECT key, byteSize(u8) AS `byteSize(UInt8)`, byteSize(u16) AS `byteSize(UInt16)`, byteSize(u32) AS `byteSize(UInt32)`, byteSize(u64) AS `byteSize(UInt64)` FROM test ORDER BY key ASC FORMAT Vertical;
 
-```sql
-insert into test values(1, 8, 16, 32, 64,  -8, -16, -32, -64, 32.32, 64.64);
-```
+SELECT key, byteSize(i8) AS `byteSize(Int8)`, byteSize(i16) AS `byteSize(Int16)`, byteSize(i32) AS `byteSize(Int32)`, byteSize(i64) AS `byteSize(Int64)` FROM test ORDER BY key ASC FORMAT Vertical;
 
-Query:
-
-```sql
-SELECT key, toTypeName(u8), byteSize(u8), toTypeName(u16), byteSize(u16), toTypeName(u32), byteSize(u32), toTypeName(u64), byteSize(u64) FROM test ORDER BY key ASC;
-
-SELECT key, toTypeName(i8), byteSize(i8), toTypeName(i16), byteSize(i16), toTypeName(i32), byteSize(i32), toTypeName(i64), byteSize(i64), FROM test ORDER BY key ASC;
-
-SELECT key, toTypeName(f32), byteSize(f32), toTypeName(f64), byteSize(f64) FROM test ORDER BY key ASC;
+SELECT key, byteSize(f32) AS `byteSize(Float32)`,  byteSize(f64) AS `byteSize(Float64)` FROM test ORDER BY key ASC FORMAT Vertical;
 ```
 
 Result:
 
 ``` text
-┌─key─┬─toTypeName(u8)─┬─byteSize(u8)─┬─toTypeName(u16)─┬─byteSize(u16)─┬─toTypeName(u32)─┬─byteSize(u32)─┬─toTypeName(u64)─┬─byteSize(u64)─┐
-│   1 │ UInt8          │            1 │ UInt16          │             2 │ UInt32          │             4 │ UInt64          │             8 │
-└─────┴────────────────┴──────────────┴─────────────────┴───────────────┴─────────────────┴───────────────┴─────────────────┴───────────────┘
+Row 1:
+──────
+key:              1
+byteSize(UInt8):  1
+byteSize(UInt16): 2
+byteSize(UInt32): 4
+byteSize(UInt64): 8
 
-┌─key─┬─toTypeName(i8)─┬─byteSize(i8)─┬─toTypeName(i16)─┬─byteSize(i16)─┬─toTypeName(i32)─┬─byteSize(i32)─┬─toTypeName(i64)─┬─byteSize(i64)─┐
-│   1 │ Int8           │            1 │ Int16           │             2 │ Int32           │             4 │ Int64           │             8 │
-└─────┴────────────────┴──────────────┴─────────────────┴───────────────┴─────────────────┴───────────────┴─────────────────┴───────────────┘
+Row 1:
+──────
+key:             1
+byteSize(Int8):  1
+byteSize(Int16): 2
+byteSize(Int32): 4
+byteSize(Int64): 8
 
-┌─key─┬─toTypeName(f32)─┬─byteSize(f32)─┬─toTypeName(f64)─┬─byteSize(f64)─┐
-│   1 │ Float32         │             4 │ Float64         │             8 │
-└─────┴─────────────────┴───────────────┴─────────────────┴───────────────┘
+Row 1:
+──────
+key:               1
+byteSize(Float32): 4
+byteSize(Float64): 8
 ```
 
-With multiple arguments:
+The function can take multiple arguments and will return their combined byte size.
 
 Query:
 
diff --git a/docs/ru/sql-reference/functions/other-functions.md b/docs/ru/sql-reference/functions/other-functions.md
index 68afb3e24ce..6478b77effe 100644
--- a/docs/ru/sql-reference/functions/other-functions.md
+++ b/docs/ru/sql-reference/functions/other-functions.md
@@ -183,6 +183,117 @@ SELECT visibleWidth(NULL)
 Получить размер блока.
 В ClickHouse выполнение запроса всегда идёт по блокам (наборам кусочков столбцов). Функция позволяет получить размер блока, для которого её вызвали.
 
+## byteSize {#function-bytesize}
+
+Returns estimation of uncompressed byte size of its arguments in memory. Возвращает примерный размер аргументов в памяти в байтах.
+
+Пример использования: в сервисе, хранящим данные для нескольких клиентов в одной таблице, пользователь платит за хранение данных. Пэтому требуется узнать количество памяти, занимаемое ими. Функция позволяет вычислить размер данных для каждой строки.
+
+**Синтаксис**
+
+```sql
+byteSize(argument [, ...])
+```
+
+**Параметры**
+
+-   `argument` — значение.
+
+**Возвращаемое значение**
+
+-   Оценка размера аргументов в памяти в байтах.
+
+Тип: [UInt64](../../sql-reference/data-types/int-uint.md).
+
+**Примеры**
+
+Для аргументов типа [String](../../sql-reference/data-types/string.md) функция возвращает длину строки + 9 (нуль-терминатор + длина)
+
+Запрос:
+
+```sql
+SELECT byteSize('string');
+```
+
+Результат:
+
+```text
+┌─byteSize('string')─┐
+│                 15 │
+└────────────────────┘
+```
+
+Запрос:
+
+```sql
+CREATE TABLE test
+(
+    `key` Int32,
+    `u8` UInt8,
+    `u16` UInt16,
+    `u32` UInt32,
+    `u64` UInt64,
+    `i8` Int8,
+    `i16` Int16,
+    `i32` Int32,
+    `i64` Int64,
+    `f32` Float32,
+    `f64` Float64
+)
+ENGINE = MergeTree
+ORDER BY key;
+
+INSERT INTO test VALUES(1, 8, 16, 32, 64,  -8, -16, -32, -64, 32.32, 64.64);
+
+SELECT key, byteSize(u8) AS `byteSize(UInt8)`, byteSize(u16) AS `byteSize(UInt16)`, byteSize(u32) AS `byteSize(UInt32)`, byteSize(u64) AS `byteSize(UInt64)` FROM test ORDER BY key ASC FORMAT Vertical;
+
+SELECT key, byteSize(i8) AS `byteSize(Int8)`, byteSize(i16) AS `byteSize(Int16)`, byteSize(i32) AS `byteSize(Int32)`, byteSize(i64) AS `byteSize(Int64)` FROM test ORDER BY key ASC FORMAT Vertical;
+
+SELECT key, byteSize(f32) AS `byteSize(Float32)`,  byteSize(f64) AS `byteSize(Float64)` FROM test ORDER BY key ASC FORMAT Vertical;
+```
+
+Результат:
+
+``` text
+Row 1:
+──────
+key:              1
+byteSize(UInt8):  1
+byteSize(UInt16): 2
+byteSize(UInt32): 4
+byteSize(UInt64): 8
+
+Row 1:
+──────
+key:             1
+byteSize(Int8):  1
+byteSize(Int16): 2
+byteSize(Int32): 4
+byteSize(Int64): 8
+
+Row 1:
+──────
+key:               1
+byteSize(Float32): 4
+byteSize(Float64): 8
+```
+
+Функция может принимать несколько аргументов и будет выводить их совокупный размер в байтах.
+
+Запрос:
+
+```sql
+SELECT byteSize(NULL, 1, 0.3, '');
+```
+
+Результат:
+
+```text
+┌─byteSize(NULL, 1, 0.3, '')─┐
+│                         19 │
+└────────────────────────────┘
+```
+
 ## materialize(x) {#materializex}
 
 Превращает константу в полноценный столбец, содержащий только одно значение.

From 1aa8d7fdfb79df5b98a1efab3586e1b69c78165c Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Sun, 31 Jan 2021 21:49:27 +0300
Subject: [PATCH 0447/1238] BloomFilter index crash fix

---
 src/Interpreters/Set.cpp                      | 11 +++-
 ...681_bloom_filter_nullable_column.reference | 10 ++++
 .../01681_bloom_filter_nullable_column.sql    | 55 +++++++++++++++++++
 3 files changed, 75 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/01681_bloom_filter_nullable_column.reference
 create mode 100644 tests/queries/0_stateless/01681_bloom_filter_nullable_column.sql

diff --git a/src/Interpreters/Set.cpp b/src/Interpreters/Set.cpp
index faec94de9ac..b150eabda65 100644
--- a/src/Interpreters/Set.cpp
+++ b/src/Interpreters/Set.cpp
@@ -140,7 +140,16 @@ void Set::setHeader(const Block & header)
     ConstNullMapPtr null_map{};
     ColumnPtr null_map_holder;
     if (!transform_null_in)
+    {
+        /// We convert nullable columns to non nullable we also need to update nullable types
+        for (size_t i = 0; i < set_elements_types.size(); ++i)
+        {
+            data_types[i] = removeNullable(data_types[i]);
+            set_elements_types[i] = removeNullable(set_elements_types[i]);
+        }
+
         extractNestedColumnsAndNullMap(key_columns, null_map);
+    }
 
     if (fill_set_elements)
     {
@@ -182,7 +191,7 @@ bool Set::insertFromBlock(const Block & block)
     ConstNullMapPtr null_map{};
     ColumnPtr null_map_holder;
     if (!transform_null_in)
-         null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map);
+        null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map);
 
     /// Filter to extract distinct values from the block.
     ColumnUInt8::MutablePtr filter;
diff --git a/tests/queries/0_stateless/01681_bloom_filter_nullable_column.reference b/tests/queries/0_stateless/01681_bloom_filter_nullable_column.reference
new file mode 100644
index 00000000000..2616e6c2a5c
--- /dev/null
+++ b/tests/queries/0_stateless/01681_bloom_filter_nullable_column.reference
@@ -0,0 +1,10 @@
+NullableTuple with transform_null_in=0
+NullableTuple with transform_null_in=1
+NullableColumnFromCast with transform_null_in=0
+1	test
+NullableColumnFromCast with transform_null_in=1
+1	test
+NullableColumnFromTable with transform_null_in=0
+1	test
+NullableColumnFromTable with transform_null_in=1
+1	test
diff --git a/tests/queries/0_stateless/01681_bloom_filter_nullable_column.sql b/tests/queries/0_stateless/01681_bloom_filter_nullable_column.sql
new file mode 100644
index 00000000000..d4ae56d14a6
--- /dev/null
+++ b/tests/queries/0_stateless/01681_bloom_filter_nullable_column.sql
@@ -0,0 +1,55 @@
+CREATE DATABASE 01681_bloom_filter_nullable_column;
+
+DROP TABLE IF EXISTS 01681_bloom_filter_nullable_column.bloom_filter_nullable_index;
+
+CREATE TABLE 01681_bloom_filter_nullable_column.bloom_filter_nullable_index
+    (
+        order_key UInt64,
+        str Nullable(String),
+
+        INDEX idx (str) TYPE bloom_filter GRANULARITY 1
+    )
+    ENGINE = MergeTree() 
+    ORDER BY order_key SETTINGS index_granularity = 6;
+
+INSERT INTO 01681_bloom_filter_nullable_column.bloom_filter_nullable_index VALUES (1, 'test');
+INSERT INTO 01681_bloom_filter_nullable_column.bloom_filter_nullable_index VALUES (2, 'test2');
+
+SELECT 'NullableTuple with transform_null_in=0';
+SELECT * FROM 01681_bloom_filter_nullable_column.bloom_filter_nullable_index WHERE str IN
+    (SELECT '1048576', str FROM 01681_bloom_filter_nullable_column.bloom_filter_nullable_index) SETTINGS transform_null_in = 0;
+SELECT * FROM 01681_bloom_filter_nullable_column.bloom_filter_nullable_index WHERE str IN
+    (SELECT '1048576', str FROM 01681_bloom_filter_nullable_column.bloom_filter_nullable_index) SETTINGS transform_null_in = 0;
+
+SELECT 'NullableTuple with transform_null_in=1';
+
+SELECT * FROM 01681_bloom_filter_nullable_column.bloom_filter_nullable_index WHERE str IN
+    (SELECT '1048576', str FROM 01681_bloom_filter_nullable_column.bloom_filter_nullable_index) SETTINGS transform_null_in = 1; -- { serverError 20 }
+
+SELECT * FROM 01681_bloom_filter_nullable_column.bloom_filter_nullable_index WHERE str IN
+    (SELECT '1048576', str FROM 01681_bloom_filter_nullable_column.bloom_filter_nullable_index) SETTINGS transform_null_in = 1; -- { serverError 20 }
+
+
+SELECT 'NullableColumnFromCast with transform_null_in=0';
+SELECT * FROM 01681_bloom_filter_nullable_column.bloom_filter_nullable_index WHERE str IN
+    (SELECT cast('test', 'Nullable(String)')) SETTINGS transform_null_in = 0;
+
+SELECT 'NullableColumnFromCast with transform_null_in=1';
+SELECT * FROM 01681_bloom_filter_nullable_column.bloom_filter_nullable_index WHERE str IN
+    (SELECT cast('test', 'Nullable(String)')) SETTINGS transform_null_in = 1;
+
+CREATE TABLE 01681_bloom_filter_nullable_column.nullable_string_value (value Nullable(String)) ENGINE=TinyLog;
+INSERT INTO 01681_bloom_filter_nullable_column.nullable_string_value VALUES ('test');
+
+SELECT 'NullableColumnFromTable with transform_null_in=0';
+SELECT * FROM 01681_bloom_filter_nullable_column.bloom_filter_nullable_index WHERE str IN
+    (SELECT value FROM 01681_bloom_filter_nullable_column.nullable_string_value) SETTINGS transform_null_in = 0;
+
+SELECT 'NullableColumnFromTable with transform_null_in=1';
+SELECT * FROM 01681_bloom_filter_nullable_column.bloom_filter_nullable_index WHERE str IN
+    (SELECT value FROM 01681_bloom_filter_nullable_column.nullable_string_value) SETTINGS transform_null_in = 1;
+
+DROP TABLE 01681_bloom_filter_nullable_column.nullable_string_value; 
+
+DROP TABLE 01681_bloom_filter_nullable_column.bloom_filter_nullable_index;
+DROP DATABASE 01681_bloom_filter_nullable_column;

From 4c9a640ed481106f2f1fa2dd0545acb417e5bb01 Mon Sep 17 00:00:00 2001
From: George <gyuton@yandex-team.ru>
Date: Sun, 31 Jan 2021 22:02:42 +0300
Subject: [PATCH 0448/1238] Minor fixes

---
 docs/ru/sql-reference/functions/other-functions.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/ru/sql-reference/functions/other-functions.md b/docs/ru/sql-reference/functions/other-functions.md
index 6478b77effe..c92f85e3e00 100644
--- a/docs/ru/sql-reference/functions/other-functions.md
+++ b/docs/ru/sql-reference/functions/other-functions.md
@@ -185,9 +185,9 @@ SELECT visibleWidth(NULL)
 
 ## byteSize {#function-bytesize}
 
-Returns estimation of uncompressed byte size of its arguments in memory. Возвращает примерный размер аргументов в памяти в байтах.
+Возвращает примерный размер аргументов в памяти в байтах в несжатом виде.
 
-Пример использования: в сервисе, хранящим данные для нескольких клиентов в одной таблице, пользователь платит за хранение данных. Пэтому требуется узнать количество памяти, занимаемое ими. Функция позволяет вычислить размер данных для каждой строки.
+Пример использования: в сервисе, хранящим данные для нескольких клиентов в одной таблице, пользователь платит за хранение данных. Поэтому требуется узнать количество памяти, занимаемое ими. Функция позволяет вычислить размер данных для каждой строки.
 
 **Синтаксис**
 
@@ -278,7 +278,7 @@ byteSize(Float32): 4
 byteSize(Float64): 8
 ```
 
-Функция может принимать несколько аргументов и будет выводить их совокупный размер в байтах.
+Функция может принимать несколько аргументов и будет возвращать их совокупный размер в байтах.
 
 Запрос:
 

From d2b8d9886ffa4fe4eeb1b79d816c2fcf5e39dbdc Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Sun, 31 Jan 2021 22:25:53 +0300
Subject: [PATCH 0449/1238] Fixed fuzzer crash

---
 src/Functions/reinterpretAs.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/Functions/reinterpretAs.cpp b/src/Functions/reinterpretAs.cpp
index 6cf75467501..363455cb38f 100644
--- a/src/Functions/reinterpretAs.cpp
+++ b/src/Functions/reinterpretAs.cpp
@@ -329,6 +329,12 @@ public:
         if constexpr (std::is_same_v<ToDataType, DataTypeFixedString>)
         {
             const auto & type = argument.type;
+
+            if (!type->isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion())
+                throw Exception("Cannot reinterpret " + type->getName() +
+                    " as FixedString because it is not fixed size and contiguous in memory",
+                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+
             size_t type_value_size_in_memory = type->getSizeOfValueInMemory();
             data_type = std::make_shared<DataTypeFixedString>(type_value_size_in_memory);
         }

From 65183f1feae3fe017058b5658c8d476e0497e962 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Sun, 31 Jan 2021 23:54:52 +0300
Subject: [PATCH 0450/1238] Fixed tests

---
 tests/queries/0_stateless/01681_bloom_filter_nullable_column.sql | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/queries/0_stateless/01681_bloom_filter_nullable_column.sql b/tests/queries/0_stateless/01681_bloom_filter_nullable_column.sql
index d4ae56d14a6..2a61d66ac73 100644
--- a/tests/queries/0_stateless/01681_bloom_filter_nullable_column.sql
+++ b/tests/queries/0_stateless/01681_bloom_filter_nullable_column.sql
@@ -1,3 +1,4 @@
+DROP DATABASE IF EXISTS 01681_bloom_filter_nullable_column;
 CREATE DATABASE 01681_bloom_filter_nullable_column;
 
 DROP TABLE IF EXISTS 01681_bloom_filter_nullable_column.bloom_filter_nullable_index;

From 84d3d6e7280907a22f6a2ce86b9bf3040b4871f1 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Mon, 1 Feb 2021 02:10:41 +0300
Subject: [PATCH 0451/1238] fix 'Unmatched parentheses', add test

---
 src/Parsers/TokenIterator.cpp                 |  7 ++--
 src/Parsers/TokenIterator.h                   |  2 +-
 src/Parsers/parseQuery.cpp                    |  2 +-
 .../01180_client_syntax_errors.expect         | 32 +++++++++++++++++++
 .../01180_client_syntax_errors.reference      |  0
 5 files changed, 38 insertions(+), 5 deletions(-)
 create mode 100755 tests/queries/0_stateless/01180_client_syntax_errors.expect
 create mode 100644 tests/queries/0_stateless/01180_client_syntax_errors.reference

diff --git a/src/Parsers/TokenIterator.cpp b/src/Parsers/TokenIterator.cpp
index 18360ed29ae..08877e0b2fe 100644
--- a/src/Parsers/TokenIterator.cpp
+++ b/src/Parsers/TokenIterator.cpp
@@ -4,13 +4,14 @@
 namespace DB
 {
 
-UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin, Token last)
+UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin)
 {
     /// We have just two kind of parentheses: () and [].
     UnmatchedParentheses stack;
 
-    for (TokenIterator it = begin;
-        it.isValid() && it->begin <= last.begin; ++it)
+    /// We have to iterate through all tokens until the end to avoid false positive "Unmatched parentheses" error
+    /// when parser failed in the middle of the query.
+    for (TokenIterator it = begin; it.isValid(); ++it)
     {
         if (it->type == TokenType::OpeningRoundBracket || it->type == TokenType::OpeningSquareBracket)
         {
diff --git a/src/Parsers/TokenIterator.h b/src/Parsers/TokenIterator.h
index a95465500e0..e3a5b9f79c3 100644
--- a/src/Parsers/TokenIterator.h
+++ b/src/Parsers/TokenIterator.h
@@ -80,6 +80,6 @@ public:
 
 /// Returns positions of unmatched parentheses.
 using UnmatchedParentheses = std::vector<Token>;
-UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin, Token last);
+UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin);
 
 }
diff --git a/src/Parsers/parseQuery.cpp b/src/Parsers/parseQuery.cpp
index 650c0e40c8c..4309ae8955a 100644
--- a/src/Parsers/parseQuery.cpp
+++ b/src/Parsers/parseQuery.cpp
@@ -290,7 +290,7 @@ ASTPtr tryParseQuery(
     }
 
     /// Unmatched parentheses
-    UnmatchedParentheses unmatched_parens = checkUnmatchedParentheses(TokenIterator(tokens), last_token);
+    UnmatchedParentheses unmatched_parens = checkUnmatchedParentheses(TokenIterator(tokens));
     if (!unmatched_parens.empty())
     {
         out_error_message = getUnmatchedParenthesesErrorMessage(query_begin,
diff --git a/tests/queries/0_stateless/01180_client_syntax_errors.expect b/tests/queries/0_stateless/01180_client_syntax_errors.expect
new file mode 100755
index 00000000000..bc775ce2c57
--- /dev/null
+++ b/tests/queries/0_stateless/01180_client_syntax_errors.expect
@@ -0,0 +1,32 @@
+#!/usr/bin/expect -f
+
+log_user 0
+set timeout 5
+match_max 100000
+# A default timeout action is to do nothing, change it to fail
+expect_after {
+    timeout {
+        exit 1
+    }
+}
+
+set basedir [file dirname $argv0]
+spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT"
+expect ":) "
+
+# Make a query with syntax error
+send -- "select \r"
+expect "Syntax error: failed at position 7 (end of query):"
+expect "Expected one of: "
+
+# Make another query with syntax error
+send -- "CREATE TABLE t4 UUID '57f27aa5-141c-47c5-888a-9563681717f5' AS t1 (`rowNumberInAllBlocks()` UInt64, `toLowCardinality(arrayJoin(\['exchange', 'tables'\]))` LowCardinality(String)) ENGINE = MergeTree \r"
+expect "Syntax error: failed at position 93 ('UInt64'):*"
+
+# Make a query with unmatched parentheses
+send -- "select (1, 2\r"
+expect "Syntax error: failed at position 8 ('('):"
+expect "Unmatched parentheses: ("
+
+send -- "\4"
+expect eof
diff --git a/tests/queries/0_stateless/01180_client_syntax_errors.reference b/tests/queries/0_stateless/01180_client_syntax_errors.reference
new file mode 100644
index 00000000000..e69de29bb2d

From cc23a2b399d956fe3c42dcfe689dfddce2277933 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Mon, 1 Feb 2021 03:44:40 +0300
Subject: [PATCH 0452/1238] Update PredicateRewriteVisitor.cpp

---
 src/Interpreters/PredicateRewriteVisitor.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Interpreters/PredicateRewriteVisitor.cpp b/src/Interpreters/PredicateRewriteVisitor.cpp
index c567e769005..9e6d5543f2f 100644
--- a/src/Interpreters/PredicateRewriteVisitor.cpp
+++ b/src/Interpreters/PredicateRewriteVisitor.cpp
@@ -28,7 +28,7 @@ void PredicateRewriteVisitorData::visit(ASTSelectWithUnionQuery & union_select_q
 
     for (size_t index = 0; index < internal_select_list.size(); ++index)
     {
-        if (auto child_union = internal_select_list[index]->as<ASTSelectWithUnionQuery>())
+        if (auto * child_union = internal_select_list[index]->as<ASTSelectWithUnionQuery>())
             visit(*child_union, internal_select_list[index]);
         else
         {

From 1f948fc414dae41a47ee5af574dec1cc7c5cfe2c Mon Sep 17 00:00:00 2001
From: spff <spff@bitmessage.ch>
Date: Mon, 1 Feb 2021 09:49:55 +0800
Subject: [PATCH 0453/1238] Fix doc/interfaces/formats/jsonstringseachrow

In src/Processors/Formats/Impl
All JSONEachRowRowInputFormat.c / JSONEachRowRowOutputFormat.c / JSONEachRowWithProgressRowOutputFormat.c
are using JSONStringsXXX instead of JSONStringXXX
---
 docs/en/interfaces/formats.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md
index 11291d61300..33bf90a8b52 100644
--- a/docs/en/interfaces/formats.md
+++ b/docs/en/interfaces/formats.md
@@ -31,8 +31,8 @@ The supported formats are:
 | [JSONCompactString](#jsoncompactstring)                                                 | ✗     | ✔      |
 | [JSONEachRow](#jsoneachrow)                                                             | ✔     | ✔      |
 | [JSONEachRowWithProgress](#jsoneachrowwithprogress)                                     | ✗     | ✔      |
-| [JSONStringEachRow](#jsonstringeachrow)                                                 | ✔     | ✔      |
-| [JSONStringEachRowWithProgress](#jsonstringeachrowwithprogress)                         | ✗     | ✔      |
+| [JSONStringsEachRow](#jsonstringseachrow)                                               | ✔     | ✔      |
+| [JSONStringsEachRowWithProgress](#jsonstringseachrowwithprogress)                       | ✗     | ✔      |
 | [JSONCompactEachRow](#jsoncompacteachrow)                                               | ✔     | ✔      |
 | [JSONCompactEachRowWithNamesAndTypes](#jsoncompacteachrowwithnamesandtypes)             | ✔     | ✔      |
 | [JSONCompactStringEachRow](#jsoncompactstringeachrow)                                   | ✔     | ✔      |
@@ -612,7 +612,7 @@ Example:
 ```
 
 ## JSONEachRow {#jsoneachrow}
-## JSONStringEachRow {#jsonstringeachrow}
+## JSONStringsEachRow {#jsonstringseachrow}
 ## JSONCompactEachRow {#jsoncompacteachrow}
 ## JSONCompactStringEachRow {#jsoncompactstringeachrow}
 
@@ -627,9 +627,9 @@ When using these formats, ClickHouse outputs rows as separated, newline-delimite
 When inserting the data, you should provide a separate JSON value for each row.
 
 ## JSONEachRowWithProgress {#jsoneachrowwithprogress}
-## JSONStringEachRowWithProgress {#jsonstringeachrowwithprogress}
+## JSONStringsEachRowWithProgress {#jsonstringseachrowwithprogress}
 
-Differs from `JSONEachRow`/`JSONStringEachRow` in that ClickHouse will also yield progress information as JSON values.
+Differs from `JSONEachRow`/`JSONStringsEachRow` in that ClickHouse will also yield progress information as JSON values.
 
 ```json
 {"row":{"'hello'":"hello","multiply(42, number)":"0","range(5)":[0,1,2,3,4]}}

From efb9293587e90daa786a24ca6a1055a2d59cd8f9 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Mon, 1 Feb 2021 05:28:18 +0300
Subject: [PATCH 0454/1238] Adjust test scale

---
 tests/queries/0_stateless/01683_text_log_deadlock.reference | 2 +-
 tests/queries/0_stateless/01683_text_log_deadlock.sh        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/queries/0_stateless/01683_text_log_deadlock.reference b/tests/queries/0_stateless/01683_text_log_deadlock.reference
index b358a63ce24..76de47c4a46 100644
--- a/tests/queries/0_stateless/01683_text_log_deadlock.reference
+++ b/tests/queries/0_stateless/01683_text_log_deadlock.reference
@@ -1 +1 @@
-queries 100000
+queries 25000
diff --git a/tests/queries/0_stateless/01683_text_log_deadlock.sh b/tests/queries/0_stateless/01683_text_log_deadlock.sh
index ddb10c2d7f3..ee772bffa27 100755
--- a/tests/queries/0_stateless/01683_text_log_deadlock.sh
+++ b/tests/queries/0_stateless/01683_text_log_deadlock.sh
@@ -4,4 +4,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
 
-$CLICKHOUSE_BENCHMARK --secure -i 100000 -c 32 --query 'SELECT 1' 2>&1 | grep -oF 'queries 100000'
+$CLICKHOUSE_BENCHMARK --secure -i 25000 -c 32 --query 'SELECT 1' 2>&1 | grep -oF 'queries 25000'

From 768e461d28719bb6af2eecc32059e64429c4b6af Mon Sep 17 00:00:00 2001
From: yiguolei <676222867@qq.com>
Date: Mon, 1 Feb 2021 10:40:48 +0800
Subject: [PATCH 0455/1238] add functional test

---
 src/Interpreters/DDLWorker.cpp                |  6 ++--
 .../01671_ddl_hang_timeout.reference          |  0
 .../0_stateless/01671_ddl_hang_timeout.sh     | 28 +++++++++++++++++++
 3 files changed, 31 insertions(+), 3 deletions(-)
 create mode 100644 tests/queries/0_stateless/01671_ddl_hang_timeout.reference
 create mode 100644 tests/queries/0_stateless/01671_ddl_hang_timeout.sh

diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 85445eb0cff..e630c6abeb5 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -871,10 +871,10 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
         // Should return as soon as possible if the table is dropped.
         bool replica_dropped = replicated_storage->is_dropped;
         bool all_replicas_likely_detached = status.active_replicas == 0 && !DatabaseCatalog::instance().isTableExist(replicated_storage->getStorageID(), context);
-        if (replica_dropped || all_replicas_likely_detached) 
+        if (replica_dropped || all_replicas_likely_detached)
         {
-            LOG_WARNING(log, "Table is dropped or detached permantly, task {} will not be executed.", task.entry_name);
-            task.execution_status = ExecutionStatus(ErrorCodes::UNFINISHED, "Cannot execute replicated DDL query, table is dropped or detached permantly");
+            LOG_WARNING(log, ", task {} will not be executed.", task.entry_name);
+            task.execution_status = ExecutionStatus(ErrorCodes::UNFINISHED, "Cannot execute replicated DDL query, table is dropped or detached permanently");
             return false;
         }
 
diff --git a/tests/queries/0_stateless/01671_ddl_hang_timeout.reference b/tests/queries/0_stateless/01671_ddl_hang_timeout.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/01671_ddl_hang_timeout.sh b/tests/queries/0_stateless/01671_ddl_hang_timeout.sh
new file mode 100644
index 00000000000..3ea65075443
--- /dev/null
+++ b/tests/queries/0_stateless/01671_ddl_hang_timeout.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=fatal
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+. "$CURDIR"/../shell_config.sh
+$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS test_01671"
+$CLICKHOUSE_CLIENT --query "CREATE DATABASE test_01671"
+function thread_create_drop_table {
+    while true; do
+        REPLICA=$(($RANDOM % 10))
+        $CLICKHOUSE_CLIENT --query "CREATE TABLE test_01671.t1 (x UInt64, s Array(Nullable(String))) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_01671/test_01671', 'r_$REPLICA') order by x"
+        sleep 0.0$RANDOM
+        $CLICKHOUSE_CLIENT --query "DROP TABLE test_01671.t1"
+    done
+}
+function thread_alter_table {
+    while true; do
+        $CLICKHOUSE_CLIENT --query "ALTER TABLE test_01671.t1 on cluster test_shard_localhost ADD COLUMN newcol UInt32"
+        sleep 0.0$RANDOM
+    done
+}
+export -f thread_create_drop_table
+export -f thread_alter_table
+timeout 20 bash -c "thread_create_drop_table" &
+timeout 20 bash -c 'thread_alter_table' &
+wait
+sleep 1
+
+$CLICKHOUSE_CLIENT --query "DROP DATABASE test_01671";
\ No newline at end of file

From bef5af3f57986a103bc6510ee98ef0f981cb821d Mon Sep 17 00:00:00 2001
From: yiguolei <676222867@qq.com>
Date: Mon, 1 Feb 2021 11:35:02 +0800
Subject: [PATCH 0456/1238] fix functional test

---
 tests/queries/0_stateless/01671_ddl_hang_timeout.sh | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)
 mode change 100644 => 100755 tests/queries/0_stateless/01671_ddl_hang_timeout.sh

diff --git a/tests/queries/0_stateless/01671_ddl_hang_timeout.sh b/tests/queries/0_stateless/01671_ddl_hang_timeout.sh
old mode 100644
new mode 100755
index 3ea65075443..ec59b03b6d2
--- a/tests/queries/0_stateless/01671_ddl_hang_timeout.sh
+++ b/tests/queries/0_stateless/01671_ddl_hang_timeout.sh
@@ -7,14 +7,14 @@ $CLICKHOUSE_CLIENT --query "CREATE DATABASE test_01671"
 function thread_create_drop_table {
     while true; do
         REPLICA=$(($RANDOM % 10))
-        $CLICKHOUSE_CLIENT --query "CREATE TABLE test_01671.t1 (x UInt64, s Array(Nullable(String))) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_01671/test_01671', 'r_$REPLICA') order by x"
+        $CLICKHOUSE_CLIENT --query "CREATE TABLE IF NOT EXISTS test_01671.t1 (x UInt64, s Array(Nullable(String))) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_01671/test_01671', 'r_$REPLICA') order by x" 2>/dev/null
         sleep 0.0$RANDOM
-        $CLICKHOUSE_CLIENT --query "DROP TABLE test_01671.t1"
+        $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_01671.t1"
     done
 }
 function thread_alter_table {
     while true; do
-        $CLICKHOUSE_CLIENT --query "ALTER TABLE test_01671.t1 on cluster test_shard_localhost ADD COLUMN newcol UInt32"
+        $CLICKHOUSE_CLIENT --query "ALTER TABLE test_01671.t1 on cluster test_shard_localhost ADD COLUMN newcol UInt32" >/dev/null 2>&1
         sleep 0.0$RANDOM
     done
 }
@@ -25,4 +25,4 @@ timeout 20 bash -c 'thread_alter_table' &
 wait
 sleep 1
 
-$CLICKHOUSE_CLIENT --query "DROP DATABASE test_01671";
\ No newline at end of file
+$CLICKHOUSE_CLIENT --query "DROP DATABASE test_01671";

From e9a61c47380ad14c4b57cefd17758a092a9de2c3 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Mon, 1 Feb 2021 07:52:18 +0300
Subject: [PATCH 0457/1238] More correct

---
 src/Functions/DivisionUtils.h | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/src/Functions/DivisionUtils.h b/src/Functions/DivisionUtils.h
index 97f5e81337c..18a0331fd3f 100644
--- a/src/Functions/DivisionUtils.h
+++ b/src/Functions/DivisionUtils.h
@@ -90,8 +90,15 @@ struct DivideIntegralImpl
         }
         else
         {
-            if (!isFinite(a) || !isFinite(b))
-                throw Exception("Cannot perform integer division on infinite floating point numbers", ErrorCodes::ILLEGAL_DIVISION);
+            if constexpr (std::is_floating_point_v<A>)
+                if (!isNaN(a) || a > std::numeric_limits<CastA>::max() || a < std::numeric_limits<CastA>::lowest())
+                    throw Exception("Cannot perform integer division on infinite or too large floating point numbers",
+                        ErrorCodes::ILLEGAL_DIVISION);
+
+            if constexpr (std::is_floating_point_v<B>)
+                if (!isNaN(b) || b > std::numeric_limits<CastB>::max() || b < std::numeric_limits<CastB>::lowest())
+                    throw Exception("Cannot perform integer division on infinite or too large floating point numbers",
+                        ErrorCodes::ILLEGAL_DIVISION);
 
             return static_cast<Result>(checkedDivision(CastA(a), CastB(b)));
         }
@@ -121,8 +128,15 @@ struct ModuloImpl
         }
         else
         {
-            if (!isFinite(a) || !isFinite(b))
-                throw Exception("Cannot perform integer division on infinite floating point numbers", ErrorCodes::ILLEGAL_DIVISION);
+            if constexpr (std::is_floating_point_v<A>)
+                if (!isNaN(a) || a > std::numeric_limits<IntegerAType>::max() || a < std::numeric_limits<IntegerAType>::lowest())
+                    throw Exception("Cannot perform integer division on infinite or too large floating point numbers",
+                        ErrorCodes::ILLEGAL_DIVISION);
+
+            if constexpr (std::is_floating_point_v<B>)
+                if (!isNaN(b) || b > std::numeric_limits<IntegerBType>::max() || b < std::numeric_limits<IntegerBType>::lowest())
+                    throw Exception("Cannot perform integer division on infinite or too large floating point numbers",
+                        ErrorCodes::ILLEGAL_DIVISION);
 
             throwIfDivisionLeadsToFPE(IntegerAType(a), IntegerBType(b));
 

From 2459777f57a265378e949d7ab9926c29fc7b4de6 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Mon, 1 Feb 2021 07:55:58 +0300
Subject: [PATCH 0458/1238] Update 01681_bloom_filter_nullable_column.sql

---
 .../01681_bloom_filter_nullable_column.sql    | 51 +++++++++----------
 1 file changed, 23 insertions(+), 28 deletions(-)

diff --git a/tests/queries/0_stateless/01681_bloom_filter_nullable_column.sql b/tests/queries/0_stateless/01681_bloom_filter_nullable_column.sql
index 2a61d66ac73..4af1f74fca6 100644
--- a/tests/queries/0_stateless/01681_bloom_filter_nullable_column.sql
+++ b/tests/queries/0_stateless/01681_bloom_filter_nullable_column.sql
@@ -1,9 +1,5 @@
-DROP DATABASE IF EXISTS 01681_bloom_filter_nullable_column;
-CREATE DATABASE 01681_bloom_filter_nullable_column;
-
-DROP TABLE IF EXISTS 01681_bloom_filter_nullable_column.bloom_filter_nullable_index;
-
-CREATE TABLE 01681_bloom_filter_nullable_column.bloom_filter_nullable_index
+DROP TABLE IF EXISTS bloom_filter_nullable_index;
+CREATE TABLE bloom_filter_nullable_index
     (
         order_key UInt64,
         str Nullable(String),
@@ -13,44 +9,43 @@ CREATE TABLE 01681_bloom_filter_nullable_column.bloom_filter_nullable_index
     ENGINE = MergeTree() 
     ORDER BY order_key SETTINGS index_granularity = 6;
 
-INSERT INTO 01681_bloom_filter_nullable_column.bloom_filter_nullable_index VALUES (1, 'test');
-INSERT INTO 01681_bloom_filter_nullable_column.bloom_filter_nullable_index VALUES (2, 'test2');
+INSERT INTO bloom_filter_nullable_index VALUES (1, 'test');
+INSERT INTO bloom_filter_nullable_index VALUES (2, 'test2');
 
 SELECT 'NullableTuple with transform_null_in=0';
-SELECT * FROM 01681_bloom_filter_nullable_column.bloom_filter_nullable_index WHERE str IN
-    (SELECT '1048576', str FROM 01681_bloom_filter_nullable_column.bloom_filter_nullable_index) SETTINGS transform_null_in = 0;
-SELECT * FROM 01681_bloom_filter_nullable_column.bloom_filter_nullable_index WHERE str IN
-    (SELECT '1048576', str FROM 01681_bloom_filter_nullable_column.bloom_filter_nullable_index) SETTINGS transform_null_in = 0;
+SELECT * FROM bloom_filter_nullable_index WHERE str IN
+    (SELECT '1048576', str FROM bloom_filter_nullable_index) SETTINGS transform_null_in = 0;
+SELECT * FROM bloom_filter_nullable_index WHERE str IN
+    (SELECT '1048576', str FROM bloom_filter_nullable_index) SETTINGS transform_null_in = 0;
 
 SELECT 'NullableTuple with transform_null_in=1';
 
-SELECT * FROM 01681_bloom_filter_nullable_column.bloom_filter_nullable_index WHERE str IN
-    (SELECT '1048576', str FROM 01681_bloom_filter_nullable_column.bloom_filter_nullable_index) SETTINGS transform_null_in = 1; -- { serverError 20 }
+SELECT * FROM bloom_filter_nullable_index WHERE str IN
+    (SELECT '1048576', str FROM bloom_filter_nullable_index) SETTINGS transform_null_in = 1; -- { serverError 20 }
 
-SELECT * FROM 01681_bloom_filter_nullable_column.bloom_filter_nullable_index WHERE str IN
-    (SELECT '1048576', str FROM 01681_bloom_filter_nullable_column.bloom_filter_nullable_index) SETTINGS transform_null_in = 1; -- { serverError 20 }
+SELECT * FROM bloom_filter_nullable_index WHERE str IN
+    (SELECT '1048576', str FROM bloom_filter_nullable_index) SETTINGS transform_null_in = 1; -- { serverError 20 }
 
 
 SELECT 'NullableColumnFromCast with transform_null_in=0';
-SELECT * FROM 01681_bloom_filter_nullable_column.bloom_filter_nullable_index WHERE str IN
+SELECT * FROM bloom_filter_nullable_index WHERE str IN
     (SELECT cast('test', 'Nullable(String)')) SETTINGS transform_null_in = 0;
 
 SELECT 'NullableColumnFromCast with transform_null_in=1';
-SELECT * FROM 01681_bloom_filter_nullable_column.bloom_filter_nullable_index WHERE str IN
+SELECT * FROM bloom_filter_nullable_index WHERE str IN
     (SELECT cast('test', 'Nullable(String)')) SETTINGS transform_null_in = 1;
 
-CREATE TABLE 01681_bloom_filter_nullable_column.nullable_string_value (value Nullable(String)) ENGINE=TinyLog;
-INSERT INTO 01681_bloom_filter_nullable_column.nullable_string_value VALUES ('test');
+DROP TABLE IF EXISTS nullable_string_value;
+CREATE TABLE nullable_string_value (value Nullable(String)) ENGINE=TinyLog;
+INSERT INTO nullable_string_value VALUES ('test');
 
 SELECT 'NullableColumnFromTable with transform_null_in=0';
-SELECT * FROM 01681_bloom_filter_nullable_column.bloom_filter_nullable_index WHERE str IN
-    (SELECT value FROM 01681_bloom_filter_nullable_column.nullable_string_value) SETTINGS transform_null_in = 0;
+SELECT * FROM bloom_filter_nullable_index WHERE str IN
+    (SELECT value FROM nullable_string_value) SETTINGS transform_null_in = 0;
 
 SELECT 'NullableColumnFromTable with transform_null_in=1';
-SELECT * FROM 01681_bloom_filter_nullable_column.bloom_filter_nullable_index WHERE str IN
-    (SELECT value FROM 01681_bloom_filter_nullable_column.nullable_string_value) SETTINGS transform_null_in = 1;
+SELECT * FROM bloom_filter_nullable_index WHERE str IN
+    (SELECT value FROM nullable_string_value) SETTINGS transform_null_in = 1;
 
-DROP TABLE 01681_bloom_filter_nullable_column.nullable_string_value; 
-
-DROP TABLE 01681_bloom_filter_nullable_column.bloom_filter_nullable_index;
-DROP DATABASE 01681_bloom_filter_nullable_column;
+DROP TABLE nullable_string_value; 
+DROP TABLE bloom_filter_nullable_index;

From fa03fbdc5bb9ffc2f797b1bab6d3403638b87128 Mon Sep 17 00:00:00 2001
From: yiguolei <676222867@qq.com>
Date: Mon, 1 Feb 2021 13:16:54 +0800
Subject: [PATCH 0459/1238] fix functional test

---
 tests/queries/0_stateless/01671_ddl_hang_timeout.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/01671_ddl_hang_timeout.sh b/tests/queries/0_stateless/01671_ddl_hang_timeout.sh
index ec59b03b6d2..fb4926c8b00 100755
--- a/tests/queries/0_stateless/01671_ddl_hang_timeout.sh
+++ b/tests/queries/0_stateless/01671_ddl_hang_timeout.sh
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
-CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=fatal
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
 $CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS test_01671"
 $CLICKHOUSE_CLIENT --query "CREATE DATABASE test_01671"

From 1517decf4af597257e0b3b74fbaf7573c1be3475 Mon Sep 17 00:00:00 2001
From: "Mc.Spring" <Heresy.Mc@gmail.com>
Date: Mon, 1 Feb 2021 13:43:39 +0800
Subject: [PATCH 0460/1238] fix cmake command issue

add missing space for cmake command line
---
 docs/en/development/build-osx.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/development/build-osx.md b/docs/en/development/build-osx.md
index 60365ad744a..e0b1be710f1 100644
--- a/docs/en/development/build-osx.md
+++ b/docs/en/development/build-osx.md
@@ -40,7 +40,7 @@ $ cd ClickHouse
 ``` bash
 $ mkdir build
 $ cd build
-$ cmake ..-DCMAKE_C_COMPILER=`brew --prefix llvm`/bin/clang -DCMAKE_CXX_COMPILER=`brew --prefix llvm`/bin/clang++ -DCMAKE_PREFIX_PATH=`brew --prefix llvm`
+$ cmake .. -DCMAKE_C_COMPILER=`brew --prefix llvm`/bin/clang -DCMAKE_CXX_COMPILER=`brew --prefix llvm`/bin/clang++ -DCMAKE_PREFIX_PATH=`brew --prefix llvm`
 $ ninja
 $ cd ..
 ```

From 57664bffe0be9e3dcc76b02aeefca9e5b8e44453 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Mon, 1 Feb 2021 10:11:41 +0300
Subject: [PATCH 0461/1238] Amend

---
 src/Functions/DivisionUtils.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/Functions/DivisionUtils.h b/src/Functions/DivisionUtils.h
index 18a0331fd3f..d0df7e41af1 100644
--- a/src/Functions/DivisionUtils.h
+++ b/src/Functions/DivisionUtils.h
@@ -91,12 +91,12 @@ struct DivideIntegralImpl
         else
         {
             if constexpr (std::is_floating_point_v<A>)
-                if (!isNaN(a) || a > std::numeric_limits<CastA>::max() || a < std::numeric_limits<CastA>::lowest())
+                if (isNaN(a) || a > std::numeric_limits<CastA>::max() || a < std::numeric_limits<CastA>::lowest())
                     throw Exception("Cannot perform integer division on infinite or too large floating point numbers",
                         ErrorCodes::ILLEGAL_DIVISION);
 
             if constexpr (std::is_floating_point_v<B>)
-                if (!isNaN(b) || b > std::numeric_limits<CastB>::max() || b < std::numeric_limits<CastB>::lowest())
+                if (isNaN(b) || b > std::numeric_limits<CastB>::max() || b < std::numeric_limits<CastB>::lowest())
                     throw Exception("Cannot perform integer division on infinite or too large floating point numbers",
                         ErrorCodes::ILLEGAL_DIVISION);
 
@@ -129,12 +129,12 @@ struct ModuloImpl
         else
         {
             if constexpr (std::is_floating_point_v<A>)
-                if (!isNaN(a) || a > std::numeric_limits<IntegerAType>::max() || a < std::numeric_limits<IntegerAType>::lowest())
+                if (isNaN(a) || a > std::numeric_limits<IntegerAType>::max() || a < std::numeric_limits<IntegerAType>::lowest())
                     throw Exception("Cannot perform integer division on infinite or too large floating point numbers",
                         ErrorCodes::ILLEGAL_DIVISION);
 
             if constexpr (std::is_floating_point_v<B>)
-                if (!isNaN(b) || b > std::numeric_limits<IntegerBType>::max() || b < std::numeric_limits<IntegerBType>::lowest())
+                if (isNaN(b) || b > std::numeric_limits<IntegerBType>::max() || b < std::numeric_limits<IntegerBType>::lowest())
                     throw Exception("Cannot perform integer division on infinite or too large floating point numbers",
                         ErrorCodes::ILLEGAL_DIVISION);
 

From 49979afbc1e3b6dea30a761dc5db375dc157b242 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Mon, 1 Feb 2021 10:12:14 +0300
Subject: [PATCH 0462/1238] Fix bad translation

---
 src/IO/ReadHelpers.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp
index 76a722a8ad1..5a159defe06 100644
--- a/src/IO/ReadHelpers.cpp
+++ b/src/IO/ReadHelpers.cpp
@@ -683,7 +683,7 @@ void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV &
 
             /** CSV format can contain insignificant spaces and tabs.
               * Usually the task of skipping them is for the calling code.
-              * But in this case, it will be difficult to do this, so remove the trailing whitespace by yourself.
+              * But in this case, it will be difficult to do this, so remove the trailing whitespace by ourself.
               */
             size_t size = s.size();
             while (size > 0

From 67412bd5296453f45dfc050806cc5914fc76dbe8 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Mon, 1 Feb 2021 10:51:10 +0300
Subject: [PATCH 0463/1238] Don't accept connections if we don't see leader +
 race fix from upstream

---
 contrib/NuRaft                                |  2 +-
 src/Coordination/NuKeeperServer.cpp           | 33 +++++++++++++++----
 src/Coordination/NuKeeperServer.h             |  2 ++
 .../TestKeeperStorageDispatcher.h             |  5 +++
 src/Server/TestKeeperTCPHandler.cpp           | 29 +++++++++++-----
 src/Server/TestKeeperTCPHandler.h             |  2 +-
 .../configs/enable_test_keeper1.xml           |  4 +--
 .../configs/enable_test_keeper2.xml           |  4 +--
 .../configs/enable_test_keeper3.xml           |  4 +--
 .../test_testkeeper_multinode/test.py         | 30 +++++++++++++++++
 10 files changed, 93 insertions(+), 22 deletions(-)

diff --git a/contrib/NuRaft b/contrib/NuRaft
index 644c264252a..9eb76db3ff1 160000
--- a/contrib/NuRaft
+++ b/contrib/NuRaft
@@ -1 +1 @@
-Subproject commit 644c264252aae91d9ad58366b086641bf8314008
+Subproject commit 9eb76db3ff1a78f672303b5b51dcbe0f9b22cf96
diff --git a/src/Coordination/NuKeeperServer.cpp b/src/Coordination/NuKeeperServer.cpp
index bcc348d1be3..272632387d5 100644
--- a/src/Coordination/NuKeeperServer.cpp
+++ b/src/Coordination/NuKeeperServer.cpp
@@ -50,9 +50,11 @@ void NuKeeperServer::startup()
     params.auto_forwarding_ = true;
     params.return_method_ = nuraft::raft_params::blocking;
 
+    nuraft::asio_service::options asio_opts{};
+
     raft_instance = launcher.init(
         state_machine, state_manager, nuraft::cs_new<LoggerWrapper>("RaftInstance"), port,
-        nuraft::asio_service::options{}, params);
+        asio_opts, params);
 
     if (!raft_instance)
         throw Exception(ErrorCodes::RAFT_ERROR, "Cannot allocate RAFT instance");
@@ -127,10 +129,17 @@ TestKeeperStorage::ResponsesForSessions NuKeeperServer::readZooKeeperResponses(n
             response = std::make_shared<Coordination::ZooKeeperWatchResponse>();
         else
         {
-            response = ops_mapping[session_id][xid];
-            ops_mapping[session_id].erase(xid);
+            auto session_xids = ops_mapping.find(session_id);
+            if (session_xids == ops_mapping.end())
+                throw Exception(ErrorCodes::RAFT_ERROR, "Unknown session id {}", session_id);
+            auto response_it = session_xids->second.find(xid);
+            if (response_it == session_xids->second.end())
+                throw Exception(ErrorCodes::RAFT_ERROR, "Unknown xid {} for session id {}", xid, session_id);
+
+            response = response_it->second;
+            ops_mapping[session_id].erase(response_it);
             if (ops_mapping[session_id].empty())
-                ops_mapping.erase(session_id);
+                ops_mapping.erase(session_xids);
         }
 
         if (err == Coordination::Error::ZOK && (xid == Coordination::WATCH_XID || response->getOpNum() != Coordination::OpNum::Close))
@@ -147,7 +156,7 @@ TestKeeperStorage::ResponsesForSessions NuKeeperServer::readZooKeeperResponses(n
 
 TestKeeperStorage::ResponsesForSessions NuKeeperServer::putRequests(const TestKeeperStorage::RequestsForSessions & requests)
 {
-    if (raft_instance->is_leader_alive() && requests.size() == 1 && requests[0].request->isReadRequest())
+    if (isLeaderAlive() && requests.size() == 1 && requests[0].request->isReadRequest())
     {
         return state_machine->processReadRequest(requests[0]);
     }
@@ -191,7 +200,11 @@ TestKeeperStorage::ResponsesForSessions NuKeeperServer::putRequests(const TestKe
         else if (result->get_result_code() != nuraft::cmd_result_code::OK)
             throw Exception(ErrorCodes::RAFT_ERROR, "Requests result failed with code {} and message: '{}'", result->get_result_code(), result->get_result_str());
 
-        return readZooKeeperResponses(result->get());
+        auto result_buf = result->get();
+        if (result_buf == nullptr)
+            throw Exception(ErrorCodes::RAFT_ERROR, "Received nullptr from RAFT leader");
+
+        return readZooKeeperResponses(result_buf);
     }
 }
 
@@ -210,6 +223,9 @@ int64_t NuKeeperServer::getSessionID()
         throw Exception(ErrorCodes::RAFT_ERROR, "session_id request failed to RAFT");
 
     auto resp = result->get();
+    if (resp == nullptr)
+        throw Exception(ErrorCodes::RAFT_ERROR, "Received nullptr as session_id");
+
     nuraft::buffer_serializer bs_resp(resp);
     return bs_resp.get_i64();
 }
@@ -219,6 +235,11 @@ bool NuKeeperServer::isLeader() const
     return raft_instance->is_leader();
 }
 
+bool NuKeeperServer::isLeaderAlive() const
+{
+    return raft_instance->is_leader_alive();
+}
+
 bool NuKeeperServer::waitForServer(int32_t id) const
 {
     for (size_t i = 0; i < 10; ++i)
diff --git a/src/Coordination/NuKeeperServer.h b/src/Coordination/NuKeeperServer.h
index 7fd70ac26e2..f5f52802025 100644
--- a/src/Coordination/NuKeeperServer.h
+++ b/src/Coordination/NuKeeperServer.h
@@ -50,6 +50,8 @@ public:
 
     bool isLeader() const;
 
+    bool isLeaderAlive() const;
+
     bool waitForServer(int32_t server_id) const;
     void waitForServers(const std::vector<int32_t> & ids) const;
     void waitForCatchUp() const;
diff --git a/src/Coordination/TestKeeperStorageDispatcher.h b/src/Coordination/TestKeeperStorageDispatcher.h
index a6c6118f9c4..a67a0c1fa3a 100644
--- a/src/Coordination/TestKeeperStorageDispatcher.h
+++ b/src/Coordination/TestKeeperStorageDispatcher.h
@@ -52,6 +52,11 @@ public:
         return server->isLeader();
     }
 
+    bool hasLeader() const
+    {
+        return server->isLeaderAlive();
+    }
+
     int64_t getSessionID()
     {
         std::lock_guard lock(session_id_mutex);
diff --git a/src/Server/TestKeeperTCPHandler.cpp b/src/Server/TestKeeperTCPHandler.cpp
index 5e5ba19f1a6..532c0723e69 100644
--- a/src/Server/TestKeeperTCPHandler.cpp
+++ b/src/Server/TestKeeperTCPHandler.cpp
@@ -232,10 +232,14 @@ TestKeeperTCPHandler::TestKeeperTCPHandler(IServer & server_, const Poco::Net::S
 {
 }
 
-void TestKeeperTCPHandler::sendHandshake()
+void TestKeeperTCPHandler::sendHandshake(bool has_leader)
 {
     Coordination::write(Coordination::SERVER_HANDSHAKE_LENGTH, *out);
-    Coordination::write(Coordination::ZOOKEEPER_PROTOCOL_VERSION, *out);
+    if (has_leader)
+        Coordination::write(Coordination::ZOOKEEPER_PROTOCOL_VERSION, *out);
+    else /// Specially ignore connections if we are not leader, client will throw exception
+        Coordination::write(42, *out);
+
     Coordination::write(Coordination::DEFAULT_SESSION_TIMEOUT_MS, *out);
     Coordination::write(session_id, *out);
     std::array<char, Coordination::PASSWORD_LENGTH> passwd{};
@@ -315,18 +319,27 @@ void TestKeeperTCPHandler::runImpl()
         return;
     }
 
-    try
+    if (test_keeper_storage_dispatcher->hasLeader())
     {
-        session_id = test_keeper_storage_dispatcher->getSessionID();
+        try
+        {
+            session_id = test_keeper_storage_dispatcher->getSessionID();
+        }
+        catch (const Exception & e)
+        {
+            LOG_WARNING(log, "Cannot receive session id {}", e.displayText());
+            return;
+
+        }
+
+        sendHandshake(true);
     }
-    catch (const Exception & e)
+    else
     {
-        LOG_WARNING(log, "Cannot receive session id {}", e.displayText());
+        sendHandshake(false);
         return;
     }
 
-    sendHandshake();
-
     auto response_fd = poll_wrapper->getResponseFD();
     auto response_callback = [this, response_fd] (const Coordination::ZooKeeperResponsePtr & response)
     {
diff --git a/src/Server/TestKeeperTCPHandler.h b/src/Server/TestKeeperTCPHandler.h
index e7372e8dd82..53132a2b491 100644
--- a/src/Server/TestKeeperTCPHandler.h
+++ b/src/Server/TestKeeperTCPHandler.h
@@ -45,7 +45,7 @@ private:
 
     void runImpl();
 
-    void sendHandshake();
+    void sendHandshake(bool has_leader);
     void receiveHandshake();
 
     std::pair<Coordination::OpNum, Coordination::XID> receiveRequest();
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
index 7fcd76ea57a..81f68f50c7c 100644
--- a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
@@ -15,14 +15,14 @@
             <server>
                 <id>2</id>
                 <hostname>node2</hostname>
-                <port>44445</port>
+                <port>44444</port>
                 <can_become_leader>true</can_become_leader>
                 <priority>2</priority>
             </server>
             <server>
                 <id>3</id>
                 <hostname>node3</hostname>
-                <port>44446</port>
+                <port>44444</port>
                 <can_become_leader>true</can_become_leader>
                 <priority>1</priority>
             </server>
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
index f9d6dcad1d6..73340973367 100644
--- a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
@@ -15,14 +15,14 @@
             <server>
                 <id>2</id>
                 <hostname>node2</hostname>
-                <port>44445</port>
+                <port>44444</port>
                 <can_become_leader>true</can_become_leader>
                 <priority>2</priority>
             </server>
             <server>
                 <id>3</id>
                 <hostname>node3</hostname>
-                <port>44446</port>
+                <port>44444</port>
                 <can_become_leader>true</can_become_leader>
                 <priority>1</priority>
             </server>
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
index 7d71fd3a20d..fbc51489d11 100644
--- a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
@@ -15,14 +15,14 @@
             <server>
                 <id>2</id>
                 <hostname>node2</hostname>
-                <port>44445</port>
+                <port>44444</port>
                 <can_become_leader>true</can_become_leader>
                 <priority>2</priority>
             </server>
             <server>
                 <id>3</id>
                 <hostname>node3</hostname>
-                <port>44446</port>
+                <port>44444</port>
                 <can_become_leader>true</can_become_leader>
                 <priority>1</priority>
             </server>
diff --git a/tests/integration/test_testkeeper_multinode/test.py b/tests/integration/test_testkeeper_multinode/test.py
index 8d35e30400a..fe568e7252d 100644
--- a/tests/integration/test_testkeeper_multinode/test.py
+++ b/tests/integration/test_testkeeper_multinode/test.py
@@ -62,9 +62,14 @@ def test_blocade_leader(started_cluster):
 
         for i in range(100):
             try:
+                node2.query("SYSTEM RESTART REPLICA t1")
                 node2.query("INSERT INTO t1 SELECT rand() FROM numbers(100)")
                 break
             except Exception as ex:
+                try:
+                    node2.query("ATTACH TABLE t1")
+                except Exception as attach_ex:
+                    print("Got exception node2", smaller_exception(attach_ex))
                 print("Got exception node2", smaller_exception(ex))
                 time.sleep(0.5)
         else:
@@ -72,9 +77,14 @@ def test_blocade_leader(started_cluster):
 
         for i in range(100):
             try:
+                node3.query("SYSTEM RESTART REPLICA t1")
                 node3.query("INSERT INTO t1 SELECT rand() FROM numbers(100)")
                 break
             except Exception as ex:
+                try:
+                    node3.query("ATTACH TABLE t1")
+                except Exception as attach_ex:
+                    print("Got exception node3", smaller_exception(attach_ex))
                 print("Got exception node3", smaller_exception(ex))
                 time.sleep(0.5)
         else:
@@ -86,6 +96,11 @@ def test_blocade_leader(started_cluster):
                 node.query("SYSTEM RESTART REPLICA t1")
                 break
             except Exception as ex:
+                try:
+                    node.query("ATTACH TABLE t1")
+                except Exception as attach_ex:
+                    print("Got exception node{}".format(n + 1), smaller_exception(attach_ex))
+
                 print("Got exception node{}".format(n + 1), smaller_exception(ex))
                 time.sleep(0.5)
         else:
@@ -136,9 +151,14 @@ def test_blocade_leader_twice(started_cluster):
 
         for i in range(100):
             try:
+                node2.query("SYSTEM RESTART REPLICA t2")
                 node2.query("INSERT INTO t2 SELECT rand() FROM numbers(100)")
                 break
             except Exception as ex:
+                try:
+                    node2.query("ATTACH TABLE t2")
+                except Exception as attach_ex:
+                    print("Got exception node2", smaller_exception(attach_ex))
                 print("Got exception node2", smaller_exception(ex))
                 time.sleep(0.5)
         else:
@@ -146,9 +166,14 @@ def test_blocade_leader_twice(started_cluster):
 
         for i in range(100):
             try:
+                node3.query("SYSTEM RESTART REPLICA t2")
                 node3.query("INSERT INTO t2 SELECT rand() FROM numbers(100)")
                 break
             except Exception as ex:
+                try:
+                    node3.query("ATTACH TABLE t2")
+                except Exception as attach_ex:
+                    print("Got exception node3", smaller_exception(attach_ex))
                 print("Got exception node3", smaller_exception(ex))
                 time.sleep(0.5)
         else:
@@ -179,6 +204,11 @@ def test_blocade_leader_twice(started_cluster):
                 node.query("SYSTEM RESTART REPLICA t2")
                 break
             except Exception as ex:
+                try:
+                    node.query("ATTACH TABLE t2")
+                except Exception as attach_ex:
+                    print("Got exception node{}".format(n + 1), smaller_exception(attach_ex))
+
                 print("Got exception node{}".format(n + 1), smaller_exception(ex))
                 time.sleep(0.5)
         else:

From bedc472bb0f7555f57c524cadec9288f0add76ee Mon Sep 17 00:00:00 2001
From: Stig Bakken <stig@stigbakken.com>
Date: Thu, 28 Jan 2021 20:26:47 +0800
Subject: [PATCH 0464/1238] Show MaterializeMySQL tables in system.parts

---
 src/Storages/System/StorageSystemPartsBase.cpp    |  8 ++++++++
 .../materialize_with_ddl.py                       | 15 ++++++++++++++-
 .../test_materialize_mysql_database/test.py       |  4 ++++
 3 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp
index 5a3d7027dd4..9b5bf5a4b48 100644
--- a/src/Storages/System/StorageSystemPartsBase.cpp
+++ b/src/Storages/System/StorageSystemPartsBase.cpp
@@ -7,6 +7,7 @@
 #include <DataTypes/DataTypeDateTime.h>
 #include <DataTypes/DataTypeDate.h>
 #include <Storages/MergeTree/MergeTreeData.h>
+#include <Storages/StorageMaterializeMySQL.h>
 #include <Storages/VirtualColumnUtils.h>
 #include <Access/ContextAccess.h>
 #include <Databases/IDatabase.h>
@@ -119,6 +120,13 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, const
 
                     String engine_name = storage->getName();
 
+#if USE_MYSQL
+                    if (auto * proxy = dynamic_cast<StorageMaterializeMySQL *>(storage.get()))
+                    {
+                        auto nested = proxy->getNested();
+                        storage.swap(nested);
+                    }
+#endif
                     if (!dynamic_cast<MergeTreeData *>(storage.get()))
                         continue;
 
diff --git a/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py b/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py
index 11600359b66..e3ac7fc0c14 100644
--- a/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py
+++ b/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py
@@ -733,7 +733,6 @@ def clickhouse_killed_while_insert(clickhouse_node, mysql_node, service_name):
     mysql_node.query("DROP DATABASE kill_clickhouse_while_insert")
     clickhouse_node.query("DROP DATABASE kill_clickhouse_while_insert")
 
-
 def utf8mb4_test(clickhouse_node, mysql_node, service_name):
     mysql_node.query("DROP DATABASE IF EXISTS utf8mb4_test")
     clickhouse_node.query("DROP DATABASE IF EXISTS utf8mb4_test")
@@ -743,3 +742,17 @@ def utf8mb4_test(clickhouse_node, mysql_node, service_name):
     clickhouse_node.query("CREATE DATABASE utf8mb4_test ENGINE = MaterializeMySQL('{}:3306', 'utf8mb4_test', 'root', 'clickhouse')".format(service_name))
     check_query(clickhouse_node, "SELECT id, name FROM utf8mb4_test.test ORDER BY id", "1\t\U0001F984\n2\t\u2601\n")
 
+def system_parts_test(clickhouse_node, mysql_node, service_name):
+    mysql_node.query("DROP DATABASE IF EXISTS system_parts_test")
+    clickhouse_node.query("DROP DATABASE IF EXISTS system_parts_test")
+    mysql_node.query("CREATE DATABASE system_parts_test")
+    mysql_node.query("CREATE TABLE system_parts_test.test ( `id` int(11) NOT NULL, PRIMARY KEY (`id`) ) ENGINE=InnoDB;")
+    mysql_node.query("INSERT INTO system_parts_test.test VALUES(1),(2),(3)")
+    def check_active_parts(num):
+        check_query(clickhouse_node, "SELECT count() FROM system.parts WHERE database = 'system_parts_test' AND table = 'test' AND active = 1", "{}\n".format(num))
+    clickhouse_node.query("CREATE DATABASE system_parts_test ENGINE = MaterializeMySQL('{}:3306', 'system_parts_test', 'root', 'clickhouse')".format(service_name))
+    check_active_parts(1)
+    mysql_node.query("INSERT INTO system_parts_test.test VALUES(4),(5),(6)")
+    check_active_parts(2)
+    clickhouse_node.query("OPTIMIZE TABLE system_parts_test.test")
+    check_active_parts(1)
diff --git a/tests/integration/test_materialize_mysql_database/test.py b/tests/integration/test_materialize_mysql_database/test.py
index 22322671f13..8cd2f7def07 100644
--- a/tests/integration/test_materialize_mysql_database/test.py
+++ b/tests/integration/test_materialize_mysql_database/test.py
@@ -233,3 +233,7 @@ def test_clickhouse_killed_while_insert_8_0(started_cluster, started_mysql_8_0,
 def test_utf8mb4(started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node):
     materialize_with_ddl.utf8mb4_test(clickhouse_node, started_mysql_5_7, "mysql1")
     materialize_with_ddl.utf8mb4_test(clickhouse_node, started_mysql_8_0, "mysql8_0")
+
+@pytest.mark.parametrize(('clickhouse_node'), [node_db_ordinary, node_db_ordinary])
+def test_system_parts_table(started_cluster, started_mysql_8_0, clickhouse_node):
+    materialize_with_ddl.system_parts_test(clickhouse_node, started_mysql_8_0, "mysql8_0")

From 9a1e52a80d30a670fafa296c71173ea5b3c8b080 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Mon, 1 Feb 2021 12:25:52 +0300
Subject: [PATCH 0465/1238] Added performance test

---
 tests/performance/reinterpret_as.xml | 33 ++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100644 tests/performance/reinterpret_as.xml

diff --git a/tests/performance/reinterpret_as.xml b/tests/performance/reinterpret_as.xml
new file mode 100644
index 00000000000..16045d255b0
--- /dev/null
+++ b/tests/performance/reinterpret_as.xml
@@ -0,0 +1,33 @@
+<test max_ignored_relative_change="0.2">
+    <settings>
+        <allow_experimental_bigint_types>1</allow_experimental_bigint_types>
+        <max_memory_usage>15G</max_memory_usage>
+    </settings>
+
+    <create_query>CREATE TABLE t (a UInt64, b UInt256, c Int128, d Int256, f String, g FixedString(20)) ENGINE = Memory</create_query>
+    <!-- use less threads to save memory -->
+    <fill_query>
+        INSERT INTO t
+        SELECT number, number, number, number, toString(number) as string, string
+        FROM numbers_mt(200000000) SETTINGS max_threads = 8
+    </fill_query>
+
+    <drop_query>DROP TABLE IF EXISTS t</drop_query>
+
+    <query>SELECT reinterpretAsUInt8(a), reinterpretAsUInt8(b), reinterpretAsUInt8(c), reinterpretAsUInt8(d), reinterpretAsUInt8(f), reinterpretAsUInt8(g) FROM t FORMAT Null</query>
+    <query>SELECT reinterpretAsUInt16(a), reinterpretAsUInt16(b), reinterpretAsUInt16(c), reinterpretAsUInt16(d), reinterpretAsUInt16(f), reinterpretAsUInt16(g) FROM t FORMAT Null</query>
+    <query>SELECT reinterpretAsUInt32(a), reinterpretAsUInt32(b), reinterpretAsUInt32(c), reinterpretAsUInt32(d), reinterpretAsUInt32(f), reinterpretAsUInt32(g) FROM t FORMAT Null</query>
+    <query>SELECT reinterpretAsUInt64(a), reinterpretAsUInt64(b), reinterpretAsUInt64(c), reinterpretAsUInt64(d), reinterpretAsUInt64(f), reinterpretAsUInt64(g) FROM t FORMAT Null</query>
+    <query>SELECT reinterpretAsUInt256(a), reinterpretAsUInt256(b), reinterpretAsUInt256(c), reinterpretAsUInt256(d), reinterpretAsUInt256(f), reinterpretAsUInt256(g) FROM t FORMAT Null</query>
+
+    <query>SELECT reinterpretAsInt8(a), reinterpretAsInt8(b), reinterpretAsInt8(c), reinterpretAsInt8(d), reinterpretAsInt8(f), reinterpretAsInt8(g) FROM t FORMAT Null</query>
+    <query>SELECT reinterpretAsInt16(a), reinterpretAsInt16(b), reinterpretAsInt16(c), reinterpretAsInt16(d), reinterpretAsInt16(f), reinterpretAsInt16(g) FROM t FORMAT Null</query>
+    <query>SELECT reinterpretAsInt32(a), reinterpretAsInt32(b), reinterpretAsInt32(c), reinterpretAsInt32(d), reinterpretAsInt32(f), reinterpretAsInt32(g) FROM t FORMAT Null</query>
+    <query>SELECT reinterpretAsInt64(a), reinterpretAsInt64(b), reinterpretAsInt64(c), reinterpretAsInt64(d), reinterpretAsInt64(f), reinterpretAsInt64(g) FROM t FORMAT Null</query>
+    <query>SELECT reinterpretAsInt128(a), reinterpretAsInt128(b), reinterpretAsInt128(c), reinterpretAsInt128(d), reinterpretAsInt128(f), reinterpretAsInt64(g) FROM t FORMAT Null</query>
+    <query>SELECT reinterpretAsInt256(a), reinterpretAsInt256(b), reinterpretAsInt256(c), reinterpretAsInt256(d), reinterpretAsInt256(f), reinterpretAsInt256(g) FROM t FORMAT Null</query>
+
+    <query>SELECT reinterpretAsString(a), reinterpretAsString(b), reinterpretAsString(c), reinterpretAsString(d), reinterpretAsInt128(f), reinterpretAsString(g) FROM t FORMAT Null</query>
+    <query>SELECT reinterpretAsFixedString(a), reinterpretAsFixedString(b), reinterpretAsFixedString(c), reinterpretAsFixedString(d), reinterpretAsFixedString(g) FROM t FORMAT Null</query>
+
+</test>

From 4f955be8328350dc2eb25fd16f6cb13bb074395a Mon Sep 17 00:00:00 2001
From: tavplubix <tavplubix@gmail.com>
Date: Mon, 1 Feb 2021 12:32:36 +0300
Subject: [PATCH 0466/1238] fix sync

---
 src/Storages/StorageMaterializeMySQL.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/Storages/StorageMaterializeMySQL.h b/src/Storages/StorageMaterializeMySQL.h
index ea90c1ffc9e..f787470e2d2 100644
--- a/src/Storages/StorageMaterializeMySQL.h
+++ b/src/Storages/StorageMaterializeMySQL.h
@@ -1,6 +1,8 @@
 #pragma once
 
+#if !defined(ARCADIA_BUILD)
 #include "config_core.h"
+#endif
 
 #if USE_MYSQL
 

From dafb0ef4e99a24b6494a2e6cbcdef1458836c651 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Tue, 26 Jan 2021 23:49:52 +0300
Subject: [PATCH 0467/1238] ExecutableDictionarySource added implicit_key
 option

---
 src/Core/Block.cpp                            |   9 ++
 src/Core/Block.h                              |   1 +
 src/Dictionaries/DictionarySourceHelpers.cpp  |  45 +++---
 src/Dictionaries/DictionarySourceHelpers.h    |  16 +-
 src/Dictionaries/DictionaryStructure.cpp      |  15 ++
 src/Dictionaries/DictionaryStructure.h        |   2 +
 .../ExecutableDictionarySource.cpp            | 119 +++++++++++++-
 src/Dictionaries/ExecutableDictionarySource.h |   2 +-
 src/Dictionaries/HTTPDictionarySource.cpp     |  12 +-
 tests/config/executable_dictionary.xml        | 148 ++++++++++++++++++
 .../01474_executable_dictionary.reference     |   5 +
 .../01474_executable_dictionary.sql           |   8 +
 12 files changed, 347 insertions(+), 35 deletions(-)

diff --git a/src/Core/Block.cpp b/src/Core/Block.cpp
index cd2855739e2..56c045bc8df 100644
--- a/src/Core/Block.cpp
+++ b/src/Core/Block.cpp
@@ -409,6 +409,15 @@ Block Block::cloneWithoutColumns() const
     return res;
 }
 
+Block Block::cloneWithCutColumns(size_t start, size_t length) const
+{
+    Block copy = *this;
+
+    for (size_t i = 0; i < copy.data.size(); ++i)
+        copy.data[i].column = copy.data[i].column->cut(start, length);
+
+    return copy;
+}
 
 Block Block::sortColumns() const
 {
diff --git a/src/Core/Block.h b/src/Core/Block.h
index f588373aaed..14f4f57caed 100644
--- a/src/Core/Block.h
+++ b/src/Core/Block.h
@@ -129,6 +129,7 @@ public:
     void setColumns(const Columns & columns);
     Block cloneWithColumns(const Columns & columns) const;
     Block cloneWithoutColumns() const;
+    Block cloneWithCutColumns(size_t start, size_t length) const;
 
     /** Get empty columns with the same types as in block. */
     MutableColumns cloneEmptyColumns() const;
diff --git a/src/Dictionaries/DictionarySourceHelpers.cpp b/src/Dictionaries/DictionarySourceHelpers.cpp
index 309bc64e179..3227704ff4b 100644
--- a/src/Dictionaries/DictionarySourceHelpers.cpp
+++ b/src/Dictionaries/DictionarySourceHelpers.cpp
@@ -1,6 +1,5 @@
 #include "DictionarySourceHelpers.h"
 #include <Columns/ColumnsNumber.h>
-#include <Core/Block.h>
 #include <Core/ColumnWithTypeAndName.h>
 #include <DataStreams/IBlockOutputStream.h>
 #include <DataTypes/DataTypesNumber.h>
@@ -13,44 +12,54 @@
 
 namespace DB
 {
-/// For simple key
-void formatIDs(BlockOutputStreamPtr & out, const std::vector<UInt64> & ids)
+
+void formatWithBlock(BlockOutputStreamPtr & out, Block block)
 {
-    auto column = ColumnUInt64::create(ids.size());
-    memcpy(column->getData().data(), ids.data(), ids.size() * sizeof(ids.front()));
-
-    Block block{{std::move(column), std::make_shared<DataTypeUInt64>(), "id"}};
-
     out->writePrefix();
     out->write(block);
     out->writeSuffix();
     out->flush();
 }
 
+/// For simple key
+
+Block blockForIds(const std::vector<UInt64> & ids)
+{
+    auto column = ColumnUInt64::create(ids.size());
+    memcpy(column->getData().data(), ids.data(), ids.size() * sizeof(ids.front()));
+
+    Block block{{std::move(column), std::make_shared<DataTypeUInt64>(), "id"}};
+
+    std::cerr << "Block for IDs size " << ids.size() << std::endl;
+
+    return block;
+}
+
 /// For composite key
-void formatKeys(
+
+Block blockForKeys(
     const DictionaryStructure & dict_struct,
-    BlockOutputStreamPtr & out,
     const Columns & key_columns,
     const std::vector<size_t> & requested_rows)
 {
     Block block;
+
     for (size_t i = 0, size = key_columns.size(); i < size; ++i)
     {
         const ColumnPtr & source_column = key_columns[i];
-        auto filtered_column = source_column->cloneEmpty();
-        filtered_column->reserve(requested_rows.size());
+        size_t column_rows_size = source_column->size();
+
+        PaddedPODArray<UInt8> filter(column_rows_size, false);
 
         for (size_t idx : requested_rows)
-            filtered_column->insertFrom(*source_column, idx);
+            filter[idx] = true;
 
-        block.insert({std::move(filtered_column), (*dict_struct.key)[i].type, toString(i)});
+        auto filtered_column = source_column->filter(filter, requested_rows.size());
+
+        block.insert({std::move(filtered_column), (*dict_struct.key)[i].type, (*dict_struct.key)[i].name});
     }
 
-    out->writePrefix();
-    out->write(block);
-    out->writeSuffix();
-    out->flush();
+    return block;
 }
 
 Context copyContextAndApplySettings(
diff --git a/src/Dictionaries/DictionarySourceHelpers.h b/src/Dictionaries/DictionarySourceHelpers.h
index 3f42700d336..d6b0dab8d4f 100644
--- a/src/Dictionaries/DictionarySourceHelpers.h
+++ b/src/Dictionaries/DictionarySourceHelpers.h
@@ -1,11 +1,15 @@
 #pragma once
 
 #include <vector>
-#include <Columns/IColumn.h>
+
 #include <common/types.h>
+
 #include <Poco/File.h>
 #include <Poco/Util/AbstractConfiguration.h>
 
+#include <Columns/IColumn.h>
+#include <Core/Block.h>
+
 namespace DB
 {
 class IBlockOutputStream;
@@ -16,13 +20,16 @@ class Context;
 
 /// Write keys to block output stream.
 
+void formatWithBlock(BlockOutputStreamPtr & out, Block block);
+
 /// For simple key
-void formatIDs(BlockOutputStreamPtr & out, const std::vector<UInt64> & ids);
+
+Block blockForIds(const std::vector<UInt64> & ids);
 
 /// For composite key
-void formatKeys(
+
+Block blockForKeys(
     const DictionaryStructure & dict_struct,
-    BlockOutputStreamPtr & out,
     const Columns & key_columns,
     const std::vector<size_t> & requested_rows);
 
@@ -36,4 +43,5 @@ void applySettingsToContext(
     const std::string & config_prefix,
     Context & context,
     const Poco::Util::AbstractConfiguration & config);
+
 }
diff --git a/src/Dictionaries/DictionaryStructure.cpp b/src/Dictionaries/DictionaryStructure.cpp
index 95c2e0a3e09..42a7cb6e4ec 100644
--- a/src/Dictionaries/DictionaryStructure.cpp
+++ b/src/Dictionaries/DictionaryStructure.cpp
@@ -281,6 +281,21 @@ size_t DictionaryStructure::getKeySize() const
     });
 }
 
+Strings DictionaryStructure::getKeysNames() const
+{
+    if (id)
+        return { id->name };
+
+    auto & key_attributes = *key;
+
+    Strings keys_names;
+    keys_names.reserve(key_attributes.size());
+
+    for (const auto & key_attribute : key_attributes)
+        keys_names.emplace_back(key_attribute.name);
+
+    return keys_names;
+}
 
 static void checkAttributeKeys(const Poco::Util::AbstractConfiguration::Keys & keys)
 {
diff --git a/src/Dictionaries/DictionaryStructure.h b/src/Dictionaries/DictionaryStructure.h
index 945e1c55494..c6c80498a4b 100644
--- a/src/Dictionaries/DictionaryStructure.h
+++ b/src/Dictionaries/DictionaryStructure.h
@@ -158,6 +158,8 @@ struct DictionaryStructure final
     std::string getKeyDescription() const;
     bool isKeySizeFixed() const;
     size_t getKeySize() const;
+    Strings getKeysNames() const;
+
 private:
     /// range_min and range_max have to be parsed before this function call
     std::vector<DictionaryAttribute> getAttributes(
diff --git a/src/Dictionaries/ExecutableDictionarySource.cpp b/src/Dictionaries/ExecutableDictionarySource.cpp
index f2abe10f970..d9aa89ec14e 100644
--- a/src/Dictionaries/ExecutableDictionarySource.cpp
+++ b/src/Dictionaries/ExecutableDictionarySource.cpp
@@ -26,6 +26,7 @@ namespace ErrorCodes
 {
     extern const int LOGICAL_ERROR;
     extern const int DICTIONARY_ACCESS_DENIED;
+    extern const int UNSUPPORTED_METHOD;
 }
 
 namespace
@@ -65,18 +66,32 @@ ExecutableDictionarySource::ExecutableDictionarySource(
     const Context & context_)
     : log(&Poco::Logger::get("ExecutableDictionarySource"))
     , dict_struct{dict_struct_}
+    , implicit_key{config.getBool(config_prefix + ".implicit_key", false)}
     , command{config.getString(config_prefix + ".command")}
     , update_field{config.getString(config_prefix + ".update_field", "")}
     , format{config.getString(config_prefix + ".format")}
     , sample_block{sample_block_}
     , context(context_)
 {
+    /// Remove keys from sample_block for implicit_key dictionary because
+    /// this columns will not be provided by client
+    if (implicit_key)
+    {
+        auto keys_names = dict_struct.getKeysNames();
+
+        for (auto & key_name : keys_names)
+        {
+            size_t key_column_position_in_block = sample_block.getPositionByName(key_name);
+            sample_block.erase(key_column_position_in_block);
+        }
+    }
 }
 
 ExecutableDictionarySource::ExecutableDictionarySource(const ExecutableDictionarySource & other)
     : log(&Poco::Logger::get("ExecutableDictionarySource"))
     , update_time{other.update_time}
     , dict_struct{other.dict_struct}
+    , implicit_key{other.implicit_key}
     , command{other.command}
     , update_field{other.update_field}
     , format{other.format}
@@ -87,6 +102,9 @@ ExecutableDictionarySource::ExecutableDictionarySource(const ExecutableDictionar
 
 BlockInputStreamPtr ExecutableDictionarySource::loadAll()
 {
+    if (implicit_key)
+        throw Exception("ExecutableDictionarySource with implicit_key does not support loadAll method", ErrorCodes::UNSUPPORTED_METHOD);
+
     LOG_TRACE(log, "loadAll {}", toString());
     auto process = ShellCommand::execute(command);
     auto input_stream = context.getInputFormat(format, process->out, sample_block, max_block_size);
@@ -95,6 +113,9 @@ BlockInputStreamPtr ExecutableDictionarySource::loadAll()
 
 BlockInputStreamPtr ExecutableDictionarySource::loadUpdatedAll()
 {
+    if (implicit_key)
+        throw Exception("ExecutableDictionarySource with implicit_key does not support loadUpdatedAll method", ErrorCodes::UNSUPPORTED_METHOD);
+
     time_t new_update_time = time(nullptr);
     SCOPE_EXIT(update_time = new_update_time);
 
@@ -173,6 +194,72 @@ namespace
         std::function<void(WriteBufferFromFile &)> send_data;
         ThreadFromGlobalPool thread;
     };
+
+    /** A stream, adds additional columns to each block that it will read from inner stream.
+     *
+     *  block_to_add rows size must be equal to final summ rows size of all inner stream readed blocks.
+     */
+    class BlockInputStreamWithAdditionalColumns final: public IBlockInputStream
+    {
+    public:
+        BlockInputStreamWithAdditionalColumns(
+            Block block_to_add_,
+            std::unique_ptr<IBlockInputStream>&& stream_)
+            : block_to_add(std::move(block_to_add_))
+            , stream(std::move(stream_))
+        {
+        }
+
+        Block getHeader() const override
+        {
+            auto header = stream->getHeader();
+
+            if (header)
+            {
+                for (int64_t i = static_cast<uint64_t>(block_to_add.columns() - 1); i >= 0; --i)
+                    header.insert(0, block_to_add.getByPosition(i).cloneEmpty());
+            }
+
+            return header;
+        }
+
+        Block readImpl() override
+        {
+            auto block = stream->read();
+
+            if (block)
+            {
+                auto block_rows = block.rows();
+
+                auto cut_block = block_to_add.cloneWithCutColumns(current_range_index, block_rows);
+
+                for (int64_t i = static_cast<uint64_t>(cut_block.columns() - 1); i >= 0; --i)
+                    block.insert(0, cut_block.getByPosition(i));
+
+                current_range_index += block_rows;
+            }
+
+            return block;
+        }
+
+        void readPrefix() override
+        {
+            stream->readPrefix();
+        }
+
+        void readSuffix() override
+        {
+            stream->readSuffix();
+        }
+
+        String getName() const override { return "BlockInputStreamWithAdditionalColumns"; }
+
+    private:
+        Block block_to_add;
+        std::unique_ptr<IBlockInputStream> stream;
+        size_t current_range_index = 0;
+    };
+
 }
 
 
@@ -180,28 +267,44 @@ BlockInputStreamPtr ExecutableDictionarySource::loadIds(const std::vector<UInt64
 {
     LOG_TRACE(log, "loadIds {} size = {}", toString(), ids.size());
 
-    return std::make_shared<BlockInputStreamWithBackgroundThread>(
+    auto block = blockForIds(ids);
+
+    auto stream = std::make_unique<BlockInputStreamWithBackgroundThread>(
         context, format, sample_block, command, log,
-        [&ids, this](WriteBufferFromFile & out) mutable
+        [block, this](WriteBufferFromFile & out) mutable
         {
-            auto output_stream = context.getOutputStream(format, out, sample_block);
-            formatIDs(output_stream, ids);
+            auto output_stream = context.getOutputStream(format, out, block.cloneEmpty());
+            formatWithBlock(output_stream, block);
             out.close();
         });
+
+    if (implicit_key)
+    {
+        return std::make_shared<BlockInputStreamWithAdditionalColumns>(block, std::move(stream));
+    }
+    else
+        return std::shared_ptr<BlockInputStreamWithBackgroundThread>(stream.release());
 }
 
 BlockInputStreamPtr ExecutableDictionarySource::loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows)
 {
     LOG_TRACE(log, "loadKeys {} size = {}", toString(), requested_rows.size());
 
-    return std::make_shared<BlockInputStreamWithBackgroundThread>(
+    auto block = blockForKeys(dict_struct, key_columns, requested_rows);
+
+    auto stream = std::make_unique<BlockInputStreamWithBackgroundThread>(
         context, format, sample_block, command, log,
-        [key_columns, &requested_rows, this](WriteBufferFromFile & out) mutable
+        [block, this](WriteBufferFromFile & out) mutable
         {
-            auto output_stream = context.getOutputStream(format, out, sample_block);
-            formatKeys(dict_struct, output_stream, key_columns, requested_rows);
+            auto output_stream = context.getOutputStream(format, out, block.cloneEmpty());
+            formatWithBlock(output_stream, block);
             out.close();
         });
+
+    if (implicit_key)
+        return std::make_shared<BlockInputStreamWithAdditionalColumns>(block, std::move(stream));
+    else
+        return std::shared_ptr<BlockInputStreamWithBackgroundThread>(stream.release());
 }
 
 bool ExecutableDictionarySource::isModified() const
diff --git a/src/Dictionaries/ExecutableDictionarySource.h b/src/Dictionaries/ExecutableDictionarySource.h
index f28d71ca5e3..7aa203f267b 100644
--- a/src/Dictionaries/ExecutableDictionarySource.h
+++ b/src/Dictionaries/ExecutableDictionarySource.h
@@ -49,9 +49,9 @@ public:
 
 private:
     Poco::Logger * log;
-
     time_t update_time = 0;
     const DictionaryStructure dict_struct;
+    bool implicit_key;
     const std::string command;
     const std::string update_field;
     const std::string format;
diff --git a/src/Dictionaries/HTTPDictionarySource.cpp b/src/Dictionaries/HTTPDictionarySource.cpp
index da5623bcdb0..55585b836d9 100644
--- a/src/Dictionaries/HTTPDictionarySource.cpp
+++ b/src/Dictionaries/HTTPDictionarySource.cpp
@@ -130,12 +130,14 @@ BlockInputStreamPtr HTTPDictionarySource::loadUpdatedAll()
 BlockInputStreamPtr HTTPDictionarySource::loadIds(const std::vector<UInt64> & ids)
 {
     LOG_TRACE(log, "loadIds {} size = {}", toString(), ids.size());
+     
+    auto block = blockForIds(ids);
 
-    ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback = [&](std::ostream & ostr)
+    ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback = [block, this](std::ostream & ostr)
     {
         WriteBufferFromOStream out_buffer(ostr);
         auto output_stream = context.getOutputStream(format, out_buffer, sample_block);
-        formatIDs(output_stream, ids);
+        formatWithBlock(output_stream, block);
     };
 
     Poco::URI uri(url);
@@ -150,11 +152,13 @@ BlockInputStreamPtr HTTPDictionarySource::loadKeys(const Columns & key_columns,
 {
     LOG_TRACE(log, "loadKeys {} size = {}", toString(), requested_rows.size());
 
-    ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback = [&](std::ostream & ostr)
+    auto block = blockForKeys(dict_struct, key_columns, requested_rows);
+
+    ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback = [block, this](std::ostream & ostr)
     {
         WriteBufferFromOStream out_buffer(ostr);
         auto output_stream = context.getOutputStream(format, out_buffer, sample_block);
-        formatKeys(dict_struct, output_stream, key_columns, requested_rows);
+        formatWithBlock(output_stream, block);
     };
 
     Poco::URI uri(url);
diff --git a/tests/config/executable_dictionary.xml b/tests/config/executable_dictionary.xml
index 50df32e2ec6..c5a4a0947bc 100644
--- a/tests/config/executable_dictionary.xml
+++ b/tests/config/executable_dictionary.xml
@@ -105,4 +105,152 @@
     </structure>
 </dictionary>
 
+<dictionary>
+    <name>simple_executable_cache_dictionary_no_implicit_key</name>
+
+    <structure>
+        <id>
+            <name>id</name>
+            <type>UInt64</type>
+        </id>
+
+        <attribute>
+            <name>value</name>
+            <type>String</type>
+            <null_value></null_value>
+        </attribute>
+    </structure>
+
+    <source>
+        <executable>
+            <command>echo "1\tValue"</command>
+            <format>TabSeparated</format>
+            <implicit_key>false</implicit_key>
+        </executable>
+    </source>
+
+    <layout>
+        <cache>
+            <size_in_cells>10000</size_in_cells>
+        </cache>
+    </layout>
+
+    <lifetime>300</lifetime>
+</dictionary>
+
+<dictionary>
+    <name>simple_executable_cache_dictionary_implicit_key</name>
+
+    <structure>
+        <id>
+            <name>id</name>
+            <type>UInt64</type>
+        </id>
+
+        <attribute>
+            <name>value</name>
+            <type>String</type>
+            <null_value></null_value>
+        </attribute>
+    </structure>
+
+    <source>
+        <executable>
+            <command>echo "Value"</command>
+            <format>TabSeparated</format>
+            <implicit_key>true</implicit_key>
+        </executable>
+    </source>
+
+    <layout>
+        <cache>
+            <size_in_cells>10000</size_in_cells>
+        </cache>
+    </layout>
+
+    <lifetime>300</lifetime>
+</dictionary>
+
+<dictionary>
+    <name>complex_executable_cache_dictionary_no_implicit_key</name>
+
+    <structure>
+        <key>
+            <attribute>
+                <name>id</name>
+                <type>UInt64</type>
+                <null_value></null_value>
+            </attribute>
+            <attribute>
+                <name>id_key</name>
+                <type>String</type>
+                <null_value></null_value>
+            </attribute>
+        </key>
+
+        <attribute>
+            <name>value</name>
+            <type>String</type>
+            <null_value></null_value>
+        </attribute>
+    </structure>
+
+    <source>
+        <executable>
+            <command>echo "1\tFirstKey\tValue"</command>
+            <format>TabSeparated</format>
+            <implicit_key>false</implicit_key>
+        </executable>
+    </source>
+
+    <layout>
+        <complex_key_cache>
+            <size_in_cells>10000</size_in_cells>
+        </complex_key_cache>
+    </layout>
+
+    <lifetime>300</lifetime>
+</dictionary>
+
+<dictionary>
+    <name>complex_executable_cache_dictionary_implicit_key</name>
+
+    <structure>
+        <key>
+            <attribute>
+                <name>id</name>
+                <type>UInt64</type>
+                <null_value></null_value>
+            </attribute>
+            <attribute>
+                <name>id_key</name>
+                <type>String</type>
+                <null_value></null_value>
+            </attribute>
+        </key>
+
+        <attribute>
+            <name>value</name>
+            <type>String</type>
+            <null_value></null_value>
+        </attribute>
+    </structure>
+
+    <source>
+        <executable>
+            <command>echo "Value"</command>
+            <format>TabSeparated</format>
+            <implicit_key>true</implicit_key>
+        </executable>
+    </source>
+
+    <layout>
+        <complex_key_cache>
+            <size_in_cells>10000</size_in_cells>
+        </complex_key_cache>
+    </layout>
+
+    <lifetime>300</lifetime>
+</dictionary>
+
 </dictionaries>
diff --git a/tests/queries/0_stateless/01474_executable_dictionary.reference b/tests/queries/0_stateless/01474_executable_dictionary.reference
index 4d0994b08c3..66894d4f41f 100644
--- a/tests/queries/0_stateless/01474_executable_dictionary.reference
+++ b/tests/queries/0_stateless/01474_executable_dictionary.reference
@@ -1,3 +1,8 @@
 999999	1999998	999998000001
 999999	1999998	999998000001
 999999	1999998	999998000001
+Check implicit_key option
+Value
+Value
+Value
+Value
\ No newline at end of file
diff --git a/tests/queries/0_stateless/01474_executable_dictionary.sql b/tests/queries/0_stateless/01474_executable_dictionary.sql
index 727cf47f79f..83f8a946354 100644
--- a/tests/queries/0_stateless/01474_executable_dictionary.sql
+++ b/tests/queries/0_stateless/01474_executable_dictionary.sql
@@ -1,3 +1,11 @@
 SELECT number, dictGet('executable_complex', 'a', (number, number)) AS a, dictGet('executable_complex', 'b', (number, number)) AS b FROM numbers(1000000) WHERE number = 999999;
 SELECT number, dictGet('executable_complex_direct', 'a', (number, number)) AS a, dictGet('executable_complex_direct', 'b', (number, number)) AS b FROM numbers(1000000) WHERE number = 999999;
 SELECT number, dictGet('executable_simple', 'a', number) AS a, dictGet('executable_simple', 'b', number) AS b FROM numbers(1000000) WHERE number = 999999;
+
+SELECT 'Check implicit_key option';
+
+SELECT dictGet('simple_executable_cache_dictionary_no_implicit_key', 'value', toUInt64(1));
+SELECT dictGet('simple_executable_cache_dictionary_implicit_key', 'value', toUInt64(1));
+
+SELECT dictGet('complex_executable_cache_dictionary_no_implicit_key', 'value', (toUInt64(1), 'FirstKey'));
+SELECT dictGet('complex_executable_cache_dictionary_implicit_key', 'value', (toUInt64(1), 'FirstKey'));

From d194294d0b741b342011e73575d6483ffde5e467 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Wed, 27 Jan 2021 16:23:02 +0300
Subject: [PATCH 0468/1238] Fixed tests

---
 src/Dictionaries/DictionarySourceHelpers.cpp           | 10 +++++-----
 src/Dictionaries/DictionarySourceHelpers.h             |  6 ++++--
 src/Dictionaries/ExecutableDictionarySource.cpp        |  2 +-
 src/Dictionaries/HTTPDictionarySource.cpp              |  4 ++--
 .../0_stateless/01474_executable_dictionary.reference  |  2 +-
 5 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/src/Dictionaries/DictionarySourceHelpers.cpp b/src/Dictionaries/DictionarySourceHelpers.cpp
index 3227704ff4b..ff769d51354 100644
--- a/src/Dictionaries/DictionarySourceHelpers.cpp
+++ b/src/Dictionaries/DictionarySourceHelpers.cpp
@@ -13,7 +13,7 @@
 namespace DB
 {
 
-void formatWithBlock(BlockOutputStreamPtr & out, Block block)
+void formatWithBlock(BlockOutputStreamPtr & out, const Block & block)
 {
     out->writePrefix();
     out->write(block);
@@ -23,14 +23,14 @@ void formatWithBlock(BlockOutputStreamPtr & out, Block block)
 
 /// For simple key
 
-Block blockForIds(const std::vector<UInt64> & ids)
+Block blockForIds(
+    const DictionaryStructure & dict_struct,
+    const std::vector<UInt64> & ids)
 {
     auto column = ColumnUInt64::create(ids.size());
     memcpy(column->getData().data(), ids.data(), ids.size() * sizeof(ids.front()));
 
-    Block block{{std::move(column), std::make_shared<DataTypeUInt64>(), "id"}};
-
-    std::cerr << "Block for IDs size " << ids.size() << std::endl;
+    Block block{{std::move(column), std::make_shared<DataTypeUInt64>(), (*dict_struct.id).name}};
 
     return block;
 }
diff --git a/src/Dictionaries/DictionarySourceHelpers.h b/src/Dictionaries/DictionarySourceHelpers.h
index d6b0dab8d4f..27ead1b447d 100644
--- a/src/Dictionaries/DictionarySourceHelpers.h
+++ b/src/Dictionaries/DictionarySourceHelpers.h
@@ -20,11 +20,13 @@ class Context;
 
 /// Write keys to block output stream.
 
-void formatWithBlock(BlockOutputStreamPtr & out, Block block);
+void formatWithBlock(BlockOutputStreamPtr & out, const Block & block);
 
 /// For simple key
 
-Block blockForIds(const std::vector<UInt64> & ids);
+Block blockForIds(
+    const DictionaryStructure & dict_struct,
+    const std::vector<UInt64> & ids);
 
 /// For composite key
 
diff --git a/src/Dictionaries/ExecutableDictionarySource.cpp b/src/Dictionaries/ExecutableDictionarySource.cpp
index d9aa89ec14e..19eb6a82672 100644
--- a/src/Dictionaries/ExecutableDictionarySource.cpp
+++ b/src/Dictionaries/ExecutableDictionarySource.cpp
@@ -267,7 +267,7 @@ BlockInputStreamPtr ExecutableDictionarySource::loadIds(const std::vector<UInt64
 {
     LOG_TRACE(log, "loadIds {} size = {}", toString(), ids.size());
 
-    auto block = blockForIds(ids);
+    auto block = blockForIds(dict_struct, ids);
 
     auto stream = std::make_unique<BlockInputStreamWithBackgroundThread>(
         context, format, sample_block, command, log,
diff --git a/src/Dictionaries/HTTPDictionarySource.cpp b/src/Dictionaries/HTTPDictionarySource.cpp
index 55585b836d9..75daf1323e8 100644
--- a/src/Dictionaries/HTTPDictionarySource.cpp
+++ b/src/Dictionaries/HTTPDictionarySource.cpp
@@ -130,8 +130,8 @@ BlockInputStreamPtr HTTPDictionarySource::loadUpdatedAll()
 BlockInputStreamPtr HTTPDictionarySource::loadIds(const std::vector<UInt64> & ids)
 {
     LOG_TRACE(log, "loadIds {} size = {}", toString(), ids.size());
-     
-    auto block = blockForIds(ids);
+
+    auto block = blockForIds(dict_struct, ids);
 
     ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback = [block, this](std::ostream & ostr)
     {
diff --git a/tests/queries/0_stateless/01474_executable_dictionary.reference b/tests/queries/0_stateless/01474_executable_dictionary.reference
index 66894d4f41f..d88c4d078e3 100644
--- a/tests/queries/0_stateless/01474_executable_dictionary.reference
+++ b/tests/queries/0_stateless/01474_executable_dictionary.reference
@@ -5,4 +5,4 @@ Check implicit_key option
 Value
 Value
 Value
-Value
\ No newline at end of file
+Value

From a9ecb6bf78cc72d6c744094df37c2c3fa8cf63fc Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Thu, 28 Jan 2021 20:10:27 +0300
Subject: [PATCH 0469/1238] Fixed clang-tidy

---
 src/Core/Block.cpp                       | 4 ++--
 src/Dictionaries/DictionaryStructure.cpp | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Core/Block.cpp b/src/Core/Block.cpp
index 56c045bc8df..0c9a470dc1d 100644
--- a/src/Core/Block.cpp
+++ b/src/Core/Block.cpp
@@ -413,8 +413,8 @@ Block Block::cloneWithCutColumns(size_t start, size_t length) const
 {
     Block copy = *this;
 
-    for (size_t i = 0; i < copy.data.size(); ++i)
-        copy.data[i].column = copy.data[i].column->cut(start, length);
+    for (auto & column_to_cut : copy.data)
+        column_to_cut.column = column_to_cut.column->cut(start, length);
 
     return copy;
 }
diff --git a/src/Dictionaries/DictionaryStructure.cpp b/src/Dictionaries/DictionaryStructure.cpp
index 42a7cb6e4ec..25f5736221e 100644
--- a/src/Dictionaries/DictionaryStructure.cpp
+++ b/src/Dictionaries/DictionaryStructure.cpp
@@ -286,7 +286,7 @@ Strings DictionaryStructure::getKeysNames() const
     if (id)
         return { id->name };
 
-    auto & key_attributes = *key;
+    const auto & key_attributes = *key;
 
     Strings keys_names;
     keys_names.reserve(key_attributes.size());

From a0755e439c0b20eff24044462b2787590a5c731a Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Thu, 28 Jan 2021 22:57:11 +0300
Subject: [PATCH 0470/1238] Fixed typo

---
 src/Dictionaries/ExecutableDictionarySource.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Dictionaries/ExecutableDictionarySource.cpp b/src/Dictionaries/ExecutableDictionarySource.cpp
index 19eb6a82672..e6bf18d3239 100644
--- a/src/Dictionaries/ExecutableDictionarySource.cpp
+++ b/src/Dictionaries/ExecutableDictionarySource.cpp
@@ -197,7 +197,7 @@ namespace
 
     /** A stream, adds additional columns to each block that it will read from inner stream.
      *
-     *  block_to_add rows size must be equal to final summ rows size of all inner stream readed blocks.
+     *  block_to_add rows size must be equal to final summ rows size of all inner stream blocks.
      */
     class BlockInputStreamWithAdditionalColumns final: public IBlockInputStream
     {

From b83b8c56dac5045ef93f4cc47e924379dc881ebf Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Sun, 31 Jan 2021 00:06:16 +0300
Subject: [PATCH 0471/1238] Update ExecutableDictionarySource.cpp

---
 src/Dictionaries/ExecutableDictionarySource.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Dictionaries/ExecutableDictionarySource.cpp b/src/Dictionaries/ExecutableDictionarySource.cpp
index e6bf18d3239..40addc574b9 100644
--- a/src/Dictionaries/ExecutableDictionarySource.cpp
+++ b/src/Dictionaries/ExecutableDictionarySource.cpp
@@ -197,7 +197,7 @@ namespace
 
     /** A stream, adds additional columns to each block that it will read from inner stream.
      *
-     *  block_to_add rows size must be equal to final summ rows size of all inner stream blocks.
+     *  block_to_add rows size must be equal to final sum rows size of all inner stream blocks.
      */
     class BlockInputStreamWithAdditionalColumns final: public IBlockInputStream
     {

From ecd6e10e2208e8b6ecfcddd4b94d4c4f1dc1170d Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Sun, 31 Jan 2021 01:27:04 +0300
Subject: [PATCH 0472/1238] Update ExecutableDictionarySource.cpp

---
 src/Dictionaries/ExecutableDictionarySource.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Dictionaries/ExecutableDictionarySource.cpp b/src/Dictionaries/ExecutableDictionarySource.cpp
index 40addc574b9..be0d3a4afa7 100644
--- a/src/Dictionaries/ExecutableDictionarySource.cpp
+++ b/src/Dictionaries/ExecutableDictionarySource.cpp
@@ -74,7 +74,7 @@ ExecutableDictionarySource::ExecutableDictionarySource(
     , context(context_)
 {
     /// Remove keys from sample_block for implicit_key dictionary because
-    /// this columns will not be provided by client
+    /// these columns will not be provided by client
     if (implicit_key)
     {
         auto keys_names = dict_struct.getKeysNames();

From 77a8ef24de2dadd9f0a09f9c48d6b61f914a69f7 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Sun, 31 Jan 2021 01:29:36 +0300
Subject: [PATCH 0473/1238] Update ExecutableDictionarySource.cpp

---
 src/Dictionaries/ExecutableDictionarySource.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/Dictionaries/ExecutableDictionarySource.cpp b/src/Dictionaries/ExecutableDictionarySource.cpp
index be0d3a4afa7..48cd2b130d4 100644
--- a/src/Dictionaries/ExecutableDictionarySource.cpp
+++ b/src/Dictionaries/ExecutableDictionarySource.cpp
@@ -74,7 +74,9 @@ ExecutableDictionarySource::ExecutableDictionarySource(
     , context(context_)
 {
     /// Remove keys from sample_block for implicit_key dictionary because
-    /// these columns will not be provided by client
+    /// these columns will not be returned from source
+    /// Implicit key means that the source script will return only values,
+    /// and the correspondence to keys is determined implicitly - by the order of rows in the result.
     if (implicit_key)
     {
         auto keys_names = dict_struct.getKeysNames();

From e5b89f24b57fcd52b32b34b5ec60e0e485141233 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Sun, 31 Jan 2021 01:30:00 +0300
Subject: [PATCH 0474/1238] Update ExecutableDictionarySource.cpp

---
 src/Dictionaries/ExecutableDictionarySource.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Dictionaries/ExecutableDictionarySource.cpp b/src/Dictionaries/ExecutableDictionarySource.cpp
index 48cd2b130d4..fd6991d3e97 100644
--- a/src/Dictionaries/ExecutableDictionarySource.cpp
+++ b/src/Dictionaries/ExecutableDictionarySource.cpp
@@ -76,7 +76,7 @@ ExecutableDictionarySource::ExecutableDictionarySource(
     /// Remove keys from sample_block for implicit_key dictionary because
     /// these columns will not be returned from source
     /// Implicit key means that the source script will return only values,
-    /// and the correspondence to keys is determined implicitly - by the order of rows in the result.
+    /// and the correspondence to the requested keys is determined implicitly - by the order of rows in the result.
     if (implicit_key)
     {
         auto keys_names = dict_struct.getKeysNames();

From d3f299269f64fd7a636303481e964b2688ae4a7f Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Sun, 31 Jan 2021 12:59:35 +0300
Subject: [PATCH 0475/1238] Fixed code review issues

---
 src/Dictionaries/DictionarySourceHelpers.cpp         |  2 +-
 src/Dictionaries/DictionarySourceHelpers.h           |  2 +-
 src/Dictionaries/ExecutableDictionarySource.cpp      | 12 ++++++++----
 src/Dictionaries/HTTPDictionarySource.cpp            |  4 ++--
 .../01474_executable_dictionary.reference            |  5 -----
 .../0_stateless/01474_executable_dictionary.sql      |  8 --------
 ...1674_executable_dictionary_implicit_key.reference |  4 ++++
 .../01674_executable_dictionary_implicit_key.sql     |  5 +++++
 8 files changed, 21 insertions(+), 21 deletions(-)
 create mode 100644 tests/queries/0_stateless/01674_executable_dictionary_implicit_key.reference
 create mode 100644 tests/queries/0_stateless/01674_executable_dictionary_implicit_key.sql

diff --git a/src/Dictionaries/DictionarySourceHelpers.cpp b/src/Dictionaries/DictionarySourceHelpers.cpp
index ff769d51354..2a872672aff 100644
--- a/src/Dictionaries/DictionarySourceHelpers.cpp
+++ b/src/Dictionaries/DictionarySourceHelpers.cpp
@@ -13,7 +13,7 @@
 namespace DB
 {
 
-void formatWithBlock(BlockOutputStreamPtr & out, const Block & block)
+void formatBlock(BlockOutputStreamPtr & out, const Block & block)
 {
     out->writePrefix();
     out->write(block);
diff --git a/src/Dictionaries/DictionarySourceHelpers.h b/src/Dictionaries/DictionarySourceHelpers.h
index 27ead1b447d..cad5441c66e 100644
--- a/src/Dictionaries/DictionarySourceHelpers.h
+++ b/src/Dictionaries/DictionarySourceHelpers.h
@@ -20,7 +20,7 @@ class Context;
 
 /// Write keys to block output stream.
 
-void formatWithBlock(BlockOutputStreamPtr & out, const Block & block);
+void formatBlock(BlockOutputStreamPtr & out, const Block & block);
 
 /// For simple key
 
diff --git a/src/Dictionaries/ExecutableDictionarySource.cpp b/src/Dictionaries/ExecutableDictionarySource.cpp
index fd6991d3e97..fc061f93f2b 100644
--- a/src/Dictionaries/ExecutableDictionarySource.cpp
+++ b/src/Dictionaries/ExecutableDictionarySource.cpp
@@ -218,7 +218,7 @@ namespace
 
             if (header)
             {
-                for (int64_t i = static_cast<uint64_t>(block_to_add.columns() - 1); i >= 0; --i)
+                for (Int64 i = static_cast<Int64>(block_to_add.columns() - 1); i >= 0; --i)
                     header.insert(0, block_to_add.getByPosition(i).cloneEmpty());
             }
 
@@ -235,7 +235,11 @@ namespace
 
                 auto cut_block = block_to_add.cloneWithCutColumns(current_range_index, block_rows);
 
-                for (int64_t i = static_cast<uint64_t>(cut_block.columns() - 1); i >= 0; --i)
+                if (cut_block.rows() != block_rows)
+                    throw Exception("Rows in block to add after cut must equal to rows in readed block",
+                        ErrorCodes::LOGICAL_ERROR);
+
+                for (Int64 i = static_cast<Int64>(cut_block.columns() - 1); i >= 0; --i)
                     block.insert(0, cut_block.getByPosition(i));
 
                 current_range_index += block_rows;
@@ -276,7 +280,7 @@ BlockInputStreamPtr ExecutableDictionarySource::loadIds(const std::vector<UInt64
         [block, this](WriteBufferFromFile & out) mutable
         {
             auto output_stream = context.getOutputStream(format, out, block.cloneEmpty());
-            formatWithBlock(output_stream, block);
+            formatBlock(output_stream, block);
             out.close();
         });
 
@@ -299,7 +303,7 @@ BlockInputStreamPtr ExecutableDictionarySource::loadKeys(const Columns & key_col
         [block, this](WriteBufferFromFile & out) mutable
         {
             auto output_stream = context.getOutputStream(format, out, block.cloneEmpty());
-            formatWithBlock(output_stream, block);
+            formatBlock(output_stream, block);
             out.close();
         });
 
diff --git a/src/Dictionaries/HTTPDictionarySource.cpp b/src/Dictionaries/HTTPDictionarySource.cpp
index 75daf1323e8..ddcac117e58 100644
--- a/src/Dictionaries/HTTPDictionarySource.cpp
+++ b/src/Dictionaries/HTTPDictionarySource.cpp
@@ -137,7 +137,7 @@ BlockInputStreamPtr HTTPDictionarySource::loadIds(const std::vector<UInt64> & id
     {
         WriteBufferFromOStream out_buffer(ostr);
         auto output_stream = context.getOutputStream(format, out_buffer, sample_block);
-        formatWithBlock(output_stream, block);
+        formatBlock(output_stream, block);
     };
 
     Poco::URI uri(url);
@@ -158,7 +158,7 @@ BlockInputStreamPtr HTTPDictionarySource::loadKeys(const Columns & key_columns,
     {
         WriteBufferFromOStream out_buffer(ostr);
         auto output_stream = context.getOutputStream(format, out_buffer, sample_block);
-        formatWithBlock(output_stream, block);
+        formatBlock(output_stream, block);
     };
 
     Poco::URI uri(url);
diff --git a/tests/queries/0_stateless/01474_executable_dictionary.reference b/tests/queries/0_stateless/01474_executable_dictionary.reference
index d88c4d078e3..4d0994b08c3 100644
--- a/tests/queries/0_stateless/01474_executable_dictionary.reference
+++ b/tests/queries/0_stateless/01474_executable_dictionary.reference
@@ -1,8 +1,3 @@
 999999	1999998	999998000001
 999999	1999998	999998000001
 999999	1999998	999998000001
-Check implicit_key option
-Value
-Value
-Value
-Value
diff --git a/tests/queries/0_stateless/01474_executable_dictionary.sql b/tests/queries/0_stateless/01474_executable_dictionary.sql
index 83f8a946354..727cf47f79f 100644
--- a/tests/queries/0_stateless/01474_executable_dictionary.sql
+++ b/tests/queries/0_stateless/01474_executable_dictionary.sql
@@ -1,11 +1,3 @@
 SELECT number, dictGet('executable_complex', 'a', (number, number)) AS a, dictGet('executable_complex', 'b', (number, number)) AS b FROM numbers(1000000) WHERE number = 999999;
 SELECT number, dictGet('executable_complex_direct', 'a', (number, number)) AS a, dictGet('executable_complex_direct', 'b', (number, number)) AS b FROM numbers(1000000) WHERE number = 999999;
 SELECT number, dictGet('executable_simple', 'a', number) AS a, dictGet('executable_simple', 'b', number) AS b FROM numbers(1000000) WHERE number = 999999;
-
-SELECT 'Check implicit_key option';
-
-SELECT dictGet('simple_executable_cache_dictionary_no_implicit_key', 'value', toUInt64(1));
-SELECT dictGet('simple_executable_cache_dictionary_implicit_key', 'value', toUInt64(1));
-
-SELECT dictGet('complex_executable_cache_dictionary_no_implicit_key', 'value', (toUInt64(1), 'FirstKey'));
-SELECT dictGet('complex_executable_cache_dictionary_implicit_key', 'value', (toUInt64(1), 'FirstKey'));
diff --git a/tests/queries/0_stateless/01674_executable_dictionary_implicit_key.reference b/tests/queries/0_stateless/01674_executable_dictionary_implicit_key.reference
new file mode 100644
index 00000000000..0777c3c2625
--- /dev/null
+++ b/tests/queries/0_stateless/01674_executable_dictionary_implicit_key.reference
@@ -0,0 +1,4 @@
+Value
+Value
+Value
+Value
diff --git a/tests/queries/0_stateless/01674_executable_dictionary_implicit_key.sql b/tests/queries/0_stateless/01674_executable_dictionary_implicit_key.sql
new file mode 100644
index 00000000000..c98cb0a5e0f
--- /dev/null
+++ b/tests/queries/0_stateless/01674_executable_dictionary_implicit_key.sql
@@ -0,0 +1,5 @@
+SELECT dictGet('simple_executable_cache_dictionary_no_implicit_key', 'value', toUInt64(1));
+SELECT dictGet('simple_executable_cache_dictionary_implicit_key', 'value', toUInt64(1));
+
+SELECT dictGet('complex_executable_cache_dictionary_no_implicit_key', 'value', (toUInt64(1), 'FirstKey'));
+SELECT dictGet('complex_executable_cache_dictionary_implicit_key', 'value', (toUInt64(1), 'FirstKey'));

From d9d206af92918394429a775decf8447c29c0cf29 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Sun, 31 Jan 2021 15:26:53 +0300
Subject: [PATCH 0476/1238] Fixed typo

---
 src/Dictionaries/ExecutableDictionarySource.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/Dictionaries/ExecutableDictionarySource.cpp b/src/Dictionaries/ExecutableDictionarySource.cpp
index fc061f93f2b..42dac540f09 100644
--- a/src/Dictionaries/ExecutableDictionarySource.cpp
+++ b/src/Dictionaries/ExecutableDictionarySource.cpp
@@ -27,6 +27,7 @@ namespace ErrorCodes
     extern const int LOGICAL_ERROR;
     extern const int DICTIONARY_ACCESS_DENIED;
     extern const int UNSUPPORTED_METHOD;
+    extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
 }
 
 namespace
@@ -236,8 +237,9 @@ namespace
                 auto cut_block = block_to_add.cloneWithCutColumns(current_range_index, block_rows);
 
                 if (cut_block.rows() != block_rows)
-                    throw Exception("Rows in block to add after cut must equal to rows in readed block",
-                        ErrorCodes::LOGICAL_ERROR);
+                    throw Exception(
+                        "Number of rows in block to add after cut must equal to number of rows in block from inner stream",
+                        ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
 
                 for (Int64 i = static_cast<Int64>(cut_block.columns() - 1); i >= 0; --i)
                     block.insert(0, cut_block.getByPosition(i));

From beee7a4f025bf4a75367e5607d65b34cd28fe952 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Mon, 1 Feb 2021 11:19:08 +0300
Subject: [PATCH 0477/1238] Added test to arcadia_skip_list

---
 tests/queries/0_stateless/arcadia_skip_list.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/queries/0_stateless/arcadia_skip_list.txt b/tests/queries/0_stateless/arcadia_skip_list.txt
index 253eab720af..5b8256bb5af 100644
--- a/tests/queries/0_stateless/arcadia_skip_list.txt
+++ b/tests/queries/0_stateless/arcadia_skip_list.txt
@@ -198,3 +198,4 @@
 01659_test_base64Decode_mysql_compatibility
 01675_data_type_coroutine
 01671_aggregate_function_group_bitmap_data
+01674_executable_dictionary_implicit_key

From c9b78186d91db9c76258e6eb5a3c54af430bce43 Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@yandex-team.ru>
Date: Mon, 1 Feb 2021 12:48:27 +0300
Subject: [PATCH 0478/1238] Auto version update to [21.2.1.5869] [54447]

---
 cmake/autogenerated_versions.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt
index 5d643cc4bee..a13725a2c08 100644
--- a/cmake/autogenerated_versions.txt
+++ b/cmake/autogenerated_versions.txt
@@ -3,7 +3,7 @@ SET(VERSION_REVISION 54447)
 SET(VERSION_MAJOR 21)
 SET(VERSION_MINOR 2)
 SET(VERSION_PATCH 1)
-SET(VERSION_GITHASH 53d0c9fa7255aa1dc48991d19f4246ff71cc2fd7)
-SET(VERSION_DESCRIBE v21.2.1.1-prestable)
-SET(VERSION_STRING 21.2.1.1)
+SET(VERSION_GITHASH ef72ba7349f230321750c13ee63b49a11a7c0adc)
+SET(VERSION_DESCRIBE v21.2.1.5869-prestable)
+SET(VERSION_STRING 21.2.1.5869)
 # end of autochange

From cd82eed093513675da19a303f38884caee07aa47 Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@yandex-team.ru>
Date: Mon, 1 Feb 2021 12:50:56 +0300
Subject: [PATCH 0479/1238] Auto version update to [21.3.1.1] [54448]

---
 cmake/autogenerated_versions.txt              |  8 ++++----
 debian/changelog                              |  4 ++--
 docker/client/Dockerfile                      |  2 +-
 docker/server/Dockerfile                      |  2 +-
 docker/test/Dockerfile                        |  2 +-
 .../StorageSystemContributors.generated.cpp   | 19 +++++++++++++++++++
 6 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt
index a13725a2c08..ce92ae203ea 100644
--- a/cmake/autogenerated_versions.txt
+++ b/cmake/autogenerated_versions.txt
@@ -1,9 +1,9 @@
 # This strings autochanged from release_lib.sh:
-SET(VERSION_REVISION 54447)
+SET(VERSION_REVISION 54448)
 SET(VERSION_MAJOR 21)
-SET(VERSION_MINOR 2)
+SET(VERSION_MINOR 3)
 SET(VERSION_PATCH 1)
 SET(VERSION_GITHASH ef72ba7349f230321750c13ee63b49a11a7c0adc)
-SET(VERSION_DESCRIBE v21.2.1.5869-prestable)
-SET(VERSION_STRING 21.2.1.5869)
+SET(VERSION_DESCRIBE v21.3.1.1-prestable)
+SET(VERSION_STRING 21.3.1.1)
 # end of autochange
diff --git a/debian/changelog b/debian/changelog
index 1cec020f026..53b36cae114 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,5 +1,5 @@
-clickhouse (21.2.1.1) unstable; urgency=low
+clickhouse (21.3.1.1) unstable; urgency=low
 
   * Modified source code
 
- -- clickhouse-release <clickhouse-release@yandex-team.ru>  Mon, 11 Jan 2021 11:12:08 +0300
+ -- clickhouse-release <clickhouse-release@yandex-team.ru>  Mon, 01 Feb 2021 12:50:53 +0300
diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile
index 5022687c47b..43921a4d3c4 100644
--- a/docker/client/Dockerfile
+++ b/docker/client/Dockerfile
@@ -1,7 +1,7 @@
 FROM ubuntu:18.04
 
 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
-ARG version=21.2.1.*
+ARG version=21.3.1.*
 
 RUN apt-get update \
     && apt-get install --yes --no-install-recommends \
diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile
index 3528ae68ef6..8e39af5646c 100644
--- a/docker/server/Dockerfile
+++ b/docker/server/Dockerfile
@@ -1,7 +1,7 @@
 FROM ubuntu:20.04
 
 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
-ARG version=21.2.1.*
+ARG version=21.3.1.*
 ARG gosu_ver=1.10
 
 # user/group precreated explicitly with fixed uid/gid on purpose.
diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile
index df918928f99..f151ae8fddf 100644
--- a/docker/test/Dockerfile
+++ b/docker/test/Dockerfile
@@ -1,7 +1,7 @@
 FROM ubuntu:18.04
 
 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
-ARG version=21.2.1.*
+ARG version=21.3.1.*
 
 RUN apt-get update && \
     apt-get install -y apt-transport-https dirmngr && \
diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp
index 0c50e452e95..d9f4a2a7dd0 100644
--- a/src/Storages/System/StorageSystemContributors.generated.cpp
+++ b/src/Storages/System/StorageSystemContributors.generated.cpp
@@ -23,6 +23,7 @@ const char * auto_contributors[] {
     "Alexander Burmak",
     "Alexander Ermolaev",
     "Alexander GQ Gerasiov",
+    "Alexander Gololobov",
     "Alexander Kazakov",
     "Alexander Kozhikhov",
     "Alexander Krasheninnikov",
@@ -43,6 +44,7 @@ const char * auto_contributors[] {
     "Alexandr Krasheninnikov",
     "Alexandr Orlov",
     "Alexandra Latysheva",
+    "Alexandre Snarskii",
     "Alexei Averchenko",
     "Alexey",
     "Alexey Arno",
@@ -143,6 +145,7 @@ const char * auto_contributors[] {
     "CurtizJ",
     "Daniel Bershatsky",
     "Daniel Dao",
+    "Daniel Qin",
     "Danila Kutenin",
     "Dao Minh Thuc",
     "Daria Mozhaeva",
@@ -309,7 +312,9 @@ const char * auto_contributors[] {
     "Marek Vavrusa",
     "Marek Vavruša",
     "Marek Vavruša",
+    "Mariano Benítez Mulet",
     "Mark Andreev",
+    "Mark Frost",
     "Mark Papadakis",
     "Maroun Maroun",
     "Marsel Arduanov",
@@ -422,6 +427,7 @@ const char * auto_contributors[] {
     "Rafael David Tinoco",
     "Ramazan Polat",
     "Ravengg",
+    "RegulusZ",
     "Reilee",
     "Reto Kromer",
     "Ri",
@@ -482,9 +488,11 @@ const char * auto_contributors[] {
     "Tangaev",
     "Tema Novikov",
     "The-Alchemist",
+    "TiunovNN",
     "Tobias Adamson",
     "Tom Bombadil",
     "Tsarkova Anastasia",
+    "TszkitLo40",
     "Ubuntu",
     "Ubus",
     "UnamedRus",
@@ -556,6 +564,7 @@ const char * auto_contributors[] {
     "Yury Stankevich",
     "Zhichang Yu",
     "Zhipeng",
+    "Zoran Pandovski",
     "a.palagashvili",
     "abdrakhmanov",
     "abyss7",
@@ -571,6 +580,7 @@ const char * auto_contributors[] {
     "alex.lvxin",
     "alexander kozhikhov",
     "alexey-milovidov",
+    "alfredlu",
     "amoschen",
     "amudong",
     "ana-uvarova",
@@ -588,14 +598,17 @@ const char * auto_contributors[] {
     "avsharapov",
     "awesomeleo",
     "benamazing",
+    "benbiti",
     "bgranvea",
     "bharatnc",
     "blazerer",
     "bluebirddm",
+    "bo zeng",
     "bobrovskij artemij",
     "booknouse",
     "bseng",
     "cekc",
+    "centos7",
     "champtar",
     "chang.chen",
     "chengy8934",
@@ -606,6 +619,7 @@ const char * auto_contributors[] {
     "comunodi",
     "coraxster",
     "damozhaeva",
+    "dankondr",
     "daoready",
     "dasmfm",
     "davydovska",
@@ -627,6 +641,7 @@ const char * auto_contributors[] {
     "elBroom",
     "elenaspb2019",
     "emakarov",
+    "emhlbmc",
     "emironyuk",
     "evtan",
     "exprmntr",
@@ -673,6 +688,7 @@ const char * auto_contributors[] {
     "javi santana",
     "jetgm",
     "jianmei zhang",
+    "jyz0309",
     "kmeaw",
     "koshachy",
     "kreuzerkrieg",
@@ -779,7 +795,9 @@ const char * auto_contributors[] {
     "taiyang-li",
     "tao jiang",
     "tavplubix",
+    "templarzq",
     "tiger.yan",
+    "tison",
     "topvisor",
     "tyrionhuang",
     "ubuntu",
@@ -800,6 +818,7 @@ const char * auto_contributors[] {
     "weeds085490",
     "xPoSx",
     "yangshuai",
+    "ygrek",
     "yhgcn",
     "ylchou",
     "yonesko",

From 1e44e3f3417b3d23d8ef3e840c6a11f0716645f0 Mon Sep 17 00:00:00 2001
From: tavplubix <tavplubix@gmail.com>
Date: Mon, 1 Feb 2021 12:53:13 +0300
Subject: [PATCH 0480/1238] Update 01671_ddl_hang_timeout.sh

---
 tests/queries/0_stateless/01671_ddl_hang_timeout.sh | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/tests/queries/0_stateless/01671_ddl_hang_timeout.sh b/tests/queries/0_stateless/01671_ddl_hang_timeout.sh
index fb4926c8b00..2ca97e3978b 100755
--- a/tests/queries/0_stateless/01671_ddl_hang_timeout.sh
+++ b/tests/queries/0_stateless/01671_ddl_hang_timeout.sh
@@ -2,22 +2,23 @@
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
-$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS test_01671"
-$CLICKHOUSE_CLIENT --query "CREATE DATABASE test_01671"
+
 function thread_create_drop_table {
     while true; do
         REPLICA=$(($RANDOM % 10))
-        $CLICKHOUSE_CLIENT --query "CREATE TABLE IF NOT EXISTS test_01671.t1 (x UInt64, s Array(Nullable(String))) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_01671/test_01671', 'r_$REPLICA') order by x" 2>/dev/null
+        $CLICKHOUSE_CLIENT --query "CREATE TABLE IF NOT EXISTS t1 (x UInt64, s Array(Nullable(String))) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_01671/test_01671', 'r_$REPLICA') order by x" 2>/dev/null
         sleep 0.0$RANDOM
-        $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_01671.t1"
+        $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS t1"
     done
 }
+
 function thread_alter_table {
     while true; do
-        $CLICKHOUSE_CLIENT --query "ALTER TABLE test_01671.t1 on cluster test_shard_localhost ADD COLUMN newcol UInt32" >/dev/null 2>&1
+        $CLICKHOUSE_CLIENT --query "ALTER TABLE $CLICKHOUSE_DATABASE.t1 on cluster test_shard_localhost ADD COLUMN newcol UInt32" >/dev/null 2>&1
         sleep 0.0$RANDOM
     done
 }
+
 export -f thread_create_drop_table
 export -f thread_alter_table
 timeout 20 bash -c "thread_create_drop_table" &
@@ -25,4 +26,4 @@ timeout 20 bash -c 'thread_alter_table' &
 wait
 sleep 1
 
-$CLICKHOUSE_CLIENT --query "DROP DATABASE test_01671";
+$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS t1";

From 7780c0b8e933b8d00bbe832ccafc2848130c6ede Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Mon, 1 Feb 2021 13:49:19 +0300
Subject: [PATCH 0481/1238] Remove an always-false condition from query parser

https://github.com/ClickHouse/ClickHouse/pull/19236#discussion_r567673775
---
 src/Parsers/parseQuery.cpp | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/src/Parsers/parseQuery.cpp b/src/Parsers/parseQuery.cpp
index 48a92534e74..46f9fb3310d 100644
--- a/src/Parsers/parseQuery.cpp
+++ b/src/Parsers/parseQuery.cpp
@@ -269,14 +269,6 @@ ASTPtr tryParseQuery(
     // most of the checks.
     if (insert && insert->data)
     {
-        if (!parse_res)
-        {
-            // Generic parse error.
-            out_error_message = getSyntaxErrorMessage(query_begin, all_queries_end,
-                last_token, expected, hilite, query_description);
-            return nullptr;
-        }
-
         return res;
     }
 

From eb5c77f558bb823ec62410ed81f5763c3fc71d21 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Mon, 1 Feb 2021 14:27:26 +0300
Subject: [PATCH 0482/1238] Fix some races and better reaction to leader change

---
 contrib/NuRaft                                |  2 +-
 contrib/boost                                 |  2 +-
 src/Coordination/NuKeeperServer.cpp           | 18 ++++++---
 src/Coordination/NuKeeperServer.h             |  2 +
 .../TestKeeperStorageDispatcher.cpp           | 40 ++++++++++++++-----
 .../TestKeeperStorageDispatcher.h             |  7 ++--
 src/Server/TestKeeperTCPHandler.cpp           |  2 +
 7 files changed, 54 insertions(+), 19 deletions(-)

diff --git a/contrib/NuRaft b/contrib/NuRaft
index 9eb76db3ff1..c6f8528ead6 160000
--- a/contrib/NuRaft
+++ b/contrib/NuRaft
@@ -1 +1 @@
-Subproject commit 9eb76db3ff1a78f672303b5b51dcbe0f9b22cf96
+Subproject commit c6f8528ead61f7e4565164c6f15afef221235aa8
diff --git a/contrib/boost b/contrib/boost
index b2368f43f37..48f40ebb539 160000
--- a/contrib/boost
+++ b/contrib/boost
@@ -1 +1 @@
-Subproject commit b2368f43f37c4a592b17b1e9a474b93749c47319
+Subproject commit 48f40ebb539220d328958f8823b094c0b07a4e79
diff --git a/src/Coordination/NuKeeperServer.cpp b/src/Coordination/NuKeeperServer.cpp
index 272632387d5..014b2761f37 100644
--- a/src/Coordination/NuKeeperServer.cpp
+++ b/src/Coordination/NuKeeperServer.cpp
@@ -33,7 +33,11 @@ void NuKeeperServer::addServer(int server_id_, const std::string & server_uri_,
 {
     nuraft::srv_config config(server_id_, 0, server_uri_, "", /* follower= */ !can_become_leader_, priority);
     auto ret1 = raft_instance->add_srv(config);
-    if (ret1->get_result_code() != nuraft::cmd_result_code::OK)
+    auto code = ret1->get_result_code();
+    if (code == nuraft::cmd_result_code::TIMEOUT
+        || code == nuraft::cmd_result_code::BAD_REQUEST
+        || code == nuraft::cmd_result_code::NOT_LEADER
+        || code == nuraft::cmd_result_code::FAILED)
         throw Exception(ErrorCodes::RAFT_ERROR, "Cannot add server to RAFT quorum with code {}, message '{}'", ret1->get_result_code(), ret1->get_result_str());
 }
 
@@ -41,9 +45,9 @@ void NuKeeperServer::addServer(int server_id_, const std::string & server_uri_,
 void NuKeeperServer::startup()
 {
     nuraft::raft_params params;
-    params.heart_beat_interval_ = 100;
-    params.election_timeout_lower_bound_ = 200;
-    params.election_timeout_upper_bound_ = 400;
+    params.heart_beat_interval_ = 1000;
+    params.election_timeout_lower_bound_ = 3000;
+    params.election_timeout_upper_bound_ = 6000;
     params.reserved_log_items_ = 5000;
     params.snapshot_distance_ = 5000;
     params.client_req_timeout_ = 10000;
@@ -59,7 +63,7 @@ void NuKeeperServer::startup()
     if (!raft_instance)
         throw Exception(ErrorCodes::RAFT_ERROR, "Cannot allocate RAFT instance");
 
-    static constexpr auto MAX_RETRY = 30;
+    static constexpr auto MAX_RETRY = 100;
     for (size_t i = 0; i < MAX_RETRY; ++i)
     {
         if (raft_instance->is_initialized())
@@ -169,6 +173,8 @@ TestKeeperStorage::ResponsesForSessions NuKeeperServer::putRequests(const TestKe
             entries.push_back(getZooKeeperLogEntry(session_id, request));
         }
 
+        std::lock_guard lock(append_entries_mutex);
+
         auto result = raft_instance->append_entries(entries);
         if (!result->get_accepted())
         {
@@ -215,6 +221,8 @@ int64_t NuKeeperServer::getSessionID()
     nuraft::buffer_serializer bs(entry);
     bs.put_i64(0);
 
+    std::lock_guard lock(append_entries_mutex);
+
     auto result = raft_instance->append_entries({entry});
     if (!result->get_accepted())
         throw Exception(ErrorCodes::RAFT_ERROR, "Cannot send session_id request to RAFT");
diff --git a/src/Coordination/NuKeeperServer.h b/src/Coordination/NuKeeperServer.h
index f5f52802025..c1f32c67166 100644
--- a/src/Coordination/NuKeeperServer.h
+++ b/src/Coordination/NuKeeperServer.h
@@ -37,6 +37,8 @@ private:
 
     TestKeeperStorage::ResponsesForSessions readZooKeeperResponses(nuraft::ptr<nuraft::buffer> & buffer);
 
+    std::mutex append_entries_mutex;
+
 public:
     NuKeeperServer(int server_id_, const std::string & hostname_, int port_);
 
diff --git a/src/Coordination/TestKeeperStorageDispatcher.cpp b/src/Coordination/TestKeeperStorageDispatcher.cpp
index d5682e1688b..d9f9dfd30eb 100644
--- a/src/Coordination/TestKeeperStorageDispatcher.cpp
+++ b/src/Coordination/TestKeeperStorageDispatcher.cpp
@@ -11,6 +11,11 @@ namespace ErrorCodes
     extern const int TIMEOUT_EXCEEDED;
 }
 
+TestKeeperStorageDispatcher::TestKeeperStorageDispatcher()
+    : log(&Poco::Logger::get("TestKeeperDispatcher"))
+{
+}
+
 void TestKeeperStorageDispatcher::processingThread()
 {
     setThreadName("TestKeeperSProc");
@@ -101,6 +106,7 @@ namespace
 
 void TestKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfiguration & config)
 {
+    LOG_DEBUG(log, "Initializing storage dispatcher");
     int myid = config.getInt("test_keeper_server.server_id");
     std::string myhostname;
     int myport;
@@ -134,26 +140,39 @@ void TestKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfigura
     }
 
     server = std::make_unique<NuKeeperServer>(myid, myhostname, myport);
-    server->startup();
-    if (shouldBuildQuorum(myid, my_priority, my_can_become_leader, server_configs))
+    try
     {
-        for (const auto & [id, hostname, port, can_become_leader, priority] : server_configs)
+        server->startup();
+        if (shouldBuildQuorum(myid, my_priority, my_can_become_leader, server_configs))
         {
-            do
+            for (const auto & [id, hostname, port, can_become_leader, priority] : server_configs)
             {
-                server->addServer(id, hostname + ":" + std::to_string(port), can_become_leader, priority);
+                LOG_DEBUG(log, "Adding server with id {} ({}:{})", id, hostname, port);
+                do
+                {
+                    server->addServer(id, hostname + ":" + std::to_string(port), can_become_leader, priority);
+                }
+                while (!server->waitForServer(id));
+
+                LOG_DEBUG(log, "Server with id {} ({}:{}) added to cluster", id, hostname, port);
             }
-            while (!server->waitForServer(id));
+        }
+        else
+        {
+            LOG_DEBUG(log, "Waiting for {} servers to build cluster", ids.size());
+            server->waitForServers(ids);
+            server->waitForCatchUp();
         }
     }
-    else
+    catch (...)
     {
-        server->waitForServers(ids);
-        server->waitForCatchUp();
+        tryLogCurrentException(__PRETTY_FUNCTION__);
+        throw;
     }
 
     processing_thread = ThreadFromGlobalPool([this] { processingThread(); });
 
+    LOG_DEBUG(log, "Dispatcher initialized");
 }
 
 void TestKeeperStorageDispatcher::shutdown()
@@ -166,6 +185,7 @@ void TestKeeperStorageDispatcher::shutdown()
             if (shutdown_called)
                 return;
 
+            LOG_DEBUG(log, "Shutting down storage dispatcher");
             shutdown_called = true;
 
             if (processing_thread.joinable())
@@ -189,6 +209,8 @@ void TestKeeperStorageDispatcher::shutdown()
     {
         tryLogCurrentException(__PRETTY_FUNCTION__);
     }
+
+    LOG_DEBUG(log, "Dispatcher shut down");
 }
 
 TestKeeperStorageDispatcher::~TestKeeperStorageDispatcher()
diff --git a/src/Coordination/TestKeeperStorageDispatcher.h b/src/Coordination/TestKeeperStorageDispatcher.h
index a67a0c1fa3a..7ee88c8e940 100644
--- a/src/Coordination/TestKeeperStorageDispatcher.h
+++ b/src/Coordination/TestKeeperStorageDispatcher.h
@@ -5,6 +5,7 @@
 #include <functional>
 #include <Coordination/NuKeeperServer.h>
 #include <Poco/Util/AbstractConfiguration.h>
+#include <common/logger_useful.h>
 
 namespace DB
 {
@@ -30,14 +31,15 @@ private:
     ThreadFromGlobalPool processing_thread;
 
     std::unique_ptr<NuKeeperServer> server;
-    std::mutex session_id_mutex;
+
+    Poco::Logger * log;
 
 private:
     void processingThread();
     void setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response);
 
 public:
-    TestKeeperStorageDispatcher() = default;
+    TestKeeperStorageDispatcher();
 
     void initialize(const Poco::Util::AbstractConfiguration & config);
 
@@ -59,7 +61,6 @@ public:
 
     int64_t getSessionID()
     {
-        std::lock_guard lock(session_id_mutex);
         return server->getSessionID();
     }
 
diff --git a/src/Server/TestKeeperTCPHandler.cpp b/src/Server/TestKeeperTCPHandler.cpp
index 532c0723e69..bf7cb39d747 100644
--- a/src/Server/TestKeeperTCPHandler.cpp
+++ b/src/Server/TestKeeperTCPHandler.cpp
@@ -328,6 +328,7 @@ void TestKeeperTCPHandler::runImpl()
         catch (const Exception & e)
         {
             LOG_WARNING(log, "Cannot receive session id {}", e.displayText());
+            sendHandshake(false);
             return;
 
         }
@@ -336,6 +337,7 @@ void TestKeeperTCPHandler::runImpl()
     }
     else
     {
+        LOG_WARNING(log, "Ignoring user request, because no alive leader exist");
         sendHandshake(false);
         return;
     }

From 0fb7bc6cbe9cb73fb1aa99adc254f5611884d33f Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Mon, 1 Feb 2021 15:10:55 +0300
Subject: [PATCH 0483/1238] clang-tidy is very smart

---
 src/Coordination/NuKeeperServer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Coordination/NuKeeperServer.cpp b/src/Coordination/NuKeeperServer.cpp
index 014b2761f37..99af40154ca 100644
--- a/src/Coordination/NuKeeperServer.cpp
+++ b/src/Coordination/NuKeeperServer.cpp
@@ -31,7 +31,7 @@ NuKeeperServer::NuKeeperServer(int server_id_, const std::string & hostname_, in
 
 void NuKeeperServer::addServer(int server_id_, const std::string & server_uri_, bool can_become_leader_, int32_t priority)
 {
-    nuraft::srv_config config(server_id_, 0, server_uri_, "", /* follower= */ !can_become_leader_, priority);
+    nuraft::srv_config config(server_id_, 0, server_uri_, "", /* learner = */ !can_become_leader_, priority);
     auto ret1 = raft_instance->add_srv(config);
     auto code = ret1->get_result_code();
     if (code == nuraft::cmd_result_code::TIMEOUT

From accf4d262e904aca42d8f8ffdb2f864e3cfe2b94 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Mon, 1 Feb 2021 15:40:57 +0300
Subject: [PATCH 0484/1238] fix logical error

---
 .../Formats/Impl/ValuesBlockInputFormat.cpp   | 10 +++++
 .../01179_insert_values_semicolon.expect      | 39 +++++++++++++++++++
 .../01179_insert_values_semicolon.reference   |  0
 3 files changed, 49 insertions(+)
 create mode 100755 tests/queries/0_stateless/01179_insert_values_semicolon.expect
 create mode 100644 tests/queries/0_stateless/01179_insert_values_semicolon.reference

diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp
index c3b753e7261..34a4a98f16b 100644
--- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp
@@ -24,6 +24,7 @@ namespace ErrorCodes
     extern const int TYPE_MISMATCH;
     extern const int SUPPORT_IS_DISABLED;
     extern const int ARGUMENT_OUT_OF_BOUND;
+    extern const int CANNOT_READ_ALL_DATA;
 }
 
 
@@ -412,6 +413,15 @@ void ValuesBlockInputFormat::readPrefix()
 
 void ValuesBlockInputFormat::readSuffix()
 {
+    if (!buf.eof() && *buf.position() == ';')
+    {
+        ++buf.position();
+        skipWhitespaceIfAny(buf);
+        if (buf.hasUnreadData())
+            throw Exception("Cannot read data after semicolon", ErrorCodes::CANNOT_READ_ALL_DATA);
+        return;
+    }
+
     if (buf.hasUnreadData())
         throw Exception("Unread data in PeekableReadBuffer will be lost. Most likely it's a bug.", ErrorCodes::LOGICAL_ERROR);
 }
diff --git a/tests/queries/0_stateless/01179_insert_values_semicolon.expect b/tests/queries/0_stateless/01179_insert_values_semicolon.expect
new file mode 100755
index 00000000000..c832be72c10
--- /dev/null
+++ b/tests/queries/0_stateless/01179_insert_values_semicolon.expect
@@ -0,0 +1,39 @@
+#!/usr/bin/expect -f
+
+log_user 0
+set timeout 5
+match_max 100000
+# A default timeout action is to do nothing, change it to fail
+expect_after {
+    timeout {
+        exit 1
+    }
+}
+
+set basedir [file dirname $argv0]
+spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT"
+expect ":) "
+
+send -- "DROP TABLE IF EXISTS test_01179\r"
+expect "Ok."
+
+send -- "CREATE TABLE test_01179 (date DateTime) ENGINE=Memory()\r"
+expect "Ok."
+
+send -- "INSERT INTO test_01179 values ('2020-01-01')\r"
+expect "Ok."
+
+send -- "INSERT INTO test_01179 values ('2020-01-01'); \r"
+expect "Ok."
+
+send -- "INSERT INTO test_01179 values ('2020-01-01'); (1) \r"
+expect "Cannot read data after semicolon"
+
+send -- "SELECT date, count() FROM test_01179 GROUP BY date FORMAT TSV\r"
+expect "2020-01-01 00:00:00\t3"
+
+send -- "DROP TABLE test_01179\r"
+expect "Ok."
+
+send -- "\4"
+expect eof
diff --git a/tests/queries/0_stateless/01179_insert_values_semicolon.reference b/tests/queries/0_stateless/01179_insert_values_semicolon.reference
new file mode 100644
index 00000000000..e69de29bb2d

From 57c9b6c864d10909e74cd02e9ba4c90cd4487339 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Mon, 1 Feb 2021 16:18:17 +0300
Subject: [PATCH 0485/1238] Fix build without nuraft

---
 cmake/find/nuraft.cmake                       |  6 +--
 programs/server/Server.cpp                    | 42 +++++++++++--------
 src/CMakeLists.txt                            |  5 ++-
 .../TestKeeperStorageDispatcher.h             | 17 +++++++-
 src/Coordination/tests/gtest_for_build.cpp    | 12 +++++-
 src/Core/config_core.h.in                     |  1 +
 src/Interpreters/Context.cpp                  | 10 +++++
 src/Interpreters/Context.h                    |  5 ++-
 src/Server/TestKeeperTCPHandler.cpp           |  7 ++++
 src/Server/TestKeeperTCPHandler.h             |  8 ++++
 src/Server/TestKeeperTCPHandlerFactory.h      |  1 +
 11 files changed, 87 insertions(+), 27 deletions(-)

diff --git a/cmake/find/nuraft.cmake b/cmake/find/nuraft.cmake
index d31fe9c1de8..bcc656de129 100644
--- a/cmake/find/nuraft.cmake
+++ b/cmake/find/nuraft.cmake
@@ -1,6 +1,6 @@
 option(ENABLE_NURAFT "Enable NuRaft" ${ENABLE_LIBRARIES})
 
-if (NOT ENABLE_NURAFT)
+ if (NOT ENABLE_NURAFT)
     return()
 endif()
 
@@ -11,7 +11,7 @@ if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/NuRaft/CMakeLists.txt")
     return()
 endif ()
 
-if (NOT OS_FREEBSD)
+if (NOT OS_FREEBSD AND NOT OS_DARWIN)
     set (USE_NURAFT 1)
     set (NURAFT_LIBRARY nuraft)
 
@@ -20,5 +20,5 @@ if (NOT OS_FREEBSD)
     message (STATUS "Using NuRaft=${USE_NURAFT}: ${NURAFT_INCLUDE_DIR} : ${NURAFT_LIBRARY}")
 else()
     set (USE_NURAFT 0)
-    message (STATUS "Using internal NuRaft library on FreeBSD is not supported")
+    message (STATUS "Using internal NuRaft library on FreeBSD and Darwin is not supported")
 endif()
diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index fefabd8be71..801e8f2122b 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -59,7 +59,6 @@
 #include <Disks/registerDisks.h>
 #include <Common/Config/ConfigReloader.h>
 #include <Server/HTTPHandlerFactory.h>
-#include <Server/TestKeeperTCPHandlerFactory.h>
 #include "MetricsTransmitter.h"
 #include <Common/StatusFile.h>
 #include <Server/TCPHandlerFactory.h>
@@ -94,6 +93,9 @@
 #   include <Server/GRPCServer.h>
 #endif
 
+#if USE_NURAFT
+#   include <Server/TestKeeperTCPHandlerFactory.h>
+#endif
 
 namespace CurrentMetrics
 {
@@ -844,27 +846,31 @@ int Server::main(const std::vector<std::string> & /*args*/)
 
     if (config().has("test_keeper_server"))
     {
+#if USE_NURAFT
         /// Initialize test keeper RAFT. Do nothing if no test_keeper_server in config.
         global_context->initializeTestKeeperStorageDispatcher();
-    }
-
-    for (const auto & listen_host : listen_hosts)
-    {
-        /// TCP TestKeeper
-        const char * port_name = "test_keeper_server.tcp_port";
-        createServer(listen_host, port_name, listen_try, [&](UInt16 port)
+        for (const auto & listen_host : listen_hosts)
         {
-            Poco::Net::ServerSocket socket;
-            auto address = socketBindListen(socket, listen_host, port);
-            socket.setReceiveTimeout(settings.receive_timeout);
-            socket.setSendTimeout(settings.send_timeout);
-            servers_to_start_before_tables->emplace_back(
-                port_name,
-                std::make_unique<Poco::Net::TCPServer>(
-                    new TestKeeperTCPHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams));
+            /// TCP TestKeeper
+            const char * port_name = "test_keeper_server.tcp_port";
+            createServer(listen_host, port_name, listen_try, [&](UInt16 port)
+            {
+                Poco::Net::ServerSocket socket;
+                auto address = socketBindListen(socket, listen_host, port);
+                socket.setReceiveTimeout(settings.receive_timeout);
+                socket.setSendTimeout(settings.send_timeout);
+                servers_to_start_before_tables->emplace_back(
+                    port_name,
+                    std::make_unique<Poco::Net::TCPServer>(
+                        new TestKeeperTCPHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams));
+
+                LOG_INFO(log, "Listening for connections to fake zookeeper (tcp): {}", address.toString());
+            });
+        }
+#else
+        throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "ClickHouse server built without NuRaft library. Cannot use internal coordination.");
+#endif
 
-            LOG_INFO(log, "Listening for connections to fake zookeeper (tcp): {}", address.toString());
-        });
     }
 
     for (auto & server : *servers_to_start_before_tables)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 34c437ebde6..13703a5cd55 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -192,7 +192,10 @@ add_object_library(clickhouse_processors_merges Processors/Merges)
 add_object_library(clickhouse_processors_merges_algorithms Processors/Merges/Algorithms)
 add_object_library(clickhouse_processors_queryplan Processors/QueryPlan)
 add_object_library(clickhouse_processors_queryplan_optimizations Processors/QueryPlan/Optimizations)
-add_object_library(clickhouse_coordination Coordination)
+
+if (USE_NURAFT)
+    add_object_library(clickhouse_coordination Coordination)
+endif()
 
 set (DBMS_COMMON_LIBRARIES)
 # libgcc_s does not provide an implementation of an atomics library. Instead,
diff --git a/src/Coordination/TestKeeperStorageDispatcher.h b/src/Coordination/TestKeeperStorageDispatcher.h
index 7ee88c8e940..ddb90abb88a 100644
--- a/src/Coordination/TestKeeperStorageDispatcher.h
+++ b/src/Coordination/TestKeeperStorageDispatcher.h
@@ -1,11 +1,20 @@
 #pragma once
 
+#if !defined(ARCADIA_BUILD)
+#    include <Common/config.h>
+#    include "config_core.h"
+#endif
+
+#if USE_NURAFT
+
 #include <Common/ThreadPool.h>
 #include <Common/ConcurrentBoundedQueue.h>
+#include <Poco/Util/AbstractConfiguration.h>
+#include <Common/Exception.h>
+#include <common/logger_useful.h>
 #include <functional>
 #include <Coordination/NuKeeperServer.h>
-#include <Poco/Util/AbstractConfiguration.h>
-#include <common/logger_useful.h>
+
 
 namespace DB
 {
@@ -14,6 +23,7 @@ using ZooKeeperResponseCallback = std::function<void(const Coordination::ZooKeep
 
 class TestKeeperStorageDispatcher
 {
+
 private:
     Poco::Timespan operation_timeout{0, Coordination::DEFAULT_OPERATION_TIMEOUT_MS * 1000};
 
@@ -67,6 +77,9 @@ public:
     void registerSession(int64_t session_id, ZooKeeperResponseCallback callback);
     /// Call if we don't need any responses for this session no more (session was expired)
     void finishSession(int64_t session_id);
+
 };
 
 }
+
+#endif
diff --git a/src/Coordination/tests/gtest_for_build.cpp b/src/Coordination/tests/gtest_for_build.cpp
index b0fcec7e10d..d69f2c18bd4 100644
--- a/src/Coordination/tests/gtest_for_build.cpp
+++ b/src/Coordination/tests/gtest_for_build.cpp
@@ -1,5 +1,12 @@
 #include <gtest/gtest.h>
 
+#if !defined(ARCADIA_BUILD)
+#    include <Common/config.h>
+#    include "config_core.h"
+#endif
+
+#if USE_NURAFT
+
 #include <Coordination/InMemoryLogStore.h>
 #include <Coordination/InMemoryStateManager.h>
 #include <Coordination/TestKeeperStorageSerializer.h>
@@ -454,5 +461,8 @@ TEST(CoordinationTest, TestNuKeeperRaft)
     s4.launcher.shutdown(5);
 }
 
-#  endif
+# endif
+
+#endif
+
 #endif
diff --git a/src/Core/config_core.h.in b/src/Core/config_core.h.in
index 6c7a35abd7c..666ef32efdf 100644
--- a/src/Core/config_core.h.in
+++ b/src/Core/config_core.h.in
@@ -13,3 +13,4 @@
 #cmakedefine01 USE_LDAP
 #cmakedefine01 USE_ROCKSDB
 #cmakedefine01 USE_LIBPQXX
+#cmakedefine01 USE_NURAFT
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index adeb9b5862c..fe1b6a8a32e 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -304,8 +304,10 @@ struct ContextShared
     mutable zkutil::ZooKeeperPtr zookeeper;                 /// Client for ZooKeeper.
     ConfigurationPtr zookeeper_config;                      /// Stores zookeeper configs
 
+#if USE_NURAFT
     mutable std::mutex test_keeper_storage_dispatcher_mutex;
     mutable std::shared_ptr<TestKeeperStorageDispatcher> test_keeper_storage_dispatcher;
+#endif
     mutable std::mutex auxiliary_zookeepers_mutex;
     mutable std::map<String, zkutil::ZooKeeperPtr> auxiliary_zookeepers;    /// Map for auxiliary ZooKeeper clients.
     ConfigurationPtr auxiliary_zookeepers_config;           /// Stores auxiliary zookeepers configs
@@ -1579,8 +1581,10 @@ zkutil::ZooKeeperPtr Context::getZooKeeper() const
     return shared->zookeeper;
 }
 
+
 void Context::initializeTestKeeperStorageDispatcher() const
 {
+#if USE_NURAFT
     std::lock_guard lock(shared->test_keeper_storage_dispatcher_mutex);
 
     if (shared->test_keeper_storage_dispatcher)
@@ -1592,8 +1596,10 @@ void Context::initializeTestKeeperStorageDispatcher() const
         shared->test_keeper_storage_dispatcher = std::make_shared<TestKeeperStorageDispatcher>();
         shared->test_keeper_storage_dispatcher->initialize(config);
     }
+#endif
 }
 
+#if USE_NURAFT
 std::shared_ptr<TestKeeperStorageDispatcher> & Context::getTestKeeperStorageDispatcher() const
 {
     std::lock_guard lock(shared->test_keeper_storage_dispatcher_mutex);
@@ -1602,17 +1608,21 @@ std::shared_ptr<TestKeeperStorageDispatcher> & Context::getTestKeeperStorageDisp
 
     return shared->test_keeper_storage_dispatcher;
 }
+#endif
 
 void Context::shutdownTestKeeperStorageDispatcher() const
 {
+#if USE_NURAFT
     std::lock_guard lock(shared->test_keeper_storage_dispatcher_mutex);
     if (shared->test_keeper_storage_dispatcher)
     {
         shared->test_keeper_storage_dispatcher->shutdown();
         shared->test_keeper_storage_dispatcher.reset();
     }
+#endif
 }
 
+
 zkutil::ZooKeeperPtr Context::getAuxiliaryZooKeeper(const String & name) const
 {
     std::lock_guard lock(shared->auxiliary_zookeepers_mutex);
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index c8a71244164..3c78973b21a 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -573,9 +573,10 @@ public:
     /// Same as above but return a zookeeper connection from auxiliary_zookeepers configuration entry.
     std::shared_ptr<zkutil::ZooKeeper> getAuxiliaryZooKeeper(const String & name) const;
 
-
-    void initializeTestKeeperStorageDispatcher() const;
+#if USE_NURAFT
     std::shared_ptr<TestKeeperStorageDispatcher> & getTestKeeperStorageDispatcher() const;
+#endif
+    void initializeTestKeeperStorageDispatcher() const;
     void shutdownTestKeeperStorageDispatcher() const;
 
     /// Set auxiliary zookeepers configuration at server starting or configuration reloading.
diff --git a/src/Server/TestKeeperTCPHandler.cpp b/src/Server/TestKeeperTCPHandler.cpp
index bf7cb39d747..17f2ec547c9 100644
--- a/src/Server/TestKeeperTCPHandler.cpp
+++ b/src/Server/TestKeeperTCPHandler.cpp
@@ -1,4 +1,7 @@
 #include <Server/TestKeeperTCPHandler.h>
+
+#if USE_NURAFT
+
 #include <Common/ZooKeeper/ZooKeeperIO.h>
 #include <Core/Types.h>
 #include <IO/WriteBufferFromPocoSocket.h>
@@ -22,9 +25,11 @@
     #include <poll.h>
 #endif
 
+
 namespace DB
 {
 
+
 namespace ErrorCodes
 {
     extern const int SYSTEM_ERROR;
@@ -454,3 +459,5 @@ std::pair<Coordination::OpNum, Coordination::XID> TestKeeperTCPHandler::receiveR
 }
 
 }
+
+#endif
diff --git a/src/Server/TestKeeperTCPHandler.h b/src/Server/TestKeeperTCPHandler.h
index 53132a2b491..09543b5a888 100644
--- a/src/Server/TestKeeperTCPHandler.h
+++ b/src/Server/TestKeeperTCPHandler.h
@@ -1,5 +1,12 @@
 #pragma once
 
+#if !defined(ARCADIA_BUILD)
+#    include <Common/config.h>
+#    include "config_core.h"
+#endif
+
+#if USE_NURAFT
+
 #include <Poco/Net/TCPServerConnection.h>
 #include "IServer.h"
 #include <Common/Stopwatch.h>
@@ -53,3 +60,4 @@ private:
 };
 
 }
+#endif
diff --git a/src/Server/TestKeeperTCPHandlerFactory.h b/src/Server/TestKeeperTCPHandlerFactory.h
index ebf91aa31d4..a5bf6be8c8a 100644
--- a/src/Server/TestKeeperTCPHandlerFactory.h
+++ b/src/Server/TestKeeperTCPHandlerFactory.h
@@ -1,4 +1,5 @@
 #pragma once
+
 #include <Server/TestKeeperTCPHandler.h>
 #include <Poco/Net/TCPServerConnectionFactory.h>
 #include <Poco/Net/NetException.h>

From 365bf65f5a8223dba319c86182ecb20236b611a0 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Mon, 1 Feb 2021 17:14:59 +0300
Subject: [PATCH 0486/1238] Fix install script

---
 programs/server/Server.cpp                    |  18 +-
 programs/server/config.d/test_keeper_port.xml |   1 -
 src/Coordination/NuKeeperServer.cpp           |  24 +--
 src/Coordination/NuKeeperServer.h             |   8 +-
 src/Coordination/NuKeeperStateMachine.cpp     |  18 +-
 src/Coordination/NuKeeperStateMachine.h       |  12 +-
 ...tKeeperStorage.cpp => NuKeeperStorage.cpp} | 156 +++++++++---------
 ...{TestKeeperStorage.h => NuKeeperStorage.h} |   8 +-
 ...cher.cpp => NuKeeperStorageDispatcher.cpp} |  48 +++---
 ...spatcher.h => NuKeeperStorageDispatcher.h} |   8 +-
 ...izer.cpp => NuKeeperStorageSerializer.cpp} |  12 +-
 src/Coordination/NuKeeperStorageSerializer.h  |  17 ++
 .../TestKeeperStorageSerializer.h             |  17 --
 src/Coordination/tests/gtest_for_build.cpp    |  18 +-
 src/Coordination/ya.make                      |   6 +-
 src/Interpreters/Context.cpp                  |  40 ++---
 src/Interpreters/Context.h                    |   8 +-
 ...rTCPHandler.cpp => NuKeeperTCPHandler.cpp} |  36 ++--
 ...eeperTCPHandler.h => NuKeeperTCPHandler.h} |   8 +-
 ...rFactory.h => NuKeeperTCPHandlerFactory.h} |  12 +-
 src/Server/ya.make                            |   2 +-
 ...est_keeper_port.xml => nu_keeper_port.xml} |   4 +-
 tests/config/install.sh                       |   2 +-
 .../configs/enable_test_keeper.xml            |   4 +-
 .../configs/enable_test_keeper1.xml           |   4 +-
 .../configs/enable_test_keeper2.xml           |   4 +-
 .../configs/enable_test_keeper3.xml           |   4 +-
 27 files changed, 249 insertions(+), 250 deletions(-)
 delete mode 120000 programs/server/config.d/test_keeper_port.xml
 rename src/Coordination/{TestKeeperStorage.cpp => NuKeeperStorage.cpp} (75%)
 rename src/Coordination/{TestKeeperStorage.h => NuKeeperStorage.h} (92%)
 rename src/Coordination/{TestKeeperStorageDispatcher.cpp => NuKeeperStorageDispatcher.cpp} (76%)
 rename src/Coordination/{TestKeeperStorageDispatcher.h => NuKeeperStorageDispatcher.h} (90%)
 rename src/Coordination/{TestKeeperStorageSerializer.cpp => NuKeeperStorageSerializer.cpp} (84%)
 create mode 100644 src/Coordination/NuKeeperStorageSerializer.h
 delete mode 100644 src/Coordination/TestKeeperStorageSerializer.h
 rename src/Server/{TestKeeperTCPHandler.cpp => NuKeeperTCPHandler.cpp} (92%)
 rename src/Server/{TestKeeperTCPHandler.h => NuKeeperTCPHandler.h} (83%)
 rename src/Server/{TestKeeperTCPHandlerFactory.h => NuKeeperTCPHandlerFactory.h} (68%)
 rename tests/config/config.d/{test_keeper_port.xml => nu_keeper_port.xml} (88%)

diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 801e8f2122b..fb58e85d813 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -94,7 +94,7 @@
 #endif
 
 #if USE_NURAFT
-#   include <Server/TestKeeperTCPHandlerFactory.h>
+#   include <Server/NuKeeperTCPHandlerFactory.h>
 #endif
 
 namespace CurrentMetrics
@@ -844,15 +844,15 @@ int Server::main(const std::vector<std::string> & /*args*/)
         listen_try = true;
     }
 
-    if (config().has("test_keeper_server"))
+    if (config().has("nu_keeper_server"))
     {
 #if USE_NURAFT
-        /// Initialize test keeper RAFT. Do nothing if no test_keeper_server in config.
-        global_context->initializeTestKeeperStorageDispatcher();
+        /// Initialize test keeper RAFT. Do nothing if no nu_keeper_server in config.
+        global_context->initializeNuKeeperStorageDispatcher();
         for (const auto & listen_host : listen_hosts)
         {
-            /// TCP TestKeeper
-            const char * port_name = "test_keeper_server.tcp_port";
+            /// TCP NuKeeper
+            const char * port_name = "nu_keeper_server.tcp_port";
             createServer(listen_host, port_name, listen_try, [&](UInt16 port)
             {
                 Poco::Net::ServerSocket socket;
@@ -862,9 +862,9 @@ int Server::main(const std::vector<std::string> & /*args*/)
                 servers_to_start_before_tables->emplace_back(
                     port_name,
                     std::make_unique<Poco::Net::TCPServer>(
-                        new TestKeeperTCPHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams));
+                        new NuKeeperTCPHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams));
 
-                LOG_INFO(log, "Listening for connections to fake zookeeper (tcp): {}", address.toString());
+                LOG_INFO(log, "Listening for connections to NuKeeper (tcp): {}", address.toString());
             });
         }
 #else
@@ -911,7 +911,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
             else
                 LOG_INFO(log, "Closed connections to servers for tables.");
 
-            global_context->shutdownTestKeeperStorageDispatcher();
+            global_context->shutdownNuKeeperStorageDispatcher();
         }
 
         /** Explicitly destroy Context. It is more convenient than in destructor of Server, because logger is still available.
diff --git a/programs/server/config.d/test_keeper_port.xml b/programs/server/config.d/test_keeper_port.xml
deleted file mode 120000
index f3f721caae0..00000000000
--- a/programs/server/config.d/test_keeper_port.xml
+++ /dev/null
@@ -1 +0,0 @@
-../../../tests/config/config.d/test_keeper_port.xml
\ No newline at end of file
diff --git a/src/Coordination/NuKeeperServer.cpp b/src/Coordination/NuKeeperServer.cpp
index 99af40154ca..bb74ea19aa7 100644
--- a/src/Coordination/NuKeeperServer.cpp
+++ b/src/Coordination/NuKeeperServer.cpp
@@ -46,8 +46,8 @@ void NuKeeperServer::startup()
 {
     nuraft::raft_params params;
     params.heart_beat_interval_ = 1000;
-    params.election_timeout_lower_bound_ = 3000;
-    params.election_timeout_upper_bound_ = 6000;
+    params.election_timeout_lower_bound_ = 500;
+    params.election_timeout_upper_bound_ = 1000;
     params.reserved_log_items_ = 5000;
     params.snapshot_distance_ = 5000;
     params.client_req_timeout_ = 10000;
@@ -75,9 +75,9 @@ void NuKeeperServer::startup()
     throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Cannot start RAFT server within startup timeout");
 }
 
-TestKeeperStorage::ResponsesForSessions NuKeeperServer::shutdown(const TestKeeperStorage::RequestsForSessions & expired_requests)
+NuKeeperStorage::ResponsesForSessions NuKeeperServer::shutdown(const NuKeeperStorage::RequestsForSessions & expired_requests)
 {
-    TestKeeperStorage::ResponsesForSessions responses;
+    NuKeeperStorage::ResponsesForSessions responses;
     if (isLeader())
     {
         try
@@ -108,9 +108,9 @@ nuraft::ptr<nuraft::buffer> getZooKeeperLogEntry(int64_t session_id, const Coord
 
 }
 
-TestKeeperStorage::ResponsesForSessions NuKeeperServer::readZooKeeperResponses(nuraft::ptr<nuraft::buffer> & buffer)
+NuKeeperStorage::ResponsesForSessions NuKeeperServer::readZooKeeperResponses(nuraft::ptr<nuraft::buffer> & buffer)
 {
-    DB::TestKeeperStorage::ResponsesForSessions results;
+    DB::NuKeeperStorage::ResponsesForSessions results;
     DB::ReadBufferFromNuraftBuffer buf(buffer);
 
     while (!buf.eof())
@@ -153,12 +153,12 @@ TestKeeperStorage::ResponsesForSessions NuKeeperServer::readZooKeeperResponses(n
         response->zxid = zxid;
         response->error = err;
 
-        results.push_back(DB::TestKeeperStorage::ResponseForSession{session_id, response});
+        results.push_back(DB::NuKeeperStorage::ResponseForSession{session_id, response});
     }
     return results;
 }
 
-TestKeeperStorage::ResponsesForSessions NuKeeperServer::putRequests(const TestKeeperStorage::RequestsForSessions & requests)
+NuKeeperStorage::ResponsesForSessions NuKeeperServer::putRequests(const NuKeeperStorage::RequestsForSessions & requests)
 {
     if (isLeaderAlive() && requests.size() == 1 && requests[0].request->isReadRequest())
     {
@@ -178,28 +178,28 @@ TestKeeperStorage::ResponsesForSessions NuKeeperServer::putRequests(const TestKe
         auto result = raft_instance->append_entries(entries);
         if (!result->get_accepted())
         {
-            TestKeeperStorage::ResponsesForSessions responses;
+            NuKeeperStorage::ResponsesForSessions responses;
             for (const auto & [session_id, request] : requests)
             {
                 auto response = request->makeResponse();
                 response->xid = request->xid;
                 response->zxid = 0; /// FIXME what we can do with it?
                 response->error = Coordination::Error::ZSESSIONEXPIRED;
-                responses.push_back(DB::TestKeeperStorage::ResponseForSession{session_id, response});
+                responses.push_back(DB::NuKeeperStorage::ResponseForSession{session_id, response});
             }
             return responses;
         }
 
         if (result->get_result_code() == nuraft::cmd_result_code::TIMEOUT)
         {
-            TestKeeperStorage::ResponsesForSessions responses;
+            NuKeeperStorage::ResponsesForSessions responses;
             for (const auto & [session_id, request] : requests)
             {
                 auto response = request->makeResponse();
                 response->xid = request->xid;
                 response->zxid = 0; /// FIXME what we can do with it?
                 response->error = Coordination::Error::ZOPERATIONTIMEOUT;
-                responses.push_back(DB::TestKeeperStorage::ResponseForSession{session_id, response});
+                responses.push_back(DB::NuKeeperStorage::ResponseForSession{session_id, response});
             }
             return responses;
         }
diff --git a/src/Coordination/NuKeeperServer.h b/src/Coordination/NuKeeperServer.h
index c1f32c67166..352836dfc27 100644
--- a/src/Coordination/NuKeeperServer.h
+++ b/src/Coordination/NuKeeperServer.h
@@ -4,7 +4,7 @@
 #include <Coordination/InMemoryLogStore.h>
 #include <Coordination/InMemoryStateManager.h>
 #include <Coordination/NuKeeperStateMachine.h>
-#include <Coordination/TestKeeperStorage.h>
+#include <Coordination/NuKeeperStorage.h>
 #include <unordered_map>
 
 namespace DB
@@ -35,7 +35,7 @@ private:
 
     SessionIDOps ops_mapping;
 
-    TestKeeperStorage::ResponsesForSessions readZooKeeperResponses(nuraft::ptr<nuraft::buffer> & buffer);
+    NuKeeperStorage::ResponsesForSessions readZooKeeperResponses(nuraft::ptr<nuraft::buffer> & buffer);
 
     std::mutex append_entries_mutex;
 
@@ -44,7 +44,7 @@ public:
 
     void startup();
 
-    TestKeeperStorage::ResponsesForSessions putRequests(const TestKeeperStorage::RequestsForSessions & requests);
+    NuKeeperStorage::ResponsesForSessions putRequests(const NuKeeperStorage::RequestsForSessions & requests);
 
     int64_t getSessionID();
 
@@ -58,7 +58,7 @@ public:
     void waitForServers(const std::vector<int32_t> & ids) const;
     void waitForCatchUp() const;
 
-    TestKeeperStorage::ResponsesForSessions shutdown(const TestKeeperStorage::RequestsForSessions & expired_requests);
+    NuKeeperStorage::ResponsesForSessions shutdown(const NuKeeperStorage::RequestsForSessions & expired_requests);
 };
 
 }
diff --git a/src/Coordination/NuKeeperStateMachine.cpp b/src/Coordination/NuKeeperStateMachine.cpp
index 9f4572c02e0..b6521e1d648 100644
--- a/src/Coordination/NuKeeperStateMachine.cpp
+++ b/src/Coordination/NuKeeperStateMachine.cpp
@@ -3,17 +3,17 @@
 #include <Coordination/WriteBufferFromNuraftBuffer.h>
 #include <IO/ReadHelpers.h>
 #include <Common/ZooKeeper/ZooKeeperIO.h>
-#include <Coordination/TestKeeperStorageSerializer.h>
+#include <Coordination/NuKeeperStorageSerializer.h>
 
 namespace DB
 {
 
 static constexpr int MAX_SNAPSHOTS = 3;
 
-TestKeeperStorage::RequestForSession parseRequest(nuraft::buffer & data)
+NuKeeperStorage::RequestForSession parseRequest(nuraft::buffer & data)
 {
     ReadBufferFromNuraftBuffer buffer(data);
-    TestKeeperStorage::RequestForSession request_for_session;
+    NuKeeperStorage::RequestForSession request_for_session;
     readIntBinary(request_for_session.session_id, buffer);
 
     int32_t length;
@@ -31,7 +31,7 @@ TestKeeperStorage::RequestForSession parseRequest(nuraft::buffer & data)
     return request_for_session;
 }
 
-nuraft::ptr<nuraft::buffer> writeResponses(TestKeeperStorage::ResponsesForSessions & responses)
+nuraft::ptr<nuraft::buffer> writeResponses(NuKeeperStorage::ResponsesForSessions & responses)
 {
     WriteBufferFromNuraftBuffer buffer;
     for (const auto & response_and_session : responses)
@@ -67,7 +67,7 @@ nuraft::ptr<nuraft::buffer> NuKeeperStateMachine::commit(const size_t log_idx, n
     else
     {
         auto request_for_session = parseRequest(data);
-        TestKeeperStorage::ResponsesForSessions responses_for_sessions;
+        NuKeeperStorage::ResponsesForSessions responses_for_sessions;
         {
             std::lock_guard lock(storage_lock);
             responses_for_sessions = storage.processRequest(request_for_session.request, request_for_session.session_id);
@@ -118,10 +118,10 @@ NuKeeperStateMachine::StorageSnapshotPtr NuKeeperStateMachine::readSnapshot(nura
 {
     nuraft::ptr<nuraft::buffer> snp_buf = s.serialize();
     nuraft::ptr<nuraft::snapshot> ss = nuraft::snapshot::deserialize(*snp_buf);
-    TestKeeperStorageSerializer serializer;
+    NuKeeperStorageSerializer serializer;
 
     ReadBufferFromNuraftBuffer reader(in);
-    TestKeeperStorage new_storage;
+    NuKeeperStorage new_storage;
     serializer.deserialize(new_storage, reader);
     return std::make_shared<StorageSnapshot>(ss, new_storage);
 }
@@ -129,7 +129,7 @@ NuKeeperStateMachine::StorageSnapshotPtr NuKeeperStateMachine::readSnapshot(nura
 
 void NuKeeperStateMachine::writeSnapshot(const NuKeeperStateMachine::StorageSnapshotPtr & snapshot, nuraft::ptr<nuraft::buffer> & out)
 {
-    TestKeeperStorageSerializer serializer;
+    NuKeeperStorageSerializer serializer;
 
     WriteBufferFromNuraftBuffer writer;
     serializer.serialize(snapshot->storage, writer);
@@ -223,7 +223,7 @@ int NuKeeperStateMachine::read_logical_snp_obj(
     return 0;
 }
 
-TestKeeperStorage::ResponsesForSessions NuKeeperStateMachine::processReadRequest(const TestKeeperStorage::RequestForSession & request_for_session)
+NuKeeperStorage::ResponsesForSessions NuKeeperStateMachine::processReadRequest(const NuKeeperStorage::RequestForSession & request_for_session)
 {
     std::lock_guard lock(storage_lock);
     return storage.processRequest(request_for_session.request, request_for_session.session_id);
diff --git a/src/Coordination/NuKeeperStateMachine.h b/src/Coordination/NuKeeperStateMachine.h
index 7767f552cec..41c28caa76c 100644
--- a/src/Coordination/NuKeeperStateMachine.h
+++ b/src/Coordination/NuKeeperStateMachine.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include <Coordination/TestKeeperStorage.h>
+#include <Coordination/NuKeeperStorage.h>
 #include <libnuraft/nuraft.hxx>
 #include <common/logger_useful.h>
 
@@ -42,23 +42,23 @@ public:
         nuraft::ptr<nuraft::buffer> & data_out,
         bool & is_last_obj) override;
 
-    TestKeeperStorage & getStorage()
+    NuKeeperStorage & getStorage()
     {
         return storage;
     }
 
-    TestKeeperStorage::ResponsesForSessions processReadRequest(const TestKeeperStorage::RequestForSession & request_for_session);
+    NuKeeperStorage::ResponsesForSessions processReadRequest(const NuKeeperStorage::RequestForSession & request_for_session);
 
 private:
     struct StorageSnapshot
     {
-        StorageSnapshot(const nuraft::ptr<nuraft::snapshot> & s, const TestKeeperStorage & storage_)
+        StorageSnapshot(const nuraft::ptr<nuraft::snapshot> & s, const NuKeeperStorage & storage_)
             : snapshot(s)
             , storage(storage_)
         {}
 
         nuraft::ptr<nuraft::snapshot> snapshot;
-        TestKeeperStorage storage;
+        NuKeeperStorage storage;
     };
 
     using StorageSnapshotPtr = std::shared_ptr<StorageSnapshot>;
@@ -69,7 +69,7 @@ private:
 
     static void writeSnapshot(const StorageSnapshotPtr & snapshot, nuraft::ptr<nuraft::buffer> & out);
 
-    TestKeeperStorage storage;
+    NuKeeperStorage storage;
     /// Mutex for snapshots
     std::mutex snapshots_lock;
 
diff --git a/src/Coordination/TestKeeperStorage.cpp b/src/Coordination/NuKeeperStorage.cpp
similarity index 75%
rename from src/Coordination/TestKeeperStorage.cpp
rename to src/Coordination/NuKeeperStorage.cpp
index ef72f5d4eaa..9a8b96d63a3 100644
--- a/src/Coordination/TestKeeperStorage.cpp
+++ b/src/Coordination/NuKeeperStorage.cpp
@@ -1,4 +1,4 @@
-#include <Coordination/TestKeeperStorage.h>
+#include <Coordination/NuKeeperStorage.h>
 #include <Common/ZooKeeper/IKeeper.h>
 #include <Common/setThreadName.h>
 #include <mutex>
@@ -31,9 +31,9 @@ static String baseName(const String & path)
     return path.substr(rslash_pos + 1);
 }
 
-static TestKeeperStorage::ResponsesForSessions processWatchesImpl(const String & path, TestKeeperStorage::Watches & watches, TestKeeperStorage::Watches & list_watches, Coordination::Event event_type)
+static NuKeeperStorage::ResponsesForSessions processWatchesImpl(const String & path, NuKeeperStorage::Watches & watches, NuKeeperStorage::Watches & list_watches, Coordination::Event event_type)
 {
-    TestKeeperStorage::ResponsesForSessions result;
+    NuKeeperStorage::ResponsesForSessions result;
     auto it = watches.find(path);
     if (it != watches.end())
     {
@@ -44,7 +44,7 @@ static TestKeeperStorage::ResponsesForSessions processWatchesImpl(const String &
         watch_response->type = event_type;
         watch_response->state = Coordination::State::CONNECTED;
         for (auto watcher_session : it->second)
-            result.push_back(TestKeeperStorage::ResponseForSession{watcher_session, watch_response});
+            result.push_back(NuKeeperStorage::ResponseForSession{watcher_session, watch_response});
 
         watches.erase(it);
     }
@@ -60,52 +60,52 @@ static TestKeeperStorage::ResponsesForSessions processWatchesImpl(const String &
         watch_list_response->type = Coordination::Event::CHILD;
         watch_list_response->state = Coordination::State::CONNECTED;
         for (auto watcher_session : it->second)
-            result.push_back(TestKeeperStorage::ResponseForSession{watcher_session, watch_list_response});
+            result.push_back(NuKeeperStorage::ResponseForSession{watcher_session, watch_list_response});
 
         list_watches.erase(it);
     }
     return result;
 }
 
-TestKeeperStorage::TestKeeperStorage()
+NuKeeperStorage::NuKeeperStorage()
 {
     container.emplace("/", Node());
 }
 
 using Undo = std::function<void()>;
 
-struct TestKeeperStorageRequest
+struct NuKeeperStorageRequest
 {
     Coordination::ZooKeeperRequestPtr zk_request;
 
-    explicit TestKeeperStorageRequest(const Coordination::ZooKeeperRequestPtr & zk_request_)
+    explicit NuKeeperStorageRequest(const Coordination::ZooKeeperRequestPtr & zk_request_)
         : zk_request(zk_request_)
     {}
-    virtual std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(TestKeeperStorage::Container & container, TestKeeperStorage::Ephemerals & ephemerals, int64_t zxid, int64_t session_id) const = 0;
-    virtual TestKeeperStorage::ResponsesForSessions processWatches(TestKeeperStorage::Watches & /*watches*/, TestKeeperStorage::Watches & /*list_watches*/) const { return {}; }
+    virtual std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & container, NuKeeperStorage::Ephemerals & ephemerals, int64_t zxid, int64_t session_id) const = 0;
+    virtual NuKeeperStorage::ResponsesForSessions processWatches(NuKeeperStorage::Watches & /*watches*/, NuKeeperStorage::Watches & /*list_watches*/) const { return {}; }
 
-    virtual ~TestKeeperStorageRequest() = default;
+    virtual ~NuKeeperStorageRequest() = default;
 };
 
-struct TestKeeperStorageHeartbeatRequest final : public TestKeeperStorageRequest
+struct NuKeeperStorageHeartbeatRequest final : public NuKeeperStorageRequest
 {
-    using TestKeeperStorageRequest::TestKeeperStorageRequest;
-    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(TestKeeperStorage::Container & /* container */, TestKeeperStorage::Ephemerals & /* ephemerals */, int64_t /* zxid */, int64_t /* session_id */) const override
+    using NuKeeperStorageRequest::NuKeeperStorageRequest;
+    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & /* container */, NuKeeperStorage::Ephemerals & /* ephemerals */, int64_t /* zxid */, int64_t /* session_id */) const override
     {
         return {zk_request->makeResponse(), {}};
     }
 };
 
-struct TestKeeperStorageCreateRequest final : public TestKeeperStorageRequest
+struct NuKeeperStorageCreateRequest final : public NuKeeperStorageRequest
 {
-    using TestKeeperStorageRequest::TestKeeperStorageRequest;
+    using NuKeeperStorageRequest::NuKeeperStorageRequest;
 
-    TestKeeperStorage::ResponsesForSessions processWatches(TestKeeperStorage::Watches & watches, TestKeeperStorage::Watches & list_watches) const override
+    NuKeeperStorage::ResponsesForSessions processWatches(NuKeeperStorage::Watches & watches, NuKeeperStorage::Watches & list_watches) const override
     {
         return processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::CREATED);
     }
 
-    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(TestKeeperStorage::Container & container, TestKeeperStorage::Ephemerals & ephemerals, int64_t zxid, int64_t session_id) const override
+    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & container, NuKeeperStorage::Ephemerals & ephemerals, int64_t zxid, int64_t session_id) const override
     {
         Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse();
         Undo undo;
@@ -130,7 +130,7 @@ struct TestKeeperStorageCreateRequest final : public TestKeeperStorageRequest
             }
             else
             {
-                TestKeeperStorage::Node created_node;
+                NuKeeperStorage::Node created_node;
                 created_node.seq_num = 0;
                 created_node.stat.czxid = zxid;
                 created_node.stat.mzxid = zxid;
@@ -185,10 +185,10 @@ struct TestKeeperStorageCreateRequest final : public TestKeeperStorageRequest
     }
 };
 
-struct TestKeeperStorageGetRequest final : public TestKeeperStorageRequest
+struct NuKeeperStorageGetRequest final : public NuKeeperStorageRequest
 {
-    using TestKeeperStorageRequest::TestKeeperStorageRequest;
-    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(TestKeeperStorage::Container & container, TestKeeperStorage::Ephemerals & /* ephemerals */, int64_t /* zxid */, int64_t /* session_id */) const override
+    using NuKeeperStorageRequest::NuKeeperStorageRequest;
+    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & container, NuKeeperStorage::Ephemerals & /* ephemerals */, int64_t /* zxid */, int64_t /* session_id */) const override
     {
         Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse();
         Coordination::ZooKeeperGetResponse & response = dynamic_cast<Coordination::ZooKeeperGetResponse &>(*response_ptr);
@@ -210,10 +210,10 @@ struct TestKeeperStorageGetRequest final : public TestKeeperStorageRequest
     }
 };
 
-struct TestKeeperStorageRemoveRequest final : public TestKeeperStorageRequest
+struct NuKeeperStorageRemoveRequest final : public NuKeeperStorageRequest
 {
-    using TestKeeperStorageRequest::TestKeeperStorageRequest;
-    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(TestKeeperStorage::Container & container, TestKeeperStorage::Ephemerals & ephemerals, int64_t /*zxid*/, int64_t session_id) const override
+    using NuKeeperStorageRequest::NuKeeperStorageRequest;
+    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & container, NuKeeperStorage::Ephemerals & ephemerals, int64_t /*zxid*/, int64_t session_id) const override
     {
         Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse();
         Coordination::ZooKeeperRemoveResponse & response = dynamic_cast<Coordination::ZooKeeperRemoveResponse &>(*response_ptr);
@@ -260,16 +260,16 @@ struct TestKeeperStorageRemoveRequest final : public TestKeeperStorageRequest
         return { response_ptr, undo };
     }
 
-    TestKeeperStorage::ResponsesForSessions processWatches(TestKeeperStorage::Watches & watches, TestKeeperStorage::Watches & list_watches) const override
+    NuKeeperStorage::ResponsesForSessions processWatches(NuKeeperStorage::Watches & watches, NuKeeperStorage::Watches & list_watches) const override
     {
         return processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::DELETED);
     }
 };
 
-struct TestKeeperStorageExistsRequest final : public TestKeeperStorageRequest
+struct NuKeeperStorageExistsRequest final : public NuKeeperStorageRequest
 {
-    using TestKeeperStorageRequest::TestKeeperStorageRequest;
-    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(TestKeeperStorage::Container & container, TestKeeperStorage::Ephemerals & /* ephemerals */, int64_t /*zxid*/, int64_t /* session_id */) const override
+    using NuKeeperStorageRequest::NuKeeperStorageRequest;
+    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & container, NuKeeperStorage::Ephemerals & /* ephemerals */, int64_t /*zxid*/, int64_t /* session_id */) const override
     {
         Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse();
         Coordination::ZooKeeperExistsResponse & response = dynamic_cast<Coordination::ZooKeeperExistsResponse &>(*response_ptr);
@@ -290,10 +290,10 @@ struct TestKeeperStorageExistsRequest final : public TestKeeperStorageRequest
     }
 };
 
-struct TestKeeperStorageSetRequest final : public TestKeeperStorageRequest
+struct NuKeeperStorageSetRequest final : public NuKeeperStorageRequest
 {
-    using TestKeeperStorageRequest::TestKeeperStorageRequest;
-    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(TestKeeperStorage::Container & container, TestKeeperStorage::Ephemerals & /* ephemerals */, int64_t zxid, int64_t /* session_id */) const override
+    using NuKeeperStorageRequest::NuKeeperStorageRequest;
+    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & container, NuKeeperStorage::Ephemerals & /* ephemerals */, int64_t zxid, int64_t /* session_id */) const override
     {
         Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse();
         Coordination::ZooKeeperSetResponse & response = dynamic_cast<Coordination::ZooKeeperSetResponse &>(*response_ptr);
@@ -333,17 +333,17 @@ struct TestKeeperStorageSetRequest final : public TestKeeperStorageRequest
         return { response_ptr, undo };
     }
 
-    TestKeeperStorage::ResponsesForSessions processWatches(TestKeeperStorage::Watches & watches, TestKeeperStorage::Watches & list_watches) const override
+    NuKeeperStorage::ResponsesForSessions processWatches(NuKeeperStorage::Watches & watches, NuKeeperStorage::Watches & list_watches) const override
     {
         return processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::CHANGED);
     }
 
 };
 
-struct TestKeeperStorageListRequest final : public TestKeeperStorageRequest
+struct NuKeeperStorageListRequest final : public NuKeeperStorageRequest
 {
-    using TestKeeperStorageRequest::TestKeeperStorageRequest;
-    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(TestKeeperStorage::Container & container, TestKeeperStorage::Ephemerals & /* ephemerals */, int64_t /*zxid*/, int64_t /*session_id*/) const override
+    using NuKeeperStorageRequest::NuKeeperStorageRequest;
+    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & container, NuKeeperStorage::Ephemerals & /* ephemerals */, int64_t /*zxid*/, int64_t /*session_id*/) const override
     {
         Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse();
         Coordination::ZooKeeperListResponse & response = dynamic_cast<Coordination::ZooKeeperListResponse &>(*response_ptr);
@@ -379,10 +379,10 @@ struct TestKeeperStorageListRequest final : public TestKeeperStorageRequest
     }
 };
 
-struct TestKeeperStorageCheckRequest final : public TestKeeperStorageRequest
+struct NuKeeperStorageCheckRequest final : public NuKeeperStorageRequest
 {
-    using TestKeeperStorageRequest::TestKeeperStorageRequest;
-    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(TestKeeperStorage::Container & container, TestKeeperStorage::Ephemerals & /* ephemerals */, int64_t /*zxid*/, int64_t /*session_id*/) const override
+    using NuKeeperStorageRequest::NuKeeperStorageRequest;
+    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & container, NuKeeperStorage::Ephemerals & /* ephemerals */, int64_t /*zxid*/, int64_t /*session_id*/) const override
     {
         Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse();
         Coordination::ZooKeeperCheckResponse & response = dynamic_cast<Coordination::ZooKeeperCheckResponse &>(*response_ptr);
@@ -405,11 +405,11 @@ struct TestKeeperStorageCheckRequest final : public TestKeeperStorageRequest
     }
 };
 
-struct TestKeeperStorageMultiRequest final : public TestKeeperStorageRequest
+struct NuKeeperStorageMultiRequest final : public NuKeeperStorageRequest
 {
-    std::vector<TestKeeperStorageRequestPtr> concrete_requests;
-    explicit TestKeeperStorageMultiRequest(const Coordination::ZooKeeperRequestPtr & zk_request_)
-        : TestKeeperStorageRequest(zk_request_)
+    std::vector<NuKeeperStorageRequestPtr> concrete_requests;
+    explicit NuKeeperStorageMultiRequest(const Coordination::ZooKeeperRequestPtr & zk_request_)
+        : NuKeeperStorageRequest(zk_request_)
     {
         Coordination::ZooKeeperMultiRequest & request = dynamic_cast<Coordination::ZooKeeperMultiRequest &>(*zk_request);
         concrete_requests.reserve(request.requests.size());
@@ -419,26 +419,26 @@ struct TestKeeperStorageMultiRequest final : public TestKeeperStorageRequest
             auto sub_zk_request = std::dynamic_pointer_cast<Coordination::ZooKeeperRequest>(sub_request);
             if (sub_zk_request->getOpNum() == Coordination::OpNum::Create)
             {
-                concrete_requests.push_back(std::make_shared<TestKeeperStorageCreateRequest>(sub_zk_request));
+                concrete_requests.push_back(std::make_shared<NuKeeperStorageCreateRequest>(sub_zk_request));
             }
             else if (sub_zk_request->getOpNum() == Coordination::OpNum::Remove)
             {
-                concrete_requests.push_back(std::make_shared<TestKeeperStorageRemoveRequest>(sub_zk_request));
+                concrete_requests.push_back(std::make_shared<NuKeeperStorageRemoveRequest>(sub_zk_request));
             }
             else if (sub_zk_request->getOpNum() == Coordination::OpNum::Set)
             {
-                concrete_requests.push_back(std::make_shared<TestKeeperStorageSetRequest>(sub_zk_request));
+                concrete_requests.push_back(std::make_shared<NuKeeperStorageSetRequest>(sub_zk_request));
             }
             else if (sub_zk_request->getOpNum() == Coordination::OpNum::Check)
             {
-                concrete_requests.push_back(std::make_shared<TestKeeperStorageCheckRequest>(sub_zk_request));
+                concrete_requests.push_back(std::make_shared<NuKeeperStorageCheckRequest>(sub_zk_request));
             }
             else
                 throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal command as part of multi ZooKeeper request {}", sub_zk_request->getOpNum());
         }
     }
 
-    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(TestKeeperStorage::Container & container, TestKeeperStorage::Ephemerals & ephemerals, int64_t zxid, int64_t session_id) const override
+    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & container, NuKeeperStorage::Ephemerals & ephemerals, int64_t zxid, int64_t session_id) const override
     {
         Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse();
         Coordination::ZooKeeperMultiResponse & response = dynamic_cast<Coordination::ZooKeeperMultiResponse &>(*response_ptr);
@@ -491,9 +491,9 @@ struct TestKeeperStorageMultiRequest final : public TestKeeperStorageRequest
         }
     }
 
-    TestKeeperStorage::ResponsesForSessions processWatches(TestKeeperStorage::Watches & watches, TestKeeperStorage::Watches & list_watches) const override
+    NuKeeperStorage::ResponsesForSessions processWatches(NuKeeperStorage::Watches & watches, NuKeeperStorage::Watches & list_watches) const override
     {
-        TestKeeperStorage::ResponsesForSessions result;
+        NuKeeperStorage::ResponsesForSessions result;
         for (const auto & generic_request : concrete_requests)
         {
             auto responses = generic_request->processWatches(watches, list_watches);
@@ -503,16 +503,16 @@ struct TestKeeperStorageMultiRequest final : public TestKeeperStorageRequest
     }
 };
 
-struct TestKeeperStorageCloseRequest final : public TestKeeperStorageRequest
+struct NuKeeperStorageCloseRequest final : public NuKeeperStorageRequest
 {
-    using TestKeeperStorageRequest::TestKeeperStorageRequest;
-    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(TestKeeperStorage::Container &, TestKeeperStorage::Ephemerals &, int64_t, int64_t) const override
+    using NuKeeperStorageRequest::NuKeeperStorageRequest;
+    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container &, NuKeeperStorage::Ephemerals &, int64_t, int64_t) const override
     {
         throw DB::Exception("Called process on close request", ErrorCodes::LOGICAL_ERROR);
     }
 };
 
-TestKeeperStorage::ResponsesForSessions TestKeeperStorage::finalize(const RequestsForSessions & expired_requests)
+NuKeeperStorage::ResponsesForSessions NuKeeperStorage::finalize(const RequestsForSessions & expired_requests)
 {
     if (finalized)
         throw DB::Exception("Testkeeper storage already finalized", ErrorCodes::LOGICAL_ERROR);
@@ -559,20 +559,20 @@ TestKeeperStorage::ResponsesForSessions TestKeeperStorage::finalize(const Reques
 }
 
 
-class TestKeeperWrapperFactory final : private boost::noncopyable
+class NuKeeperWrapperFactory final : private boost::noncopyable
 {
 
 public:
-    using Creator = std::function<TestKeeperStorageRequestPtr(const Coordination::ZooKeeperRequestPtr &)>;
+    using Creator = std::function<NuKeeperStorageRequestPtr(const Coordination::ZooKeeperRequestPtr &)>;
     using OpNumToRequest = std::unordered_map<Coordination::OpNum, Creator>;
 
-    static TestKeeperWrapperFactory & instance()
+    static NuKeeperWrapperFactory & instance()
     {
-        static TestKeeperWrapperFactory factory;
+        static NuKeeperWrapperFactory factory;
         return factory;
     }
 
-    TestKeeperStorageRequestPtr get(const Coordination::ZooKeeperRequestPtr & zk_request) const
+    NuKeeperStorageRequestPtr get(const Coordination::ZooKeeperRequestPtr & zk_request) const
     {
         auto it = op_num_to_request.find(zk_request->getOpNum());
         if (it == op_num_to_request.end())
@@ -589,36 +589,36 @@ public:
 
 private:
     OpNumToRequest op_num_to_request;
-    TestKeeperWrapperFactory();
+    NuKeeperWrapperFactory();
 };
 
 template<Coordination::OpNum num, typename RequestT>
-void registerTestKeeperRequestWrapper(TestKeeperWrapperFactory & factory)
+void registerNuKeeperRequestWrapper(NuKeeperWrapperFactory & factory)
 {
     factory.registerRequest(num, [] (const Coordination::ZooKeeperRequestPtr & zk_request) { return std::make_shared<RequestT>(zk_request); });
 }
 
 
-TestKeeperWrapperFactory::TestKeeperWrapperFactory()
+NuKeeperWrapperFactory::NuKeeperWrapperFactory()
 {
-    registerTestKeeperRequestWrapper<Coordination::OpNum::Heartbeat, TestKeeperStorageHeartbeatRequest>(*this);
-    //registerTestKeeperRequestWrapper<Coordination::OpNum::Auth, TestKeeperStorageAuthRequest>(*this);
-    registerTestKeeperRequestWrapper<Coordination::OpNum::Close, TestKeeperStorageCloseRequest>(*this);
-    registerTestKeeperRequestWrapper<Coordination::OpNum::Create, TestKeeperStorageCreateRequest>(*this);
-    registerTestKeeperRequestWrapper<Coordination::OpNum::Remove, TestKeeperStorageRemoveRequest>(*this);
-    registerTestKeeperRequestWrapper<Coordination::OpNum::Exists, TestKeeperStorageExistsRequest>(*this);
-    registerTestKeeperRequestWrapper<Coordination::OpNum::Get, TestKeeperStorageGetRequest>(*this);
-    registerTestKeeperRequestWrapper<Coordination::OpNum::Set, TestKeeperStorageSetRequest>(*this);
-    registerTestKeeperRequestWrapper<Coordination::OpNum::List, TestKeeperStorageListRequest>(*this);
-    registerTestKeeperRequestWrapper<Coordination::OpNum::SimpleList, TestKeeperStorageListRequest>(*this);
-    registerTestKeeperRequestWrapper<Coordination::OpNum::Check, TestKeeperStorageCheckRequest>(*this);
-    registerTestKeeperRequestWrapper<Coordination::OpNum::Multi, TestKeeperStorageMultiRequest>(*this);
+    registerNuKeeperRequestWrapper<Coordination::OpNum::Heartbeat, NuKeeperStorageHeartbeatRequest>(*this);
+    //registerNuKeeperRequestWrapper<Coordination::OpNum::Auth, NuKeeperStorageAuthRequest>(*this);
+    registerNuKeeperRequestWrapper<Coordination::OpNum::Close, NuKeeperStorageCloseRequest>(*this);
+    registerNuKeeperRequestWrapper<Coordination::OpNum::Create, NuKeeperStorageCreateRequest>(*this);
+    registerNuKeeperRequestWrapper<Coordination::OpNum::Remove, NuKeeperStorageRemoveRequest>(*this);
+    registerNuKeeperRequestWrapper<Coordination::OpNum::Exists, NuKeeperStorageExistsRequest>(*this);
+    registerNuKeeperRequestWrapper<Coordination::OpNum::Get, NuKeeperStorageGetRequest>(*this);
+    registerNuKeeperRequestWrapper<Coordination::OpNum::Set, NuKeeperStorageSetRequest>(*this);
+    registerNuKeeperRequestWrapper<Coordination::OpNum::List, NuKeeperStorageListRequest>(*this);
+    registerNuKeeperRequestWrapper<Coordination::OpNum::SimpleList, NuKeeperStorageListRequest>(*this);
+    registerNuKeeperRequestWrapper<Coordination::OpNum::Check, NuKeeperStorageCheckRequest>(*this);
+    registerNuKeeperRequestWrapper<Coordination::OpNum::Multi, NuKeeperStorageMultiRequest>(*this);
 }
 
 
-TestKeeperStorage::ResponsesForSessions TestKeeperStorage::processRequest(const Coordination::ZooKeeperRequestPtr & zk_request, int64_t session_id)
+NuKeeperStorage::ResponsesForSessions NuKeeperStorage::processRequest(const Coordination::ZooKeeperRequestPtr & zk_request, int64_t session_id)
 {
-    TestKeeperStorage::ResponsesForSessions results;
+    NuKeeperStorage::ResponsesForSessions results;
     if (zk_request->getOpNum() == Coordination::OpNum::Close)
     {
         auto it = ephemerals.find(session_id);
@@ -643,7 +643,7 @@ TestKeeperStorage::ResponsesForSessions TestKeeperStorage::processRequest(const
     else
     {
 
-        TestKeeperStorageRequestPtr storage_request = TestKeeperWrapperFactory::instance().get(zk_request);
+        NuKeeperStorageRequestPtr storage_request = NuKeeperWrapperFactory::instance().get(zk_request);
         auto [response, _] = storage_request->process(container, ephemerals, zxid, session_id);
 
         if (zk_request->has_watch)
@@ -689,7 +689,7 @@ TestKeeperStorage::ResponsesForSessions TestKeeperStorage::processRequest(const
 }
 
 
-void TestKeeperStorage::clearDeadWatches(int64_t session_id)
+void NuKeeperStorage::clearDeadWatches(int64_t session_id)
 {
     auto watches_it = sessions_and_watchers.find(session_id);
     if (watches_it != sessions_and_watchers.end())
diff --git a/src/Coordination/TestKeeperStorage.h b/src/Coordination/NuKeeperStorage.h
similarity index 92%
rename from src/Coordination/TestKeeperStorage.h
rename to src/Coordination/NuKeeperStorage.h
index 6f70ff1c584..dce00391bce 100644
--- a/src/Coordination/TestKeeperStorage.h
+++ b/src/Coordination/NuKeeperStorage.h
@@ -12,11 +12,11 @@ namespace DB
 {
 
 using namespace DB;
-struct TestKeeperStorageRequest;
-using TestKeeperStorageRequestPtr = std::shared_ptr<TestKeeperStorageRequest>;
+struct NuKeeperStorageRequest;
+using NuKeeperStorageRequestPtr = std::shared_ptr<NuKeeperStorageRequest>;
 using ResponseCallback = std::function<void(const Coordination::ZooKeeperResponsePtr &)>;
 
-class TestKeeperStorage
+class NuKeeperStorage
 {
 public:
     int64_t session_id_counter{0};
@@ -72,7 +72,7 @@ public:
     }
 
 public:
-    TestKeeperStorage();
+    NuKeeperStorage();
 
     int64_t getSessionID()
     {
diff --git a/src/Coordination/TestKeeperStorageDispatcher.cpp b/src/Coordination/NuKeeperStorageDispatcher.cpp
similarity index 76%
rename from src/Coordination/TestKeeperStorageDispatcher.cpp
rename to src/Coordination/NuKeeperStorageDispatcher.cpp
index d9f9dfd30eb..c531939d6ee 100644
--- a/src/Coordination/TestKeeperStorageDispatcher.cpp
+++ b/src/Coordination/NuKeeperStorageDispatcher.cpp
@@ -1,4 +1,4 @@
-#include <Coordination/TestKeeperStorageDispatcher.h>
+#include <Coordination/NuKeeperStorageDispatcher.h>
 #include <Common/setThreadName.h>
 
 namespace DB
@@ -11,17 +11,17 @@ namespace ErrorCodes
     extern const int TIMEOUT_EXCEEDED;
 }
 
-TestKeeperStorageDispatcher::TestKeeperStorageDispatcher()
-    : log(&Poco::Logger::get("TestKeeperDispatcher"))
+NuKeeperStorageDispatcher::NuKeeperStorageDispatcher()
+    : log(&Poco::Logger::get("NuKeeperDispatcher"))
 {
 }
 
-void TestKeeperStorageDispatcher::processingThread()
+void NuKeeperStorageDispatcher::processingThread()
 {
-    setThreadName("TestKeeperSProc");
+    setThreadName("NuKeeperSProc");
     while (!shutdown_called)
     {
-        TestKeeperStorage::RequestForSession request;
+        NuKeeperStorage::RequestForSession request;
 
         UInt64 max_wait = UInt64(operation_timeout.totalMilliseconds());
 
@@ -44,7 +44,7 @@ void TestKeeperStorageDispatcher::processingThread()
     }
 }
 
-void TestKeeperStorageDispatcher::setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response)
+void NuKeeperStorageDispatcher::setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response)
 {
     std::lock_guard lock(session_to_response_callback_mutex);
     auto session_writer = session_to_response_callback.find(session_id);
@@ -57,7 +57,7 @@ void TestKeeperStorageDispatcher::setResponse(int64_t session_id, const Coordina
         session_to_response_callback.erase(session_writer);
 }
 
-bool TestKeeperStorageDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id)
+bool NuKeeperStorageDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id)
 {
 
     {
@@ -66,7 +66,7 @@ bool TestKeeperStorageDispatcher::putRequest(const Coordination::ZooKeeperReques
             return false;
     }
 
-    TestKeeperStorage::RequestForSession request_info;
+    NuKeeperStorage::RequestForSession request_info;
     request_info.request = request;
     request_info.session_id = session_id;
 
@@ -104,27 +104,27 @@ namespace
     }
 }
 
-void TestKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfiguration & config)
+void NuKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfiguration & config)
 {
     LOG_DEBUG(log, "Initializing storage dispatcher");
-    int myid = config.getInt("test_keeper_server.server_id");
+    int myid = config.getInt("nu_keeper_server.server_id");
     std::string myhostname;
     int myport;
     int32_t my_priority = 1;
 
     Poco::Util::AbstractConfiguration::Keys keys;
-    config.keys("test_keeper_server.raft_configuration", keys);
+    config.keys("nu_keeper_server.raft_configuration", keys);
     bool my_can_become_leader = true;
 
     std::vector<std::tuple<int, std::string, int, bool, int32_t>> server_configs;
     std::vector<int32_t> ids;
     for (const auto & server_key : keys)
     {
-        int server_id = config.getInt("test_keeper_server.raft_configuration." + server_key + ".id");
-        std::string hostname = config.getString("test_keeper_server.raft_configuration." + server_key + ".hostname");
-        int port = config.getInt("test_keeper_server.raft_configuration." + server_key + ".port");
-        bool can_become_leader = config.getBool("test_keeper_server.raft_configuration." + server_key + ".can_become_leader", true);
-        int32_t priority = config.getInt("test_keeper_server.raft_configuration." + server_key + ".priority", 1);
+        int server_id = config.getInt("nu_keeper_server.raft_configuration." + server_key + ".id");
+        std::string hostname = config.getString("nu_keeper_server.raft_configuration." + server_key + ".hostname");
+        int port = config.getInt("nu_keeper_server.raft_configuration." + server_key + ".port");
+        bool can_become_leader = config.getBool("nu_keeper_server.raft_configuration." + server_key + ".can_become_leader", true);
+        int32_t priority = config.getInt("nu_keeper_server.raft_configuration." + server_key + ".priority", 1);
         if (server_id == myid)
         {
             myhostname = hostname;
@@ -175,7 +175,7 @@ void TestKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfigura
     LOG_DEBUG(log, "Dispatcher initialized");
 }
 
-void TestKeeperStorageDispatcher::shutdown()
+void NuKeeperStorageDispatcher::shutdown()
 {
     try
     {
@@ -194,10 +194,10 @@ void TestKeeperStorageDispatcher::shutdown()
 
         if (server)
         {
-            TestKeeperStorage::RequestsForSessions expired_requests;
-            TestKeeperStorage::RequestForSession request;
+            NuKeeperStorage::RequestsForSessions expired_requests;
+            NuKeeperStorage::RequestForSession request;
             while (requests_queue.tryPop(request))
-                expired_requests.push_back(TestKeeperStorage::RequestForSession{request});
+                expired_requests.push_back(NuKeeperStorage::RequestForSession{request});
 
             auto expired_responses = server->shutdown(expired_requests);
 
@@ -213,19 +213,19 @@ void TestKeeperStorageDispatcher::shutdown()
     LOG_DEBUG(log, "Dispatcher shut down");
 }
 
-TestKeeperStorageDispatcher::~TestKeeperStorageDispatcher()
+NuKeeperStorageDispatcher::~NuKeeperStorageDispatcher()
 {
     shutdown();
 }
 
-void TestKeeperStorageDispatcher::registerSession(int64_t session_id, ZooKeeperResponseCallback callback)
+void NuKeeperStorageDispatcher::registerSession(int64_t session_id, ZooKeeperResponseCallback callback)
 {
     std::lock_guard lock(session_to_response_callback_mutex);
     if (!session_to_response_callback.try_emplace(session_id, callback).second)
         throw Exception(DB::ErrorCodes::LOGICAL_ERROR, "Session with id {} already registered in dispatcher", session_id);
 }
 
-void TestKeeperStorageDispatcher::finishSession(int64_t session_id)
+void NuKeeperStorageDispatcher::finishSession(int64_t session_id)
 {
     std::lock_guard lock(session_to_response_callback_mutex);
     auto session_it = session_to_response_callback.find(session_id);
diff --git a/src/Coordination/TestKeeperStorageDispatcher.h b/src/Coordination/NuKeeperStorageDispatcher.h
similarity index 90%
rename from src/Coordination/TestKeeperStorageDispatcher.h
rename to src/Coordination/NuKeeperStorageDispatcher.h
index ddb90abb88a..c292cd99c4f 100644
--- a/src/Coordination/TestKeeperStorageDispatcher.h
+++ b/src/Coordination/NuKeeperStorageDispatcher.h
@@ -21,7 +21,7 @@ namespace DB
 
 using ZooKeeperResponseCallback = std::function<void(const Coordination::ZooKeeperResponsePtr & response)>;
 
-class TestKeeperStorageDispatcher
+class NuKeeperStorageDispatcher
 {
 
 private:
@@ -30,7 +30,7 @@ private:
 
     std::mutex push_request_mutex;
 
-    using RequestsQueue = ConcurrentBoundedQueue<TestKeeperStorage::RequestForSession>;
+    using RequestsQueue = ConcurrentBoundedQueue<NuKeeperStorage::RequestForSession>;
     RequestsQueue requests_queue{1};
     std::atomic<bool> shutdown_called{false};
     using SessionToResponseCallback = std::unordered_map<int64_t, ZooKeeperResponseCallback>;
@@ -49,13 +49,13 @@ private:
     void setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response);
 
 public:
-    TestKeeperStorageDispatcher();
+    NuKeeperStorageDispatcher();
 
     void initialize(const Poco::Util::AbstractConfiguration & config);
 
     void shutdown();
 
-    ~TestKeeperStorageDispatcher();
+    ~NuKeeperStorageDispatcher();
 
     bool putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id);
 
diff --git a/src/Coordination/TestKeeperStorageSerializer.cpp b/src/Coordination/NuKeeperStorageSerializer.cpp
similarity index 84%
rename from src/Coordination/TestKeeperStorageSerializer.cpp
rename to src/Coordination/NuKeeperStorageSerializer.cpp
index f6116d29104..298df45cde0 100644
--- a/src/Coordination/TestKeeperStorageSerializer.cpp
+++ b/src/Coordination/NuKeeperStorageSerializer.cpp
@@ -1,4 +1,4 @@
-#include <Coordination/TestKeeperStorageSerializer.h>
+#include <Coordination/NuKeeperStorageSerializer.h>
 #include <IO/WriteHelpers.h>
 #include <IO/ReadHelpers.h>
 #include <Common/ZooKeeper/ZooKeeperIO.h>
@@ -8,7 +8,7 @@ namespace DB
 
 namespace
 {
-    void writeNode(const TestKeeperStorage::Node & node, WriteBuffer & out)
+    void writeNode(const NuKeeperStorage::Node & node, WriteBuffer & out)
     {
         Coordination::write(node.data, out);
         Coordination::write(node.acls, out);
@@ -18,7 +18,7 @@ namespace
         Coordination::write(node.seq_num, out);
     }
 
-    void readNode(TestKeeperStorage::Node & node, ReadBuffer & in)
+    void readNode(NuKeeperStorage::Node & node, ReadBuffer & in)
     {
         Coordination::read(node.data, in);
         Coordination::read(node.acls, in);
@@ -29,7 +29,7 @@ namespace
     }
 }
 
-void TestKeeperStorageSerializer::serialize(const TestKeeperStorage & storage, WriteBuffer & out)
+void NuKeeperStorageSerializer::serialize(const NuKeeperStorage & storage, WriteBuffer & out)
 {
     Coordination::write(storage.zxid, out);
     Coordination::write(storage.session_id_counter, out);
@@ -49,7 +49,7 @@ void TestKeeperStorageSerializer::serialize(const TestKeeperStorage & storage, W
     }
 }
 
-void TestKeeperStorageSerializer::deserialize(TestKeeperStorage & storage, ReadBuffer & in)
+void NuKeeperStorageSerializer::deserialize(NuKeeperStorage & storage, ReadBuffer & in)
 {
     int64_t session_id_counter, zxid;
     Coordination::read(zxid, in);
@@ -63,7 +63,7 @@ void TestKeeperStorageSerializer::deserialize(TestKeeperStorage & storage, ReadB
     {
         std::string path;
         Coordination::read(path, in);
-        TestKeeperStorage::Node node;
+        NuKeeperStorage::Node node;
         readNode(node, in);
         storage.container[path] = node;
     }
diff --git a/src/Coordination/NuKeeperStorageSerializer.h b/src/Coordination/NuKeeperStorageSerializer.h
new file mode 100644
index 00000000000..e54c65a739d
--- /dev/null
+++ b/src/Coordination/NuKeeperStorageSerializer.h
@@ -0,0 +1,17 @@
+#pragma once
+#include <Coordination/NuKeeperStorage.h>
+#include <IO/WriteBuffer.h>
+#include <IO/ReadBuffer.h>
+
+namespace DB
+{
+
+class NuKeeperStorageSerializer
+{
+public:
+    static void serialize(const NuKeeperStorage & storage, WriteBuffer & out);
+
+    static void deserialize(NuKeeperStorage & storage, ReadBuffer & in);
+};
+
+}
diff --git a/src/Coordination/TestKeeperStorageSerializer.h b/src/Coordination/TestKeeperStorageSerializer.h
deleted file mode 100644
index a3909c24694..00000000000
--- a/src/Coordination/TestKeeperStorageSerializer.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#pragma once
-#include <Coordination/TestKeeperStorage.h>
-#include <IO/WriteBuffer.h>
-#include <IO/ReadBuffer.h>
-
-namespace DB
-{
-
-class TestKeeperStorageSerializer
-{
-public:
-    static void serialize(const TestKeeperStorage & storage, WriteBuffer & out);
-
-    static void deserialize(TestKeeperStorage & storage, ReadBuffer & in);
-};
-
-}
diff --git a/src/Coordination/tests/gtest_for_build.cpp b/src/Coordination/tests/gtest_for_build.cpp
index d69f2c18bd4..d2f4938dfd3 100644
--- a/src/Coordination/tests/gtest_for_build.cpp
+++ b/src/Coordination/tests/gtest_for_build.cpp
@@ -9,7 +9,7 @@
 
 #include <Coordination/InMemoryLogStore.h>
 #include <Coordination/InMemoryStateManager.h>
-#include <Coordination/TestKeeperStorageSerializer.h>
+#include <Coordination/NuKeeperStorageSerializer.h>
 #include <Coordination/SummingStateMachine.h>
 #include <Coordination/NuKeeperStateMachine.h>
 #include <Coordination/LoggerWrapper.h>
@@ -283,9 +283,9 @@ nuraft::ptr<nuraft::buffer> getZooKeeperLogEntry(int64_t session_id, const Coord
     return buf.getBuffer();
 }
 
-DB::TestKeeperStorage::ResponsesForSessions getZooKeeperResponses(nuraft::ptr<nuraft::buffer> & buffer, const Coordination::ZooKeeperRequestPtr & request)
+DB::NuKeeperStorage::ResponsesForSessions getZooKeeperResponses(nuraft::ptr<nuraft::buffer> & buffer, const Coordination::ZooKeeperRequestPtr & request)
 {
-    DB::TestKeeperStorage::ResponsesForSessions results;
+    DB::NuKeeperStorage::ResponsesForSessions results;
     DB::ReadBufferFromNuraftBuffer buf(buffer);
     while (!buf.eof())
     {
@@ -303,28 +303,28 @@ DB::TestKeeperStorage::ResponsesForSessions getZooKeeperResponses(nuraft::ptr<nu
         Coordination::read(err, buf);
         auto response = request->makeResponse();
         response->readImpl(buf);
-        results.push_back(DB::TestKeeperStorage::ResponseForSession{session_id, response});
+        results.push_back(DB::NuKeeperStorage::ResponseForSession{session_id, response});
     }
     return results;
 }
 
 TEST(CoordinationTest, TestStorageSerialization)
 {
-    DB::TestKeeperStorage storage;
-    storage.container["/hello"] = DB::TestKeeperStorage::Node{.data="world"};
-    storage.container["/hello/somepath"] =  DB::TestKeeperStorage::Node{.data="somedata"};
+    DB::NuKeeperStorage storage;
+    storage.container["/hello"] = DB::NuKeeperStorage::Node{.data="world"};
+    storage.container["/hello/somepath"] =  DB::NuKeeperStorage::Node{.data="somedata"};
     storage.session_id_counter = 5;
     storage.zxid = 156;
     storage.ephemerals[3] = {"/hello", "/"};
     storage.ephemerals[1] = {"/hello/somepath"};
 
     DB::WriteBufferFromOwnString buffer;
-    DB::TestKeeperStorageSerializer serializer;
+    DB::NuKeeperStorageSerializer serializer;
     serializer.serialize(storage, buffer);
     std::string serialized = buffer.str();
     EXPECT_NE(serialized.size(), 0);
     DB::ReadBufferFromString read(serialized);
-    DB::TestKeeperStorage new_storage;
+    DB::NuKeeperStorage new_storage;
     serializer.deserialize(new_storage, read);
 
     EXPECT_EQ(new_storage.container.size(), 3);
diff --git a/src/Coordination/ya.make b/src/Coordination/ya.make
index de2be9df7ac..833ca27f2f4 100644
--- a/src/Coordination/ya.make
+++ b/src/Coordination/ya.make
@@ -14,10 +14,10 @@ SRCS(
     InMemoryStateManager.cpp
     NuKeeperServer.cpp
     NuKeeperStateMachine.cpp
+    NuKeeperStorage.cpp
+    NuKeeperStorageDispatcher.cpp
+    NuKeeperStorageSerializer.cpp
     SummingStateMachine.cpp
-    TestKeeperStorage.cpp
-    TestKeeperStorageDispatcher.cpp
-    TestKeeperStorageSerializer.cpp
     WriteBufferFromNuraftBuffer.cpp
 
 )
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index fe1b6a8a32e..983ac733849 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -12,7 +12,7 @@
 #include <Common/Stopwatch.h>
 #include <Common/formatReadable.h>
 #include <Common/thread_local_rng.h>
-#include <Coordination/TestKeeperStorageDispatcher.h>
+#include <Coordination/NuKeeperStorageDispatcher.h>
 #include <Compression/ICompressionCodec.h>
 #include <Core/BackgroundSchedulePool.h>
 #include <Formats/FormatFactory.h>
@@ -305,8 +305,8 @@ struct ContextShared
     ConfigurationPtr zookeeper_config;                      /// Stores zookeeper configs
 
 #if USE_NURAFT
-    mutable std::mutex test_keeper_storage_dispatcher_mutex;
-    mutable std::shared_ptr<TestKeeperStorageDispatcher> test_keeper_storage_dispatcher;
+    mutable std::mutex nu_keeper_storage_dispatcher_mutex;
+    mutable std::shared_ptr<NuKeeperStorageDispatcher> nu_keeper_storage_dispatcher;
 #endif
     mutable std::mutex auxiliary_zookeepers_mutex;
     mutable std::map<String, zkutil::ZooKeeperPtr> auxiliary_zookeepers;    /// Map for auxiliary ZooKeeper clients.
@@ -1582,42 +1582,42 @@ zkutil::ZooKeeperPtr Context::getZooKeeper() const
 }
 
 
-void Context::initializeTestKeeperStorageDispatcher() const
+void Context::initializeNuKeeperStorageDispatcher() const
 {
 #if USE_NURAFT
-    std::lock_guard lock(shared->test_keeper_storage_dispatcher_mutex);
+    std::lock_guard lock(shared->nu_keeper_storage_dispatcher_mutex);
 
-    if (shared->test_keeper_storage_dispatcher)
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to initialize TestKeeper multiple times");
+    if (shared->nu_keeper_storage_dispatcher)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to initialize NuKeeper multiple times");
 
     const auto & config = getConfigRef();
-    if (config.has("test_keeper_server"))
+    if (config.has("nu_keeper_server"))
     {
-        shared->test_keeper_storage_dispatcher = std::make_shared<TestKeeperStorageDispatcher>();
-        shared->test_keeper_storage_dispatcher->initialize(config);
+        shared->nu_keeper_storage_dispatcher = std::make_shared<NuKeeperStorageDispatcher>();
+        shared->nu_keeper_storage_dispatcher->initialize(config);
     }
 #endif
 }
 
 #if USE_NURAFT
-std::shared_ptr<TestKeeperStorageDispatcher> & Context::getTestKeeperStorageDispatcher() const
+std::shared_ptr<NuKeeperStorageDispatcher> & Context::getNuKeeperStorageDispatcher() const
 {
-    std::lock_guard lock(shared->test_keeper_storage_dispatcher_mutex);
-    if (!shared->test_keeper_storage_dispatcher)
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "TestKeeper must be initialized before requests");
+    std::lock_guard lock(shared->nu_keeper_storage_dispatcher_mutex);
+    if (!shared->nu_keeper_storage_dispatcher)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "NuKeeper must be initialized before requests");
 
-    return shared->test_keeper_storage_dispatcher;
+    return shared->nu_keeper_storage_dispatcher;
 }
 #endif
 
-void Context::shutdownTestKeeperStorageDispatcher() const
+void Context::shutdownNuKeeperStorageDispatcher() const
 {
 #if USE_NURAFT
-    std::lock_guard lock(shared->test_keeper_storage_dispatcher_mutex);
-    if (shared->test_keeper_storage_dispatcher)
+    std::lock_guard lock(shared->nu_keeper_storage_dispatcher_mutex);
+    if (shared->nu_keeper_storage_dispatcher)
     {
-        shared->test_keeper_storage_dispatcher->shutdown();
-        shared->test_keeper_storage_dispatcher.reset();
+        shared->nu_keeper_storage_dispatcher->shutdown();
+        shared->nu_keeper_storage_dispatcher.reset();
     }
 #endif
 }
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index 3c78973b21a..446c64f1bbd 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -106,7 +106,7 @@ using StoragePolicyPtr = std::shared_ptr<const StoragePolicy>;
 using StoragePoliciesMap = std::map<String, StoragePolicyPtr>;
 class StoragePolicySelector;
 using StoragePolicySelectorPtr = std::shared_ptr<const StoragePolicySelector>;
-class TestKeeperStorageDispatcher;
+class NuKeeperStorageDispatcher;
 
 class IOutputFormat;
 using OutputFormatPtr = std::shared_ptr<IOutputFormat>;
@@ -574,10 +574,10 @@ public:
     std::shared_ptr<zkutil::ZooKeeper> getAuxiliaryZooKeeper(const String & name) const;
 
 #if USE_NURAFT
-    std::shared_ptr<TestKeeperStorageDispatcher> & getTestKeeperStorageDispatcher() const;
+    std::shared_ptr<NuKeeperStorageDispatcher> & getNuKeeperStorageDispatcher() const;
 #endif
-    void initializeTestKeeperStorageDispatcher() const;
-    void shutdownTestKeeperStorageDispatcher() const;
+    void initializeNuKeeperStorageDispatcher() const;
+    void shutdownNuKeeperStorageDispatcher() const;
 
     /// Set auxiliary zookeepers configuration at server starting or configuration reloading.
     void reloadAuxiliaryZooKeepersConfigIfChanged(const ConfigurationPtr & config);
diff --git a/src/Server/TestKeeperTCPHandler.cpp b/src/Server/NuKeeperTCPHandler.cpp
similarity index 92%
rename from src/Server/TestKeeperTCPHandler.cpp
rename to src/Server/NuKeeperTCPHandler.cpp
index 17f2ec547c9..6deee5094ca 100644
--- a/src/Server/TestKeeperTCPHandler.cpp
+++ b/src/Server/NuKeeperTCPHandler.cpp
@@ -1,4 +1,4 @@
-#include <Server/TestKeeperTCPHandler.h>
+#include <Server/NuKeeperTCPHandler.h>
 
 #if USE_NURAFT
 
@@ -224,20 +224,20 @@ struct SocketInterruptablePollWrapper
 #endif
 };
 
-TestKeeperTCPHandler::TestKeeperTCPHandler(IServer & server_, const Poco::Net::StreamSocket & socket_)
+NuKeeperTCPHandler::NuKeeperTCPHandler(IServer & server_, const Poco::Net::StreamSocket & socket_)
     : Poco::Net::TCPServerConnection(socket_)
     , server(server_)
-    , log(&Poco::Logger::get("TestKeeperTCPHandler"))
+    , log(&Poco::Logger::get("NuKeeperTCPHandler"))
     , global_context(server.context())
-    , test_keeper_storage_dispatcher(global_context.getTestKeeperStorageDispatcher())
-    , operation_timeout(0, global_context.getConfigRef().getUInt("test_keeper_server.operation_timeout_ms", Coordination::DEFAULT_OPERATION_TIMEOUT_MS) * 1000)
-    , session_timeout(0, global_context.getConfigRef().getUInt("test_keeper_server.session_timeout_ms", Coordination::DEFAULT_SESSION_TIMEOUT_MS) * 1000)
+    , nu_keeper_storage_dispatcher(global_context.getNuKeeperStorageDispatcher())
+    , operation_timeout(0, global_context.getConfigRef().getUInt("nu_keeper_server.operation_timeout_ms", Coordination::DEFAULT_OPERATION_TIMEOUT_MS) * 1000)
+    , session_timeout(0, global_context.getConfigRef().getUInt("nu_keeper_server.session_timeout_ms", Coordination::DEFAULT_SESSION_TIMEOUT_MS) * 1000)
     , poll_wrapper(std::make_unique<SocketInterruptablePollWrapper>(socket_))
     , responses(std::make_unique<ThreadSafeResponseQueue>())
 {
 }
 
-void TestKeeperTCPHandler::sendHandshake(bool has_leader)
+void NuKeeperTCPHandler::sendHandshake(bool has_leader)
 {
     Coordination::write(Coordination::SERVER_HANDSHAKE_LENGTH, *out);
     if (has_leader)
@@ -252,12 +252,12 @@ void TestKeeperTCPHandler::sendHandshake(bool has_leader)
     out->next();
 }
 
-void TestKeeperTCPHandler::run()
+void NuKeeperTCPHandler::run()
 {
     runImpl();
 }
 
-void TestKeeperTCPHandler::receiveHandshake()
+void NuKeeperTCPHandler::receiveHandshake()
 {
     int32_t handshake_length;
     int32_t protocol_version;
@@ -294,7 +294,7 @@ void TestKeeperTCPHandler::receiveHandshake()
 }
 
 
-void TestKeeperTCPHandler::runImpl()
+void NuKeeperTCPHandler::runImpl()
 {
     setThreadName("TstKprHandler");
     ThreadStatus thread_status;
@@ -324,11 +324,11 @@ void TestKeeperTCPHandler::runImpl()
         return;
     }
 
-    if (test_keeper_storage_dispatcher->hasLeader())
+    if (nu_keeper_storage_dispatcher->hasLeader())
     {
         try
         {
-            session_id = test_keeper_storage_dispatcher->getSessionID();
+            session_id = nu_keeper_storage_dispatcher->getSessionID();
         }
         catch (const Exception & e)
         {
@@ -354,7 +354,7 @@ void TestKeeperTCPHandler::runImpl()
         UInt8 single_byte = 1;
         [[maybe_unused]] int result = write(response_fd, &single_byte, sizeof(single_byte));
     };
-    test_keeper_storage_dispatcher->registerSession(session_id, response_callback);
+    nu_keeper_storage_dispatcher->registerSession(session_id, response_callback);
 
     session_stopwatch.start();
     bool close_received = false;
@@ -428,18 +428,18 @@ void TestKeeperTCPHandler::runImpl()
     }
 }
 
-void TestKeeperTCPHandler::finish()
+void NuKeeperTCPHandler::finish()
 {
     Coordination::ZooKeeperRequestPtr request = Coordination::ZooKeeperRequestFactory::instance().get(Coordination::OpNum::Close);
     request->xid = close_xid;
     /// Put close request (so storage will remove all info about session)
-    test_keeper_storage_dispatcher->putRequest(request, session_id);
+    nu_keeper_storage_dispatcher->putRequest(request, session_id);
     /// We don't need any callbacks because session can be already dead and
     /// nobody wait for response
-    test_keeper_storage_dispatcher->finishSession(session_id);
+    nu_keeper_storage_dispatcher->finishSession(session_id);
 }
 
-std::pair<Coordination::OpNum, Coordination::XID> TestKeeperTCPHandler::receiveRequest()
+std::pair<Coordination::OpNum, Coordination::XID> NuKeeperTCPHandler::receiveRequest()
 {
     int32_t length;
     Coordination::read(length, *in);
@@ -453,7 +453,7 @@ std::pair<Coordination::OpNum, Coordination::XID> TestKeeperTCPHandler::receiveR
     request->xid = xid;
     request->readImpl(*in);
 
-    if (!test_keeper_storage_dispatcher->putRequest(request, session_id))
+    if (!nu_keeper_storage_dispatcher->putRequest(request, session_id))
         throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Session {} already disconnected", session_id);
     return std::make_pair(opnum, xid);
 }
diff --git a/src/Server/TestKeeperTCPHandler.h b/src/Server/NuKeeperTCPHandler.h
similarity index 83%
rename from src/Server/TestKeeperTCPHandler.h
rename to src/Server/NuKeeperTCPHandler.h
index 09543b5a888..1874b8cd309 100644
--- a/src/Server/TestKeeperTCPHandler.h
+++ b/src/Server/NuKeeperTCPHandler.h
@@ -13,7 +13,7 @@
 #include <Interpreters/Context.h>
 #include <Common/ZooKeeper/ZooKeeperCommon.h>
 #include <Common/ZooKeeper/ZooKeeperConstants.h>
-#include <Coordination/TestKeeperStorageDispatcher.h>
+#include <Coordination/NuKeeperStorageDispatcher.h>
 #include <IO/WriteBufferFromPocoSocket.h>
 #include <IO/ReadBufferFromPocoSocket.h>
 #include <unordered_map>
@@ -26,16 +26,16 @@ using SocketInterruptablePollWrapperPtr = std::unique_ptr<SocketInterruptablePol
 class ThreadSafeResponseQueue;
 using ThreadSafeResponseQueuePtr = std::unique_ptr<ThreadSafeResponseQueue>;
 
-class TestKeeperTCPHandler : public Poco::Net::TCPServerConnection
+class NuKeeperTCPHandler : public Poco::Net::TCPServerConnection
 {
 public:
-    TestKeeperTCPHandler(IServer & server_, const Poco::Net::StreamSocket & socket_);
+    NuKeeperTCPHandler(IServer & server_, const Poco::Net::StreamSocket & socket_);
     void run() override;
 private:
     IServer & server;
     Poco::Logger * log;
     Context global_context;
-    std::shared_ptr<TestKeeperStorageDispatcher> test_keeper_storage_dispatcher;
+    std::shared_ptr<NuKeeperStorageDispatcher> nu_keeper_storage_dispatcher;
     Poco::Timespan operation_timeout;
     Poco::Timespan session_timeout;
     int64_t session_id;
diff --git a/src/Server/TestKeeperTCPHandlerFactory.h b/src/Server/NuKeeperTCPHandlerFactory.h
similarity index 68%
rename from src/Server/TestKeeperTCPHandlerFactory.h
rename to src/Server/NuKeeperTCPHandlerFactory.h
index a5bf6be8c8a..0fd86ebc21f 100644
--- a/src/Server/TestKeeperTCPHandlerFactory.h
+++ b/src/Server/NuKeeperTCPHandlerFactory.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include <Server/TestKeeperTCPHandler.h>
+#include <Server/NuKeeperTCPHandler.h>
 #include <Poco/Net/TCPServerConnectionFactory.h>
 #include <Poco/Net/NetException.h>
 #include <common/logger_useful.h>
@@ -9,7 +9,7 @@
 namespace DB
 {
 
-class TestKeeperTCPHandlerFactory : public Poco::Net::TCPServerConnectionFactory
+class NuKeeperTCPHandlerFactory : public Poco::Net::TCPServerConnectionFactory
 {
 private:
     IServer & server;
@@ -21,9 +21,9 @@ private:
         void run() override {}
     };
 public:
-    TestKeeperTCPHandlerFactory(IServer & server_)
+    NuKeeperTCPHandlerFactory(IServer & server_)
         : server(server_)
-        , log(&Poco::Logger::get("TestKeeperTCPHandlerFactory"))
+        , log(&Poco::Logger::get("NuKeeperTCPHandlerFactory"))
     {
     }
 
@@ -31,8 +31,8 @@ public:
     {
         try
         {
-            LOG_TRACE(log, "Test keeper request. Address: {}", socket.peerAddress().toString());
-            return new TestKeeperTCPHandler(server, socket);
+            LOG_TRACE(log, "NuKeeper request. Address: {}", socket.peerAddress().toString());
+            return new NuKeeperTCPHandler(server, socket);
         }
         catch (const Poco::Net::NetException &)
         {
diff --git a/src/Server/ya.make b/src/Server/ya.make
index 1e44577aea9..a0269e9ac84 100644
--- a/src/Server/ya.make
+++ b/src/Server/ya.make
@@ -17,6 +17,7 @@ SRCS(
     MySQLHandler.cpp
     MySQLHandlerFactory.cpp
     NotFoundHandler.cpp
+    NuKeeperTCPHandler.cpp
     PostgreSQLHandler.cpp
     PostgreSQLHandlerFactory.cpp
     PrometheusMetricsWriter.cpp
@@ -25,7 +26,6 @@ SRCS(
     ReplicasStatusHandler.cpp
     StaticRequestHandler.cpp
     TCPHandler.cpp
-    TestKeeperTCPHandler.cpp
     WebUIRequestHandler.cpp
 
 )
diff --git a/tests/config/config.d/test_keeper_port.xml b/tests/config/config.d/nu_keeper_port.xml
similarity index 88%
rename from tests/config/config.d/test_keeper_port.xml
rename to tests/config/config.d/nu_keeper_port.xml
index fff60d749f6..afd22955a33 100644
--- a/tests/config/config.d/test_keeper_port.xml
+++ b/tests/config/config.d/nu_keeper_port.xml
@@ -1,5 +1,5 @@
 <yandex>
-    <test_keeper_server>
+    <nu_keeper_server>
         <tcp_port>9181</tcp_port>
         <operation_timeout_ms>10000</operation_timeout_ms>
         <session_timeout_ms>30000</session_timeout_ms>
@@ -11,5 +11,5 @@
                 <port>44444</port>
             </server>
         </raft_configuration>
-    </test_keeper_server>
+    </nu_keeper_server>
 </yandex>
diff --git a/tests/config/install.sh b/tests/config/install.sh
index 9965e1fb1ad..6f620ef6404 100755
--- a/tests/config/install.sh
+++ b/tests/config/install.sh
@@ -29,7 +29,7 @@ ln -sf $SRC_PATH/config.d/graphite.xml $DEST_SERVER_PATH/config.d/
 ln -sf $SRC_PATH/config.d/database_atomic.xml $DEST_SERVER_PATH/config.d/
 ln -sf $SRC_PATH/config.d/max_concurrent_queries.xml $DEST_SERVER_PATH/config.d/
 ln -sf $SRC_PATH/config.d/test_cluster_with_incorrect_pw.xml $DEST_SERVER_PATH/config.d/
-ln -sf $SRC_PATH/config.d/test_keeper_port.xml $DEST_SERVER_PATH/config.d/
+ln -sf $SRC_PATH/config.d/nu_keeper_port.xml $DEST_SERVER_PATH/config.d/
 ln -sf $SRC_PATH/config.d/logging_no_rotate.xml $DEST_SERVER_PATH/config.d/
 ln -sf $SRC_PATH/config.d/tcp_with_proxy.xml $DEST_SERVER_PATH/config.d/
 ln -sf $SRC_PATH/config.d/top_level_domains_lists.xml $DEST_SERVER_PATH/config.d/
diff --git a/tests/integration/test_testkeeper_back_to_back/configs/enable_test_keeper.xml b/tests/integration/test_testkeeper_back_to_back/configs/enable_test_keeper.xml
index fff60d749f6..afd22955a33 100644
--- a/tests/integration/test_testkeeper_back_to_back/configs/enable_test_keeper.xml
+++ b/tests/integration/test_testkeeper_back_to_back/configs/enable_test_keeper.xml
@@ -1,5 +1,5 @@
 <yandex>
-    <test_keeper_server>
+    <nu_keeper_server>
         <tcp_port>9181</tcp_port>
         <operation_timeout_ms>10000</operation_timeout_ms>
         <session_timeout_ms>30000</session_timeout_ms>
@@ -11,5 +11,5 @@
                 <port>44444</port>
             </server>
         </raft_configuration>
-    </test_keeper_server>
+    </nu_keeper_server>
 </yandex>
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
index 81f68f50c7c..fde0d511886 100644
--- a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
@@ -1,5 +1,5 @@
 <yandex>
-    <test_keeper_server>
+    <nu_keeper_server>
         <tcp_port>9181</tcp_port>
         <operation_timeout_ms>10000</operation_timeout_ms>
         <session_timeout_ms>30000</session_timeout_ms>
@@ -27,5 +27,5 @@
                 <priority>1</priority>
             </server>
         </raft_configuration>
-    </test_keeper_server>
+    </nu_keeper_server>
 </yandex>
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
index 73340973367..c6f4e7b5a22 100644
--- a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
@@ -1,5 +1,5 @@
 <yandex>
-    <test_keeper_server>
+    <nu_keeper_server>
         <tcp_port>9181</tcp_port>
         <operation_timeout_ms>10000</operation_timeout_ms>
         <session_timeout_ms>30000</session_timeout_ms>
@@ -27,5 +27,5 @@
                 <priority>1</priority>
             </server>
         </raft_configuration>
-    </test_keeper_server>
+    </nu_keeper_server>
 </yandex>
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
index fbc51489d11..d1e8830c480 100644
--- a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
@@ -1,5 +1,5 @@
 <yandex>
-    <test_keeper_server>
+    <nu_keeper_server>
         <tcp_port>9181</tcp_port>
         <operation_timeout_ms>10000</operation_timeout_ms>
         <session_timeout_ms>30000</session_timeout_ms>
@@ -27,5 +27,5 @@
                 <priority>1</priority>
             </server>
         </raft_configuration>
-    </test_keeper_server>
+    </nu_keeper_server>
 </yandex>

From a8d30bedea4b2ccc00333c4d6621ab431985ae8d Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Mon, 1 Feb 2021 17:16:37 +0300
Subject: [PATCH 0487/1238] Missed config file

---
 programs/server/config.d/nu_keeper_port.xml | 1 +
 1 file changed, 1 insertion(+)
 create mode 120000 programs/server/config.d/nu_keeper_port.xml

diff --git a/programs/server/config.d/nu_keeper_port.xml b/programs/server/config.d/nu_keeper_port.xml
new file mode 120000
index 00000000000..07f71c63435
--- /dev/null
+++ b/programs/server/config.d/nu_keeper_port.xml
@@ -0,0 +1 @@
+tests/config/config.d/nu_keeper_port.xml
\ No newline at end of file

From c4a3acd4f82d441e8f58367149167543def26b3d Mon Sep 17 00:00:00 2001
From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com>
Date: Mon, 1 Feb 2021 17:53:38 +0300
Subject: [PATCH 0488/1238] 2021 footer

---
 website/templates/footer.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/website/templates/footer.html b/website/templates/footer.html
index 765ea63d528..1eaf519b58b 100644
--- a/website/templates/footer.html
+++ b/website/templates/footer.html
@@ -8,7 +8,7 @@
         {{ _('ClickHouse source code is published under the Apache 2.0 License.') }}</a> {{ _('Software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.') }}
             </div>
             <div class="col-lg-4 text-right">
-                &copy; 2016–2020 <a href="https://yandex.com/company/" rel="external nofollow noreferrer" target="_blank" class="text-muted">{{ _('Yandex LLC') }}</a>
+                &copy; 2016–2021 <a href="https://yandex.com/company/" rel="external nofollow noreferrer" target="_blank" class="text-muted">{{ _('Yandex LLC') }}</a>
             </div>
         </div>
     </div>

From e6fd6abe6f78ee017236b8fa059635ad0fbfcb37 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Mon, 1 Feb 2021 18:07:34 +0300
Subject: [PATCH 0489/1238] Fix test keeper integration tests

---
 tests/integration/test_testkeeper_back_to_back/test.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/tests/integration/test_testkeeper_back_to_back/test.py b/tests/integration/test_testkeeper_back_to_back/test.py
index f74d6a4c646..d3a9b742cdd 100644
--- a/tests/integration/test_testkeeper_back_to_back/test.py
+++ b/tests/integration/test_testkeeper_back_to_back/test.py
@@ -8,7 +8,7 @@ from multiprocessing.dummy import Pool
 
 cluster = ClickHouseCluster(__file__)
 node = cluster.add_instance('node', main_configs=['configs/enable_test_keeper.xml', 'configs/logs_conf.xml'], with_zookeeper=True)
-from kazoo.client import KazooClient
+from kazoo.client import KazooClient, KazooState
 
 _genuine_zk_instance = None
 _fake_zk_instance = None
@@ -26,6 +26,13 @@ def get_fake_zk():
     if not _fake_zk_instance:
         print("node", cluster.get_instance_ip("node"))
         _fake_zk_instance = KazooClient(hosts=cluster.get_instance_ip("node") + ":9181")
+        def reset_last_zxid_listener(state):
+            print("Fake zk callback called for state", state)
+            global _fake_zk_instance
+            # reset last_zxid -- fake server doesn't support it
+            _fake_zk_instance.last_zxid = 0
+
+        _fake_zk_instance.add_listener(reset_last_zxid_listener)
         _fake_zk_instance.start()
     return _fake_zk_instance
 

From f6a8c90be269c9cdb5ed0cbb1c46838b2169dddd Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Mon, 1 Feb 2021 18:12:00 +0300
Subject: [PATCH 0490/1238] Fix config path

---
 programs/server/config.d/nu_keeper_port.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/programs/server/config.d/nu_keeper_port.xml b/programs/server/config.d/nu_keeper_port.xml
index 07f71c63435..8de0a309ff0 120000
--- a/programs/server/config.d/nu_keeper_port.xml
+++ b/programs/server/config.d/nu_keeper_port.xml
@@ -1 +1 @@
-tests/config/config.d/nu_keeper_port.xml
\ No newline at end of file
+../../../tests/config/config.d/nu_keeper_port.xml
\ No newline at end of file

From 349d783089d55b9dd1bdeb8bff63fd07a0b0be42 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Mon, 1 Feb 2021 20:12:12 +0300
Subject: [PATCH 0491/1238] Improve performance of aggregate functions

---
 .../AggregateFunctionAggThrow.cpp             |  4 +-
 .../AggregateFunctionArgMinMax.h              | 10 ++--
 .../AggregateFunctionArray.h                  | 14 +++---
 src/AggregateFunctions/AggregateFunctionAvg.h | 10 ++--
 .../AggregateFunctionAvgWeighted.h            |  2 +-
 .../AggregateFunctionBitwise.h                | 10 ++--
 .../AggregateFunctionBoundingRatio.h          | 10 ++--
 ...egateFunctionCategoricalInformationValue.h |  2 +-
 .../AggregateFunctionCount.h                  | 22 ++++-----
 .../AggregateFunctionDistinct.h               | 18 ++++----
 .../AggregateFunctionEntropy.h                | 10 ++--
 .../AggregateFunctionForEach.h                | 14 +++---
 .../AggregateFunctionGroupArray.h             | 46 +++++++++----------
 .../AggregateFunctionGroupArrayInsertAt.h     | 10 ++--
 .../AggregateFunctionGroupArrayMoving.h       | 10 ++--
 .../AggregateFunctionGroupBitmap.h            | 20 ++++----
 .../AggregateFunctionGroupUniqArray.h         | 20 ++++----
 .../AggregateFunctionHistogram.h              | 10 ++--
 .../AggregateFunctionIf.cpp                   |  4 +-
 src/AggregateFunctions/AggregateFunctionIf.h  | 14 +++---
 .../AggregateFunctionMLMethod.h               | 12 ++---
 .../AggregateFunctionMannWhitney.h            | 10 ++--
 .../AggregateFunctionMaxIntersections.h       | 10 ++--
 .../AggregateFunctionMerge.h                  | 14 +++---
 .../AggregateFunctionMinMaxAny.h              | 10 ++--
 .../AggregateFunctionNull.h                   | 26 +++++------
 .../AggregateFunctionOrFill.h                 |  4 +-
 .../AggregateFunctionQuantile.h               | 10 ++--
 .../AggregateFunctionRankCorrelation.h        | 10 ++--
 .../AggregateFunctionResample.h               |  4 +-
 .../AggregateFunctionRetention.h              | 10 ++--
 .../AggregateFunctionSequenceMatch.h          | 14 +++---
 .../AggregateFunctionSimpleState.h            | 14 +++---
 .../AggregateFunctionState.h                  | 14 +++---
 .../AggregateFunctionStatistics.h             | 20 ++++----
 .../AggregateFunctionStatisticsSimple.h       | 10 ++--
 src/AggregateFunctions/AggregateFunctionSum.h | 10 ++--
 .../AggregateFunctionSumMap.h                 | 10 ++--
 .../AggregateFunctionTTest.h                  | 10 ++--
 .../AggregateFunctionTopK.h                   | 20 ++++----
 .../AggregateFunctionUniq.h                   | 20 ++++----
 .../AggregateFunctionUniqCombined.h           | 20 ++++----
 .../AggregateFunctionUniqUpTo.h               | 20 ++++----
 .../AggregateFunctionWindowFunnel.h           | 10 ++--
 src/AggregateFunctions/IAggregateFunction.h   | 22 ++++-----
 45 files changed, 297 insertions(+), 297 deletions(-)

diff --git a/src/AggregateFunctions/AggregateFunctionAggThrow.cpp b/src/AggregateFunctions/AggregateFunctionAggThrow.cpp
index fada039e20a..c699dd4f217 100644
--- a/src/AggregateFunctions/AggregateFunctionAggThrow.cpp
+++ b/src/AggregateFunctions/AggregateFunctionAggThrow.cpp
@@ -60,7 +60,7 @@ public:
         return std::make_shared<DataTypeUInt8>();
     }
 
-    void create(AggregateDataPtr place) const override
+    void create(AggregateDataPtr __restrict place) const override
     {
         if (std::uniform_real_distribution<>(0.0, 1.0)(thread_local_rng) <= throw_probability)
             throw Exception("Aggregate function " + getName() + " has thrown exception successfully", ErrorCodes::AGGREGATE_FUNCTION_THROW);
@@ -68,7 +68,7 @@ public:
         new (place) Data;
     }
 
-    void destroy(AggregateDataPtr place) const noexcept override
+    void destroy(AggregateDataPtr __restrict place) const noexcept override
     {
         data(place).~Data();
     }
diff --git a/src/AggregateFunctions/AggregateFunctionArgMinMax.h b/src/AggregateFunctions/AggregateFunctionArgMinMax.h
index 67f21db0240..b559c1c8a7e 100644
--- a/src/AggregateFunctions/AggregateFunctionArgMinMax.h
+++ b/src/AggregateFunctions/AggregateFunctionArgMinMax.h
@@ -70,25 +70,25 @@ public:
         return type_res;
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
     {
         if (this->data(place).value.changeIfBetter(*columns[1], row_num, arena))
             this->data(place).result.change(*columns[0], row_num, arena);
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
     {
         if (this->data(place).value.changeIfBetter(this->data(rhs).value, arena))
             this->data(place).result.change(this->data(rhs).result, arena);
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         this->data(place).result.write(buf, *type_res);
         this->data(place).value.write(buf, *type_val);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena * arena) const override
     {
         this->data(place).result.read(buf, *type_res, arena);
         this->data(place).value.read(buf, *type_val, arena);
@@ -96,7 +96,7 @@ public:
 
     bool allocatesMemoryInArena() const override { return Data::allocatesMemoryInArena(); }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         if (tuple_argument)
         {
diff --git a/src/AggregateFunctions/AggregateFunctionArray.h b/src/AggregateFunctions/AggregateFunctionArray.h
index e72fd3ab6ff..ef16fcde87b 100644
--- a/src/AggregateFunctions/AggregateFunctionArray.h
+++ b/src/AggregateFunctions/AggregateFunctionArray.h
@@ -47,12 +47,12 @@ public:
         return nested_func->getReturnType();
     }
 
-    void create(AggregateDataPtr place) const override
+    void create(AggregateDataPtr __restrict place) const override
     {
         nested_func->create(place);
     }
 
-    void destroy(AggregateDataPtr place) const noexcept override
+    void destroy(AggregateDataPtr __restrict place) const noexcept override
     {
         nested_func->destroy(place);
     }
@@ -77,7 +77,7 @@ public:
         return nested_func->isState();
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
     {
         const IColumn * nested[num_arguments];
 
@@ -104,22 +104,22 @@ public:
             nested_func->add(place, nested, i, arena);
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
     {
         nested_func->merge(place, rhs, arena);
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         nested_func->serialize(place, buf);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena * arena) const override
     {
         nested_func->deserialize(place, buf, arena);
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * arena) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
     {
         nested_func->insertResultInto(place, to, arena);
     }
diff --git a/src/AggregateFunctions/AggregateFunctionAvg.h b/src/AggregateFunctions/AggregateFunctionAvg.h
index e2f912cc320..7bf742294b4 100644
--- a/src/AggregateFunctions/AggregateFunctionAvg.h
+++ b/src/AggregateFunctions/AggregateFunctionAvg.h
@@ -98,13 +98,13 @@ public:
 
     DataTypePtr getReturnType() const final { return std::make_shared<DataTypeNumber<Float64>>(); }
 
-    void NO_SANITIZE_UNDEFINED merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    void NO_SANITIZE_UNDEFINED merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         this->data(place).numerator += this->data(rhs).numerator;
         this->data(place).denominator += this->data(rhs).denominator;
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         writeBinary(this->data(place).numerator, buf);
 
@@ -114,7 +114,7 @@ public:
             writeBinary(this->data(place).denominator, buf);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override
     {
         readBinary(this->data(place).numerator, buf);
 
@@ -124,7 +124,7 @@ public:
             readBinary(this->data(place).denominator, buf);
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         if constexpr (IsDecimalNumber<Numerator> || IsDecimalNumber<Denominator>)
             assert_cast<ColumnVector<Float64> &>(to).getData().push_back(
@@ -148,7 +148,7 @@ class AggregateFunctionAvg final : public AggregateFunctionAvgBase<AvgFieldType<
 public:
     using AggregateFunctionAvgBase<AvgFieldType<T>, UInt64, AggregateFunctionAvg<T>>::AggregateFunctionAvgBase;
 
-    void NO_SANITIZE_UNDEFINED add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const final
+    void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const final
     {
         this->data(place).numerator += static_cast<const DecimalOrVectorCol<T> &>(*columns[0]).getData()[row_num];
         ++this->data(place).denominator;
diff --git a/src/AggregateFunctions/AggregateFunctionAvgWeighted.h b/src/AggregateFunctions/AggregateFunctionAvgWeighted.h
index ab9ce9c2a61..f8b452fc444 100644
--- a/src/AggregateFunctions/AggregateFunctionAvgWeighted.h
+++ b/src/AggregateFunctions/AggregateFunctionAvgWeighted.h
@@ -28,7 +28,7 @@ public:
 
     using ValueT = MaxFieldType<Value, Weight>;
 
-    void NO_SANITIZE_UNDEFINED add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
+    void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
     {
         const auto& weights = static_cast<const DecimalOrVectorCol<Weight> &>(*columns[1]);
 
diff --git a/src/AggregateFunctions/AggregateFunctionBitwise.h b/src/AggregateFunctions/AggregateFunctionBitwise.h
index 6d9eb3c36e1..3ba8e045069 100644
--- a/src/AggregateFunctions/AggregateFunctionBitwise.h
+++ b/src/AggregateFunctions/AggregateFunctionBitwise.h
@@ -54,27 +54,27 @@ public:
         return std::make_shared<DataTypeNumber<T>>();
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
     {
         this->data(place).update(assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num]);
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         this->data(place).update(this->data(rhs).value);
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         writeBinary(this->data(place).value, buf);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override
     {
         readBinary(this->data(place).value, buf);
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         assert_cast<ColumnVector<T> &>(to).getData().push_back(this->data(place).value);
     }
diff --git a/src/AggregateFunctions/AggregateFunctionBoundingRatio.h b/src/AggregateFunctions/AggregateFunctionBoundingRatio.h
index 7c254668f8d..32ae22fd573 100644
--- a/src/AggregateFunctions/AggregateFunctionBoundingRatio.h
+++ b/src/AggregateFunctions/AggregateFunctionBoundingRatio.h
@@ -127,7 +127,7 @@ public:
         return std::make_shared<DataTypeFloat64>();
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, const size_t row_num, Arena *) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, const size_t row_num, Arena *) const override
     {
         /// NOTE Slightly inefficient.
         const auto x = columns[0]->getFloat64(row_num);
@@ -135,22 +135,22 @@ public:
         data(place).add(x, y);
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         data(place).merge(data(rhs));
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         data(place).serialize(buf);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override
     {
         data(place).deserialize(buf);
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         assert_cast<ColumnFloat64 &>(to).getData().push_back(getBoundingRatio(data(place)));
     }
diff --git a/src/AggregateFunctions/AggregateFunctionCategoricalInformationValue.h b/src/AggregateFunctions/AggregateFunctionCategoricalInformationValue.h
index aa205a71c97..ba8acb208ea 100644
--- a/src/AggregateFunctions/AggregateFunctionCategoricalInformationValue.h
+++ b/src/AggregateFunctions/AggregateFunctionCategoricalInformationValue.h
@@ -33,7 +33,7 @@ public:
         return "categoricalInformationValue";
     }
 
-    void create(AggregateDataPtr place) const override
+    void create(AggregateDataPtr __restrict place) const override
     {
         memset(place, 0, sizeOfData());
     }
diff --git a/src/AggregateFunctions/AggregateFunctionCount.h b/src/AggregateFunctions/AggregateFunctionCount.h
index 63d3d34a0fd..1b3a0acb528 100644
--- a/src/AggregateFunctions/AggregateFunctionCount.h
+++ b/src/AggregateFunctions/AggregateFunctionCount.h
@@ -38,7 +38,7 @@ public:
         return std::make_shared<DataTypeUInt64>();
     }
 
-    void add(AggregateDataPtr place, const IColumn **, size_t, Arena *) const override
+    void add(AggregateDataPtr __restrict place, const IColumn **, size_t, Arena *) const override
     {
         ++data(place).count;
     }
@@ -76,28 +76,28 @@ public:
         }
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         data(place).count += data(rhs).count;
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         writeVarUInt(data(place).count, buf);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override
     {
         readVarUInt(data(place).count, buf);
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         assert_cast<ColumnUInt64 &>(to).getData().push_back(data(place).count);
     }
 
     /// Reset the state to specified value. This function is not the part of common interface.
-    void set(AggregateDataPtr place, UInt64 new_count)
+    void set(AggregateDataPtr __restrict place, UInt64 new_count)
     {
         data(place).count = new_count;
     }
@@ -126,27 +126,27 @@ public:
         return std::make_shared<DataTypeUInt64>();
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
     {
         data(place).count += !assert_cast<const ColumnNullable &>(*columns[0]).isNullAt(row_num);
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         data(place).count += data(rhs).count;
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         writeVarUInt(data(place).count, buf);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override
     {
         readVarUInt(data(place).count, buf);
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         assert_cast<ColumnUInt64 &>(to).getData().push_back(data(place).count);
     }
diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.h b/src/AggregateFunctions/AggregateFunctionDistinct.h
index f9c8f2651dc..b481e2a28e7 100644
--- a/src/AggregateFunctions/AggregateFunctionDistinct.h
+++ b/src/AggregateFunctions/AggregateFunctionDistinct.h
@@ -156,12 +156,12 @@ private:
     AggregateFunctionPtr nested_func;
     size_t arguments_num;
 
-    AggregateDataPtr getNestedPlace(AggregateDataPtr place) const noexcept
+    AggregateDataPtr getNestedPlace(AggregateDataPtr __restrict place) const noexcept
     {
         return place + prefix_size;
     }
 
-    ConstAggregateDataPtr getNestedPlace(ConstAggregateDataPtr place) const noexcept
+    ConstAggregateDataPtr getNestedPlace(ConstAggregateDataPtr __restrict place) const noexcept
     {
         return place + prefix_size;
     }
@@ -172,27 +172,27 @@ public:
     , nested_func(nested_func_)
     , arguments_num(arguments.size()) {}
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
     {
         this->data(place).add(columns, arguments_num, row_num, arena);
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
     {
         this->data(place).merge(this->data(rhs), arena);
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         this->data(place).serialize(buf);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena * arena) const override
     {
         this->data(place).deserialize(buf, arena);
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * arena) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
     {
         auto arguments = this->data(place).getArguments(this->argument_types);
         ColumnRawPtrs arguments_raw(arguments.size());
@@ -209,13 +209,13 @@ public:
         return prefix_size + nested_func->sizeOfData();
     }
 
-    void create(AggregateDataPtr place) const override
+    void create(AggregateDataPtr __restrict place) const override
     {
         new (place) Data;
         nested_func->create(getNestedPlace(place));
     }
 
-    void destroy(AggregateDataPtr place) const noexcept override
+    void destroy(AggregateDataPtr __restrict place) const noexcept override
     {
         this->data(place).~Data();
         nested_func->destroy(getNestedPlace(place));
diff --git a/src/AggregateFunctions/AggregateFunctionEntropy.h b/src/AggregateFunctions/AggregateFunctionEntropy.h
index 656aca43f60..9bb1bc039c5 100644
--- a/src/AggregateFunctions/AggregateFunctionEntropy.h
+++ b/src/AggregateFunctions/AggregateFunctionEntropy.h
@@ -103,7 +103,7 @@ public:
         return std::make_shared<DataTypeNumber<Float64>>();
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
     {
         if constexpr (!std::is_same_v<UInt128, Value>)
         {
@@ -117,22 +117,22 @@ public:
         }
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         this->data(place).merge(this->data(rhs));
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         this->data(const_cast<AggregateDataPtr>(place)).serialize(buf);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override
     {
         this->data(place).deserialize(buf);
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         auto & column = assert_cast<ColumnVector<Float64> &>(to);
         column.getData().push_back(this->data(place).get());
diff --git a/src/AggregateFunctions/AggregateFunctionForEach.h b/src/AggregateFunctions/AggregateFunctionForEach.h
index c3b1b09ab3c..8d99e2e8af3 100644
--- a/src/AggregateFunctions/AggregateFunctionForEach.h
+++ b/src/AggregateFunctions/AggregateFunctionForEach.h
@@ -50,7 +50,7 @@ private:
     size_t nested_size_of_data = 0;
     size_t num_arguments;
 
-    AggregateFunctionForEachData & ensureAggregateData(AggregateDataPtr place, size_t new_size, Arena & arena) const
+    AggregateFunctionForEachData & ensureAggregateData(AggregateDataPtr __restrict place, size_t new_size, Arena & arena) const
     {
         AggregateFunctionForEachData & state = data(place);
 
@@ -128,7 +128,7 @@ public:
         return std::make_shared<DataTypeArray>(nested_func->getReturnType());
     }
 
-    void destroy(AggregateDataPtr place) const noexcept override
+    void destroy(AggregateDataPtr __restrict place) const noexcept override
     {
         AggregateFunctionForEachData & state = data(place);
 
@@ -145,7 +145,7 @@ public:
         return nested_func->hasTrivialDestructor();
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
     {
         const IColumn * nested[num_arguments];
 
@@ -178,7 +178,7 @@ public:
         }
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
     {
         const AggregateFunctionForEachData & rhs_state = data(rhs);
         AggregateFunctionForEachData & state = ensureAggregateData(place, rhs_state.dynamic_array_size, *arena);
@@ -195,7 +195,7 @@ public:
         }
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         const AggregateFunctionForEachData & state = data(place);
         writeBinary(state.dynamic_array_size, buf);
@@ -208,7 +208,7 @@ public:
         }
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena * arena) const override
     {
         AggregateFunctionForEachData & state = data(place);
 
@@ -225,7 +225,7 @@ public:
         }
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * arena) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
     {
         AggregateFunctionForEachData & state = data(place);
 
diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.h b/src/AggregateFunctions/AggregateFunctionGroupArray.h
index 27a8cf0b1ee..921274f7d59 100644
--- a/src/AggregateFunctions/AggregateFunctionGroupArray.h
+++ b/src/AggregateFunctions/AggregateFunctionGroupArray.h
@@ -142,14 +142,14 @@ public:
         }
     }
 
-    void create(AggregateDataPtr place) const override
+    void create(AggregateDataPtr __restrict place) const override
     {
         [[maybe_unused]] auto a = new (place) Data;
         if constexpr (Trait::sampler == Sampler::RNG)
             a->rng.seed(seed);
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
     {
         if constexpr (Trait::sampler == Sampler::NONE)
         {
@@ -176,7 +176,7 @@ public:
         // if constexpr (Trait::sampler == Sampler::DETERMINATOR)
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
     {
         if constexpr (Trait::sampler == Sampler::NONE)
         {
@@ -235,7 +235,7 @@ public:
         // if constexpr (Trait::sampler == Sampler::DETERMINATOR)
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         const auto & value = this->data(place).value;
         size_t size = value.size();
@@ -254,7 +254,7 @@ public:
         // if constexpr (Trait::sampler == Sampler::DETERMINATOR)
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena * arena) const override
     {
         size_t size = 0;
         readVarUInt(size, buf);
@@ -283,7 +283,7 @@ public:
         // if constexpr (Trait::sampler == Sampler::DETERMINATOR)
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         const auto & value = this->data(place).value;
         size_t size = value.size();
@@ -416,8 +416,8 @@ class GroupArrayGeneralImpl final
 {
     static constexpr bool limit_num_elems = Trait::has_limit;
     using Data = GroupArrayGeneralData<Node, Trait::sampler != Sampler::NONE>;
-    static Data & data(AggregateDataPtr place) { return *reinterpret_cast<Data *>(place); }
-    static const Data & data(ConstAggregateDataPtr place) { return *reinterpret_cast<const Data *>(place); }
+    static Data & data(AggregateDataPtr __restrict place) { return *reinterpret_cast<Data *>(place); }
+    static const Data & data(ConstAggregateDataPtr __restrict place) { return *reinterpret_cast<const Data *>(place); }
 
     DataTypePtr & data_type;
     UInt64 max_elems;
@@ -450,14 +450,14 @@ public:
         }
     }
 
-    void create(AggregateDataPtr place) const override
+    void create(AggregateDataPtr __restrict place) const override
     {
         [[maybe_unused]] auto a = new (place) Data;
         if constexpr (Trait::sampler == Sampler::RNG)
             a->rng.seed(seed);
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
     {
         if constexpr (Trait::sampler == Sampler::NONE)
         {
@@ -485,7 +485,7 @@ public:
         // if constexpr (Trait::sampler == Sampler::DETERMINATOR)
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
     {
         if constexpr (Trait::sampler == Sampler::NONE)
             mergeNoSampler(place, rhs, arena);
@@ -495,7 +495,7 @@ public:
         // else if constexpr (Trait::sampler == Sampler::DETERMINATOR)
     }
 
-    void ALWAYS_INLINE mergeNoSampler(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const
+    void ALWAYS_INLINE mergeNoSampler(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const
     {
         if (data(rhs).value.empty()) /// rhs state is empty
             return;
@@ -517,7 +517,7 @@ public:
             a.push_back(b[i]->clone(arena), arena);
     }
 
-    void ALWAYS_INLINE mergeWithRNGSampler(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const
+    void ALWAYS_INLINE mergeWithRNGSampler(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const
     {
         if (data(rhs).value.empty()) /// rhs state is empty
             return;
@@ -553,7 +553,7 @@ public:
         }
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         writeVarUInt(data(place).value.size(), buf);
 
@@ -573,7 +573,7 @@ public:
         // if constexpr (Trait::sampler == Sampler::DETERMINATOR)
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena * arena) const override
     {
         UInt64 elems;
         readVarUInt(elems, buf);
@@ -606,7 +606,7 @@ public:
         // if constexpr (Trait::sampler == Sampler::DETERMINATOR)
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         auto & column_array = assert_cast<ColumnArray &>(to);
 
@@ -692,8 +692,8 @@ class GroupArrayGeneralListImpl final
 {
     static constexpr bool limit_num_elems = Trait::has_limit;
     using Data = GroupArrayGeneralListData<Node>;
-    static Data & data(AggregateDataPtr place) { return *reinterpret_cast<Data *>(place); }
-    static const Data & data(ConstAggregateDataPtr place) { return *reinterpret_cast<const Data *>(place); }
+    static Data & data(AggregateDataPtr __restrict place) { return *reinterpret_cast<Data *>(place); }
+    static const Data & data(ConstAggregateDataPtr __restrict place) { return *reinterpret_cast<const Data *>(place); }
 
     DataTypePtr & data_type;
     UInt64 max_elems;
@@ -710,7 +710,7 @@ public:
 
     DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(data_type); }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
     {
         if (limit_num_elems && data(place).elems >= max_elems)
             return;
@@ -731,7 +731,7 @@ public:
         ++data(place).elems;
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
     {
         /// It is sadly, but rhs's Arena could be destroyed
 
@@ -780,7 +780,7 @@ public:
         data(place).elems = new_elems;
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         writeVarUInt(data(place).elems, buf);
 
@@ -792,7 +792,7 @@ public:
         }
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena * arena) const override
     {
         UInt64 elems;
         readVarUInt(elems, buf);
@@ -821,7 +821,7 @@ public:
         data(place).last = prev;
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         auto & column_array = assert_cast<ColumnArray &>(to);
 
diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h b/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h
index a4800dd715e..42005659a36 100644
--- a/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h
+++ b/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h
@@ -102,7 +102,7 @@ public:
         return std::make_shared<DataTypeArray>(type);
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
     {
         /// TODO Do positions need to be 1-based for this function?
         size_t position = columns[1]->getUInt(row_num);
@@ -126,7 +126,7 @@ public:
         columns[0]->get(row_num, arr[position]);
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         Array & arr_lhs = data(place).value;
         const Array & arr_rhs = data(rhs).value;
@@ -139,7 +139,7 @@ public:
                 arr_lhs[i] = arr_rhs[i];
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         const Array & arr = data(place).value;
         size_t size = arr.size();
@@ -159,7 +159,7 @@ public:
         }
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override
     {
         size_t size = 0;
         readVarUInt(size, buf);
@@ -179,7 +179,7 @@ public:
         }
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         ColumnArray & to_array = assert_cast<ColumnArray &>(to);
         IColumn & to_data = to_array.getData();
diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h
index 3281738e66d..eecf97e1e8c 100644
--- a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h
+++ b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h
@@ -114,13 +114,13 @@ public:
             return std::make_shared<DataTypeArray>(std::make_shared<DataTypeResult>());
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
     {
         auto value = static_cast<const ColumnSource &>(*columns[0]).getData()[row_num];
         this->data(place).add(static_cast<ResultT>(value), arena);
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
     {
         auto & cur_elems = this->data(place);
         auto & rhs_elems = this->data(rhs);
@@ -138,7 +138,7 @@ public:
         cur_elems.sum += rhs_elems.sum;
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         const auto & value = this->data(place).value;
         size_t size = value.size();
@@ -146,7 +146,7 @@ public:
         buf.write(reinterpret_cast<const char *>(value.data()), size * sizeof(value[0]));
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena * arena) const override
     {
         size_t size = 0;
         readVarUInt(size, buf);
@@ -163,7 +163,7 @@ public:
         }
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         const auto & data = this->data(place);
         size_t size = data.value.size();
diff --git a/src/AggregateFunctions/AggregateFunctionGroupBitmap.h b/src/AggregateFunctions/AggregateFunctionGroupBitmap.h
index 315132cf05c..4628410286d 100644
--- a/src/AggregateFunctions/AggregateFunctionGroupBitmap.h
+++ b/src/AggregateFunctions/AggregateFunctionGroupBitmap.h
@@ -22,21 +22,21 @@ public:
 
     DataTypePtr getReturnType() const override { return std::make_shared<DataTypeNumber<T>>(); }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
     {
         this->data(place).rbs.add(assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num]);
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         this->data(place).rbs.merge(this->data(rhs).rbs);
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override { this->data(place).rbs.write(buf); }
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override { this->data(place).rbs.write(buf); }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { this->data(place).rbs.read(buf); }
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override { this->data(place).rbs.read(buf); }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         assert_cast<ColumnVector<T> &>(to).getData().push_back(this->data(place).rbs.size());
     }
@@ -56,7 +56,7 @@ public:
 
     DataTypePtr getReturnType() const override { return std::make_shared<DataTypeNumber<T>>(); }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
     {
         Data & data_lhs = this->data(place);
         const Data & data_rhs = this->data(assert_cast<const ColumnAggregateFunction &>(*columns[0]).getData()[row_num]);
@@ -71,7 +71,7 @@ public:
         }
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         Data & data_lhs = this->data(place);
         const Data & data_rhs = this->data(rhs);
@@ -90,11 +90,11 @@ public:
         }
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override { this->data(place).rbs.write(buf); }
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override { this->data(place).rbs.write(buf); }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { this->data(place).rbs.read(buf); }
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override { this->data(place).rbs.read(buf); }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         assert_cast<ColumnVector<T> &>(to).getData().push_back(this->data(place).rbs.size());
     }
diff --git a/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h b/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h
index 1dc7dcde9c3..435efdd2373 100644
--- a/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h
+++ b/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h
@@ -59,14 +59,14 @@ public:
         return std::make_shared<DataTypeArray>(this->argument_types[0]);
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
     {
         if (limit_num_elems && this->data(place).value.size() >= max_elems)
             return;
         this->data(place).value.insert(assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num]);
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         if (!limit_num_elems)
             this->data(place).value.merge(this->data(rhs).value);
@@ -84,7 +84,7 @@ public:
         }
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         auto & set = this->data(place).value;
         size_t size = set.size();
@@ -93,12 +93,12 @@ public:
             writeIntBinary(elem, buf);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override
     {
         this->data(place).value.read(buf);
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
         ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
@@ -166,7 +166,7 @@ public:
         return true;
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         auto & set = this->data(place).value;
         writeVarUInt(set.size(), buf);
@@ -177,7 +177,7 @@ public:
         }
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena * arena) const override
     {
         auto & set = this->data(place).value;
         size_t size;
@@ -188,7 +188,7 @@ public:
             set.insert(readStringBinaryInto(*arena, buf));
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
     {
         auto & set = this->data(place).value;
         if (limit_num_elems && set.size() >= max_elems)
@@ -200,7 +200,7 @@ public:
         set.emplace(key_holder, it, inserted);
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
     {
         auto & cur_set = this->data(place).value;
         auto & rhs_set = this->data(rhs).value;
@@ -218,7 +218,7 @@ public:
         }
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
         ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
diff --git a/src/AggregateFunctions/AggregateFunctionHistogram.h b/src/AggregateFunctions/AggregateFunctionHistogram.h
index bc9c95ecf2a..76aa96ba663 100644
--- a/src/AggregateFunctions/AggregateFunctionHistogram.h
+++ b/src/AggregateFunctions/AggregateFunctionHistogram.h
@@ -332,28 +332,28 @@ public:
         return std::make_shared<DataTypeArray>(tuple);
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
     {
         auto val = assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num];
         this->data(place).add(static_cast<Data::Mean>(val), 1, max_bins);
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         this->data(place).merge(this->data(rhs), max_bins);
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         this->data(place).write(buf);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override
     {
         this->data(place).read(buf, max_bins);
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         auto & data = this->data(place);
 
diff --git a/src/AggregateFunctions/AggregateFunctionIf.cpp b/src/AggregateFunctions/AggregateFunctionIf.cpp
index 5e7e3844956..45fead18d29 100644
--- a/src/AggregateFunctions/AggregateFunctionIf.cpp
+++ b/src/AggregateFunctions/AggregateFunctionIf.cpp
@@ -97,7 +97,7 @@ public:
         return assert_cast<const ColumnUInt8 &>(*filter_column).getData()[row_num];
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
     {
         const ColumnNullable * column = assert_cast<const ColumnNullable *>(columns[0]);
         const IColumn * nested_column = &column->getNestedColumn();
@@ -140,7 +140,7 @@ public:
         return assert_cast<const ColumnUInt8 &>(*columns[num_arguments - 1]).getData()[row_num];
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
     {
         /// This container stores the columns we really pass to the nested function.
         const IColumn * nested_columns[number_of_arguments];
diff --git a/src/AggregateFunctions/AggregateFunctionIf.h b/src/AggregateFunctions/AggregateFunctionIf.h
index e3a23a432c7..8144ae355ba 100644
--- a/src/AggregateFunctions/AggregateFunctionIf.h
+++ b/src/AggregateFunctions/AggregateFunctionIf.h
@@ -49,12 +49,12 @@ public:
         return nested_func->getReturnType();
     }
 
-    void create(AggregateDataPtr place) const override
+    void create(AggregateDataPtr __restrict place) const override
     {
         nested_func->create(place);
     }
 
-    void destroy(AggregateDataPtr place) const noexcept override
+    void destroy(AggregateDataPtr __restrict place) const noexcept override
     {
         nested_func->destroy(place);
     }
@@ -74,7 +74,7 @@ public:
         return nested_func->alignOfData();
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
     {
         if (assert_cast<const ColumnUInt8 &>(*columns[num_arguments - 1]).getData()[row_num])
             nested_func->add(place, columns, row_num, arena);
@@ -108,22 +108,22 @@ public:
         nested_func->addBatchSinglePlaceNotNull(batch_size, place, columns, null_map, arena, num_arguments - 1);
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
     {
         nested_func->merge(place, rhs, arena);
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         nested_func->serialize(place, buf);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena * arena) const override
     {
         nested_func->deserialize(place, buf, arena);
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * arena) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
     {
         nested_func->insertResultInto(place, to, arena);
     }
diff --git a/src/AggregateFunctions/AggregateFunctionMLMethod.h b/src/AggregateFunctions/AggregateFunctionMLMethod.h
index b6912405fef..0c88f9d877d 100644
--- a/src/AggregateFunctions/AggregateFunctionMLMethod.h
+++ b/src/AggregateFunctions/AggregateFunctionMLMethod.h
@@ -329,7 +329,7 @@ public:
         return std::make_shared<DataTypeNumber<Float64>>();
     }
 
-    void create(AggregateDataPtr place) const override
+    void create(AggregateDataPtr __restrict place) const override
     {
         std::shared_ptr<IWeightsUpdater> new_weights_updater;
         if (weights_updater_name == "SGD")
@@ -346,16 +346,16 @@ public:
         new (place) Data(learning_rate, l2_reg_coef, param_num, batch_size, gradient_computer, new_weights_updater);
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
     {
         this->data(place).add(columns, row_num);
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override { this->data(place).merge(this->data(rhs)); }
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override { this->data(place).merge(this->data(rhs)); }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override { this->data(place).write(buf); }
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override { this->data(place).write(buf); }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { this->data(place).read(buf); }
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override { this->data(place).read(buf); }
 
     void predictValues(
         ConstAggregateDataPtr place,
@@ -383,7 +383,7 @@ public:
     /** This function is called if aggregate function without State modifier is selected in a query.
      *  Inserts all weights of the model into the column 'to', so user may use such information if needed
      */
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         this->data(place).returnWeights(to);
     }
diff --git a/src/AggregateFunctions/AggregateFunctionMannWhitney.h b/src/AggregateFunctions/AggregateFunctionMannWhitney.h
index 82a15c6cfae..403f628a9ff 100644
--- a/src/AggregateFunctions/AggregateFunctionMannWhitney.h
+++ b/src/AggregateFunctions/AggregateFunctionMannWhitney.h
@@ -194,7 +194,7 @@ public:
         );
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
     {
         Float64 value = columns[0]->getFloat64(row_num);
         UInt8 is_second = columns[1]->getUInt(row_num);
@@ -205,7 +205,7 @@ public:
             this->data(place).addX(value, arena);
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
     {
         auto & a = this->data(place);
         auto & b = this->data(rhs);
@@ -213,17 +213,17 @@ public:
         a.merge(b, arena);
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         this->data(place).write(buf);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena * arena) const override
     {
         this->data(place).read(buf, arena);
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         if (!this->data(place).size_x || !this->data(place).size_y)
             throw Exception("Aggregate function " + getName() + " require both samples to be non empty", ErrorCodes::BAD_ARGUMENTS);
diff --git a/src/AggregateFunctions/AggregateFunctionMaxIntersections.h b/src/AggregateFunctions/AggregateFunctionMaxIntersections.h
index b8a4dd63eea..d4946ad2c9d 100644
--- a/src/AggregateFunctions/AggregateFunctionMaxIntersections.h
+++ b/src/AggregateFunctions/AggregateFunctionMaxIntersections.h
@@ -87,7 +87,7 @@ public:
             return std::make_shared<DataTypeNumber<PointType>>();
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
     {
         PointType left = assert_cast<const ColumnVector<PointType> &>(*columns[0]).getData()[row_num];
         PointType right = assert_cast<const ColumnVector<PointType> &>(*columns[1]).getData()[row_num];
@@ -99,7 +99,7 @@ public:
             this->data(place).value.push_back(std::make_pair(right, Int64(-1)), arena);
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
     {
         auto & cur_elems = this->data(place);
         auto & rhs_elems = this->data(rhs);
@@ -107,7 +107,7 @@ public:
         cur_elems.value.insert(rhs_elems.value.begin(), rhs_elems.value.end(), arena);
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         const auto & value = this->data(place).value;
         size_t size = value.size();
@@ -115,7 +115,7 @@ public:
         buf.write(reinterpret_cast<const char *>(value.data()), size * sizeof(value[0]));
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena * arena) const override
     {
         size_t size = 0;
         readVarUInt(size, buf);
@@ -129,7 +129,7 @@ public:
         buf.read(reinterpret_cast<char *>(value.data()), size * sizeof(value[0]));
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         Int64 current_intersections = 0;
         Int64 max_intersections = 0;
diff --git a/src/AggregateFunctions/AggregateFunctionMerge.h b/src/AggregateFunctions/AggregateFunctionMerge.h
index 721a736fcb7..7f6a23f1845 100644
--- a/src/AggregateFunctions/AggregateFunctionMerge.h
+++ b/src/AggregateFunctions/AggregateFunctionMerge.h
@@ -48,12 +48,12 @@ public:
         return nested_func->getReturnType();
     }
 
-    void create(AggregateDataPtr place) const override
+    void create(AggregateDataPtr __restrict place) const override
     {
         nested_func->create(place);
     }
 
-    void destroy(AggregateDataPtr place) const noexcept override
+    void destroy(AggregateDataPtr __restrict place) const noexcept override
     {
         nested_func->destroy(place);
     }
@@ -73,27 +73,27 @@ public:
         return nested_func->alignOfData();
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
     {
         nested_func->merge(place, assert_cast<const ColumnAggregateFunction &>(*columns[0]).getData()[row_num], arena);
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
     {
         nested_func->merge(place, rhs, arena);
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         nested_func->serialize(place, buf);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena * arena) const override
     {
         nested_func->deserialize(place, buf, arena);
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * arena) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
     {
         nested_func->insertResultInto(place, to, arena);
     }
diff --git a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h
index a21a64af9a4..a39d9af000b 100644
--- a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h
+++ b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h
@@ -721,22 +721,22 @@ public:
         return type;
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
     {
         this->data(place).changeIfBetter(*columns[0], row_num, arena);
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
     {
         this->data(place).changeIfBetter(this->data(rhs), arena);
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         this->data(place).write(buf, *type.get());
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena * arena) const override
     {
         this->data(place).read(buf, *type.get(), arena);
     }
@@ -746,7 +746,7 @@ public:
         return Data::allocatesMemoryInArena();
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         this->data(place).insertResultInto(to);
     }
diff --git a/src/AggregateFunctions/AggregateFunctionNull.h b/src/AggregateFunctions/AggregateFunctionNull.h
index 5c94e68cb26..e1238182ab5 100644
--- a/src/AggregateFunctions/AggregateFunctionNull.h
+++ b/src/AggregateFunctions/AggregateFunctionNull.h
@@ -45,29 +45,29 @@ protected:
       * We use prefix_size bytes for flag to satisfy the alignment requirement of nested state.
       */
 
-    AggregateDataPtr nestedPlace(AggregateDataPtr place) const noexcept
+    AggregateDataPtr nestedPlace(AggregateDataPtr __restrict place) const noexcept
     {
         return place + prefix_size;
     }
 
-    ConstAggregateDataPtr nestedPlace(ConstAggregateDataPtr place) const noexcept
+    ConstAggregateDataPtr nestedPlace(ConstAggregateDataPtr __restrict place) const noexcept
     {
         return place + prefix_size;
     }
 
-    static void initFlag(AggregateDataPtr place) noexcept
+    static void initFlag(AggregateDataPtr __restrict place) noexcept
     {
         if constexpr (result_is_nullable)
             place[0] = 0;
     }
 
-    static void setFlag(AggregateDataPtr place) noexcept
+    static void setFlag(AggregateDataPtr __restrict place) noexcept
     {
         if constexpr (result_is_nullable)
             place[0] = 1;
     }
 
-    static bool getFlag(ConstAggregateDataPtr place) noexcept
+    static bool getFlag(ConstAggregateDataPtr __restrict place) noexcept
     {
         return result_is_nullable ? place[0] : 1;
     }
@@ -95,13 +95,13 @@ public:
             : nested_function->getReturnType();
     }
 
-    void create(AggregateDataPtr place) const override
+    void create(AggregateDataPtr __restrict place) const override
     {
         initFlag(place);
         nested_function->create(nestedPlace(place));
     }
 
-    void destroy(AggregateDataPtr place) const noexcept override
+    void destroy(AggregateDataPtr __restrict place) const noexcept override
     {
         nested_function->destroy(nestedPlace(place));
     }
@@ -121,7 +121,7 @@ public:
         return nested_function->alignOfData();
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
     {
         if (result_is_nullable && getFlag(rhs))
             setFlag(place);
@@ -129,7 +129,7 @@ public:
         nested_function->merge(nestedPlace(place), nestedPlace(rhs), arena);
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         bool flag = getFlag(place);
         if constexpr (serialize_flag)
@@ -138,7 +138,7 @@ public:
             nested_function->serialize(nestedPlace(place), buf);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena * arena) const override
     {
         bool flag = 1;
         if constexpr (serialize_flag)
@@ -150,7 +150,7 @@ public:
         }
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * arena) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
     {
         if constexpr (result_is_nullable)
         {
@@ -200,7 +200,7 @@ public:
     {
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
     {
         const ColumnNullable * column = assert_cast<const ColumnNullable *>(columns[0]);
         const IColumn * nested_column = &column->getNestedColumn();
@@ -250,7 +250,7 @@ public:
             is_nullable[i] = arguments[i]->isNullable();
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
     {
         /// This container stores the columns we really pass to the nested function.
         const IColumn * nested_columns[number_of_arguments];
diff --git a/src/AggregateFunctions/AggregateFunctionOrFill.h b/src/AggregateFunctions/AggregateFunctionOrFill.h
index 0b0b5e717a2..93fe84a036a 100644
--- a/src/AggregateFunctions/AggregateFunctionOrFill.h
+++ b/src/AggregateFunctions/AggregateFunctionOrFill.h
@@ -76,13 +76,13 @@ public:
         return nested_function->alignOfData();
     }
 
-    void create(AggregateDataPtr place) const override
+    void create(AggregateDataPtr __restrict place) const override
     {
         nested_function->create(place);
         place[size_of_data] = 0;
     }
 
-    void destroy(AggregateDataPtr place) const noexcept override
+    void destroy(AggregateDataPtr __restrict place) const noexcept override
     {
         nested_function->destroy(place);
     }
diff --git a/src/AggregateFunctions/AggregateFunctionQuantile.h b/src/AggregateFunctions/AggregateFunctionQuantile.h
index 9d2232892f0..edd24add736 100644
--- a/src/AggregateFunctions/AggregateFunctionQuantile.h
+++ b/src/AggregateFunctions/AggregateFunctionQuantile.h
@@ -103,7 +103,7 @@ public:
             return res;
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
     {
         auto value = static_cast<const ColVecType &>(*columns[0]).getData()[row_num];
 
@@ -122,23 +122,23 @@ public:
             this->data(place).add(value);
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         this->data(place).merge(this->data(rhs));
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         /// const_cast is required because some data structures apply finalizaton (like compactization) before serializing.
         this->data(const_cast<AggregateDataPtr>(place)).serialize(buf);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override
     {
         this->data(place).deserialize(buf);
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         /// const_cast is required because some data structures apply finalizaton (like sorting) for obtain a result.
         auto & data = this->data(place);
diff --git a/src/AggregateFunctions/AggregateFunctionRankCorrelation.h b/src/AggregateFunctions/AggregateFunctionRankCorrelation.h
index bdec03d5975..a7e0852378c 100644
--- a/src/AggregateFunctions/AggregateFunctionRankCorrelation.h
+++ b/src/AggregateFunctions/AggregateFunctionRankCorrelation.h
@@ -63,7 +63,7 @@ public:
         return std::make_shared<DataTypeNumber<Float64>>();
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
     {
         Float64 new_x = columns[0]->getFloat64(row_num);
         Float64 new_y = columns[1]->getFloat64(row_num);
@@ -71,7 +71,7 @@ public:
         this->data(place).addY(new_y, arena);
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
     {
         auto & a = this->data(place);
         auto & b = this->data(rhs);
@@ -79,17 +79,17 @@ public:
         a.merge(b, arena);
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         this->data(place).write(buf);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena * arena) const override
     {
         this->data(place).read(buf, arena);
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         auto answer = this->data(place).getResult();
 
diff --git a/src/AggregateFunctions/AggregateFunctionResample.h b/src/AggregateFunctions/AggregateFunctionResample.h
index 51252fe0b89..4b29b921b45 100644
--- a/src/AggregateFunctions/AggregateFunctionResample.h
+++ b/src/AggregateFunctions/AggregateFunctionResample.h
@@ -110,7 +110,7 @@ public:
         return align_of_data;
     }
 
-    void create(AggregateDataPtr place) const override
+    void create(AggregateDataPtr __restrict place) const override
     {
         for (size_t i = 0; i < total; ++i)
         {
@@ -127,7 +127,7 @@ public:
         }
     }
 
-    void destroy(AggregateDataPtr place) const noexcept override
+    void destroy(AggregateDataPtr __restrict place) const noexcept override
     {
         for (size_t i = 0; i < total; ++i)
             nested_function->destroy(place + i * size_of_data);
diff --git a/src/AggregateFunctions/AggregateFunctionRetention.h b/src/AggregateFunctions/AggregateFunctionRetention.h
index f8a2163ccb9..5f0d9907280 100644
--- a/src/AggregateFunctions/AggregateFunctionRetention.h
+++ b/src/AggregateFunctions/AggregateFunctionRetention.h
@@ -94,7 +94,7 @@ public:
         return std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt8>());
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, const size_t row_num, Arena *) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, const size_t row_num, Arena *) const override
     {
         for (const auto i : ext::range(0, events_size))
         {
@@ -106,22 +106,22 @@ public:
         }
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         this->data(place).merge(this->data(rhs));
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         this->data(place).serialize(buf);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override
     {
         this->data(place).deserialize(buf);
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         auto & data_to = assert_cast<ColumnUInt8 &>(assert_cast<ColumnArray &>(to).getData()).getData();
         auto & offsets_to = assert_cast<ColumnArray &>(to).getOffsets();
diff --git a/src/AggregateFunctions/AggregateFunctionSequenceMatch.h b/src/AggregateFunctions/AggregateFunctionSequenceMatch.h
index 1a3da20e347..48015a6d282 100644
--- a/src/AggregateFunctions/AggregateFunctionSequenceMatch.h
+++ b/src/AggregateFunctions/AggregateFunctionSequenceMatch.h
@@ -149,7 +149,7 @@ public:
         parsePattern();
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, const size_t row_num, Arena *) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, const size_t row_num, Arena *) const override
     {
         const auto timestamp = assert_cast<const ColumnVector<T> *>(columns[0])->getData()[row_num];
 
@@ -163,17 +163,17 @@ public:
         this->data(place).add(timestamp, events);
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         this->data(place).merge(this->data(rhs));
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         this->data(place).serialize(buf);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override
     {
         this->data(place).deserialize(buf);
     }
@@ -560,7 +560,7 @@ public:
 
     DataTypePtr getReturnType() const override { return std::make_shared<DataTypeUInt8>(); }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         this->data(place).sort();
 
@@ -588,14 +588,14 @@ public:
 
     DataTypePtr getReturnType() const override { return std::make_shared<DataTypeUInt64>(); }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         this->data(place).sort();
         assert_cast<ColumnUInt64 &>(to).getData().push_back(count(place));
     }
 
 private:
-    UInt64 count(const ConstAggregateDataPtr & place) const
+    UInt64 count(ConstAggregateDataPtr __restrict place) const
     {
         const auto & data_ref = this->data(place);
 
diff --git a/src/AggregateFunctions/AggregateFunctionSimpleState.h b/src/AggregateFunctions/AggregateFunctionSimpleState.h
index 0b47e21e570..612f4ae76c1 100644
--- a/src/AggregateFunctions/AggregateFunctionSimpleState.h
+++ b/src/AggregateFunctions/AggregateFunctionSimpleState.h
@@ -48,9 +48,9 @@ public:
         return storage_type;
     }
 
-    void create(AggregateDataPtr place) const override { nested_func->create(place); }
+    void create(AggregateDataPtr __restrict place) const override { nested_func->create(place); }
 
-    void destroy(AggregateDataPtr place) const noexcept override { nested_func->destroy(place); }
+    void destroy(AggregateDataPtr __restrict place) const noexcept override { nested_func->destroy(place); }
 
     bool hasTrivialDestructor() const override { return nested_func->hasTrivialDestructor(); }
 
@@ -58,21 +58,21 @@ public:
 
     size_t alignOfData() const override { return nested_func->alignOfData(); }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
     {
         nested_func->add(place, columns, row_num, arena);
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { nested_func->merge(place, rhs, arena); }
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override { nested_func->merge(place, rhs, arena); }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override { nested_func->serialize(place, buf); }
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override { nested_func->serialize(place, buf); }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena * arena) const override
     {
         nested_func->deserialize(place, buf, arena);
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * arena) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
     {
         nested_func->insertResultInto(place, to, arena);
     }
diff --git a/src/AggregateFunctions/AggregateFunctionState.h b/src/AggregateFunctions/AggregateFunctionState.h
index 01614b7f0ab..e1a2c542fe8 100644
--- a/src/AggregateFunctions/AggregateFunctionState.h
+++ b/src/AggregateFunctions/AggregateFunctionState.h
@@ -34,12 +34,12 @@ public:
 
     DataTypePtr getReturnType() const override;
 
-    void create(AggregateDataPtr place) const override
+    void create(AggregateDataPtr __restrict place) const override
     {
         nested_func->create(place);
     }
 
-    void destroy(AggregateDataPtr place) const noexcept override
+    void destroy(AggregateDataPtr __restrict place) const noexcept override
     {
         nested_func->destroy(place);
     }
@@ -59,27 +59,27 @@ public:
         return nested_func->alignOfData();
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
     {
         nested_func->add(place, columns, row_num, arena);
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
     {
         nested_func->merge(place, rhs, arena);
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         nested_func->serialize(place, buf);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena * arena) const override
     {
         nested_func->deserialize(place, buf, arena);
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         assert_cast<ColumnAggregateFunction &>(to).getData().push_back(place);
     }
diff --git a/src/AggregateFunctions/AggregateFunctionStatistics.h b/src/AggregateFunctions/AggregateFunctionStatistics.h
index b0ff57665da..76b6e843c15 100644
--- a/src/AggregateFunctions/AggregateFunctionStatistics.h
+++ b/src/AggregateFunctions/AggregateFunctionStatistics.h
@@ -123,27 +123,27 @@ public:
         return std::make_shared<DataTypeFloat64>();
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
     {
         this->data(place).update(*columns[0], row_num);
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         this->data(place).mergeWith(this->data(rhs));
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         this->data(place).serialize(buf);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override
     {
         this->data(place).deserialize(buf);
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         this->data(place).publish(to);
     }
@@ -375,27 +375,27 @@ public:
         return std::make_shared<DataTypeFloat64>();
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
     {
         this->data(place).update(*columns[0], *columns[1], row_num);
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         this->data(place).mergeWith(this->data(rhs));
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         this->data(place).serialize(buf);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override
     {
         this->data(place).deserialize(buf);
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         this->data(place).publish(to);
     }
diff --git a/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h b/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h
index 5caa30dbdab..9903e2f6eaa 100644
--- a/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h
+++ b/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h
@@ -121,7 +121,7 @@ public:
         return std::make_shared<DataTypeNumber<ResultType>>();
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
     {
         if constexpr (StatFunc::num_args == 2)
             this->data(place).add(
@@ -141,22 +141,22 @@ public:
         }
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         this->data(place).merge(this->data(rhs));
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         this->data(place).write(buf);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override
     {
         this->data(place).read(buf);
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         const auto & data = this->data(place);
         auto & dst = static_cast<ColVecResult &>(to).getData();
diff --git a/src/AggregateFunctions/AggregateFunctionSum.h b/src/AggregateFunctions/AggregateFunctionSum.h
index ffd8a077390..bd1f9fc302e 100644
--- a/src/AggregateFunctions/AggregateFunctionSum.h
+++ b/src/AggregateFunctions/AggregateFunctionSum.h
@@ -314,7 +314,7 @@ public:
             return std::make_shared<ResultDataType>();
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
     {
         const auto & column = assert_cast<const ColVecType &>(*columns[0]);
         if constexpr (is_big_int_v<T>)
@@ -361,22 +361,22 @@ public:
         }
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         this->data(place).merge(this->data(rhs));
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         this->data(place).write(buf);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override
     {
         this->data(place).read(buf);
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         auto & column = assert_cast<ColVecResult &>(to);
         column.getData().push_back(this->data(place).get());
diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.h b/src/AggregateFunctions/AggregateFunctionSumMap.h
index 6c8155f967c..3079da36cda 100644
--- a/src/AggregateFunctions/AggregateFunctionSumMap.h
+++ b/src/AggregateFunctions/AggregateFunctionSumMap.h
@@ -136,7 +136,7 @@ public:
         }
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns_, const size_t row_num, Arena *) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns_, const size_t row_num, Arena *) const override
     {
         const auto & columns = getArgumentColumns(columns_);
 
@@ -212,7 +212,7 @@ public:
         }
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         auto & merged_maps = this->data(place).merged_maps;
         const auto & rhs_maps = this->data(rhs).merged_maps;
@@ -231,7 +231,7 @@ public:
         }
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         const auto & merged_maps = this->data(place).merged_maps;
         size_t size = merged_maps.size();
@@ -245,7 +245,7 @@ public:
         }
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override
     {
         auto & merged_maps = this->data(place).merged_maps;
         size_t size = 0;
@@ -268,7 +268,7 @@ public:
         }
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         size_t num_columns = values_types.size();
 
diff --git a/src/AggregateFunctions/AggregateFunctionTTest.h b/src/AggregateFunctions/AggregateFunctionTTest.h
index e73ae5e4c36..3c9873ebd1e 100644
--- a/src/AggregateFunctions/AggregateFunctionTTest.h
+++ b/src/AggregateFunctions/AggregateFunctionTTest.h
@@ -109,7 +109,7 @@ public:
         );
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
     {
         Float64 value = columns[0]->getFloat64(row_num);
         UInt8 is_second = columns[1]->getUInt(row_num);
@@ -120,22 +120,22 @@ public:
             this->data(place).addX(value);
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         this->data(place).merge(this->data(rhs));
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         this->data(place).write(buf);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override
     {
         this->data(place).read(buf);
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         auto [t_statistic, p_value] = this->data(place).getResult();
 
diff --git a/src/AggregateFunctions/AggregateFunctionTopK.h b/src/AggregateFunctions/AggregateFunctionTopK.h
index 791df21d1a7..43320a96b99 100644
--- a/src/AggregateFunctions/AggregateFunctionTopK.h
+++ b/src/AggregateFunctions/AggregateFunctionTopK.h
@@ -50,7 +50,7 @@ public:
         return std::make_shared<DataTypeArray>(this->argument_types[0]);
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
     {
         auto & set = this->data(place).value;
         if (set.capacity() != reserved)
@@ -62,7 +62,7 @@ public:
             set.insert(assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num]);
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         auto & set = this->data(place).value;
         if (set.capacity() != reserved)
@@ -70,19 +70,19 @@ public:
         set.merge(this->data(rhs).value);
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         this->data(place).value.write(buf);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override
     {
         auto & set = this->data(place).value;
         set.resize(reserved);
         set.read(buf);
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
         ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
@@ -145,12 +145,12 @@ public:
         return true;
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         this->data(place).value.write(buf);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena * arena) const override
     {
         auto & set = this->data(place).value;
         set.clear();
@@ -173,7 +173,7 @@ public:
         set.readAlphaMap(buf);
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
     {
         auto & set = this->data(place).value;
         if (set.capacity() != reserved)
@@ -198,7 +198,7 @@ public:
         }
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         auto & set = this->data(place).value;
         if (set.capacity() != reserved)
@@ -206,7 +206,7 @@ public:
         set.merge(this->data(rhs).value);
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
         ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
diff --git a/src/AggregateFunctions/AggregateFunctionUniq.h b/src/AggregateFunctions/AggregateFunctionUniq.h
index c8f7b155ace..4e27922ba7c 100644
--- a/src/AggregateFunctions/AggregateFunctionUniq.h
+++ b/src/AggregateFunctions/AggregateFunctionUniq.h
@@ -211,27 +211,27 @@ public:
     }
 
     /// ALWAYS_INLINE is required to have better code layout for uniqHLL12 function
-    void ALWAYS_INLINE add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
+    void ALWAYS_INLINE add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
     {
         detail::OneAdder<T, Data>::add(this->data(place), *columns[0], row_num);
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         this->data(place).set.merge(this->data(rhs).set);
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         this->data(place).set.write(buf);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override
     {
         this->data(place).set.read(buf);
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         assert_cast<ColumnUInt64 &>(to).getData().push_back(this->data(place).set.size());
     }
@@ -265,28 +265,28 @@ public:
         return std::make_shared<DataTypeUInt64>();
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
     {
         this->data(place).set.insert(typename Data::Set::value_type(
             UniqVariadicHash<is_exact, argument_is_tuple>::apply(num_args, columns, row_num)));
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         this->data(place).set.merge(this->data(rhs).set);
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         this->data(place).set.write(buf);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override
     {
         this->data(place).set.read(buf);
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         assert_cast<ColumnUInt64 &>(to).getData().push_back(this->data(place).set.size());
     }
diff --git a/src/AggregateFunctions/AggregateFunctionUniqCombined.h b/src/AggregateFunctions/AggregateFunctionUniqCombined.h
index 10d8b3f512c..c9327594670 100644
--- a/src/AggregateFunctions/AggregateFunctionUniqCombined.h
+++ b/src/AggregateFunctions/AggregateFunctionUniqCombined.h
@@ -141,7 +141,7 @@ public:
         return std::make_shared<DataTypeUInt64>();
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
     {
         if constexpr (!std::is_same_v<T, String>)
         {
@@ -155,22 +155,22 @@ public:
         }
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         this->data(place).set.merge(this->data(rhs).set);
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         this->data(place).set.write(buf);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override
     {
         this->data(place).set.read(buf);
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         assert_cast<ColumnUInt64 &>(to).getData().push_back(this->data(place).set.size());
     }
@@ -211,28 +211,28 @@ public:
         return std::make_shared<DataTypeUInt64>();
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
     {
         this->data(place).set.insert(typename AggregateFunctionUniqCombinedData<UInt64, K, HashValueType>::Set::value_type(
             UniqVariadicHash<is_exact, argument_is_tuple>::apply(num_args, columns, row_num)));
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         this->data(place).set.merge(this->data(rhs).set);
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         this->data(place).set.write(buf);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override
     {
         this->data(place).set.read(buf);
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         assert_cast<ColumnUInt64 &>(to).getData().push_back(this->data(place).set.size());
     }
diff --git a/src/AggregateFunctions/AggregateFunctionUniqUpTo.h b/src/AggregateFunctions/AggregateFunctionUniqUpTo.h
index f6b96b561ac..d2c765137bc 100644
--- a/src/AggregateFunctions/AggregateFunctionUniqUpTo.h
+++ b/src/AggregateFunctions/AggregateFunctionUniqUpTo.h
@@ -185,27 +185,27 @@ public:
     }
 
     /// ALWAYS_INLINE is required to have better code layout for uniqUpTo function
-    void ALWAYS_INLINE add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
+    void ALWAYS_INLINE add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
     {
         this->data(place).add(*columns[0], row_num, threshold);
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         this->data(place).merge(this->data(rhs), threshold);
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         this->data(place).write(buf, threshold);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override
     {
         this->data(place).read(buf, threshold);
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         assert_cast<ColumnUInt64 &>(to).getData().push_back(this->data(place).size());
     }
@@ -247,27 +247,27 @@ public:
         return std::make_shared<DataTypeUInt64>();
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
     {
         this->data(place).insert(UInt64(UniqVariadicHash<is_exact, argument_is_tuple>::apply(num_args, columns, row_num)), threshold);
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         this->data(place).merge(this->data(rhs), threshold);
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         this->data(place).write(buf, threshold);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override
     {
         this->data(place).read(buf, threshold);
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         assert_cast<ColumnUInt64 &>(to).getData().push_back(this->data(place).size());
     }
diff --git a/src/AggregateFunctions/AggregateFunctionWindowFunnel.h b/src/AggregateFunctions/AggregateFunctionWindowFunnel.h
index 2be9d874a05..de8f0f1e2e9 100644
--- a/src/AggregateFunctions/AggregateFunctionWindowFunnel.h
+++ b/src/AggregateFunctions/AggregateFunctionWindowFunnel.h
@@ -250,7 +250,7 @@ public:
         return std::make_shared<AggregateFunctionNullVariadic<false, false, false>>(nested_function, arguments, params);
     }
 
-    void add(AggregateDataPtr place, const IColumn ** columns, const size_t row_num, Arena *) const override
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, const size_t row_num, Arena *) const override
     {
         bool has_event = false;
         const auto timestamp = assert_cast<const ColumnVector<T> *>(columns[0])->getData()[row_num];
@@ -269,22 +269,22 @@ public:
             this->data(place).add(timestamp, 0);
     }
 
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         this->data(place).merge(this->data(rhs));
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         this->data(place).serialize(buf);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override
     {
         this->data(place).deserialize(buf);
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         assert_cast<ColumnUInt8 &>(to).getData().push_back(getEventLevel(this->data(place)));
     }
diff --git a/src/AggregateFunctions/IAggregateFunction.h b/src/AggregateFunctions/IAggregateFunction.h
index c84be1a6b5b..f1bbfa40aac 100644
--- a/src/AggregateFunctions/IAggregateFunction.h
+++ b/src/AggregateFunctions/IAggregateFunction.h
@@ -72,10 +72,10 @@ public:
     /** Create empty data for aggregation with `placement new` at the specified location.
       * You will have to destroy them using the `destroy` method.
       */
-    virtual void create(AggregateDataPtr place) const = 0;
+    virtual void create(AggregateDataPtr __restrict place) const = 0;
 
     /// Delete data for aggregation.
-    virtual void destroy(AggregateDataPtr place) const noexcept = 0;
+    virtual void destroy(AggregateDataPtr __restrict place) const noexcept = 0;
 
     /// It is not necessary to delete data.
     virtual bool hasTrivialDestructor() const = 0;
@@ -91,16 +91,16 @@ public:
      *  row_num is number of row which should be added.
      *  Additional parameter arena should be used instead of standard memory allocator if the addition requires memory allocation.
      */
-    virtual void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const = 0;
+    virtual void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const = 0;
 
     /// Merges state (on which place points to) with other state of current aggregation function.
-    virtual void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const = 0;
+    virtual void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const = 0;
 
     /// Serializes state (to transmit it over the network, for example).
-    virtual void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const = 0;
+    virtual void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const = 0;
 
     /// Deserializes state. This function is called only for empty (just created) states.
-    virtual void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const = 0;
+    virtual void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena * arena) const = 0;
 
     /// Returns true if a function requires Arena to handle own states (see add(), merge(), deserialize()).
     virtual bool allocatesMemoryInArena() const { return false; }
@@ -111,7 +111,7 @@ public:
     /// insertResultInto must work correctly. This kind of call sequence occurs
     /// in `runningAccumulate`, or when calculating an aggregate function as a
     /// window function.
-    virtual void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * arena) const = 0;
+    virtual void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const = 0;
 
     /// Used for machine learning methods. Predict result from trained model.
     /// Will insert result into `to` column for rows in range [offset, offset + limit).
@@ -387,8 +387,8 @@ class IAggregateFunctionDataHelper : public IAggregateFunctionHelper<Derived>
 protected:
     using Data = T;
 
-    static Data & data(AggregateDataPtr place) { return *reinterpret_cast<Data *>(place); }
-    static const Data & data(ConstAggregateDataPtr place) { return *reinterpret_cast<const Data *>(place); }
+    static Data & data(AggregateDataPtr __restrict place) { return *reinterpret_cast<Data *>(place); }
+    static const Data & data(ConstAggregateDataPtr __restrict place) { return *reinterpret_cast<const Data *>(place); }
 
 public:
     // Derived class can `override` this to flag that DateTime64 is not supported.
@@ -399,9 +399,9 @@ public:
     {
     }
 
-    void create(AggregateDataPtr place) const override { new (place) Data; }
+    void create(AggregateDataPtr __restrict place) const override { new (place) Data; }
 
-    void destroy(AggregateDataPtr place) const noexcept override { data(place).~Data(); }
+    void destroy(AggregateDataPtr __restrict place) const noexcept override { data(place).~Data(); }
 
     bool hasTrivialDestructor() const override { return std::is_trivially_destructible_v<Data>; }
 

From a3f1b825cc8d419e8d082237d8331744f2dccbab Mon Sep 17 00:00:00 2001
From: Pavel Kruglov <avogar@sandbox-633380738>
Date: Mon, 1 Feb 2021 21:17:12 +0300
Subject: [PATCH 0492/1238] Fix build

---
 src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index 9a32b29454d..457c9c04aa9 100644
--- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -1549,7 +1549,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
         if (!out_projection)
             out_projection = createProjection(pipe.getHeader());
 
-        QueryPlanPtr plan = createPlanFromPipe(std::move(pipe), "with final");
+        QueryPlanPtr plan = createPlanFromPipe(std::move(pipe), query_id, data, "with final");
 
         auto expression_step = std::make_unique<ExpressionStep>(
             plan->getCurrentDataStream(),

From 9da445e740b45481da042d6e0264cdbe70245443 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Mon, 1 Feb 2021 22:29:47 +0300
Subject: [PATCH 0493/1238] execute initial query in the same thread

---
 src/Databases/DatabaseReplicated.cpp        | 12 ++--
 src/Databases/DatabaseReplicatedWorker.cpp  | 68 ++++++++++++++++++---
 src/Databases/DatabaseReplicatedWorker.h    |  7 ++-
 src/Interpreters/DDLTask.cpp                |  4 +-
 src/Interpreters/DDLTask.h                  |  2 +-
 src/Interpreters/DDLWorker.cpp              | 22 ++++++-
 src/Interpreters/InterpreterAlterQuery.cpp  |  3 +
 src/Interpreters/InterpreterCreateQuery.cpp |  7 ++-
 src/Interpreters/InterpreterDropQuery.cpp   | 33 ++++++----
 src/Interpreters/InterpreterRenameQuery.cpp |  5 +-
 src/Interpreters/InterpreterRenameQuery.h   |  3 +
 tests/clickhouse-test                       |  4 +-
 12 files changed, 128 insertions(+), 42 deletions(-)

diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index 6f244ed7ec9..44746cd5716 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -42,9 +42,9 @@ zkutil::ZooKeeperPtr DatabaseReplicated::getZooKeeper() const
     return global_context.getZooKeeper();
 }
 
-static inline String getHostID(const Context & global_context)
+static inline String getHostID(const Context & global_context, const UUID & db_uuid)
 {
-    return Cluster::Address::toString(getFQDNOrHostName(), global_context.getTCPPort());
+    return Cluster::Address::toString(getFQDNOrHostName(), global_context.getTCPPort()) + ':' + toString(db_uuid);
 }
 
 
@@ -94,7 +94,7 @@ DatabaseReplicated::DatabaseReplicated(
     String replica_host_id;
     if (current_zookeeper->tryGet(replica_path, replica_host_id))
     {
-        String host_id = getHostID(global_context);
+        String host_id = getHostID(global_context, db_uuid);
         if (replica_host_id != host_id)
             throw Exception(ErrorCodes::REPLICA_IS_ALREADY_EXIST,
                             "Replica {} of shard {} of replicated database at {} already exists. Replica host ID: '{}', current host ID: '{}'",
@@ -144,7 +144,7 @@ void DatabaseReplicated::createReplicaNodesInZooKeeper(const zkutil::ZooKeeperPt
     //log_entry_to_execute = 0;   //FIXME
 
     /// Write host name to replica_path, it will protect from multiple replicas with the same name
-    auto host_id = getHostID(global_context);
+    auto host_id = getHostID(global_context, db_uuid);
 
     /// On replica creation add empty entry to log. Can be used to trigger some actions on other replicas (e.g. update cluster info).
     DDLLogEntry entry;
@@ -221,11 +221,11 @@ BlockIO DatabaseReplicated::propose(const ASTPtr & query)
 
     LOG_DEBUG(log, "Proposing query: {}", queryToString(query));
 
+    /// TODO maybe write current settings to log entry?
     DDLLogEntry entry;
-    entry.hosts = {};
     entry.query = queryToString(query);
     entry.initiator = ddl_worker->getCommonHostID();
-    String node_path = ddl_worker->enqueueQuery(entry);
+    String node_path = ddl_worker->tryEnqueueAndExecuteEntry(entry);
 
     BlockIO io;
     //FIXME use query context
diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp
index 0c2368cdcf6..a1cdff204c7 100644
--- a/src/Databases/DatabaseReplicatedWorker.cpp
+++ b/src/Databases/DatabaseReplicatedWorker.cpp
@@ -8,13 +8,16 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int LOGICAL_ERROR;
+    extern const int DATABASE_REPLICATION_FAILED;
 }
 
 DatabaseReplicatedDDLWorker::DatabaseReplicatedDDLWorker(DatabaseReplicated * db, const Context & context_)
     : DDLWorker(/* pool_size */ 1, db->zookeeper_path + "/log", context_, nullptr, {}, fmt::format("DDLWorker({})", db->getDatabaseName()))
     , database(db)
 {
-    /// Pool size must be 1 (to avoid reordering of log entries)
+    /// Pool size must be 1 to avoid reordering of log entries.
+    /// TODO Make a dependency graph of DDL queries. It will allow to execute independent entries in parallel.
+    /// We also need similar graph to load tables on server startup in order of topsort.
 }
 
 void DatabaseReplicatedDDLWorker::initializeMainThread()
@@ -72,8 +75,51 @@ String DatabaseReplicatedDDLWorker::enqueueQuery(DDLLogEntry & entry)
     return node_path;
 }
 
+String DatabaseReplicatedDDLWorker::tryEnqueueAndExecuteEntry(DDLLogEntry & entry)
+{
+    auto zookeeper = getAndSetZooKeeper();
+    // TODO do not enqueue query if we have big replication lag
+
+    String entry_path = enqueueQuery(entry);
+    auto try_node = zkutil::EphemeralNodeHolder::existing(entry_path + "/try", *zookeeper);
+    String entry_name = entry_path.substr(entry_path.rfind('/') + 1);
+    auto task = std::make_unique<DatabaseReplicatedTask>(entry_name, entry_path, database);
+    task->entry = entry;
+    task->parseQueryFromEntry(context);
+    assert(!task->entry.query.empty());
+    assert(!zookeeper->exists(task->getFinishedNodePath()));
+    task->is_initial_query = true;
+
+    LOG_DEBUG(log, "Waiting for worker thread to process all entries before {}", entry_name);
+    {
+        std::unique_lock lock{mutex};
+        wait_current_task_change.wait(lock, [&]() { assert(current_task <= entry_name); return zookeeper->expired() || current_task == entry_name; });
+    }
+
+    if (zookeeper->expired())
+        throw Exception(ErrorCodes::DATABASE_REPLICATION_FAILED, "ZooKeeper session expired, try again");
+
+    processTask(*task);
+
+    if (!task->was_executed)
+    {
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Entry {} was executed, but was not committed: code {}: {}",
+                        task->execution_status.code, task->execution_status.message);
+    }
+
+    try_node->reset();
+
+    return entry_path;
+}
+
 DDLTaskPtr DatabaseReplicatedDDLWorker::initAndCheckTask(const String & entry_name, String & out_reason, const ZooKeeperPtr & zookeeper)
 {
+    {
+        std::lock_guard lock{mutex};
+        current_task = entry_name;
+        wait_current_task_change.notify_all();
+    }
+
     UInt32 our_log_ptr = parse<UInt32>(current_zookeeper->get(database->replica_path + "/log_ptr"));
     UInt32 entry_num = DatabaseReplicatedTask::getLogEntryNumber(entry_name);
 
@@ -91,27 +137,31 @@ DDLTaskPtr DatabaseReplicatedDDLWorker::initAndCheckTask(const String & entry_na
 
     if (zookeeper->tryGet(entry_path + "/try", initiator_name, nullptr, wait_committed_or_failed))
     {
-        task->we_are_initiator = initiator_name == task->host_id_str;
+        task->is_initial_query = initiator_name == task->host_id_str;
         /// Query is not committed yet. We cannot just skip it and execute next one, because reordering may break replication.
         //FIXME add some timeouts
-        if (!task->we_are_initiator)
-        {
-            LOG_TRACE(log, "Waiting for initiator {} to commit or rollback entry {}", initiator_name, entry_path);
-            wait_committed_or_failed->wait();
-        }
+        LOG_TRACE(log, "Waiting for initiator {} to commit or rollback entry {}", initiator_name, entry_path);
+        wait_committed_or_failed->wait();
     }
 
-    if (!task->we_are_initiator && !zookeeper->exists(entry_path + "/committed"))
+    if (!zookeeper->exists(entry_path + "/committed"))
     {
         out_reason = "Entry " + entry_name + " hasn't been committed";
         return {};
     }
 
+    if (task->is_initial_query)
+    {
+        assert(!zookeeper->exists(entry_path + "/try"));
+        assert(zookeeper->exists(entry_path + "/committed") == (zookeeper->get(task->getFinishedNodePath()) == "0"));
+        out_reason = "Entry " + entry_name + " has been executed as initial query";
+        return {};
+    }
+
     String node_data;
     if (!zookeeper->tryGet(entry_path, node_data))
     {
         LOG_ERROR(log, "Cannot get log entry {}", entry_path);
-        database->onUnexpectedLogEntry(entry_name, zookeeper);
         throw Exception(ErrorCodes::LOGICAL_ERROR, "should be unreachable");
     }
 
diff --git a/src/Databases/DatabaseReplicatedWorker.h b/src/Databases/DatabaseReplicatedWorker.h
index 7994104331e..7e6d64dab0b 100644
--- a/src/Databases/DatabaseReplicatedWorker.h
+++ b/src/Databases/DatabaseReplicatedWorker.h
@@ -1,7 +1,6 @@
 #pragma once
 #include <Interpreters/DDLWorker.h>
 
-
 namespace DB
 {
 
@@ -14,6 +13,8 @@ public:
 
     String enqueueQuery(DDLLogEntry & entry) override;
 
+    String tryEnqueueAndExecuteEntry(DDLLogEntry & entry);
+
 private:
     void initializeMainThread() override;
     void initializeReplication();
@@ -21,7 +22,9 @@ private:
     DDLTaskPtr initAndCheckTask(const String & entry_name, String & out_reason, const ZooKeeperPtr & zookeeper) override;
 
     DatabaseReplicated * database;
-
+    mutable std::mutex mutex;
+    std::condition_variable wait_current_task_change;
+    String current_task;
 };
 
 }
diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp
index fd2de014581..55e613648ae 100644
--- a/src/Interpreters/DDLTask.cpp
+++ b/src/Interpreters/DDLTask.cpp
@@ -303,9 +303,9 @@ std::unique_ptr<Context> DatabaseReplicatedTask::makeQueryContext(Context & from
     query_context->initMetadataTransaction(txn);
     txn->current_zookeeper = from_context.getZooKeeper();
     txn->zookeeper_path = database->zookeeper_path;
-    txn->is_initial_query = we_are_initiator;
+    txn->is_initial_query = is_initial_query;
 
-    if (we_are_initiator)
+    if (is_initial_query)
     {
         txn->ops.emplace_back(zkutil::makeRemoveRequest(entry_path + "/try", -1));
         txn->ops.emplace_back(zkutil::makeCreateRequest(entry_path + "/committed", host_id_str, zkutil::CreateMode::Persistent));
diff --git a/src/Interpreters/DDLTask.h b/src/Interpreters/DDLTask.h
index 5b50413b975..49f6d74a931 100644
--- a/src/Interpreters/DDLTask.h
+++ b/src/Interpreters/DDLTask.h
@@ -77,6 +77,7 @@ struct DDLTaskBase
     String host_id_str;
     ASTPtr query;
 
+    bool is_initial_query = false;
     bool is_circular_replicated = false;
     bool execute_on_leader = false;
 
@@ -136,7 +137,6 @@ struct DatabaseReplicatedTask : public DDLTaskBase
     static UInt32 getLogEntryNumber(const String & log_entry_name);
 
     DatabaseReplicated * database;
-    bool we_are_initiator = false;
 };
 
 
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 7b9d3ef8f5b..fabb9f9563e 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -51,6 +51,7 @@ namespace ErrorCodes
     extern const int CANNOT_ASSIGN_ALTER;
     extern const int CANNOT_ALLOCATE_MEMORY;
     extern const int MEMORY_LIMIT_EXCEEDED;
+    extern const int INCORRECT_QUERY;
 }
 
 
@@ -398,8 +399,9 @@ bool DDLWorker::tryExecuteQuery(const String & query, DDLTaskBase & task)
     try
     {
         auto query_context = task.makeQueryContext(context);
-        query_scope.emplace(*query_context);
-        executeQuery(istr, ostr, false, *query_context, {});
+        if (!task.is_initial_query)
+            query_scope.emplace(*query_context);
+        executeQuery(istr, ostr, !task.is_initial_query, *query_context, {});
 
         if (auto txn = query_context->getMetadataTransaction())
         {
@@ -409,6 +411,9 @@ bool DDLWorker::tryExecuteQuery(const String & query, DDLTaskBase & task)
     }
     catch (const DB::Exception & e)
     {
+        if (task.is_initial_query)
+            throw;
+
         task.execution_status = ExecutionStatus::fromCurrentException();
         tryLogCurrentException(log, "Query " + query + " wasn't finished successfully");
 
@@ -426,6 +431,9 @@ bool DDLWorker::tryExecuteQuery(const String & query, DDLTaskBase & task)
     }
     catch (...)
     {
+        if (task.is_initial_query)
+            throw;
+
         task.execution_status = ExecutionStatus::fromCurrentException();
         tryLogCurrentException(log, "Query " + query + " wasn't finished successfully");
 
@@ -474,7 +482,10 @@ void DDLWorker::processTask(DDLTaskBase & task)
                 {
                     /// It's not CREATE DATABASE
                     auto table_id = context.tryResolveStorageID(*query_with_table, Context::ResolveOrdinary);
-                    storage = DatabaseCatalog::instance().tryGetTable(table_id, context);
+                    DatabasePtr database;
+                    std::tie(database, storage) = DatabaseCatalog::instance().tryGetDatabaseAndTable(table_id, context);
+                    if (database && database->getEngineName() == "Replicated")
+                        throw Exception(ErrorCodes::INCORRECT_QUERY, "ON CLUSTER queries are not allowed for Replicated databases");
                 }
 
                 task.execute_on_leader = storage && taskShouldBeExecutedOnLeader(task.query, storage) && !task.is_circular_replicated;
@@ -496,6 +507,8 @@ void DDLWorker::processTask(DDLTaskBase & task)
         }
         catch (...)
         {
+            if (task.is_initial_query)
+                throw;
             tryLogCurrentException(log, "An error occurred before execution of DDL task: ");
             task.execution_status = ExecutionStatus::fromCurrentException("An error occurred before execution");
         }
@@ -628,6 +641,9 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
         StorageReplicatedMergeTree::Status status;
         replicated_storage->getStatus(status);
 
+        if (task.is_initial_query && !status.is_leader)
+            throw Exception(ErrorCodes::NOT_A_LEADER, "Cannot execute initial query on non-leader replica");
+
         /// Any replica which is leader tries to take lock
         if (status.is_leader && lock->tryLock())
         {
diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp
index db380bca2b1..0edd1a401b3 100644
--- a/src/Interpreters/InterpreterAlterQuery.cpp
+++ b/src/Interpreters/InterpreterAlterQuery.cpp
@@ -53,7 +53,10 @@ BlockIO InterpreterAlterQuery::execute()
 
     DatabasePtr database = DatabaseCatalog::instance().getDatabase(table_id.database_name);
     if (typeid_cast<DatabaseReplicated *>(database.get()) && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
+    {
+        alter_lock.reset();
         return typeid_cast<DatabaseReplicated *>(database.get())->propose(query_ptr);
+    }
 
     //FIXME commit MetadataTransaction for all ALTER kinds. Now its' implemented only for metadata alter.
 
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index 926737ef888..d91f3140a96 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -572,6 +572,10 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::setProperties(AS
     validateTableStructure(create, properties);
     /// Set the table engine if it was not specified explicitly.
     setEngine(create);
+
+    create.as_database.clear();
+    create.as_table.clear();
+
     return properties;
 }
 
@@ -835,7 +839,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
         /// Data path must be relative to root_path
         create.attach_from_path = fs::relative(data_path, root_path) / "";
     }
-    else if (create.attach && !create.attach_short_syntax)
+    else if (create.attach && !create.attach_short_syntax && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
     {
         auto * log = &Poco::Logger::get("InterpreterCreateQuery");
         LOG_WARNING(log, "ATTACH TABLE query with full table definition is not recommended: "
@@ -881,6 +885,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
         if (typeid_cast<DatabaseReplicated *>(database.get()) && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
         {
             assertOrSetUUID(create, database);
+            guard.reset();
             return typeid_cast<DatabaseReplicated *>(database.get())->propose(query_ptr);
         }
     }
diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp
index ff7b6ef8387..eed7337b9ab 100644
--- a/src/Interpreters/InterpreterDropQuery.cpp
+++ b/src/Interpreters/InterpreterDropQuery.cpp
@@ -33,6 +33,7 @@ namespace ErrorCodes
     extern const int UNKNOWN_TABLE;
     extern const int UNKNOWN_DICTIONARY;
     extern const int NOT_IMPLEMENTED;
+    extern const int INCORRECT_QUERY;
 }
 
 
@@ -119,12 +120,28 @@ BlockIO InterpreterDropQuery::executeToTableImpl(const ASTDropQuery & query, Dat
 
     if (database && table)
     {
-        if (query_ptr->as<ASTDropQuery &>().is_view && !table->isView())
+        if (query.as<ASTDropQuery &>().is_view && !table->isView())
             throw Exception("Table " + table_id.getNameForLogs() + " is not a View", ErrorCodes::LOGICAL_ERROR);
 
         /// Now get UUID, so we can wait for table data to be finally dropped
         table_id.uuid = database->tryGetTableUUID(table_id.table_name);
 
+        /// Prevents recursive drop from drop database query. The original query must specify a table.
+        bool is_drop_or_detach_database = query.table.empty();
+        bool is_replicated_ddl_query = typeid_cast<DatabaseReplicated *>(database.get()) &&
+                                       context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY &&
+                                       !is_drop_or_detach_database;
+        if (is_replicated_ddl_query)
+        {
+            if (query.kind == ASTDropQuery::Kind::Detach && !query.permanently)
+                throw Exception(ErrorCodes::INCORRECT_QUERY, "DETACH TABLE is not allowed for Replicated databases. "
+                                                             "Use DETACH TABLE PERMANENTLY or SYSTEM RESTART REPLICA");
+
+            ddl_guard.reset();
+            table.reset();
+            return typeid_cast<DatabaseReplicated *>(database.get())->propose(query.clone());
+        }
+
         if (query.kind == ASTDropQuery::Kind::Detach)
         {
             context.checkAccess(table->isView() ? AccessType::DROP_VIEW : AccessType::DROP_TABLE, table_id);
@@ -135,9 +152,6 @@ BlockIO InterpreterDropQuery::executeToTableImpl(const ASTDropQuery & query, Dat
             if (database->getUUID() == UUIDHelpers::Nil)
                 table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
 
-            if (typeid_cast<DatabaseReplicated *>(database.get()) && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
-                return typeid_cast<DatabaseReplicated *>(database.get())->propose(query_ptr);
-
             if (query.permanently)
             {
                 /// Drop table from memory, don't touch data, metadata file renamed and will be skipped during server restart
@@ -157,10 +171,7 @@ BlockIO InterpreterDropQuery::executeToTableImpl(const ASTDropQuery & query, Dat
             auto table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
             auto metadata_snapshot = table->getInMemoryMetadataPtr();
             /// Drop table data, don't touch metadata
-            if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
-                return typeid_cast<DatabaseReplicated *>(database.get())->propose(query_ptr);
-            else
-                table->truncate(query_ptr, metadata_snapshot, context, table_lock);
+            table->truncate(query_ptr, metadata_snapshot, context, table_lock);
         }
         else if (query.kind == ASTDropQuery::Kind::Drop)
         {
@@ -173,11 +184,7 @@ BlockIO InterpreterDropQuery::executeToTableImpl(const ASTDropQuery & query, Dat
             if (database->getUUID() == UUIDHelpers::Nil)
                 table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
 
-            /// Prevents recursive drop from drop database query. The original query must specify a table.
-            if (typeid_cast<DatabaseReplicated *>(database.get()) && !query_ptr->as<ASTDropQuery &>().table.empty() && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
-                return typeid_cast<DatabaseReplicated *>(database.get())->propose(query_ptr);
-            else
-                database->dropTable(context, table_id.table_name, query.no_delay);
+            database->dropTable(context, table_id.table_name, query.no_delay);
         }
 
         db = database;
diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp
index a6075643a96..52faa89eff1 100644
--- a/src/Interpreters/InterpreterRenameQuery.cpp
+++ b/src/Interpreters/InterpreterRenameQuery.cpp
@@ -43,9 +43,6 @@ BlockIO InterpreterRenameQuery::execute()
     RenameDescriptions descriptions;
     descriptions.reserve(rename.elements.size());
 
-    /// Don't allow to drop tables (that we are renaming); don't allow to create tables in places where tables will be renamed.
-    TableGuards table_guards;
-
     for (const auto & elem : rename.elements)
     {
         descriptions.emplace_back(elem, current_database);
@@ -85,6 +82,8 @@ BlockIO InterpreterRenameQuery::executeToTables(const ASTRenameQuery & rename, c
             if (1 < descriptions.size())
                 throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Database {} is Replicated, "
                                 "it does not support renaming of multiple tables in single query.", elem.from_database_name);
+
+            table_guards.clear();
             return typeid_cast<DatabaseReplicated *>(database.get())->propose(query_ptr);
         }
         else
diff --git a/src/Interpreters/InterpreterRenameQuery.h b/src/Interpreters/InterpreterRenameQuery.h
index 055c15181c1..2bc84514b4c 100644
--- a/src/Interpreters/InterpreterRenameQuery.h
+++ b/src/Interpreters/InterpreterRenameQuery.h
@@ -64,6 +64,9 @@ private:
 
     ASTPtr query_ptr;
     Context & context;
+
+    /// Don't allow to drop tables (that we are renaming); don't allow to create tables in places where tables will be renamed.
+    TableGuards table_guards;
 };
 
 }
diff --git a/tests/clickhouse-test b/tests/clickhouse-test
index 13e7b4be001..3bfbd5d3e7f 100755
--- a/tests/clickhouse-test
+++ b/tests/clickhouse-test
@@ -186,9 +186,9 @@ def run_single_test(args, ext, server_logs_level, client_options, case_file, std
 
     total_time = (datetime.now() - start_time).total_seconds()
 
+    # Normalize randomized database names in stdout, stderr files.
+    os.system("LC_ALL=C sed -i -e 's/{test_db}/default/g' {file}".format(test_db=database, file=stdout_file))
     if not args.show_db_name:
-        # Normalize randomized database names in stdout, stderr files.
-        os.system("LC_ALL=C sed -i -e 's/{test_db}/default/g' {file}".format(test_db=database, file=stdout_file))
         os.system("LC_ALL=C sed -i -e 's/{test_db}/default/g' {file}".format(test_db=database, file=stderr_file))
 
     stdout = open(stdout_file, 'rb').read() if os.path.exists(stdout_file) else b''

From 63a8f463f9c3cda60a4a83f0b594741b6024e576 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sun, 31 Jan 2021 22:39:05 +0300
Subject: [PATCH 0494/1238] Allow writing partial buffer

In #6012 the behavior of the WriteBuffer has been changed.

Before, WriteBuffer write the data when next() is called.
After, WriteBuffer will write the data if, and only if, the buffer is
full.

See the discussion here [1].

  [1]: https://github.com/ClickHouse/ClickHouse/pull/6012#discussion_r567320267
---
 src/IO/WriteBuffer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/IO/WriteBuffer.h b/src/IO/WriteBuffer.h
index c513b22b0a5..da610fd9516 100644
--- a/src/IO/WriteBuffer.h
+++ b/src/IO/WriteBuffer.h
@@ -37,7 +37,7 @@ public:
       */
     inline void next()
     {
-        if (!offset() && available())
+        if (!offset())
             return;
         bytes += offset();
 

From 8b92749e1c3177c757388ae383a2169ca5a2bd53 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Tue, 2 Feb 2021 01:52:19 +0300
Subject: [PATCH 0495/1238] Fix ParallelFormattingOutputFormat after allowed
 partial writes

The problem was that ParallelFormattingOutputFormat creats
BufferWithOutsideMemory with Memory<> whos size is 0, and this create
WriteBuffer with zero size, which will have endless loop.
---
 .../Formats/Impl/ParallelFormattingOutputFormat.cpp          | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.cpp b/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.cpp
index eda2665119a..0ebca3661b4 100644
--- a/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.cpp
+++ b/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.cpp
@@ -147,12 +147,13 @@ namespace DB
             /// We want to preallocate memory buffer (increase capacity)
             /// and put the pointer at the beginning of the buffer
             unit.segment.resize(DBMS_DEFAULT_BUFFER_SIZE);
-            /// The second invocation won't release memory, only set size equals to 0.
-            unit.segment.resize(0);
 
             unit.actual_memory_size = 0;
             BufferWithOutsideMemory<WriteBuffer> out_buffer(unit.segment);
 
+            /// The second invocation won't release memory, only set size equals to 0.
+            unit.segment.resize(0);
+
             auto formatter = internal_formatter_creator(out_buffer);
 
             switch (unit.type)

From f3a56b965c541c7527b3505da6f4d0f6bfc16f46 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Tue, 2 Feb 2021 01:55:26 +0300
Subject: [PATCH 0496/1238] Add deebug assertion for empty working buffer in
 WriteBuffer

---
 src/IO/WriteBuffer.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/IO/WriteBuffer.h b/src/IO/WriteBuffer.h
index da610fd9516..6abcc1c8eed 100644
--- a/src/IO/WriteBuffer.h
+++ b/src/IO/WriteBuffer.h
@@ -4,6 +4,7 @@
 #include <cstring>
 #include <memory>
 #include <iostream>
+#include <cassert>
 
 #include <Common/Exception.h>
 #include <IO/BufferBase.h>
@@ -73,6 +74,9 @@ public:
     {
         size_t bytes_copied = 0;
 
+        /// Produces endless loop
+        assert(working_buffer.size() > 0);
+
         while (bytes_copied < n)
         {
             nextIfAtEnd();

From 18b6fb2a93857af97557e4314cea8ace4dda8aa3 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Tue, 2 Feb 2021 02:23:34 +0300
Subject: [PATCH 0497/1238] cleanup 1

---
 src/Interpreters/WindowDescription.h     | 26 ++++++++----------------
 src/Parsers/ExpressionElementParsers.cpp |  1 +
 2 files changed, 9 insertions(+), 18 deletions(-)

diff --git a/src/Interpreters/WindowDescription.h b/src/Interpreters/WindowDescription.h
index cbdfc56ba4e..d34b7721a5e 100644
--- a/src/Interpreters/WindowDescription.h
+++ b/src/Interpreters/WindowDescription.h
@@ -1,23 +1,5 @@
 #pragma once
 
-// ASTLiteral.h
-#include <Core/Field.h>
-#include <Parsers/ASTWithAlias.h>
-#include <Parsers/TokenIterator.h>
-#include <Common/FieldVisitors.h>
-
-#include <optional>
-
-// ASTLiteral.cpp
-#include <Common/SipHash.h>
-#include <Common/FieldVisitors.h>
-#include <Parsers/ASTLiteral.h>
-#include <IO/WriteHelpers.h>
-#include <IO/WriteBufferFromString.h>
-#include <IO/Operators.h>
-
-
-// The really needed includes follow
 #include <Common/FieldVisitors.h>
 #include <Core/Field.h>
 #include <Parsers/IAST_fwd.h>
@@ -94,6 +76,10 @@ struct WindowFrame
             case FrameType::Range:
                 return "RANGE";
         }
+
+        // Somehow GCC 10 doesn't understand that the above switch is exhaustive.
+        assert(false);
+        return "<unknown frame>";
     }
 
     static std::string toString(BoundaryType type)
@@ -107,6 +93,10 @@ struct WindowFrame
             case BoundaryType::Current:
                 return "CURRENT ROW";
         }
+
+        // Somehow GCC 10 doesn't understand that the above switch is exhaustive.
+        assert(false);
+        return "<unknown frame boundary>";
     }
 };
 
diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp
index 664cf30425c..ada2e3849ea 100644
--- a/src/Parsers/ExpressionElementParsers.cpp
+++ b/src/Parsers/ExpressionElementParsers.cpp
@@ -47,6 +47,7 @@ namespace ErrorCodes
 {
     extern const int SYNTAX_ERROR;
     extern const int LOGICAL_ERROR;
+    extern const int NOT_IMPLEMENTED;
 }
 
 
From 5e781057860d0a2f3ae35f45debe13ac62ed865e Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Tue, 2 Feb 2021 02:26:44 +0300
Subject: [PATCH 0498/1238] cleanup 3

---
 src/Processors/Transforms/WindowTransform.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index 172c2c89578..bf1a962c51f 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -9,7 +9,7 @@ namespace DB
 
 namespace ErrorCodes
 {
-    const extern int NOT_IMPLEMENTED;
+    extern const int NOT_IMPLEMENTED;
 }
 
 WindowTransform::WindowTransform(const Block & input_header_,

From ef46c3631787aa16e56e4420ab168342674cbe0b Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Tue, 2 Feb 2021 02:26:14 +0300
Subject: [PATCH 0499/1238] start offset for ROWS frame

---
 src/Parsers/ExpressionElementParsers.cpp      |  20 +-
 src/Processors/Transforms/WindowTransform.cpp | 256 ++++++++++++++++--
 src/Processors/Transforms/WindowTransform.h   |  45 ++-
 .../01591_window_functions.reference          |  40 +++
 .../0_stateless/01591_window_functions.sql    |   9 +
 5 files changed, 343 insertions(+), 27 deletions(-)

diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp
index ada2e3849ea..a9b38b45c1c 100644
--- a/src/Parsers/ExpressionElementParsers.cpp
+++ b/src/Parsers/ExpressionElementParsers.cpp
@@ -557,7 +557,15 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p
         }
         else if (parser_literal.parse(pos, ast_literal, expected))
         {
-            node->frame.begin_offset = ast_literal->as<ASTLiteral &>().value.safeGet<Int64>();
+            const Field & value = ast_literal->as<ASTLiteral &>().value;
+            if (!isInt64FieldType(value.getType()))
+            {
+                throw Exception(ErrorCodes::BAD_ARGUMENTS,
+                    "Only integer frame offsets are supported, '{}' is not supported.",
+                    Field::Types::toString(value.getType()));
+            }
+            node->frame.begin_offset = value.get<Int64>();
+            node->frame.begin_type = WindowFrame::BoundaryType::Offset;
         }
         else
         {
@@ -603,7 +611,15 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p
             }
             else if (parser_literal.parse(pos, ast_literal, expected))
             {
-                node->frame.end_offset = ast_literal->as<ASTLiteral &>().value.safeGet<Int64>();
+                const Field & value = ast_literal->as<ASTLiteral &>().value;
+                if (!isInt64FieldType(value.getType()))
+                {
+                    throw Exception(ErrorCodes::BAD_ARGUMENTS,
+                        "Only integer frame offsets are supported, '{}' is not supported.",
+                        Field::Types::toString(value.getType()));
+                }
+                node->frame.end_offset = value.get<Int64>();
+                node->frame.end_type = WindowFrame::BoundaryType::Offset;
             }
             else
             {
diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index 388e7a4af3b..5b942f089c1 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -165,16 +165,214 @@ void WindowTransform::advancePartitionEnd()
     assert(!partition_ended && partition_end == blocksEnd());
 }
 
-void WindowTransform::advanceFrameStart() const
+auto WindowTransform::moveRowNumberNoCheck(const RowNumber & _x, int offset) const
 {
-    // Frame start is always UNBOUNDED PRECEDING for now, so we don't have to
-    // move it. It is initialized when the new partition starts.
-    if (window_description.frame.begin_type
-        != WindowFrame::BoundaryType::Unbounded)
+    RowNumber x = _x;
+
+    if (offset > 0)
     {
-        throw Exception(ErrorCodes::NOT_IMPLEMENTED,
-            "Frame start type '{}' is not implemented",
-            WindowFrame::toString(window_description.frame.begin_type));
+        for (;;)
+        {
+            assertValid(x);
+            assert(offset >= 0);
+
+            const auto block_rows = blockRowsNumber(x);
+            x.row += offset;
+            if (x.row >= block_rows)
+            {
+                offset = x.row - block_rows;
+                x.row = 0;
+                x.block++;
+
+                if (x == blocksEnd())
+                {
+                    break;
+                }
+            }
+            else
+            {
+                offset = 0;
+                break;
+            }
+        }
+    }
+    else if (offset < 0)
+    {
+        for (;;)
+        {
+            assertValid(x);
+            assert(offset <= 0);
+
+            if (x.row >= static_cast<uint64_t>(-offset))
+            {
+                x.row -= -offset;
+                offset = 0;
+                break;
+            }
+
+            if (x.block == first_block_number)
+            {
+                break;
+            }
+
+            // offset is negative
+            offset += (x.row + 1);
+            --x.block;
+            x.row = blockRowsNumber(x) - 1;
+        }
+    }
+
+    return std::tuple{x, offset};
+}
+
+auto WindowTransform::moveRowNumber(const RowNumber & _x, int offset) const
+{
+    auto [x, o] = moveRowNumberNoCheck(_x, offset);
+
+#ifndef NDEBUG
+    // Check that it was reversible.
+    auto [xx, oo] = moveRowNumberNoCheck(x, -(offset - o));
+
+//    fmt::print(stderr, "{} -> {}, result {}, {}, new offset {}, twice {}, {}\n",
+//        _x, offset, x, o, -(offset - o), xx, oo);
+    assert(xx == _x);
+    assert(oo == 0);
+#endif
+
+    return std::tuple{x, o};
+}
+
+
+void WindowTransform::advanceFrameStartRowsOffset()
+{
+    // Just recalculate it each time by walking blocks.
+    const auto [moved_row, offset_left] = moveRowNumber(current_row,
+        window_description.frame.begin_offset);
+
+    frame_start = moved_row;
+
+    assertValid(frame_start);
+
+//    fmt::print(stderr, "frame start {} partition start {}\n", frame_start,
+//        partition_start);
+
+    if (moved_row <= partition_start)
+    {
+        // Got to the beginning of partition and can't go further back.
+        frame_start = partition_start;
+        frame_started = true;
+        return;
+    }
+
+    assert(frame_start <= partition_end);
+    if (frame_start == partition_end && partition_ended)
+    {
+        // A FOLLOWING frame start ran into the end of partition.
+        frame_started = true;
+    }
+
+    assert(partition_start < frame_start);
+    frame_start = moved_row;
+    frame_started = offset_left == 0;
+}
+
+void WindowTransform::advanceFrameStartChoose()
+{
+    switch (window_description.frame.begin_type)
+    {
+        case WindowFrame::BoundaryType::Unbounded:
+            // UNBOUNDED PRECEDING, just mark it valid. It is initialized when
+            // the new partition starts.
+            frame_started = true;
+            return;
+        case WindowFrame::BoundaryType::Offset:
+            switch (window_description.frame.type)
+            {
+                case WindowFrame::FrameType::Rows:
+                    advanceFrameStartRowsOffset();
+                    return;
+                default:
+                    // Fallthrough to the "not implemented" error.
+                    break;
+            }
+            break;
+        default:
+            // Fallthrough to the "not implemented" error.
+            break;
+    }
+
+    throw Exception(ErrorCodes::NOT_IMPLEMENTED,
+        "Frame start type '{}' for frame '{}' is not implemented",
+        WindowFrame::toString(window_description.frame.begin_type),
+        WindowFrame::toString(window_description.frame.type));
+}
+
+void WindowTransform::advanceFrameStart()
+{
+    if (frame_started)
+    {
+        return;
+    }
+
+    const auto frame_start_before = frame_start;
+    advanceFrameStartChoose();
+    if (frame_start == frame_start_before)
+    {
+        return;
+    }
+
+    assert(frame_start_before < frame_start);
+    assert(partition_start <= frame_start);
+    assert(frame_start <= partition_end);
+    if (partition_ended && frame_start == partition_end)
+    {
+        // Check that if the start of frame (e.g. FOLLOWING) runs into the end
+        // of partition, it is marked as valid -- we can't advance it any
+        // further.
+        assert(frame_started);
+    }
+
+    // We're very dumb and have to reinitialize aggregate functions if the frame
+    // start changed. No point in doing it if we don't yet know where the frame
+    // starts.
+    if (!frame_started)
+    {
+        return;
+    }
+
+    // frame_end value might not be valid yet, but we know that it is greater or
+    // equal than frame_start. If it's less than the new frame_start, we have to
+    // skip rows between frame_end and frame_start, because they are not in the
+    // frame and must not contribute to the value of aggregate functions.
+    if (frame_end < frame_start)
+    {
+        frame_end = frame_start;
+    }
+
+    for (auto & ws : workspaces)
+    {
+        const auto & f = ws.window_function;
+        const auto * a = f.aggregate_function.get();
+        auto * buf = ws.aggregate_function_state.data();
+
+        a->destroy(buf);
+        a->create(buf);
+
+        for (auto row = frame_start; row < frame_end; advanceRowNumber(row))
+        {
+            if (row.block != ws.cached_block_number)
+            {
+                ws.argument_columns.clear();
+                for (const auto i : ws.argument_column_indices)
+                {
+                    ws.argument_columns.push_back(inputAt(row)[i].get());
+                }
+                ws.cached_block_number = row.block;
+            }
+
+            a->add(buf, ws.argument_columns.data(), row.row, arena.get());
+//            fmt::print(stderr, "(1) add row {}\n", row.row);
+        }
     }
 }
 
@@ -356,6 +554,7 @@ void WindowTransform::advanceFrameEnd()
         auto * columns = ws.argument_columns.data();
         for (auto row = frame_end_before.row; row < rows_end; ++row)
         {
+//            fmt::print(stderr, "(2) add row {}\n", row);
             a->add(buf, columns, row, arena.get());
         }
     }
@@ -414,8 +613,8 @@ void WindowTransform::appendChunk(Chunk & chunk)
     for (;;)
     {
         advancePartitionEnd();
-//        fmt::print(stderr, "partition [?, {}), {}\n",
-//            partition_end, partition_ended);
+//        fmt::print(stderr, "partition [{}, {}), {}\n",
+//            partition_start, partition_end, partition_ended);
 
         // Either we ran out of data or we found the end of partition (maybe
         // both, but this only happens at the total end of data).
@@ -433,12 +632,21 @@ void WindowTransform::appendChunk(Chunk & chunk)
             // Advance the frame start, updating the state of the aggregate
             // functions.
             advanceFrameStart();
+
+            if (!frame_started)
+            {
+                // Wait for more input data to find the start of frame.
+                assert(!input_is_finished);
+                assert(!partition_ended);
+            }
+
             // Advance the frame end, updating the state of the aggregate
             // functions.
             advanceFrameEnd();
 
-//            fmt::print(stderr, "row {} frame [{}, {}) {}\n",
-//                current_row, frame_start, frame_end, frame_ended);
+//            fmt::print(stderr, "row {} frame [{}, {}) {}, {}\n",
+//                current_row, frame_start, frame_end,
+//                frame_started, frame_ended);
 
             if (!frame_ended)
             {
@@ -448,8 +656,10 @@ void WindowTransform::appendChunk(Chunk & chunk)
                 return;
             }
 
-            // The frame shouldn't be empty (probably?).
-            assert(frame_start < frame_end);
+            // The frame can be empty sometimes, e.g. the boundaries coincide
+            // or the start is after the partition end. But hopefully start is
+            // not after end.
+            assert(frame_start <= frame_end);
 
             // Write out the aggregation results.
             writeOutCurrentRow();
@@ -458,6 +668,7 @@ void WindowTransform::appendChunk(Chunk & chunk)
             advanceRowNumber(current_row);
             first_not_ready_row = current_row;
             frame_ended = false;
+            frame_started = false;
         }
 
         if (input_is_finished)
@@ -478,15 +689,15 @@ void WindowTransform::appendChunk(Chunk & chunk)
         }
 
         // Start the next partition.
-        const auto new_partition_start = partition_end;
+        partition_start = partition_end;
         advanceRowNumber(partition_end);
         partition_ended = false;
         // We have to reset the frame when the new partition starts. This is not a
         // generally correct way to do so, but we don't really support moving frame
         // for now.
-        frame_start = new_partition_start;
-        frame_end = new_partition_start;
-        assert(current_row == new_partition_start);
+        frame_start = partition_start;
+        frame_end = partition_start;
+        assert(current_row == partition_start);
 
 //        fmt::print(stderr, "reinitialize agg data at start of {}\n",
 //            new_partition_start);
@@ -534,6 +745,15 @@ IProcessor::Status WindowTransform::prepare()
         return Status::Finished;
     }
 
+    if (output_data.exception)
+    {
+        // An exception occurred during processing.
+        output.pushData(std::move(output_data));
+        output.finish();
+        input.close();
+        return Status::Finished;
+    }
+
     assert(first_not_ready_row.block >= first_block_number);
     // The first_not_ready_row might be past-the-end if we have already
     // calculated the window functions for all input rows. That's why the
diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h
index 39ccd4f96f9..afc44b2f706 100644
--- a/src/Processors/Transforms/WindowTransform.h
+++ b/src/Processors/Transforms/WindowTransform.h
@@ -53,6 +53,11 @@ struct RowNumber
     {
         return block == other.block && row == other.row;
     }
+
+    bool operator <= (const RowNumber & other) const
+    {
+        return *this < other || *this == other;
+    }
 };
 
 /*
@@ -101,7 +106,9 @@ public:
 
 private:
     void advancePartitionEnd();
-    void advanceFrameStart() const;
+    void advanceFrameStart();
+    void advanceFrameStartChoose();
+    void advanceFrameStartRowsOffset();
     void advanceFrameEnd();
     void advanceFrameEndCurrentRow();
     void advanceFrameEndUnbounded();
@@ -169,9 +176,28 @@ private:
 #endif
     }
 
+    auto moveRowNumber(const RowNumber & _x, int offset) const;
+    auto moveRowNumberNoCheck(const RowNumber & _x, int offset) const;
+
+    void assertValid(const RowNumber & x) const
+    {
+        assert(x.block >= first_block_number);
+        if (x.block == first_block_number + blocks.size())
+        {
+            assert(x.row == 0);
+        }
+        else
+        {
+            assert(x.row < blockRowsNumber(x));
+        }
+    }
+
     RowNumber blocksEnd() const
     { return RowNumber{first_block_number + blocks.size(), 0}; }
 
+    RowNumber blocksBegin() const
+    { return RowNumber{first_block_number, 0}; }
+
 public:
     /*
      * Data (formerly) inherited from ISimpleTransform, needed for the
@@ -217,18 +243,22 @@ public:
     // Used to determine which resulting blocks we can pass to the consumer.
     RowNumber first_not_ready_row;
 
-    // We don't keep the pointer to start of partition, because we don't really
-    // need it, and we want to be able to drop the starting blocks to save memory.
-    // The `partition_end` is past-the-end, as usual. When partition_ended = false,
-    // it still haven't ended, and partition_end is the next row to check.
+    // Boundaries of the current partition.
+    // partition_start doesn't point to a valid block, because we want to drop
+    // the blocks early to save memory. We still have track it so that we can
+    // cut off a PRECEDING frame at the partition start.
+    // The `partition_end` is past-the-end, as usual. When
+    // partition_ended = false, it still haven't ended, and partition_end is the
+    // next row to check.
+    RowNumber partition_start;
     RowNumber partition_end;
     bool partition_ended = false;
 
     // This is the row for which we are computing the window functions now.
     RowNumber current_row;
 
-    // The frame is [frame_start, frame_end) if frame_ended, and unknown
-    // otherwise. Note that when we move to the next row, both the
+    // The frame is [frame_start, frame_end) if frame_ended && frame_started,
+    // and unknown otherwise. Note that when we move to the next row, both the
     // frame_start and the frame_end may jump forward by an unknown amount of
     // blocks, e.g. if we use a RANGE frame. This means that sometimes we don't
     // know neither frame_end nor frame_start.
@@ -239,6 +269,7 @@ public:
     RowNumber frame_start;
     RowNumber frame_end;
     bool frame_ended = false;
+    bool frame_started = false;
 };
 
 }
diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference
index 1e9b83b9983..7108d8fda8c 100644
--- a/tests/queries/0_stateless/01591_window_functions.reference
+++ b/tests/queries/0_stateless/01591_window_functions.reference
@@ -516,3 +516,43 @@ settings max_block_size = 2;
 27	27	29	29
 27	27	29	29
 30	30	30	30
+-- ROWS offset frame start
+select number, p,
+    count(*) over (partition by p order by number
+        rows between 1 preceding and unbounded following),
+    count(*) over (partition by p order by number
+        rows between 1 following and unbounded following)
+from (select number, intDiv(number, 5) p from numbers(31))
+order by p, number
+settings max_block_size = 2;
+0	0	5	4
+1	0	5	3
+2	0	4	2
+3	0	3	1
+4	0	2	0
+5	1	5	4
+6	1	5	3
+7	1	4	2
+8	1	3	1
+9	1	2	0
+10	2	5	4
+11	2	5	3
+12	2	4	2
+13	2	3	1
+14	2	2	0
+15	3	5	4
+16	3	5	3
+17	3	4	2
+18	3	3	1
+19	3	2	0
+20	4	5	4
+21	4	5	3
+22	4	4	2
+23	4	3	1
+24	4	2	0
+25	5	5	4
+26	5	5	3
+27	5	4	2
+28	5	3	1
+29	5	2	0
+30	6	1	0
diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql
index a5b30fb884a..c4f1cfec5d5 100644
--- a/tests/queries/0_stateless/01591_window_functions.sql
+++ b/tests/queries/0_stateless/01591_window_functions.sql
@@ -163,3 +163,12 @@ window
         rows between unbounded preceding and unbounded following)
 settings max_block_size = 2;
 
+-- ROWS offset frame start
+select number, p,
+    count(*) over (partition by p order by number
+        rows between 1 preceding and unbounded following),
+    count(*) over (partition by p order by number
+        rows between 1 following and unbounded following)
+from (select number, intDiv(number, 5) p from numbers(31))
+order by p, number
+settings max_block_size = 2;

From 5e99b4461d55eaf0d32bddb41df9fbfe81d797b3 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Tue, 2 Feb 2021 02:25:37 +0300
Subject: [PATCH 0500/1238] cleanup 2

---
 src/Processors/Transforms/WindowTransform.cpp | 2 +-
 src/Processors/Transforms/WindowTransform.h   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index bf1a962c51f..388e7a4af3b 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -165,7 +165,7 @@ void WindowTransform::advancePartitionEnd()
     assert(!partition_ended && partition_end == blocksEnd());
 }
 
-void WindowTransform::advanceFrameStart()
+void WindowTransform::advanceFrameStart() const
 {
     // Frame start is always UNBOUNDED PRECEDING for now, so we don't have to
     // move it. It is initialized when the new partition starts.
diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h
index 34f4e9b6e9f..39ccd4f96f9 100644
--- a/src/Processors/Transforms/WindowTransform.h
+++ b/src/Processors/Transforms/WindowTransform.h
@@ -101,7 +101,7 @@ public:
 
 private:
     void advancePartitionEnd();
-    void advanceFrameStart();
+    void advanceFrameStart() const;
     void advanceFrameEnd();
     void advanceFrameEndCurrentRow();
     void advanceFrameEndUnbounded();

From 78a9624fc57c3053ab584aff18f0e9b8a674f07f Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Tue, 2 Feb 2021 02:11:40 +0300
Subject: [PATCH 0501/1238] CURRENT ROW frame start for ROWS frame

---
 src/Processors/Transforms/WindowTransform.cpp    | 16 +++++++++++++---
 .../0_stateless/01591_window_functions.reference |  2 +-
 .../0_stateless/01591_window_functions.sql       |  2 ++
 3 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index 5b942f089c1..4eb47f435d1 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -285,6 +285,19 @@ void WindowTransform::advanceFrameStartChoose()
             // the new partition starts.
             frame_started = true;
             return;
+        case WindowFrame::BoundaryType::Current:
+            switch (window_description.frame.type)
+            {
+                case WindowFrame::FrameType::Rows:
+                    // CURRENT ROW
+                    frame_start = current_row;
+                    frame_started = true;
+                    return;
+                default:
+                    // Fallthrough to the "not implemented" error.
+                    break;
+            }
+            break;
         case WindowFrame::BoundaryType::Offset:
             switch (window_description.frame.type)
             {
@@ -296,9 +309,6 @@ void WindowTransform::advanceFrameStartChoose()
                     break;
             }
             break;
-        default:
-            // Fallthrough to the "not implemented" error.
-            break;
     }
 
     throw Exception(ErrorCodes::NOT_IMPLEMENTED,
diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference
index 7108d8fda8c..8c0ef9ecaa6 100644
--- a/tests/queries/0_stateless/01591_window_functions.reference
+++ b/tests/queries/0_stateless/01591_window_functions.reference
@@ -516,7 +516,7 @@ settings max_block_size = 2;
 27	27	29	29
 27	27	29	29
 30	30	30	30
--- ROWS offset frame start
+-- CURRENT ROW and offset for ROWS frame start
 select number, p,
     count(*) over (partition by p order by number
         rows between 1 preceding and unbounded following),
diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql
index c4f1cfec5d5..3b4bdd03724 100644
--- a/tests/queries/0_stateless/01591_window_functions.sql
+++ b/tests/queries/0_stateless/01591_window_functions.sql
@@ -167,6 +167,8 @@ settings max_block_size = 2;
 select number, p,
     count(*) over (partition by p order by number
         rows between 1 preceding and unbounded following),
+    count(*) over (partition by p order by number
+        rows between current row and unbounded following),
     count(*) over (partition by p order by number
         rows between 1 following and unbounded following)
 from (select number, intDiv(number, 5) p from numbers(31))

From fb98aeeae1274f746c67d504fc0ec2933e16f4c8 Mon Sep 17 00:00:00 2001
From: Olga Revyakina <revolg@yandex-team.ru>
Date: Tue, 2 Feb 2021 02:44:40 +0300
Subject: [PATCH 0502/1238] Fixes

---
 .../table-engines/mergetree-family/mergetree.md     | 13 ++++++-------
 .../table-engines/mergetree-family/mergetree.md     | 13 ++++++-------
 2 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md
index 75fc42b6fc6..202c476bba8 100644
--- a/docs/en/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md
@@ -83,7 +83,7 @@ For a description of parameters, see the [CREATE query description](../../../sql
     Expression must have one `Date` or `DateTime` column as a result. Example:
     `TTL date + INTERVAL 1 DAY`
 
-    Type of the rule `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'` specifies an action to be done with the part if the expression is satisfied (reaches current time): removal of expired rows, moving a part (if expression is satisfied for all rows in a part) to specified disk (`TO DISK 'xxx'`) or to volume (`TO VOLUME 'xxx'`). Default type of the rule is removal (`DELETE`). List of multiple rules can specified, but there should be no more than one `DELETE` rule.
+    Type of the rule `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'|GROUP BY` specifies an action to be done with the part if the expression is satisfied (reaches current time): removal of expired rows, moving a part (if expression is satisfied for all rows in a part) to specified disk (`TO DISK 'xxx'`) or to volume (`TO VOLUME 'xxx'`), or aggregating values in expired rows. Default type of the rule is removal (`DELETE`). List of multiple rules can specified, but there should be no more than one `DELETE` rule.
 
     For more details, see [TTL for columns and tables](#table_engine-mergetree-ttl)
 
@@ -468,15 +468,14 @@ Type of TTL rule may follow each TTL expression. It affects an action which is t
 
 -   `DELETE` - delete expired rows (default action);
 -   `TO DISK 'aaa'` - move part to the disk `aaa`;
--   `TO VOLUME 'bbb'` - move part to the disk `bbb`.
+-   `TO VOLUME 'bbb'` - move part to the disk `bbb`;
+-   `GROUP BY` - aggregate expired rows.
 
-With `WHERE` clause you may specify which of the expired rows to delete or move.
+With `WHERE` clause you may specify which of the expired rows to delete or aggregate (it cannot be applied to moves).
 
-With `GROUP BY` clause you may [aggregate](../../../sql-reference/aggregate-functions/index.md) expired rows. `GROUP BY` key expression must be a prefix of the table primary key. 
+`GROUP BY` expression must be a prefix of the table primary key. 
 
-If a column is part of primary key, but not present in `GROUP BY` key expression, in result rows it contains aggregated value across grouped rows. 
-
-If a column is not present neither in primary key, nor in `SET` clause, in result row it contains any occasional value from grouped rows.
+If a column is not part of the `GROUP BY` expression and is not set explicitely in the `SET` clause, in result row it contains an occasional value from the grouped rows (as if aggregate function `any` is applied to it).
 
 **Examples**
 
diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md
index e21d4bc47e2..72a354bce22 100644
--- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md
@@ -74,7 +74,7 @@ ORDER BY expr
     
     Выражение должно возвращать столбец `Date` или `DateTime`. Пример: `TTL date + INTERVAL 1 DAY`.   
 
-    Тип правила `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'` указывает действие, которое будет выполнено с частью, удаление строк (прореживание), перемещение (при выполнении условия для всех строк части) на определённый диск (`TO DISK 'xxx'`) или том (`TO VOLUME 'xxx'`). Поведение по умолчанию соответствует удалению строк (`DELETE`). В списке правил может быть указано только одно выражение с поведением `DELETE`.
+    Тип правила `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'|GROUP BY` указывает действие, которое будет выполнено с частью: удаление строк (прореживание), перемещение (при выполнении условия для всех строк части) на определённый диск (`TO DISK 'xxx'`) или том (`TO VOLUME 'xxx'`), или агрегирование данных в устаревших строках. Поведение по умолчанию соответствует удалению строк (`DELETE`). В списке правил может быть указано только одно выражение с поведением `DELETE`.
     
     Дополнительные сведения смотрите в разделе [TTL для столбцов и таблиц](#table_engine-mergetree-ttl)
 
@@ -456,15 +456,14 @@ TTL expr
 
 -   `DELETE` - удалить данные (действие по умолчанию);
 -   `TO DISK 'aaa'` - переместить данные на диск `aaa`;
--   `TO VOLUME 'bbb'` - переместить данные на том `bbb`.
+-   `TO VOLUME 'bbb'` - переместить данные на том `bbb`;
+-   `GROUP BY` -  агрегировать данные.
 
-В секции `WHERE` можно задать условие удаления или перемещения устаревших строк.
+В секции `WHERE` можно задать условие удаления или агрегирования устаревших строк (для перемещения условие `WHERE` не применимо).
 
-В секции `GROUP BY` можно [агрегировать](../../../sql-reference/aggregate-functions/index.md) данные из устаревших строк. Колонки, по которым агрегируются данные в `GROUP BY`, должны являться префиксом первичного ключа таблицы. 
+Колонки, по которым агрегируются данные в `GROUP BY`, должны являться префиксом первичного ключа таблицы. 
 
-Если колонка является частью первичного ключа, но не фигурирует в списке полей в `GROUP BY`, в результирующих строках она будет содержать агрегированные данные по сгруппированным строкам. 
-
-Если колонка не является частью первичного ключа и не указана в секции `SET`, в результирующих строках она будет содержать случайное значение, взятое из одной из сгруппированных строк.
+Если колонка не является частью выражения `GROUP BY` и не задается напрямую в секции `SET`, в результирующих строках она будет содержать случайное значение, взятое из одной из сгруппированных строк (как будто к ней применяется агрегирующая функция `any`).
 
 **Примеры**
 

From 297d106f1446790a1a065e2b0ccc416eda658bb8 Mon Sep 17 00:00:00 2001
From: Olga Revyakina <revolg@yandex-team.ru>
Date: Tue, 2 Feb 2021 03:24:01 +0300
Subject: [PATCH 0503/1238] Fixed, translated. Symbolic links added.

---
 docs/en/sql-reference/data-types/map.md       | 11 ++--
 .../functions/tuple-map-functions.md          |  4 +-
 docs/es/sql-reference/data-types/map.md       | 57 ++++++++++++++++++
 docs/fr/sql-reference/data-types/map.md       | 57 ++++++++++++++++++
 docs/ja/sql-reference/data-types/map.md       | 57 ++++++++++++++++++
 docs/ru/sql-reference/data-types/map.md       | 57 ++++++++++++++++++
 .../functions/tuple-map-functions.md          | 60 +++++++++++++++++++
 docs/zh/sql-reference/data-types/map.md       | 57 ++++++++++++++++++
 8 files changed, 353 insertions(+), 7 deletions(-)
 create mode 100644 docs/es/sql-reference/data-types/map.md
 create mode 100644 docs/fr/sql-reference/data-types/map.md
 create mode 100644 docs/ja/sql-reference/data-types/map.md
 create mode 100644 docs/ru/sql-reference/data-types/map.md
 create mode 100644 docs/zh/sql-reference/data-types/map.md

diff --git a/docs/en/sql-reference/data-types/map.md b/docs/en/sql-reference/data-types/map.md
index 5f1300896e8..0f0f69d421d 100644
--- a/docs/en/sql-reference/data-types/map.md
+++ b/docs/en/sql-reference/data-types/map.md
@@ -8,8 +8,8 @@ toc_title: Map(key, value)
 `Map(key, value)` data type stores `key:value` pairs in structures like JSON. 
 
 **Parameters** 
--   `key` — Key part of the pair. [String](../../sql-reference/data-types/string.md) or [Integer](../../sql-reference/data-types/int-uint.md).
--   `value` — Value part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) or [Array](../../sql-reference/data-types/array.md).
+-   `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md) or [Integer](../../sql-reference/data-types/int-uint.md).
+-   `value` — The value part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) or [Array](../../sql-reference/data-types/array.md).
 
 !!! warning "Warning"
     Currently `Map` data type is an experimental feature. To work with it you must set `allow_experimental_map_type = 1`.
@@ -35,9 +35,9 @@ Result:
 └─────────────────────────┘
 ```
 
-## Map() and Tuple() Types {#map-and-tuple}
+## Convert Tuple to Map Type {#map-and-tuple}
 
-You can cast `Tuple()` as `Map()`:
+You can cast `Tuple()` as `Map()` using [CAST](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function:
 
 ``` sql
 SELECT CAST(([1, 2, 3], ['Ready', 'Steady', 'Go']), 'Map(UInt8, String)') AS map;
@@ -52,5 +52,6 @@ SELECT CAST(([1, 2, 3], ['Ready', 'Steady', 'Go']), 'Map(UInt8, String)') AS map
 **See Also**
 
 -   [map()](../../sql-reference/functions/tuple-map-functions.md#function-map) function
+-   [CAST()](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function
 
-[Original article](https://clickhouse.tech/docs/en/data_types/map/) <!--hide-->
+[Original article](https://clickhouse.tech/docs/en/data-types/map/) <!--hide-->
diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md
index 3de570e6dcc..b81f971196a 100644
--- a/docs/en/sql-reference/functions/tuple-map-functions.md
+++ b/docs/en/sql-reference/functions/tuple-map-functions.md
@@ -17,8 +17,8 @@ map(key1, value1[, key2, value2, ...])
 
 **Parameters** 
 
--   `key` — Key part of the pair. [String](../../sql-reference/data-types/string.md) or [Integer](../../sql-reference/data-types/int-uint.md).
--   `value` — Value part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) or [Array](../../sql-reference/data-types/array.md).
+-   `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md) or [Integer](../../sql-reference/data-types/int-uint.md).
+-   `value` — The value part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) or [Array](../../sql-reference/data-types/array.md).
 
 **Returned value**
 
diff --git a/docs/es/sql-reference/data-types/map.md b/docs/es/sql-reference/data-types/map.md
new file mode 100644
index 00000000000..0f0f69d421d
--- /dev/null
+++ b/docs/es/sql-reference/data-types/map.md
@@ -0,0 +1,57 @@
+---
+toc_priority: 65
+toc_title: Map(key, value)
+---
+
+# Map(key, value) {#data_type-map}
+
+`Map(key, value)` data type stores `key:value` pairs in structures like JSON. 
+
+**Parameters** 
+-   `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md) or [Integer](../../sql-reference/data-types/int-uint.md).
+-   `value` — The value part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) or [Array](../../sql-reference/data-types/array.md).
+
+!!! warning "Warning"
+    Currently `Map` data type is an experimental feature. To work with it you must set `allow_experimental_map_type = 1`.
+
+To get the value from an `a Map('key', 'value')` column, use `a['key']` syntax.
+
+**Example**
+
+Query:
+
+``` sql
+CREATE TABLE table_map (a Map(String, UInt64)) ENGINE=Memory;
+INSERT INTO table_map VALUES ({'key1':1, 'key2':100}), ({'key1':2,'key2':200}), ({'key1':3,'key2':300});
+SELECT a['key2'] FROM table_map;
+```
+Result:
+
+```text
+┌─arrayElement(a, 'key2')─┐
+│                     100 │
+│                     200 │
+│                     300 │
+└─────────────────────────┘
+```
+
+## Convert Tuple to Map Type {#map-and-tuple}
+
+You can cast `Tuple()` as `Map()` using [CAST](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function:
+
+``` sql
+SELECT CAST(([1, 2, 3], ['Ready', 'Steady', 'Go']), 'Map(UInt8, String)') AS map;
+```
+
+``` text
+┌─map───────────────────────────┐
+│ {1:'Ready',2:'Steady',3:'Go'} │
+└───────────────────────────────┘
+```
+
+**See Also**
+
+-   [map()](../../sql-reference/functions/tuple-map-functions.md#function-map) function
+-   [CAST()](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function
+
+[Original article](https://clickhouse.tech/docs/en/data-types/map/) <!--hide-->
diff --git a/docs/fr/sql-reference/data-types/map.md b/docs/fr/sql-reference/data-types/map.md
new file mode 100644
index 00000000000..0f0f69d421d
--- /dev/null
+++ b/docs/fr/sql-reference/data-types/map.md
@@ -0,0 +1,57 @@
+---
+toc_priority: 65
+toc_title: Map(key, value)
+---
+
+# Map(key, value) {#data_type-map}
+
+`Map(key, value)` data type stores `key:value` pairs in structures like JSON. 
+
+**Parameters** 
+-   `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md) or [Integer](../../sql-reference/data-types/int-uint.md).
+-   `value` — The value part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) or [Array](../../sql-reference/data-types/array.md).
+
+!!! warning "Warning"
+    Currently `Map` data type is an experimental feature. To work with it you must set `allow_experimental_map_type = 1`.
+
+To get the value from an `a Map('key', 'value')` column, use `a['key']` syntax.
+
+**Example**
+
+Query:
+
+``` sql
+CREATE TABLE table_map (a Map(String, UInt64)) ENGINE=Memory;
+INSERT INTO table_map VALUES ({'key1':1, 'key2':100}), ({'key1':2,'key2':200}), ({'key1':3,'key2':300});
+SELECT a['key2'] FROM table_map;
+```
+Result:
+
+```text
+┌─arrayElement(a, 'key2')─┐
+│                     100 │
+│                     200 │
+│                     300 │
+└─────────────────────────┘
+```
+
+## Convert Tuple to Map Type {#map-and-tuple}
+
+You can cast `Tuple()` as `Map()` using [CAST](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function:
+
+``` sql
+SELECT CAST(([1, 2, 3], ['Ready', 'Steady', 'Go']), 'Map(UInt8, String)') AS map;
+```
+
+``` text
+┌─map───────────────────────────┐
+│ {1:'Ready',2:'Steady',3:'Go'} │
+└───────────────────────────────┘
+```
+
+**See Also**
+
+-   [map()](../../sql-reference/functions/tuple-map-functions.md#function-map) function
+-   [CAST()](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function
+
+[Original article](https://clickhouse.tech/docs/en/data-types/map/) <!--hide-->
diff --git a/docs/ja/sql-reference/data-types/map.md b/docs/ja/sql-reference/data-types/map.md
new file mode 100644
index 00000000000..0f0f69d421d
--- /dev/null
+++ b/docs/ja/sql-reference/data-types/map.md
@@ -0,0 +1,57 @@
+---
+toc_priority: 65
+toc_title: Map(key, value)
+---
+
+# Map(key, value) {#data_type-map}
+
+`Map(key, value)` data type stores `key:value` pairs in structures like JSON. 
+
+**Parameters** 
+-   `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md) or [Integer](../../sql-reference/data-types/int-uint.md).
+-   `value` — The value part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) or [Array](../../sql-reference/data-types/array.md).
+
+!!! warning "Warning"
+    Currently `Map` data type is an experimental feature. To work with it you must set `allow_experimental_map_type = 1`.
+
+To get the value from an `a Map('key', 'value')` column, use `a['key']` syntax.
+
+**Example**
+
+Query:
+
+``` sql
+CREATE TABLE table_map (a Map(String, UInt64)) ENGINE=Memory;
+INSERT INTO table_map VALUES ({'key1':1, 'key2':100}), ({'key1':2,'key2':200}), ({'key1':3,'key2':300});
+SELECT a['key2'] FROM table_map;
+```
+Result:
+
+```text
+┌─arrayElement(a, 'key2')─┐
+│                     100 │
+│                     200 │
+│                     300 │
+└─────────────────────────┘
+```
+
+## Convert Tuple to Map Type {#map-and-tuple}
+
+You can cast `Tuple()` as `Map()` using [CAST](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function:
+
+``` sql
+SELECT CAST(([1, 2, 3], ['Ready', 'Steady', 'Go']), 'Map(UInt8, String)') AS map;
+```
+
+``` text
+┌─map───────────────────────────┐
+│ {1:'Ready',2:'Steady',3:'Go'} │
+└───────────────────────────────┘
+```
+
+**See Also**
+
+-   [map()](../../sql-reference/functions/tuple-map-functions.md#function-map) function
+-   [CAST()](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function
+
+[Original article](https://clickhouse.tech/docs/en/data-types/map/) <!--hide-->
diff --git a/docs/ru/sql-reference/data-types/map.md b/docs/ru/sql-reference/data-types/map.md
new file mode 100644
index 00000000000..c1391e37133
--- /dev/null
+++ b/docs/ru/sql-reference/data-types/map.md
@@ -0,0 +1,57 @@
+---
+toc_priority: 65
+toc_title: Map(key, value)
+---
+
+# Map(key, value) {#data_type-map}
+
+Тип данных `Map(key, value)` хранит пары `ключ:значение` в структурах типа JSON. 
+
+**Параметры** 
+-   `key` — ключ. [String](../../sql-reference/data-types/string.md) или [Integer](../../sql-reference/data-types/int-uint.md).
+-   `value` — значение. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) или [Array](../../sql-reference/data-types/array.md).
+
+!!! warning "Предупреждение"
+    Сейчас использование типа данных `Map` является экспериментальной возможностью. Чтобы использовать этот тип данных, включите настройку `allow_experimental_map_type = 1`.
+
+Чтобы получить значение из колонки `a Map('key', 'value')`, используйте синтаксис `a['key']`.
+
+**Пример**
+
+Запрос:
+
+``` sql
+CREATE TABLE table_map (a Map(String, UInt64)) ENGINE=Memory;
+INSERT INTO table_map VALUES ({'key1':1, 'key2':100}), ({'key1':2,'key2':200}), ({'key1':3,'key2':300});
+SELECT a['key2'] FROM table_map;
+```
+Результат:
+
+```text
+┌─arrayElement(a, 'key2')─┐
+│                     100 │
+│                     200 │
+│                     300 │
+└─────────────────────────┘
+```
+
+## Преобразование типа данных Tuple в Map {#map-and-tuple}
+
+Для преобразования данных с типом `Tuple()` в тип `Map()` можно использовать функцию [CAST](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast):
+
+``` sql
+SELECT CAST(([1, 2, 3], ['Ready', 'Steady', 'Go']), 'Map(UInt8, String)') AS map;
+```
+
+``` text
+┌─map───────────────────────────┐
+│ {1:'Ready',2:'Steady',3:'Go'} │
+└───────────────────────────────┘
+```
+
+**См. также**
+
+-   функция [map()](../../sql-reference/functions/tuple-map-functions.md#function-map)
+-   функция [CAST()](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast)
+
+[Original article](https://clickhouse.tech/docs/ru/data-types/map/) <!--hide-->
diff --git a/docs/ru/sql-reference/functions/tuple-map-functions.md b/docs/ru/sql-reference/functions/tuple-map-functions.md
index a2b25e68fe5..65e44698008 100644
--- a/docs/ru/sql-reference/functions/tuple-map-functions.md
+++ b/docs/ru/sql-reference/functions/tuple-map-functions.md
@@ -5,6 +5,66 @@ toc_title: Работа с контейнерами map
 
 # Функции для работы с контейнерами map {#functions-for-working-with-tuple-maps}
 
+## map {#function-map}
+
+Преобразовывает пары `ключ:значение` в структуру JSON.
+
+**Синтаксис** 
+
+``` sql
+map(key1, value1[, key2, value2, ...])
+```
+
+**Параметры** 
+
+-   `key` — ключ. [String](../../sql-reference/data-types/string.md) или [Integer](../../sql-reference/data-types/int-uint.md).
+-   `value` — значение. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) или [Array](../../sql-reference/data-types/array.md).
+
+**Возвращаемое значение**
+
+-   Структура JSON с парами `ключ:значение`.
+
+Тип: [Map(key, value)](../../sql-reference/data-types/map.md).
+
+**Примеры**
+
+Запрос:
+
+``` sql
+SELECT map('key1', number, 'key2', number * 2) FROM numbers(3);
+```
+
+Результат:
+
+``` text
+┌─map('key1', number, 'key2', multiply(number, 2))─┐
+│ {'key1':0,'key2':0}                              │
+│ {'key1':1,'key2':2}                              │
+│ {'key1':2,'key2':4}                              │
+└──────────────────────────────────────────────────┘
+```
+
+Запрос:
+
+``` sql
+CREATE TABLE table_map (a Map(String, UInt64)) ENGINE = MergeTree() ORDER BY a;
+INSERT INTO table_map SELECT map('key1', number, 'key2', number * 2) FROM numbers(3);
+SELECT a['key2'] FROM table_map;
+```
+
+Результат:
+
+``` text
+┌─arrayElement(a, 'key2')─┐
+│                       0 │
+│                       2 │
+│                       4 │
+└─────────────────────────┘
+```
+
+**См. также** 
+
+-   тип данных [Map(key, value)](../../sql-reference/data-types/map.md)
 ## mapAdd {#function-mapadd}
 
 Собирает все ключи и суммирует соответствующие значения.
diff --git a/docs/zh/sql-reference/data-types/map.md b/docs/zh/sql-reference/data-types/map.md
new file mode 100644
index 00000000000..0f0f69d421d
--- /dev/null
+++ b/docs/zh/sql-reference/data-types/map.md
@@ -0,0 +1,57 @@
+---
+toc_priority: 65
+toc_title: Map(key, value)
+---
+
+# Map(key, value) {#data_type-map}
+
+`Map(key, value)` data type stores `key:value` pairs in structures like JSON. 
+
+**Parameters** 
+-   `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md) or [Integer](../../sql-reference/data-types/int-uint.md).
+-   `value` — The value part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) or [Array](../../sql-reference/data-types/array.md).
+
+!!! warning "Warning"
+    Currently `Map` data type is an experimental feature. To work with it you must set `allow_experimental_map_type = 1`.
+
+To get the value from an `a Map('key', 'value')` column, use `a['key']` syntax.
+
+**Example**
+
+Query:
+
+``` sql
+CREATE TABLE table_map (a Map(String, UInt64)) ENGINE=Memory;
+INSERT INTO table_map VALUES ({'key1':1, 'key2':100}), ({'key1':2,'key2':200}), ({'key1':3,'key2':300});
+SELECT a['key2'] FROM table_map;
+```
+Result:
+
+```text
+┌─arrayElement(a, 'key2')─┐
+│                     100 │
+│                     200 │
+│                     300 │
+└─────────────────────────┘
+```
+
+## Convert Tuple to Map Type {#map-and-tuple}
+
+You can cast `Tuple()` as `Map()` using [CAST](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function:
+
+``` sql
+SELECT CAST(([1, 2, 3], ['Ready', 'Steady', 'Go']), 'Map(UInt8, String)') AS map;
+```
+
+``` text
+┌─map───────────────────────────┐
+│ {1:'Ready',2:'Steady',3:'Go'} │
+└───────────────────────────────┘
+```
+
+**See Also**
+
+-   [map()](../../sql-reference/functions/tuple-map-functions.md#function-map) function
+-   [CAST()](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function
+
+[Original article](https://clickhouse.tech/docs/en/data-types/map/) <!--hide-->

From d084625436eae7b7e58b4214aa454d97e4e97f4e Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Tue, 2 Feb 2021 03:51:35 +0300
Subject: [PATCH 0504/1238] cleanup

---
 src/Parsers/ExpressionElementParsers.cpp      |  1 +
 .../01591_window_functions.reference          | 66 ++++++++++---------
 2 files changed, 35 insertions(+), 32 deletions(-)

diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp
index a9b38b45c1c..de327e3f0f3 100644
--- a/src/Parsers/ExpressionElementParsers.cpp
+++ b/src/Parsers/ExpressionElementParsers.cpp
@@ -45,6 +45,7 @@ namespace DB
 
 namespace ErrorCodes
 {
+    extern const int BAD_ARGUMENTS;
     extern const int SYNTAX_ERROR;
     extern const int LOGICAL_ERROR;
     extern const int NOT_IMPLEMENTED;
diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference
index 8c0ef9ecaa6..7e286f753e5 100644
--- a/tests/queries/0_stateless/01591_window_functions.reference
+++ b/tests/queries/0_stateless/01591_window_functions.reference
@@ -516,43 +516,45 @@ settings max_block_size = 2;
 27	27	29	29
 27	27	29	29
 30	30	30	30
--- CURRENT ROW and offset for ROWS frame start
+-- ROWS offset frame start
 select number, p,
     count(*) over (partition by p order by number
         rows between 1 preceding and unbounded following),
+    count(*) over (partition by p order by number
+        rows between current row and unbounded following),
     count(*) over (partition by p order by number
         rows between 1 following and unbounded following)
 from (select number, intDiv(number, 5) p from numbers(31))
 order by p, number
 settings max_block_size = 2;
-0	0	5	4
-1	0	5	3
-2	0	4	2
-3	0	3	1
-4	0	2	0
-5	1	5	4
-6	1	5	3
-7	1	4	2
-8	1	3	1
-9	1	2	0
-10	2	5	4
-11	2	5	3
-12	2	4	2
-13	2	3	1
-14	2	2	0
-15	3	5	4
-16	3	5	3
-17	3	4	2
-18	3	3	1
-19	3	2	0
-20	4	5	4
-21	4	5	3
-22	4	4	2
-23	4	3	1
-24	4	2	0
-25	5	5	4
-26	5	5	3
-27	5	4	2
-28	5	3	1
-29	5	2	0
-30	6	1	0
+0	0	5	5	4
+1	0	5	4	3
+2	0	4	3	2
+3	0	3	2	1
+4	0	2	1	0
+5	1	5	5	4
+6	1	5	4	3
+7	1	4	3	2
+8	1	3	2	1
+9	1	2	1	0
+10	2	5	5	4
+11	2	5	4	3
+12	2	4	3	2
+13	2	3	2	1
+14	2	2	1	0
+15	3	5	5	4
+16	3	5	4	3
+17	3	4	3	2
+18	3	3	2	1
+19	3	2	1	0
+20	4	5	5	4
+21	4	5	4	3
+22	4	4	3	2
+23	4	3	2	1
+24	4	2	1	0
+25	5	5	5	4
+26	5	5	4	3
+27	5	4	3	2
+28	5	3	2	1
+29	5	2	1	0
+30	6	1	1	0

From 8bd026271a08b501a4852e1bd74c95632ff1aa37 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Tue, 2 Feb 2021 04:16:44 +0300
Subject: [PATCH 0505/1238] more cleanup

---
 src/Processors/Transforms/WindowTransform.cpp | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index 4eb47f435d1..23acc85aef0 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -326,12 +326,19 @@ void WindowTransform::advanceFrameStart()
 
     const auto frame_start_before = frame_start;
     advanceFrameStartChoose();
+    assert(frame_start_before <= frame_start);
     if (frame_start == frame_start_before)
     {
-        return;
+        // If the frame start didn't move, this means we validated that the frame
+        // starts at the point we reached earlier but were unable to validate.
+        // This probably only happens in degenerate cases where the frame start
+        // is further than the end of partition, and the partition ends at the
+        // last row of the block, but we can only tell for sure after a new
+        // block arrives. We still have to update the state of aggregate
+        // functions when the frame start becomes valid, so we continue.
+        assert(frame_started);
     }
 
-    assert(frame_start_before < frame_start);
     assert(partition_start <= frame_start);
     assert(frame_start <= partition_end);
     if (partition_ended && frame_start == partition_end)
@@ -669,6 +676,8 @@ void WindowTransform::appendChunk(Chunk & chunk)
             // The frame can be empty sometimes, e.g. the boundaries coincide
             // or the start is after the partition end. But hopefully start is
             // not after end.
+            assert(frame_started);
+            assert(frame_ended);
             assert(frame_start <= frame_end);
 
             // Write out the aggregation results.

From e6367de2d9df4a00e7efeb18533230d98168ae8f Mon Sep 17 00:00:00 2001
From: George <gyuton@yandex-team.ru>
Date: Tue, 2 Feb 2021 05:23:02 +0300
Subject: [PATCH 0506/1238] Added documentation

---
 .../table-engines/mergetree-family/mergetree.md       |  3 ++-
 docs/en/operations/settings/merge-tree-settings.md    | 11 +++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md
index 084d05ec0a0..6781961254e 100644
--- a/docs/en/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md
@@ -101,7 +101,8 @@ For a description of parameters, see the [CREATE query description](../../../sql
     -   `max_parts_in_total` — Maximum number of parts in all partitions.
 	-   `max_compress_block_size` — Maximum size of blocks of uncompressed data before compressing for writing to a table. You can also specify this setting in the global settings (see [max_compress_block_size](../../../operations/settings/settings.md#max-compress-block-size) setting). The value specified when table is created overrides the global value for this setting.
 	-   `min_compress_block_size` — Minimum size of blocks of uncompressed data required for compression when writing the next mark. You can also specify this setting in the global settings (see [min_compress_block_size](../../../operations/settings/settings.md#min-compress-block-size) setting). The value specified when table is created overrides the global value for this setting.
-
+    -   `max_partitions_to_read` — Limits the maximum number of partitions that can be accessed in one query. Can be overriden with query-level [max_partitions_to_read](../../../operations/settings/merge-tree-settings.md#max-partitions-to-read) setting.
+    
 **Example of Sections Setting**
 
 ``` sql
diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md
index e0f7c79dcab..88ce026476f 100644
--- a/docs/en/operations/settings/merge-tree-settings.md
+++ b/docs/en/operations/settings/merge-tree-settings.md
@@ -186,5 +186,16 @@ Possible values:
 Default value: auto (number of CPU cores).
 
 During startup ClickHouse reads all parts of all tables (reads files with metadata of parts) to build a list of all parts in memory. In some systems with a large number of parts this process can take a long time, and this time might be shortened by increasing `max_part_loading_threads` (if this process is not CPU and disk I/O bound).
+## max_partitions_to_read {#max-partitions-to-read}
+
+Limits the maximum number of partitions that can be accessed in one query.
+
+The setting value specified when table is created can be overridden via query-level setting.
+
+Possible values:
+
+-   Any positive integer.
+
+Default value: -1 (unlimited).
 
 [Original article](https://clickhouse.tech/docs/en/operations/settings/merge_tree_settings/) <!--hide-->

From a7eb2ce6d05fbd0cb649583a67e1f6771accb192 Mon Sep 17 00:00:00 2001
From: Amos Bird <amosbird@gmail.com>
Date: Mon, 1 Feb 2021 20:32:57 +0800
Subject: [PATCH 0507/1238] initialize MaxDDLEntryId upon restarting

---
 src/Interpreters/DDLWorker.cpp | 37 ++++++++++++++++++++--------------
 src/Interpreters/DDLWorker.h   |  1 +
 2 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index b1d9f872daa..b07dd5c3fb0 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -462,6 +462,7 @@ void DDLWorker::scheduleTasks()
         else
         {
             LOG_DEBUG(log, "Task {} ({}) has been already processed", entry_name, task->entry.query);
+            updateMaxDDLEntryID(task);
         }
 
         saveTask(entry_name);
@@ -680,6 +681,26 @@ void DDLWorker::enqueueTask(DDLTaskPtr task_ptr)
         }
     }
 }
+
+
+void DDLWorker::updateMaxDDLEntryID(DDLTask & task)
+{
+    DB::ReadBufferFromString in(task.entry_name);
+    DB::assertString("query-", in);
+    UInt64 id;
+    readText(id, in);
+    auto prev_id = max_id.load(std::memory_order_relaxed);
+    while (prev_id < id)
+    {
+        if (max_id.compare_exchange_weak(prev_id, id))
+        {
+            CurrentMetrics::set(CurrentMetrics::MaxDDLEntryID, id);
+            break;
+        }
+    }
+}
+
+
 void DDLWorker::processTask(DDLTask & task)
 {
     auto zookeeper = tryGetZooKeeper();
@@ -754,21 +775,7 @@ void DDLWorker::processTask(DDLTask & task)
         task.was_executed = true;
     }
 
-    {
-        DB::ReadBufferFromString in(task.entry_name);
-        DB::assertString("query-", in);
-        UInt64 id;
-        readText(id, in);
-        auto prev_id = max_id.load(std::memory_order_relaxed);
-        while (prev_id < id)
-        {
-            if (max_id.compare_exchange_weak(prev_id, id))
-            {
-                CurrentMetrics::set(CurrentMetrics::MaxDDLEntryID, id);
-                break;
-            }
-        }
-    }
+    updateMaxDDLEntryID(task);
 
     /// FIXME: if server fails right here, the task will be executed twice. We need WAL here.
 
diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h
index 9a4e55dcfc4..53032d879d0 100644
--- a/src/Interpreters/DDLWorker.h
+++ b/src/Interpreters/DDLWorker.h
@@ -129,6 +129,7 @@ private:
     /// Returns non-empty DDLTaskPtr if entry parsed and the check is passed
     DDLTaskPtr initAndCheckTask(const String & entry_name, String & out_reason, const ZooKeeperPtr & zookeeper);
 
+    void updateMaxDDLEntryID(DDLTask & task);
     void enqueueTask(DDLTaskPtr task);
     void processTask(DDLTask & task);
 

From 902ae7894eb912c5c98acaf29f2a48967d7061d4 Mon Sep 17 00:00:00 2001
From: tavplubix <tavplubix@gmail.com>
Date: Mon, 1 Feb 2021 18:59:06 +0300
Subject: [PATCH 0508/1238] Update DDLWorker.h

---
 src/Interpreters/DDLWorker.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h
index 53032d879d0..9771954817d 100644
--- a/src/Interpreters/DDLWorker.h
+++ b/src/Interpreters/DDLWorker.h
@@ -129,7 +129,7 @@ private:
     /// Returns non-empty DDLTaskPtr if entry parsed and the check is passed
     DDLTaskPtr initAndCheckTask(const String & entry_name, String & out_reason, const ZooKeeperPtr & zookeeper);
 
-    void updateMaxDDLEntryID(DDLTask & task);
+    void updateMaxDDLEntryID(const DDLTask & task);
     void enqueueTask(DDLTaskPtr task);
     void processTask(DDLTask & task);
 

From 8e6b09b1aefd799a7398563f3d3248ae4ab42cb8 Mon Sep 17 00:00:00 2001
From: tavplubix <tavplubix@gmail.com>
Date: Mon, 1 Feb 2021 18:59:38 +0300
Subject: [PATCH 0509/1238] Update DDLWorker.cpp

---
 src/Interpreters/DDLWorker.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index b07dd5c3fb0..4fc87005553 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -683,7 +683,7 @@ void DDLWorker::enqueueTask(DDLTaskPtr task_ptr)
 }
 
 
-void DDLWorker::updateMaxDDLEntryID(DDLTask & task)
+void DDLWorker::updateMaxDDLEntryID(const DDLTask & task)
 {
     DB::ReadBufferFromString in(task.entry_name);
     DB::assertString("query-", in);

From 37c178f5e1f63f1959c95bfbfb272e590d0d539a Mon Sep 17 00:00:00 2001
From: tavplubix <tavplubix@gmail.com>
Date: Mon, 1 Feb 2021 22:35:37 +0300
Subject: [PATCH 0510/1238] Update DDLWorker.cpp

---
 src/Interpreters/DDLWorker.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 4fc87005553..b53c7c76d27 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -462,7 +462,7 @@ void DDLWorker::scheduleTasks()
         else
         {
             LOG_DEBUG(log, "Task {} ({}) has been already processed", entry_name, task->entry.query);
-            updateMaxDDLEntryID(task);
+            updateMaxDDLEntryID(*task);
         }
 
         saveTask(entry_name);

From 1f6a686f4ec39f81d926d568ebbcba369654da6e Mon Sep 17 00:00:00 2001
From: George <gyuton@yandex-team.ru>
Date: Tue, 2 Feb 2021 05:54:38 +0300
Subject: [PATCH 0511/1238] Some fixes

---
 .../functions/other-functions.md              | 34 +++++------------
 .../functions/other-functions.md              | 38 ++++++-------------
 2 files changed, 22 insertions(+), 50 deletions(-)

diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md
index 73b7f6a1078..7308ed60b5c 100644
--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@@ -186,8 +186,6 @@ In ClickHouse, queries are always run on blocks (sets of column parts). This fun
 
 Returns estimation of uncompressed byte size of its arguments in memory.
 
-Use case: suppose you have a service that stores data for multiple clients in one table. Users will pay per data volume. So, you need to implement accounting of users data volume. The function will allow to calculate the data size on per-row basis.
-
 **Syntax**
 
 ```sql
@@ -244,40 +242,28 @@ ORDER BY key;
 
 INSERT INTO test VALUES(1, 8, 16, 32, 64,  -8, -16, -32, -64, 32.32, 64.64);
 
-SELECT key, byteSize(u8) AS `byteSize(UInt8)`, byteSize(u16) AS `byteSize(UInt16)`, byteSize(u32) AS `byteSize(UInt32)`, byteSize(u64) AS `byteSize(UInt64)` FROM test ORDER BY key ASC FORMAT Vertical;
-
-SELECT key, byteSize(i8) AS `byteSize(Int8)`, byteSize(i16) AS `byteSize(Int16)`, byteSize(i32) AS `byteSize(Int32)`, byteSize(i64) AS `byteSize(Int64)` FROM test ORDER BY key ASC FORMAT Vertical;
-
-SELECT key, byteSize(f32) AS `byteSize(Float32)`,  byteSize(f64) AS `byteSize(Float64)` FROM test ORDER BY key ASC FORMAT Vertical;
+SELECT key, byteSize(u8) AS `byteSize(UInt8)`, byteSize(u16) AS `byteSize(UInt16)`, byteSize(u32) AS `byteSize(UInt32)`, byteSize(u64) AS `byteSize(UInt64)`, byteSize(i8) AS `byteSize(Int8)`, byteSize(i16) AS `byteSize(Int16)`, byteSize(i32) AS `byteSize(Int32)`, byteSize(i64) AS `byteSize(Int64)`, byteSize(f32) AS `byteSize(Float32)`, byteSize(f64) AS `byteSize(Float64)` FROM test ORDER BY key ASC FORMAT Vertical;
 ```
 
 Result:
 
 ``` text
-Row 1:
-──────
-key:              1
-byteSize(UInt8):  1
-byteSize(UInt16): 2
-byteSize(UInt32): 4
-byteSize(UInt64): 8
-
-Row 1:
-──────
-key:             1
-byteSize(Int8):  1
-byteSize(Int16): 2
-byteSize(Int32): 4
-byteSize(Int64): 8
-
 Row 1:
 ──────
 key:               1
+byteSize(UInt8):   1
+byteSize(UInt16):  2
+byteSize(UInt32):  4
+byteSize(UInt64):  8
+byteSize(Int8):    1
+byteSize(Int16):   2
+byteSize(Int32):   4
+byteSize(Int64):   8
 byteSize(Float32): 4
 byteSize(Float64): 8
 ```
 
-The function can take multiple arguments and will return their combined byte size.
+If the function takes multiple arguments, it returns their combined byte size.
 
 Query:
 
diff --git a/docs/ru/sql-reference/functions/other-functions.md b/docs/ru/sql-reference/functions/other-functions.md
index c92f85e3e00..a738ba755b1 100644
--- a/docs/ru/sql-reference/functions/other-functions.md
+++ b/docs/ru/sql-reference/functions/other-functions.md
@@ -185,9 +185,7 @@ SELECT visibleWidth(NULL)
 
 ## byteSize {#function-bytesize}
 
-Возвращает примерный размер аргументов в памяти в байтах в несжатом виде.
-
-Пример использования: в сервисе, хранящим данные для нескольких клиентов в одной таблице, пользователь платит за хранение данных. Поэтому требуется узнать количество памяти, занимаемое ими. Функция позволяет вычислить размер данных для каждой строки.
+Возвращает оценку в байтах размера аргументов в памяти в несжатом виде.
 
 **Синтаксис**
 
@@ -245,40 +243,28 @@ ORDER BY key;
 
 INSERT INTO test VALUES(1, 8, 16, 32, 64,  -8, -16, -32, -64, 32.32, 64.64);
 
-SELECT key, byteSize(u8) AS `byteSize(UInt8)`, byteSize(u16) AS `byteSize(UInt16)`, byteSize(u32) AS `byteSize(UInt32)`, byteSize(u64) AS `byteSize(UInt64)` FROM test ORDER BY key ASC FORMAT Vertical;
-
-SELECT key, byteSize(i8) AS `byteSize(Int8)`, byteSize(i16) AS `byteSize(Int16)`, byteSize(i32) AS `byteSize(Int32)`, byteSize(i64) AS `byteSize(Int64)` FROM test ORDER BY key ASC FORMAT Vertical;
-
-SELECT key, byteSize(f32) AS `byteSize(Float32)`,  byteSize(f64) AS `byteSize(Float64)` FROM test ORDER BY key ASC FORMAT Vertical;
+SELECT key, byteSize(u8) AS `byteSize(UInt8)`, byteSize(u16) AS `byteSize(UInt16)`, byteSize(u32) AS `byteSize(UInt32)`, byteSize(u64) AS `byteSize(UInt64)`, byteSize(i8) AS `byteSize(Int8)`, byteSize(i16) AS `byteSize(Int16)`, byteSize(i32) AS `byteSize(Int32)`, byteSize(i64) AS `byteSize(Int64)`, byteSize(f32) AS `byteSize(Float32)`, byteSize(f64) AS `byteSize(Float64)` FROM test ORDER BY key ASC FORMAT Vertical;
 ```
 
-Результат:
+Result:
 
 ``` text
-Row 1:
-──────
-key:              1
-byteSize(UInt8):  1
-byteSize(UInt16): 2
-byteSize(UInt32): 4
-byteSize(UInt64): 8
-
-Row 1:
-──────
-key:             1
-byteSize(Int8):  1
-byteSize(Int16): 2
-byteSize(Int32): 4
-byteSize(Int64): 8
-
 Row 1:
 ──────
 key:               1
+byteSize(UInt8):   1
+byteSize(UInt16):  2
+byteSize(UInt32):  4
+byteSize(UInt64):  8
+byteSize(Int8):    1
+byteSize(Int16):   2
+byteSize(Int32):   4
+byteSize(Int64):   8
 byteSize(Float32): 4
 byteSize(Float64): 8
 ```
 
-Функция может принимать несколько аргументов и будет возвращать их совокупный размер в байтах.
+Если функция принимает несколько аргументов, то она возвращает их совокупный размер в байтах.
 
 Запрос:
 

From 35e7c15394e684da2b2744bbefae28617eb0c229 Mon Sep 17 00:00:00 2001
From: gyuton <40863448+gyuton@users.noreply.github.com>
Date: Tue, 2 Feb 2021 05:58:25 +0300
Subject: [PATCH 0512/1238] Update
 docs/ru/sql-reference/functions/array-functions.md

Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
 docs/ru/sql-reference/functions/array-functions.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md
index 3bba6f799c3..68766cafe60 100644
--- a/docs/ru/sql-reference/functions/array-functions.md
+++ b/docs/ru/sql-reference/functions/array-functions.md
@@ -1149,7 +1149,7 @@ SELECT
 arrayMin([func,] arr)
 ```
 
-**Parameters**
+**Параметры**
 
 -   `func` — функция. [Expression](../../sql-reference/data-types/special-data-types/expression.md).
 -   `arr` — массив. [Array](../../sql-reference/data-types/array.md).

From d251807ea4f80c5863c9035df53ae9ac220f6be5 Mon Sep 17 00:00:00 2001
From: keenwolf <keenwolf@gmail.com>
Date: Tue, 2 Feb 2021 11:01:20 +0800
Subject: [PATCH 0513/1238] add function timezoneOffset

---
 base/common/DateLUTImpl.h                   | 17 +++++++++++++++++
 src/Functions/DateTimeTransforms.h          | 17 +++++++++++++++++
 src/Functions/registerFunctionsDateTime.cpp |  3 +++
 src/Functions/timezoneOffset.cpp            | 19 +++++++++++++++++++
 4 files changed, 56 insertions(+)
 create mode 100644 src/Functions/timezoneOffset.cpp

diff --git a/base/common/DateLUTImpl.h b/base/common/DateLUTImpl.h
index 3698276d0e1..3ec397dc051 100644
--- a/base/common/DateLUTImpl.h
+++ b/base/common/DateLUTImpl.h
@@ -274,6 +274,23 @@ public:
         return res / 3600;
     }
 
+
+    inline time_t timezoneOffset(time_t t) const
+    {
+        DayNum index = findIndex(t);
+
+        /// Calculate daylight saving offset first, ignore the leap seconds
+        time_t res = (lut[index].date - lut[0].date) % 86400;
+        res = res > 43200 ? (86400 - res) : (0 - res);
+        /// Check if has a offset change during this day
+        if (lut[index].amount_of_offset_change != 0 && t >= lut[index].date + lut[index].time_at_offset_change)
+            res += lut[index].amount_of_offset_change;
+
+        return res + offset_at_start_of_epoch;
+    }
+
+
+
     /** Only for time zones with/when offset from UTC is multiple of five minutes.
       * This is true for all time zones: right now, all time zones have an offset that is multiple of 15 minutes.
       *
diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h
index 4ad99b528ea..883c6ed07a3 100644
--- a/src/Functions/DateTimeTransforms.h
+++ b/src/Functions/DateTimeTransforms.h
@@ -407,6 +407,23 @@ struct ToHourImpl
     using FactorTransform = ToDateImpl;
 };
 
+struct timezoneOffsetImpl
+{
+    static constexpr auto name = "timezoneOffset";
+
+    static inline time_t execute(UInt32 t, const DateLUTImpl & time_zone)
+    {
+        return time_zone.timezoneOffset(t);
+    }
+
+    static inline time_t execute(UInt16, const DateLUTImpl &)
+    {
+        return dateIsNotSupported(name);
+    }
+/////need to do
+    using FactorTransform = ToDateImpl;
+};
+
 struct ToMinuteImpl
 {
     static constexpr auto name = "toMinute";
diff --git a/src/Functions/registerFunctionsDateTime.cpp b/src/Functions/registerFunctionsDateTime.cpp
index 2cb737b8c75..441f28bfb54 100644
--- a/src/Functions/registerFunctionsDateTime.cpp
+++ b/src/Functions/registerFunctionsDateTime.cpp
@@ -69,6 +69,8 @@ void registerFunctionFormatDateTime(FunctionFactory &);
 void registerFunctionFromModifiedJulianDay(FunctionFactory &);
 void registerFunctionDateTrunc(FunctionFactory &);
 
+void registerFunctiontimezoneOffset(FunctionFactory &);
+
 void registerFunctionsDateTime(FunctionFactory & factory)
 {
     registerFunctionToYear(factory);
@@ -136,6 +138,7 @@ void registerFunctionsDateTime(FunctionFactory & factory)
     registerFunctionFormatDateTime(factory);
     registerFunctionFromModifiedJulianDay(factory);
     registerFunctionDateTrunc(factory);
+    registerFunctiontimezoneOffset(factory);
 }
 
 }
diff --git a/src/Functions/timezoneOffset.cpp b/src/Functions/timezoneOffset.cpp
new file mode 100644
index 00000000000..62e844e691b
--- /dev/null
+++ b/src/Functions/timezoneOffset.cpp
@@ -0,0 +1,19 @@
+#include <Functions/FunctionFactory.h>
+#include <Functions/DateTimeTransforms.h>
+#include <Functions/FunctionDateOrDateTimeToSomething.h>
+#include <DataTypes/DataTypesNumber.h>
+
+
+namespace DB
+{
+
+using FunctiontimezoneOffset = FunctionDateOrDateTimeToSomething<DataTypeInt32, timezoneOffsetImpl>;
+
+void registerFunctiontimezoneOffset(FunctionFactory & factory)
+{
+    factory.registerFunction<FunctiontimezoneOffset>();
+}
+
+}
+
+

From b9fff3676b460d97f786e42b25d2c9be6d6420b5 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Tue, 2 Feb 2021 06:03:00 +0300
Subject: [PATCH 0514/1238] Add exception for ANTLR tests

---
 tests/queries/skip_list.json | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json
index dbde52f040a..26c1c71de62 100644
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@@ -311,7 +311,8 @@
         "01643_system_suspend",
         "01655_plan_optimizations",
         "01475_read_subcolumns_storages",
-        "01674_clickhouse_client_query_param_cte"
+        "01674_clickhouse_client_query_param_cte",
+        "01666_merge_tree_max_query_limit"
     ],
     "parallel":
     [

From bd0ec1b9f4ff7247c2176833f154e5486c589ba2 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Tue, 2 Feb 2021 06:03:30 +0300
Subject: [PATCH 0515/1238] Remove useless header

---
 src/Formats/ProtobufWriter.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Formats/ProtobufWriter.h b/src/Formats/ProtobufWriter.h
index 44d3aac221e..52bb453aa73 100644
--- a/src/Formats/ProtobufWriter.h
+++ b/src/Formats/ProtobufWriter.h
@@ -1,7 +1,6 @@
 #pragma once
 
 #include <Core/UUID.h>
-#include <Common/UInt128.h>
 #include <common/DayNum.h>
 #include <memory>
 

From 9f8f9087797afb4fa962d6fc2e053dd441f23a47 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Tue, 2 Feb 2021 06:05:40 +0300
Subject: [PATCH 0516/1238] Better code (add suggestion from Azat)

---
 src/Interpreters/SystemLog.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h
index 101bc752f43..84a2c075355 100644
--- a/src/Interpreters/SystemLog.h
+++ b/src/Interpreters/SystemLog.h
@@ -246,7 +246,7 @@ void SystemLog<LogElement>::add(const LogElement & element)
     /// The size of allocation can be in order of a few megabytes.
     /// But this should not be accounted for query memory usage.
     /// Otherwise the tests like 01017_uniqCombined_memory_usage.sql will be flacky.
-    MemoryTracker::BlockerInThread temporarily_disable_memory_tracker;
+    MemoryTracker::BlockerInThread temporarily_disable_memory_tracker(VariableContext::Global);
 
     /// Should not log messages under mutex.
     bool queue_is_half_full = false;

From 9930bb0bf6159082c8cc5d304552dd3c1bd7d9b1 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Tue, 2 Feb 2021 06:37:24 +0300
Subject: [PATCH 0517/1238] Fix UBSan report in geoHashesInBox

---
 src/Functions/GeoHash.cpp                     | 22 ++++++++++++-------
 .../0_stateless/01684_geohash_ubsan.reference |  1 +
 .../0_stateless/01684_geohash_ubsan.sql       |  1 +
 3 files changed, 16 insertions(+), 8 deletions(-)
 create mode 100644 tests/queries/0_stateless/01684_geohash_ubsan.reference
 create mode 100644 tests/queries/0_stateless/01684_geohash_ubsan.sql

diff --git a/src/Functions/GeoHash.cpp b/src/Functions/GeoHash.cpp
index 3ebc6f3d0fc..595bcacd41a 100644
--- a/src/Functions/GeoHash.cpp
+++ b/src/Functions/GeoHash.cpp
@@ -216,9 +216,7 @@ inline Float64 getSpan(uint8_t precision, CoordType type)
 inline uint8_t geohashPrecision(uint8_t precision)
 {
     if (precision == 0 || precision > MAX_PRECISION)
-    {
         precision = MAX_PRECISION;
-    }
 
     return precision;
 }
@@ -281,13 +279,21 @@ GeohashesInBoxPreparedArgs geohashesInBoxPrepare(
         return {};
     }
 
-    longitude_min = std::max(longitude_min, LON_MIN);
-    longitude_max = std::min(longitude_max, LON_MAX);
-    latitude_min = std::max(latitude_min, LAT_MIN);
-    latitude_max = std::min(latitude_max, LAT_MAX);
+    auto saturate = [](Float64 & value, Float64 min, Float64 max)
+    {
+        if (value < min)
+            value = min;
+        else if (value > max)
+            value = max;
+    };
 
-    const auto lon_step = getSpan(precision, LONGITUDE);
-    const auto lat_step = getSpan(precision, LATITUDE);
+    saturate(longitude_min, LON_MIN, LON_MAX);
+    saturate(longitude_max, LON_MIN, LON_MAX);
+    saturate(latitude_min, LAT_MIN, LAT_MAX);
+    saturate(latitude_max, LAT_MIN, LAT_MAX);
+
+    Float64 lon_step = getSpan(precision, LONGITUDE);
+    Float64 lat_step = getSpan(precision, LATITUDE);
 
     /// Align max to the right (or up) border of geohash grid cell to ensure that cell is in result.
     Float64 lon_min = floor(longitude_min / lon_step) * lon_step;
diff --git a/tests/queries/0_stateless/01684_geohash_ubsan.reference b/tests/queries/0_stateless/01684_geohash_ubsan.reference
new file mode 100644
index 00000000000..2ae4be53dea
--- /dev/null
+++ b/tests/queries/0_stateless/01684_geohash_ubsan.reference
@@ -0,0 +1 @@
+['ypzpgxczgpyr']
diff --git a/tests/queries/0_stateless/01684_geohash_ubsan.sql b/tests/queries/0_stateless/01684_geohash_ubsan.sql
new file mode 100644
index 00000000000..e7eb9c526b4
--- /dev/null
+++ b/tests/queries/0_stateless/01684_geohash_ubsan.sql
@@ -0,0 +1 @@
+SELECT geohashesInBox(100.0000991821289, 100.0000991821289, 1000.0001220703125, 1000.0001220703125, 0);

From 5f7a852b921f03fce00111f671edc55fb9eda856 Mon Sep 17 00:00:00 2001
From: PHO <pho@cielonegro.org>
Date: Tue, 2 Feb 2021 12:36:40 +0900
Subject: [PATCH 0518/1238] Fix a segfault in fromModifiedJulianDay()

It was crashing when the argument type was Nullable(T) where T was any integral type other than Int32.
---
 src/Functions/fromModifiedJulianDay.cpp                       | 4 ++--
 .../queries/0_stateless/01544_fromModifiedJulianDay.reference | 1 +
 tests/queries/0_stateless/01544_fromModifiedJulianDay.sql     | 1 +
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/Functions/fromModifiedJulianDay.cpp b/src/Functions/fromModifiedJulianDay.cpp
index 636512db0de..cd5699bfac5 100644
--- a/src/Functions/fromModifiedJulianDay.cpp
+++ b/src/Functions/fromModifiedJulianDay.cpp
@@ -163,7 +163,7 @@ namespace DB
 
         FunctionBaseImplPtr build(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override
         {
-            const DataTypePtr & from_type = arguments[0].type;
+            const DataTypePtr & from_type = removeNullable(arguments[0].type);
             DataTypes argument_types = { from_type };
             FunctionBaseImplPtr base;
             auto call = [&](const auto & types) -> bool
@@ -185,7 +185,7 @@ namespace DB
                 * here causes a SEGV. So we must somehow create a
                 * dummy implementation and return it.
                 */
-            if (WhichDataType(from_type).isNullable()) // Nullable(Nothing)
+            if (WhichDataType(from_type).isNothing()) // Nullable(Nothing)
                 return std::make_unique<FunctionBaseFromModifiedJulianDay<Name, DataTypeInt32, nullOnErrors>>(argument_types, return_type);
             else
                 // Should not happen.
diff --git a/tests/queries/0_stateless/01544_fromModifiedJulianDay.reference b/tests/queries/0_stateless/01544_fromModifiedJulianDay.reference
index 6f71b6263c0..443b90b80a5 100644
--- a/tests/queries/0_stateless/01544_fromModifiedJulianDay.reference
+++ b/tests/queries/0_stateless/01544_fromModifiedJulianDay.reference
@@ -3,6 +3,7 @@ Invocation with constant
 1858-11-17
 2020-11-01
 \N
+\N
 or null
 2020-11-01
 \N
diff --git a/tests/queries/0_stateless/01544_fromModifiedJulianDay.sql b/tests/queries/0_stateless/01544_fromModifiedJulianDay.sql
index 4e50351d191..5e682a942d5 100644
--- a/tests/queries/0_stateless/01544_fromModifiedJulianDay.sql
+++ b/tests/queries/0_stateless/01544_fromModifiedJulianDay.sql
@@ -5,6 +5,7 @@ SELECT fromModifiedJulianDay(-1);
 SELECT fromModifiedJulianDay(0);
 SELECT fromModifiedJulianDay(59154);
 SELECT fromModifiedJulianDay(NULL);
+SELECT fromModifiedJulianDay(CAST(NULL, 'Nullable(Int64)'));
 SELECT fromModifiedJulianDay(-678942); -- { serverError 490 }
 SELECT fromModifiedJulianDay(2973484); -- { serverError 490 }
 

From 656cd583f75fef2d50144f506d68749e107fd8dd Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Tue, 2 Feb 2021 06:46:54 +0300
Subject: [PATCH 0519/1238] Add MSan annotation for system.stack_trace

---
 src/Common/StackTrace.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp
index b285a45bdc5..44f6b9e5443 100644
--- a/src/Common/StackTrace.cpp
+++ b/src/Common/StackTrace.cpp
@@ -261,6 +261,9 @@ StackTrace::StackTrace(const ucontext_t & signal_context)
 {
     tryCapture();
 
+    /// This variable from signal handler is not instrumented by Memory Sanitizer.
+    __msan_unpoison(&signal_context, sizeof(signal_context));
+
     void * caller_address = getCallerAddress(signal_context);
 
     if (size == 0 && caller_address)

From 8c0ec5105b67c73799a6d8774d7bef939ea2668c Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Tue, 2 Feb 2021 06:53:11 +0300
Subject: [PATCH 0520/1238] Add a patch from @FishermanZzhang, #19952

---
 programs/copier/ClusterCopier.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/programs/copier/ClusterCopier.cpp b/programs/copier/ClusterCopier.cpp
index ca09e7c1889..d44e24dca49 100644
--- a/programs/copier/ClusterCopier.cpp
+++ b/programs/copier/ClusterCopier.cpp
@@ -642,7 +642,7 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t
                 query_deduplicate_ast_string += " OPTIMIZE TABLE " + getQuotedTable(original_table) +
                                                 ((partition_name == "'all'") ? " PARTITION ID " : " PARTITION ") + partition_name + " DEDUPLICATE;";
 
-                LOG_DEBUG(log, "Executing OPTIMIZE DEDUPLICATE query: {}", query_alter_ast_string);
+                LOG_DEBUG(log, "Executing OPTIMIZE DEDUPLICATE query: {}", query_deduplicate_ast_string);
 
                 UInt64 num_nodes = executeQueryOnCluster(
                         task_table.cluster_push,

From 078011ef2d15258b1f23b8b1b794e888b3078975 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Tue, 2 Feb 2021 07:16:47 +0300
Subject: [PATCH 0521/1238] Allow conversion from double to float in function
 JSONExtract beacuse the users want that

---
 src/Functions/FunctionsJSON.h                 | 12 +++++++++-
 ...685_json_extract_double_as_float.reference | 10 ++++++++
 .../01685_json_extract_double_as_float.sql    | 24 +++++++++++++++++++
 3 files changed, 45 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/01685_json_extract_double_as_float.reference
 create mode 100644 tests/queries/0_stateless/01685_json_extract_double_as_float.sql

diff --git a/src/Functions/FunctionsJSON.h b/src/Functions/FunctionsJSON.h
index aea5829eaef..f066bb1029a 100644
--- a/src/Functions/FunctionsJSON.h
+++ b/src/Functions/FunctionsJSON.h
@@ -25,6 +25,7 @@
 #include <DataTypes/DataTypeTuple.h>
 #include <Interpreters/Context.h>
 #include <ext/range.h>
+#include <type_traits>
 #include <boost/tti/has_member_function.hpp>
 
 #if !defined(ARCADIA_BUILD)
@@ -507,11 +508,20 @@ public:
         }
         else if (element.isDouble())
         {
-            if (!accurate::convertNumeric(element.getDouble(), value))
+            if constexpr (std::is_floating_point_v<NumberType>)
+            {
+                /// We permit inaccurate conversion of double to float.
+                /// Example: double 0.1 from JSON is not representable in float.
+                /// But it will be more convenient for user to perform conversion.
+                value = element.getDouble();
+            }
+            else if (!accurate::convertNumeric(element.getDouble(), value))
                 return false;
         }
         else if (element.isBool() && is_integer_v<NumberType> && convert_bool_to_integer)
+        {
             value = static_cast<NumberType>(element.getBool());
+        }
         else
             return false;
 
diff --git a/tests/queries/0_stateless/01685_json_extract_double_as_float.reference b/tests/queries/0_stateless/01685_json_extract_double_as_float.reference
new file mode 100644
index 00000000000..f3f4206b425
--- /dev/null
+++ b/tests/queries/0_stateless/01685_json_extract_double_as_float.reference
@@ -0,0 +1,10 @@
+1.1	1.1	1.1	1.1
+0.01	0.01	0.01	0.01
+0
+\N
+-1e300
+-inf
+0
+0
+0
+0
diff --git a/tests/queries/0_stateless/01685_json_extract_double_as_float.sql b/tests/queries/0_stateless/01685_json_extract_double_as_float.sql
new file mode 100644
index 00000000000..c9aa2518085
--- /dev/null
+++ b/tests/queries/0_stateless/01685_json_extract_double_as_float.sql
@@ -0,0 +1,24 @@
+WITH '{ "v":1.1}' AS raw
+SELECT
+    JSONExtract(raw, 'v', 'float') AS float32_1,
+    JSONExtract(raw, 'v', 'Float32') AS float32_2,
+    JSONExtractFloat(raw, 'v') AS float64_1,
+    JSONExtract(raw, 'v', 'double') AS float64_2;
+
+WITH '{ "v":1E-2}' AS raw
+SELECT
+    JSONExtract(raw, 'v', 'float') AS float32_1,
+    JSONExtract(raw, 'v', 'Float32') AS float32_2,
+    JSONExtractFloat(raw, 'v') AS float64_1,
+    JSONExtract(raw, 'v', 'double') AS float64_2;
+
+SELECT JSONExtract('{"v":1.1}', 'v', 'UInt64');
+SELECT JSONExtract('{"v":1.1}', 'v', 'Nullable(UInt64)');
+
+SELECT JSONExtract('{"v":-1e300}', 'v', 'Float64');
+SELECT JSONExtract('{"v":-1e300}', 'v', 'Float32');
+
+SELECT JSONExtract('{"v":-1e300}', 'v', 'UInt64');
+SELECT JSONExtract('{"v":-1e300}', 'v', 'Int64');
+SELECT JSONExtract('{"v":-1e300}', 'v', 'UInt8');
+SELECT JSONExtract('{"v":-1e300}', 'v', 'Int8');

From f21e22ae2c66ed3a1e987a4561fe17cca762278c Mon Sep 17 00:00:00 2001
From: keenwolf <keenwolf@gmail.com>
Date: Tue, 2 Feb 2021 12:22:45 +0800
Subject: [PATCH 0522/1238] add test for function timezoneOffset

---
 .../01699_timezoneOffset.reference            | 1202 +++++++++++++++++
 .../0_stateless/01699_timezoneOffset.sql      |   20 +
 2 files changed, 1222 insertions(+)
 create mode 100644 tests/queries/0_stateless/01699_timezoneOffset.reference
 create mode 100644 tests/queries/0_stateless/01699_timezoneOffset.sql

diff --git a/tests/queries/0_stateless/01699_timezoneOffset.reference b/tests/queries/0_stateless/01699_timezoneOffset.reference
new file mode 100644
index 00000000000..f9a521dc460
--- /dev/null
+++ b/tests/queries/0_stateless/01699_timezoneOffset.reference
@@ -0,0 +1,1202 @@
+0	1981-04-01 00:00:00	10800	354920400
+1	1981-04-01 00:10:00	10800	354921000
+2	1981-04-01 00:20:00	10800	354921600
+3	1981-04-01 00:30:00	10800	354922200
+4	1981-04-01 00:40:00	10800	354922800
+5	1981-04-01 00:50:00	10800	354923400
+6	1981-04-01 01:00:00	10800	354924000
+7	1981-04-01 01:10:00	10800	354924600
+8	1981-04-01 01:20:00	10800	354925200
+9	1981-04-01 01:30:00	10800	354925800
+10	1981-04-01 01:40:00	10800	354926400
+11	1981-04-01 01:50:00	10800	354927000
+12	1981-04-01 02:00:00	10800	354927600
+13	1981-04-01 02:10:00	10800	354928200
+14	1981-04-01 02:20:00	10800	354928800
+15	1981-04-01 02:30:00	10800	354929400
+16	1981-04-01 02:40:00	10800	354930000
+17	1981-04-01 02:50:00	10800	354930600
+18	1981-04-01 03:00:00	10800	354931200
+19	1981-04-01 03:10:00	10800	354931800
+20	1981-04-01 03:20:00	10800	354932400
+21	1981-04-01 03:30:00	10800	354933000
+22	1981-04-01 03:40:00	10800	354933600
+23	1981-04-01 03:50:00	10800	354934200
+24	1981-04-01 04:00:00	10800	354934800
+25	1981-04-01 04:10:00	10800	354935400
+26	1981-04-01 04:20:00	10800	354936000
+27	1981-04-01 04:30:00	10800	354936600
+28	1981-04-01 04:40:00	10800	354937200
+29	1981-04-01 04:50:00	10800	354937800
+30	1981-04-01 05:00:00	10800	354938400
+31	1981-04-01 05:10:00	10800	354939000
+32	1981-04-01 05:20:00	10800	354939600
+33	1981-04-01 05:30:00	10800	354940200
+34	1981-04-01 05:40:00	10800	354940800
+35	1981-04-01 05:50:00	10800	354941400
+36	1981-04-01 06:00:00	10800	354942000
+37	1981-04-01 06:10:00	10800	354942600
+38	1981-04-01 06:20:00	10800	354943200
+39	1981-04-01 06:30:00	10800	354943800
+40	1981-04-01 06:40:00	10800	354944400
+41	1981-04-01 06:50:00	10800	354945000
+42	1981-04-01 07:00:00	10800	354945600
+43	1981-04-01 07:10:00	10800	354946200
+44	1981-04-01 07:20:00	10800	354946800
+45	1981-04-01 07:30:00	10800	354947400
+46	1981-04-01 07:40:00	10800	354948000
+47	1981-04-01 07:50:00	10800	354948600
+48	1981-04-01 08:00:00	10800	354949200
+49	1981-04-01 08:10:00	10800	354949800
+50	1981-04-01 08:20:00	10800	354950400
+51	1981-04-01 08:30:00	10800	354951000
+52	1981-04-01 08:40:00	10800	354951600
+53	1981-04-01 08:50:00	10800	354952200
+54	1981-04-01 09:00:00	10800	354952800
+55	1981-04-01 09:10:00	10800	354953400
+56	1981-04-01 09:20:00	10800	354954000
+57	1981-04-01 09:30:00	10800	354954600
+58	1981-04-01 09:40:00	10800	354955200
+59	1981-04-01 09:50:00	10800	354955800
+60	1981-04-01 10:00:00	10800	354956400
+61	1981-04-01 10:10:00	10800	354957000
+62	1981-04-01 10:20:00	10800	354957600
+63	1981-04-01 10:30:00	10800	354958200
+64	1981-04-01 10:40:00	10800	354958800
+65	1981-04-01 10:50:00	10800	354959400
+66	1981-04-01 11:00:00	10800	354960000
+67	1981-04-01 11:10:00	10800	354960600
+68	1981-04-01 11:20:00	10800	354961200
+69	1981-04-01 11:30:00	10800	354961800
+70	1981-04-01 11:40:00	10800	354962400
+71	1981-04-01 11:50:00	10800	354963000
+72	1981-04-01 12:00:00	10800	354963600
+73	1981-04-01 12:10:00	10800	354964200
+74	1981-04-01 12:20:00	10800	354964800
+75	1981-04-01 12:30:00	10800	354965400
+76	1981-04-01 12:40:00	10800	354966000
+77	1981-04-01 12:50:00	10800	354966600
+78	1981-04-01 13:00:00	10800	354967200
+79	1981-04-01 13:10:00	10800	354967800
+80	1981-04-01 13:20:00	10800	354968400
+81	1981-04-01 13:30:00	10800	354969000
+82	1981-04-01 13:40:00	10800	354969600
+83	1981-04-01 13:50:00	10800	354970200
+84	1981-04-01 14:00:00	10800	354970800
+85	1981-04-01 14:10:00	10800	354971400
+86	1981-04-01 14:20:00	10800	354972000
+87	1981-04-01 14:30:00	10800	354972600
+88	1981-04-01 14:40:00	10800	354973200
+89	1981-04-01 14:50:00	10800	354973800
+90	1981-04-01 15:00:00	10800	354974400
+91	1981-04-01 15:10:00	10800	354975000
+92	1981-04-01 15:20:00	10800	354975600
+93	1981-04-01 15:30:00	10800	354976200
+94	1981-04-01 15:40:00	10800	354976800
+95	1981-04-01 15:50:00	10800	354977400
+96	1981-04-01 16:00:00	10800	354978000
+97	1981-04-01 16:10:00	10800	354978600
+98	1981-04-01 16:20:00	10800	354979200
+99	1981-04-01 16:30:00	10800	354979800
+100	1981-04-01 16:40:00	10800	354980400
+101	1981-04-01 16:50:00	10800	354981000
+102	1981-04-01 17:00:00	10800	354981600
+103	1981-04-01 17:10:00	10800	354982200
+104	1981-04-01 17:20:00	10800	354982800
+105	1981-04-01 17:30:00	10800	354983400
+106	1981-04-01 17:40:00	10800	354984000
+107	1981-04-01 17:50:00	10800	354984600
+108	1981-04-01 18:00:00	10800	354985200
+109	1981-04-01 18:10:00	10800	354985800
+110	1981-04-01 18:20:00	10800	354986400
+111	1981-04-01 18:30:00	10800	354987000
+112	1981-04-01 18:40:00	10800	354987600
+113	1981-04-01 18:50:00	10800	354988200
+114	1981-04-01 19:00:00	10800	354988800
+115	1981-04-01 19:10:00	10800	354989400
+116	1981-04-01 19:20:00	10800	354990000
+117	1981-04-01 19:30:00	10800	354990600
+118	1981-04-01 19:40:00	10800	354991200
+119	1981-04-01 19:50:00	10800	354991800
+120	1981-04-01 20:00:00	10800	354992400
+121	1981-04-01 20:10:00	10800	354993000
+122	1981-04-01 20:20:00	10800	354993600
+123	1981-04-01 20:30:00	10800	354994200
+124	1981-04-01 20:40:00	10800	354994800
+125	1981-04-01 20:50:00	10800	354995400
+126	1981-04-01 21:00:00	10800	354996000
+127	1981-04-01 21:10:00	10800	354996600
+128	1981-04-01 21:20:00	10800	354997200
+129	1981-04-01 21:30:00	10800	354997800
+130	1981-04-01 21:40:00	10800	354998400
+131	1981-04-01 21:50:00	10800	354999000
+132	1981-04-01 22:00:00	10800	354999600
+133	1981-04-01 22:10:00	10800	355000200
+134	1981-04-01 22:20:00	10800	355000800
+135	1981-04-01 22:30:00	10800	355001400
+136	1981-04-01 22:40:00	10800	355002000
+137	1981-04-01 22:50:00	10800	355002600
+138	1981-04-02 00:00:00	14400	355003200
+139	1981-04-02 00:10:00	14400	355003800
+140	1981-04-02 00:20:00	14400	355004400
+141	1981-04-02 00:30:00	14400	355005000
+142	1981-04-02 00:40:00	14400	355005600
+143	1981-04-02 00:50:00	14400	355006200
+144	1981-04-02 01:00:00	14400	355006800
+145	1981-04-02 01:10:00	14400	355007400
+146	1981-04-02 01:20:00	14400	355008000
+147	1981-04-02 01:30:00	14400	355008600
+148	1981-04-02 01:40:00	14400	355009200
+149	1981-04-02 01:50:00	14400	355009800
+150	1981-04-02 02:00:00	14400	355010400
+151	1981-04-02 02:10:00	14400	355011000
+152	1981-04-02 02:20:00	14400	355011600
+153	1981-04-02 02:30:00	14400	355012200
+154	1981-04-02 02:40:00	14400	355012800
+155	1981-04-02 02:50:00	14400	355013400
+156	1981-04-02 03:00:00	14400	355014000
+157	1981-04-02 03:10:00	14400	355014600
+158	1981-04-02 03:20:00	14400	355015200
+159	1981-04-02 03:30:00	14400	355015800
+160	1981-04-02 03:40:00	14400	355016400
+161	1981-04-02 03:50:00	14400	355017000
+162	1981-04-02 04:00:00	14400	355017600
+163	1981-04-02 04:10:00	14400	355018200
+164	1981-04-02 04:20:00	14400	355018800
+165	1981-04-02 04:30:00	14400	355019400
+166	1981-04-02 04:40:00	14400	355020000
+167	1981-04-02 04:50:00	14400	355020600
+168	1981-04-02 05:00:00	14400	355021200
+169	1981-04-02 05:10:00	14400	355021800
+170	1981-04-02 05:20:00	14400	355022400
+171	1981-04-02 05:30:00	14400	355023000
+172	1981-04-02 05:40:00	14400	355023600
+173	1981-04-02 05:50:00	14400	355024200
+174	1981-04-02 06:00:00	14400	355024800
+175	1981-04-02 06:10:00	14400	355025400
+176	1981-04-02 06:20:00	14400	355026000
+177	1981-04-02 06:30:00	14400	355026600
+178	1981-04-02 06:40:00	14400	355027200
+179	1981-04-02 06:50:00	14400	355027800
+180	1981-04-02 07:00:00	14400	355028400
+181	1981-04-02 07:10:00	14400	355029000
+182	1981-04-02 07:20:00	14400	355029600
+183	1981-04-02 07:30:00	14400	355030200
+184	1981-04-02 07:40:00	14400	355030800
+185	1981-04-02 07:50:00	14400	355031400
+186	1981-04-02 08:00:00	14400	355032000
+187	1981-04-02 08:10:00	14400	355032600
+188	1981-04-02 08:20:00	14400	355033200
+189	1981-04-02 08:30:00	14400	355033800
+190	1981-04-02 08:40:00	14400	355034400
+191	1981-04-02 08:50:00	14400	355035000
+192	1981-04-02 09:00:00	14400	355035600
+193	1981-04-02 09:10:00	14400	355036200
+194	1981-04-02 09:20:00	14400	355036800
+195	1981-04-02 09:30:00	14400	355037400
+196	1981-04-02 09:40:00	14400	355038000
+197	1981-04-02 09:50:00	14400	355038600
+198	1981-04-02 10:00:00	14400	355039200
+199	1981-04-02 10:10:00	14400	355039800
+0	1981-09-30 00:00:00	14400	370641600
+1	1981-09-30 00:10:00	14400	370642200
+2	1981-09-30 00:20:00	14400	370642800
+3	1981-09-30 00:30:00	14400	370643400
+4	1981-09-30 00:40:00	14400	370644000
+5	1981-09-30 00:50:00	14400	370644600
+6	1981-09-30 01:00:00	14400	370645200
+7	1981-09-30 01:10:00	14400	370645800
+8	1981-09-30 01:20:00	14400	370646400
+9	1981-09-30 01:30:00	14400	370647000
+10	1981-09-30 01:40:00	14400	370647600
+11	1981-09-30 01:50:00	14400	370648200
+12	1981-09-30 02:00:00	14400	370648800
+13	1981-09-30 02:10:00	14400	370649400
+14	1981-09-30 02:20:00	14400	370650000
+15	1981-09-30 02:30:00	14400	370650600
+16	1981-09-30 02:40:00	14400	370651200
+17	1981-09-30 02:50:00	14400	370651800
+18	1981-09-30 03:00:00	14400	370652400
+19	1981-09-30 03:10:00	14400	370653000
+20	1981-09-30 03:20:00	14400	370653600
+21	1981-09-30 03:30:00	14400	370654200
+22	1981-09-30 03:40:00	14400	370654800
+23	1981-09-30 03:50:00	14400	370655400
+24	1981-09-30 04:00:00	14400	370656000
+25	1981-09-30 04:10:00	14400	370656600
+26	1981-09-30 04:20:00	14400	370657200
+27	1981-09-30 04:30:00	14400	370657800
+28	1981-09-30 04:40:00	14400	370658400
+29	1981-09-30 04:50:00	14400	370659000
+30	1981-09-30 05:00:00	14400	370659600
+31	1981-09-30 05:10:00	14400	370660200
+32	1981-09-30 05:20:00	14400	370660800
+33	1981-09-30 05:30:00	14400	370661400
+34	1981-09-30 05:40:00	14400	370662000
+35	1981-09-30 05:50:00	14400	370662600
+36	1981-09-30 06:00:00	14400	370663200
+37	1981-09-30 06:10:00	14400	370663800
+38	1981-09-30 06:20:00	14400	370664400
+39	1981-09-30 06:30:00	14400	370665000
+40	1981-09-30 06:40:00	14400	370665600
+41	1981-09-30 06:50:00	14400	370666200
+42	1981-09-30 07:00:00	14400	370666800
+43	1981-09-30 07:10:00	14400	370667400
+44	1981-09-30 07:20:00	14400	370668000
+45	1981-09-30 07:30:00	14400	370668600
+46	1981-09-30 07:40:00	14400	370669200
+47	1981-09-30 07:50:00	14400	370669800
+48	1981-09-30 08:00:00	14400	370670400
+49	1981-09-30 08:10:00	14400	370671000
+50	1981-09-30 08:20:00	14400	370671600
+51	1981-09-30 08:30:00	14400	370672200
+52	1981-09-30 08:40:00	14400	370672800
+53	1981-09-30 08:50:00	14400	370673400
+54	1981-09-30 09:00:00	14400	370674000
+55	1981-09-30 09:10:00	14400	370674600
+56	1981-09-30 09:20:00	14400	370675200
+57	1981-09-30 09:30:00	14400	370675800
+58	1981-09-30 09:40:00	14400	370676400
+59	1981-09-30 09:50:00	14400	370677000
+60	1981-09-30 10:00:00	14400	370677600
+61	1981-09-30 10:10:00	14400	370678200
+62	1981-09-30 10:20:00	14400	370678800
+63	1981-09-30 10:30:00	14400	370679400
+64	1981-09-30 10:40:00	14400	370680000
+65	1981-09-30 10:50:00	14400	370680600
+66	1981-09-30 11:00:00	14400	370681200
+67	1981-09-30 11:10:00	14400	370681800
+68	1981-09-30 11:20:00	14400	370682400
+69	1981-09-30 11:30:00	14400	370683000
+70	1981-09-30 11:40:00	14400	370683600
+71	1981-09-30 11:50:00	14400	370684200
+72	1981-09-30 12:00:00	14400	370684800
+73	1981-09-30 12:10:00	14400	370685400
+74	1981-09-30 12:20:00	14400	370686000
+75	1981-09-30 12:30:00	14400	370686600
+76	1981-09-30 12:40:00	14400	370687200
+77	1981-09-30 12:50:00	14400	370687800
+78	1981-09-30 13:00:00	14400	370688400
+79	1981-09-30 13:10:00	14400	370689000
+80	1981-09-30 13:20:00	14400	370689600
+81	1981-09-30 13:30:00	14400	370690200
+82	1981-09-30 13:40:00	14400	370690800
+83	1981-09-30 13:50:00	14400	370691400
+84	1981-09-30 14:00:00	14400	370692000
+85	1981-09-30 14:10:00	14400	370692600
+86	1981-09-30 14:20:00	14400	370693200
+87	1981-09-30 14:30:00	14400	370693800
+88	1981-09-30 14:40:00	14400	370694400
+89	1981-09-30 14:50:00	14400	370695000
+90	1981-09-30 15:00:00	14400	370695600
+91	1981-09-30 15:10:00	14400	370696200
+92	1981-09-30 15:20:00	14400	370696800
+93	1981-09-30 15:30:00	14400	370697400
+94	1981-09-30 15:40:00	14400	370698000
+95	1981-09-30 15:50:00	14400	370698600
+96	1981-09-30 16:00:00	14400	370699200
+97	1981-09-30 16:10:00	14400	370699800
+98	1981-09-30 16:20:00	14400	370700400
+99	1981-09-30 16:30:00	14400	370701000
+100	1981-09-30 16:40:00	14400	370701600
+101	1981-09-30 16:50:00	14400	370702200
+102	1981-09-30 17:00:00	14400	370702800
+103	1981-09-30 17:10:00	14400	370703400
+104	1981-09-30 17:20:00	14400	370704000
+105	1981-09-30 17:30:00	14400	370704600
+106	1981-09-30 17:40:00	14400	370705200
+107	1981-09-30 17:50:00	14400	370705800
+108	1981-09-30 18:00:00	14400	370706400
+109	1981-09-30 18:10:00	14400	370707000
+110	1981-09-30 18:20:00	14400	370707600
+111	1981-09-30 18:30:00	14400	370708200
+112	1981-09-30 18:40:00	14400	370708800
+113	1981-09-30 18:50:00	14400	370709400
+114	1981-09-30 19:00:00	14400	370710000
+115	1981-09-30 19:10:00	14400	370710600
+116	1981-09-30 19:20:00	14400	370711200
+117	1981-09-30 19:30:00	14400	370711800
+118	1981-09-30 19:40:00	14400	370712400
+119	1981-09-30 19:50:00	14400	370713000
+120	1981-09-30 20:00:00	14400	370713600
+121	1981-09-30 20:10:00	14400	370714200
+122	1981-09-30 20:20:00	14400	370714800
+123	1981-09-30 20:30:00	14400	370715400
+124	1981-09-30 20:40:00	14400	370716000
+125	1981-09-30 20:50:00	14400	370716600
+126	1981-09-30 21:00:00	14400	370717200
+127	1981-09-30 21:10:00	14400	370717800
+128	1981-09-30 21:20:00	14400	370718400
+129	1981-09-30 21:30:00	14400	370719000
+130	1981-09-30 21:40:00	14400	370719600
+131	1981-09-30 21:50:00	14400	370720200
+132	1981-09-30 22:00:00	14400	370720800
+133	1981-09-30 22:10:00	14400	370721400
+134	1981-09-30 22:20:00	14400	370722000
+135	1981-09-30 22:30:00	14400	370722600
+136	1981-09-30 22:40:00	14400	370723200
+137	1981-09-30 22:50:00	14400	370723800
+138	1981-09-30 23:00:00	14400	370724400
+139	1981-09-30 23:10:00	14400	370725000
+140	1981-09-30 23:20:00	14400	370725600
+141	1981-09-30 23:30:00	14400	370726200
+142	1981-09-30 23:40:00	14400	370726800
+143	1981-09-30 23:50:00	14400	370727400
+144	1981-09-30 23:00:00	10800	370728000
+145	1981-09-30 23:10:00	10800	370728600
+146	1981-09-30 23:20:00	10800	370729200
+147	1981-09-30 23:30:00	10800	370729800
+148	1981-09-30 23:40:00	10800	370730400
+149	1981-09-30 23:50:00	10800	370731000
+150	1981-10-01 00:00:00	10800	370731600
+151	1981-10-01 00:10:00	10800	370732200
+152	1981-10-01 00:20:00	10800	370732800
+153	1981-10-01 00:30:00	10800	370733400
+154	1981-10-01 00:40:00	10800	370734000
+155	1981-10-01 00:50:00	10800	370734600
+156	1981-10-01 01:00:00	10800	370735200
+157	1981-10-01 01:10:00	10800	370735800
+158	1981-10-01 01:20:00	10800	370736400
+159	1981-10-01 01:30:00	10800	370737000
+160	1981-10-01 01:40:00	10800	370737600
+161	1981-10-01 01:50:00	10800	370738200
+162	1981-10-01 02:00:00	10800	370738800
+163	1981-10-01 02:10:00	10800	370739400
+164	1981-10-01 02:20:00	10800	370740000
+165	1981-10-01 02:30:00	10800	370740600
+166	1981-10-01 02:40:00	10800	370741200
+167	1981-10-01 02:50:00	10800	370741800
+168	1981-10-01 03:00:00	10800	370742400
+169	1981-10-01 03:10:00	10800	370743000
+170	1981-10-01 03:20:00	10800	370743600
+171	1981-10-01 03:30:00	10800	370744200
+172	1981-10-01 03:40:00	10800	370744800
+173	1981-10-01 03:50:00	10800	370745400
+174	1981-10-01 04:00:00	10800	370746000
+175	1981-10-01 04:10:00	10800	370746600
+176	1981-10-01 04:20:00	10800	370747200
+177	1981-10-01 04:30:00	10800	370747800
+178	1981-10-01 04:40:00	10800	370748400
+179	1981-10-01 04:50:00	10800	370749000
+180	1981-10-01 05:00:00	10800	370749600
+181	1981-10-01 05:10:00	10800	370750200
+182	1981-10-01 05:20:00	10800	370750800
+183	1981-10-01 05:30:00	10800	370751400
+184	1981-10-01 05:40:00	10800	370752000
+185	1981-10-01 05:50:00	10800	370752600
+186	1981-10-01 06:00:00	10800	370753200
+187	1981-10-01 06:10:00	10800	370753800
+188	1981-10-01 06:20:00	10800	370754400
+189	1981-10-01 06:30:00	10800	370755000
+190	1981-10-01 06:40:00	10800	370755600
+191	1981-10-01 06:50:00	10800	370756200
+192	1981-10-01 07:00:00	10800	370756800
+193	1981-10-01 07:10:00	10800	370757400
+194	1981-10-01 07:20:00	10800	370758000
+195	1981-10-01 07:30:00	10800	370758600
+196	1981-10-01 07:40:00	10800	370759200
+197	1981-10-01 07:50:00	10800	370759800
+198	1981-10-01 08:00:00	10800	370760400
+199	1981-10-01 08:10:00	10800	370761000
+0	2020-03-21 00:00:00	12600	1584736200
+1	2020-03-21 00:10:00	12600	1584736800
+2	2020-03-21 00:20:00	12600	1584737400
+3	2020-03-21 00:30:00	12600	1584738000
+4	2020-03-21 00:40:00	12600	1584738600
+5	2020-03-21 00:50:00	12600	1584739200
+6	2020-03-21 01:00:00	12600	1584739800
+7	2020-03-21 01:10:00	12600	1584740400
+8	2020-03-21 01:20:00	12600	1584741000
+9	2020-03-21 01:30:00	12600	1584741600
+10	2020-03-21 01:40:00	12600	1584742200
+11	2020-03-21 01:50:00	12600	1584742800
+12	2020-03-21 02:00:00	12600	1584743400
+13	2020-03-21 02:10:00	12600	1584744000
+14	2020-03-21 02:20:00	12600	1584744600
+15	2020-03-21 02:30:00	12600	1584745200
+16	2020-03-21 02:40:00	12600	1584745800
+17	2020-03-21 02:50:00	12600	1584746400
+18	2020-03-21 03:00:00	12600	1584747000
+19	2020-03-21 03:10:00	12600	1584747600
+20	2020-03-21 03:20:00	12600	1584748200
+21	2020-03-21 03:30:00	12600	1584748800
+22	2020-03-21 03:40:00	12600	1584749400
+23	2020-03-21 03:50:00	12600	1584750000
+24	2020-03-21 04:00:00	12600	1584750600
+25	2020-03-21 04:10:00	12600	1584751200
+26	2020-03-21 04:20:00	12600	1584751800
+27	2020-03-21 04:30:00	12600	1584752400
+28	2020-03-21 04:40:00	12600	1584753000
+29	2020-03-21 04:50:00	12600	1584753600
+30	2020-03-21 05:00:00	12600	1584754200
+31	2020-03-21 05:10:00	12600	1584754800
+32	2020-03-21 05:20:00	12600	1584755400
+33	2020-03-21 05:30:00	12600	1584756000
+34	2020-03-21 05:40:00	12600	1584756600
+35	2020-03-21 05:50:00	12600	1584757200
+36	2020-03-21 06:00:00	12600	1584757800
+37	2020-03-21 06:10:00	12600	1584758400
+38	2020-03-21 06:20:00	12600	1584759000
+39	2020-03-21 06:30:00	12600	1584759600
+40	2020-03-21 06:40:00	12600	1584760200
+41	2020-03-21 06:50:00	12600	1584760800
+42	2020-03-21 07:00:00	12600	1584761400
+43	2020-03-21 07:10:00	12600	1584762000
+44	2020-03-21 07:20:00	12600	1584762600
+45	2020-03-21 07:30:00	12600	1584763200
+46	2020-03-21 07:40:00	12600	1584763800
+47	2020-03-21 07:50:00	12600	1584764400
+48	2020-03-21 08:00:00	12600	1584765000
+49	2020-03-21 08:10:00	12600	1584765600
+50	2020-03-21 08:20:00	12600	1584766200
+51	2020-03-21 08:30:00	12600	1584766800
+52	2020-03-21 08:40:00	12600	1584767400
+53	2020-03-21 08:50:00	12600	1584768000
+54	2020-03-21 09:00:00	12600	1584768600
+55	2020-03-21 09:10:00	12600	1584769200
+56	2020-03-21 09:20:00	12600	1584769800
+57	2020-03-21 09:30:00	12600	1584770400
+58	2020-03-21 09:40:00	12600	1584771000
+59	2020-03-21 09:50:00	12600	1584771600
+60	2020-03-21 10:00:00	12600	1584772200
+61	2020-03-21 10:10:00	12600	1584772800
+62	2020-03-21 10:20:00	12600	1584773400
+63	2020-03-21 10:30:00	12600	1584774000
+64	2020-03-21 10:40:00	12600	1584774600
+65	2020-03-21 10:50:00	12600	1584775200
+66	2020-03-21 11:00:00	12600	1584775800
+67	2020-03-21 11:10:00	12600	1584776400
+68	2020-03-21 11:20:00	12600	1584777000
+69	2020-03-21 11:30:00	12600	1584777600
+70	2020-03-21 11:40:00	12600	1584778200
+71	2020-03-21 11:50:00	12600	1584778800
+72	2020-03-21 12:00:00	12600	1584779400
+73	2020-03-21 12:10:00	12600	1584780000
+74	2020-03-21 12:20:00	12600	1584780600
+75	2020-03-21 12:30:00	12600	1584781200
+76	2020-03-21 12:40:00	12600	1584781800
+77	2020-03-21 12:50:00	12600	1584782400
+78	2020-03-21 13:00:00	12600	1584783000
+79	2020-03-21 13:10:00	12600	1584783600
+80	2020-03-21 13:20:00	12600	1584784200
+81	2020-03-21 13:30:00	12600	1584784800
+82	2020-03-21 13:40:00	12600	1584785400
+83	2020-03-21 13:50:00	12600	1584786000
+84	2020-03-21 14:00:00	12600	1584786600
+85	2020-03-21 14:10:00	12600	1584787200
+86	2020-03-21 14:20:00	12600	1584787800
+87	2020-03-21 14:30:00	12600	1584788400
+88	2020-03-21 14:40:00	12600	1584789000
+89	2020-03-21 14:50:00	12600	1584789600
+90	2020-03-21 15:00:00	12600	1584790200
+91	2020-03-21 15:10:00	12600	1584790800
+92	2020-03-21 15:20:00	12600	1584791400
+93	2020-03-21 15:30:00	12600	1584792000
+94	2020-03-21 15:40:00	12600	1584792600
+95	2020-03-21 15:50:00	12600	1584793200
+96	2020-03-21 16:00:00	12600	1584793800
+97	2020-03-21 16:10:00	12600	1584794400
+98	2020-03-21 16:20:00	12600	1584795000
+99	2020-03-21 16:30:00	12600	1584795600
+100	2020-03-21 16:40:00	12600	1584796200
+101	2020-03-21 16:50:00	12600	1584796800
+102	2020-03-21 17:00:00	12600	1584797400
+103	2020-03-21 17:10:00	12600	1584798000
+104	2020-03-21 17:20:00	12600	1584798600
+105	2020-03-21 17:30:00	12600	1584799200
+106	2020-03-21 17:40:00	12600	1584799800
+107	2020-03-21 17:50:00	12600	1584800400
+108	2020-03-21 18:00:00	12600	1584801000
+109	2020-03-21 18:10:00	12600	1584801600
+110	2020-03-21 18:20:00	12600	1584802200
+111	2020-03-21 18:30:00	12600	1584802800
+112	2020-03-21 18:40:00	12600	1584803400
+113	2020-03-21 18:50:00	12600	1584804000
+114	2020-03-21 19:00:00	12600	1584804600
+115	2020-03-21 19:10:00	12600	1584805200
+116	2020-03-21 19:20:00	12600	1584805800
+117	2020-03-21 19:30:00	12600	1584806400
+118	2020-03-21 19:40:00	12600	1584807000
+119	2020-03-21 19:50:00	12600	1584807600
+120	2020-03-21 20:00:00	12600	1584808200
+121	2020-03-21 20:10:00	12600	1584808800
+122	2020-03-21 20:20:00	12600	1584809400
+123	2020-03-21 20:30:00	12600	1584810000
+124	2020-03-21 20:40:00	12600	1584810600
+125	2020-03-21 20:50:00	12600	1584811200
+126	2020-03-21 21:00:00	12600	1584811800
+127	2020-03-21 21:10:00	12600	1584812400
+128	2020-03-21 21:20:00	12600	1584813000
+129	2020-03-21 21:30:00	12600	1584813600
+130	2020-03-21 21:40:00	12600	1584814200
+131	2020-03-21 21:50:00	12600	1584814800
+132	2020-03-21 22:00:00	12600	1584815400
+133	2020-03-21 22:10:00	12600	1584816000
+134	2020-03-21 22:20:00	12600	1584816600
+135	2020-03-21 22:30:00	12600	1584817200
+136	2020-03-21 22:40:00	12600	1584817800
+137	2020-03-21 22:50:00	12600	1584818400
+138	2020-03-22 00:00:00	16200	1584819000
+139	2020-03-22 00:10:00	16200	1584819600
+140	2020-03-22 00:20:00	16200	1584820200
+141	2020-03-22 00:30:00	16200	1584820800
+142	2020-03-22 00:40:00	16200	1584821400
+143	2020-03-22 00:50:00	16200	1584822000
+144	2020-03-22 01:00:00	16200	1584822600
+145	2020-03-22 01:10:00	16200	1584823200
+146	2020-03-22 01:20:00	16200	1584823800
+147	2020-03-22 01:30:00	16200	1584824400
+148	2020-03-22 01:40:00	16200	1584825000
+149	2020-03-22 01:50:00	16200	1584825600
+150	2020-03-22 02:00:00	16200	1584826200
+151	2020-03-22 02:10:00	16200	1584826800
+152	2020-03-22 02:20:00	16200	1584827400
+153	2020-03-22 02:30:00	16200	1584828000
+154	2020-03-22 02:40:00	16200	1584828600
+155	2020-03-22 02:50:00	16200	1584829200
+156	2020-03-22 03:00:00	16200	1584829800
+157	2020-03-22 03:10:00	16200	1584830400
+158	2020-03-22 03:20:00	16200	1584831000
+159	2020-03-22 03:30:00	16200	1584831600
+160	2020-03-22 03:40:00	16200	1584832200
+161	2020-03-22 03:50:00	16200	1584832800
+162	2020-03-22 04:00:00	16200	1584833400
+163	2020-03-22 04:10:00	16200	1584834000
+164	2020-03-22 04:20:00	16200	1584834600
+165	2020-03-22 04:30:00	16200	1584835200
+166	2020-03-22 04:40:00	16200	1584835800
+167	2020-03-22 04:50:00	16200	1584836400
+168	2020-03-22 05:00:00	16200	1584837000
+169	2020-03-22 05:10:00	16200	1584837600
+170	2020-03-22 05:20:00	16200	1584838200
+171	2020-03-22 05:30:00	16200	1584838800
+172	2020-03-22 05:40:00	16200	1584839400
+173	2020-03-22 05:50:00	16200	1584840000
+174	2020-03-22 06:00:00	16200	1584840600
+175	2020-03-22 06:10:00	16200	1584841200
+176	2020-03-22 06:20:00	16200	1584841800
+177	2020-03-22 06:30:00	16200	1584842400
+178	2020-03-22 06:40:00	16200	1584843000
+179	2020-03-22 06:50:00	16200	1584843600
+180	2020-03-22 07:00:00	16200	1584844200
+181	2020-03-22 07:10:00	16200	1584844800
+182	2020-03-22 07:20:00	16200	1584845400
+183	2020-03-22 07:30:00	16200	1584846000
+184	2020-03-22 07:40:00	16200	1584846600
+185	2020-03-22 07:50:00	16200	1584847200
+186	2020-03-22 08:00:00	16200	1584847800
+187	2020-03-22 08:10:00	16200	1584848400
+188	2020-03-22 08:20:00	16200	1584849000
+189	2020-03-22 08:30:00	16200	1584849600
+190	2020-03-22 08:40:00	16200	1584850200
+191	2020-03-22 08:50:00	16200	1584850800
+192	2020-03-22 09:00:00	16200	1584851400
+193	2020-03-22 09:10:00	16200	1584852000
+194	2020-03-22 09:20:00	16200	1584852600
+195	2020-03-22 09:30:00	16200	1584853200
+196	2020-03-22 09:40:00	16200	1584853800
+197	2020-03-22 09:50:00	16200	1584854400
+198	2020-03-22 10:00:00	16200	1584855000
+199	2020-03-22 10:10:00	16200	1584855600
+0	2020-09-20 00:00:00	16200	1600543800
+1	2020-09-20 00:10:00	16200	1600544400
+2	2020-09-20 00:20:00	16200	1600545000
+3	2020-09-20 00:30:00	16200	1600545600
+4	2020-09-20 00:40:00	16200	1600546200
+5	2020-09-20 00:50:00	16200	1600546800
+6	2020-09-20 01:00:00	16200	1600547400
+7	2020-09-20 01:10:00	16200	1600548000
+8	2020-09-20 01:20:00	16200	1600548600
+9	2020-09-20 01:30:00	16200	1600549200
+10	2020-09-20 01:40:00	16200	1600549800
+11	2020-09-20 01:50:00	16200	1600550400
+12	2020-09-20 02:00:00	16200	1600551000
+13	2020-09-20 02:10:00	16200	1600551600
+14	2020-09-20 02:20:00	16200	1600552200
+15	2020-09-20 02:30:00	16200	1600552800
+16	2020-09-20 02:40:00	16200	1600553400
+17	2020-09-20 02:50:00	16200	1600554000
+18	2020-09-20 03:00:00	16200	1600554600
+19	2020-09-20 03:10:00	16200	1600555200
+20	2020-09-20 03:20:00	16200	1600555800
+21	2020-09-20 03:30:00	16200	1600556400
+22	2020-09-20 03:40:00	16200	1600557000
+23	2020-09-20 03:50:00	16200	1600557600
+24	2020-09-20 04:00:00	16200	1600558200
+25	2020-09-20 04:10:00	16200	1600558800
+26	2020-09-20 04:20:00	16200	1600559400
+27	2020-09-20 04:30:00	16200	1600560000
+28	2020-09-20 04:40:00	16200	1600560600
+29	2020-09-20 04:50:00	16200	1600561200
+30	2020-09-20 05:00:00	16200	1600561800
+31	2020-09-20 05:10:00	16200	1600562400
+32	2020-09-20 05:20:00	16200	1600563000
+33	2020-09-20 05:30:00	16200	1600563600
+34	2020-09-20 05:40:00	16200	1600564200
+35	2020-09-20 05:50:00	16200	1600564800
+36	2020-09-20 06:00:00	16200	1600565400
+37	2020-09-20 06:10:00	16200	1600566000
+38	2020-09-20 06:20:00	16200	1600566600
+39	2020-09-20 06:30:00	16200	1600567200
+40	2020-09-20 06:40:00	16200	1600567800
+41	2020-09-20 06:50:00	16200	1600568400
+42	2020-09-20 07:00:00	16200	1600569000
+43	2020-09-20 07:10:00	16200	1600569600
+44	2020-09-20 07:20:00	16200	1600570200
+45	2020-09-20 07:30:00	16200	1600570800
+46	2020-09-20 07:40:00	16200	1600571400
+47	2020-09-20 07:50:00	16200	1600572000
+48	2020-09-20 08:00:00	16200	1600572600
+49	2020-09-20 08:10:00	16200	1600573200
+50	2020-09-20 08:20:00	16200	1600573800
+51	2020-09-20 08:30:00	16200	1600574400
+52	2020-09-20 08:40:00	16200	1600575000
+53	2020-09-20 08:50:00	16200	1600575600
+54	2020-09-20 09:00:00	16200	1600576200
+55	2020-09-20 09:10:00	16200	1600576800
+56	2020-09-20 09:20:00	16200	1600577400
+57	2020-09-20 09:30:00	16200	1600578000
+58	2020-09-20 09:40:00	16200	1600578600
+59	2020-09-20 09:50:00	16200	1600579200
+60	2020-09-20 10:00:00	16200	1600579800
+61	2020-09-20 10:10:00	16200	1600580400
+62	2020-09-20 10:20:00	16200	1600581000
+63	2020-09-20 10:30:00	16200	1600581600
+64	2020-09-20 10:40:00	16200	1600582200
+65	2020-09-20 10:50:00	16200	1600582800
+66	2020-09-20 11:00:00	16200	1600583400
+67	2020-09-20 11:10:00	16200	1600584000
+68	2020-09-20 11:20:00	16200	1600584600
+69	2020-09-20 11:30:00	16200	1600585200
+70	2020-09-20 11:40:00	16200	1600585800
+71	2020-09-20 11:50:00	16200	1600586400
+72	2020-09-20 12:00:00	16200	1600587000
+73	2020-09-20 12:10:00	16200	1600587600
+74	2020-09-20 12:20:00	16200	1600588200
+75	2020-09-20 12:30:00	16200	1600588800
+76	2020-09-20 12:40:00	16200	1600589400
+77	2020-09-20 12:50:00	16200	1600590000
+78	2020-09-20 13:00:00	16200	1600590600
+79	2020-09-20 13:10:00	16200	1600591200
+80	2020-09-20 13:20:00	16200	1600591800
+81	2020-09-20 13:30:00	16200	1600592400
+82	2020-09-20 13:40:00	16200	1600593000
+83	2020-09-20 13:50:00	16200	1600593600
+84	2020-09-20 14:00:00	16200	1600594200
+85	2020-09-20 14:10:00	16200	1600594800
+86	2020-09-20 14:20:00	16200	1600595400
+87	2020-09-20 14:30:00	16200	1600596000
+88	2020-09-20 14:40:00	16200	1600596600
+89	2020-09-20 14:50:00	16200	1600597200
+90	2020-09-20 15:00:00	16200	1600597800
+91	2020-09-20 15:10:00	16200	1600598400
+92	2020-09-20 15:20:00	16200	1600599000
+93	2020-09-20 15:30:00	16200	1600599600
+94	2020-09-20 15:40:00	16200	1600600200
+95	2020-09-20 15:50:00	16200	1600600800
+96	2020-09-20 16:00:00	16200	1600601400
+97	2020-09-20 16:10:00	16200	1600602000
+98	2020-09-20 16:20:00	16200	1600602600
+99	2020-09-20 16:30:00	16200	1600603200
+100	2020-09-20 16:40:00	16200	1600603800
+101	2020-09-20 16:50:00	16200	1600604400
+102	2020-09-20 17:00:00	16200	1600605000
+103	2020-09-20 17:10:00	16200	1600605600
+104	2020-09-20 17:20:00	16200	1600606200
+105	2020-09-20 17:30:00	16200	1600606800
+106	2020-09-20 17:40:00	16200	1600607400
+107	2020-09-20 17:50:00	16200	1600608000
+108	2020-09-20 18:00:00	16200	1600608600
+109	2020-09-20 18:10:00	16200	1600609200
+110	2020-09-20 18:20:00	16200	1600609800
+111	2020-09-20 18:30:00	16200	1600610400
+112	2020-09-20 18:40:00	16200	1600611000
+113	2020-09-20 18:50:00	16200	1600611600
+114	2020-09-20 19:00:00	16200	1600612200
+115	2020-09-20 19:10:00	16200	1600612800
+116	2020-09-20 19:20:00	16200	1600613400
+117	2020-09-20 19:30:00	16200	1600614000
+118	2020-09-20 19:40:00	16200	1600614600
+119	2020-09-20 19:50:00	16200	1600615200
+120	2020-09-20 20:00:00	16200	1600615800
+121	2020-09-20 20:10:00	16200	1600616400
+122	2020-09-20 20:20:00	16200	1600617000
+123	2020-09-20 20:30:00	16200	1600617600
+124	2020-09-20 20:40:00	16200	1600618200
+125	2020-09-20 20:50:00	16200	1600618800
+126	2020-09-20 21:00:00	16200	1600619400
+127	2020-09-20 21:10:00	16200	1600620000
+128	2020-09-20 21:20:00	16200	1600620600
+129	2020-09-20 21:30:00	16200	1600621200
+130	2020-09-20 21:40:00	16200	1600621800
+131	2020-09-20 21:50:00	16200	1600622400
+132	2020-09-20 22:00:00	16200	1600623000
+133	2020-09-20 22:10:00	16200	1600623600
+134	2020-09-20 22:20:00	16200	1600624200
+135	2020-09-20 22:30:00	16200	1600624800
+136	2020-09-20 22:40:00	16200	1600625400
+137	2020-09-20 22:50:00	16200	1600626000
+138	2020-09-20 23:00:00	16200	1600626600
+139	2020-09-20 23:10:00	16200	1600627200
+140	2020-09-20 23:20:00	16200	1600627800
+141	2020-09-20 23:30:00	16200	1600628400
+142	2020-09-20 23:40:00	16200	1600629000
+143	2020-09-20 23:50:00	16200	1600629600
+144	2020-09-20 23:00:00	12600	1600630200
+145	2020-09-20 23:10:00	12600	1600630800
+146	2020-09-20 23:20:00	12600	1600631400
+147	2020-09-20 23:30:00	12600	1600632000
+148	2020-09-20 23:40:00	12600	1600632600
+149	2020-09-20 23:50:00	12600	1600633200
+150	2020-09-21 00:00:00	12600	1600633800
+151	2020-09-21 00:10:00	12600	1600634400
+152	2020-09-21 00:20:00	12600	1600635000
+153	2020-09-21 00:30:00	12600	1600635600
+154	2020-09-21 00:40:00	12600	1600636200
+155	2020-09-21 00:50:00	12600	1600636800
+156	2020-09-21 01:00:00	12600	1600637400
+157	2020-09-21 01:10:00	12600	1600638000
+158	2020-09-21 01:20:00	12600	1600638600
+159	2020-09-21 01:30:00	12600	1600639200
+160	2020-09-21 01:40:00	12600	1600639800
+161	2020-09-21 01:50:00	12600	1600640400
+162	2020-09-21 02:00:00	12600	1600641000
+163	2020-09-21 02:10:00	12600	1600641600
+164	2020-09-21 02:20:00	12600	1600642200
+165	2020-09-21 02:30:00	12600	1600642800
+166	2020-09-21 02:40:00	12600	1600643400
+167	2020-09-21 02:50:00	12600	1600644000
+168	2020-09-21 03:00:00	12600	1600644600
+169	2020-09-21 03:10:00	12600	1600645200
+170	2020-09-21 03:20:00	12600	1600645800
+171	2020-09-21 03:30:00	12600	1600646400
+172	2020-09-21 03:40:00	12600	1600647000
+173	2020-09-21 03:50:00	12600	1600647600
+174	2020-09-21 04:00:00	12600	1600648200
+175	2020-09-21 04:10:00	12600	1600648800
+176	2020-09-21 04:20:00	12600	1600649400
+177	2020-09-21 04:30:00	12600	1600650000
+178	2020-09-21 04:40:00	12600	1600650600
+179	2020-09-21 04:50:00	12600	1600651200
+180	2020-09-21 05:00:00	12600	1600651800
+181	2020-09-21 05:10:00	12600	1600652400
+182	2020-09-21 05:20:00	12600	1600653000
+183	2020-09-21 05:30:00	12600	1600653600
+184	2020-09-21 05:40:00	12600	1600654200
+185	2020-09-21 05:50:00	12600	1600654800
+186	2020-09-21 06:00:00	12600	1600655400
+187	2020-09-21 06:10:00	12600	1600656000
+188	2020-09-21 06:20:00	12600	1600656600
+189	2020-09-21 06:30:00	12600	1600657200
+190	2020-09-21 06:40:00	12600	1600657800
+191	2020-09-21 06:50:00	12600	1600658400
+192	2020-09-21 07:00:00	12600	1600659000
+193	2020-09-21 07:10:00	12600	1600659600
+194	2020-09-21 07:20:00	12600	1600660200
+195	2020-09-21 07:30:00	12600	1600660800
+196	2020-09-21 07:40:00	12600	1600661400
+197	2020-09-21 07:50:00	12600	1600662000
+198	2020-09-21 08:00:00	12600	1600662600
+199	2020-09-21 08:10:00	12600	1600663200
+37800
+39600
+0	2020-10-03 00:00:00	37800	1601645400
+1	2020-10-03 00:10:00	37800	1601646000
+2	2020-10-03 00:20:00	37800	1601646600
+3	2020-10-03 00:30:00	37800	1601647200
+4	2020-10-03 00:40:00	37800	1601647800
+5	2020-10-03 00:50:00	37800	1601648400
+6	2020-10-03 01:00:00	37800	1601649000
+7	2020-10-03 01:10:00	37800	1601649600
+8	2020-10-03 01:20:00	37800	1601650200
+9	2020-10-03 01:30:00	37800	1601650800
+10	2020-10-03 01:40:00	37800	1601651400
+11	2020-10-03 01:50:00	37800	1601652000
+12	2020-10-03 02:00:00	37800	1601652600
+13	2020-10-03 02:10:00	37800	1601653200
+14	2020-10-03 02:20:00	37800	1601653800
+15	2020-10-03 02:30:00	37800	1601654400
+16	2020-10-03 02:40:00	37800	1601655000
+17	2020-10-03 02:50:00	37800	1601655600
+18	2020-10-03 03:00:00	37800	1601656200
+19	2020-10-03 03:10:00	37800	1601656800
+20	2020-10-03 03:20:00	37800	1601657400
+21	2020-10-03 03:30:00	37800	1601658000
+22	2020-10-03 03:40:00	37800	1601658600
+23	2020-10-03 03:50:00	37800	1601659200
+24	2020-10-03 04:00:00	37800	1601659800
+25	2020-10-03 04:10:00	37800	1601660400
+26	2020-10-03 04:20:00	37800	1601661000
+27	2020-10-03 04:30:00	37800	1601661600
+28	2020-10-03 04:40:00	37800	1601662200
+29	2020-10-03 04:50:00	37800	1601662800
+30	2020-10-03 05:00:00	37800	1601663400
+31	2020-10-03 05:10:00	37800	1601664000
+32	2020-10-03 05:20:00	37800	1601664600
+33	2020-10-03 05:30:00	37800	1601665200
+34	2020-10-03 05:40:00	37800	1601665800
+35	2020-10-03 05:50:00	37800	1601666400
+36	2020-10-03 06:00:00	37800	1601667000
+37	2020-10-03 06:10:00	37800	1601667600
+38	2020-10-03 06:20:00	37800	1601668200
+39	2020-10-03 06:30:00	37800	1601668800
+40	2020-10-03 06:40:00	37800	1601669400
+41	2020-10-03 06:50:00	37800	1601670000
+42	2020-10-03 07:00:00	37800	1601670600
+43	2020-10-03 07:10:00	37800	1601671200
+44	2020-10-03 07:20:00	37800	1601671800
+45	2020-10-03 07:30:00	37800	1601672400
+46	2020-10-03 07:40:00	37800	1601673000
+47	2020-10-03 07:50:00	37800	1601673600
+48	2020-10-03 08:00:00	37800	1601674200
+49	2020-10-03 08:10:00	37800	1601674800
+50	2020-10-03 08:20:00	37800	1601675400
+51	2020-10-03 08:30:00	37800	1601676000
+52	2020-10-03 08:40:00	37800	1601676600
+53	2020-10-03 08:50:00	37800	1601677200
+54	2020-10-03 09:00:00	37800	1601677800
+55	2020-10-03 09:10:00	37800	1601678400
+56	2020-10-03 09:20:00	37800	1601679000
+57	2020-10-03 09:30:00	37800	1601679600
+58	2020-10-03 09:40:00	37800	1601680200
+59	2020-10-03 09:50:00	37800	1601680800
+60	2020-10-03 10:00:00	37800	1601681400
+61	2020-10-03 10:10:00	37800	1601682000
+62	2020-10-03 10:20:00	37800	1601682600
+63	2020-10-03 10:30:00	37800	1601683200
+64	2020-10-03 10:40:00	37800	1601683800
+65	2020-10-03 10:50:00	37800	1601684400
+66	2020-10-03 11:00:00	37800	1601685000
+67	2020-10-03 11:10:00	37800	1601685600
+68	2020-10-03 11:20:00	37800	1601686200
+69	2020-10-03 11:30:00	37800	1601686800
+70	2020-10-03 11:40:00	37800	1601687400
+71	2020-10-03 11:50:00	37800	1601688000
+72	2020-10-03 12:00:00	37800	1601688600
+73	2020-10-03 12:10:00	37800	1601689200
+74	2020-10-03 12:20:00	37800	1601689800
+75	2020-10-03 12:30:00	37800	1601690400
+76	2020-10-03 12:40:00	37800	1601691000
+77	2020-10-03 12:50:00	37800	1601691600
+78	2020-10-03 13:00:00	37800	1601692200
+79	2020-10-03 13:10:00	37800	1601692800
+80	2020-10-03 13:20:00	37800	1601693400
+81	2020-10-03 13:30:00	37800	1601694000
+82	2020-10-03 13:40:00	37800	1601694600
+83	2020-10-03 13:50:00	37800	1601695200
+84	2020-10-03 14:00:00	37800	1601695800
+85	2020-10-03 14:10:00	37800	1601696400
+86	2020-10-03 14:20:00	37800	1601697000
+87	2020-10-03 14:30:00	37800	1601697600
+88	2020-10-03 14:40:00	37800	1601698200
+89	2020-10-03 14:50:00	37800	1601698800
+90	2020-10-03 15:00:00	37800	1601699400
+91	2020-10-03 15:10:00	37800	1601700000
+92	2020-10-03 15:20:00	37800	1601700600
+93	2020-10-03 15:30:00	37800	1601701200
+94	2020-10-03 15:40:00	37800	1601701800
+95	2020-10-03 15:50:00	37800	1601702400
+96	2020-10-03 16:00:00	37800	1601703000
+97	2020-10-03 16:10:00	37800	1601703600
+98	2020-10-03 16:20:00	37800	1601704200
+99	2020-10-03 16:30:00	37800	1601704800
+100	2020-10-03 16:40:00	37800	1601705400
+101	2020-10-03 16:50:00	37800	1601706000
+102	2020-10-03 17:00:00	37800	1601706600
+103	2020-10-03 17:10:00	37800	1601707200
+104	2020-10-03 17:20:00	37800	1601707800
+105	2020-10-03 17:30:00	37800	1601708400
+106	2020-10-03 17:40:00	37800	1601709000
+107	2020-10-03 17:50:00	37800	1601709600
+108	2020-10-03 18:00:00	37800	1601710200
+109	2020-10-03 18:10:00	37800	1601710800
+110	2020-10-03 18:20:00	37800	1601711400
+111	2020-10-03 18:30:00	37800	1601712000
+112	2020-10-03 18:40:00	37800	1601712600
+113	2020-10-03 18:50:00	37800	1601713200
+114	2020-10-03 19:00:00	37800	1601713800
+115	2020-10-03 19:10:00	37800	1601714400
+116	2020-10-03 19:20:00	37800	1601715000
+117	2020-10-03 19:30:00	37800	1601715600
+118	2020-10-03 19:40:00	37800	1601716200
+119	2020-10-03 19:50:00	37800	1601716800
+120	2020-10-03 20:00:00	37800	1601717400
+121	2020-10-03 20:10:00	37800	1601718000
+122	2020-10-03 20:20:00	37800	1601718600
+123	2020-10-03 20:30:00	37800	1601719200
+124	2020-10-03 20:40:00	37800	1601719800
+125	2020-10-03 20:50:00	37800	1601720400
+126	2020-10-03 21:00:00	37800	1601721000
+127	2020-10-03 21:10:00	37800	1601721600
+128	2020-10-03 21:20:00	37800	1601722200
+129	2020-10-03 21:30:00	37800	1601722800
+130	2020-10-03 21:40:00	37800	1601723400
+131	2020-10-03 21:50:00	37800	1601724000
+132	2020-10-03 22:00:00	37800	1601724600
+133	2020-10-03 22:10:00	37800	1601725200
+134	2020-10-03 22:20:00	37800	1601725800
+135	2020-10-03 22:30:00	37800	1601726400
+136	2020-10-03 22:40:00	37800	1601727000
+137	2020-10-03 22:50:00	37800	1601727600
+138	2020-10-03 23:00:00	37800	1601728200
+139	2020-10-03 23:10:00	37800	1601728800
+140	2020-10-03 23:20:00	37800	1601729400
+141	2020-10-03 23:30:00	37800	1601730000
+142	2020-10-03 23:40:00	37800	1601730600
+143	2020-10-03 23:50:00	37800	1601731200
+144	2020-10-04 00:00:00	37800	1601731800
+145	2020-10-04 00:10:00	37800	1601732400
+146	2020-10-04 00:20:00	37800	1601733000
+147	2020-10-04 00:30:00	37800	1601733600
+148	2020-10-04 00:40:00	37800	1601734200
+149	2020-10-04 00:50:00	37800	1601734800
+150	2020-10-04 01:00:00	37800	1601735400
+151	2020-10-04 01:10:00	37800	1601736000
+152	2020-10-04 01:20:00	37800	1601736600
+153	2020-10-04 01:30:00	37800	1601737200
+154	2020-10-04 01:40:00	37800	1601737800
+155	2020-10-04 01:50:00	37800	1601738400
+156	2020-10-04 02:00:00	39600	1601739000
+157	2020-10-04 02:10:00	39600	1601739600
+158	2020-10-04 02:20:00	39600	1601740200
+159	2020-10-04 03:30:00	39600	1601740800
+160	2020-10-04 03:40:00	39600	1601741400
+161	2020-10-04 03:50:00	39600	1601742000
+162	2020-10-04 03:00:00	39600	1601742600
+163	2020-10-04 03:10:00	39600	1601743200
+164	2020-10-04 03:20:00	39600	1601743800
+165	2020-10-04 04:30:00	39600	1601744400
+166	2020-10-04 04:40:00	39600	1601745000
+167	2020-10-04 04:50:00	39600	1601745600
+168	2020-10-04 04:00:00	39600	1601746200
+169	2020-10-04 04:10:00	39600	1601746800
+170	2020-10-04 04:20:00	39600	1601747400
+171	2020-10-04 05:30:00	39600	1601748000
+172	2020-10-04 05:40:00	39600	1601748600
+173	2020-10-04 05:50:00	39600	1601749200
+174	2020-10-04 05:00:00	39600	1601749800
+175	2020-10-04 05:10:00	39600	1601750400
+176	2020-10-04 05:20:00	39600	1601751000
+177	2020-10-04 06:30:00	39600	1601751600
+178	2020-10-04 06:40:00	39600	1601752200
+179	2020-10-04 06:50:00	39600	1601752800
+180	2020-10-04 06:00:00	39600	1601753400
+181	2020-10-04 06:10:00	39600	1601754000
+182	2020-10-04 06:20:00	39600	1601754600
+183	2020-10-04 07:30:00	39600	1601755200
+184	2020-10-04 07:40:00	39600	1601755800
+185	2020-10-04 07:50:00	39600	1601756400
+186	2020-10-04 07:00:00	39600	1601757000
+187	2020-10-04 07:10:00	39600	1601757600
+188	2020-10-04 07:20:00	39600	1601758200
+189	2020-10-04 08:30:00	39600	1601758800
+190	2020-10-04 08:40:00	39600	1601759400
+191	2020-10-04 08:50:00	39600	1601760000
+192	2020-10-04 08:00:00	39600	1601760600
+193	2020-10-04 08:10:00	39600	1601761200
+194	2020-10-04 08:20:00	39600	1601761800
+195	2020-10-04 09:30:00	39600	1601762400
+196	2020-10-04 09:40:00	39600	1601763000
+197	2020-10-04 09:50:00	39600	1601763600
+198	2020-10-04 09:00:00	39600	1601764200
+199	2020-10-04 09:10:00	39600	1601764800
+0	2019-04-06 00:00:00	39600	1554469200
+1	2019-04-06 00:10:00	39600	1554469800
+2	2019-04-06 00:20:00	39600	1554470400
+3	2019-04-06 00:30:00	39600	1554471000
+4	2019-04-06 00:40:00	39600	1554471600
+5	2019-04-06 00:50:00	39600	1554472200
+6	2019-04-06 01:00:00	39600	1554472800
+7	2019-04-06 01:10:00	39600	1554473400
+8	2019-04-06 01:20:00	39600	1554474000
+9	2019-04-06 01:30:00	39600	1554474600
+10	2019-04-06 01:40:00	39600	1554475200
+11	2019-04-06 01:50:00	39600	1554475800
+12	2019-04-06 02:00:00	39600	1554476400
+13	2019-04-06 02:10:00	39600	1554477000
+14	2019-04-06 02:20:00	39600	1554477600
+15	2019-04-06 02:30:00	39600	1554478200
+16	2019-04-06 02:40:00	39600	1554478800
+17	2019-04-06 02:50:00	39600	1554479400
+18	2019-04-06 03:00:00	39600	1554480000
+19	2019-04-06 03:10:00	39600	1554480600
+20	2019-04-06 03:20:00	39600	1554481200
+21	2019-04-06 03:30:00	39600	1554481800
+22	2019-04-06 03:40:00	39600	1554482400
+23	2019-04-06 03:50:00	39600	1554483000
+24	2019-04-06 04:00:00	39600	1554483600
+25	2019-04-06 04:10:00	39600	1554484200
+26	2019-04-06 04:20:00	39600	1554484800
+27	2019-04-06 04:30:00	39600	1554485400
+28	2019-04-06 04:40:00	39600	1554486000
+29	2019-04-06 04:50:00	39600	1554486600
+30	2019-04-06 05:00:00	39600	1554487200
+31	2019-04-06 05:10:00	39600	1554487800
+32	2019-04-06 05:20:00	39600	1554488400
+33	2019-04-06 05:30:00	39600	1554489000
+34	2019-04-06 05:40:00	39600	1554489600
+35	2019-04-06 05:50:00	39600	1554490200
+36	2019-04-06 06:00:00	39600	1554490800
+37	2019-04-06 06:10:00	39600	1554491400
+38	2019-04-06 06:20:00	39600	1554492000
+39	2019-04-06 06:30:00	39600	1554492600
+40	2019-04-06 06:40:00	39600	1554493200
+41	2019-04-06 06:50:00	39600	1554493800
+42	2019-04-06 07:00:00	39600	1554494400
+43	2019-04-06 07:10:00	39600	1554495000
+44	2019-04-06 07:20:00	39600	1554495600
+45	2019-04-06 07:30:00	39600	1554496200
+46	2019-04-06 07:40:00	39600	1554496800
+47	2019-04-06 07:50:00	39600	1554497400
+48	2019-04-06 08:00:00	39600	1554498000
+49	2019-04-06 08:10:00	39600	1554498600
+50	2019-04-06 08:20:00	39600	1554499200
+51	2019-04-06 08:30:00	39600	1554499800
+52	2019-04-06 08:40:00	39600	1554500400
+53	2019-04-06 08:50:00	39600	1554501000
+54	2019-04-06 09:00:00	39600	1554501600
+55	2019-04-06 09:10:00	39600	1554502200
+56	2019-04-06 09:20:00	39600	1554502800
+57	2019-04-06 09:30:00	39600	1554503400
+58	2019-04-06 09:40:00	39600	1554504000
+59	2019-04-06 09:50:00	39600	1554504600
+60	2019-04-06 10:00:00	39600	1554505200
+61	2019-04-06 10:10:00	39600	1554505800
+62	2019-04-06 10:20:00	39600	1554506400
+63	2019-04-06 10:30:00	39600	1554507000
+64	2019-04-06 10:40:00	39600	1554507600
+65	2019-04-06 10:50:00	39600	1554508200
+66	2019-04-06 11:00:00	39600	1554508800
+67	2019-04-06 11:10:00	39600	1554509400
+68	2019-04-06 11:20:00	39600	1554510000
+69	2019-04-06 11:30:00	39600	1554510600
+70	2019-04-06 11:40:00	39600	1554511200
+71	2019-04-06 11:50:00	39600	1554511800
+72	2019-04-06 12:00:00	39600	1554512400
+73	2019-04-06 12:10:00	39600	1554513000
+74	2019-04-06 12:20:00	39600	1554513600
+75	2019-04-06 12:30:00	39600	1554514200
+76	2019-04-06 12:40:00	39600	1554514800
+77	2019-04-06 12:50:00	39600	1554515400
+78	2019-04-06 13:00:00	39600	1554516000
+79	2019-04-06 13:10:00	39600	1554516600
+80	2019-04-06 13:20:00	39600	1554517200
+81	2019-04-06 13:30:00	39600	1554517800
+82	2019-04-06 13:40:00	39600	1554518400
+83	2019-04-06 13:50:00	39600	1554519000
+84	2019-04-06 14:00:00	39600	1554519600
+85	2019-04-06 14:10:00	39600	1554520200
+86	2019-04-06 14:20:00	39600	1554520800
+87	2019-04-06 14:30:00	39600	1554521400
+88	2019-04-06 14:40:00	39600	1554522000
+89	2019-04-06 14:50:00	39600	1554522600
+90	2019-04-06 15:00:00	39600	1554523200
+91	2019-04-06 15:10:00	39600	1554523800
+92	2019-04-06 15:20:00	39600	1554524400
+93	2019-04-06 15:30:00	39600	1554525000
+94	2019-04-06 15:40:00	39600	1554525600
+95	2019-04-06 15:50:00	39600	1554526200
+96	2019-04-06 16:00:00	39600	1554526800
+97	2019-04-06 16:10:00	39600	1554527400
+98	2019-04-06 16:20:00	39600	1554528000
+99	2019-04-06 16:30:00	39600	1554528600
+100	2019-04-06 16:40:00	39600	1554529200
+101	2019-04-06 16:50:00	39600	1554529800
+102	2019-04-06 17:00:00	39600	1554530400
+103	2019-04-06 17:10:00	39600	1554531000
+104	2019-04-06 17:20:00	39600	1554531600
+105	2019-04-06 17:30:00	39600	1554532200
+106	2019-04-06 17:40:00	39600	1554532800
+107	2019-04-06 17:50:00	39600	1554533400
+108	2019-04-06 18:00:00	39600	1554534000
+109	2019-04-06 18:10:00	39600	1554534600
+110	2019-04-06 18:20:00	39600	1554535200
+111	2019-04-06 18:30:00	39600	1554535800
+112	2019-04-06 18:40:00	39600	1554536400
+113	2019-04-06 18:50:00	39600	1554537000
+114	2019-04-06 19:00:00	39600	1554537600
+115	2019-04-06 19:10:00	39600	1554538200
+116	2019-04-06 19:20:00	39600	1554538800
+117	2019-04-06 19:30:00	39600	1554539400
+118	2019-04-06 19:40:00	39600	1554540000
+119	2019-04-06 19:50:00	39600	1554540600
+120	2019-04-06 20:00:00	39600	1554541200
+121	2019-04-06 20:10:00	39600	1554541800
+122	2019-04-06 20:20:00	39600	1554542400
+123	2019-04-06 20:30:00	39600	1554543000
+124	2019-04-06 20:40:00	39600	1554543600
+125	2019-04-06 20:50:00	39600	1554544200
+126	2019-04-06 21:00:00	39600	1554544800
+127	2019-04-06 21:10:00	39600	1554545400
+128	2019-04-06 21:20:00	39600	1554546000
+129	2019-04-06 21:30:00	39600	1554546600
+130	2019-04-06 21:40:00	39600	1554547200
+131	2019-04-06 21:50:00	39600	1554547800
+132	2019-04-06 22:00:00	39600	1554548400
+133	2019-04-06 22:10:00	39600	1554549000
+134	2019-04-06 22:20:00	39600	1554549600
+135	2019-04-06 22:30:00	39600	1554550200
+136	2019-04-06 22:40:00	39600	1554550800
+137	2019-04-06 22:50:00	39600	1554551400
+138	2019-04-06 23:00:00	39600	1554552000
+139	2019-04-06 23:10:00	39600	1554552600
+140	2019-04-06 23:20:00	39600	1554553200
+141	2019-04-06 23:30:00	39600	1554553800
+142	2019-04-06 23:40:00	39600	1554554400
+143	2019-04-06 23:50:00	39600	1554555000
+144	2019-04-07 00:00:00	39600	1554555600
+145	2019-04-07 00:10:00	39600	1554556200
+146	2019-04-07 00:20:00	39600	1554556800
+147	2019-04-07 00:30:00	39600	1554557400
+148	2019-04-07 00:40:00	39600	1554558000
+149	2019-04-07 00:50:00	39600	1554558600
+150	2019-04-07 01:00:00	39600	1554559200
+151	2019-04-07 01:10:00	39600	1554559800
+152	2019-04-07 01:20:00	39600	1554560400
+153	2019-04-07 01:30:00	39600	1554561000
+154	2019-04-07 01:40:00	39600	1554561600
+155	2019-04-07 01:50:00	39600	1554562200
+156	2019-04-07 01:00:00	37800	1554562800
+157	2019-04-07 01:10:00	37800	1554563400
+158	2019-04-07 01:20:00	37800	1554564000
+159	2019-04-07 02:30:00	37800	1554564600
+160	2019-04-07 02:40:00	37800	1554565200
+161	2019-04-07 02:50:00	37800	1554565800
+162	2019-04-07 02:00:00	37800	1554566400
+163	2019-04-07 02:10:00	37800	1554567000
+164	2019-04-07 02:20:00	37800	1554567600
+165	2019-04-07 03:30:00	37800	1554568200
+166	2019-04-07 03:40:00	37800	1554568800
+167	2019-04-07 03:50:00	37800	1554569400
+168	2019-04-07 03:00:00	37800	1554570000
+169	2019-04-07 03:10:00	37800	1554570600
+170	2019-04-07 03:20:00	37800	1554571200
+171	2019-04-07 04:30:00	37800	1554571800
+172	2019-04-07 04:40:00	37800	1554572400
+173	2019-04-07 04:50:00	37800	1554573000
+174	2019-04-07 04:00:00	37800	1554573600
+175	2019-04-07 04:10:00	37800	1554574200
+176	2019-04-07 04:20:00	37800	1554574800
+177	2019-04-07 05:30:00	37800	1554575400
+178	2019-04-07 05:40:00	37800	1554576000
+179	2019-04-07 05:50:00	37800	1554576600
+180	2019-04-07 05:00:00	37800	1554577200
+181	2019-04-07 05:10:00	37800	1554577800
+182	2019-04-07 05:20:00	37800	1554578400
+183	2019-04-07 06:30:00	37800	1554579000
+184	2019-04-07 06:40:00	37800	1554579600
+185	2019-04-07 06:50:00	37800	1554580200
+186	2019-04-07 06:00:00	37800	1554580800
+187	2019-04-07 06:10:00	37800	1554581400
+188	2019-04-07 06:20:00	37800	1554582000
+189	2019-04-07 07:30:00	37800	1554582600
+190	2019-04-07 07:40:00	37800	1554583200
+191	2019-04-07 07:50:00	37800	1554583800
+192	2019-04-07 07:00:00	37800	1554584400
+193	2019-04-07 07:10:00	37800	1554585000
+194	2019-04-07 07:20:00	37800	1554585600
+195	2019-04-07 08:30:00	37800	1554586200
+196	2019-04-07 08:40:00	37800	1554586800
+197	2019-04-07 08:50:00	37800	1554587400
+198	2019-04-07 08:00:00	37800	1554588000
+199	2019-04-07 08:10:00	37800	1554588600
diff --git a/tests/queries/0_stateless/01699_timezoneOffset.sql b/tests/queries/0_stateless/01699_timezoneOffset.sql
new file mode 100644
index 00000000000..5f6855d6732
--- /dev/null
+++ b/tests/queries/0_stateless/01699_timezoneOffset.sql
@@ -0,0 +1,20 @@
+
+/* timestamp  ==  (Europe/Moscow) */
+
+
+SELECT number,(toDateTime('1981-04-01 00:00:00', 'Europe/Moscow') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(200);
+SELECT number,(toDateTime('1981-09-30 00:00:00', 'Europe/Moscow') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(200);
+
+
+SELECT number,(toDateTime('2020-03-21 00:00:00', 'Asia/Tehran') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(200);
+
+SELECT number,(toDateTime('2020-09-20 00:00:00', 'Asia/Tehran') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(200);
+
+SELECT timezoneOffset(toDateTime('2018-08-21 22:20:00', 'Australia/Lord_Howe'));
+SELECT timezoneOffset(toDateTime('2018-02-21 22:20:00', 'Australia/Lord_Howe'));
+
+
+SELECT number,(toDateTime('2020-10-03 00:00:00', 'Australia/Lord_Howe') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(200);
+
+SELECT number,(toDateTime('2019-04-06 00:00:00', 'Australia/Lord_Howe') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(200);
+

From 4279c7da41dd66597992b1bd33ec885c3d6c4371 Mon Sep 17 00:00:00 2001
From: feng lv <fenglv15@mails.ucas.ac.cn>
Date: Tue, 2 Feb 2021 02:25:19 +0000
Subject: [PATCH 0523/1238] add setting insert_shard_id

add test

fix style

fix
---
 src/Common/ErrorCodes.cpp                     |  3 +-
 src/Core/Settings.h                           |  1 +
 .../DistributedBlockOutputStream.cpp          | 11 ++++-
 .../DistributedBlockOutputStream.h            |  4 +-
 src/Storages/StorageDistributed.cpp           | 20 +++++++---
 .../01684_insert_specify_shard_id.reference   | 40 +++++++++++++++++++
 .../01684_insert_specify_shard_id.sql         | 22 ++++++++++
 7 files changed, 90 insertions(+), 11 deletions(-)
 create mode 100644 tests/queries/0_stateless/01684_insert_specify_shard_id.reference
 create mode 100644 tests/queries/0_stateless/01684_insert_specify_shard_id.sql

diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp
index a2cd65137c0..07df3f51546 100644
--- a/src/Common/ErrorCodes.cpp
+++ b/src/Common/ErrorCodes.cpp
@@ -537,7 +537,8 @@
     M(999, KEEPER_EXCEPTION) \
     M(1000, POCO_EXCEPTION) \
     M(1001, STD_EXCEPTION) \
-    M(1002, UNKNOWN_EXCEPTION)
+    M(1002, UNKNOWN_EXCEPTION) \
+    M(1003, INVALID_SHARD_ID)
 
 /* See END */
 
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index c4cf3803913..6f30c4523e9 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -420,6 +420,7 @@ class IColumn;
     M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \
     \
     M(Bool, optimize_rewrite_sum_if_to_count_if, true, "Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent", 0) \
+    M(UInt64, insert_shard_id, 0, "If non zero, when insert intoi a distributed table, the data will be inserted into the shard `insert_shard_id` synchronously. Possible values range from 1 to `shards_number` of corresponding distributed table", 0) \
     /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
     \
     M(UInt64, max_memory_usage_for_all_queries, 0, "Obsolete. Will be removed after 2020-10-20", 0) \
diff --git a/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/src/Storages/Distributed/DistributedBlockOutputStream.cpp
index d21764bbb7d..c83133a55e6 100644
--- a/src/Storages/Distributed/DistributedBlockOutputStream.cpp
+++ b/src/Storages/Distributed/DistributedBlockOutputStream.cpp
@@ -383,11 +383,18 @@ void DistributedBlockOutputStream::writeSync(const Block & block)
     bool random_shard_insert = settings.insert_distributed_one_random_shard && !storage.has_sharding_key;
     size_t start = 0;
     size_t end = shards_info.size();
-    if (random_shard_insert)
+
+    if (settings.insert_shard_id)
+    {
+        start = settings.insert_shard_id - 1;
+        end = settings.insert_shard_id;
+    }
+    else if (random_shard_insert)
     {
         start = storage.getRandomShardIndex(shards_info);
         end = start + 1;
     }
+
     size_t num_shards = end - start;
 
     if (!pool)
@@ -545,7 +552,7 @@ void DistributedBlockOutputStream::writeSplitAsync(const Block & block)
 }
 
 
-void DistributedBlockOutputStream::writeAsyncImpl(const Block & block, const size_t shard_id)
+void DistributedBlockOutputStream::writeAsyncImpl(const Block & block, size_t shard_id)
 {
     const auto & shard_info = cluster->getShardsInfo()[shard_id];
     const auto & settings = context.getSettingsRef();
diff --git a/src/Storages/Distributed/DistributedBlockOutputStream.h b/src/Storages/Distributed/DistributedBlockOutputStream.h
index ef37776893a..ca57ad46fbb 100644
--- a/src/Storages/Distributed/DistributedBlockOutputStream.h
+++ b/src/Storages/Distributed/DistributedBlockOutputStream.h
@@ -62,10 +62,10 @@ private:
 
     void writeSplitAsync(const Block & block);
 
-    void writeAsyncImpl(const Block & block, const size_t shard_id = 0);
+    void writeAsyncImpl(const Block & block, size_t shard_id = 0);
 
     /// Increments finished_writings_count after each repeat.
-    void writeToLocal(const Block & block, const size_t repeats);
+    void writeToLocal(const Block & block, size_t repeats);
 
     void writeToShard(const Block & block, const std::vector<std::string> & dir_names);
 
diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp
index afd7d6b876e..a9d691d8c96 100644
--- a/src/Storages/StorageDistributed.cpp
+++ b/src/Storages/StorageDistributed.cpp
@@ -83,6 +83,7 @@ namespace ErrorCodes
     extern const int TYPE_MISMATCH;
     extern const int TOO_MANY_ROWS;
     extern const int UNABLE_TO_SKIP_UNUSED_SHARDS;
+    extern const int INVALID_SHARD_ID;
 }
 
 namespace ActionLocks
@@ -541,22 +542,29 @@ BlockOutputStreamPtr StorageDistributed::write(const ASTPtr &, const StorageMeta
     const auto & settings = context.getSettingsRef();
 
     /// Ban an attempt to make async insert into the table belonging to DatabaseMemory
-    if (!storage_policy && !owned_cluster && !settings.insert_distributed_sync)
+    if (!storage_policy && !owned_cluster && !settings.insert_distributed_sync && !settings.insert_shard_id)
     {
         throw Exception("Storage " + getName() + " must have own data directory to enable asynchronous inserts",
                         ErrorCodes::BAD_ARGUMENTS);
     }
 
+    auto shard_num = cluster->getLocalShardCount() + cluster->getRemoteShardCount();
+
     /// If sharding key is not specified, then you can only write to a shard containing only one shard
-    if (!settings.insert_distributed_one_random_shard && !has_sharding_key
-        && ((cluster->getLocalShardCount() + cluster->getRemoteShardCount()) >= 2))
+    if (!settings.insert_shard_id && !settings.insert_distributed_one_random_shard && !has_sharding_key && shard_num >= 2)
     {
-        throw Exception("Method write is not supported by storage " + getName() + " with more than one shard and no sharding key provided",
-                        ErrorCodes::STORAGE_REQUIRES_PARAMETER);
+        throw Exception(
+            "Method write is not supported by storage " + getName() + " with more than one shard and no sharding key provided",
+            ErrorCodes::STORAGE_REQUIRES_PARAMETER);
+    }
+
+    if (settings.insert_shard_id && settings.insert_shard_id > shard_num)
+    {
+        throw Exception("Shard id should be range from 1 to shard number", ErrorCodes::INVALID_SHARD_ID);
     }
 
     /// Force sync insertion if it is remote() table function
-    bool insert_sync = settings.insert_distributed_sync || owned_cluster;
+    bool insert_sync = settings.insert_distributed_sync || settings.insert_shard_id || owned_cluster;
     auto timeout = settings.insert_distributed_timeout;
 
     /// DistributedBlockOutputStream will not own cluster, but will own ConnectionPools of the cluster
diff --git a/tests/queries/0_stateless/01684_insert_specify_shard_id.reference b/tests/queries/0_stateless/01684_insert_specify_shard_id.reference
new file mode 100644
index 00000000000..e542790e401
--- /dev/null
+++ b/tests/queries/0_stateless/01684_insert_specify_shard_id.reference
@@ -0,0 +1,40 @@
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
diff --git a/tests/queries/0_stateless/01684_insert_specify_shard_id.sql b/tests/queries/0_stateless/01684_insert_specify_shard_id.sql
new file mode 100644
index 00000000000..d4a3ae6a48d
--- /dev/null
+++ b/tests/queries/0_stateless/01684_insert_specify_shard_id.sql
@@ -0,0 +1,22 @@
+DROP TABLE IF EXISTS x;
+
+CREATE TABLE x AS system.numbers ENGINE = MergeTree ORDER BY number;
+
+CREATE TABLE x_dist as x ENGINE = Distributed('test_cluster_two_shards', currentDatabase(), x);
+
+-- insert into first shard
+INSERT INTO x_dist SELECT * FROM numbers(10) settings insert_shard_id = 1;
+
+-- insert into second shard
+INSERT INTO x_dist SELECT * FROM numbers(10, 10) settings insert_shard_id = 2;
+
+-- invalid shard id
+INSERT INTO x_dist SELECT * FROM numbers(10) settings insert_shard_id = 3; -- { serverError 1003 }
+
+SELECT * FROM remote('127.0.0.1', currentDatabase(), x);
+SELECT * FROM remote('127.0.0.2', currentDatabase(), x);
+
+SELECT * FROM x_dist ORDER by number;
+
+DROP TABLE x;
+DROP TABLE x_dist;

From 3bc7a64abe65498eac3105622b9bb48e637ed7eb Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Tue, 2 Feb 2021 08:27:07 +0300
Subject: [PATCH 0524/1238] Fix extremely inconvenient Markdown markup in
 Adopters page

---
 docs/en/introduction/adopters.md | 222 +++++++++++++++----------------
 1 file changed, 111 insertions(+), 111 deletions(-)

diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md
index 2684e6fdd3a..707a05b63e5 100644
--- a/docs/en/introduction/adopters.md
+++ b/docs/en/introduction/adopters.md
@@ -8,118 +8,118 @@ toc_title: Adopters
 !!! warning "Disclaimer"
     The following list of companies using ClickHouse and their success stories is assembled from public sources, thus might differ from current reality. We’d appreciate it if you share the story of adopting ClickHouse in your company and [add it to the list](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/introduction/adopters.md), but please make sure you won’t have any NDA issues by doing so. Providing updates with publications from other companies is also useful.
 
-| Company                                                                                        | Industry                        | Usecase               | Cluster Size                                               | (Un)Compressed Data Size<abbr title="of single replica"><sup>\*</sup></abbr> | Reference                                                                                                                                                                                                               |
-|------------------------------------------------------------------------------------------------|---------------------------------|-----------------------|------------------------------------------------------------|------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| <a href="https://2gis.ru" class="favicon">2gis</a>                                             | Maps                            | Monitoring            | —                                                          | —                                                                            | [Talk in Russian, July 2019](https://youtu.be/58sPkXfq6nw)                                                                                                                                                              |
-| <a href="https://getadmiral.com/" class="favicon">Admiral</a>                                  | Martech                         | Engagement Management | —                                                          | —                                                                            | [Webinar Slides, June 2020](https://altinity.com/presentations/2020/06/16/big-data-in-real-time-how-clickhouse-powers-admirals-visitor-relationships-for-publishers)                                                                                                                                                                                              |
-| <a href="https://cn.aliyun.com/" class="favicon">Alibaba Cloud</a> | Cloud | Managed Service | — | — | [Official Website](https://help.aliyun.com/product/144466.html)                                                                                                                                                              |
-| <a href="https://alohabrowser.com/" class="favicon">Aloha Browser</a>                          | Mobile App                      | Browser backend       | —                                                          | —                                                                            | [Slides in Russian, May 2019](https://presentations.clickhouse.tech/meetup22/aloha.pdf)                                                                                                                                 |
-| <a href="https://amadeus.com/" class="favicon">Amadeus</a>                                     | Travel                          | Analytics             | —                                                          | —                                                                            | [Press Release, April 2018](https://www.altinity.com/blog/2018/4/5/amadeus-technologies-launches-investment-and-insights-tool-based-on-machine-learning-and-strategy-algorithms)                                        |
-| <a href="https://www.appsflyer.com" class="favicon">Appsflyer</a>                              | Mobile analytics                | Main product          | —                                                          | —                                                                            | [Talk in Russian, July 2019](https://www.youtube.com/watch?v=M3wbRlcpBbY)                                                                                                                                               |
-| <a href="https://arenadata.tech/" class="favicon">ArenaData</a>                                | Data Platform                   | Main product          | —                                                          | —                                                                            | [Slides in Russian, December 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup38/indexes.pdf)                                                                                             |
-| <a href="https://avito.ru/" class="favicon">Avito</a>                                | Classifieds                   | Monitoring          | —                                                          | —                                                                            | [Meetup, April 2020](https://www.youtube.com/watch?v=n1tm4j4W8ZQ)                                                                                             |
-| <a href="https://badoo.com" class="favicon">Badoo</a>                                          | Dating                          | Timeseries            | —                                                          | —                                                                            | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/forecast.pdf)                                                                                                                         |
-| <a href="https://www.benocs.com/" class="favicon">Benocs</a>                                   | Network Telemetry and Analytics | Main Product          | —                                                          | —                                                                            | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf)                                                                                                   |
+| Company | Industry | Usecase | Cluster Size | (Un)Compressed Data Size<abbr title="of single replica"><sup>\*</sup></abbr> | Reference |
+|---------|----------|---------|--------------|------------------------------------------------------------------------------|-----------|
+| <a href="https://2gis.ru" class="favicon">2gis</a> | Maps | Monitoring | — | — | [Talk in Russian, July 2019](https://youtu.be/58sPkXfq6nw) |
+| <a href="https://getadmiral.com/" class="favicon">Admiral</a> | Martech | Engagement Management | — | — | [Webinar Slides, June 2020](https://altinity.com/presentations/2020/06/16/big-data-in-real-time-how-clickhouse-powers-admirals-visitor-relationships-for-publishers) |
+| <a href="https://cn.aliyun.com/" class="favicon">Alibaba Cloud</a> | Cloud | Managed Service | — | — | [Official Website](https://help.aliyun.com/product/144466.html) |
+| <a href="https://alohabrowser.com/" class="favicon">Aloha Browser</a> | Mobile App | Browser backend | — | — | [Slides in Russian, May 2019](https://presentations.clickhouse.tech/meetup22/aloha.pdf) |
+| <a href="https://amadeus.com/" class="favicon">Amadeus</a> | Travel | Analytics | — | — | [Press Release, April 2018](https://www.altinity.com/blog/2018/4/5/amadeus-technologies-launches-investment-and-insights-tool-based-on-machine-learning-and-strategy-algorithms) |
+| <a href="https://www.appsflyer.com" class="favicon">Appsflyer</a> | Mobile analytics | Main product | — | — | [Talk in Russian, July 2019](https://www.youtube.com/watch?v=M3wbRlcpBbY) |
+| <a href="https://arenadata.tech/" class="favicon">ArenaData</a> | Data Platform | Main product | — | — | [Slides in Russian, December 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup38/indexes.pdf) |
+| <a href="https://avito.ru/" class="favicon">Avito</a> | Classifieds | Monitoring | — | — | [Meetup, April 2020](https://www.youtube.com/watch?v=n1tm4j4W8ZQ) |
+| <a href="https://badoo.com" class="favicon">Badoo</a> | Dating | Timeseries | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/forecast.pdf) |
+| <a href="https://www.benocs.com/" class="favicon">Benocs</a> | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) |
 | <a href="https://www.bigo.sg/" class="favicon">BIGO</a> | Video | Computing Platform | — | — | [Blog Article, August 2020](https://www.programmersought.com/article/44544895251/) |
-| <a href="https://www.bloomberg.com/" class="favicon">Bloomberg</a>                             | Finance, Media                  | Monitoring            | 102 servers                                                | —                                                                            | [Slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov)                                                                                |
-| <a href="https://bloxy.info" class="favicon">Bloxy</a>                                         | Blockchain                      | Analytics             | —                                                          | —                                                                            | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/4_bloxy.pptx)                                                                                              |
-| <a href="https://www.bytedance.com" class="favicon">Bytedance</a>                                              | Social platforms                            |  — | —                                                          | —                                                                            | [The ClickHouse Meetup East, October 2020](https://www.youtube.com/watch?v=ckChUkC3Pns)                                                                                                |
+| <a href="https://www.bloomberg.com/" class="favicon">Bloomberg</a> | Finance, Media | Monitoring | 102 servers | — | [Slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) |
+| <a href="https://bloxy.info" class="favicon">Bloxy</a> | Blockchain | Analytics | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/4_bloxy.pptx) |
+| <a href="https://www.bytedance.com" class="favicon">Bytedance</a> | Social platforms | — | — | — | [The ClickHouse Meetup East, October 2020](https://www.youtube.com/watch?v=ckChUkC3Pns) |
 | <a href="https://cardsmobile.ru/" class="favicon">CardsMobile</a> | Finance | Analytics | — | — | [VC.ru](https://vc.ru/s/cardsmobile/143449-rukovoditel-gruppy-analiza-dannyh) |
-| <a href="https://carto.com/" class="favicon">CARTO</a>                                         | Business Intelligence           | Geo analytics         | —                                                          | —                                                                            | [Geospatial processing with ClickHouse](https://carto.com/blog/geospatial-processing-with-clickhouse/)                                                                                                                  |
-| <a href="http://public.web.cern.ch/public/" class="favicon">CERN</a>                           | Research                        | Experiment            | —                                                          | —                                                                            | [Press release, April 2012](https://www.yandex.com/company/press_center/press_releases/2012/2012-04-10/)                                                                                                                |
-| <a href="http://cisco.com/" class="favicon">Cisco</a>                                          | Networking                      | Traffic analysis      | —                                                          | —                                                                            | [Lightning talk, October 2019](https://youtu.be/-hI1vDR2oPY?t=5057)                                                                                                                                                     |
-| <a href="https://www.citadelsecurities.com/" class="favicon">Citadel Securities</a>            | Finance                         | —                     | —                                                          | —                                                                            | [Contribution, March 2019](https://github.com/ClickHouse/ClickHouse/pull/4774)                                                                                                                                          |
-| <a href="https://city-mobil.ru" class="favicon">Citymobil</a>                                  | Taxi                            | Analytics             | —                                                          | —                                                                            | [Blog Post in Russian, March 2020](https://habr.com/en/company/citymobil/blog/490660/)                                                                                                                                  |
-| <a href="https://cloudflare.com" class="favicon">Cloudflare</a>                                | CDN                             | Traffic analysis      | 36 servers                                                 | —                                                                            | [Blog post, May 2017](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/), [Blog post, March 2018](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) |
-| <a href="https://corporate.comcast.com/" class="favicon">Comcast</a>                           | Media                           | CDN Traffic Analysis  | —                                                          | —                                                                            | [ApacheCon 2019 Talk](https://www.youtube.com/watch?v=e9TZ6gFDjNg)                                                                                                          |
-| <a href="https://contentsquare.com" class="favicon">ContentSquare</a>                          | Web analytics                   | Main product          | —                                                          | —                                                                            | [Blog post in French, November 2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/)                              |
-| <a href="https://coru.net/" class="favicon">Corunet</a>                                        | Analytics                       | Main product          | —                                                          | —                                                                            | [Slides in English, April 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup21/predictive_models.pdf)                                                                                      |
-| <a href="https://www.creditx.com" class="favicon">CraiditX 氪信</a>                            | Finance AI                      | Analysis              | —                                                          | —                                                                            | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/udf.pptx)                                                                                                |
-| <a href="https://crazypanda.ru/en/" class="favicon">Crazypanda</a>                             | Games                           |                       | —                                                          | —                                                                            | Live session on ClickHouse meetup                                                                                                                                                                                       |
-| <a href="https://www.criteo.com/" class="favicon">Criteo</a>                                   | Retail                          | Main product          | —                                                          | —                                                                            | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/3_storetail.pptx)                                                                                         |
-| <a href="https://www.chinatelecomglobal.com/" class="favicon">Dataliance for China Telecom</a> | Telecom                         | Analytics             | —                                                          | —                                                                            | [Slides in Chinese, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/telecom.pdf)                                                                                              |
-| <a href="https://db.com" class="favicon">Deutsche Bank</a>                                     | Finance                         | BI Analytics          | —                                                          | —                                                                            | [Slides in English, October 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf)                                                                                                       |
-| <a href="https://deeplay.io/eng/" class="favicon">Deeplay</a> | Gaming Analytics | — | — | — | [Job advertisement, 2020](https://career.habr.com/vacancies/1000062568)                                                                                                       |
-| <a href="https://www.diva-e.com" class="favicon">Diva-e</a>                                    | Digital consulting              | Main Product          | —                                                          | —                                                                            | [Slides in English, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup29/ClickHouse-MeetUp-Unusual-Applications-sd-2019-09-17.pdf)                                               |
-| <a href="https://www.ecwid.com/" class="favicon">Ecwid</a>                                    | E-commerce SaaS | Metrics, Logging      | —                                                          | —                                                                            | [Slides in Russian, April 2019](https://nastachku.ru/var/files/1/presentation/backend/2_Backend_6.pdf)                                                                                                                                                        |
-| <a href="https://www.ebay.com/" class="favicon">eBay</a>                                    | E-commerce | Logs, Metrics and Events | —                                                          | —                                                                            | [Official website, Sep 2020](https://tech.ebayinc.com/engineering/ou-online-analytical-processing/)                                                                                                                                                        |
-| <a href="https://www.exness.com" class="favicon">Exness</a>                                    | Trading                         | Metrics, Logging      | —                                                          | —                                                                            | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215)                                                                                                                                                        |
-| <a href="https://fastnetmon.com/" class="favicon">FastNetMon</a> | DDoS Protection | Main Product |  | —                                                                            | [Official website](https://fastnetmon.com/docs-fnm-advanced/fastnetmon-advanced-traffic-persistency/) |
-| <a href="https://www.flipkart.com/" class="favicon">Flipkart</a>                               | e-Commerce                      | —                     | —                                                          | —                                                                            | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=239)                                                                                               |
-| <a href="https://fun.co/rp" class="favicon">FunCorp</a>                                        | Games                           |                       | —                                                          | —                                                                            | [Article](https://www.altinity.com/blog/migrating-from-redshift-to-clickhouse)                                                                                                                                          |
-| <a href="https://geniee.co.jp" class="favicon">Geniee</a>                                      | Ad network                      | Main product          | —                                                          | —                                                                            | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100)                                                                                                                                   |
-| <a href="https://www.genotek.ru/" class="favicon">Genotek</a>                                      | Bioinformatics                      | Main product          | —                                                          | —                                                                            | [Video, August 2020](https://youtu.be/v3KyZbz9lEE)                                                                                                                                   |
-| <a href="https://www.huya.com/" class="favicon">HUYA</a>                                       | Video Streaming                 | Analytics             | —                                                          | —                                                                            | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf)                                            |
-| <a href="https://www.the-ica.com/" class="favicon">ICA</a>                   | FinTech | Risk Management | — | — | [Blog Post in English, Sep 2020](https://altinity.com/blog/clickhouse-vs-redshift-performance-for-fintech-risk-management?utm_campaign=ClickHouse%20vs%20RedShift&utm_content=143520807&utm_medium=social&utm_source=twitter&hss_channel=tw-3894792263)                                                                                                         |
-| <a href="https://www.idealista.com" class="favicon">Idealista</a>                              | Real Estate                     | Analytics             | —                                                          | —                                                                            | [Blog Post in English, April 2019](https://clickhouse.tech/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019)                                                                                                         |
-| <a href="https://www.infovista.com/" class="favicon">Infovista</a>                             | Networks                        | Analytics             | —                                                          | —                                                                            | [Slides in English, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf)                                                                                            |
-| <a href="https://www.innogames.com" class="favicon">InnoGames</a>                              | Games                           | Metrics, Logging      | —                                                          | —                                                                            | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf)                                                                            |
-| <a href="https://www.instana.com" class="favicon">Instana</a>                                  | APM Platform                    | Main product          | —                                                          | —                                                                            | [Twitter post](https://twitter.com/mieldonkers/status/1248884119158882304)                                                                                                                                              |
-| <a href="https://integros.com" class="favicon">Integros</a>                                    | Platform for video services     | Analytics             | —                                                          | —                                                                            | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf)                                                                                               |
-| <a href="https://ippon.tech" class="favicon">Ippon Technologies</a>                            | Technology Consulting           | —                     | —                                                          | —                                                                            | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=205)                                                                                               |
-| <a href="https://www.ivi.ru/" class="favicon">Ivi</a> | Online Cinema | Analytics, Monitoring | —                                                          | —                                                                            | [Article in Russian, Jan 2018](https://habr.com/en/company/ivi/blog/347408/) |
-| <a href="https://jinshuju.net" class="favicon">Jinshuju 金数据</a>                             | BI Analytics                    | Main product          | —                                                          | —                                                                            | [Slides in Chinese, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/3.%20金数据数据架构调整方案Public.pdf)                                                                    |
-| <a href="https://www.kodiakdata.com/" class="favicon">Kodiak Data</a>                          | Clouds                          | Main product          | —                                                          | —                                                                            | [Slides in Engish, April 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup13/kodiak_data.pdf)                                                                                             |
-| <a href="https://kontur.ru" class="favicon">Kontur</a>                                         | Software Development            | Metrics               | —                                                          | —                                                                            | [Talk in Russian, November 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY)                                                                                                                                           |
-| <a href="https://www.kuaishou.com/" class="favicon">Kuaishou</a>                                         | Video            | — | —                                                          | —                                                                            | [ClickHouse Meetup, October 2018](https://clickhouse.tech/blog/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/)                                                                                                                                           |
-| <a href="https://www.lbl.gov" class="favicon">Lawrence Berkeley National Laboratory</a>        | Research                        | Traffic analysis      | 1 server                                                   | 11.8 TiB                                                                     | [Slides in English, April 2019](https://www.smitasin.com/presentations/2019-04-17_DOE-NSM.pdf)                                                                                                                          |
-| <a href="https://lifestreet.com/" class="favicon">LifeStreet</a>                               | Ad network                      | Main product          | 75 servers (3 replicas)                                    | 5.27 PiB                                                                     | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/)                                                                                                                                                 |
-| <a href="https://mcs.mail.ru/" class="favicon">Mail.ru Cloud Solutions</a>                     | Cloud services                  | Main product          | —                                                          | —                                                                            | [Article in Russian](https://mcs.mail.ru/help/db-create/clickhouse#)                                                                                                                                                    |
-| <a href="https://tech.mymarilyn.ru" class="favicon">Marilyn</a>                                | Advertising                     | Statistics            | —                                                          | —                                                                            | [Talk in Russian, June 2017](https://www.youtube.com/watch?v=iXlIgx2khwc)                                                                                                                                               |
-| <a href="https://mellodesign.ru/" class="favicon">Mello</a>                                | Marketing                     | Analytics            | 1 server                                                          | —                                                                            | [Article, Oct 2020](https://vc.ru/marketing/166180-razrabotka-tipovogo-otcheta-skvoznoy-analitiki)                                                                                                                                               |
-| <a href="https://www.messagebird.com" class="favicon">MessageBird</a>                          | Telecommunications              | Statistics            | —                                                          | —                                                                            | [Slides in English, November 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup20/messagebird.pdf)                                                                                         |
-| <a href="https://www.mindsdb.com/" class="favicon">MindsDB</a>                          | Machine Learning              | Main Product            | —                                                          | —                                                                            | [Official Website](https://www.mindsdb.com/blog/machine-learning-models-as-tables-in-ch) |x
-| <a href="https://mux.com/" class="favicon">MUX</a>                                       | Online Video                           | Video Analytics       | —                                                          | —                                                                            | [Talk in English, August 2019](https://altinity.com/presentations/2019/8/13/how-clickhouse-became-the-default-analytics-database-for-mux/)                                                                                                              |
-| <a href="https://www.mgid.com/" class="favicon">MGID</a>                                       | Ad network                      | Web-analytics         | —                                                          | —                                                                            | [Blog post in Russian, April 2020](http://gs-studio.com/news-about-it/32777----clickhouse---c)                                                                                                                          |
-| <a href="https://getnoc.com/" class="favicon">NOC Project</a> | Network Monitoring | Analytics | Main Product | — | [Official Website](https://getnoc.com/features/big-data/)                                                                                               |
-| <a href="https://www.nuna.com/" class="favicon">Nuna Inc.</a>                                  | Health Data Analytics           | —                     | —                                                          | —                                                                            | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=170)                                                                                               |
-| <a href="https://www.oneapm.com/" class="favicon">OneAPM</a>                                   | Monitorings and Data Analysis   | Main product          | —                                                          | —                                                                            | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf)                                                                 |
-| <a href="https://www.percent.cn/" class="favicon">Percent 百分点</a>                           | Analytics                       | Main Product          | —                                                          | —                                                                            | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf)                                                            |
-| <a href="https://www.percona.com/" class="favicon">Percona</a>               | Performance analysis | Percona Monitoring and Management | — | — | [Official website, Mar 2020](https://www.percona.com/blog/2020/03/30/advanced-query-analysis-in-percona-monitoring-and-management-with-direct-clickhouse-access/)                                                            |
-| <a href="https://plausible.io/" class="favicon">Plausible</a>                                  | Analytics                       | Main Product          | —                                                          | —                                                                            | [Blog post, June 2020](https://twitter.com/PlausibleHQ/status/1273889629087969280)                                                                                                                                      |
-| <a href="https://posthog.com/" class="favicon">PostHog</a>                                 | Product Analytics                        | Main Product                     | —                                                          | —                                                                            | [Release Notes, Oct 2020](https://posthog.com/blog/the-posthog-array-1-15-0)                                                                                               |
-| <a href="https://postmates.com/" class="favicon">Postmates</a>                                 | Delivery                        | —                     | —                                                          | —                                                                            | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=188)                                                                                               |
-| <a href="http://www.pragma-innovation.fr/" class="favicon">Pragma Innovation</a>               | Telemetry and Big Data Analysis | Main product          | —                                                          | —                                                                            | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf)                                                                                  |
-| <a href="https://www.qingcloud.com/" class="favicon">QINGCLOUD</a>                             | Cloud services                  | Main product          | —                                                          | —                                                                            | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf)                                        |
-| <a href="https://qrator.net" class="favicon">Qrator</a>                                        | DDoS protection                 | Main product          | —                                                          | —                                                                            | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/)                                                                                                                                      |
-| <a href="https://www.rbinternational.com/" class="favicon">Raiffeisenbank</a>                                       | Banking               | Analytics             | —                                                          | —                                                                            | [Lecture in Russian, December 2020](https://cs.hse.ru/announcements/421965599.html)                                                                                                |
-| <a href="https://rambler.ru" class="favicon">Rambler</a>                                       | Internet services               | Analytics             | —                                                          | —                                                                            | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141)                                                                                                |
-| <a href="https://retell.cc/" class="favicon">Retell</a>                                       | Speech synthesis               | Analytics             | —                                                          | —                                                                            | [Blog Article, August 2020](https://vc.ru/services/153732-kak-sozdat-audiostati-na-vashem-sayte-i-zachem-eto-nuzhno)                                                                                                |
-| <a href="https://rspamd.com/" class="favicon">Rspamd</a> | Antispam | Analytics | — | — | [Official Website](https://rspamd.com/doc/modules/clickhouse.html)                                                                                                |
-| <a href="https://rusiem.com/en" class="favicon">RuSIEM</a> | SIEM | Main Product | — | — | [Official Website](https://rusiem.com/en/products/architecture)                                                                                                |
-| <a href="https://www.s7.ru" class="favicon">S7 Airlines</a>                                    | Airlines                        | Metrics, Logging      | —                                                          | —                                                                            | [Talk in Russian, March 2019](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s)                                                                                                                                        |
-| <a href="https://www.scireum.de/" class="favicon">scireum GmbH</a>                             | e-Commerce                      | Main product          | —                                                          | —                                                                            | [Talk in German, February 2020](https://www.youtube.com/watch?v=7QWAn5RbyR4)                                                                                                                                            |
-| <a href="https://segment.com/" class="favicon">Segment</a>                   | Data processing                      | Main product          | 9 * i3en.3xlarge nodes 7.5TB NVME SSDs, 96GB Memory, 12 vCPUs                                                          | —                                                                            | [Slides, 2019](https://slides.com/abraithwaite/segment-clickhouse)                                                                                                                                            |
-| <a href="https://www.semrush.com/" class="favicon">SEMrush</a>                                 | Marketing                       | Main product          | —                                                          | —                                                                            | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/5_semrush.pdf)                                                                                             |
-| <a href="https://sentry.io/" class="favicon">Sentry</a>                                        | Software Development            | Main product          | —                                                          | —                                                                            | [Blog Post in English, May 2019](https://blog.sentry.io/2019/05/16/introducing-snuba-sentrys-new-search-infrastructure)                                                                                                 |
-| <a href="https://seo.do/" class="favicon">seo.do</a>                                           | Analytics                       | Main product          | —                                                          | —                                                                            | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/CH%20Presentation-%20Metehan%20Çetinkaya.pdf)                                                            |
-| <a href="http://www.sgk.gov.tr/wps/portal/sgk/tr" class="favicon">SGK</a>                      | Goverment Social Security       | Analytics             | —                                                          | —                                                                            | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/ClickHouse%20Meetup-Ramazan%20POLAT.pdf)                                                                 |
-| <a href="http://english.sina.com/index.html" class="favicon">Sina</a>                          | News                            | —                     | —                                                          | —                                                                            | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/6.%20ClickHouse最佳实践%20高鹏_新浪.pdf)                                                                  |
-| <a href="https://smi2.ru/" class="favicon">SMI2</a>                                            | News                            | Analytics             | —                                                          | —                                                                            | [Blog Post in Russian, November 2017](https://habr.com/ru/company/smi2/blog/314558/)                                                                                                                                    |
-| <a href="https://www.splunk.com/" class="favicon">Splunk</a>                                   | Business Analytics              | Main product          | —                                                          | —                                                                            | [Slides in English, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/splunk.pdf)                                                                                               |
-| <a href="https://www.spotify.com" class="favicon">Spotify</a>                                  | Music                           | Experimentation       | —                                                          | —                                                                            | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173)                                                                                                                   |
-| <a href="https://www.staffcop.ru/" class="favicon">Staffcop</a>                                | Information Security                      | Main Product | —                                                          | —                                                                            | [Official website, Documentation](https://www.staffcop.ru/sce43) |
-| <a href="https://www.suning.com/" class="favicon">Suning</a>                                | E-Commerce                      | User behaviour analytics | —                                                          | —                                                                            | [Blog article](https://www.sohu.com/a/434152235_411876) |
-| <a href="https://www.teralytics.net/" class="favicon">Teralytics</a>                                | Mobility                      | Analytics | —                                                          | —                                                                            | [Tech blog](https://www.teralytics.net/knowledge-hub/visualizing-mobility-data-the-scalability-challenge) |
-| <a href="https://www.tencent.com" class="favicon">Tencent</a>                                  | Big Data                        | Data processing       | —                                                          | —                                                                            | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf)                                                   |
-| <a href="https://www.tencent.com" class="favicon">Tencent</a>                                  | Messaging                       | Logging               | —                                                          | —                                                                            | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050)                                                                                                                                                   |
-| <a href="https://www.tencentmusic.com/" class="favicon">Tencent Music Entertainment (TME)</a>                                  | BigData                       | Data processing               | —                                                          | —                                                                            | [Blog in Chinese, June 2020](https://cloud.tencent.com/developer/article/1637840)                                                                                                                                                   |
-| <a href="https://trafficstars.com/" class="favicon">Traffic Stars</a>                          | AD network                      | —                     | —                                                          | —                                                                            | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf)                                                                                          |
-| <a href="https://www.uber.com" class="favicon">Uber</a>                                        | Taxi                            | Logging               | —                                                          | —                                                                            | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/uber.pdf)                                                                                                                                        |
-| <a href="https://vk.com" class="favicon">VKontakte</a>                                         | Social Network                  | Statistics, Logging   | —                                                          | —                                                                            | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf)                                                                                                  |
-| <a href="https://www.walmartlabs.com/" class="favicon">Walmart Labs</a>                        | Internet, Retail                | —                     | —                                                          | —                                                                            | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=144)                                                                                               |
-| <a href="https://wargaming.com/en/" class="favicon">Wargaming</a>                              | Games                           |                       | —                                                          | —                                                                            | [Interview](https://habr.com/en/post/496954/)                                                                                                                                                                           |
-| <a href="https://wisebits.com/" class="favicon">Wisebits</a>                                   | IT Solutions                    | Analytics             | —                                                          | —                                                                            | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf)                                                                                               |
-| <a href="https://www.workato.com/" class="favicon">Workato</a>                                 | Automation Software             | —                     | —                                                          | —                                                                            | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=334)                                                                                               |
-| <a href="http://www.xiaoxintech.cn/" class="favicon">Xiaoxin Tech</a>                          | Education                       | Common purpose        | —                                                          | —                                                                            | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/sync-clickhouse-with-mysql-mongodb.pptx)                                                                 |
-| <a href="https://www.ximalaya.com/" class="favicon">Ximalaya</a>                               | Audio sharing                   | OLAP                  | —                                                          | —                                                                            | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/ximalaya.pdf)                                                                                            |
-| <a href="https://cloud.yandex.ru/services/managed-clickhouse" class="favicon">Yandex Cloud</a> | Public Cloud                    | Main product          | —                                                          | —                                                                            | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o)                                                                                                                                           |
-| <a href="https://cloud.yandex.ru/services/datalens" class="favicon">Yandex DataLens</a>        | Business Intelligence           | Main product          | —                                                          | —                                                                            | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/datalens.pdf)                                                                                                                         |
-| <a href="https://market.yandex.ru/" class="favicon">Yandex Market</a>                          | e-Commerce                      | Metrics, Logging      | —                                                          | —                                                                            | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478)                                                                                                                                                     |
-| <a href="https://metrica.yandex.com" class="favicon">Yandex Metrica</a>                        | Web analytics                   | Main product          | 630 servers in one cluster, 360 servers in another cluster, 1862 servers in one department | 133 PiB / 8.31 PiB / 120 trillion records                                                        | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/introduction/#13)                                                                                                                                |
-| <a href="https://htc-cs.ru/" class="favicon">ЦВТ</a>                                           | Software Development            | Metrics, Logging      | —                                                          | —                                                                            | [Blog Post, March 2019, in Russian](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk)                                                                                                   |
-| <a href="https://mkb.ru/" class="favicon">МКБ</a>                                              | Bank                            | Web-system monitoring | —                                                          | —                                                                            | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf)                                                                                                |
-| <a href="https://cft.ru/" class="favicon">ЦФТ</a>                                              | Banking, Financial products, Payments                            |  — | —                                                          | —                                                                            | [Meetup in Russian, April 2020](https://team.cft.ru/events/162)                                                                                                |
-| <a href="https://www.kakaocorp.com/" class="favicon">kakaocorp</a>                                              | Internet company         |  — | —                                                          | —                                                                            | [if(kakao)2020 conference](https://if.kakao.com/session/117)                                                                                                |
+| <a href="https://carto.com/" class="favicon">CARTO</a> | Business Intelligence | Geo analytics | — | — | [Geospatial processing with ClickHouse](https://carto.com/blog/geospatial-processing-with-clickhouse/) |
+| <a href="http://public.web.cern.ch/public/" class="favicon">CERN</a> | Research | Experiment | — | — | [Press release, April 2012](https://www.yandex.com/company/press_center/press_releases/2012/2012-04-10/) |
+| <a href="http://cisco.com/" class="favicon">Cisco</a> | Networking | Traffic analysis | — | — | [Lightning talk, October 2019](https://youtu.be/-hI1vDR2oPY?t=5057) |
+| <a href="https://www.citadelsecurities.com/" class="favicon">Citadel Securities</a> | Finance | — | — | — | [Contribution, March 2019](https://github.com/ClickHouse/ClickHouse/pull/4774) |
+| <a href="https://city-mobil.ru" class="favicon">Citymobil</a> | Taxi | Analytics | — | — | [Blog Post in Russian, March 2020](https://habr.com/en/company/citymobil/blog/490660/) |
+| <a href="https://cloudflare.com" class="favicon">Cloudflare</a> | CDN | Traffic analysis | 36 servers | — | [Blog post, May 2017](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/), [Blog post, March 2018](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) |
+| <a href="https://corporate.comcast.com/" class="favicon">Comcast</a> | Media | CDN Traffic Analysis | — | — | [ApacheCon 2019 Talk](https://www.youtube.com/watch?v=e9TZ6gFDjNg) |
+| <a href="https://contentsquare.com" class="favicon">ContentSquare</a> | Web analytics | Main product | — | — | [Blog post in French, November 2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) |
+| <a href="https://coru.net/" class="favicon">Corunet</a> | Analytics | Main product | — | — | [Slides in English, April 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup21/predictive_models.pdf) |
+| <a href="https://www.creditx.com" class="favicon">CraiditX 氪信</a> | Finance AI | Analysis | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/udf.pptx) |
+| <a href="https://crazypanda.ru/en/" class="favicon">Crazypanda</a> | Games | | — | — | Live session on ClickHouse meetup |
+| <a href="https://www.criteo.com/" class="favicon">Criteo</a> | Retail | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/3_storetail.pptx) |
+| <a href="https://www.chinatelecomglobal.com/" class="favicon">Dataliance for China Telecom</a> | Telecom | Analytics | — | — | [Slides in Chinese, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/telecom.pdf) |
+| <a href="https://db.com" class="favicon">Deutsche Bank</a> | Finance | BI Analytics | — | — | [Slides in English, October 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) |
+| <a href="https://deeplay.io/eng/" class="favicon">Deeplay</a> | Gaming Analytics | — | — | — | [Job advertisement, 2020](https://career.habr.com/vacancies/1000062568) |
+| <a href="https://www.diva-e.com" class="favicon">Diva-e</a> | Digital consulting | Main Product | — | — | [Slides in English, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup29/ClickHouse-MeetUp-Unusual-Applications-sd-2019-09-17.pdf) |
+| <a href="https://www.ecwid.com/" class="favicon">Ecwid</a> | E-commerce SaaS | Metrics, Logging | — | — | [Slides in Russian, April 2019](https://nastachku.ru/var/files/1/presentation/backend/2_Backend_6.pdf) |
+| <a href="https://www.ebay.com/" class="favicon">eBay</a> | E-commerce | Logs, Metrics and Events | — | — | [Official website, Sep 2020](https://tech.ebayinc.com/engineering/ou-online-analytical-processing/) |
+| <a href="https://www.exness.com" class="favicon">Exness</a> | Trading | Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) |
+| <a href="https://fastnetmon.com/" class="favicon">FastNetMon</a> | DDoS Protection | Main Product | | — | [Official website](https://fastnetmon.com/docs-fnm-advanced/fastnetmon-advanced-traffic-persistency/) |
+| <a href="https://www.flipkart.com/" class="favicon">Flipkart</a> | e-Commerce | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=239) |
+| <a href="https://fun.co/rp" class="favicon">FunCorp</a> | Games | | — | — | [Article](https://www.altinity.com/blog/migrating-from-redshift-to-clickhouse) |
+| <a href="https://geniee.co.jp" class="favicon">Geniee</a> | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) |
+| <a href="https://www.genotek.ru/" class="favicon">Genotek</a> | Bioinformatics | Main product | — | — | [Video, August 2020](https://youtu.be/v3KyZbz9lEE) |
+| <a href="https://www.huya.com/" class="favicon">HUYA</a> | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) |
+| <a href="https://www.the-ica.com/" class="favicon">ICA</a> | FinTech | Risk Management | — | — | [Blog Post in English, Sep 2020](https://altinity.com/blog/clickhouse-vs-redshift-performance-for-fintech-risk-management?utm_campaign=ClickHouse%20vs%20RedShift&utm_content=143520807&utm_medium=social&utm_source=twitter&hss_channel=tw-3894792263) |
+| <a href="https://www.idealista.com" class="favicon">Idealista</a> | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.tech/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) |
+| <a href="https://www.infovista.com/" class="favicon">Infovista</a> | Networks | Analytics | — | — | [Slides in English, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) |
+| <a href="https://www.innogames.com" class="favicon">InnoGames</a> | Games | Metrics, Logging | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) |
+| <a href="https://www.instana.com" class="favicon">Instana</a> | APM Platform | Main product | — | — | [Twitter post](https://twitter.com/mieldonkers/status/1248884119158882304) |
+| <a href="https://integros.com" class="favicon">Integros</a> | Platform for video services | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) |
+| <a href="https://ippon.tech" class="favicon">Ippon Technologies</a> | Technology Consulting | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=205) |
+| <a href="https://www.ivi.ru/" class="favicon">Ivi</a> | Online Cinema | Analytics, Monitoring | — | — | [Article in Russian, Jan 2018](https://habr.com/en/company/ivi/blog/347408/) |
+| <a href="https://jinshuju.net" class="favicon">Jinshuju 金数据</a> | BI Analytics | Main product | — | — | [Slides in Chinese, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/3.%20金数据数据架构调整方案Public.pdf) |
+| <a href="https://www.kodiakdata.com/" class="favicon">Kodiak Data</a> | Clouds | Main product | — | — | [Slides in Engish, April 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup13/kodiak_data.pdf) |
+| <a href="https://kontur.ru" class="favicon">Kontur</a> | Software Development | Metrics | — | — | [Talk in Russian, November 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY) |
+| <a href="https://www.kuaishou.com/" class="favicon">Kuaishou</a> | Video | — | — | — | [ClickHouse Meetup, October 2018](https://clickhouse.tech/blog/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/) |
+| <a href="https://www.lbl.gov" class="favicon">Lawrence Berkeley National Laboratory</a> | Research | Traffic analysis | 1 server | 11.8 TiB | [Slides in English, April 2019](https://www.smitasin.com/presentations/2019-04-17_DOE-NSM.pdf) |
+| <a href="https://lifestreet.com/" class="favicon">LifeStreet</a> | Ad network | Main product | 75 servers (3 replicas) | 5.27 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) |
+| <a href="https://mcs.mail.ru/" class="favicon">Mail.ru Cloud Solutions</a> | Cloud services | Main product | — | — | [Article in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) |
+| <a href="https://tech.mymarilyn.ru" class="favicon">Marilyn</a> | Advertising | Statistics | — | — | [Talk in Russian, June 2017](https://www.youtube.com/watch?v=iXlIgx2khwc) |
+| <a href="https://mellodesign.ru/" class="favicon">Mello</a> | Marketing | Analytics | 1 server | — | [Article, Oct 2020](https://vc.ru/marketing/166180-razrabotka-tipovogo-otcheta-skvoznoy-analitiki) |
+| <a href="https://www.messagebird.com" class="favicon">MessageBird</a> | Telecommunications | Statistics | — | — | [Slides in English, November 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup20/messagebird.pdf) |
+| <a href="https://www.mindsdb.com/" class="favicon">MindsDB</a> | Machine Learning | Main Product | — | — | [Official Website](https://www.mindsdb.com/blog/machine-learning-models-as-tables-in-ch) |x
+| <a href="https://mux.com/" class="favicon">MUX</a> | Online Video | Video Analytics | — | — | [Talk in English, August 2019](https://altinity.com/presentations/2019/8/13/how-clickhouse-became-the-default-analytics-database-for-mux/) |
+| <a href="https://www.mgid.com/" class="favicon">MGID</a> | Ad network | Web-analytics | — | — | [Blog post in Russian, April 2020](http://gs-studio.com/news-about-it/32777----clickhouse---c) |
+| <a href="https://getnoc.com/" class="favicon">NOC Project</a> | Network Monitoring | Analytics | Main Product | — | [Official Website](https://getnoc.com/features/big-data/) |
+| <a href="https://www.nuna.com/" class="favicon">Nuna Inc.</a> | Health Data Analytics | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=170) |
+| <a href="https://www.oneapm.com/" class="favicon">OneAPM</a> | Monitorings and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) |
+| <a href="https://www.percent.cn/" class="favicon">Percent 百分点</a> | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) |
+| <a href="https://www.percona.com/" class="favicon">Percona</a> | Performance analysis | Percona Monitoring and Management | — | — | [Official website, Mar 2020](https://www.percona.com/blog/2020/03/30/advanced-query-analysis-in-percona-monitoring-and-management-with-direct-clickhouse-access/) |
+| <a href="https://plausible.io/" class="favicon">Plausible</a> | Analytics | Main Product | — | — | [Blog post, June 2020](https://twitter.com/PlausibleHQ/status/1273889629087969280) |
+| <a href="https://posthog.com/" class="favicon">PostHog</a> | Product Analytics | Main Product | — | — | [Release Notes, Oct 2020](https://posthog.com/blog/the-posthog-array-1-15-0) |
+| <a href="https://postmates.com/" class="favicon">Postmates</a> | Delivery | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=188) |
+| <a href="http://www.pragma-innovation.fr/" class="favicon">Pragma Innovation</a> | Telemetry and Big Data Analysis | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) |
+| <a href="https://www.qingcloud.com/" class="favicon">QINGCLOUD</a> | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) |
+| <a href="https://qrator.net" class="favicon">Qrator</a> | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) |
+| <a href="https://www.rbinternational.com/" class="favicon">Raiffeisenbank</a> | Banking | Analytics | — | — | [Lecture in Russian, December 2020](https://cs.hse.ru/announcements/421965599.html) |
+| <a href="https://rambler.ru" class="favicon">Rambler</a> | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) |
+| <a href="https://retell.cc/" class="favicon">Retell</a> | Speech synthesis | Analytics | — | — | [Blog Article, August 2020](https://vc.ru/services/153732-kak-sozdat-audiostati-na-vashem-sayte-i-zachem-eto-nuzhno) |
+| <a href="https://rspamd.com/" class="favicon">Rspamd</a> | Antispam | Analytics | — | — | [Official Website](https://rspamd.com/doc/modules/clickhouse.html) |
+| <a href="https://rusiem.com/en" class="favicon">RuSIEM</a> | SIEM | Main Product | — | — | [Official Website](https://rusiem.com/en/products/architecture) |
+| <a href="https://www.s7.ru" class="favicon">S7 Airlines</a> | Airlines | Metrics, Logging | — | — | [Talk in Russian, March 2019](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s) |
+| <a href="https://www.scireum.de/" class="favicon">scireum GmbH</a> | e-Commerce | Main product | — | — | [Talk in German, February 2020](https://www.youtube.com/watch?v=7QWAn5RbyR4) |
+| <a href="https://segment.com/" class="favicon">Segment</a> | Data processing | Main product | 9 * i3en.3xlarge nodes 7.5TB NVME SSDs, 96GB Memory, 12 vCPUs | — | [Slides, 2019](https://slides.com/abraithwaite/segment-clickhouse) |
+| <a href="https://www.semrush.com/" class="favicon">SEMrush</a> | Marketing | Main product | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/5_semrush.pdf) |
+| <a href="https://sentry.io/" class="favicon">Sentry</a> | Software Development | Main product | — | — | [Blog Post in English, May 2019](https://blog.sentry.io/2019/05/16/introducing-snuba-sentrys-new-search-infrastructure) |
+| <a href="https://seo.do/" class="favicon">seo.do</a> | Analytics | Main product | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/CH%20Presentation-%20Metehan%20Çetinkaya.pdf) |
+| <a href="http://www.sgk.gov.tr/wps/portal/sgk/tr" class="favicon">SGK</a> | Goverment Social Security | Analytics | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/ClickHouse%20Meetup-Ramazan%20POLAT.pdf) |
+| <a href="http://english.sina.com/index.html" class="favicon">Sina</a> | News | — | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/6.%20ClickHouse最佳实践%20高鹏_新浪.pdf) |
+| <a href="https://smi2.ru/" class="favicon">SMI2</a> | News | Analytics | — | — | [Blog Post in Russian, November 2017](https://habr.com/ru/company/smi2/blog/314558/) |
+| <a href="https://www.splunk.com/" class="favicon">Splunk</a> | Business Analytics | Main product | — | — | [Slides in English, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/splunk.pdf) |
+| <a href="https://www.spotify.com" class="favicon">Spotify</a> | Music | Experimentation | — | — | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) |
+| <a href="https://www.staffcop.ru/" class="favicon">Staffcop</a> | Information Security | Main Product | — | — | [Official website, Documentation](https://www.staffcop.ru/sce43) |
+| <a href="https://www.suning.com/" class="favicon">Suning</a> | E-Commerce | User behaviour analytics | — | — | [Blog article](https://www.sohu.com/a/434152235_411876) |
+| <a href="https://www.teralytics.net/" class="favicon">Teralytics</a> | Mobility | Analytics | — | — | [Tech blog](https://www.teralytics.net/knowledge-hub/visualizing-mobility-data-the-scalability-challenge) |
+| <a href="https://www.tencent.com" class="favicon">Tencent</a> | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) |
+| <a href="https://www.tencent.com" class="favicon">Tencent</a> | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) |
+| <a href="https://www.tencentmusic.com/" class="favicon">Tencent Music Entertainment (TME)</a> | BigData | Data processing | — | — | [Blog in Chinese, June 2020](https://cloud.tencent.com/developer/article/1637840) |
+| <a href="https://trafficstars.com/" class="favicon">Traffic Stars</a> | AD network | — | — | — | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) |
+| <a href="https://www.uber.com" class="favicon">Uber</a> | Taxi | Logging | — | — | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/uber.pdf) |
+| <a href="https://vk.com" class="favicon">VKontakte</a> | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) |
+| <a href="https://www.walmartlabs.com/" class="favicon">Walmart Labs</a> | Internet, Retail | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=144) |
+| <a href="https://wargaming.com/en/" class="favicon">Wargaming</a> | Games | | — | — | [Interview](https://habr.com/en/post/496954/) |
+| <a href="https://wisebits.com/" class="favicon">Wisebits</a> | IT Solutions | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) |
+| <a href="https://www.workato.com/" class="favicon">Workato</a> | Automation Software | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=334) |
+| <a href="http://www.xiaoxintech.cn/" class="favicon">Xiaoxin Tech</a> | Education | Common purpose | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/sync-clickhouse-with-mysql-mongodb.pptx) |
+| <a href="https://www.ximalaya.com/" class="favicon">Ximalaya</a> | Audio sharing | OLAP | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/ximalaya.pdf) |
+| <a href="https://cloud.yandex.ru/services/managed-clickhouse" class="favicon">Yandex Cloud</a> | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) |
+| <a href="https://cloud.yandex.ru/services/datalens" class="favicon">Yandex DataLens</a> | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/datalens.pdf) |
+| <a href="https://market.yandex.ru/" class="favicon">Yandex Market</a> | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) |
+| <a href="https://metrica.yandex.com" class="favicon">Yandex Metrica</a> | Web analytics | Main product | 630 servers in one cluster, 360 servers in another cluster, 1862 servers in one department | 133 PiB / 8.31 PiB / 120 trillion records | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/introduction/#13) |
+| <a href="https://htc-cs.ru/" class="favicon">ЦВТ</a> | Software Development | Metrics, Logging | — | — | [Blog Post, March 2019, in Russian](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk) |
+| <a href="https://mkb.ru/" class="favicon">МКБ</a> | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) |
+| <a href="https://cft.ru/" class="favicon">ЦФТ</a> | Banking, Financial products, Payments | — | — | — | [Meetup in Russian, April 2020](https://team.cft.ru/events/162) |
+| <a href="https://www.kakaocorp.com/" class="favicon">kakaocorp</a> | Internet company | — | — | — | [if(kakao)2020 conference](https://if.kakao.com/session/117) |
 
 [Original article](https://clickhouse.tech/docs/en/introduction/adopters/) <!--hide-->

From d265e3b4197a07235863f113f736d24de3e31fd9 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Tue, 2 Feb 2021 11:02:25 +0300
Subject: [PATCH 0525/1238] Less timeouts

---
 programs/server/Server.cpp                         |  4 ++--
 programs/server/config.d/nu_keeper_port.xml        |  1 -
 programs/server/config.d/test_keeper_port.xml      |  1 +
 src/Coordination/NuKeeperStorageDispatcher.cpp     | 14 +++++++-------
 src/Coordination/ya.make                           | 10 ----------
 src/Interpreters/Context.cpp                       |  2 +-
 .../{nu_keeper_port.xml => test_keeper_port.xml}   |  4 ++--
 .../configs/enable_test_keeper.xml                 |  4 ++--
 .../configs/enable_test_keeper1.xml                |  4 ++--
 .../configs/enable_test_keeper2.xml                |  4 ++--
 .../configs/enable_test_keeper3.xml                |  4 ++--
 .../integration/test_testkeeper_multinode/test.py  |  4 ++--
 12 files changed, 23 insertions(+), 33 deletions(-)
 delete mode 120000 programs/server/config.d/nu_keeper_port.xml
 create mode 120000 programs/server/config.d/test_keeper_port.xml
 rename tests/config/config.d/{nu_keeper_port.xml => test_keeper_port.xml} (88%)

diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index fb58e85d813..a96cb2b8973 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -844,7 +844,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
         listen_try = true;
     }
 
-    if (config().has("nu_keeper_server"))
+    if (config().has("test_keeper_server"))
     {
 #if USE_NURAFT
         /// Initialize test keeper RAFT. Do nothing if no nu_keeper_server in config.
@@ -852,7 +852,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
         for (const auto & listen_host : listen_hosts)
         {
             /// TCP NuKeeper
-            const char * port_name = "nu_keeper_server.tcp_port";
+            const char * port_name = "test_keeper_server.tcp_port";
             createServer(listen_host, port_name, listen_try, [&](UInt16 port)
             {
                 Poco::Net::ServerSocket socket;
diff --git a/programs/server/config.d/nu_keeper_port.xml b/programs/server/config.d/nu_keeper_port.xml
deleted file mode 120000
index 8de0a309ff0..00000000000
--- a/programs/server/config.d/nu_keeper_port.xml
+++ /dev/null
@@ -1 +0,0 @@
-../../../tests/config/config.d/nu_keeper_port.xml
\ No newline at end of file
diff --git a/programs/server/config.d/test_keeper_port.xml b/programs/server/config.d/test_keeper_port.xml
new file mode 120000
index 00000000000..f3f721caae0
--- /dev/null
+++ b/programs/server/config.d/test_keeper_port.xml
@@ -0,0 +1 @@
+../../../tests/config/config.d/test_keeper_port.xml
\ No newline at end of file
diff --git a/src/Coordination/NuKeeperStorageDispatcher.cpp b/src/Coordination/NuKeeperStorageDispatcher.cpp
index c531939d6ee..9988e0ac476 100644
--- a/src/Coordination/NuKeeperStorageDispatcher.cpp
+++ b/src/Coordination/NuKeeperStorageDispatcher.cpp
@@ -107,24 +107,24 @@ namespace
 void NuKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfiguration & config)
 {
     LOG_DEBUG(log, "Initializing storage dispatcher");
-    int myid = config.getInt("nu_keeper_server.server_id");
+    int myid = config.getInt("test_keeper_server.server_id");
     std::string myhostname;
     int myport;
     int32_t my_priority = 1;
 
     Poco::Util::AbstractConfiguration::Keys keys;
-    config.keys("nu_keeper_server.raft_configuration", keys);
+    config.keys("test_keeper_server.raft_configuration", keys);
     bool my_can_become_leader = true;
 
     std::vector<std::tuple<int, std::string, int, bool, int32_t>> server_configs;
     std::vector<int32_t> ids;
     for (const auto & server_key : keys)
     {
-        int server_id = config.getInt("nu_keeper_server.raft_configuration." + server_key + ".id");
-        std::string hostname = config.getString("nu_keeper_server.raft_configuration." + server_key + ".hostname");
-        int port = config.getInt("nu_keeper_server.raft_configuration." + server_key + ".port");
-        bool can_become_leader = config.getBool("nu_keeper_server.raft_configuration." + server_key + ".can_become_leader", true);
-        int32_t priority = config.getInt("nu_keeper_server.raft_configuration." + server_key + ".priority", 1);
+        int server_id = config.getInt("test_keeper_server.raft_configuration." + server_key + ".id");
+        std::string hostname = config.getString("test_keeper_server.raft_configuration." + server_key + ".hostname");
+        int port = config.getInt("test_keeper_server.raft_configuration." + server_key + ".port");
+        bool can_become_leader = config.getBool("test_keeper_server.raft_configuration." + server_key + ".can_become_leader", true);
+        int32_t priority = config.getInt("test_keeper_server.raft_configuration." + server_key + ".priority", 1);
         if (server_id == myid)
         {
             myhostname = hostname;
diff --git a/src/Coordination/ya.make b/src/Coordination/ya.make
index 833ca27f2f4..470fe8c75be 100644
--- a/src/Coordination/ya.make
+++ b/src/Coordination/ya.make
@@ -5,20 +5,10 @@ LIBRARY()
 
 PEERDIR(
     clickhouse/src/Common
-    contrib/libs/NuRaft
 )
 
-
 SRCS(
-    InMemoryLogStore.cpp
-    InMemoryStateManager.cpp
-    NuKeeperServer.cpp
-    NuKeeperStateMachine.cpp
-    NuKeeperStorage.cpp
     NuKeeperStorageDispatcher.cpp
-    NuKeeperStorageSerializer.cpp
-    SummingStateMachine.cpp
-    WriteBufferFromNuraftBuffer.cpp
 
 )
 
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 983ac733849..b913c3ed396 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -1591,7 +1591,7 @@ void Context::initializeNuKeeperStorageDispatcher() const
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to initialize NuKeeper multiple times");
 
     const auto & config = getConfigRef();
-    if (config.has("nu_keeper_server"))
+    if (config.has("test_keeper_server"))
     {
         shared->nu_keeper_storage_dispatcher = std::make_shared<NuKeeperStorageDispatcher>();
         shared->nu_keeper_storage_dispatcher->initialize(config);
diff --git a/tests/config/config.d/nu_keeper_port.xml b/tests/config/config.d/test_keeper_port.xml
similarity index 88%
rename from tests/config/config.d/nu_keeper_port.xml
rename to tests/config/config.d/test_keeper_port.xml
index afd22955a33..fff60d749f6 100644
--- a/tests/config/config.d/nu_keeper_port.xml
+++ b/tests/config/config.d/test_keeper_port.xml
@@ -1,5 +1,5 @@
 <yandex>
-    <nu_keeper_server>
+    <test_keeper_server>
         <tcp_port>9181</tcp_port>
         <operation_timeout_ms>10000</operation_timeout_ms>
         <session_timeout_ms>30000</session_timeout_ms>
@@ -11,5 +11,5 @@
                 <port>44444</port>
             </server>
         </raft_configuration>
-    </nu_keeper_server>
+    </test_keeper_server>
 </yandex>
diff --git a/tests/integration/test_testkeeper_back_to_back/configs/enable_test_keeper.xml b/tests/integration/test_testkeeper_back_to_back/configs/enable_test_keeper.xml
index afd22955a33..fff60d749f6 100644
--- a/tests/integration/test_testkeeper_back_to_back/configs/enable_test_keeper.xml
+++ b/tests/integration/test_testkeeper_back_to_back/configs/enable_test_keeper.xml
@@ -1,5 +1,5 @@
 <yandex>
-    <nu_keeper_server>
+    <test_keeper_server>
         <tcp_port>9181</tcp_port>
         <operation_timeout_ms>10000</operation_timeout_ms>
         <session_timeout_ms>30000</session_timeout_ms>
@@ -11,5 +11,5 @@
                 <port>44444</port>
             </server>
         </raft_configuration>
-    </nu_keeper_server>
+    </test_keeper_server>
 </yandex>
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
index fde0d511886..81f68f50c7c 100644
--- a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
@@ -1,5 +1,5 @@
 <yandex>
-    <nu_keeper_server>
+    <test_keeper_server>
         <tcp_port>9181</tcp_port>
         <operation_timeout_ms>10000</operation_timeout_ms>
         <session_timeout_ms>30000</session_timeout_ms>
@@ -27,5 +27,5 @@
                 <priority>1</priority>
             </server>
         </raft_configuration>
-    </nu_keeper_server>
+    </test_keeper_server>
 </yandex>
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
index c6f4e7b5a22..73340973367 100644
--- a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
@@ -1,5 +1,5 @@
 <yandex>
-    <nu_keeper_server>
+    <test_keeper_server>
         <tcp_port>9181</tcp_port>
         <operation_timeout_ms>10000</operation_timeout_ms>
         <session_timeout_ms>30000</session_timeout_ms>
@@ -27,5 +27,5 @@
                 <priority>1</priority>
             </server>
         </raft_configuration>
-    </nu_keeper_server>
+    </test_keeper_server>
 </yandex>
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
index d1e8830c480..fbc51489d11 100644
--- a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
@@ -1,5 +1,5 @@
 <yandex>
-    <nu_keeper_server>
+    <test_keeper_server>
         <tcp_port>9181</tcp_port>
         <operation_timeout_ms>10000</operation_timeout_ms>
         <session_timeout_ms>30000</session_timeout_ms>
@@ -27,5 +27,5 @@
                 <priority>1</priority>
             </server>
         </raft_configuration>
-    </nu_keeper_server>
+    </test_keeper_server>
 </yandex>
diff --git a/tests/integration/test_testkeeper_multinode/test.py b/tests/integration/test_testkeeper_multinode/test.py
index fe568e7252d..7b9430b2368 100644
--- a/tests/integration/test_testkeeper_multinode/test.py
+++ b/tests/integration/test_testkeeper_multinode/test.py
@@ -183,14 +183,14 @@ def test_blocade_leader_twice(started_cluster):
         # Total network partition
         pm.partition_instances(node3, node2)
 
-        for i in range(30):
+        for i in range(10):
             try:
                 node3.query("INSERT INTO t2 SELECT rand() FROM numbers(100)")
                 assert False, "Node3 became leader?"
             except Exception as ex:
                 time.sleep(0.5)
 
-        for i in range(30):
+        for i in range(10):
             try:
                 node2.query("INSERT INTO t2 SELECT rand() FROM numbers(100)")
                 assert False, "Node2 became leader?"

From 19d6e867c6b439c9150171ccd32e545b7872bd17 Mon Sep 17 00:00:00 2001
From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com>
Date: Tue, 2 Feb 2021 12:08:47 +0300
Subject: [PATCH 0526/1238] HDFS test had a clash with 9000 ClickHouse port.

---
 .../docker_compose_kerberized_hdfs.yml        |  2 +-
 tests/integration/helpers/cluster.py          |  6 +++---
 .../hdfs_configs/bootstrap.sh                 |  4 ++--
 .../test_storage_kerberized_hdfs/test.py      | 20 +++++++++----------
 4 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml b/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml
index a74476613f3..f2a659bce58 100644
--- a/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml
+++ b/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml
@@ -14,7 +14,7 @@ services:
     ports:
       - 1006:1006
       - 50070:50070
-      - 9000:9000
+      - 9010:9010
     depends_on:
         - hdfskerberos
     entrypoint: /etc/bootstrap.sh -d
diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py
index 64f7e5dd889..a04c1b7bf7d 100644
--- a/tests/integration/helpers/cluster.py
+++ b/tests/integration/helpers/cluster.py
@@ -48,9 +48,9 @@ def run_and_check(args, env=None, shell=False):
     res = subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env, shell=shell)
     if res.returncode != 0:
         # check_call(...) from subprocess does not print stderr, so we do it manually
-        print('Stderr:\n{}\n'.format(res.stderr))
-        print('Stdout:\n{}\n'.format(res.stdout))
-        raise Exception('Command {} return non-zero code {}: {}'.format(args, res.returncode, res.stderr))
+        print('Stderr:\n{}\n'.format(res.stderr.decode('utf-8')))
+        print('Stdout:\n{}\n'.format(res.stdout.decode('utf-8')))
+        raise Exception('Command {} return non-zero code {}: {}'.format(args, res.returncode, res.stderr.decode('utf-8')))
 
 
 def subprocess_check_call(args):
diff --git a/tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh b/tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh
index 38f098ae1e1..971491d4053 100755
--- a/tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh
+++ b/tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh
@@ -34,11 +34,11 @@ cat >> /usr/local/hadoop/etc/hadoop/core-site.xml << EOF
    </property>
   <property>
     <name>fs.defaultFS</name>
-    <value>hdfs://kerberizedhdfs1:9000</value>
+    <value>hdfs://kerberizedhdfs1:9010</value>
   </property>
   <property>
     <name>fs.default.name</name>
-    <value>hdfs://kerberizedhdfs1:9000</value>
+    <value>hdfs://kerberizedhdfs1:9010</value>
   </property>
   <!--
   <property>
diff --git a/tests/integration/test_storage_kerberized_hdfs/test.py b/tests/integration/test_storage_kerberized_hdfs/test.py
index a2a2a4ef88e..1fffd7a8c12 100644
--- a/tests/integration/test_storage_kerberized_hdfs/test.py
+++ b/tests/integration/test_storage_kerberized_hdfs/test.py
@@ -29,12 +29,12 @@ def test_read_table(started_cluster):
     api_read = started_cluster.hdfs_api.read_data("/simple_table_function")
     assert api_read == data
 
-    select_read = node1.query("select * from hdfs('hdfs://kerberizedhdfs1:9000/simple_table_function', 'TSV', 'id UInt64, text String, number Float64')")
+    select_read = node1.query("select * from hdfs('hdfs://kerberizedhdfs1:9010/simple_table_function', 'TSV', 'id UInt64, text String, number Float64')")
     assert select_read == data
 
 
 def test_read_write_storage(started_cluster):
-    node1.query("create table SimpleHDFSStorage2 (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://kerberizedhdfs1:9000/simple_storage1', 'TSV')")
+    node1.query("create table SimpleHDFSStorage2 (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://kerberizedhdfs1:9010/simple_storage1', 'TSV')")
     node1.query("insert into SimpleHDFSStorage2 values (1, 'Mark', 72.53)")
 
     api_read = started_cluster.hdfs_api.read_data("/simple_storage1")
@@ -45,7 +45,7 @@ def test_read_write_storage(started_cluster):
 
 
 def test_write_storage_not_expired(started_cluster):
-    node1.query("create table SimpleHDFSStorageNotExpired (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://kerberizedhdfs1:9000/simple_storage_not_expired', 'TSV')")
+    node1.query("create table SimpleHDFSStorageNotExpired (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://kerberizedhdfs1:9010/simple_storage_not_expired', 'TSV')")
 
     time.sleep(45)   # wait for ticket expiration
     node1.query("insert into SimpleHDFSStorageNotExpired values (1, 'Mark', 72.53)")
@@ -58,15 +58,15 @@ def test_write_storage_not_expired(started_cluster):
 
 
 def test_two_users(started_cluster):
-    node1.query("create table HDFSStorOne (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://kerberizedhdfs1:9000/storage_user_one', 'TSV')")
+    node1.query("create table HDFSStorOne (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://kerberizedhdfs1:9010/storage_user_one', 'TSV')")
     node1.query("insert into HDFSStorOne values (1, 'Real', 86.00)")
 
-    node1.query("create table HDFSStorTwo (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://suser@kerberizedhdfs1:9000/user/specuser/storage_user_two', 'TSV')")
+    node1.query("create table HDFSStorTwo (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://suser@kerberizedhdfs1:9010/user/specuser/storage_user_two', 'TSV')")
     node1.query("insert into HDFSStorTwo values (1, 'Ideal', 74.00)")
 
-    select_read_1 = node1.query("select * from hdfs('hdfs://kerberizedhdfs1:9000/user/specuser/storage_user_two', 'TSV', 'id UInt64, text String, number Float64')")
+    select_read_1 = node1.query("select * from hdfs('hdfs://kerberizedhdfs1:9010/user/specuser/storage_user_two', 'TSV', 'id UInt64, text String, number Float64')")
 
-    select_read_2 = node1.query("select * from hdfs('hdfs://suser@kerberizedhdfs1:9000/storage_user_one', 'TSV', 'id UInt64, text String, number Float64')")
+    select_read_2 = node1.query("select * from hdfs('hdfs://suser@kerberizedhdfs1:9010/storage_user_one', 'TSV', 'id UInt64, text String, number Float64')")
 
 def test_read_table_expired(started_cluster):
     data = "1\tSerialize\t555.222\n2\tData\t777.333\n"
@@ -76,7 +76,7 @@ def test_read_table_expired(started_cluster):
     time.sleep(45)
 
     try:
-        select_read = node1.query("select * from hdfs('hdfs://reloginuser&kerberizedhdfs1:9000/simple_table_function', 'TSV', 'id UInt64, text String, number Float64')")
+        select_read = node1.query("select * from hdfs('hdfs://reloginuser&kerberizedhdfs1:9010/simple_table_function', 'TSV', 'id UInt64, text String, number Float64')")
         assert False, "Exception have to be thrown"
     except Exception as ex:
         assert "DB::Exception: kinit failure:" in str(ex)
@@ -85,7 +85,7 @@ def test_read_table_expired(started_cluster):
 
 
 def test_prohibited(started_cluster):
-    node1.query("create table HDFSStorTwoProhibited (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://suser@kerberizedhdfs1:9000/storage_user_two_prohibited', 'TSV')")
+    node1.query("create table HDFSStorTwoProhibited (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://suser@kerberizedhdfs1:9010/storage_user_two_prohibited', 'TSV')")
     try:
         node1.query("insert into HDFSStorTwoProhibited values (1, 'SomeOne', 74.00)")
         assert False, "Exception have to be thrown"
@@ -94,7 +94,7 @@ def test_prohibited(started_cluster):
 
 
 def test_cache_path(started_cluster):
-    node1.query("create table HDFSStorCachePath (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://dedicatedcachepath@kerberizedhdfs1:9000/storage_dedicated_cache_path', 'TSV')")
+    node1.query("create table HDFSStorCachePath (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://dedicatedcachepath@kerberizedhdfs1:9010/storage_dedicated_cache_path', 'TSV')")
     try:
         node1.query("insert into HDFSStorCachePath values (1, 'FatMark', 92.53)")
         assert False, "Exception have to be thrown"

From 0b166e1cbd5431af20a179b702cd68eb177a7a75 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Tue, 2 Feb 2021 12:11:20 +0300
Subject: [PATCH 0527/1238] Add performance test

---
 tests/performance/avg.xml | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 tests/performance/avg.xml

diff --git a/tests/performance/avg.xml b/tests/performance/avg.xml
new file mode 100644
index 00000000000..b58b5d7cdb8
--- /dev/null
+++ b/tests/performance/avg.xml
@@ -0,0 +1,3 @@
+<test>
+    <query>SELECT avg(number) FROM numbers(1000000000)</query>
+</test>

From 0edf65c0945171e97a6f06d4112f310d61c2f772 Mon Sep 17 00:00:00 2001
From: feng lv <fenglv15@mails.ucas.ac.cn>
Date: Tue, 2 Feb 2021 06:06:17 +0000
Subject: [PATCH 0528/1238] fix test

fix

update

fix spell
---
 src/Core/Settings.h                           |   2 +-
 .../DistributedBlockOutputStream.cpp          |   4 +-
 .../01684_insert_specify_shard_id.reference   | 118 +++++++++++++++---
 .../01684_insert_specify_shard_id.sql         |  25 +++-
 4 files changed, 122 insertions(+), 27 deletions(-)

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 6f30c4523e9..1e12a640445 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -420,7 +420,7 @@ class IColumn;
     M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \
     \
     M(Bool, optimize_rewrite_sum_if_to_count_if, true, "Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent", 0) \
-    M(UInt64, insert_shard_id, 0, "If non zero, when insert intoi a distributed table, the data will be inserted into the shard `insert_shard_id` synchronously. Possible values range from 1 to `shards_number` of corresponding distributed table", 0) \
+    M(UInt64, insert_shard_id, 0, "If non zero, when insert into a distributed table, the data will be inserted into the shard `insert_shard_id` synchronously. Possible values range from 1 to `shards_number` of corresponding distributed table", 0) \
     /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
     \
     M(UInt64, max_memory_usage_for_all_queries, 0, "Obsolete. Will be removed after 2020-10-20", 0) \
diff --git a/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/src/Storages/Distributed/DistributedBlockOutputStream.cpp
index c83133a55e6..23270a5728c 100644
--- a/src/Storages/Distributed/DistributedBlockOutputStream.cpp
+++ b/src/Storages/Distributed/DistributedBlockOutputStream.cpp
@@ -60,7 +60,7 @@ namespace ErrorCodes
     extern const int TIMEOUT_EXCEEDED;
 }
 
-static void writeBlockConvert(const BlockOutputStreamPtr & out, const Block & block, const size_t repeats)
+static void writeBlockConvert(const BlockOutputStreamPtr & out, const Block & block, size_t repeats)
 {
     if (!blocksHaveEqualStructure(out->getHeader(), block))
     {
@@ -588,7 +588,7 @@ void DistributedBlockOutputStream::writeAsyncImpl(const Block & block, size_t sh
 }
 
 
-void DistributedBlockOutputStream::writeToLocal(const Block & block, const size_t repeats)
+void DistributedBlockOutputStream::writeToLocal(const Block & block, size_t repeats)
 {
     /// Async insert does not support settings forwarding yet whereas sync one supports
     InterpreterInsertQuery interp(query_ast, context);
diff --git a/tests/queries/0_stateless/01684_insert_specify_shard_id.reference b/tests/queries/0_stateless/01684_insert_specify_shard_id.reference
index e542790e401..1cd79d87135 100644
--- a/tests/queries/0_stateless/01684_insert_specify_shard_id.reference
+++ b/tests/queries/0_stateless/01684_insert_specify_shard_id.reference
@@ -1,40 +1,120 @@
 0
+0
 1
-2
-3
-4
-5
-6
-7
-8
-9
-10
-11
-12
-13
-14
-15
-16
-17
-18
-19
-0
 1
 2
+2
+3
 3
 4
+4
+5
 5
 6
+6
+7
 7
 8
+8
+9
+9
+0
+0
+1
+1
+2
+2
+3
+3
+4
+4
+5
+5
+6
+6
+7
+7
+8
+8
+9
+9
+0
+0
+1
+1
+2
+2
+3
+3
+4
+4
+5
+5
+6
+6
+7
+7
+8
+8
+9
 9
 10
+10
+11
 11
 12
+12
+13
 13
 14
+14
+15
 15
 16
+16
+17
 17
 18
+18
+19
+19
+0
+0
+1
+1
+2
+2
+3
+3
+4
+4
+5
+5
+6
+6
+7
+7
+8
+8
+9
+9
+10
+10
+11
+11
+12
+12
+13
+13
+14
+14
+15
+15
+16
+16
+17
+17
+18
+18
+19
 19
diff --git a/tests/queries/0_stateless/01684_insert_specify_shard_id.sql b/tests/queries/0_stateless/01684_insert_specify_shard_id.sql
index d4a3ae6a48d..ce1c7807b59 100644
--- a/tests/queries/0_stateless/01684_insert_specify_shard_id.sql
+++ b/tests/queries/0_stateless/01684_insert_specify_shard_id.sql
@@ -1,22 +1,37 @@
 DROP TABLE IF EXISTS x;
+DROP TABLE IF EXISTS x_dist;
+DROP TABLE IF EXISTS y;
+DROP TABLE IF EXISTS y_dist;
 
 CREATE TABLE x AS system.numbers ENGINE = MergeTree ORDER BY number;
+CREATE TABLE y AS system.numbers ENGINE = MergeTree ORDER BY number;
 
 CREATE TABLE x_dist as x ENGINE = Distributed('test_cluster_two_shards', currentDatabase(), x);
+CREATE TABLE y_dist as y ENGINE = Distributed('test_cluster_two_shards_localhost', currentDatabase(), y);
 
 -- insert into first shard
 INSERT INTO x_dist SELECT * FROM numbers(10) settings insert_shard_id = 1;
+INSERT INTO y_dist SELECT * FROM numbers(10) settings insert_shard_id = 1;
+
+SELECT * FROM x_dist ORDER by number;
+SELECT * FROM y_dist ORDER by number;
 
 -- insert into second shard
 INSERT INTO x_dist SELECT * FROM numbers(10, 10) settings insert_shard_id = 2;
+INSERT INTO y_dist SELECT * FROM numbers(10, 10) settings insert_shard_id = 2;
+
+SELECT * FROM x_dist ORDER by number;
+SELECT * FROM y_dist ORDER by number;
+
+-- no sharding key
+INSERT INTO x_dist SELECT * FROM numbers(10); -- { serverError 55 }
+INSERT INTO y_dist SELECT * FROM numbers(10); -- { serverError 55 }
 
 -- invalid shard id
 INSERT INTO x_dist SELECT * FROM numbers(10) settings insert_shard_id = 3; -- { serverError 1003 }
-
-SELECT * FROM remote('127.0.0.1', currentDatabase(), x);
-SELECT * FROM remote('127.0.0.2', currentDatabase(), x);
-
-SELECT * FROM x_dist ORDER by number;
+INSERT INTO y_dist SELECT * FROM numbers(10) settings insert_shard_id = 3; -- { serverError 1003 }
 
 DROP TABLE x;
 DROP TABLE x_dist;
+DROP TABLE y;
+DROP TABLE y_dist;

From c889f51e9e8ba5277035c3a3ec4e1a00d6d8ee0a Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Tue, 2 Feb 2021 12:50:58 +0300
Subject: [PATCH 0529/1238] Updated performance benchmark

---
 tests/performance/reinterpret_as.xml | 266 ++++++++++++++++++++++++---
 1 file changed, 245 insertions(+), 21 deletions(-)

diff --git a/tests/performance/reinterpret_as.xml b/tests/performance/reinterpret_as.xml
index 16045d255b0..50cf0cb2278 100644
--- a/tests/performance/reinterpret_as.xml
+++ b/tests/performance/reinterpret_as.xml
@@ -4,30 +4,254 @@
         <max_memory_usage>15G</max_memory_usage>
     </settings>
 
-    <create_query>CREATE TABLE t (a UInt64, b UInt256, c Int128, d Int256, f String, g FixedString(20)) ENGINE = Memory</create_query>
-    <!-- use less threads to save memory -->
-    <fill_query>
-        INSERT INTO t
-        SELECT number, number, number, number, toString(number) as string, string
-        FROM numbers_mt(200000000) SETTINGS max_threads = 8
-    </fill_query>
+    <query>
+        SELECT
+            reinterpretAsUInt8(a),
+            reinterpretAsUInt8(b),
+            reinterpretAsUInt8(c),
+            reinterpretAsUInt8(d),
+            reinterpretAsUInt8(f),
+            reinterpretAsUInt8(g),
 
-    <drop_query>DROP TABLE IF EXISTS t</drop_query>
+            toUInt64(number) as a,
+            toUInt256(number) as b,
+            toInt128(number) as c,
+            toInt256(number) as d,
+            toString(number) as f,
+            toFixedString(f, 20) as g
+        FROM numbers_mt(200000000)
+        SETTINGS max_threads = 8
+        FORMAT Null
+    </query>
+    <query>
+        SELECT
+            reinterpretAsUInt16(a),
+            reinterpretAsUInt16(b),
+            reinterpretAsUInt16(c),
+            reinterpretAsUInt16(d),
+            reinterpretAsUInt16(f),
+            reinterpretAsUInt16(g),
 
-    <query>SELECT reinterpretAsUInt8(a), reinterpretAsUInt8(b), reinterpretAsUInt8(c), reinterpretAsUInt8(d), reinterpretAsUInt8(f), reinterpretAsUInt8(g) FROM t FORMAT Null</query>
-    <query>SELECT reinterpretAsUInt16(a), reinterpretAsUInt16(b), reinterpretAsUInt16(c), reinterpretAsUInt16(d), reinterpretAsUInt16(f), reinterpretAsUInt16(g) FROM t FORMAT Null</query>
-    <query>SELECT reinterpretAsUInt32(a), reinterpretAsUInt32(b), reinterpretAsUInt32(c), reinterpretAsUInt32(d), reinterpretAsUInt32(f), reinterpretAsUInt32(g) FROM t FORMAT Null</query>
-    <query>SELECT reinterpretAsUInt64(a), reinterpretAsUInt64(b), reinterpretAsUInt64(c), reinterpretAsUInt64(d), reinterpretAsUInt64(f), reinterpretAsUInt64(g) FROM t FORMAT Null</query>
-    <query>SELECT reinterpretAsUInt256(a), reinterpretAsUInt256(b), reinterpretAsUInt256(c), reinterpretAsUInt256(d), reinterpretAsUInt256(f), reinterpretAsUInt256(g) FROM t FORMAT Null</query>
+            toUInt64(number) as a,
+            toUInt256(number) as b,
+            toInt128(number) as c,
+            toInt256(number) as d,
+            toString(number) as f,
+            toFixedString(f, 20) as g
+        FROM numbers_mt(200000000)
+        SETTINGS max_threads = 8
+        FORMAT Null
+    </query>
+    <query>
+        SELECT
+            reinterpretAsUInt32(a),
+            reinterpretAsUInt32(b),
+            reinterpretAsUInt32(c),
+            reinterpretAsUInt32(d),
+            reinterpretAsUInt32(f),
+            reinterpretAsUInt32(g),
 
-    <query>SELECT reinterpretAsInt8(a), reinterpretAsInt8(b), reinterpretAsInt8(c), reinterpretAsInt8(d), reinterpretAsInt8(f), reinterpretAsInt8(g) FROM t FORMAT Null</query>
-    <query>SELECT reinterpretAsInt16(a), reinterpretAsInt16(b), reinterpretAsInt16(c), reinterpretAsInt16(d), reinterpretAsInt16(f), reinterpretAsInt16(g) FROM t FORMAT Null</query>
-    <query>SELECT reinterpretAsInt32(a), reinterpretAsInt32(b), reinterpretAsInt32(c), reinterpretAsInt32(d), reinterpretAsInt32(f), reinterpretAsInt32(g) FROM t FORMAT Null</query>
-    <query>SELECT reinterpretAsInt64(a), reinterpretAsInt64(b), reinterpretAsInt64(c), reinterpretAsInt64(d), reinterpretAsInt64(f), reinterpretAsInt64(g) FROM t FORMAT Null</query>
-    <query>SELECT reinterpretAsInt128(a), reinterpretAsInt128(b), reinterpretAsInt128(c), reinterpretAsInt128(d), reinterpretAsInt128(f), reinterpretAsInt64(g) FROM t FORMAT Null</query>
-    <query>SELECT reinterpretAsInt256(a), reinterpretAsInt256(b), reinterpretAsInt256(c), reinterpretAsInt256(d), reinterpretAsInt256(f), reinterpretAsInt256(g) FROM t FORMAT Null</query>
+            toUInt64(number) as a,
+            toUInt256(number) as b,
+            toInt128(number) as c,
+            toInt256(number) as d,
+            toString(number) as f,
+            toFixedString(f, 20) as g
+        FROM numbers_mt(200000000)
+        SETTINGS max_threads = 8
+        FORMAT Null
+    </query>
+    <query>
+        SELECT
+            reinterpretAsUInt64(a),
+            reinterpretAsUInt64(b),
+            reinterpretAsUInt64(c),
+            reinterpretAsUInt64(d),
+            reinterpretAsUInt64(f),
+            reinterpretAsUInt64(g),
 
-    <query>SELECT reinterpretAsString(a), reinterpretAsString(b), reinterpretAsString(c), reinterpretAsString(d), reinterpretAsInt128(f), reinterpretAsString(g) FROM t FORMAT Null</query>
-    <query>SELECT reinterpretAsFixedString(a), reinterpretAsFixedString(b), reinterpretAsFixedString(c), reinterpretAsFixedString(d), reinterpretAsFixedString(g) FROM t FORMAT Null</query>
+            toUInt64(number) as a,
+            toUInt256(number) as b,
+            toInt128(number) as c,
+            toInt256(number) as d,
+            toString(number) as f,
+            toFixedString(f, 20) as g
+        FROM numbers_mt(200000000)
+        SETTINGS max_threads = 8
+        FORMAT Null
+    </query>
+    <query>
+        SELECT
+            reinterpretAsUInt256(a),
+            reinterpretAsUInt256(b),
+            reinterpretAsUInt256(c),
+            reinterpretAsUInt256(d),
+            reinterpretAsUInt256(f),
+            reinterpretAsUInt256(g),
+
+            toUInt64(number) as a,
+            toUInt256(number) as b,
+            toInt128(number) as c,
+            toInt256(number) as d,
+            toString(number) as f,
+            toFixedString(f, 20) as g
+        FROM numbers_mt(200000000)
+        SETTINGS max_threads = 8
+        FORMAT Null
+    </query>
+
+    <query>
+        SELECT
+            reinterpretAsInt8(a),
+            reinterpretAsInt8(b),
+            reinterpretAsInt8(c),
+            reinterpretAsInt8(d),
+            reinterpretAsInt8(f),
+            reinterpretAsInt8(g),
+
+            toUInt64(number) as a,
+            toUInt256(number) as b,
+            toInt128(number) as c,
+            toInt256(number) as d,
+            toString(number) as f,
+            toFixedString(f, 20) as g
+        FROM numbers_mt(200000000)
+        SETTINGS max_threads = 8
+        FORMAT Null
+    </query>
+    <query>
+        SELECT
+            reinterpretAsInt16(a),
+            reinterpretAsInt16(b),
+            reinterpretAsInt16(c),
+            reinterpretAsInt16(d),
+            reinterpretAsInt16(f),
+            reinterpretAsInt16(g),
+
+            toUInt64(number) as a,
+            toUInt256(number) as b,
+            toInt128(number) as c,
+            toInt256(number) as d,
+            toString(number) as f,
+            toFixedString(f, 20) as g
+        FROM numbers_mt(200000000)
+        SETTINGS max_threads = 8
+        FORMAT Null
+    </query>
+    <query>
+        SELECT
+            reinterpretAsInt32(a),
+            reinterpretAsInt32(b),
+            reinterpretAsInt32(c),
+            reinterpretAsInt32(d),
+            reinterpretAsInt32(f),
+            reinterpretAsInt32(g),
+
+            toUInt64(number) as a,
+            toUInt256(number) as b,
+            toInt128(number) as c,
+            toInt256(number) as d,
+            toString(number) as f,
+            toFixedString(f, 20) as g
+        FROM numbers_mt(200000000)
+        SETTINGS max_threads = 8
+        FORMAT Null
+    </query>
+    <query>
+        SELECT
+            reinterpretAsInt64(a),
+            reinterpretAsInt64(b),
+            reinterpretAsInt64(c),
+            reinterpretAsInt64(d),
+            reinterpretAsInt64(f),
+            reinterpretAsInt64(g),
+
+            toUInt64(number) as a,
+            toUInt256(number) as b,
+            toInt128(number) as c,
+            toInt256(number) as d,
+            toString(number) as f,
+            toFixedString(f, 20) as g
+        FROM numbers_mt(200000000)
+        SETTINGS max_threads = 8
+        FORMAT Null
+    </query>
+    <query>
+        SELECT
+            reinterpretAsInt128(a),
+            reinterpretAsInt128(b),
+            reinterpretAsInt128(c),
+            reinterpretAsInt128(d),
+            reinterpretAsInt128(f),
+            reinterpretAsInt128(g),
+
+            toUInt64(number) as a,
+            toUInt256(number) as b,
+            toInt128(number) as c,
+            toInt256(number) as d,
+            toString(number) as f,
+            toFixedString(f, 20) as g
+        FROM numbers_mt(200000000)
+        SETTINGS max_threads = 8
+        FORMAT Null
+    </query>
+    <query>
+        SELECT
+            reinterpretAsInt256(a),
+            reinterpretAsInt256(b),
+            reinterpretAsInt256(c),
+            reinterpretAsInt256(d),
+            reinterpretAsInt256(f),
+            reinterpretAsInt256(g),
+
+            toUInt64(number) as a,
+            toUInt256(number) as b,
+            toInt128(number) as c,
+            toInt256(number) as d,
+            toString(number) as f,
+            toFixedString(f, 20) as g
+        FROM numbers_mt(200000000)
+        SETTINGS max_threads = 8
+        FORMAT Null
+    </query>
+
+    <query>
+        SELECT
+            reinterpretAsString(a),
+            reinterpretAsString(b),
+            reinterpretAsString(c),
+            reinterpretAsString(d),
+            reinterpretAsString(f),
+            reinterpretAsString(g),
+
+            toUInt64(number) as a,
+            toUInt256(number) as b,
+            toInt128(number) as c,
+            toInt256(number) as d,
+            toString(number) as f,
+            toFixedString(f, 20) as g
+        FROM numbers_mt(200000000)
+        SETTINGS max_threads = 8
+        FORMAT Null
+    </query>
+    
+    <query>
+         SELECT
+            reinterpretAsFixedString(a),
+            reinterpretAsFixedString(b),
+            reinterpretAsFixedString(c),
+            reinterpretAsFixedString(d),
+            reinterpretAsFixedString(g),
+
+            toUInt64(number) as a,
+            toUInt256(number) as b,
+            toInt128(number) as c,
+            toInt256(number) as d,
+            toString(number) as f,
+            toFixedString(f, 20) as g
+        FROM numbers_mt(200000000)
+        SETTINGS max_threads = 8
+        FORMAT Null
+    </query>
 
 </test>

From 0073c87d5d2e80a054468255b021acdbe5ceb660 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Tue, 2 Feb 2021 13:32:42 +0300
Subject: [PATCH 0530/1238] fix

---
 src/Databases/DatabaseAtomic.cpp          |  2 +-
 src/Interpreters/DDLWorker.cpp            |  2 +-
 src/Interpreters/InterpreterDropQuery.cpp |  2 +-
 src/Storages/StorageMaterializedView.cpp  | 12 ++++++------
 src/Storages/StorageMaterializedView.h    |  2 +-
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp
index 8b75f439152..e6bc3bfcd44 100644
--- a/src/Databases/DatabaseAtomic.cpp
+++ b/src/Databases/DatabaseAtomic.cpp
@@ -131,7 +131,7 @@ void DatabaseAtomic::dropTable(const Context & context, const String & table_nam
     /// Remove the inner table (if any) to avoid deadlock
     /// (due to attempt to execute DROP from the worker thread)
     if (auto * mv = dynamic_cast<StorageMaterializedView *>(table.get()))
-        mv->dropInnerTable(no_delay);
+        mv->dropInnerTable(no_delay, context);
     /// Notify DatabaseCatalog that table was dropped. It will remove table data in background.
     /// Cleanup is performed outside of database to allow easily DROP DATABASE without waiting for cleanup to complete.
     DatabaseCatalog::instance().enqueueDroppedTableCleanup(table->getStorageID(), table, table_metadata_path_drop, no_delay);
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index fabb9f9563e..dd822e0f237 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -484,7 +484,7 @@ void DDLWorker::processTask(DDLTaskBase & task)
                     auto table_id = context.tryResolveStorageID(*query_with_table, Context::ResolveOrdinary);
                     DatabasePtr database;
                     std::tie(database, storage) = DatabaseCatalog::instance().tryGetDatabaseAndTable(table_id, context);
-                    if (database && database->getEngineName() == "Replicated")
+                    if (database && database->getEngineName() == "Replicated" && !typeid_cast<const DatabaseReplicatedTask *>(&task))
                         throw Exception(ErrorCodes::INCORRECT_QUERY, "ON CLUSTER queries are not allowed for Replicated databases");
                 }
 
diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp
index eed7337b9ab..68680f27ea4 100644
--- a/src/Interpreters/InterpreterDropQuery.cpp
+++ b/src/Interpreters/InterpreterDropQuery.cpp
@@ -127,7 +127,7 @@ BlockIO InterpreterDropQuery::executeToTableImpl(const ASTDropQuery & query, Dat
         table_id.uuid = database->tryGetTableUUID(table_id.table_name);
 
         /// Prevents recursive drop from drop database query. The original query must specify a table.
-        bool is_drop_or_detach_database = query.table.empty();
+        bool is_drop_or_detach_database = query_ptr->as<ASTDropQuery>()->table.empty();
         bool is_replicated_ddl_query = typeid_cast<DatabaseReplicated *>(database.get()) &&
                                        context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY &&
                                        !is_drop_or_detach_database;
diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp
index 29aea3e6150..fb75a933910 100644
--- a/src/Storages/StorageMaterializedView.cpp
+++ b/src/Storages/StorageMaterializedView.cpp
@@ -194,7 +194,7 @@ BlockOutputStreamPtr StorageMaterializedView::write(const ASTPtr & query, const
 }
 
 
-static void executeDropQuery(ASTDropQuery::Kind kind, Context & global_context, const StorageID & target_table_id, bool no_delay)
+static void executeDropQuery(ASTDropQuery::Kind kind, const Context & global_context, const StorageID & target_table_id, bool no_delay)
 {
     if (DatabaseCatalog::instance().tryGetTable(target_table_id, global_context))
     {
@@ -220,19 +220,19 @@ void StorageMaterializedView::drop()
     if (!select_query.select_table_id.empty())
         DatabaseCatalog::instance().removeDependency(select_query.select_table_id, table_id);
 
-    dropInnerTable(true);
+    dropInnerTable(true, global_context);
 }
 
-void StorageMaterializedView::dropInnerTable(bool no_delay)
+void StorageMaterializedView::dropInnerTable(bool no_delay, const Context & context)
 {
     if (has_inner_table && tryGetTargetTable())
-        executeDropQuery(ASTDropQuery::Kind::Drop, global_context, target_table_id, no_delay);
+        executeDropQuery(ASTDropQuery::Kind::Drop, context, target_table_id, no_delay);
 }
 
-void StorageMaterializedView::truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableExclusiveLockHolder &)
+void StorageMaterializedView::truncate(const ASTPtr &, const StorageMetadataPtr &, const Context & context, TableExclusiveLockHolder &)
 {
     if (has_inner_table)
-        executeDropQuery(ASTDropQuery::Kind::Truncate, global_context, target_table_id, true);
+        executeDropQuery(ASTDropQuery::Kind::Truncate, context, target_table_id, true);
 }
 
 void StorageMaterializedView::checkStatementCanBeForwarded() const
diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h
index fab9e28afe3..94e4295cd34 100644
--- a/src/Storages/StorageMaterializedView.h
+++ b/src/Storages/StorageMaterializedView.h
@@ -37,7 +37,7 @@ public:
     BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override;
 
     void drop() override;
-    void dropInnerTable(bool no_delay);
+    void dropInnerTable(bool no_delay, const Context & context);
 
     void truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableExclusiveLockHolder &) override;
 

From 998aed30291e18d3064703d2f42639dc1f3c8a4e Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Tue, 2 Feb 2021 14:58:35 +0300
Subject: [PATCH 0531/1238] Do not use inputs which values are known constants
 in ActionsDAG.

---
 src/Interpreters/ActionsDAG.cpp | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp
index d8c40ffda2f..4bed322076a 100644
--- a/src/Interpreters/ActionsDAG.cpp
+++ b/src/Interpreters/ActionsDAG.cpp
@@ -39,7 +39,17 @@ ActionsDAG::ActionsDAG(const ColumnsWithTypeAndName & inputs_)
     for (const auto & input : inputs_)
     {
         if (input.column && isColumnConst(*input.column))
+        {
             addInput(input, true);
+
+            /// Here we also add column.
+            /// It will allow to remove input which is actually constant (after projection).
+            /// Also, some transforms from query pipeline may randomly materialize constants,
+            ///   without any respect to header structure. So, it is a way to drop materialized column and use
+            ///   constant value from header.
+            /// We cannot remove such input right now cause inputs positions are important in some cases.
+            addColumn(input, true);
+        }
         else
             addInput(input.name, input.type, true);
     }

From 10a4cd7b8ac5111539b4648f5c5e1789225679ee Mon Sep 17 00:00:00 2001
From: George <gyuton@yandex-team.ru>
Date: Tue, 2 Feb 2021 15:09:36 +0300
Subject: [PATCH 0532/1238] First draft

---
 docs/en/operations/settings/settings.md       | 52 +++++++++++++++++++
 .../sql-reference/statements/create/view.md   |  2 +
 2 files changed, 54 insertions(+)

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index edfd391c71e..09328015712 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -2592,4 +2592,56 @@ Possible values:
 
 Default value: `16`.
 
+## optimize_on_insert {#optimize-on-insert}
+
+Do the same transformation for inserted block of data as if merge was done on this block.
+
+Possible values:
+
+-   0 — Disabled.
+-   1 — Enabled.
+
+Default value: 1.
+
+**Example**
+
+The difference enabled and disabled:
+
+Query:
+
+```sql
+SET optimize_on_insert = 1;
+
+CREATE TABLE test1 (`FirstTable` UInt32) ENGINE = ReplacingMergeTree ORDER BY FirstTable;
+
+INSERT INTO test1 SELECT number % 2 FROM numbers(5);
+
+SELECT * FROM test1;
+
+SET optimize_on_insert = 0;
+
+CREATE TABLE test2 (`SecondTable` UInt32) ENGINE = ReplacingMergeTree ORDER BY SecondTable;
+
+INSERT INTO test2 SELECT number % 2 FROM numbers(5);
+
+SELECT * FROM test2;
+```
+
+Result:
+
+``` text
+┌─FirstTable─┐
+│          0 │
+│          1 │
+└────────────┘
+
+┌─SecondTable─┐
+│           0 │
+│           0 │
+│           0 │
+│           1 │
+│           1 │
+└─────────────┘
+```
+
 [Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->
diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md
index 4370735b8d9..e5b92bf187a 100644
--- a/docs/en/sql-reference/statements/create/view.md
+++ b/docs/en/sql-reference/statements/create/view.md
@@ -59,6 +59,8 @@ A `SELECT` query can contain `DISTINCT`, `GROUP BY`, `ORDER BY`, `LIMIT`… Note
 
 The execution of [ALTER](../../../sql-reference/statements/alter/index.md) queries on materialized views has limitations, so they might be inconvenient. If the materialized view uses the construction `TO [db.]name`, you can `DETACH` the view, run `ALTER` for the target table, and then `ATTACH` the previously detached (`DETACH`) view.
 
+Note that Materialized view is influenced by [optimize_on_insert](../../../operations/settings/settings.md#optimize-on-insert) settings, because there is a merge of data before the insertion in a view.
+
 Views look the same as normal tables. For example, they are listed in the result of the `SHOW TABLES` query.
 
 There isn’t a separate query for deleting views. To delete a view, use [DROP TABLE](../../../sql-reference/statements/drop.md).

From b4a3795473b4d5e446e39692de79722ca1a40eba Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Tue, 2 Feb 2021 15:38:08 +0300
Subject: [PATCH 0533/1238] Fix config path

---
 tests/config/install.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/config/install.sh b/tests/config/install.sh
index 6f620ef6404..9965e1fb1ad 100755
--- a/tests/config/install.sh
+++ b/tests/config/install.sh
@@ -29,7 +29,7 @@ ln -sf $SRC_PATH/config.d/graphite.xml $DEST_SERVER_PATH/config.d/
 ln -sf $SRC_PATH/config.d/database_atomic.xml $DEST_SERVER_PATH/config.d/
 ln -sf $SRC_PATH/config.d/max_concurrent_queries.xml $DEST_SERVER_PATH/config.d/
 ln -sf $SRC_PATH/config.d/test_cluster_with_incorrect_pw.xml $DEST_SERVER_PATH/config.d/
-ln -sf $SRC_PATH/config.d/nu_keeper_port.xml $DEST_SERVER_PATH/config.d/
+ln -sf $SRC_PATH/config.d/test_keeper_port.xml $DEST_SERVER_PATH/config.d/
 ln -sf $SRC_PATH/config.d/logging_no_rotate.xml $DEST_SERVER_PATH/config.d/
 ln -sf $SRC_PATH/config.d/tcp_with_proxy.xml $DEST_SERVER_PATH/config.d/
 ln -sf $SRC_PATH/config.d/top_level_domains_lists.xml $DEST_SERVER_PATH/config.d/

From 94201ebf0411b18fecb0a8d63fbb2ec7b9bfb953 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Tue, 2 Feb 2021 15:41:09 +0300
Subject: [PATCH 0534/1238] More info in test

---
 tests/integration/test_testkeeper_multinode/test.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tests/integration/test_testkeeper_multinode/test.py b/tests/integration/test_testkeeper_multinode/test.py
index 7b9430b2368..16ca00124a5 100644
--- a/tests/integration/test_testkeeper_multinode/test.py
+++ b/tests/integration/test_testkeeper_multinode/test.py
@@ -237,5 +237,15 @@ def test_blocade_leader_twice(started_cluster):
             assert False, "Cannot reconnect for node{}".format(n + 1)
 
     assert node1.query("SELECT COUNT() FROM t2") == "510\n"
+    if node2.query("SELECT COUNT() FROM t2") != "510\n":
+        print(node2.query("SELECT * FROM system.replication_queue FORMAT Vertical"))
+        print("Replicas")
+        print(node2.query("SELECT * FROM system.replicas FORMAT Vertical"))
+        print("Replica 2 info")
+        print(node2.query("SELECT * FROM system.zookeeper WHERE path = '/clickhouse/t2/replicas/2' FORMAT Vertical"))
+        print("Queue")
+        print(node2.query("SELECT * FROM system.zookeeper WHERE path = '/clickhouse/t2/replicas/2/queue' FORMAT Vertical"))
+        print("Log")
+        print(node2.query("SELECT * FROM system.zookeeper WHERE path = '/clickhouse/t2/log' FORMAT Vertical"))
     assert node2.query("SELECT COUNT() FROM t2") == "510\n"
     assert node3.query("SELECT COUNT() FROM t2") == "510\n"

From b4afc49d3c54154d0b41e67d592e73c65deec5c1 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Tue, 2 Feb 2021 17:21:43 +0300
Subject: [PATCH 0535/1238] update perf tests

sed -i s'/^<test.*$/<test>/g' tests/performance/*.xml

WITH ceil(max(q[3]), 1) AS h
SELECT concat('sed -i s\'/^<test.*$/<test max_ignored_relative_change="', toString(h), '">/g\' tests/performance/', test, '.xml') AS s
FROM
(
    SELECT
        test,
        query_index,
        count(*),
        min(event_time),
        max(event_time) AS t,
        arrayMap(x -> floor(x, 3), quantiles(0, 0.5, 0.95, 1)(stat_threshold)) AS q,
        median(stat_threshold) AS m
    FROM perftest.query_metrics_v2
    WHERE ((pr_number != 0) AND (event_date > '2021-01-01')) AND (metric = 'client_time') AND (abs(diff) < 0.05) AND (old_value > 0.1)
    GROUP BY
        test,
        query_index,
        query_display_name
    HAVING (t > '2021-01-01 00:00:00') AND ((q[3]) > 0.1)
    ORDER BY test DESC
)
GROUP BY test
ORDER BY h DESC
FORMAT PrettySpace
---
 tests/performance/ColumnMap.xml                      |  2 +-
 .../array_index_low_cardinality_strings.xml          |  2 +-
 tests/performance/casts.xml                          |  2 +-
 tests/performance/conditional.xml                    |  2 +-
 tests/performance/decimal_casts.xml                  |  2 +-
 tests/performance/general_purpose_hashes.xml         |  2 +-
 tests/performance/group_by_sundy_li.xml              | 12 ++++++++----
 tests/performance/if_array_num.xml                   |  2 +-
 tests/performance/jit_small_requests.xml             |  2 +-
 tests/performance/joins_in_memory_pmj.xml            |  2 +-
 tests/performance/number_formatting_formats.xml      |  2 +-
 tests/performance/or_null_default.xml                |  2 +-
 tests/performance/polymorphic_parts_l.xml            |  2 +-
 tests/performance/polymorphic_parts_m.xml            |  2 +-
 tests/performance/polymorphic_parts_s.xml            |  2 +-
 tests/performance/read_in_order_many_parts.xml       |  2 +-
 tests/performance/set_index.xml                      |  2 +-
 tests/performance/uniq.xml                           |  2 +-
 18 files changed, 25 insertions(+), 21 deletions(-)

diff --git a/tests/performance/ColumnMap.xml b/tests/performance/ColumnMap.xml
index 96f2026f870..f6393985377 100644
--- a/tests/performance/ColumnMap.xml
+++ b/tests/performance/ColumnMap.xml
@@ -1,4 +1,4 @@
-<test>
+<test max_ignored_relative_change="0.2">
 
     <settings>
         <allow_experimental_map_type>1</allow_experimental_map_type>
diff --git a/tests/performance/array_index_low_cardinality_strings.xml b/tests/performance/array_index_low_cardinality_strings.xml
index 896a5923a9e..bbfea083f0a 100644
--- a/tests/performance/array_index_low_cardinality_strings.xml
+++ b/tests/performance/array_index_low_cardinality_strings.xml
@@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.2">
+<test>
     <create_query>DROP TABLE IF EXISTS perf_lc_str</create_query>
     <create_query>CREATE TABLE perf_lc_str(
         str LowCardinality(String),
diff --git a/tests/performance/casts.xml b/tests/performance/casts.xml
index 17a1c774353..aeb0674b9bf 100644
--- a/tests/performance/casts.xml
+++ b/tests/performance/casts.xml
@@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.2">
+<test>
     <settings>
         <max_memory_usage>15G</max_memory_usage>
     </settings>
diff --git a/tests/performance/conditional.xml b/tests/performance/conditional.xml
index 91b6cb95ff2..21623f45b05 100644
--- a/tests/performance/conditional.xml
+++ b/tests/performance/conditional.xml
@@ -1,4 +1,4 @@
-<test>
+<test max_ignored_relative_change="0.2">
     <query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(if(rand() % 2, toDateTime('2019-02-04 01:24:31'), toDate('2019-02-04')))</query>
     <query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(multiIf(rand() % 2, toDateTime('2019-02-04 01:24:31'), toDate('2019-02-04')))</query>
     <query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(if(rand() % 2, [toDateTime('2019-02-04 01:24:31')], [toDate('2019-02-04')]))</query>
diff --git a/tests/performance/decimal_casts.xml b/tests/performance/decimal_casts.xml
index f087d0938c1..6dd38b6a06a 100644
--- a/tests/performance/decimal_casts.xml
+++ b/tests/performance/decimal_casts.xml
@@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.2">
+<test>
     <settings>
         <max_memory_usage>15G</max_memory_usage>
     </settings>
diff --git a/tests/performance/general_purpose_hashes.xml b/tests/performance/general_purpose_hashes.xml
index f34554360cf..bd2fa9674f6 100644
--- a/tests/performance/general_purpose_hashes.xml
+++ b/tests/performance/general_purpose_hashes.xml
@@ -1,4 +1,4 @@
-<test>
+<test max_ignored_relative_change="0.2">
     <substitutions>
         <substitution>
            <name>gp_hash_func</name>
diff --git a/tests/performance/group_by_sundy_li.xml b/tests/performance/group_by_sundy_li.xml
index 3fcc4acf88d..762a07b8702 100644
--- a/tests/performance/group_by_sundy_li.xml
+++ b/tests/performance/group_by_sundy_li.xml
@@ -1,4 +1,8 @@
 <test max_ignored_relative_change="0.2">
+    <settings>
+        <max_insert_threads>8</max_insert_threads>
+    </settings>
+
     <create_query>
 CREATE TABLE a
 (
@@ -10,10 +14,10 @@ PARTITION BY d
 ORDER BY d
     </create_query>
 
-    <fill_query>insert into a select '2000-01-01', ['aa','bb','cc','dd'][number % 4 + 1] from  numbers(100000000)</fill_query>
-    <fill_query>insert into a select '2000-01-02', ['aa','bb','cc','dd'][number % 4 + 1] from  numbers(100000000)</fill_query>
-    <fill_query>insert into a select '2000-01-03', ['aa','bb','cc','dd'][number % 4 + 1] from  numbers(100000000)</fill_query>
-    <fill_query>insert into a select '2000-01-04', ['aa','bb','cc','dd'][number % 4 + 1] from  numbers(100000000)</fill_query>
+    <fill_query>insert into a select '2000-01-01', ['aa','bb','cc','dd'][number % 4 + 1] from  numbers_mt(100000000)</fill_query>
+    <fill_query>insert into a select '2000-01-02', ['aa','bb','cc','dd'][number % 4 + 1] from  numbers_mt(100000000)</fill_query>
+    <fill_query>insert into a select '2000-01-03', ['aa','bb','cc','dd'][number % 4 + 1] from  numbers_mt(100000000)</fill_query>
+    <fill_query>insert into a select '2000-01-04', ['aa','bb','cc','dd'][number % 4 + 1] from  numbers_mt(100000000)</fill_query>
 
     <fill_query>OPTIMIZE TABLE a FINAL</fill_query>
 
diff --git a/tests/performance/if_array_num.xml b/tests/performance/if_array_num.xml
index 26d16cbfb97..f3f418b809c 100644
--- a/tests/performance/if_array_num.xml
+++ b/tests/performance/if_array_num.xml
@@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.2">
+<test>
 
 
diff --git a/tests/performance/jit_small_requests.xml b/tests/performance/jit_small_requests.xml
index d8f917fb9af..c9abec0926b 100644
--- a/tests/performance/jit_small_requests.xml
+++ b/tests/performance/jit_small_requests.xml
@@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.2">
+<test>
     <query>
         WITH
             bitXor(number, 0x4CF2D2BAAE6DA887) AS x0,
diff --git a/tests/performance/joins_in_memory_pmj.xml b/tests/performance/joins_in_memory_pmj.xml
index 1142fdd8222..5dd4395513d 100644
--- a/tests/performance/joins_in_memory_pmj.xml
+++ b/tests/performance/joins_in_memory_pmj.xml
@@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.9">
+<test max_ignored_relative_change="1.3">
     <create_query>CREATE TABLE ints (i64 Int64, i32 Int32, i16 Int16, i8 Int8) ENGINE = Memory</create_query>
 
     <settings>
diff --git a/tests/performance/number_formatting_formats.xml b/tests/performance/number_formatting_formats.xml
index 92e04a62024..77b39da8e92 100644
--- a/tests/performance/number_formatting_formats.xml
+++ b/tests/performance/number_formatting_formats.xml
@@ -1,4 +1,4 @@
-<test>
+<test max_ignored_relative_change="0.2">
     <substitutions>
         <substitution>
             <name>format</name>
diff --git a/tests/performance/or_null_default.xml b/tests/performance/or_null_default.xml
index 009719f66a5..6fed0cce4d6 100644
--- a/tests/performance/or_null_default.xml
+++ b/tests/performance/or_null_default.xml
@@ -1,4 +1,4 @@
-<test>
+<test max_ignored_relative_change="0.3">
     <query>SELECT sumOrNull(number) FROM numbers(100000000)</query>
     <query>SELECT sumOrDefault(toNullable(number)) FROM numbers(100000000)</query>
     <query>SELECT sumOrNull(number) FROM numbers(10000000) GROUP BY number % 1024</query>
diff --git a/tests/performance/polymorphic_parts_l.xml b/tests/performance/polymorphic_parts_l.xml
index d2ae9417bf7..539422aa6e3 100644
--- a/tests/performance/polymorphic_parts_l.xml
+++ b/tests/performance/polymorphic_parts_l.xml
@@ -1,4 +1,4 @@
-<test>
+<test max_ignored_relative_change="0.2">
     <create_query>
         CREATE TABLE hits_wide AS hits_10m_single ENGINE = MergeTree()
         PARTITION BY toYYYYMM(EventDate)
diff --git a/tests/performance/polymorphic_parts_m.xml b/tests/performance/polymorphic_parts_m.xml
index 54a81def55e..ed29ac43d64 100644
--- a/tests/performance/polymorphic_parts_m.xml
+++ b/tests/performance/polymorphic_parts_m.xml
@@ -1,4 +1,4 @@
-<test>
+<test max_ignored_relative_change="0.2">
     <create_query>
         CREATE TABLE hits_wide AS hits_10m_single ENGINE = MergeTree()
         PARTITION BY toYYYYMM(EventDate)
diff --git a/tests/performance/polymorphic_parts_s.xml b/tests/performance/polymorphic_parts_s.xml
index 5021e135bb9..0496692b087 100644
--- a/tests/performance/polymorphic_parts_s.xml
+++ b/tests/performance/polymorphic_parts_s.xml
@@ -1,4 +1,4 @@
-<test>
+<test max_ignored_relative_change="0.2">
     <create_query>
         CREATE TABLE hits_wide AS hits_10m_single ENGINE = MergeTree()
         PARTITION BY toYYYYMM(EventDate)
diff --git a/tests/performance/read_in_order_many_parts.xml b/tests/performance/read_in_order_many_parts.xml
index c10d00c436f..ca713935426 100644
--- a/tests/performance/read_in_order_many_parts.xml
+++ b/tests/performance/read_in_order_many_parts.xml
@@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.2">
+<test>
     <settings>
         <optimize_aggregation_in_order>1</optimize_aggregation_in_order>
         <optimize_read_in_order>1</optimize_read_in_order>
diff --git a/tests/performance/set_index.xml b/tests/performance/set_index.xml
index fd018b79597..76f1087a1bf 100644
--- a/tests/performance/set_index.xml
+++ b/tests/performance/set_index.xml
@@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.2">
+<test>
     <create_query>CREATE TABLE test_in (`a` UInt32) ENGINE = MergeTree() ORDER BY a</create_query>
     <fill_query>INSERT INTO test_in SELECT number FROM numbers(500000000)</fill_query>
 
diff --git a/tests/performance/uniq.xml b/tests/performance/uniq.xml
index 7a35c6fb704..b4e73733769 100644
--- a/tests/performance/uniq.xml
+++ b/tests/performance/uniq.xml
@@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.7">
+<test>
 
     <preconditions>
         <table_exists>hits_100m_single</table_exists>

From b9647e5326aaa4c138131e054b4d436e6a2389ad Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Tue, 2 Feb 2021 17:22:03 +0300
Subject: [PATCH 0536/1238] Fix double whitespace

---
 tests/performance/group_by_sundy_li.xml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/performance/group_by_sundy_li.xml b/tests/performance/group_by_sundy_li.xml
index 3fcc4acf88d..dd512f7a53c 100644
--- a/tests/performance/group_by_sundy_li.xml
+++ b/tests/performance/group_by_sundy_li.xml
@@ -10,10 +10,10 @@ PARTITION BY d
 ORDER BY d
     </create_query>
 
-    <fill_query>insert into a select '2000-01-01', ['aa','bb','cc','dd'][number % 4 + 1] from  numbers(100000000)</fill_query>
-    <fill_query>insert into a select '2000-01-02', ['aa','bb','cc','dd'][number % 4 + 1] from  numbers(100000000)</fill_query>
-    <fill_query>insert into a select '2000-01-03', ['aa','bb','cc','dd'][number % 4 + 1] from  numbers(100000000)</fill_query>
-    <fill_query>insert into a select '2000-01-04', ['aa','bb','cc','dd'][number % 4 + 1] from  numbers(100000000)</fill_query>
+    <fill_query>insert into a select '2000-01-01', ['aa','bb','cc','dd'][number % 4 + 1] from numbers(100000000)</fill_query>
+    <fill_query>insert into a select '2000-01-02', ['aa','bb','cc','dd'][number % 4 + 1] from numbers(100000000)</fill_query>
+    <fill_query>insert into a select '2000-01-03', ['aa','bb','cc','dd'][number % 4 + 1] from numbers(100000000)</fill_query>
+    <fill_query>insert into a select '2000-01-04', ['aa','bb','cc','dd'][number % 4 + 1] from numbers(100000000)</fill_query>
 
     <fill_query>OPTIMIZE TABLE a FINAL</fill_query>
 

From b2d779079498e76d670c1736be560202c048234c Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Tue, 2 Feb 2021 17:30:54 +0300
Subject: [PATCH 0537/1238] Fix test.

---
 src/Storages/MergeTree/MergeTreeRangeReader.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp
index 4e5e7e6f946..6d4d8737f10 100644
--- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp
+++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp
@@ -867,7 +867,7 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r
     if (result.totalRowsPerGranule() == 0)
         result.setFilterConstFalse();
     /// If we need to filter in PREWHERE
-    else if (prewhere->need_filter || result.need_filter)
+    else if (prewhere->need_filter || result.need_filter || prewhere->remove_prewhere_column)
     {
         /// If there is a filter and without optimized
         if (result.getFilter() && last_reader_in_chain)

From 9beb7689cc2e4a239d640a6b7ce19b0a1b80586d Mon Sep 17 00:00:00 2001
From: keenwolf <keenwolf@gmail.com>
Date: Tue, 2 Feb 2021 23:35:45 +0800
Subject: [PATCH 0538/1238] add comments and update test cases.

---
 base/common/DateLUTImpl.h                     |   18 +-
 src/Functions/DateTimeTransforms.h            |    6 +-
 src/Functions/timezoneOffset.cpp              |    2 +-
 .../01699_timezoneOffset.reference            | 1381 +++--------------
 .../0_stateless/01699_timezoneOffset.sql      |   48 +-
 5 files changed, 235 insertions(+), 1220 deletions(-)
 mode change 100644 => 100755 tests/queries/0_stateless/01699_timezoneOffset.sql

diff --git a/base/common/DateLUTImpl.h b/base/common/DateLUTImpl.h
index 3ec397dc051..23c78f6e7fc 100644
--- a/base/common/DateLUTImpl.h
+++ b/base/common/DateLUTImpl.h
@@ -274,23 +274,31 @@ public:
         return res / 3600;
     }
 
-
+    /** Calculating offset from UTC in seconds.
+     * which means Using the same literal time of "t" to get the corresponding timestamp in UTC,
+     * then subtract the former from the latter to get the offset result.
+     * The boundaries when meets DST(daylight saving time) change should be handled very carefully.
+     */
     inline time_t timezoneOffset(time_t t) const
     {
         DayNum index = findIndex(t);
 
-        /// Calculate daylight saving offset first, ignore the leap seconds
+        /// Calculate daylight saving offset first.
+        /// Because the "amount_of_offset_change" in LUT entry only exists in the change day, it's costly to scan it from the very begin.
+        /// but we can figure out all the accumulated offsets from 1970-01-01 to that day just by get the whole difference between lut[].date,
+        /// and then, we can directly subtract multiple 86400s to get the real DST offsets for the leap seconds is not considered now.
         time_t res = (lut[index].date - lut[0].date) % 86400;
+        /// As so far to know, the maximal DST offset couldn't be more than 2 hours, so after the modulo operation the remainder
+        /// will sits between [-offset --> 0 --> offset] which respectively corresponds to moving clock forward or backward.
         res = res > 43200 ? (86400 - res) : (0 - res);
-        /// Check if has a offset change during this day
+
+        /// Check if has a offset change during this day. Add the change when cross the line
         if (lut[index].amount_of_offset_change != 0 && t >= lut[index].date + lut[index].time_at_offset_change)
             res += lut[index].amount_of_offset_change;
 
         return res + offset_at_start_of_epoch;
     }
 
-
-
     /** Only for time zones with/when offset from UTC is multiple of five minutes.
       * This is true for all time zones: right now, all time zones have an offset that is multiple of 15 minutes.
       *
diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h
index 883c6ed07a3..b55f78e71bd 100644
--- a/src/Functions/DateTimeTransforms.h
+++ b/src/Functions/DateTimeTransforms.h
@@ -407,7 +407,7 @@ struct ToHourImpl
     using FactorTransform = ToDateImpl;
 };
 
-struct timezoneOffsetImpl
+struct TimezoneOffsetImpl
 {
     static constexpr auto name = "timezoneOffset";
 
@@ -420,8 +420,8 @@ struct timezoneOffsetImpl
     {
         return dateIsNotSupported(name);
     }
-/////need to do
-    using FactorTransform = ToDateImpl;
+
+    using FactorTransform = ToTimeImpl;
 };
 
 struct ToMinuteImpl
diff --git a/src/Functions/timezoneOffset.cpp b/src/Functions/timezoneOffset.cpp
index 62e844e691b..5acdb105b2b 100644
--- a/src/Functions/timezoneOffset.cpp
+++ b/src/Functions/timezoneOffset.cpp
@@ -7,7 +7,7 @@
 namespace DB
 {
 
-using FunctiontimezoneOffset = FunctionDateOrDateTimeToSomething<DataTypeInt32, timezoneOffsetImpl>;
+using FunctiontimezoneOffset = FunctionDateOrDateTimeToSomething<DataTypeInt32, TimezoneOffsetImpl>;
 
 void registerFunctiontimezoneOffset(FunctionFactory & factory)
 {
diff --git a/tests/queries/0_stateless/01699_timezoneOffset.reference b/tests/queries/0_stateless/01699_timezoneOffset.reference
index f9a521dc460..e70c5fa62ee 100644
--- a/tests/queries/0_stateless/01699_timezoneOffset.reference
+++ b/tests/queries/0_stateless/01699_timezoneOffset.reference
@@ -1,1202 +1,183 @@
-0	1981-04-01 00:00:00	10800	354920400
-1	1981-04-01 00:10:00	10800	354921000
-2	1981-04-01 00:20:00	10800	354921600
-3	1981-04-01 00:30:00	10800	354922200
-4	1981-04-01 00:40:00	10800	354922800
-5	1981-04-01 00:50:00	10800	354923400
-6	1981-04-01 01:00:00	10800	354924000
-7	1981-04-01 01:10:00	10800	354924600
-8	1981-04-01 01:20:00	10800	354925200
-9	1981-04-01 01:30:00	10800	354925800
-10	1981-04-01 01:40:00	10800	354926400
-11	1981-04-01 01:50:00	10800	354927000
-12	1981-04-01 02:00:00	10800	354927600
-13	1981-04-01 02:10:00	10800	354928200
-14	1981-04-01 02:20:00	10800	354928800
-15	1981-04-01 02:30:00	10800	354929400
-16	1981-04-01 02:40:00	10800	354930000
-17	1981-04-01 02:50:00	10800	354930600
-18	1981-04-01 03:00:00	10800	354931200
-19	1981-04-01 03:10:00	10800	354931800
-20	1981-04-01 03:20:00	10800	354932400
-21	1981-04-01 03:30:00	10800	354933000
-22	1981-04-01 03:40:00	10800	354933600
-23	1981-04-01 03:50:00	10800	354934200
-24	1981-04-01 04:00:00	10800	354934800
-25	1981-04-01 04:10:00	10800	354935400
-26	1981-04-01 04:20:00	10800	354936000
-27	1981-04-01 04:30:00	10800	354936600
-28	1981-04-01 04:40:00	10800	354937200
-29	1981-04-01 04:50:00	10800	354937800
-30	1981-04-01 05:00:00	10800	354938400
-31	1981-04-01 05:10:00	10800	354939000
-32	1981-04-01 05:20:00	10800	354939600
-33	1981-04-01 05:30:00	10800	354940200
-34	1981-04-01 05:40:00	10800	354940800
-35	1981-04-01 05:50:00	10800	354941400
-36	1981-04-01 06:00:00	10800	354942000
-37	1981-04-01 06:10:00	10800	354942600
-38	1981-04-01 06:20:00	10800	354943200
-39	1981-04-01 06:30:00	10800	354943800
-40	1981-04-01 06:40:00	10800	354944400
-41	1981-04-01 06:50:00	10800	354945000
-42	1981-04-01 07:00:00	10800	354945600
-43	1981-04-01 07:10:00	10800	354946200
-44	1981-04-01 07:20:00	10800	354946800
-45	1981-04-01 07:30:00	10800	354947400
-46	1981-04-01 07:40:00	10800	354948000
-47	1981-04-01 07:50:00	10800	354948600
-48	1981-04-01 08:00:00	10800	354949200
-49	1981-04-01 08:10:00	10800	354949800
-50	1981-04-01 08:20:00	10800	354950400
-51	1981-04-01 08:30:00	10800	354951000
-52	1981-04-01 08:40:00	10800	354951600
-53	1981-04-01 08:50:00	10800	354952200
-54	1981-04-01 09:00:00	10800	354952800
-55	1981-04-01 09:10:00	10800	354953400
-56	1981-04-01 09:20:00	10800	354954000
-57	1981-04-01 09:30:00	10800	354954600
-58	1981-04-01 09:40:00	10800	354955200
-59	1981-04-01 09:50:00	10800	354955800
-60	1981-04-01 10:00:00	10800	354956400
-61	1981-04-01 10:10:00	10800	354957000
-62	1981-04-01 10:20:00	10800	354957600
-63	1981-04-01 10:30:00	10800	354958200
-64	1981-04-01 10:40:00	10800	354958800
-65	1981-04-01 10:50:00	10800	354959400
-66	1981-04-01 11:00:00	10800	354960000
-67	1981-04-01 11:10:00	10800	354960600
-68	1981-04-01 11:20:00	10800	354961200
-69	1981-04-01 11:30:00	10800	354961800
-70	1981-04-01 11:40:00	10800	354962400
-71	1981-04-01 11:50:00	10800	354963000
-72	1981-04-01 12:00:00	10800	354963600
-73	1981-04-01 12:10:00	10800	354964200
-74	1981-04-01 12:20:00	10800	354964800
-75	1981-04-01 12:30:00	10800	354965400
-76	1981-04-01 12:40:00	10800	354966000
-77	1981-04-01 12:50:00	10800	354966600
-78	1981-04-01 13:00:00	10800	354967200
-79	1981-04-01 13:10:00	10800	354967800
-80	1981-04-01 13:20:00	10800	354968400
-81	1981-04-01 13:30:00	10800	354969000
-82	1981-04-01 13:40:00	10800	354969600
-83	1981-04-01 13:50:00	10800	354970200
-84	1981-04-01 14:00:00	10800	354970800
-85	1981-04-01 14:10:00	10800	354971400
-86	1981-04-01 14:20:00	10800	354972000
-87	1981-04-01 14:30:00	10800	354972600
-88	1981-04-01 14:40:00	10800	354973200
-89	1981-04-01 14:50:00	10800	354973800
-90	1981-04-01 15:00:00	10800	354974400
-91	1981-04-01 15:10:00	10800	354975000
-92	1981-04-01 15:20:00	10800	354975600
-93	1981-04-01 15:30:00	10800	354976200
-94	1981-04-01 15:40:00	10800	354976800
-95	1981-04-01 15:50:00	10800	354977400
-96	1981-04-01 16:00:00	10800	354978000
-97	1981-04-01 16:10:00	10800	354978600
-98	1981-04-01 16:20:00	10800	354979200
-99	1981-04-01 16:30:00	10800	354979800
-100	1981-04-01 16:40:00	10800	354980400
-101	1981-04-01 16:50:00	10800	354981000
-102	1981-04-01 17:00:00	10800	354981600
-103	1981-04-01 17:10:00	10800	354982200
-104	1981-04-01 17:20:00	10800	354982800
-105	1981-04-01 17:30:00	10800	354983400
-106	1981-04-01 17:40:00	10800	354984000
-107	1981-04-01 17:50:00	10800	354984600
-108	1981-04-01 18:00:00	10800	354985200
-109	1981-04-01 18:10:00	10800	354985800
-110	1981-04-01 18:20:00	10800	354986400
-111	1981-04-01 18:30:00	10800	354987000
-112	1981-04-01 18:40:00	10800	354987600
-113	1981-04-01 18:50:00	10800	354988200
-114	1981-04-01 19:00:00	10800	354988800
-115	1981-04-01 19:10:00	10800	354989400
-116	1981-04-01 19:20:00	10800	354990000
-117	1981-04-01 19:30:00	10800	354990600
-118	1981-04-01 19:40:00	10800	354991200
-119	1981-04-01 19:50:00	10800	354991800
-120	1981-04-01 20:00:00	10800	354992400
-121	1981-04-01 20:10:00	10800	354993000
-122	1981-04-01 20:20:00	10800	354993600
-123	1981-04-01 20:30:00	10800	354994200
-124	1981-04-01 20:40:00	10800	354994800
-125	1981-04-01 20:50:00	10800	354995400
-126	1981-04-01 21:00:00	10800	354996000
-127	1981-04-01 21:10:00	10800	354996600
-128	1981-04-01 21:20:00	10800	354997200
-129	1981-04-01 21:30:00	10800	354997800
-130	1981-04-01 21:40:00	10800	354998400
-131	1981-04-01 21:50:00	10800	354999000
-132	1981-04-01 22:00:00	10800	354999600
-133	1981-04-01 22:10:00	10800	355000200
-134	1981-04-01 22:20:00	10800	355000800
-135	1981-04-01 22:30:00	10800	355001400
-136	1981-04-01 22:40:00	10800	355002000
-137	1981-04-01 22:50:00	10800	355002600
-138	1981-04-02 00:00:00	14400	355003200
-139	1981-04-02 00:10:00	14400	355003800
-140	1981-04-02 00:20:00	14400	355004400
-141	1981-04-02 00:30:00	14400	355005000
-142	1981-04-02 00:40:00	14400	355005600
-143	1981-04-02 00:50:00	14400	355006200
-144	1981-04-02 01:00:00	14400	355006800
-145	1981-04-02 01:10:00	14400	355007400
-146	1981-04-02 01:20:00	14400	355008000
-147	1981-04-02 01:30:00	14400	355008600
-148	1981-04-02 01:40:00	14400	355009200
-149	1981-04-02 01:50:00	14400	355009800
-150	1981-04-02 02:00:00	14400	355010400
-151	1981-04-02 02:10:00	14400	355011000
-152	1981-04-02 02:20:00	14400	355011600
-153	1981-04-02 02:30:00	14400	355012200
-154	1981-04-02 02:40:00	14400	355012800
-155	1981-04-02 02:50:00	14400	355013400
-156	1981-04-02 03:00:00	14400	355014000
-157	1981-04-02 03:10:00	14400	355014600
-158	1981-04-02 03:20:00	14400	355015200
-159	1981-04-02 03:30:00	14400	355015800
-160	1981-04-02 03:40:00	14400	355016400
-161	1981-04-02 03:50:00	14400	355017000
-162	1981-04-02 04:00:00	14400	355017600
-163	1981-04-02 04:10:00	14400	355018200
-164	1981-04-02 04:20:00	14400	355018800
-165	1981-04-02 04:30:00	14400	355019400
-166	1981-04-02 04:40:00	14400	355020000
-167	1981-04-02 04:50:00	14400	355020600
-168	1981-04-02 05:00:00	14400	355021200
-169	1981-04-02 05:10:00	14400	355021800
-170	1981-04-02 05:20:00	14400	355022400
-171	1981-04-02 05:30:00	14400	355023000
-172	1981-04-02 05:40:00	14400	355023600
-173	1981-04-02 05:50:00	14400	355024200
-174	1981-04-02 06:00:00	14400	355024800
-175	1981-04-02 06:10:00	14400	355025400
-176	1981-04-02 06:20:00	14400	355026000
-177	1981-04-02 06:30:00	14400	355026600
-178	1981-04-02 06:40:00	14400	355027200
-179	1981-04-02 06:50:00	14400	355027800
-180	1981-04-02 07:00:00	14400	355028400
-181	1981-04-02 07:10:00	14400	355029000
-182	1981-04-02 07:20:00	14400	355029600
-183	1981-04-02 07:30:00	14400	355030200
-184	1981-04-02 07:40:00	14400	355030800
-185	1981-04-02 07:50:00	14400	355031400
-186	1981-04-02 08:00:00	14400	355032000
-187	1981-04-02 08:10:00	14400	355032600
-188	1981-04-02 08:20:00	14400	355033200
-189	1981-04-02 08:30:00	14400	355033800
-190	1981-04-02 08:40:00	14400	355034400
-191	1981-04-02 08:50:00	14400	355035000
-192	1981-04-02 09:00:00	14400	355035600
-193	1981-04-02 09:10:00	14400	355036200
-194	1981-04-02 09:20:00	14400	355036800
-195	1981-04-02 09:30:00	14400	355037400
-196	1981-04-02 09:40:00	14400	355038000
-197	1981-04-02 09:50:00	14400	355038600
-198	1981-04-02 10:00:00	14400	355039200
-199	1981-04-02 10:10:00	14400	355039800
-0	1981-09-30 00:00:00	14400	370641600
-1	1981-09-30 00:10:00	14400	370642200
-2	1981-09-30 00:20:00	14400	370642800
-3	1981-09-30 00:30:00	14400	370643400
-4	1981-09-30 00:40:00	14400	370644000
-5	1981-09-30 00:50:00	14400	370644600
-6	1981-09-30 01:00:00	14400	370645200
-7	1981-09-30 01:10:00	14400	370645800
-8	1981-09-30 01:20:00	14400	370646400
-9	1981-09-30 01:30:00	14400	370647000
-10	1981-09-30 01:40:00	14400	370647600
-11	1981-09-30 01:50:00	14400	370648200
-12	1981-09-30 02:00:00	14400	370648800
-13	1981-09-30 02:10:00	14400	370649400
-14	1981-09-30 02:20:00	14400	370650000
-15	1981-09-30 02:30:00	14400	370650600
-16	1981-09-30 02:40:00	14400	370651200
-17	1981-09-30 02:50:00	14400	370651800
-18	1981-09-30 03:00:00	14400	370652400
-19	1981-09-30 03:10:00	14400	370653000
-20	1981-09-30 03:20:00	14400	370653600
-21	1981-09-30 03:30:00	14400	370654200
-22	1981-09-30 03:40:00	14400	370654800
-23	1981-09-30 03:50:00	14400	370655400
-24	1981-09-30 04:00:00	14400	370656000
-25	1981-09-30 04:10:00	14400	370656600
-26	1981-09-30 04:20:00	14400	370657200
-27	1981-09-30 04:30:00	14400	370657800
-28	1981-09-30 04:40:00	14400	370658400
-29	1981-09-30 04:50:00	14400	370659000
-30	1981-09-30 05:00:00	14400	370659600
-31	1981-09-30 05:10:00	14400	370660200
-32	1981-09-30 05:20:00	14400	370660800
-33	1981-09-30 05:30:00	14400	370661400
-34	1981-09-30 05:40:00	14400	370662000
-35	1981-09-30 05:50:00	14400	370662600
-36	1981-09-30 06:00:00	14400	370663200
-37	1981-09-30 06:10:00	14400	370663800
-38	1981-09-30 06:20:00	14400	370664400
-39	1981-09-30 06:30:00	14400	370665000
-40	1981-09-30 06:40:00	14400	370665600
-41	1981-09-30 06:50:00	14400	370666200
-42	1981-09-30 07:00:00	14400	370666800
-43	1981-09-30 07:10:00	14400	370667400
-44	1981-09-30 07:20:00	14400	370668000
-45	1981-09-30 07:30:00	14400	370668600
-46	1981-09-30 07:40:00	14400	370669200
-47	1981-09-30 07:50:00	14400	370669800
-48	1981-09-30 08:00:00	14400	370670400
-49	1981-09-30 08:10:00	14400	370671000
-50	1981-09-30 08:20:00	14400	370671600
-51	1981-09-30 08:30:00	14400	370672200
-52	1981-09-30 08:40:00	14400	370672800
-53	1981-09-30 08:50:00	14400	370673400
-54	1981-09-30 09:00:00	14400	370674000
-55	1981-09-30 09:10:00	14400	370674600
-56	1981-09-30 09:20:00	14400	370675200
-57	1981-09-30 09:30:00	14400	370675800
-58	1981-09-30 09:40:00	14400	370676400
-59	1981-09-30 09:50:00	14400	370677000
-60	1981-09-30 10:00:00	14400	370677600
-61	1981-09-30 10:10:00	14400	370678200
-62	1981-09-30 10:20:00	14400	370678800
-63	1981-09-30 10:30:00	14400	370679400
-64	1981-09-30 10:40:00	14400	370680000
-65	1981-09-30 10:50:00	14400	370680600
-66	1981-09-30 11:00:00	14400	370681200
-67	1981-09-30 11:10:00	14400	370681800
-68	1981-09-30 11:20:00	14400	370682400
-69	1981-09-30 11:30:00	14400	370683000
-70	1981-09-30 11:40:00	14400	370683600
-71	1981-09-30 11:50:00	14400	370684200
-72	1981-09-30 12:00:00	14400	370684800
-73	1981-09-30 12:10:00	14400	370685400
-74	1981-09-30 12:20:00	14400	370686000
-75	1981-09-30 12:30:00	14400	370686600
-76	1981-09-30 12:40:00	14400	370687200
-77	1981-09-30 12:50:00	14400	370687800
-78	1981-09-30 13:00:00	14400	370688400
-79	1981-09-30 13:10:00	14400	370689000
-80	1981-09-30 13:20:00	14400	370689600
-81	1981-09-30 13:30:00	14400	370690200
-82	1981-09-30 13:40:00	14400	370690800
-83	1981-09-30 13:50:00	14400	370691400
-84	1981-09-30 14:00:00	14400	370692000
-85	1981-09-30 14:10:00	14400	370692600
-86	1981-09-30 14:20:00	14400	370693200
-87	1981-09-30 14:30:00	14400	370693800
-88	1981-09-30 14:40:00	14400	370694400
-89	1981-09-30 14:50:00	14400	370695000
-90	1981-09-30 15:00:00	14400	370695600
-91	1981-09-30 15:10:00	14400	370696200
-92	1981-09-30 15:20:00	14400	370696800
-93	1981-09-30 15:30:00	14400	370697400
-94	1981-09-30 15:40:00	14400	370698000
-95	1981-09-30 15:50:00	14400	370698600
-96	1981-09-30 16:00:00	14400	370699200
-97	1981-09-30 16:10:00	14400	370699800
-98	1981-09-30 16:20:00	14400	370700400
-99	1981-09-30 16:30:00	14400	370701000
-100	1981-09-30 16:40:00	14400	370701600
-101	1981-09-30 16:50:00	14400	370702200
-102	1981-09-30 17:00:00	14400	370702800
-103	1981-09-30 17:10:00	14400	370703400
-104	1981-09-30 17:20:00	14400	370704000
-105	1981-09-30 17:30:00	14400	370704600
-106	1981-09-30 17:40:00	14400	370705200
-107	1981-09-30 17:50:00	14400	370705800
-108	1981-09-30 18:00:00	14400	370706400
-109	1981-09-30 18:10:00	14400	370707000
-110	1981-09-30 18:20:00	14400	370707600
-111	1981-09-30 18:30:00	14400	370708200
-112	1981-09-30 18:40:00	14400	370708800
-113	1981-09-30 18:50:00	14400	370709400
-114	1981-09-30 19:00:00	14400	370710000
-115	1981-09-30 19:10:00	14400	370710600
-116	1981-09-30 19:20:00	14400	370711200
-117	1981-09-30 19:30:00	14400	370711800
-118	1981-09-30 19:40:00	14400	370712400
-119	1981-09-30 19:50:00	14400	370713000
-120	1981-09-30 20:00:00	14400	370713600
-121	1981-09-30 20:10:00	14400	370714200
-122	1981-09-30 20:20:00	14400	370714800
-123	1981-09-30 20:30:00	14400	370715400
-124	1981-09-30 20:40:00	14400	370716000
-125	1981-09-30 20:50:00	14400	370716600
-126	1981-09-30 21:00:00	14400	370717200
-127	1981-09-30 21:10:00	14400	370717800
-128	1981-09-30 21:20:00	14400	370718400
-129	1981-09-30 21:30:00	14400	370719000
-130	1981-09-30 21:40:00	14400	370719600
-131	1981-09-30 21:50:00	14400	370720200
-132	1981-09-30 22:00:00	14400	370720800
-133	1981-09-30 22:10:00	14400	370721400
-134	1981-09-30 22:20:00	14400	370722000
-135	1981-09-30 22:30:00	14400	370722600
-136	1981-09-30 22:40:00	14400	370723200
-137	1981-09-30 22:50:00	14400	370723800
-138	1981-09-30 23:00:00	14400	370724400
-139	1981-09-30 23:10:00	14400	370725000
-140	1981-09-30 23:20:00	14400	370725600
-141	1981-09-30 23:30:00	14400	370726200
-142	1981-09-30 23:40:00	14400	370726800
-143	1981-09-30 23:50:00	14400	370727400
-144	1981-09-30 23:00:00	10800	370728000
-145	1981-09-30 23:10:00	10800	370728600
-146	1981-09-30 23:20:00	10800	370729200
-147	1981-09-30 23:30:00	10800	370729800
-148	1981-09-30 23:40:00	10800	370730400
-149	1981-09-30 23:50:00	10800	370731000
-150	1981-10-01 00:00:00	10800	370731600
-151	1981-10-01 00:10:00	10800	370732200
-152	1981-10-01 00:20:00	10800	370732800
-153	1981-10-01 00:30:00	10800	370733400
-154	1981-10-01 00:40:00	10800	370734000
-155	1981-10-01 00:50:00	10800	370734600
-156	1981-10-01 01:00:00	10800	370735200
-157	1981-10-01 01:10:00	10800	370735800
-158	1981-10-01 01:20:00	10800	370736400
-159	1981-10-01 01:30:00	10800	370737000
-160	1981-10-01 01:40:00	10800	370737600
-161	1981-10-01 01:50:00	10800	370738200
-162	1981-10-01 02:00:00	10800	370738800
-163	1981-10-01 02:10:00	10800	370739400
-164	1981-10-01 02:20:00	10800	370740000
-165	1981-10-01 02:30:00	10800	370740600
-166	1981-10-01 02:40:00	10800	370741200
-167	1981-10-01 02:50:00	10800	370741800
-168	1981-10-01 03:00:00	10800	370742400
-169	1981-10-01 03:10:00	10800	370743000
-170	1981-10-01 03:20:00	10800	370743600
-171	1981-10-01 03:30:00	10800	370744200
-172	1981-10-01 03:40:00	10800	370744800
-173	1981-10-01 03:50:00	10800	370745400
-174	1981-10-01 04:00:00	10800	370746000
-175	1981-10-01 04:10:00	10800	370746600
-176	1981-10-01 04:20:00	10800	370747200
-177	1981-10-01 04:30:00	10800	370747800
-178	1981-10-01 04:40:00	10800	370748400
-179	1981-10-01 04:50:00	10800	370749000
-180	1981-10-01 05:00:00	10800	370749600
-181	1981-10-01 05:10:00	10800	370750200
-182	1981-10-01 05:20:00	10800	370750800
-183	1981-10-01 05:30:00	10800	370751400
-184	1981-10-01 05:40:00	10800	370752000
-185	1981-10-01 05:50:00	10800	370752600
-186	1981-10-01 06:00:00	10800	370753200
-187	1981-10-01 06:10:00	10800	370753800
-188	1981-10-01 06:20:00	10800	370754400
-189	1981-10-01 06:30:00	10800	370755000
-190	1981-10-01 06:40:00	10800	370755600
-191	1981-10-01 06:50:00	10800	370756200
-192	1981-10-01 07:00:00	10800	370756800
-193	1981-10-01 07:10:00	10800	370757400
-194	1981-10-01 07:20:00	10800	370758000
-195	1981-10-01 07:30:00	10800	370758600
-196	1981-10-01 07:40:00	10800	370759200
-197	1981-10-01 07:50:00	10800	370759800
-198	1981-10-01 08:00:00	10800	370760400
-199	1981-10-01 08:10:00	10800	370761000
-0	2020-03-21 00:00:00	12600	1584736200
-1	2020-03-21 00:10:00	12600	1584736800
-2	2020-03-21 00:20:00	12600	1584737400
-3	2020-03-21 00:30:00	12600	1584738000
-4	2020-03-21 00:40:00	12600	1584738600
-5	2020-03-21 00:50:00	12600	1584739200
-6	2020-03-21 01:00:00	12600	1584739800
-7	2020-03-21 01:10:00	12600	1584740400
-8	2020-03-21 01:20:00	12600	1584741000
-9	2020-03-21 01:30:00	12600	1584741600
-10	2020-03-21 01:40:00	12600	1584742200
-11	2020-03-21 01:50:00	12600	1584742800
-12	2020-03-21 02:00:00	12600	1584743400
-13	2020-03-21 02:10:00	12600	1584744000
-14	2020-03-21 02:20:00	12600	1584744600
-15	2020-03-21 02:30:00	12600	1584745200
-16	2020-03-21 02:40:00	12600	1584745800
-17	2020-03-21 02:50:00	12600	1584746400
-18	2020-03-21 03:00:00	12600	1584747000
-19	2020-03-21 03:10:00	12600	1584747600
-20	2020-03-21 03:20:00	12600	1584748200
-21	2020-03-21 03:30:00	12600	1584748800
-22	2020-03-21 03:40:00	12600	1584749400
-23	2020-03-21 03:50:00	12600	1584750000
-24	2020-03-21 04:00:00	12600	1584750600
-25	2020-03-21 04:10:00	12600	1584751200
-26	2020-03-21 04:20:00	12600	1584751800
-27	2020-03-21 04:30:00	12600	1584752400
-28	2020-03-21 04:40:00	12600	1584753000
-29	2020-03-21 04:50:00	12600	1584753600
-30	2020-03-21 05:00:00	12600	1584754200
-31	2020-03-21 05:10:00	12600	1584754800
-32	2020-03-21 05:20:00	12600	1584755400
-33	2020-03-21 05:30:00	12600	1584756000
-34	2020-03-21 05:40:00	12600	1584756600
-35	2020-03-21 05:50:00	12600	1584757200
-36	2020-03-21 06:00:00	12600	1584757800
-37	2020-03-21 06:10:00	12600	1584758400
-38	2020-03-21 06:20:00	12600	1584759000
-39	2020-03-21 06:30:00	12600	1584759600
-40	2020-03-21 06:40:00	12600	1584760200
-41	2020-03-21 06:50:00	12600	1584760800
-42	2020-03-21 07:00:00	12600	1584761400
-43	2020-03-21 07:10:00	12600	1584762000
-44	2020-03-21 07:20:00	12600	1584762600
-45	2020-03-21 07:30:00	12600	1584763200
-46	2020-03-21 07:40:00	12600	1584763800
-47	2020-03-21 07:50:00	12600	1584764400
-48	2020-03-21 08:00:00	12600	1584765000
-49	2020-03-21 08:10:00	12600	1584765600
-50	2020-03-21 08:20:00	12600	1584766200
-51	2020-03-21 08:30:00	12600	1584766800
-52	2020-03-21 08:40:00	12600	1584767400
-53	2020-03-21 08:50:00	12600	1584768000
-54	2020-03-21 09:00:00	12600	1584768600
-55	2020-03-21 09:10:00	12600	1584769200
-56	2020-03-21 09:20:00	12600	1584769800
-57	2020-03-21 09:30:00	12600	1584770400
-58	2020-03-21 09:40:00	12600	1584771000
-59	2020-03-21 09:50:00	12600	1584771600
-60	2020-03-21 10:00:00	12600	1584772200
-61	2020-03-21 10:10:00	12600	1584772800
-62	2020-03-21 10:20:00	12600	1584773400
-63	2020-03-21 10:30:00	12600	1584774000
-64	2020-03-21 10:40:00	12600	1584774600
-65	2020-03-21 10:50:00	12600	1584775200
-66	2020-03-21 11:00:00	12600	1584775800
-67	2020-03-21 11:10:00	12600	1584776400
-68	2020-03-21 11:20:00	12600	1584777000
-69	2020-03-21 11:30:00	12600	1584777600
-70	2020-03-21 11:40:00	12600	1584778200
-71	2020-03-21 11:50:00	12600	1584778800
-72	2020-03-21 12:00:00	12600	1584779400
-73	2020-03-21 12:10:00	12600	1584780000
-74	2020-03-21 12:20:00	12600	1584780600
-75	2020-03-21 12:30:00	12600	1584781200
-76	2020-03-21 12:40:00	12600	1584781800
-77	2020-03-21 12:50:00	12600	1584782400
-78	2020-03-21 13:00:00	12600	1584783000
-79	2020-03-21 13:10:00	12600	1584783600
-80	2020-03-21 13:20:00	12600	1584784200
-81	2020-03-21 13:30:00	12600	1584784800
-82	2020-03-21 13:40:00	12600	1584785400
-83	2020-03-21 13:50:00	12600	1584786000
-84	2020-03-21 14:00:00	12600	1584786600
-85	2020-03-21 14:10:00	12600	1584787200
-86	2020-03-21 14:20:00	12600	1584787800
-87	2020-03-21 14:30:00	12600	1584788400
-88	2020-03-21 14:40:00	12600	1584789000
-89	2020-03-21 14:50:00	12600	1584789600
-90	2020-03-21 15:00:00	12600	1584790200
-91	2020-03-21 15:10:00	12600	1584790800
-92	2020-03-21 15:20:00	12600	1584791400
-93	2020-03-21 15:30:00	12600	1584792000
-94	2020-03-21 15:40:00	12600	1584792600
-95	2020-03-21 15:50:00	12600	1584793200
-96	2020-03-21 16:00:00	12600	1584793800
-97	2020-03-21 16:10:00	12600	1584794400
-98	2020-03-21 16:20:00	12600	1584795000
-99	2020-03-21 16:30:00	12600	1584795600
-100	2020-03-21 16:40:00	12600	1584796200
-101	2020-03-21 16:50:00	12600	1584796800
-102	2020-03-21 17:00:00	12600	1584797400
-103	2020-03-21 17:10:00	12600	1584798000
-104	2020-03-21 17:20:00	12600	1584798600
-105	2020-03-21 17:30:00	12600	1584799200
-106	2020-03-21 17:40:00	12600	1584799800
-107	2020-03-21 17:50:00	12600	1584800400
-108	2020-03-21 18:00:00	12600	1584801000
-109	2020-03-21 18:10:00	12600	1584801600
-110	2020-03-21 18:20:00	12600	1584802200
-111	2020-03-21 18:30:00	12600	1584802800
-112	2020-03-21 18:40:00	12600	1584803400
-113	2020-03-21 18:50:00	12600	1584804000
-114	2020-03-21 19:00:00	12600	1584804600
-115	2020-03-21 19:10:00	12600	1584805200
-116	2020-03-21 19:20:00	12600	1584805800
-117	2020-03-21 19:30:00	12600	1584806400
-118	2020-03-21 19:40:00	12600	1584807000
-119	2020-03-21 19:50:00	12600	1584807600
-120	2020-03-21 20:00:00	12600	1584808200
-121	2020-03-21 20:10:00	12600	1584808800
-122	2020-03-21 20:20:00	12600	1584809400
-123	2020-03-21 20:30:00	12600	1584810000
-124	2020-03-21 20:40:00	12600	1584810600
-125	2020-03-21 20:50:00	12600	1584811200
-126	2020-03-21 21:00:00	12600	1584811800
-127	2020-03-21 21:10:00	12600	1584812400
-128	2020-03-21 21:20:00	12600	1584813000
-129	2020-03-21 21:30:00	12600	1584813600
-130	2020-03-21 21:40:00	12600	1584814200
-131	2020-03-21 21:50:00	12600	1584814800
-132	2020-03-21 22:00:00	12600	1584815400
-133	2020-03-21 22:10:00	12600	1584816000
-134	2020-03-21 22:20:00	12600	1584816600
-135	2020-03-21 22:30:00	12600	1584817200
-136	2020-03-21 22:40:00	12600	1584817800
-137	2020-03-21 22:50:00	12600	1584818400
-138	2020-03-22 00:00:00	16200	1584819000
-139	2020-03-22 00:10:00	16200	1584819600
-140	2020-03-22 00:20:00	16200	1584820200
-141	2020-03-22 00:30:00	16200	1584820800
-142	2020-03-22 00:40:00	16200	1584821400
-143	2020-03-22 00:50:00	16200	1584822000
-144	2020-03-22 01:00:00	16200	1584822600
-145	2020-03-22 01:10:00	16200	1584823200
-146	2020-03-22 01:20:00	16200	1584823800
-147	2020-03-22 01:30:00	16200	1584824400
-148	2020-03-22 01:40:00	16200	1584825000
-149	2020-03-22 01:50:00	16200	1584825600
-150	2020-03-22 02:00:00	16200	1584826200
-151	2020-03-22 02:10:00	16200	1584826800
-152	2020-03-22 02:20:00	16200	1584827400
-153	2020-03-22 02:30:00	16200	1584828000
-154	2020-03-22 02:40:00	16200	1584828600
-155	2020-03-22 02:50:00	16200	1584829200
-156	2020-03-22 03:00:00	16200	1584829800
-157	2020-03-22 03:10:00	16200	1584830400
-158	2020-03-22 03:20:00	16200	1584831000
-159	2020-03-22 03:30:00	16200	1584831600
-160	2020-03-22 03:40:00	16200	1584832200
-161	2020-03-22 03:50:00	16200	1584832800
-162	2020-03-22 04:00:00	16200	1584833400
-163	2020-03-22 04:10:00	16200	1584834000
-164	2020-03-22 04:20:00	16200	1584834600
-165	2020-03-22 04:30:00	16200	1584835200
-166	2020-03-22 04:40:00	16200	1584835800
-167	2020-03-22 04:50:00	16200	1584836400
-168	2020-03-22 05:00:00	16200	1584837000
-169	2020-03-22 05:10:00	16200	1584837600
-170	2020-03-22 05:20:00	16200	1584838200
-171	2020-03-22 05:30:00	16200	1584838800
-172	2020-03-22 05:40:00	16200	1584839400
-173	2020-03-22 05:50:00	16200	1584840000
-174	2020-03-22 06:00:00	16200	1584840600
-175	2020-03-22 06:10:00	16200	1584841200
-176	2020-03-22 06:20:00	16200	1584841800
-177	2020-03-22 06:30:00	16200	1584842400
-178	2020-03-22 06:40:00	16200	1584843000
-179	2020-03-22 06:50:00	16200	1584843600
-180	2020-03-22 07:00:00	16200	1584844200
-181	2020-03-22 07:10:00	16200	1584844800
-182	2020-03-22 07:20:00	16200	1584845400
-183	2020-03-22 07:30:00	16200	1584846000
-184	2020-03-22 07:40:00	16200	1584846600
-185	2020-03-22 07:50:00	16200	1584847200
-186	2020-03-22 08:00:00	16200	1584847800
-187	2020-03-22 08:10:00	16200	1584848400
-188	2020-03-22 08:20:00	16200	1584849000
-189	2020-03-22 08:30:00	16200	1584849600
-190	2020-03-22 08:40:00	16200	1584850200
-191	2020-03-22 08:50:00	16200	1584850800
-192	2020-03-22 09:00:00	16200	1584851400
-193	2020-03-22 09:10:00	16200	1584852000
-194	2020-03-22 09:20:00	16200	1584852600
-195	2020-03-22 09:30:00	16200	1584853200
-196	2020-03-22 09:40:00	16200	1584853800
-197	2020-03-22 09:50:00	16200	1584854400
-198	2020-03-22 10:00:00	16200	1584855000
-199	2020-03-22 10:10:00	16200	1584855600
-0	2020-09-20 00:00:00	16200	1600543800
-1	2020-09-20 00:10:00	16200	1600544400
-2	2020-09-20 00:20:00	16200	1600545000
-3	2020-09-20 00:30:00	16200	1600545600
-4	2020-09-20 00:40:00	16200	1600546200
-5	2020-09-20 00:50:00	16200	1600546800
-6	2020-09-20 01:00:00	16200	1600547400
-7	2020-09-20 01:10:00	16200	1600548000
-8	2020-09-20 01:20:00	16200	1600548600
-9	2020-09-20 01:30:00	16200	1600549200
-10	2020-09-20 01:40:00	16200	1600549800
-11	2020-09-20 01:50:00	16200	1600550400
-12	2020-09-20 02:00:00	16200	1600551000
-13	2020-09-20 02:10:00	16200	1600551600
-14	2020-09-20 02:20:00	16200	1600552200
-15	2020-09-20 02:30:00	16200	1600552800
-16	2020-09-20 02:40:00	16200	1600553400
-17	2020-09-20 02:50:00	16200	1600554000
-18	2020-09-20 03:00:00	16200	1600554600
-19	2020-09-20 03:10:00	16200	1600555200
-20	2020-09-20 03:20:00	16200	1600555800
-21	2020-09-20 03:30:00	16200	1600556400
-22	2020-09-20 03:40:00	16200	1600557000
-23	2020-09-20 03:50:00	16200	1600557600
-24	2020-09-20 04:00:00	16200	1600558200
-25	2020-09-20 04:10:00	16200	1600558800
-26	2020-09-20 04:20:00	16200	1600559400
-27	2020-09-20 04:30:00	16200	1600560000
-28	2020-09-20 04:40:00	16200	1600560600
-29	2020-09-20 04:50:00	16200	1600561200
-30	2020-09-20 05:00:00	16200	1600561800
-31	2020-09-20 05:10:00	16200	1600562400
-32	2020-09-20 05:20:00	16200	1600563000
-33	2020-09-20 05:30:00	16200	1600563600
-34	2020-09-20 05:40:00	16200	1600564200
-35	2020-09-20 05:50:00	16200	1600564800
-36	2020-09-20 06:00:00	16200	1600565400
-37	2020-09-20 06:10:00	16200	1600566000
-38	2020-09-20 06:20:00	16200	1600566600
-39	2020-09-20 06:30:00	16200	1600567200
-40	2020-09-20 06:40:00	16200	1600567800
-41	2020-09-20 06:50:00	16200	1600568400
-42	2020-09-20 07:00:00	16200	1600569000
-43	2020-09-20 07:10:00	16200	1600569600
-44	2020-09-20 07:20:00	16200	1600570200
-45	2020-09-20 07:30:00	16200	1600570800
-46	2020-09-20 07:40:00	16200	1600571400
-47	2020-09-20 07:50:00	16200	1600572000
-48	2020-09-20 08:00:00	16200	1600572600
-49	2020-09-20 08:10:00	16200	1600573200
-50	2020-09-20 08:20:00	16200	1600573800
-51	2020-09-20 08:30:00	16200	1600574400
-52	2020-09-20 08:40:00	16200	1600575000
-53	2020-09-20 08:50:00	16200	1600575600
-54	2020-09-20 09:00:00	16200	1600576200
-55	2020-09-20 09:10:00	16200	1600576800
-56	2020-09-20 09:20:00	16200	1600577400
-57	2020-09-20 09:30:00	16200	1600578000
-58	2020-09-20 09:40:00	16200	1600578600
-59	2020-09-20 09:50:00	16200	1600579200
-60	2020-09-20 10:00:00	16200	1600579800
-61	2020-09-20 10:10:00	16200	1600580400
-62	2020-09-20 10:20:00	16200	1600581000
-63	2020-09-20 10:30:00	16200	1600581600
-64	2020-09-20 10:40:00	16200	1600582200
-65	2020-09-20 10:50:00	16200	1600582800
-66	2020-09-20 11:00:00	16200	1600583400
-67	2020-09-20 11:10:00	16200	1600584000
-68	2020-09-20 11:20:00	16200	1600584600
-69	2020-09-20 11:30:00	16200	1600585200
-70	2020-09-20 11:40:00	16200	1600585800
-71	2020-09-20 11:50:00	16200	1600586400
-72	2020-09-20 12:00:00	16200	1600587000
-73	2020-09-20 12:10:00	16200	1600587600
-74	2020-09-20 12:20:00	16200	1600588200
-75	2020-09-20 12:30:00	16200	1600588800
-76	2020-09-20 12:40:00	16200	1600589400
-77	2020-09-20 12:50:00	16200	1600590000
-78	2020-09-20 13:00:00	16200	1600590600
-79	2020-09-20 13:10:00	16200	1600591200
-80	2020-09-20 13:20:00	16200	1600591800
-81	2020-09-20 13:30:00	16200	1600592400
-82	2020-09-20 13:40:00	16200	1600593000
-83	2020-09-20 13:50:00	16200	1600593600
-84	2020-09-20 14:00:00	16200	1600594200
-85	2020-09-20 14:10:00	16200	1600594800
-86	2020-09-20 14:20:00	16200	1600595400
-87	2020-09-20 14:30:00	16200	1600596000
-88	2020-09-20 14:40:00	16200	1600596600
-89	2020-09-20 14:50:00	16200	1600597200
-90	2020-09-20 15:00:00	16200	1600597800
-91	2020-09-20 15:10:00	16200	1600598400
-92	2020-09-20 15:20:00	16200	1600599000
-93	2020-09-20 15:30:00	16200	1600599600
-94	2020-09-20 15:40:00	16200	1600600200
-95	2020-09-20 15:50:00	16200	1600600800
-96	2020-09-20 16:00:00	16200	1600601400
-97	2020-09-20 16:10:00	16200	1600602000
-98	2020-09-20 16:20:00	16200	1600602600
-99	2020-09-20 16:30:00	16200	1600603200
-100	2020-09-20 16:40:00	16200	1600603800
-101	2020-09-20 16:50:00	16200	1600604400
-102	2020-09-20 17:00:00	16200	1600605000
-103	2020-09-20 17:10:00	16200	1600605600
-104	2020-09-20 17:20:00	16200	1600606200
-105	2020-09-20 17:30:00	16200	1600606800
-106	2020-09-20 17:40:00	16200	1600607400
-107	2020-09-20 17:50:00	16200	1600608000
-108	2020-09-20 18:00:00	16200	1600608600
-109	2020-09-20 18:10:00	16200	1600609200
-110	2020-09-20 18:20:00	16200	1600609800
-111	2020-09-20 18:30:00	16200	1600610400
-112	2020-09-20 18:40:00	16200	1600611000
-113	2020-09-20 18:50:00	16200	1600611600
-114	2020-09-20 19:00:00	16200	1600612200
-115	2020-09-20 19:10:00	16200	1600612800
-116	2020-09-20 19:20:00	16200	1600613400
-117	2020-09-20 19:30:00	16200	1600614000
-118	2020-09-20 19:40:00	16200	1600614600
-119	2020-09-20 19:50:00	16200	1600615200
-120	2020-09-20 20:00:00	16200	1600615800
-121	2020-09-20 20:10:00	16200	1600616400
-122	2020-09-20 20:20:00	16200	1600617000
-123	2020-09-20 20:30:00	16200	1600617600
-124	2020-09-20 20:40:00	16200	1600618200
-125	2020-09-20 20:50:00	16200	1600618800
-126	2020-09-20 21:00:00	16200	1600619400
-127	2020-09-20 21:10:00	16200	1600620000
-128	2020-09-20 21:20:00	16200	1600620600
-129	2020-09-20 21:30:00	16200	1600621200
-130	2020-09-20 21:40:00	16200	1600621800
-131	2020-09-20 21:50:00	16200	1600622400
-132	2020-09-20 22:00:00	16200	1600623000
-133	2020-09-20 22:10:00	16200	1600623600
-134	2020-09-20 22:20:00	16200	1600624200
-135	2020-09-20 22:30:00	16200	1600624800
-136	2020-09-20 22:40:00	16200	1600625400
-137	2020-09-20 22:50:00	16200	1600626000
-138	2020-09-20 23:00:00	16200	1600626600
-139	2020-09-20 23:10:00	16200	1600627200
-140	2020-09-20 23:20:00	16200	1600627800
-141	2020-09-20 23:30:00	16200	1600628400
-142	2020-09-20 23:40:00	16200	1600629000
-143	2020-09-20 23:50:00	16200	1600629600
-144	2020-09-20 23:00:00	12600	1600630200
-145	2020-09-20 23:10:00	12600	1600630800
-146	2020-09-20 23:20:00	12600	1600631400
-147	2020-09-20 23:30:00	12600	1600632000
-148	2020-09-20 23:40:00	12600	1600632600
-149	2020-09-20 23:50:00	12600	1600633200
-150	2020-09-21 00:00:00	12600	1600633800
-151	2020-09-21 00:10:00	12600	1600634400
-152	2020-09-21 00:20:00	12600	1600635000
-153	2020-09-21 00:30:00	12600	1600635600
-154	2020-09-21 00:40:00	12600	1600636200
-155	2020-09-21 00:50:00	12600	1600636800
-156	2020-09-21 01:00:00	12600	1600637400
-157	2020-09-21 01:10:00	12600	1600638000
-158	2020-09-21 01:20:00	12600	1600638600
-159	2020-09-21 01:30:00	12600	1600639200
-160	2020-09-21 01:40:00	12600	1600639800
-161	2020-09-21 01:50:00	12600	1600640400
-162	2020-09-21 02:00:00	12600	1600641000
-163	2020-09-21 02:10:00	12600	1600641600
-164	2020-09-21 02:20:00	12600	1600642200
-165	2020-09-21 02:30:00	12600	1600642800
-166	2020-09-21 02:40:00	12600	1600643400
-167	2020-09-21 02:50:00	12600	1600644000
-168	2020-09-21 03:00:00	12600	1600644600
-169	2020-09-21 03:10:00	12600	1600645200
-170	2020-09-21 03:20:00	12600	1600645800
-171	2020-09-21 03:30:00	12600	1600646400
-172	2020-09-21 03:40:00	12600	1600647000
-173	2020-09-21 03:50:00	12600	1600647600
-174	2020-09-21 04:00:00	12600	1600648200
-175	2020-09-21 04:10:00	12600	1600648800
-176	2020-09-21 04:20:00	12600	1600649400
-177	2020-09-21 04:30:00	12600	1600650000
-178	2020-09-21 04:40:00	12600	1600650600
-179	2020-09-21 04:50:00	12600	1600651200
-180	2020-09-21 05:00:00	12600	1600651800
-181	2020-09-21 05:10:00	12600	1600652400
-182	2020-09-21 05:20:00	12600	1600653000
-183	2020-09-21 05:30:00	12600	1600653600
-184	2020-09-21 05:40:00	12600	1600654200
-185	2020-09-21 05:50:00	12600	1600654800
-186	2020-09-21 06:00:00	12600	1600655400
-187	2020-09-21 06:10:00	12600	1600656000
-188	2020-09-21 06:20:00	12600	1600656600
-189	2020-09-21 06:30:00	12600	1600657200
-190	2020-09-21 06:40:00	12600	1600657800
-191	2020-09-21 06:50:00	12600	1600658400
-192	2020-09-21 07:00:00	12600	1600659000
-193	2020-09-21 07:10:00	12600	1600659600
-194	2020-09-21 07:20:00	12600	1600660200
-195	2020-09-21 07:30:00	12600	1600660800
-196	2020-09-21 07:40:00	12600	1600661400
-197	2020-09-21 07:50:00	12600	1600662000
-198	2020-09-21 08:00:00	12600	1600662600
-199	2020-09-21 08:10:00	12600	1600663200
+DST boundary test for Europe/Moscow:
+0	1981-04-01 22:40:00	10800	355002000
+1	1981-04-01 22:50:00	10800	355002600
+2	1981-04-02 00:00:00	14400	355003200
+3	1981-04-02 00:10:00	14400	355003800
+0	1981-09-30 23:00:00	14400	370724400
+1	1981-09-30 23:10:00	14400	370725000
+2	1981-09-30 23:20:00	14400	370725600
+3	1981-09-30 23:30:00	14400	370726200
+4	1981-09-30 23:40:00	14400	370726800
+5	1981-09-30 23:50:00	14400	370727400
+6	1981-09-30 23:00:00	10800	370728000
+7	1981-09-30 23:10:00	10800	370728600
+8	1981-09-30 23:20:00	10800	370729200
+9	1981-09-30 23:30:00	10800	370729800
+10	1981-09-30 23:40:00	10800	370730400
+11	1981-09-30 23:50:00	10800	370731000
+12	1981-10-01 00:00:00	10800	370731600
+13	1981-10-01 00:10:00	10800	370732200
+14	1981-10-01 00:20:00	10800	370732800
+15	1981-10-01 00:30:00	10800	370733400
+16	1981-10-01 00:40:00	10800	370734000
+17	1981-10-01 00:50:00	10800	370734600
+DST boundary test for Asia/Tehran:
+0	2020-03-21 22:40:00	12600	1584817800
+1	2020-03-21 22:50:00	12600	1584818400
+2	2020-03-22 00:00:00	16200	1584819000
+3	2020-03-22 00:10:00	16200	1584819600
+0	2020-09-20 23:00:00	16200	1600626600
+1	2020-09-20 23:10:00	16200	1600627200
+2	2020-09-20 23:20:00	16200	1600627800
+3	2020-09-20 23:30:00	16200	1600628400
+4	2020-09-20 23:40:00	16200	1600629000
+5	2020-09-20 23:50:00	16200	1600629600
+6	2020-09-20 23:00:00	12600	1600630200
+7	2020-09-20 23:10:00	12600	1600630800
+8	2020-09-20 23:20:00	12600	1600631400
+9	2020-09-20 23:30:00	12600	1600632000
+10	2020-09-20 23:40:00	12600	1600632600
+11	2020-09-20 23:50:00	12600	1600633200
+12	2020-09-21 00:00:00	12600	1600633800
+13	2020-09-21 00:10:00	12600	1600634400
+14	2020-09-21 00:20:00	12600	1600635000
+15	2020-09-21 00:30:00	12600	1600635600
+16	2020-09-21 00:40:00	12600	1600636200
+17	2020-09-21 00:50:00	12600	1600636800
+DST boundary test for Australia/Lord_Howe. This is a special timezone with DST offset is 30mins with the timezone epoc also lays at half hour
 37800
 39600
-0	2020-10-03 00:00:00	37800	1601645400
-1	2020-10-03 00:10:00	37800	1601646000
-2	2020-10-03 00:20:00	37800	1601646600
-3	2020-10-03 00:30:00	37800	1601647200
-4	2020-10-03 00:40:00	37800	1601647800
-5	2020-10-03 00:50:00	37800	1601648400
-6	2020-10-03 01:00:00	37800	1601649000
-7	2020-10-03 01:10:00	37800	1601649600
-8	2020-10-03 01:20:00	37800	1601650200
-9	2020-10-03 01:30:00	37800	1601650800
-10	2020-10-03 01:40:00	37800	1601651400
-11	2020-10-03 01:50:00	37800	1601652000
-12	2020-10-03 02:00:00	37800	1601652600
-13	2020-10-03 02:10:00	37800	1601653200
-14	2020-10-03 02:20:00	37800	1601653800
-15	2020-10-03 02:30:00	37800	1601654400
-16	2020-10-03 02:40:00	37800	1601655000
-17	2020-10-03 02:50:00	37800	1601655600
-18	2020-10-03 03:00:00	37800	1601656200
-19	2020-10-03 03:10:00	37800	1601656800
-20	2020-10-03 03:20:00	37800	1601657400
-21	2020-10-03 03:30:00	37800	1601658000
-22	2020-10-03 03:40:00	37800	1601658600
-23	2020-10-03 03:50:00	37800	1601659200
-24	2020-10-03 04:00:00	37800	1601659800
-25	2020-10-03 04:10:00	37800	1601660400
-26	2020-10-03 04:20:00	37800	1601661000
-27	2020-10-03 04:30:00	37800	1601661600
-28	2020-10-03 04:40:00	37800	1601662200
-29	2020-10-03 04:50:00	37800	1601662800
-30	2020-10-03 05:00:00	37800	1601663400
-31	2020-10-03 05:10:00	37800	1601664000
-32	2020-10-03 05:20:00	37800	1601664600
-33	2020-10-03 05:30:00	37800	1601665200
-34	2020-10-03 05:40:00	37800	1601665800
-35	2020-10-03 05:50:00	37800	1601666400
-36	2020-10-03 06:00:00	37800	1601667000
-37	2020-10-03 06:10:00	37800	1601667600
-38	2020-10-03 06:20:00	37800	1601668200
-39	2020-10-03 06:30:00	37800	1601668800
-40	2020-10-03 06:40:00	37800	1601669400
-41	2020-10-03 06:50:00	37800	1601670000
-42	2020-10-03 07:00:00	37800	1601670600
-43	2020-10-03 07:10:00	37800	1601671200
-44	2020-10-03 07:20:00	37800	1601671800
-45	2020-10-03 07:30:00	37800	1601672400
-46	2020-10-03 07:40:00	37800	1601673000
-47	2020-10-03 07:50:00	37800	1601673600
-48	2020-10-03 08:00:00	37800	1601674200
-49	2020-10-03 08:10:00	37800	1601674800
-50	2020-10-03 08:20:00	37800	1601675400
-51	2020-10-03 08:30:00	37800	1601676000
-52	2020-10-03 08:40:00	37800	1601676600
-53	2020-10-03 08:50:00	37800	1601677200
-54	2020-10-03 09:00:00	37800	1601677800
-55	2020-10-03 09:10:00	37800	1601678400
-56	2020-10-03 09:20:00	37800	1601679000
-57	2020-10-03 09:30:00	37800	1601679600
-58	2020-10-03 09:40:00	37800	1601680200
-59	2020-10-03 09:50:00	37800	1601680800
-60	2020-10-03 10:00:00	37800	1601681400
-61	2020-10-03 10:10:00	37800	1601682000
-62	2020-10-03 10:20:00	37800	1601682600
-63	2020-10-03 10:30:00	37800	1601683200
-64	2020-10-03 10:40:00	37800	1601683800
-65	2020-10-03 10:50:00	37800	1601684400
-66	2020-10-03 11:00:00	37800	1601685000
-67	2020-10-03 11:10:00	37800	1601685600
-68	2020-10-03 11:20:00	37800	1601686200
-69	2020-10-03 11:30:00	37800	1601686800
-70	2020-10-03 11:40:00	37800	1601687400
-71	2020-10-03 11:50:00	37800	1601688000
-72	2020-10-03 12:00:00	37800	1601688600
-73	2020-10-03 12:10:00	37800	1601689200
-74	2020-10-03 12:20:00	37800	1601689800
-75	2020-10-03 12:30:00	37800	1601690400
-76	2020-10-03 12:40:00	37800	1601691000
-77	2020-10-03 12:50:00	37800	1601691600
-78	2020-10-03 13:00:00	37800	1601692200
-79	2020-10-03 13:10:00	37800	1601692800
-80	2020-10-03 13:20:00	37800	1601693400
-81	2020-10-03 13:30:00	37800	1601694000
-82	2020-10-03 13:40:00	37800	1601694600
-83	2020-10-03 13:50:00	37800	1601695200
-84	2020-10-03 14:00:00	37800	1601695800
-85	2020-10-03 14:10:00	37800	1601696400
-86	2020-10-03 14:20:00	37800	1601697000
-87	2020-10-03 14:30:00	37800	1601697600
-88	2020-10-03 14:40:00	37800	1601698200
-89	2020-10-03 14:50:00	37800	1601698800
-90	2020-10-03 15:00:00	37800	1601699400
-91	2020-10-03 15:10:00	37800	1601700000
-92	2020-10-03 15:20:00	37800	1601700600
-93	2020-10-03 15:30:00	37800	1601701200
-94	2020-10-03 15:40:00	37800	1601701800
-95	2020-10-03 15:50:00	37800	1601702400
-96	2020-10-03 16:00:00	37800	1601703000
-97	2020-10-03 16:10:00	37800	1601703600
-98	2020-10-03 16:20:00	37800	1601704200
-99	2020-10-03 16:30:00	37800	1601704800
-100	2020-10-03 16:40:00	37800	1601705400
-101	2020-10-03 16:50:00	37800	1601706000
-102	2020-10-03 17:00:00	37800	1601706600
-103	2020-10-03 17:10:00	37800	1601707200
-104	2020-10-03 17:20:00	37800	1601707800
-105	2020-10-03 17:30:00	37800	1601708400
-106	2020-10-03 17:40:00	37800	1601709000
-107	2020-10-03 17:50:00	37800	1601709600
-108	2020-10-03 18:00:00	37800	1601710200
-109	2020-10-03 18:10:00	37800	1601710800
-110	2020-10-03 18:20:00	37800	1601711400
-111	2020-10-03 18:30:00	37800	1601712000
-112	2020-10-03 18:40:00	37800	1601712600
-113	2020-10-03 18:50:00	37800	1601713200
-114	2020-10-03 19:00:00	37800	1601713800
-115	2020-10-03 19:10:00	37800	1601714400
-116	2020-10-03 19:20:00	37800	1601715000
-117	2020-10-03 19:30:00	37800	1601715600
-118	2020-10-03 19:40:00	37800	1601716200
-119	2020-10-03 19:50:00	37800	1601716800
-120	2020-10-03 20:00:00	37800	1601717400
-121	2020-10-03 20:10:00	37800	1601718000
-122	2020-10-03 20:20:00	37800	1601718600
-123	2020-10-03 20:30:00	37800	1601719200
-124	2020-10-03 20:40:00	37800	1601719800
-125	2020-10-03 20:50:00	37800	1601720400
-126	2020-10-03 21:00:00	37800	1601721000
-127	2020-10-03 21:10:00	37800	1601721600
-128	2020-10-03 21:20:00	37800	1601722200
-129	2020-10-03 21:30:00	37800	1601722800
-130	2020-10-03 21:40:00	37800	1601723400
-131	2020-10-03 21:50:00	37800	1601724000
-132	2020-10-03 22:00:00	37800	1601724600
-133	2020-10-03 22:10:00	37800	1601725200
-134	2020-10-03 22:20:00	37800	1601725800
-135	2020-10-03 22:30:00	37800	1601726400
-136	2020-10-03 22:40:00	37800	1601727000
-137	2020-10-03 22:50:00	37800	1601727600
-138	2020-10-03 23:00:00	37800	1601728200
-139	2020-10-03 23:10:00	37800	1601728800
-140	2020-10-03 23:20:00	37800	1601729400
-141	2020-10-03 23:30:00	37800	1601730000
-142	2020-10-03 23:40:00	37800	1601730600
-143	2020-10-03 23:50:00	37800	1601731200
-144	2020-10-04 00:00:00	37800	1601731800
-145	2020-10-04 00:10:00	37800	1601732400
-146	2020-10-04 00:20:00	37800	1601733000
-147	2020-10-04 00:30:00	37800	1601733600
-148	2020-10-04 00:40:00	37800	1601734200
-149	2020-10-04 00:50:00	37800	1601734800
-150	2020-10-04 01:00:00	37800	1601735400
-151	2020-10-04 01:10:00	37800	1601736000
-152	2020-10-04 01:20:00	37800	1601736600
-153	2020-10-04 01:30:00	37800	1601737200
-154	2020-10-04 01:40:00	37800	1601737800
-155	2020-10-04 01:50:00	37800	1601738400
-156	2020-10-04 02:00:00	39600	1601739000
-157	2020-10-04 02:10:00	39600	1601739600
-158	2020-10-04 02:20:00	39600	1601740200
-159	2020-10-04 03:30:00	39600	1601740800
-160	2020-10-04 03:40:00	39600	1601741400
-161	2020-10-04 03:50:00	39600	1601742000
-162	2020-10-04 03:00:00	39600	1601742600
-163	2020-10-04 03:10:00	39600	1601743200
-164	2020-10-04 03:20:00	39600	1601743800
-165	2020-10-04 04:30:00	39600	1601744400
-166	2020-10-04 04:40:00	39600	1601745000
-167	2020-10-04 04:50:00	39600	1601745600
-168	2020-10-04 04:00:00	39600	1601746200
-169	2020-10-04 04:10:00	39600	1601746800
-170	2020-10-04 04:20:00	39600	1601747400
-171	2020-10-04 05:30:00	39600	1601748000
-172	2020-10-04 05:40:00	39600	1601748600
-173	2020-10-04 05:50:00	39600	1601749200
-174	2020-10-04 05:00:00	39600	1601749800
-175	2020-10-04 05:10:00	39600	1601750400
-176	2020-10-04 05:20:00	39600	1601751000
-177	2020-10-04 06:30:00	39600	1601751600
-178	2020-10-04 06:40:00	39600	1601752200
-179	2020-10-04 06:50:00	39600	1601752800
-180	2020-10-04 06:00:00	39600	1601753400
-181	2020-10-04 06:10:00	39600	1601754000
-182	2020-10-04 06:20:00	39600	1601754600
-183	2020-10-04 07:30:00	39600	1601755200
-184	2020-10-04 07:40:00	39600	1601755800
-185	2020-10-04 07:50:00	39600	1601756400
-186	2020-10-04 07:00:00	39600	1601757000
-187	2020-10-04 07:10:00	39600	1601757600
-188	2020-10-04 07:20:00	39600	1601758200
-189	2020-10-04 08:30:00	39600	1601758800
-190	2020-10-04 08:40:00	39600	1601759400
-191	2020-10-04 08:50:00	39600	1601760000
-192	2020-10-04 08:00:00	39600	1601760600
-193	2020-10-04 08:10:00	39600	1601761200
-194	2020-10-04 08:20:00	39600	1601761800
-195	2020-10-04 09:30:00	39600	1601762400
-196	2020-10-04 09:40:00	39600	1601763000
-197	2020-10-04 09:50:00	39600	1601763600
-198	2020-10-04 09:00:00	39600	1601764200
-199	2020-10-04 09:10:00	39600	1601764800
-0	2019-04-06 00:00:00	39600	1554469200
-1	2019-04-06 00:10:00	39600	1554469800
-2	2019-04-06 00:20:00	39600	1554470400
-3	2019-04-06 00:30:00	39600	1554471000
-4	2019-04-06 00:40:00	39600	1554471600
-5	2019-04-06 00:50:00	39600	1554472200
-6	2019-04-06 01:00:00	39600	1554472800
-7	2019-04-06 01:10:00	39600	1554473400
-8	2019-04-06 01:20:00	39600	1554474000
-9	2019-04-06 01:30:00	39600	1554474600
-10	2019-04-06 01:40:00	39600	1554475200
-11	2019-04-06 01:50:00	39600	1554475800
-12	2019-04-06 02:00:00	39600	1554476400
-13	2019-04-06 02:10:00	39600	1554477000
-14	2019-04-06 02:20:00	39600	1554477600
-15	2019-04-06 02:30:00	39600	1554478200
-16	2019-04-06 02:40:00	39600	1554478800
-17	2019-04-06 02:50:00	39600	1554479400
-18	2019-04-06 03:00:00	39600	1554480000
-19	2019-04-06 03:10:00	39600	1554480600
-20	2019-04-06 03:20:00	39600	1554481200
-21	2019-04-06 03:30:00	39600	1554481800
-22	2019-04-06 03:40:00	39600	1554482400
-23	2019-04-06 03:50:00	39600	1554483000
-24	2019-04-06 04:00:00	39600	1554483600
-25	2019-04-06 04:10:00	39600	1554484200
-26	2019-04-06 04:20:00	39600	1554484800
-27	2019-04-06 04:30:00	39600	1554485400
-28	2019-04-06 04:40:00	39600	1554486000
-29	2019-04-06 04:50:00	39600	1554486600
-30	2019-04-06 05:00:00	39600	1554487200
-31	2019-04-06 05:10:00	39600	1554487800
-32	2019-04-06 05:20:00	39600	1554488400
-33	2019-04-06 05:30:00	39600	1554489000
-34	2019-04-06 05:40:00	39600	1554489600
-35	2019-04-06 05:50:00	39600	1554490200
-36	2019-04-06 06:00:00	39600	1554490800
-37	2019-04-06 06:10:00	39600	1554491400
-38	2019-04-06 06:20:00	39600	1554492000
-39	2019-04-06 06:30:00	39600	1554492600
-40	2019-04-06 06:40:00	39600	1554493200
-41	2019-04-06 06:50:00	39600	1554493800
-42	2019-04-06 07:00:00	39600	1554494400
-43	2019-04-06 07:10:00	39600	1554495000
-44	2019-04-06 07:20:00	39600	1554495600
-45	2019-04-06 07:30:00	39600	1554496200
-46	2019-04-06 07:40:00	39600	1554496800
-47	2019-04-06 07:50:00	39600	1554497400
-48	2019-04-06 08:00:00	39600	1554498000
-49	2019-04-06 08:10:00	39600	1554498600
-50	2019-04-06 08:20:00	39600	1554499200
-51	2019-04-06 08:30:00	39600	1554499800
-52	2019-04-06 08:40:00	39600	1554500400
-53	2019-04-06 08:50:00	39600	1554501000
-54	2019-04-06 09:00:00	39600	1554501600
-55	2019-04-06 09:10:00	39600	1554502200
-56	2019-04-06 09:20:00	39600	1554502800
-57	2019-04-06 09:30:00	39600	1554503400
-58	2019-04-06 09:40:00	39600	1554504000
-59	2019-04-06 09:50:00	39600	1554504600
-60	2019-04-06 10:00:00	39600	1554505200
-61	2019-04-06 10:10:00	39600	1554505800
-62	2019-04-06 10:20:00	39600	1554506400
-63	2019-04-06 10:30:00	39600	1554507000
-64	2019-04-06 10:40:00	39600	1554507600
-65	2019-04-06 10:50:00	39600	1554508200
-66	2019-04-06 11:00:00	39600	1554508800
-67	2019-04-06 11:10:00	39600	1554509400
-68	2019-04-06 11:20:00	39600	1554510000
-69	2019-04-06 11:30:00	39600	1554510600
-70	2019-04-06 11:40:00	39600	1554511200
-71	2019-04-06 11:50:00	39600	1554511800
-72	2019-04-06 12:00:00	39600	1554512400
-73	2019-04-06 12:10:00	39600	1554513000
-74	2019-04-06 12:20:00	39600	1554513600
-75	2019-04-06 12:30:00	39600	1554514200
-76	2019-04-06 12:40:00	39600	1554514800
-77	2019-04-06 12:50:00	39600	1554515400
-78	2019-04-06 13:00:00	39600	1554516000
-79	2019-04-06 13:10:00	39600	1554516600
-80	2019-04-06 13:20:00	39600	1554517200
-81	2019-04-06 13:30:00	39600	1554517800
-82	2019-04-06 13:40:00	39600	1554518400
-83	2019-04-06 13:50:00	39600	1554519000
-84	2019-04-06 14:00:00	39600	1554519600
-85	2019-04-06 14:10:00	39600	1554520200
-86	2019-04-06 14:20:00	39600	1554520800
-87	2019-04-06 14:30:00	39600	1554521400
-88	2019-04-06 14:40:00	39600	1554522000
-89	2019-04-06 14:50:00	39600	1554522600
-90	2019-04-06 15:00:00	39600	1554523200
-91	2019-04-06 15:10:00	39600	1554523800
-92	2019-04-06 15:20:00	39600	1554524400
-93	2019-04-06 15:30:00	39600	1554525000
-94	2019-04-06 15:40:00	39600	1554525600
-95	2019-04-06 15:50:00	39600	1554526200
-96	2019-04-06 16:00:00	39600	1554526800
-97	2019-04-06 16:10:00	39600	1554527400
-98	2019-04-06 16:20:00	39600	1554528000
-99	2019-04-06 16:30:00	39600	1554528600
-100	2019-04-06 16:40:00	39600	1554529200
-101	2019-04-06 16:50:00	39600	1554529800
-102	2019-04-06 17:00:00	39600	1554530400
-103	2019-04-06 17:10:00	39600	1554531000
-104	2019-04-06 17:20:00	39600	1554531600
-105	2019-04-06 17:30:00	39600	1554532200
-106	2019-04-06 17:40:00	39600	1554532800
-107	2019-04-06 17:50:00	39600	1554533400
-108	2019-04-06 18:00:00	39600	1554534000
-109	2019-04-06 18:10:00	39600	1554534600
-110	2019-04-06 18:20:00	39600	1554535200
-111	2019-04-06 18:30:00	39600	1554535800
-112	2019-04-06 18:40:00	39600	1554536400
-113	2019-04-06 18:50:00	39600	1554537000
-114	2019-04-06 19:00:00	39600	1554537600
-115	2019-04-06 19:10:00	39600	1554538200
-116	2019-04-06 19:20:00	39600	1554538800
-117	2019-04-06 19:30:00	39600	1554539400
-118	2019-04-06 19:40:00	39600	1554540000
-119	2019-04-06 19:50:00	39600	1554540600
-120	2019-04-06 20:00:00	39600	1554541200
-121	2019-04-06 20:10:00	39600	1554541800
-122	2019-04-06 20:20:00	39600	1554542400
-123	2019-04-06 20:30:00	39600	1554543000
-124	2019-04-06 20:40:00	39600	1554543600
-125	2019-04-06 20:50:00	39600	1554544200
-126	2019-04-06 21:00:00	39600	1554544800
-127	2019-04-06 21:10:00	39600	1554545400
-128	2019-04-06 21:20:00	39600	1554546000
-129	2019-04-06 21:30:00	39600	1554546600
-130	2019-04-06 21:40:00	39600	1554547200
-131	2019-04-06 21:50:00	39600	1554547800
-132	2019-04-06 22:00:00	39600	1554548400
-133	2019-04-06 22:10:00	39600	1554549000
-134	2019-04-06 22:20:00	39600	1554549600
-135	2019-04-06 22:30:00	39600	1554550200
-136	2019-04-06 22:40:00	39600	1554550800
-137	2019-04-06 22:50:00	39600	1554551400
-138	2019-04-06 23:00:00	39600	1554552000
-139	2019-04-06 23:10:00	39600	1554552600
-140	2019-04-06 23:20:00	39600	1554553200
-141	2019-04-06 23:30:00	39600	1554553800
-142	2019-04-06 23:40:00	39600	1554554400
-143	2019-04-06 23:50:00	39600	1554555000
-144	2019-04-07 00:00:00	39600	1554555600
-145	2019-04-07 00:10:00	39600	1554556200
-146	2019-04-07 00:20:00	39600	1554556800
-147	2019-04-07 00:30:00	39600	1554557400
-148	2019-04-07 00:40:00	39600	1554558000
-149	2019-04-07 00:50:00	39600	1554558600
-150	2019-04-07 01:00:00	39600	1554559200
-151	2019-04-07 01:10:00	39600	1554559800
-152	2019-04-07 01:20:00	39600	1554560400
-153	2019-04-07 01:30:00	39600	1554561000
-154	2019-04-07 01:40:00	39600	1554561600
-155	2019-04-07 01:50:00	39600	1554562200
-156	2019-04-07 01:00:00	37800	1554562800
-157	2019-04-07 01:10:00	37800	1554563400
-158	2019-04-07 01:20:00	37800	1554564000
-159	2019-04-07 02:30:00	37800	1554564600
-160	2019-04-07 02:40:00	37800	1554565200
-161	2019-04-07 02:50:00	37800	1554565800
-162	2019-04-07 02:00:00	37800	1554566400
-163	2019-04-07 02:10:00	37800	1554567000
-164	2019-04-07 02:20:00	37800	1554567600
-165	2019-04-07 03:30:00	37800	1554568200
-166	2019-04-07 03:40:00	37800	1554568800
-167	2019-04-07 03:50:00	37800	1554569400
-168	2019-04-07 03:00:00	37800	1554570000
-169	2019-04-07 03:10:00	37800	1554570600
-170	2019-04-07 03:20:00	37800	1554571200
-171	2019-04-07 04:30:00	37800	1554571800
-172	2019-04-07 04:40:00	37800	1554572400
-173	2019-04-07 04:50:00	37800	1554573000
-174	2019-04-07 04:00:00	37800	1554573600
-175	2019-04-07 04:10:00	37800	1554574200
-176	2019-04-07 04:20:00	37800	1554574800
-177	2019-04-07 05:30:00	37800	1554575400
-178	2019-04-07 05:40:00	37800	1554576000
-179	2019-04-07 05:50:00	37800	1554576600
-180	2019-04-07 05:00:00	37800	1554577200
-181	2019-04-07 05:10:00	37800	1554577800
-182	2019-04-07 05:20:00	37800	1554578400
-183	2019-04-07 06:30:00	37800	1554579000
-184	2019-04-07 06:40:00	37800	1554579600
-185	2019-04-07 06:50:00	37800	1554580200
-186	2019-04-07 06:00:00	37800	1554580800
-187	2019-04-07 06:10:00	37800	1554581400
-188	2019-04-07 06:20:00	37800	1554582000
-189	2019-04-07 07:30:00	37800	1554582600
-190	2019-04-07 07:40:00	37800	1554583200
-191	2019-04-07 07:50:00	37800	1554583800
-192	2019-04-07 07:00:00	37800	1554584400
-193	2019-04-07 07:10:00	37800	1554585000
-194	2019-04-07 07:20:00	37800	1554585600
-195	2019-04-07 08:30:00	37800	1554586200
-196	2019-04-07 08:40:00	37800	1554586800
-197	2019-04-07 08:50:00	37800	1554587400
-198	2019-04-07 08:00:00	37800	1554588000
-199	2019-04-07 08:10:00	37800	1554588600
+DST boundary test for Australia/Lord_Howe:
+0	2020-10-04 01:40:00	37800	1601737800
+1	2020-10-04 01:50:00	37800	1601738400
+2	2020-10-04 02:00:00	39600	1601739000
+3	2020-10-04 02:10:00	39600	1601739600
+0	2019-04-07 01:00:00	39600	1554559200
+1	2019-04-07 01:10:00	39600	1554559800
+2	2019-04-07 01:20:00	39600	1554560400
+3	2019-04-07 01:30:00	39600	1554561000
+4	2019-04-07 01:40:00	39600	1554561600
+5	2019-04-07 01:50:00	39600	1554562200
+6	2019-04-07 01:00:00	37800	1554562800
+7	2019-04-07 01:10:00	37800	1554563400
+8	2019-04-07 01:20:00	37800	1554564000
+9	2019-04-07 02:30:00	37800	1554564600
+10	2019-04-07 02:40:00	37800	1554565200
+11	2019-04-07 02:50:00	37800	1554565800
+12	2019-04-07 02:00:00	37800	1554566400
+13	2019-04-07 02:10:00	37800	1554567000
+14	2019-04-07 02:20:00	37800	1554567600
+15	2019-04-07 03:30:00	37800	1554568200
+16	2019-04-07 03:40:00	37800	1554568800
+17	2019-04-07 03:50:00	37800	1554569400
+4 days test in batch comparing with manually computation result for Europe/Moscow:
+4 days test in batch comparing with manually computation result for Asia/Tehran:
+The result maybe wrong for toDateTime processing Australia/Lord_Howe
+1601739000	2020-10-04 02:00:00	39600	37800
+1601739600	2020-10-04 02:10:00	39600	37800
+1601740200	2020-10-04 02:20:00	39600	37800
+1601740800	2020-10-04 03:30:00	39600	41400
+1601741400	2020-10-04 03:40:00	39600	41400
+1601742000	2020-10-04 03:50:00	39600	41400
+1601742600	2020-10-04 03:00:00	39600	37800
+1601743200	2020-10-04 03:10:00	39600	37800
+1601743800	2020-10-04 03:20:00	39600	37800
+1601744400	2020-10-04 04:30:00	39600	41400
+1601745000	2020-10-04 04:40:00	39600	41400
+1601745600	2020-10-04 04:50:00	39600	41400
+1601746200	2020-10-04 04:00:00	39600	37800
+1601746800	2020-10-04 04:10:00	39600	37800
+1601747400	2020-10-04 04:20:00	39600	37800
+1601748000	2020-10-04 05:30:00	39600	41400
+1554562800	2019-04-07 01:00:00	37800	36000
+1554563400	2019-04-07 01:10:00	37800	36000
+1554564000	2019-04-07 01:20:00	37800	36000
+1554564600	2019-04-07 02:30:00	37800	39600
+1554565200	2019-04-07 02:40:00	37800	39600
+1554565800	2019-04-07 02:50:00	37800	39600
+1554566400	2019-04-07 02:00:00	37800	36000
+1554567000	2019-04-07 02:10:00	37800	36000
+1554567600	2019-04-07 02:20:00	37800	36000
+1554568200	2019-04-07 03:30:00	37800	39600
+1554568800	2019-04-07 03:40:00	37800	39600
+1554569400	2019-04-07 03:50:00	37800	39600
+Moscow DST Years:
+11	1981-06-01 00:00:00	14400
+12	1982-06-01 00:00:00	14400
+13	1983-06-01 00:00:00	14400
+14	1984-06-01 00:00:00	14400
+15	1985-06-01 00:00:00	14400
+16	1986-06-01 00:00:00	14400
+17	1987-06-01 00:00:00	14400
+18	1988-06-01 00:00:00	14400
+19	1989-06-01 00:00:00	14400
+20	1990-06-01 00:00:00	14400
+22	1992-06-01 00:00:00	14400
+23	1993-06-01 00:00:00	14400
+24	1994-06-01 00:00:00	14400
+25	1995-06-01 00:00:00	14400
+26	1996-06-01 00:00:00	14400
+27	1997-06-01 00:00:00	14400
+28	1998-06-01 00:00:00	14400
+29	1999-06-01 00:00:00	14400
+30	2000-06-01 00:00:00	14400
+31	2001-06-01 00:00:00	14400
+32	2002-06-01 00:00:00	14400
+33	2003-06-01 00:00:00	14400
+34	2004-06-01 00:00:00	14400
+35	2005-06-01 00:00:00	14400
+36	2006-06-01 00:00:00	14400
+37	2007-06-01 00:00:00	14400
+38	2008-06-01 00:00:00	14400
+39	2009-06-01 00:00:00	14400
+40	2010-06-01 00:00:00	14400
+41	2011-06-01 00:00:00	14400
+42	2012-06-01 00:00:00	14400
+43	2013-06-01 00:00:00	14400
+44	2014-06-01 00:00:00	14400
+Moscow DST Years with perment DST from 2011-2014:
+2011-01-01 00:00:00	2011-03-27 00:00:00	86	2011_10800
+2011-03-28 00:00:00	2011-12-31 00:00:00	279	2011_14400
+2012-01-01 00:00:00	2012-12-31 00:00:00	366	2012_14400
+2013-01-01 00:00:00	2013-12-31 00:00:00	365	2013_14400
+2014-01-01 00:00:00	2014-10-26 00:00:00	299	2014_14400
+2014-10-27 00:00:00	2014-12-31 00:00:00	66	2014_10800
+Tehran DST Years:
+8	1978-06-01 00:00:00	18000
+9	1979-06-01 00:00:00	16200
+10	1980-06-01 00:00:00	16200
+21	1991-06-01 00:00:00	16200
+22	1992-06-01 00:00:00	16200
+23	1993-06-01 00:00:00	16200
+24	1994-06-01 00:00:00	16200
+25	1995-06-01 00:00:00	16200
+26	1996-06-01 00:00:00	16200
+27	1997-06-01 00:00:00	16200
+28	1998-06-01 00:00:00	16200
+29	1999-06-01 00:00:00	16200
+30	2000-06-01 00:00:00	16200
+31	2001-06-01 00:00:00	16200
+32	2002-06-01 00:00:00	16200
+33	2003-06-01 00:00:00	16200
+34	2004-06-01 00:00:00	16200
+35	2005-06-01 00:00:00	16200
+38	2008-06-01 00:00:00	16200
+39	2009-06-01 00:00:00	16200
+40	2010-06-01 00:00:00	16200
+41	2011-06-01 00:00:00	16200
+42	2012-06-01 00:00:00	16200
+43	2013-06-01 00:00:00	16200
+44	2014-06-01 00:00:00	16200
+45	2015-06-01 00:00:00	16200
+46	2016-06-01 00:00:00	16200
+47	2017-06-01 00:00:00	16200
+48	2018-06-01 00:00:00	16200
+49	2019-06-01 00:00:00	16200
+50	2020-06-01 00:00:00	16200
+Shanghai DST Years:
+16	1986-08-01 00:00:00	32400
+17	1987-08-01 00:00:00	32400
+18	1988-08-01 00:00:00	32400
+19	1989-08-01 00:00:00	32400
+20	1990-08-01 00:00:00	32400
+21	1991-08-01 00:00:00	32400
diff --git a/tests/queries/0_stateless/01699_timezoneOffset.sql b/tests/queries/0_stateless/01699_timezoneOffset.sql
old mode 100644
new mode 100755
index 5f6855d6732..1b3f05ecdd7
--- a/tests/queries/0_stateless/01699_timezoneOffset.sql
+++ b/tests/queries/0_stateless/01699_timezoneOffset.sql
@@ -1,20 +1,46 @@
 
-/* timestamp  ==  (Europe/Moscow) */
+/* Test the DST(daylight saving time) offset changing boundary*/
+SELECT 'DST boundary test for Europe/Moscow:';
+SELECT number,(toDateTime('1981-04-01 22:40:00', 'Europe/Moscow') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(4);
+SELECT number,(toDateTime('1981-09-30 23:00:00', 'Europe/Moscow') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(18);
 
+SELECT 'DST boundary test for Asia/Tehran:';
+SELECT number,(toDateTime('2020-03-21 22:40:00', 'Asia/Tehran') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(4);
+SELECT number,(toDateTime('2020-09-20 23:00:00', 'Asia/Tehran') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(18);
 
-SELECT number,(toDateTime('1981-04-01 00:00:00', 'Europe/Moscow') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(200);
-SELECT number,(toDateTime('1981-09-30 00:00:00', 'Europe/Moscow') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(200);
-
-
-SELECT number,(toDateTime('2020-03-21 00:00:00', 'Asia/Tehran') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(200);
-
-SELECT number,(toDateTime('2020-09-20 00:00:00', 'Asia/Tehran') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(200);
-
+SELECT 'DST boundary test for Australia/Lord_Howe. This is a special timezone with DST offset is 30mins with the timezone epoc also lays at half hour';
 SELECT timezoneOffset(toDateTime('2018-08-21 22:20:00', 'Australia/Lord_Howe'));
 SELECT timezoneOffset(toDateTime('2018-02-21 22:20:00', 'Australia/Lord_Howe'));
 
+SELECT 'DST boundary test for Australia/Lord_Howe:';
+SELECT number,(toDateTime('2020-10-04 01:40:00', 'Australia/Lord_Howe') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(4);
+SELECT number,(toDateTime('2019-04-07 01:00:00', 'Australia/Lord_Howe') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(18);
 
-SELECT number,(toDateTime('2020-10-03 00:00:00', 'Australia/Lord_Howe') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(200);
 
-SELECT number,(toDateTime('2019-04-06 00:00:00', 'Australia/Lord_Howe') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(200);
+/* The Batch Part. Test period is whole 4 days*/
+SELECT '4 days test in batch comparing with manually computation result for Europe/Moscow:';
+SELECT toUnixTimestamp(x) as tt, (toDateTime('1981-04-01 00:00:00', 'Europe/Moscow') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc;
+SELECT toUnixTimestamp(x) as tt, (toDateTime('1981-09-30 00:00:00', 'Europe/Moscow') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc;
+
+SELECT '4 days test in batch comparing with manually computation result for Asia/Tehran:';
+SELECT toUnixTimestamp(x) as tt, (toDateTime('2020-03-21 00:00:00', 'Asia/Tehran') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc;
+SELECT toUnixTimestamp(x) as tt, (toDateTime('2020-09-20 00:00:00', 'Asia/Tehran') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc;
+
+/* During this test we got unexpected result comes from the toDateTime() function when process the special time zone of 'Australia/Lord_Howe', which may be some kind of bugs. */
+SELECT 'The result maybe wrong for toDateTime processing Australia/Lord_Howe';
+SELECT toUnixTimestamp(x) as tt, (toDateTime('2020-10-04 01:40:00', 'Australia/Lord_Howe') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(18) where res != calc;
+SELECT toUnixTimestamp(x) as tt, (toDateTime('2019-04-07 01:00:00', 'Australia/Lord_Howe') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(18) where res != calc;
+
+
+/* Find all the years had followed DST during given period*/
+
+SELECT 'Moscow DST Years:';
+SELECT number, (toDateTime('1970-06-01 00:00:00', 'Europe/Moscow') + INTERVAL number YEAR) AS DST_Y, timezoneOffset(DST_Y) AS t FROM numbers(51) where t != 10800;
+SELECT 'Moscow DST Years with perment DST from 2011-2014:';
+SELECT min((toDateTime('2011-01-01 00:00:00', 'Europe/Moscow') + INTERVAL number DAY) as day) as start, max(day) as end, count(1), concat(toString(toYear(day)),'_',toString(timezoneOffset(day)))as DST from numbers(365*4+1) group by DST order by start;
+
+SELECT 'Tehran DST Years:';
+SELECT number, (toDateTime('1970-06-01 00:00:00', 'Asia/Tehran') + INTERVAL number YEAR) AS DST_Y, timezoneOffset(DST_Y) AS t FROM numbers(51) where t != 12600;
+SELECT 'Shanghai DST Years:';
+SELECT number, (toDateTime('1970-08-01 00:00:00', 'Asia/Shanghai') + INTERVAL number YEAR) AS DST_Y, timezoneOffset(DST_Y) AS t FROM numbers(51) where t != 28800;
 

From b7e4f7bd1e7beaa9c1b17b7a3d945731de43e769 Mon Sep 17 00:00:00 2001
From: keenwolf <keenwolf@gmail.com>
Date: Wed, 3 Feb 2021 00:41:11 +0800
Subject: [PATCH 0539/1238] modify file attribute

---
 tests/queries/0_stateless/01699_timezoneOffset.sql | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100755 => 100644 tests/queries/0_stateless/01699_timezoneOffset.sql

diff --git a/tests/queries/0_stateless/01699_timezoneOffset.sql b/tests/queries/0_stateless/01699_timezoneOffset.sql
old mode 100755
new mode 100644

From 921518db0a9637ea1b3452fa6ca90c9758c76df9 Mon Sep 17 00:00:00 2001
From: Aleksei Semiglazov <asemiglazov@cloudflare.com>
Date: Fri, 20 Nov 2020 17:23:53 +0000
Subject: [PATCH 0540/1238] CLICKHOUSE-606: query deduplication based on parts'
 UUID

* add the query data deduplication excluding duplicated parts in MergeTree family engines.

query deduplication is based on parts' UUID which should be enabled first with merge_tree setting
assign_part_uuids=1

allow_experimental_query_deduplication setting is to enable part deduplication, default ot false.

data part UUID is a mechanism of giving a data part a unique identifier.
Having UUID and deduplication mechanism provides a potential of moving parts
between shards preserving data consistency on a read path:
duplicated UUIDs will cause root executor to retry query against on of the replica explicitly
asking to exclude encountered duplicated fingerprints during a distributed query execution.

NOTE: this implementation don't provide any knobs to lock part and hence its UUID. Any mutations/merge will
update part's UUID.

* add _part_uuid virtual column, allowing to use UUIDs in predicates.

Signed-off-by: Aleksei Semiglazov <asemiglazov@cloudflare.com>

address comments
---
 programs/client/Client.cpp                    |   3 +
 src/Client/Connection.cpp                     |  10 ++
 src/Client/Connection.h                       |   3 +
 src/Client/MultiplexedConnections.cpp         |  17 ++
 src/Client/MultiplexedConnections.h           |   3 +
 src/Columns/ColumnsNumber.h                   |   2 +
 src/Common/ErrorCodes.cpp                     |   1 +
 src/Core/Protocol.h                           |   8 +-
 src/Core/Settings.h                           |   2 +
 src/DataStreams/RemoteQueryExecutor.cpp       |  79 ++++++++-
 src/DataStreams/RemoteQueryExecutor.h         |  18 ++
 src/IO/WriteHelpers.h                         |   1 +
 src/Interpreters/Context.cpp                  |  19 ++
 src/Interpreters/Context.h                    |   8 +
 src/Server/TCPHandler.cpp                     |  39 +++++
 src/Server/TCPHandler.h                       |   5 +
 .../MergeTreeBaseSelectProcessor.cpp          |  21 +++
 src/Storages/MergeTree/MergeTreeData.cpp      |   2 +
 .../MergeTree/MergeTreeDataPartUUID.cpp       |  38 ++++
 .../MergeTree/MergeTreeDataPartUUID.h         |  34 ++++
 .../MergeTree/MergeTreeDataSelectExecutor.cpp | 125 ++++++++++---
 src/Storages/StorageDistributed.cpp           |   2 +
 src/Storages/ya.make                          |   1 +
 .../test_query_deduplication/__init__.py      |   0
 .../configs/deduplication_settings.xml        |   5 +
 .../configs/remote_servers.xml                |  24 +++
 .../test_query_deduplication/test.py          | 165 ++++++++++++++++++
 27 files changed, 607 insertions(+), 28 deletions(-)
 create mode 100644 src/Storages/MergeTree/MergeTreeDataPartUUID.cpp
 create mode 100644 src/Storages/MergeTree/MergeTreeDataPartUUID.h
 create mode 100644 tests/integration/test_query_deduplication/__init__.py
 create mode 100644 tests/integration/test_query_deduplication/configs/deduplication_settings.xml
 create mode 100644 tests/integration/test_query_deduplication/configs/remote_servers.xml
 create mode 100644 tests/integration/test_query_deduplication/test.py

diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp
index 9a8b580407a..8d3a1ba7c74 100644
--- a/programs/client/Client.cpp
+++ b/programs/client/Client.cpp
@@ -1900,6 +1900,9 @@ private:
 
         switch (packet.type)
         {
+            case Protocol::Server::PartUUIDs:
+                return true;
+
             case Protocol::Server::Data:
                 if (!cancelled)
                     onData(packet.block);
diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp
index 65b15a46955..e38a6b240a6 100644
--- a/src/Client/Connection.cpp
+++ b/src/Client/Connection.cpp
@@ -542,6 +542,12 @@ void Connection::sendData(const Block & block, const String & name, bool scalar)
         throttler->add(out->count() - prev_bytes);
 }
 
+void Connection::sendIgnoredPartUUIDs(const std::vector<UUID> & uuids)
+{
+    writeVarUInt(Protocol::Client::IgnoredPartUUIDs, *out);
+    writeVectorBinary(uuids, *out);
+    out->next();
+}
 
 void Connection::sendPreparedData(ReadBuffer & input, size_t size, const String & name)
 {
@@ -798,6 +804,10 @@ Packet Connection::receivePacket(std::function<void(Poco::Net::Socket &)> async_
             case Protocol::Server::EndOfStream:
                 return res;
 
+            case Protocol::Server::PartUUIDs:
+                readVectorBinary(res.part_uuids, *in);
+                return res;
+
             default:
                 /// In unknown state, disconnect - to not leave unsynchronised connection.
                 disconnect();
diff --git a/src/Client/Connection.h b/src/Client/Connection.h
index 83e8f3ba206..2d24b143d7a 100644
--- a/src/Client/Connection.h
+++ b/src/Client/Connection.h
@@ -66,6 +66,7 @@ struct Packet
     std::vector<String> multistring_message;
     Progress progress;
     BlockStreamProfileInfo profile_info;
+    std::vector<UUID> part_uuids;
 
     Packet() : type(Protocol::Server::Hello) {}
 };
@@ -157,6 +158,8 @@ public:
     void sendScalarsData(Scalars & data);
     /// Send all contents of external (temporary) tables.
     void sendExternalTablesData(ExternalTablesData & data);
+    /// Send parts' uuids to excluded them from query processing
+    void sendIgnoredPartUUIDs(const std::vector<UUID> & uuids);
 
     /// Send prepared block of data (serialized and, if need, compressed), that will be read from 'input'.
     /// You could pass size of serialized/compressed block.
diff --git a/src/Client/MultiplexedConnections.cpp b/src/Client/MultiplexedConnections.cpp
index ed7aad0a515..c50dd7b6454 100644
--- a/src/Client/MultiplexedConnections.cpp
+++ b/src/Client/MultiplexedConnections.cpp
@@ -140,6 +140,21 @@ void MultiplexedConnections::sendQuery(
     sent_query = true;
 }
 
+void MultiplexedConnections::sendIgnoredPartUUIDs(const std::vector<UUID> & uuids)
+{
+    std::lock_guard lock(cancel_mutex);
+
+    if (sent_query)
+        throw Exception("Cannot send uuids after query is sent.", ErrorCodes::LOGICAL_ERROR);
+
+    for (ReplicaState & state : replica_states)
+    {
+        Connection * connection = state.connection;
+        if (connection != nullptr)
+            connection->sendIgnoredPartUUIDs(uuids);
+    }
+}
+
 Packet MultiplexedConnections::receivePacket()
 {
     std::lock_guard lock(cancel_mutex);
@@ -195,6 +210,7 @@ Packet MultiplexedConnections::drain()
 
         switch (packet.type)
         {
+            case Protocol::Server::PartUUIDs:
             case Protocol::Server::Data:
             case Protocol::Server::Progress:
             case Protocol::Server::ProfileInfo:
@@ -253,6 +269,7 @@ Packet MultiplexedConnections::receivePacketUnlocked(std::function<void(Poco::Ne
 
     switch (packet.type)
     {
+        case Protocol::Server::PartUUIDs:
         case Protocol::Server::Data:
         case Protocol::Server::Progress:
         case Protocol::Server::ProfileInfo:
diff --git a/src/Client/MultiplexedConnections.h b/src/Client/MultiplexedConnections.h
index 2ab2b60570e..da0326fa6c0 100644
--- a/src/Client/MultiplexedConnections.h
+++ b/src/Client/MultiplexedConnections.h
@@ -50,6 +50,9 @@ public:
     /// Send a request to the replica to cancel the request
     void sendCancel();
 
+    /// Send parts' uuids to replicas to exclude them from query processing
+    void sendIgnoredPartUUIDs(const std::vector<UUID> & uuids);
+
     /** On each replica, read and skip all packets to EndOfStream or Exception.
       * Returns EndOfStream if no exception has been received. Otherwise
       * returns the last received packet of type Exception.
diff --git a/src/Columns/ColumnsNumber.h b/src/Columns/ColumnsNumber.h
index 96ce2bd6d6f..17a28e617c3 100644
--- a/src/Columns/ColumnsNumber.h
+++ b/src/Columns/ColumnsNumber.h
@@ -26,4 +26,6 @@ using ColumnInt256 = ColumnVector<Int256>;
 using ColumnFloat32 = ColumnVector<Float32>;
 using ColumnFloat64 = ColumnVector<Float64>;
 
+using ColumnUUID = ColumnVector<UInt128>;
+
 }
diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp
index a2cd65137c0..09e5945f2b5 100644
--- a/src/Common/ErrorCodes.cpp
+++ b/src/Common/ErrorCodes.cpp
@@ -533,6 +533,7 @@
     M(564, INTERSERVER_SCHEME_DOESNT_MATCH) \
     M(565, TOO_MANY_PARTITIONS) \
     M(566, CANNOT_RMDIR) \
+    M(567, DUPLICATED_PART_UUIDS) \
     \
     M(999, KEEPER_EXCEPTION) \
     M(1000, POCO_EXCEPTION) \
diff --git a/src/Core/Protocol.h b/src/Core/Protocol.h
index f383e509751..df51a0cb61a 100644
--- a/src/Core/Protocol.h
+++ b/src/Core/Protocol.h
@@ -75,8 +75,9 @@ namespace Protocol
             TablesStatusResponse = 9, /// A response to TablesStatus request.
             Log = 10,                 /// System logs of the query execution
             TableColumns = 11,        /// Columns' description for default values calculation
+            PartUUIDs = 12,           /// List of unique parts ids.
 
-            MAX = TableColumns,
+            MAX = PartUUIDs,
         };
 
         /// NOTE: If the type of packet argument would be Enum, the comparison packet >= 0 && packet < 10
@@ -98,6 +99,7 @@ namespace Protocol
                 "TablesStatusResponse",
                 "Log",
                 "TableColumns",
+                "PartUUIDs",
             };
             return packet <= MAX
                 ? data[packet]
@@ -132,8 +134,9 @@ namespace Protocol
             TablesStatusRequest = 5, /// Check status of tables on the server.
             KeepAlive = 6,           /// Keep the connection alive
             Scalar = 7,              /// A block of data (compressed or not).
+            IgnoredPartUUIDs = 8,    /// List of unique parts ids to exclude from query processing
 
-            MAX = Scalar,
+            MAX = IgnoredPartUUIDs,
         };
 
         inline const char * toString(UInt64 packet)
@@ -147,6 +150,7 @@ namespace Protocol
                 "TablesStatusRequest",
                 "KeepAlive",
                 "Scalar",
+                "IgnoredPartUUIDs",
             };
             return packet <= MAX
                 ? data[packet]
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index c4cf3803913..ecd3fa9e746 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -420,6 +420,8 @@ class IColumn;
     M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \
     \
     M(Bool, optimize_rewrite_sum_if_to_count_if, true, "Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent", 0) \
+    M(Bool, allow_experimental_query_deduplication, false, "Allow sending parts' UUIDs for a query in order to deduplicate data parts if any", 0) \
+    \
     /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
     \
     M(UInt64, max_memory_usage_for_all_queries, 0, "Obsolete. Will be removed after 2020-10-20", 0) \
diff --git a/src/DataStreams/RemoteQueryExecutor.cpp b/src/DataStreams/RemoteQueryExecutor.cpp
index 14e51ffefdf..ce7db264eef 100644
--- a/src/DataStreams/RemoteQueryExecutor.cpp
+++ b/src/DataStreams/RemoteQueryExecutor.cpp
@@ -13,6 +13,7 @@
 #include <Interpreters/InternalTextLogsQueue.h>
 #include <IO/ConnectionTimeoutsContext.h>
 #include <Common/FiberStack.h>
+#include <Storages/MergeTree/MergeTreeDataPartUUID.h>
 
 namespace DB
 {
@@ -20,6 +21,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int UNKNOWN_PACKET_FROM_SERVER;
+    extern const int DUPLICATED_PART_UUIDS;
 }
 
 RemoteQueryExecutor::RemoteQueryExecutor(
@@ -158,6 +160,7 @@ void RemoteQueryExecutor::sendQuery()
     std::lock_guard guard(was_cancelled_mutex);
 
     established = true;
+    was_cancelled = false;
 
     auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(settings);
     ClientInfo modified_client_info = context.getClientInfo();
@@ -167,6 +170,14 @@ void RemoteQueryExecutor::sendQuery()
         modified_client_info.client_trace_context = CurrentThread::get().thread_trace_context;
     }
 
+    {
+        std::lock_guard lock(duplicated_part_uuids_mutex);
+        if (!duplicated_part_uuids.empty())
+        {
+            multiplexed_connections->sendIgnoredPartUUIDs(duplicated_part_uuids);
+        }
+    }
+
     multiplexed_connections->sendQuery(timeouts, query, query_id, stage, modified_client_info, true);
 
     established = false;
@@ -195,7 +206,29 @@ Block RemoteQueryExecutor::read()
         Packet packet = multiplexed_connections->receivePacket();
 
         if (auto block = processPacket(std::move(packet)))
+        {
+            if (got_duplicated_part_uuids)
+            {
+                /// Cancel previous query and disconnect before retry.
+                cancel();
+                multiplexed_connections->disconnect();
+
+                /// Only resend once, otherwise throw an exception
+                if (!resent_query)
+                {
+                    if (log)
+                        LOG_DEBUG(log, "Found duplicate UUIDs, will retry query without those parts");
+
+                    resent_query = true;
+                    sent_query = false;
+                    got_duplicated_part_uuids = false;
+                    /// Consecutive read will implicitly send query first.
+                    return read();
+                }
+                throw Exception("Found duplicate uuids while processing query.", ErrorCodes::DUPLICATED_PART_UUIDS);
+            }
             return *block;
+        }
     }
 }
 
@@ -233,7 +266,29 @@ std::variant<Block, int> RemoteQueryExecutor::read(std::unique_ptr<ReadContext>
         else
         {
             if (auto data = processPacket(std::move(read_context->packet)))
+            {
+                if (got_duplicated_part_uuids)
+                {
+                    /// Cancel previous query and disconnect before retry.
+                    cancel();
+                    multiplexed_connections->disconnect();
+
+                    /// Only resend once, otherwise throw an exception
+                    if (!resent_query)
+                    {
+                        if (log)
+                            LOG_DEBUG(log, "Found duplicate UUIDs, will retry query without those parts");
+
+                        resent_query = true;
+                        sent_query = false;
+                        got_duplicated_part_uuids = false;
+                        /// Consecutive read will implicitly send query first.
+                        return read(read_context);
+                    }
+                    throw Exception("Found duplicate uuids while processing query.", ErrorCodes::DUPLICATED_PART_UUIDS);
+                }
                 return std::move(*data);
+            }
         }
     }
     while (true);
@@ -246,6 +301,13 @@ std::optional<Block> RemoteQueryExecutor::processPacket(Packet packet)
 {
     switch (packet.type)
     {
+        case Protocol::Server::PartUUIDs:
+            if (!setPartUUIDs(packet.part_uuids))
+            {
+                got_duplicated_part_uuids = true;
+                return Block();
+            }
+            break;
         case Protocol::Server::Data:
             /// If the block is not empty and is not a header block
             if (packet.block && (packet.block.rows() > 0))
@@ -306,6 +368,20 @@ std::optional<Block> RemoteQueryExecutor::processPacket(Packet packet)
     return {};
 }
 
+bool RemoteQueryExecutor::setPartUUIDs(const std::vector<UUID> & uuids)
+{
+    Context & query_context = const_cast<Context &>(context).getQueryContext();
+    auto duplicates = query_context.getPartUUIDs()->add(uuids);
+
+    if (!duplicates.empty())
+    {
+        std::lock_guard lock(duplicated_part_uuids_mutex);
+        duplicated_part_uuids.insert(duplicated_part_uuids.begin(), duplicates.begin(), duplicates.end());
+        return false;
+    }
+    return true;
+}
+
 void RemoteQueryExecutor::finish(std::unique_ptr<ReadContext> * read_context)
 {
     /** If one of:
@@ -383,6 +459,7 @@ void RemoteQueryExecutor::sendExternalTables()
     {
         std::lock_guard lock(external_tables_mutex);
 
+        external_tables_data.clear();
         external_tables_data.reserve(count);
 
         for (size_t i = 0; i < count; ++i)
@@ -446,7 +523,7 @@ bool RemoteQueryExecutor::isQueryPending() const
 
 bool RemoteQueryExecutor::hasThrownException() const
 {
-    return got_exception_from_replica || got_unknown_packet_from_replica;
+    return got_exception_from_replica || got_unknown_packet_from_replica || got_duplicated_part_uuids;
 }
 
 }
diff --git a/src/DataStreams/RemoteQueryExecutor.h b/src/DataStreams/RemoteQueryExecutor.h
index 46d9d067563..843cf75f1f8 100644
--- a/src/DataStreams/RemoteQueryExecutor.h
+++ b/src/DataStreams/RemoteQueryExecutor.h
@@ -57,6 +57,9 @@ public:
     /// Create connection and send query, external tables and scalars.
     void sendQuery();
 
+    /// Query is resent to a replica, the query itself can be modified.
+    std::atomic<bool> resent_query { false };
+
     /// Read next block of data. Returns empty block if query is finished.
     Block read();
 
@@ -152,6 +155,14 @@ private:
       */
     std::atomic<bool> got_unknown_packet_from_replica { false };
 
+    /** Got duplicated uuids from replica
+      */
+    std::atomic<bool> got_duplicated_part_uuids{ false };
+
+    /// Parts uuids, collected from remote replicas
+    std::mutex duplicated_part_uuids_mutex;
+    std::vector<UUID> duplicated_part_uuids;
+
     PoolMode pool_mode = PoolMode::GET_MANY;
     StorageID main_table = StorageID::createEmpty();
 
@@ -163,6 +174,10 @@ private:
     /// Send all temporary tables to remote servers
     void sendExternalTables();
 
+    /** Set part uuids to a query context, collected from remote replicas.
+      */
+    bool setPartUUIDs(const std::vector<UUID> & uuids);
+
     /// If wasn't sent yet, send request to cancel all connections to replicas
     void tryCancel(const char * reason, std::unique_ptr<ReadContext> * read_context);
 
@@ -174,6 +189,9 @@ private:
 
     /// Process packet for read and return data block if possible.
     std::optional<Block> processPacket(Packet packet);
+
+    /// Reads packet by packet
+    Block readPackets();
 };
 
 }
diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h
index 9072f306bd9..a37a5b5ddc6 100644
--- a/src/IO/WriteHelpers.h
+++ b/src/IO/WriteHelpers.h
@@ -910,6 +910,7 @@ inline void writeBinary(const StringRef & x, WriteBuffer & buf) { writeStringBin
 inline void writeBinary(const std::string_view & x, WriteBuffer & buf) { writeStringBinary(x, buf); }
 inline void writeBinary(const Int128 & x, WriteBuffer & buf) { writePODBinary(x, buf); }
 inline void writeBinary(const UInt128 & x, WriteBuffer & buf) { writePODBinary(x, buf); }
+inline void writeBinary(const UUID & x, WriteBuffer & buf) { writePODBinary(x, buf); }
 inline void writeBinary(const DummyUInt256 & x, WriteBuffer & buf) { writePODBinary(x, buf); }
 inline void writeBinary(const Decimal32 & x, WriteBuffer & buf) { writePODBinary(x, buf); }
 inline void writeBinary(const Decimal64 & x, WriteBuffer & buf) { writePODBinary(x, buf); }
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 8ff317764a7..9a1fcf6a067 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -64,6 +64,7 @@
 #include <Common/RemoteHostFilter.h>
 #include <Interpreters/DatabaseCatalog.h>
 #include <Storages/MergeTree/BackgroundJobsExecutor.h>
+#include <Storages/MergeTree/MergeTreeDataPartUUID.h>
 
 
 namespace ProfileEvents
@@ -2510,4 +2511,22 @@ StorageID Context::resolveStorageIDImpl(StorageID storage_id, StorageNamespace w
     return StorageID::createEmpty();
 }
 
+PartUUIDsPtr Context::getPartUUIDs()
+{
+    auto lock = getLock();
+    if (!part_uuids)
+        part_uuids = std::make_shared<PartUUIDs>();
+
+    return part_uuids;
+}
+
+PartUUIDsPtr Context::getIgnoredPartUUIDs()
+{
+    auto lock = getLock();
+    if (!ignored_part_uuids)
+        ignored_part_uuids = std::make_shared<PartUUIDs>();
+
+    return ignored_part_uuids;
+}
+
 }
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index 5801cc2b949..4dbdf390473 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -107,6 +107,8 @@ using StoragePolicyPtr = std::shared_ptr<const IStoragePolicy>;
 using StoragePoliciesMap = std::map<String, StoragePolicyPtr>;
 class StoragePolicySelector;
 using StoragePolicySelectorPtr = std::shared_ptr<const StoragePolicySelector>;
+struct PartUUIDs;
+using PartUUIDsPtr = std::shared_ptr<PartUUIDs>;
 
 class IOutputFormat;
 using OutputFormatPtr = std::shared_ptr<IOutputFormat>;
@@ -264,6 +266,9 @@ private:
     using SampleBlockCache = std::unordered_map<std::string, Block>;
     mutable SampleBlockCache sample_block_cache;
 
+    std::shared_ptr<PartUUIDs> part_uuids; /// set of parts' uuids, is used for query parts deduplication
+    std::shared_ptr<PartUUIDs> ignored_part_uuids; /// set of parts' uuids are meant to be excluded from query processing
+
     NameToNameMap query_parameters;   /// Dictionary with query parameters for prepared statements.
                                                      /// (key=name, value)
 
@@ -734,6 +739,9 @@ public:
     };
 
     MySQLWireContext mysql;
+
+    PartUUIDsPtr getPartUUIDs();
+    PartUUIDsPtr getIgnoredPartUUIDs();
 private:
     std::unique_lock<std::recursive_mutex> getLock() const;
 
diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp
index 12d1a0249b7..0d040652342 100644
--- a/src/Server/TCPHandler.cpp
+++ b/src/Server/TCPHandler.cpp
@@ -24,6 +24,7 @@
 #include <Interpreters/OpenTelemetrySpanLog.h>
 #include <Storages/StorageMemory.h>
 #include <Storages/StorageReplicatedMergeTree.h>
+#include <Storages/MergeTree/MergeTreeDataPartUUID.h>
 #include <Core/ExternalTable.h>
 #include <Storages/ColumnDefault.h>
 #include <DataTypes/DataTypeLowCardinality.h>
@@ -180,10 +181,16 @@ void TCPHandler::runImpl()
 
             /** If Query - process it. If Ping or Cancel - go back to the beginning.
              *  There may come settings for a separate query that modify `query_context`.
+             *  It's possible to receive part uuids packet before the query, so then receivePacket has to be called twice.
              */
             if (!receivePacket())
                 continue;
 
+            /** If part_uuids got received in previous packet, trying to read again.
+              */
+            if (state.empty() && state.part_uuids && !receivePacket())
+                continue;
+
             query_scope.emplace(*query_context);
 
             send_exception_with_stack_trace = query_context->getSettingsRef().calculate_text_stack_trace;
@@ -528,6 +535,8 @@ void TCPHandler::processOrdinaryQuery()
     /// Pull query execution result, if exists, and send it to network.
     if (state.io.in)
     {
+        sendPartUUIDs();
+
         /// This allows the client to prepare output format
         if (Block header = state.io.in->getHeader())
             sendData(header);
@@ -592,6 +601,8 @@ void TCPHandler::processOrdinaryQueryWithProcessors()
 {
     auto & pipeline = state.io.pipeline;
 
+    sendPartUUIDs();
+
     /// Send header-block, to allow client to prepare output format for data to send.
     {
         const auto & header = pipeline.getHeader();
@@ -693,6 +704,20 @@ void TCPHandler::receiveUnexpectedTablesStatusRequest()
     throw NetException("Unexpected packet TablesStatusRequest received from client", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT);
 }
 
+void TCPHandler::sendPartUUIDs()
+{
+    auto uuids = query_context->getPartUUIDs()->get();
+    if (!uuids.empty())
+    {
+        for (const auto & uuid : uuids)
+            LOG_TRACE(log, "Sending UUID: {}", toString(uuid));
+
+        writeVarUInt(Protocol::Server::PartUUIDs, *out);
+        writeVectorBinary(uuids, *out);
+        out->next();
+    }
+}
+
 void TCPHandler::sendProfileInfo(const BlockStreamProfileInfo & info)
 {
     writeVarUInt(Protocol::Server::ProfileInfo, *out);
@@ -905,6 +930,10 @@ bool TCPHandler::receivePacket()
 
     switch (packet_type)
     {
+        case Protocol::Client::IgnoredPartUUIDs:
+            /// Part uuids packet if any comes before query.
+            receiveIgnoredPartUUIDs();
+            return true;
         case Protocol::Client::Query:
             if (!state.empty())
                 receiveUnexpectedQuery();
@@ -940,6 +969,16 @@ bool TCPHandler::receivePacket()
     }
 }
 
+void TCPHandler::receiveIgnoredPartUUIDs()
+{
+    state.part_uuids = true;
+    std::vector<UUID> uuids;
+    readVectorBinary(uuids, *in);
+
+    if (!uuids.empty())
+        query_context->getIgnoredPartUUIDs()->add(uuids);
+}
+
 void TCPHandler::receiveClusterNameAndSalt()
 {
     readStringBinary(cluster, *in);
diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h
index 0d3109a6591..41539bef1e1 100644
--- a/src/Server/TCPHandler.h
+++ b/src/Server/TCPHandler.h
@@ -67,6 +67,9 @@ struct QueryState
     /// Temporary tables read
     bool temporary_tables_read = false;
 
+    /// A state got uuids to exclude from a query
+    bool part_uuids = false;
+
     /// Request requires data from client for function input()
     bool need_receive_data_for_input = false;
     /// temporary place for incoming data block for input()
@@ -173,6 +176,7 @@ private:
     void receiveHello();
     bool receivePacket();
     void receiveQuery();
+    void receiveIgnoredPartUUIDs();
     bool receiveData(bool scalar);
     bool readDataNext(const size_t & poll_interval, const int & receive_timeout);
     void readData(const Settings & connection_settings);
@@ -201,6 +205,7 @@ private:
     void sendProgress();
     void sendLogs();
     void sendEndOfStream();
+    void sendPartUUIDs();
     void sendProfileInfo(const BlockStreamProfileInfo & info);
     void sendTotals(const Block & totals);
     void sendExtremes(const Block & extremes);
diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp
index c852151f27d..ce60856505e 100644
--- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp
+++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp
@@ -7,6 +7,7 @@
 #include <Common/typeid_cast.h>
 #include <DataTypes/DataTypeNothing.h>
 #include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypeUUID.h>
 
 
 namespace DB
@@ -205,6 +206,7 @@ namespace
 
         virtual void insertStringColumn(const ColumnPtr & column, const String & name) = 0;
         virtual void insertUInt64Column(const ColumnPtr & column, const String & name) = 0;
+        virtual void insertUUIDColumn(const ColumnPtr & column, const String & name) = 0;
     };
 }
 
@@ -241,6 +243,16 @@ static void injectVirtualColumnsImpl(size_t rows, VirtualColumnsInserter & inser
 
                 inserter.insertUInt64Column(column, virtual_column_name);
             }
+            else if (virtual_column_name == "_part_uuid")
+            {
+                ColumnPtr column;
+                if (rows)
+                    column = DataTypeUUID().createColumnConst(rows, task->data_part->uuid)->convertToFullColumnIfConst();
+                else
+                    column = DataTypeUUID().createColumn();
+
+                inserter.insertUUIDColumn(column, virtual_column_name);
+            }
             else if (virtual_column_name == "_partition_id")
             {
                 ColumnPtr column;
@@ -271,6 +283,11 @@ namespace
             block.insert({column, std::make_shared<DataTypeUInt64>(), name});
         }
 
+        void insertUUIDColumn(const ColumnPtr & column, const String & name) final
+        {
+            block.insert({column, std::make_shared<DataTypeUUID>(), name});
+        }
+
         Block & block;
     };
 
@@ -288,6 +305,10 @@ namespace
             columns.push_back(column);
         }
 
+        void insertUUIDColumn(const ColumnPtr & column, const String &) final
+        {
+            columns.push_back(column);
+        }
         Columns & columns;
     };
 }
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 9ed751cbc8e..56e6033d18e 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -4,6 +4,7 @@
 #include <DataTypes/DataTypeDate.h>
 #include <DataTypes/DataTypeDateTime.h>
 #include <DataTypes/DataTypeEnum.h>
+#include <DataTypes/DataTypeUUID.h>
 #include <DataTypes/DataTypeNullable.h>
 #include <DataTypes/DataTypeLowCardinality.h>
 #include <DataTypes/NestedUtils.h>
@@ -3949,6 +3950,7 @@ NamesAndTypesList MergeTreeData::getVirtuals() const
     return NamesAndTypesList{
         NameAndTypePair("_part", std::make_shared<DataTypeString>()),
         NameAndTypePair("_part_index", std::make_shared<DataTypeUInt64>()),
+        NameAndTypePair("_part_uuid", std::make_shared<DataTypeUUID>()),
         NameAndTypePair("_partition_id", std::make_shared<DataTypeString>()),
         NameAndTypePair("_sample_factor", std::make_shared<DataTypeFloat64>()),
     };
diff --git a/src/Storages/MergeTree/MergeTreeDataPartUUID.cpp b/src/Storages/MergeTree/MergeTreeDataPartUUID.cpp
new file mode 100644
index 00000000000..17d19855798
--- /dev/null
+++ b/src/Storages/MergeTree/MergeTreeDataPartUUID.cpp
@@ -0,0 +1,38 @@
+#include <Storages/MergeTree/MergeTreeDataPartUUID.h>
+
+namespace DB
+{
+
+std::vector<UUID> PartUUIDs::add(const std::vector<UUID> & new_uuids)
+{
+    std::lock_guard lock(mutex);
+    std::vector<UUID> intersection;
+
+    /// First check any presence of uuids in a uuids, return duplicates back if any
+    for (const auto & uuid : new_uuids)
+    {
+        if (uuids.find(uuid) != uuids.end())
+            intersection.emplace_back(uuid);
+    }
+
+    if (intersection.empty())
+    {
+        for (const auto & uuid : new_uuids)
+            uuids.emplace(uuid);
+    }
+    return intersection;
+}
+
+std::vector<UUID> PartUUIDs::get() const
+{
+    std::lock_guard lock(mutex);
+    return std::vector<UUID>(uuids.begin(), uuids.end());
+}
+
+bool PartUUIDs::has(const UUID & uuid) const
+{
+    std::lock_guard lock(mutex);
+    return uuids.find(uuid) != uuids.end();
+}
+
+}
diff --git a/src/Storages/MergeTree/MergeTreeDataPartUUID.h b/src/Storages/MergeTree/MergeTreeDataPartUUID.h
new file mode 100644
index 00000000000..ee3a9ee2791
--- /dev/null
+++ b/src/Storages/MergeTree/MergeTreeDataPartUUID.h
@@ -0,0 +1,34 @@
+#pragma once
+
+#include <memory>
+#include <mutex>
+#include <unordered_set>
+#include <Core/UUID.h>
+
+namespace DB
+{
+
+/** PartUUIDs is a uuid set to control query deduplication.
+ * The object is used in query context in both direction:
+ *  Server->Client to send all parts' UUIDs that have been read during the query
+ *  Client->Server to ignored specified parts from being processed.
+ *
+ *  Current implementation assumes a user setting allow_experimental_query_deduplication=1 is set.
+ */
+struct PartUUIDs
+{
+public:
+    /// Add new UUIDs if not duplicates found otherwise return duplicated UUIDs
+    std::vector<UUID> add(const std::vector<UUID> & uuids);
+    /// Get accumulated UUIDs
+    std::vector<UUID> get() const;
+    bool has(const UUID & uuid) const;
+
+private:
+    mutable std::mutex mutex;
+    std::unordered_set<UUID> uuids;
+};
+
+using PartUUIDsPtr = std::shared_ptr<PartUUIDs>;
+
+}
diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index 457c9c04aa9..740288e3b46 100644
--- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -15,6 +15,7 @@
 #include <Storages/MergeTree/MergeTreeIndices.h>
 #include <Storages/MergeTree/MergeTreeIndexReader.h>
 #include <Storages/MergeTree/KeyCondition.h>
+#include <Storages/MergeTree/MergeTreeDataPartUUID.h>
 #include <Storages/ReadInOrderOptimizer.h>
 #include <Parsers/ASTIdentifier.h>
 #include <Parsers/ASTLiteral.h>
@@ -35,8 +36,10 @@
 #include <Processors/QueryPlan/MergingFinal.h>
 #include <Processors/QueryPlan/ReadNothingStep.h>
 
+#include <Core/UUID.h>
 #include <DataTypes/DataTypeDate.h>
 #include <DataTypes/DataTypeEnum.h>
+#include <DataTypes/DataTypeUUID.h>
 #include <DataTypes/DataTypesNumber.h>
 #include <Storages/VirtualColumnUtils.h>
 
@@ -61,6 +64,7 @@ namespace ErrorCodes
     extern const int TOO_MANY_ROWS;
     extern const int CANNOT_PARSE_TEXT;
     extern const int TOO_MANY_PARTITIONS;
+    extern const int DUPLICATED_PART_UUIDS;
 }
 
 
@@ -71,14 +75,27 @@ MergeTreeDataSelectExecutor::MergeTreeDataSelectExecutor(const MergeTreeData & d
 
 
 /// Construct a block consisting only of possible values of virtual columns
-static Block getBlockWithPartColumn(const MergeTreeData::DataPartsVector & parts)
+static Block getBlockWithVirtualPartColumns(const MergeTreeData::DataPartsVector & parts, bool with_uuid)
 {
-    auto column = ColumnString::create();
+    auto part_column = ColumnString::create();
+    auto part_uuid_column = ColumnUUID::create();
 
     for (const auto & part : parts)
-        column->insert(part->name);
+    {
+        part_column->insert(part->name);
+        if (with_uuid)
+            part_uuid_column->insert(part->uuid);
+    }
 
-    return Block{ColumnWithTypeAndName(std::move(column), std::make_shared<DataTypeString>(), "_part")};
+    if (with_uuid)
+    {
+        return Block(std::initializer_list<ColumnWithTypeAndName>{
+            ColumnWithTypeAndName(std::move(part_column), std::make_shared<DataTypeString>(), "_part"),
+            ColumnWithTypeAndName(std::move(part_uuid_column), std::make_shared<DataTypeUUID>(), "_part_uuid"),
+        });
+    }
+
+    return Block{ColumnWithTypeAndName(std::move(part_column), std::make_shared<DataTypeString>(), "_part")};
 }
 
 
@@ -162,6 +179,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
     Names real_column_names;
 
     bool part_column_queried = false;
+    bool part_uuid_column_queried = false;
 
     bool sample_factor_column_queried = false;
     Float64 used_sample_factor = 1;
@@ -181,6 +199,11 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
         {
             virt_column_names.push_back(name);
         }
+        else if (name == "_part_uuid")
+        {
+            part_uuid_column_queried = true;
+            virt_column_names.push_back(name);
+        }
         else if (name == "_sample_factor")
         {
             sample_factor_column_queried = true;
@@ -198,9 +221,9 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
     if (real_column_names.empty())
         real_column_names.push_back(ExpressionActions::getSmallestColumn(available_real_columns));
 
-    /// If `_part` virtual column is requested, we try to use it as an index.
-    Block virtual_columns_block = getBlockWithPartColumn(parts);
-    if (part_column_queried)
+    /// If `_part` or `_part_uuid` virtual columns are requested, we try to filter out data by them.
+    Block virtual_columns_block = getBlockWithVirtualPartColumns(parts, part_uuid_column_queried);
+    if (part_column_queried || part_uuid_column_queried)
         VirtualColumnUtils::filterBlockWithQuery(query_info.query, virtual_columns_block, context);
 
     auto part_values = VirtualColumnUtils::extractSingleValueFromBlock<String>(virtual_columns_block, "_part");
@@ -246,36 +269,88 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
 
     /// Select the parts in which there can be data that satisfy `minmax_idx_condition` and that match the condition on `_part`,
     ///  as well as `max_block_number_to_read`.
+    /// Skip parts uuids if any to the query context, or skip parts which uuids marked as excluded.
     {
-        auto prev_parts = parts;
-        parts.clear();
+        Context & query_context
+            = context.hasQueryContext() ? const_cast<Context &>(context).getQueryContext() : const_cast<Context &>(context);
 
-        for (const auto & part : prev_parts)
+        /// process_parts prepare parts that have to be read for the query,
+        /// returns false if duplicated parts' UUID have been met
+        auto select_parts = [&] (MergeTreeData::DataPartsVector & selected_parts) -> bool
         {
-            if (part_values.find(part->name) == part_values.end())
-                continue;
+            auto ignored_part_uuids = query_context.getIgnoredPartUUIDs();
+            std::unordered_set<UUID> temp_part_uuids;
 
-            if (part->isEmpty())
-                continue;
+            auto prev_parts = selected_parts;
+            selected_parts.clear();
 
-            if (minmax_idx_condition && !minmax_idx_condition->checkInHyperrectangle(
-                    part->minmax_idx.hyperrectangle, data.minmax_idx_column_types).can_be_true)
-                continue;
-
-            if (partition_pruner)
+            for (const auto & part : prev_parts)
             {
-                if (partition_pruner->canBePruned(part))
+                if (part_values.find(part->name) == part_values.end())
                     continue;
+
+                if (part->isEmpty())
+                    continue;
+
+                if (minmax_idx_condition
+                    && !minmax_idx_condition->checkInHyperrectangle(part->minmax_idx.hyperrectangle, data.minmax_idx_column_types)
+                            .can_be_true)
+                    continue;
+
+                if (partition_pruner)
+                {
+                    if (partition_pruner->canBePruned(part))
+                        continue;
+                }
+
+                if (max_block_numbers_to_read)
+                {
+                    auto blocks_iterator = max_block_numbers_to_read->find(part->info.partition_id);
+                    if (blocks_iterator == max_block_numbers_to_read->end() || part->info.max_block > blocks_iterator->second)
+                        continue;
+                }
+
+                /// populate UUIDs and exclude ignored parts if enabled
+                if (query_context.getSettingsRef().allow_experimental_query_deduplication && part->uuid != UUIDHelpers::Nil)
+                {
+                    /// Skip the part if its uuid is meant to be excluded
+                    if (ignored_part_uuids->has(part->uuid))
+                        continue;
+
+                    auto result = temp_part_uuids.insert(part->uuid);
+                    if (!result.second)
+                        throw Exception("Found a part with the same UUID on the same replica.", ErrorCodes::LOGICAL_ERROR);
+                }
+
+                selected_parts.push_back(part);
             }
 
-            if (max_block_numbers_to_read)
+            if (!temp_part_uuids.empty())
             {
-                auto blocks_iterator = max_block_numbers_to_read->find(part->info.partition_id);
-                if (blocks_iterator == max_block_numbers_to_read->end() || part->info.max_block > blocks_iterator->second)
-                    continue;
+                auto duplicates = query_context.getPartUUIDs()->add(std::vector<UUID>{temp_part_uuids.begin(), temp_part_uuids.end()});
+                if (!duplicates.empty())
+                {
+                    /// on a local replica with prefer_localhost_replica=1 if any duplicates appeared during the first pass,
+                    /// adding them to the exclusion, so they will be skipped on second pass
+                    query_context.getIgnoredPartUUIDs()->add(duplicates);
+                    return false;
+                }
             }
 
-            parts.push_back(part);
+            return true;
+        };
+
+        /// Process parts that have to be read for a query.
+        auto needs_retry = !select_parts(parts);
+        /// If any duplicated part UUIDs met during the first step, try to ignore them in second pass
+        if (needs_retry)
+        {
+            if (log)
+                LOG_DEBUG(log, "Found duplicate uuids locally, will retry part selection without them");
+
+            /// Second attempt didn't help, throw an exception
+            if (!select_parts(parts))
+                throw Exception("Found duplicate UUIDs while processing query.", ErrorCodes::DUPLICATED_PART_UUIDS);
         }
     }
 
diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp
index 5227cd8a33e..570aeef820d 100644
--- a/src/Storages/StorageDistributed.cpp
+++ b/src/Storages/StorageDistributed.cpp
@@ -4,6 +4,7 @@
 #include <Disks/IDisk.h>
 
 #include <DataTypes/DataTypeFactory.h>
+#include <DataTypes/DataTypeUUID.h>
 #include <DataTypes/DataTypesNumber.h>
 
 #include <Storages/Distributed/DistributedBlockOutputStream.h>
@@ -345,6 +346,7 @@ NamesAndTypesList StorageDistributed::getVirtuals() const
             NameAndTypePair("_table", std::make_shared<DataTypeString>()),
             NameAndTypePair("_part", std::make_shared<DataTypeString>()),
             NameAndTypePair("_part_index", std::make_shared<DataTypeUInt64>()),
+            NameAndTypePair("_part_uuid", std::make_shared<DataTypeUUID>()),
             NameAndTypePair("_partition_id", std::make_shared<DataTypeString>()),
             NameAndTypePair("_sample_factor", std::make_shared<DataTypeFloat64>()),
             NameAndTypePair("_shard_num", std::make_shared<DataTypeUInt32>()),
diff --git a/src/Storages/ya.make b/src/Storages/ya.make
index 69e319cbad5..dbf37e58695 100644
--- a/src/Storages/ya.make
+++ b/src/Storages/ya.make
@@ -48,6 +48,7 @@ SRCS(
     MergeTree/MergeTreeDataPartInMemory.cpp
     MergeTree/MergeTreeDataPartTTLInfo.cpp
     MergeTree/MergeTreeDataPartType.cpp
+    MergeTree/MergeTreeDataPartUUID.cpp
     MergeTree/MergeTreeDataPartWide.cpp
     MergeTree/MergeTreeDataPartWriterCompact.cpp
     MergeTree/MergeTreeDataPartWriterInMemory.cpp
diff --git a/tests/integration/test_query_deduplication/__init__.py b/tests/integration/test_query_deduplication/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/integration/test_query_deduplication/configs/deduplication_settings.xml b/tests/integration/test_query_deduplication/configs/deduplication_settings.xml
new file mode 100644
index 00000000000..8369c916848
--- /dev/null
+++ b/tests/integration/test_query_deduplication/configs/deduplication_settings.xml
@@ -0,0 +1,5 @@
+<yandex>
+    <merge_tree>
+        <assign_part_uuids>1</assign_part_uuids>
+    </merge_tree>
+</yandex>
diff --git a/tests/integration/test_query_deduplication/configs/remote_servers.xml b/tests/integration/test_query_deduplication/configs/remote_servers.xml
new file mode 100644
index 00000000000..f12558ca529
--- /dev/null
+++ b/tests/integration/test_query_deduplication/configs/remote_servers.xml
@@ -0,0 +1,24 @@
+<yandex>
+    <remote_servers>
+        <test_cluster>
+            <shard>
+                <replica>
+                    <host>node1</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+            <shard>
+                <replica>
+                    <host>node2</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+            <shard>
+                <replica>
+                    <host>node3</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+        </test_cluster>
+    </remote_servers>
+</yandex>
diff --git a/tests/integration/test_query_deduplication/test.py b/tests/integration/test_query_deduplication/test.py
new file mode 100644
index 00000000000..8d935b98579
--- /dev/null
+++ b/tests/integration/test_query_deduplication/test.py
@@ -0,0 +1,165 @@
+import uuid
+
+import pytest
+
+from helpers.cluster import ClickHouseCluster
+from helpers.test_tools import TSV
+
+DUPLICATED_UUID = uuid.uuid4()
+
+cluster = ClickHouseCluster(__file__)
+
+node1 = cluster.add_instance(
+    'node1',
+    main_configs=['configs/remote_servers.xml', 'configs/deduplication_settings.xml'])
+
+node2 = cluster.add_instance(
+    'node2',
+    main_configs=['configs/remote_servers.xml', 'configs/deduplication_settings.xml'])
+
+node3 = cluster.add_instance(
+    'node3',
+    main_configs=['configs/remote_servers.xml', 'configs/deduplication_settings.xml'])
+
+
+@pytest.fixture(scope="module")
+def started_cluster():
+    try:
+        cluster.start()
+        yield cluster
+    finally:
+        cluster.shutdown()
+
+
+def prepare_node(node, parts_uuid=None):
+    node.query("""
+    CREATE TABLE t(_prefix UInt8 DEFAULT 0, key UInt64, value UInt64)
+    ENGINE MergeTree()
+    ORDER BY tuple()
+    PARTITION BY _prefix
+    SETTINGS index_granularity = 1
+    """)
+
+    node.query("""
+    CREATE TABLE d AS t ENGINE=Distributed(test_cluster, default, t)
+    """)
+
+    # Stop merges while populating test data
+    node.query("SYSTEM STOP MERGES")
+
+    # Create 5 parts
+    for i in range(1, 6):
+        node.query("INSERT INTO t VALUES ({}, {}, {})".format(i, i, i))
+
+    node.query("DETACH TABLE t")
+
+    if parts_uuid:
+        for part, part_uuid in parts_uuid:
+            script = """
+            echo -n '{}' > /var/lib/clickhouse/data/default/t/{}/uuid.txt
+            """.format(part_uuid, part)
+            node.exec_in_container(["bash", "-c", script])
+
+    # Attach table back
+    node.query("ATTACH TABLE t")
+
+    # NOTE:
+    # due to absence of the ability to lock part, need to operate on parts with preventin merges
+    # node.query("SYSTEM START MERGES")
+    # node.query("OPTIMIZE TABLE t FINAL")
+
+    print(node.name)
+    print(node.query("SELECT name, uuid, partition FROM system.parts WHERE table = 't' AND active ORDER BY name"))
+
+    assert '5' == node.query("SELECT count() FROM system.parts WHERE table = 't' AND active").strip()
+    if parts_uuid:
+        for part, part_uuid in parts_uuid:
+            assert '1' == node.query(
+                "SELECT count() FROM system.parts WHERE table = 't' AND uuid = '{}' AND active".format(
+                    part_uuid)).strip()
+
+
+@pytest.fixture(scope="module")
+def prepared_cluster(started_cluster):
+    print("duplicated UUID: {}".format(DUPLICATED_UUID))
+    prepare_node(node1, parts_uuid=[("3_3_3_0", DUPLICATED_UUID)])
+    prepare_node(node2, parts_uuid=[("3_3_3_0", DUPLICATED_UUID)])
+    prepare_node(node3)
+
+
+def test_virtual_column(prepared_cluster):
+    # Part containing `key=3` has the same fingerprint on both nodes,
+    #   we expect it to be included only once in the end result.;
+    # select query is using virtucal column _part_fingerprint to filter out part in one shard
+    expected = """
+    1	2
+    2	2
+    3	1
+    4	2
+    5	2
+    """
+    assert TSV(expected) == TSV(node1.query("""
+    SELECT
+        key,
+        count() AS c
+    FROM d
+    WHERE ((_shard_num = 1) AND (_part_uuid != '{}')) OR (_shard_num = 2)
+    GROUP BY key
+    ORDER BY
+        key ASC
+    """.format(DUPLICATED_UUID)))
+
+
+def test_with_deduplication(prepared_cluster):
+    # Part containing `key=3` has the same fingerprint on both nodes,
+    # we expect it to be included only once in the end result
+    expected = """
+1	3
+2	3
+3	2
+4	3
+5	3
+"""
+    assert TSV(expected) == TSV(node1.query(
+        "SET allow_experimental_query_deduplication=1; SELECT key, count() c FROM d GROUP BY key ORDER BY key"))
+
+
+def test_no_merge_with_deduplication(prepared_cluster):
+    # Part containing `key=3` has the same fingerprint on both nodes,
+    # we expect it to be included only once in the end result.
+    # even with distributed_group_by_no_merge=1 the duplicated part should be excluded from the final result
+    expected = """
+1	1
+2	1
+3	1
+4	1
+5	1
+1	1
+2	1
+3	1
+4	1
+5	1
+1	1
+2	1
+4	1
+5	1
+"""
+    assert TSV(expected) == TSV(node1.query("SELECT key, count() c FROM d GROUP BY key ORDER BY key", settings={
+        "allow_experimental_query_deduplication": 1,
+        "distributed_group_by_no_merge": 1,
+    }))
+
+
+def test_without_deduplication(prepared_cluster):
+    # Part containing `key=3` has the same fingerprint on both nodes,
+    # but allow_experimental_query_deduplication is disabled,
+    # so it will not be excluded
+    expected = """
+1	3
+2	3
+3	3
+4	3
+5	3
+"""
+    assert TSV(expected) == TSV(node1.query(
+        "SET allow_experimental_query_deduplication=0; SELECT key, count() c FROM d GROUP BY key ORDER BY key"))

From 807476cc6a63a4edf25a1c2b04649fbd373f6f52 Mon Sep 17 00:00:00 2001
From: Dmitriy <sevirov@yandex-team.ru>
Date: Tue, 2 Feb 2021 21:08:10 +0300
Subject: [PATCH 0541/1238] Add some columns
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Добавил отсутствующие столбцы, пример, ссылки на типы.
---
 docs/en/operations/system-tables/part_log.md | 65 +++++++++++++++-----
 docs/ru/operations/system-tables/part_log.md | 65 +++++++++++++++-----
 2 files changed, 98 insertions(+), 32 deletions(-)

diff --git a/docs/en/operations/system-tables/part_log.md b/docs/en/operations/system-tables/part_log.md
index 9aa95b1a493..1d2543a2be2 100644
--- a/docs/en/operations/system-tables/part_log.md
+++ b/docs/en/operations/system-tables/part_log.md
@@ -6,29 +6,62 @@ This table contains information about events that occurred with [data parts](../
 
 The `system.part_log` table contains the following columns:
 
--   `event_type` (Enum) — Type of the event that occurred with the data part. Can have one of the following values:
+-   `query_id` ([String](../../sql-reference/data-types/string.md)) — Id of the query to insert data parts.
+-   `event_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of the event that occurred with the data part. Can have one of the following values:
     -   `NEW_PART` — Inserting of a new data part.
     -   `MERGE_PARTS` — Merging of data parts.
     -   `DOWNLOAD_PART` — Downloading a data part.
     -   `REMOVE_PART` — Removing or detaching a data part using [DETACH PARTITION](../../sql-reference/statements/alter/partition.md#alter_detach-partition).
     -   `MUTATE_PART` — Mutating of a data part.
     -   `MOVE_PART` — Moving the data part from the one disk to another one.
--   `event_date` (Date) — Event date.
--   `event_time` (DateTime) — Event time.
--   `duration_ms` (UInt64) — Duration.
--   `database` (String) — Name of the database the data part is in.
--   `table` (String) — Name of the table the data part is in.
--   `part_name` (String) — Name of the data part.
--   `partition_id` (String) — ID of the partition that the data part was inserted to. The column takes the ‘all’ value if the partitioning is by `tuple()`.
--   `rows` (UInt64) — The number of rows in the data part.
--   `size_in_bytes` (UInt64) — Size of the data part in bytes.
--   `merged_from` (Array(String)) — An array of names of the parts which the current part was made up from (after the merge).
--   `bytes_uncompressed` (UInt64) — Size of uncompressed bytes.
--   `read_rows` (UInt64) — The number of rows was read during the merge.
--   `read_bytes` (UInt64) — The number of bytes was read during the merge.
--   `error` (UInt16) — The code number of the occurred error.
--   `exception` (String) — Text message of the occurred error.
+-   `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date.
+-   `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time.
+-   `duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Duration.
+-   `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database the data part is in.
+-   `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table the data part is in.
+-   `part_name` ([String](../../sql-reference/data-types/string.md)) — Name of the data part.
+-   `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition that the data part was inserted to. The column takes the ‘all’ value if the partitioning is by `tuple()`.
+-   `path_on_disk` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the folder with data part files.
+-   `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows in the data part.
+-   `size_in_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of the data part in bytes.
+-   `merged_from` ([Array(String)](../../sql-reference/data-types/array.md)) — An array of names of the parts which the current part was made up from (after the merge).
+-   `bytes_uncompressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of uncompressed bytes.
+-   `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows was read during the merge.
+-   `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of bytes was read during the merge.
+-   `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in context of this thread.
+-   `error` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The code number of the occurred error.
+-   `exception` ([String](../../sql-reference/data-types/string.md)) — Text message of the occurred error.
 
 The `system.part_log` table is created after the first inserting data to the `MergeTree` table.
 
+**Example**
+
+``` sql
+SELECT * FROM system.part_log LIMIT 1 \G
+```
+
+``` text
+Row 1:
+──────
+query_id:                      983ad9c7-28d5-4ae1-844e-603116b7de31
+event_type:                    NewPart
+event_date:                    2021-02-02
+event_time:                    2021-02-02 11:14:28
+duration_ms:                   35
+database:                      default
+table:                         log_mt_2
+part_name:                     all_1_1_0
+partition_id:                  all
+path_on_disk:                  db/data/default/log_mt_2/all_1_1_0/
+rows:                          115418
+size_in_bytes:                 1074311
+merged_from:                   []
+bytes_uncompressed:            0
+read_rows:                     0
+read_bytes:                    0
+peak_memory_usage:             0
+error:                         0
+exception:                   
+```
+
 [Original article](https://clickhouse.tech/docs/en/operations/system_tables/part_log) <!--hide-->
diff --git a/docs/ru/operations/system-tables/part_log.md b/docs/ru/operations/system-tables/part_log.md
index 255ece76ee2..923e8579e20 100644
--- a/docs/ru/operations/system-tables/part_log.md
+++ b/docs/ru/operations/system-tables/part_log.md
@@ -6,29 +6,62 @@
 
 Столбцы:
 
--   `event_type` (Enum) — тип события. Столбец может содержать одно из следующих значений:
+-   `query_id` ([String](../../sql-reference/data-types/string.md)) — идентификатор запроса на вставку кусков данных.
+-   `event_type` ([Enum8](../../sql-reference/data-types/enum.md)) — тип события. Столбец может содержать одно из следующих значений:
     -   `NEW_PART` — вставка нового куска.
     -   `MERGE_PARTS` — слияние кусков.
     -   `DOWNLOAD_PART` — загрузка с реплики.
     -   `REMOVE_PART` — удаление или отсоединение из таблицы с помощью [DETACH PARTITION](../../sql-reference/statements/alter/partition.md#alter_detach-partition).
     -   `MUTATE_PART` — изменение куска.
     -   `MOVE_PART` — перемещение куска между дисками.
--   `event_date` (Date) — дата события.
--   `event_time` (DateTime) — время события.
--   `duration_ms` (UInt64) — длительность.
--   `database` (String) — имя базы данных, в которой находится кусок.
--   `table` (String) — имя таблицы, в которой находится кусок.
--   `part_name` (String) — имя куска.
--   `partition_id` (String) — идентификатор партиции, в которую был добавлен кусок. В столбце будет значение ‘all’, если таблица партициируется по выражению `tuple()`.
--   `rows` (UInt64) — число строк в куске.
--   `size_in_bytes` (UInt64) — размер куска данных в байтах.
--   `merged_from` (Array(String)) — массив имён кусков, из которых образован текущий кусок в результате слияния (также столбец заполняется в случае скачивания уже смерженного куска).
--   `bytes_uncompressed` (UInt64) — количество прочитанных разжатых байт.
--   `read_rows` (UInt64) — сколько было прочитано строк при слиянии кусков.
--   `read_bytes` (UInt64) — сколько было прочитано байт при слиянии кусков.
--   `error` (UInt16) — код ошибки, возникшей при текущем событии.
--   `exception` (String) — текст ошибки.
+-   `event_date` ([Date](../../sql-reference/data-types/date.md)) — дата события.
+-   `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время события.
+-   `duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — длительность.
+-   `database` ([String](../../sql-reference/data-types/string.md)) — имя базы данных, в которой находится кусок.
+-   `table` ([String](../../sql-reference/data-types/string.md)) — имя таблицы, в которой находится кусок.
+-   `part_name` ([String](../../sql-reference/data-types/string.md)) — имя куска.
+-   `partition_id` ([String](../../sql-reference/data-types/string.md)) — идентификатор партиции, в которую был добавлен кусок. В столбце будет значение ‘all’, если таблица партициируется по выражению `tuple()`.
+-   `path_on_disk` ([String](../../sql-reference/data-types/string.md)) — абсолютный путь к папке с файлами кусков данных.
+-   `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — число строк в куске.
+-   `size_in_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — размер куска данных в байтах.
+-   `merged_from` ([Array(String)](../../sql-reference/data-types/array.md)) — массив имён кусков, из которых образован текущий кусок в результате слияния (также столбец заполняется в случае скачивания уже смерженного куска).
+-   `bytes_uncompressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — количество прочитанных разжатых байт.
+-   `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — сколько было прочитано строк при слиянии кусков.
+-   `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — сколько было прочитано байт при слиянии кусков.
+-   `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — максимальная разница между выделенной и освобождённой памятью в контексте потока.
+-   `error` ([UInt16](../../sql-reference/data-types/int-uint.md)) — код ошибки, возникшей при текущем событии.
+-   `exception` ([String](../../sql-reference/data-types/string.md)) — текст ошибки.
 
 Системная таблица `system.part_log` будет создана после первой вставки данных в таблицу `MergeTree`.
 
+**Пример**
+
+``` sql
+SELECT * FROM system.part_log LIMIT 1 \G
+```
+
+``` text
+Row 1:
+──────
+query_id:                      983ad9c7-28d5-4ae1-844e-603116b7de31
+event_type:                    NewPart
+event_date:                    2021-02-02
+event_time:                    2021-02-02 11:14:28
+duration_ms:                   35
+database:                      default
+table:                         log_mt_2
+part_name:                     all_1_1_0
+partition_id:                  all
+path_on_disk:                  db/data/default/log_mt_2/all_1_1_0/
+rows:                          115418
+size_in_bytes:                 1074311
+merged_from:                   []
+bytes_uncompressed:            0
+read_rows:                     0
+read_bytes:                    0
+peak_memory_usage:             0
+error:                         0
+exception:                   
+```
+
 [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/system_tables/part_log) <!--hide-->

From abeeebc66156b85a690e0bc33f7759f26dcd61da Mon Sep 17 00:00:00 2001
From: Olga Revyakina <revolg@yandex-team.ru>
Date: Tue, 2 Feb 2021 22:01:06 +0300
Subject: [PATCH 0542/1238] Symbolic links deleted

---
 docs/es/sql-reference/data-types/map.md | 57 -------------------------
 docs/fr/sql-reference/data-types/map.md | 57 -------------------------
 docs/ja/sql-reference/data-types/map.md | 57 -------------------------
 docs/zh/sql-reference/data-types/map.md | 57 -------------------------
 4 files changed, 228 deletions(-)
 delete mode 100644 docs/es/sql-reference/data-types/map.md
 delete mode 100644 docs/fr/sql-reference/data-types/map.md
 delete mode 100644 docs/ja/sql-reference/data-types/map.md
 delete mode 100644 docs/zh/sql-reference/data-types/map.md

diff --git a/docs/es/sql-reference/data-types/map.md b/docs/es/sql-reference/data-types/map.md
deleted file mode 100644
index 0f0f69d421d..00000000000
--- a/docs/es/sql-reference/data-types/map.md
+++ /dev/null
@@ -1,57 +0,0 @@
----
-toc_priority: 65
-toc_title: Map(key, value)
----
-
-# Map(key, value) {#data_type-map}
-
-`Map(key, value)` data type stores `key:value` pairs in structures like JSON. 
-
-**Parameters** 
--   `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md) or [Integer](../../sql-reference/data-types/int-uint.md).
--   `value` — The value part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) or [Array](../../sql-reference/data-types/array.md).
-
-!!! warning "Warning"
-    Currently `Map` data type is an experimental feature. To work with it you must set `allow_experimental_map_type = 1`.
-
-To get the value from an `a Map('key', 'value')` column, use `a['key']` syntax.
-
-**Example**
-
-Query:
-
-``` sql
-CREATE TABLE table_map (a Map(String, UInt64)) ENGINE=Memory;
-INSERT INTO table_map VALUES ({'key1':1, 'key2':100}), ({'key1':2,'key2':200}), ({'key1':3,'key2':300});
-SELECT a['key2'] FROM table_map;
-```
-Result:
-
-```text
-┌─arrayElement(a, 'key2')─┐
-│                     100 │
-│                     200 │
-│                     300 │
-└─────────────────────────┘
-```
-
-## Convert Tuple to Map Type {#map-and-tuple}
-
-You can cast `Tuple()` as `Map()` using [CAST](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function:
-
-``` sql
-SELECT CAST(([1, 2, 3], ['Ready', 'Steady', 'Go']), 'Map(UInt8, String)') AS map;
-```
-
-``` text
-┌─map───────────────────────────┐
-│ {1:'Ready',2:'Steady',3:'Go'} │
-└───────────────────────────────┘
-```
-
-**See Also**
-
--   [map()](../../sql-reference/functions/tuple-map-functions.md#function-map) function
--   [CAST()](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function
-
-[Original article](https://clickhouse.tech/docs/en/data-types/map/) <!--hide-->
diff --git a/docs/fr/sql-reference/data-types/map.md b/docs/fr/sql-reference/data-types/map.md
deleted file mode 100644
index 0f0f69d421d..00000000000
--- a/docs/fr/sql-reference/data-types/map.md
+++ /dev/null
@@ -1,57 +0,0 @@
----
-toc_priority: 65
-toc_title: Map(key, value)
----
-
-# Map(key, value) {#data_type-map}
-
-`Map(key, value)` data type stores `key:value` pairs in structures like JSON. 
-
-**Parameters** 
--   `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md) or [Integer](../../sql-reference/data-types/int-uint.md).
--   `value` — The value part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) or [Array](../../sql-reference/data-types/array.md).
-
-!!! warning "Warning"
-    Currently `Map` data type is an experimental feature. To work with it you must set `allow_experimental_map_type = 1`.
-
-To get the value from an `a Map('key', 'value')` column, use `a['key']` syntax.
-
-**Example**
-
-Query:
-
-``` sql
-CREATE TABLE table_map (a Map(String, UInt64)) ENGINE=Memory;
-INSERT INTO table_map VALUES ({'key1':1, 'key2':100}), ({'key1':2,'key2':200}), ({'key1':3,'key2':300});
-SELECT a['key2'] FROM table_map;
-```
-Result:
-
-```text
-┌─arrayElement(a, 'key2')─┐
-│                     100 │
-│                     200 │
-│                     300 │
-└─────────────────────────┘
-```
-
-## Convert Tuple to Map Type {#map-and-tuple}
-
-You can cast `Tuple()` as `Map()` using [CAST](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function:
-
-``` sql
-SELECT CAST(([1, 2, 3], ['Ready', 'Steady', 'Go']), 'Map(UInt8, String)') AS map;
-```
-
-``` text
-┌─map───────────────────────────┐
-│ {1:'Ready',2:'Steady',3:'Go'} │
-└───────────────────────────────┘
-```
-
-**See Also**
-
--   [map()](../../sql-reference/functions/tuple-map-functions.md#function-map) function
--   [CAST()](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function
-
-[Original article](https://clickhouse.tech/docs/en/data-types/map/) <!--hide-->
diff --git a/docs/ja/sql-reference/data-types/map.md b/docs/ja/sql-reference/data-types/map.md
deleted file mode 100644
index 0f0f69d421d..00000000000
--- a/docs/ja/sql-reference/data-types/map.md
+++ /dev/null
@@ -1,57 +0,0 @@
----
-toc_priority: 65
-toc_title: Map(key, value)
----
-
-# Map(key, value) {#data_type-map}
-
-`Map(key, value)` data type stores `key:value` pairs in structures like JSON. 
-
-**Parameters** 
--   `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md) or [Integer](../../sql-reference/data-types/int-uint.md).
--   `value` — The value part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) or [Array](../../sql-reference/data-types/array.md).
-
-!!! warning "Warning"
-    Currently `Map` data type is an experimental feature. To work with it you must set `allow_experimental_map_type = 1`.
-
-To get the value from an `a Map('key', 'value')` column, use `a['key']` syntax.
-
-**Example**
-
-Query:
-
-``` sql
-CREATE TABLE table_map (a Map(String, UInt64)) ENGINE=Memory;
-INSERT INTO table_map VALUES ({'key1':1, 'key2':100}), ({'key1':2,'key2':200}), ({'key1':3,'key2':300});
-SELECT a['key2'] FROM table_map;
-```
-Result:
-
-```text
-┌─arrayElement(a, 'key2')─┐
-│                     100 │
-│                     200 │
-│                     300 │
-└─────────────────────────┘
-```
-
-## Convert Tuple to Map Type {#map-and-tuple}
-
-You can cast `Tuple()` as `Map()` using [CAST](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function:
-
-``` sql
-SELECT CAST(([1, 2, 3], ['Ready', 'Steady', 'Go']), 'Map(UInt8, String)') AS map;
-```
-
-``` text
-┌─map───────────────────────────┐
-│ {1:'Ready',2:'Steady',3:'Go'} │
-└───────────────────────────────┘
-```
-
-**See Also**
-
--   [map()](../../sql-reference/functions/tuple-map-functions.md#function-map) function
--   [CAST()](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function
-
-[Original article](https://clickhouse.tech/docs/en/data-types/map/) <!--hide-->
diff --git a/docs/zh/sql-reference/data-types/map.md b/docs/zh/sql-reference/data-types/map.md
deleted file mode 100644
index 0f0f69d421d..00000000000
--- a/docs/zh/sql-reference/data-types/map.md
+++ /dev/null
@@ -1,57 +0,0 @@
----
-toc_priority: 65
-toc_title: Map(key, value)
----
-
-# Map(key, value) {#data_type-map}
-
-`Map(key, value)` data type stores `key:value` pairs in structures like JSON. 
-
-**Parameters** 
--   `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md) or [Integer](../../sql-reference/data-types/int-uint.md).
--   `value` — The value part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) or [Array](../../sql-reference/data-types/array.md).
-
-!!! warning "Warning"
-    Currently `Map` data type is an experimental feature. To work with it you must set `allow_experimental_map_type = 1`.
-
-To get the value from an `a Map('key', 'value')` column, use `a['key']` syntax.
-
-**Example**
-
-Query:
-
-``` sql
-CREATE TABLE table_map (a Map(String, UInt64)) ENGINE=Memory;
-INSERT INTO table_map VALUES ({'key1':1, 'key2':100}), ({'key1':2,'key2':200}), ({'key1':3,'key2':300});
-SELECT a['key2'] FROM table_map;
-```
-Result:
-
-```text
-┌─arrayElement(a, 'key2')─┐
-│                     100 │
-│                     200 │
-│                     300 │
-└─────────────────────────┘
-```
-
-## Convert Tuple to Map Type {#map-and-tuple}
-
-You can cast `Tuple()` as `Map()` using [CAST](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function:
-
-``` sql
-SELECT CAST(([1, 2, 3], ['Ready', 'Steady', 'Go']), 'Map(UInt8, String)') AS map;
-```
-
-``` text
-┌─map───────────────────────────┐
-│ {1:'Ready',2:'Steady',3:'Go'} │
-└───────────────────────────────┘
-```
-
-**See Also**
-
--   [map()](../../sql-reference/functions/tuple-map-functions.md#function-map) function
--   [CAST()](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function
-
-[Original article](https://clickhouse.tech/docs/en/data-types/map/) <!--hide-->

From 3d3d6777d3aa41a0c19cc39355c468a1c34fec58 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Tue, 2 Feb 2021 10:47:26 +0300
Subject: [PATCH 0543/1238] test_dictionaries_dependency_xml: decrease
 dictionaries lifetime

Since ExternalLoader::PeriodicUpdater::check_period_sec = 5, and so if
it will be scheduled too early the reload will be skipped, and indeed
this is what you can see in logs [1], the reload is done each 10
seconds, not 5:

    2021.01.31 14:20:22.590999 [ 48 ] {} <Trace> ExternalDictionariesLoader: Supposed update time for 'dep_x' is 2021-01-31 14:20:27 (loaded, lifetime [5, 5], no errors)
    2021.01.31 14:20:22.591016 [ 48 ] {} <Trace> ExternalDictionariesLoader: Next update time for 'dep_x' was set to 2021-01-31 14:20:27
    ...
    2021.01.31 14:20:32.164882 [ 50 ] {} <Trace> ExternalDictionariesLoader: Start loading object 'dep_x'

  [1]: https://clickhouse-test-reports.s3.yandex.net/19584/37797fdf5b30dc97147e73b3ac8ca9025b80aaed/integration_tests_(release).html#fail1
---
 .../configs/dictionaries/dep_x.xml                             | 3 ++-
 .../configs/dictionaries/dep_y.xml                             | 3 ++-
 .../configs/dictionaries/dep_z.xml                             | 3 ++-
 tests/integration/test_dictionaries_dependency_xml/test.py     | 2 +-
 4 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_x.xml b/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_x.xml
index 097fc7cf503..74936c04db3 100644
--- a/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_x.xml
+++ b/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_x.xml
@@ -11,7 +11,8 @@
                 <table>dep_z</table>
             </clickhouse>
         </source>
-        <lifetime>5</lifetime>
+        <!-- ExternalLoader::PeriodicUpdater::check_period_sec=5 anyway -->
+        <lifetime>4</lifetime>
         <layout>
             <flat/>
         </layout>
diff --git a/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_y.xml b/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_y.xml
index 227d87ca92a..ed7f66b1b41 100644
--- a/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_y.xml
+++ b/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_y.xml
@@ -11,7 +11,8 @@
                <table>elements</table>
            </clickhouse>
        </source>
-       <lifetime>5</lifetime>
+       <!-- ExternalLoader::PeriodicUpdater::check_period_sec=5 anyway -->
+       <lifetime>4</lifetime>
        <layout>
            <flat/>
        </layout>
diff --git a/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_z.xml b/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_z.xml
index 8eff3a6407b..d2d7dff61ad 100644
--- a/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_z.xml
+++ b/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_z.xml
@@ -12,7 +12,8 @@
                <invalidate_query>SELECT intDiv(count(), 5) from dict.dep_y</invalidate_query>
            </clickhouse>
        </source>
-       <lifetime>5</lifetime>
+       <!-- ExternalLoader::PeriodicUpdater::check_period_sec=5 anyway -->
+       <lifetime>4</lifetime>
        <layout>
            <flat/>
        </layout>
diff --git a/tests/integration/test_dictionaries_dependency_xml/test.py b/tests/integration/test_dictionaries_dependency_xml/test.py
index d5453bb4814..b8ebcc6cc4b 100644
--- a/tests/integration/test_dictionaries_dependency_xml/test.py
+++ b/tests/integration/test_dictionaries_dependency_xml/test.py
@@ -65,7 +65,7 @@ def test_get_data(started_cluster):
     assert query("SELECT dictGetString('dep_y', 'a', toUInt64(3))") == "fire\n"
     assert query("SELECT dictGetString('dep_z', 'a', toUInt64(3))") == "ZZ\n"
 
-    # dep_x and dep_z are updated only when there `intDiv(count(), 4)`  is changed.
+    # dep_x and dep_z are updated only when there `intDiv(count(), 5)`  is changed.
     query("INSERT INTO test.elements VALUES (4, 'ether', 404, 0.001)")
     assert_eq_with_retry(instance, "SELECT dictHas('dep_x', toUInt64(4))", "1", sleep_time=2, retry_count=10)
     assert query("SELECT dictGetString('dep_x', 'a', toUInt64(3))") == "fire\n"

From 33628e1bd99f1393ca55f32c695ab6cb12c17b94 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Tue, 2 Feb 2021 10:52:14 +0300
Subject: [PATCH 0544/1238] test_materialize_mysql_database: add retries for
 test_select_without_columns_8_0

Here is an example:

    2021.01.31 14:04:43.615618 [ 54 ] {} <Debug> executeQuery: (internal) /*Materialize MySQL step 1: execute dump data*/ INSERT INTO t(a, b, _sign, _version) VALUES
    2021.01.31 14:04:43.616453 [ 54 ] {} <Trace> db.t (fc55a47e-1cae-4f41-9a13-91ccde5a73bb): Renaming temporary part tmp_insert_0_1_1_0 to 0_1_1_0.
    ...
    2021.01.31 14:04:43.786823 [ 12 ] {4a0d1f80-4a96-4809-a84e-d55d7cd25cdd} <Debug> executeQuery: (from 172.18.0.1:45162, using production parser) SELECT count(_version) FROM db.t FORMAT TSV
    ...
    2021.01.31 14:04:44.566037 [ 54 ] {} <Debug> executeQuery: (internal) /*Materialize MySQL step 1: execute dump data*/ INSERT INTO t(a, b, _sign, _version) VALUES
    2021.01.31 14:04:44.566943 [ 54 ] {} <Trace> db.t (fc55a47e-1cae-4f41-9a13-91ccde5a73bb): Renaming temporary part tmp_insert_0_2_2_0 to 0_2_2_0.
    ...
    2021.01.31 14:04:52.343911 [ 54 ] {} <Error> MaterializeMySQLSyncThread: Code: 1002, e.displayText() = DB::Exception: Master maybe lost, Stack trace (when copying this message, always include the lines below):

  [1]: https://clickhouse-test-reports.s3.yandex.net/19584/978f23e3437d0d0eb64727e87ffe1b80289c9fc2/integration_tests_(release).html#fail1
---
 .../test_materialize_mysql_database/materialize_with_ddl.py    | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py b/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py
index e3ac7fc0c14..38ff8fd752b 100644
--- a/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py
+++ b/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py
@@ -10,6 +10,7 @@ import random
 
 import threading
 from multiprocessing.dummy import Pool
+from helpers.test_tools import assert_eq_with_retry
 
 def check_query(clickhouse_node, query, result_set, retry_count=60, interval_seconds=3):
     lastest_result = ''
@@ -485,7 +486,7 @@ def select_without_columns(clickhouse_node, mysql_node, service_name):
     check_query(clickhouse_node, "SELECT count((_sign, _version)) FROM db.t FORMAT TSV", res[0])
 
     assert clickhouse_node.query("SELECT count(_sign) FROM db.t FORMAT TSV") == res[1]
-    assert clickhouse_node.query("SELECT count(_version) FROM db.t FORMAT TSV") == res[2]
+    assert_eq_with_retry(clickhouse_node, "SELECT count(_version) FROM db.t", res[2].strip(), sleep_time=2, retry_count=3)
 
     assert clickhouse_node.query("SELECT count() FROM db.t FORMAT TSV") == "1\n"
     assert clickhouse_node.query("SELECT count(*) FROM db.t FORMAT TSV") == "1\n"

From ac510f9770da9e59cc51cabcca8302ee9ec0f95d Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Tue, 2 Feb 2021 22:00:19 +0300
Subject: [PATCH 0545/1238] test_read_temporary_tables_on_failure: increase
 timeout to avoid false-positive

In [1]:

    start_cluster = <helpers.cluster.ClickHouseCluster object at 0x7f2d46895dd8>

        def test_different_versions(start_cluster):
            with pytest.raises(QueryTimeoutExceedException):
                node.query("SELECT sleep(3)", timeout=1)
            with pytest.raises(QueryRuntimeException):
    >           node.query("SELECT 1", settings={'max_concurrent_queries_for_user': 1})
    E           Failed: DID NOT RAISE <class 'helpers.client.QueryRuntimeException'>

  [1]: https://clickhouse-test-reports.s3.yandex.net/19451/b68508002d134ead05bf2ca0e22a13a34a5c55c6/integration_tests_(thread).html#fail1
---
 tests/integration/test_read_temporary_tables_on_failure/test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/test_read_temporary_tables_on_failure/test.py b/tests/integration/test_read_temporary_tables_on_failure/test.py
index f7df52f67e9..e62c7c9eaec 100644
--- a/tests/integration/test_read_temporary_tables_on_failure/test.py
+++ b/tests/integration/test_read_temporary_tables_on_failure/test.py
@@ -19,7 +19,7 @@ def start_cluster():
 
 def test_different_versions(start_cluster):
     with pytest.raises(QueryTimeoutExceedException):
-        node.query("SELECT sleep(3)", timeout=1)
+        node.query("SELECT sleepEachRow(3) FROM numbers(10)", timeout=5)
     with pytest.raises(QueryRuntimeException):
         node.query("SELECT 1", settings={'max_concurrent_queries_for_user': 1})
     assert node.contains_in_log('Too many simultaneous queries for user')

From 12485eee6b8feeceb2c537933efd4ce7df630a90 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Tue, 2 Feb 2021 22:07:23 +0300
Subject: [PATCH 0546/1238] Fix some of the issues found by Coverity

---
 base/common/ReplxxLineReader.cpp              |  10 +-
 base/daemon/BaseDaemon.cpp                    |   6 +-
 base/mysqlxx/Connection.h                     |   1 -
 base/mysqlxx/Query.cpp                        |  10 --
 base/mysqlxx/Query.h                          |   6 -
 base/mysqlxx/ResultBase.h                     |   2 +-
 base/mysqlxx/Row.h                            |   2 +-
 base/mysqlxx/UseQueryResult.h                 |   3 +-
 base/mysqlxx/Value.h                          |   2 +-
 base/mysqlxx/tests/mysqlxx_test.cpp           | 139 ------------------
 src/Columns/ColumnDecimal.h                   |   2 +-
 src/Parsers/ASTWithElement.cpp                |   2 +-
 .../Formats/Impl/ORCBlockOutputFormat.cpp     |  86 +++++------
 .../Formats/Impl/ORCBlockOutputFormat.h       |  10 +-
 .../MergeTree/MergeTreeIndexFullText.cpp      |   2 +-
 15 files changed, 64 insertions(+), 219 deletions(-)

diff --git a/base/common/ReplxxLineReader.cpp b/base/common/ReplxxLineReader.cpp
index 28c7990c353..920d86849f9 100644
--- a/base/common/ReplxxLineReader.cpp
+++ b/base/common/ReplxxLineReader.cpp
@@ -12,6 +12,8 @@
 #include <dlfcn.h>
 #include <fcntl.h>
 #include <fstream>
+#include <fmt/format.h>
+
 
 namespace
 {
@@ -189,8 +191,8 @@ void ReplxxLineReader::openEditor()
         return;
     }
 
-    String editor = std::getenv("EDITOR");
-    if (editor.empty())
+    const char * editor = std::getenv("EDITOR");
+    if (!editor)
         editor = "vim";
 
     replxx::Replxx::State state(rx.get_state());
@@ -204,7 +206,7 @@ void ReplxxLineReader::openEditor()
         if ((-1 == res || 0 == res) && errno != EINTR)
         {
             rx.print("Cannot write to temporary query file %s: %s\n", filename, errnoToString(errno).c_str());
-            return;
+            break;
         }
         bytes_written += res;
     }
@@ -215,7 +217,7 @@ void ReplxxLineReader::openEditor()
         return;
     }
 
-    if (0 == execute(editor + " " + filename))
+    if (0 == execute(fmt::format("{} {}", editor, filename)))
     {
         try
         {
diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp
index 4cf8a8d7ce9..c0026cbe64d 100644
--- a/base/daemon/BaseDaemon.cpp
+++ b/base/daemon/BaseDaemon.cpp
@@ -230,10 +230,10 @@ public:
             }
             else
             {
-                siginfo_t info;
-                ucontext_t context;
+                siginfo_t info{};
+                ucontext_t context{};
                 StackTrace stack_trace(NoCapture{});
-                UInt32 thread_num;
+                UInt32 thread_num{};
                 std::string query_id;
                 DB::ThreadStatus * thread_ptr{};
 
diff --git a/base/mysqlxx/Connection.h b/base/mysqlxx/Connection.h
index 0e5a608108c..ca67db0e0c6 100644
--- a/base/mysqlxx/Connection.h
+++ b/base/mysqlxx/Connection.h
@@ -39,7 +39,6 @@ private:
 /** MySQL connection.
   * Usage:
   *        mysqlxx::Connection connection("Test", "127.0.0.1", "root", "qwerty", 3306);
-  *        std::cout << connection.query("SELECT 'Hello, World!'").store().at(0).at(0).getString() << std::endl;
   *
   * Or with Poco library configuration:
   *        mysqlxx::Connection connection("mysql_params");
diff --git a/base/mysqlxx/Query.cpp b/base/mysqlxx/Query.cpp
index ab9bb174d4a..f3485c54edc 100644
--- a/base/mysqlxx/Query.cpp
+++ b/base/mysqlxx/Query.cpp
@@ -71,16 +71,6 @@ UseQueryResult Query::use()
     return UseQueryResult(res, conn, this);
 }
 
-StoreQueryResult Query::store()
-{
-    executeImpl();
-    MYSQL_RES * res = mysql_store_result(conn->getDriver());
-    if (!res)
-        checkError(conn->getDriver());
-
-    return StoreQueryResult(res, conn, this);
-}
-
 void Query::execute()
 {
     executeImpl();
diff --git a/base/mysqlxx/Query.h b/base/mysqlxx/Query.h
index 1d3ab9678d5..036e8952bc3 100644
--- a/base/mysqlxx/Query.h
+++ b/base/mysqlxx/Query.h
@@ -3,7 +3,6 @@
 #include <sstream>
 
 #include <mysqlxx/UseQueryResult.h>
-#include <mysqlxx/StoreQueryResult.h>
 
 
 namespace mysqlxx
@@ -46,11 +45,6 @@ public:
       */
     UseQueryResult use();
 
-    /** Выполнить запрос с загрузкой на клиента всех строк.
-      * Требуется оперативка, чтобы вместить весь результат, зато к строкам можно обращаться в произвольном порядке.
-      */
-    StoreQueryResult store();
-
     /// Значение auto increment после последнего INSERT-а.
     UInt64 insertID();
 
diff --git a/base/mysqlxx/ResultBase.h b/base/mysqlxx/ResultBase.h
index 4f2ab2eb0a2..d08922a269c 100644
--- a/base/mysqlxx/ResultBase.h
+++ b/base/mysqlxx/ResultBase.h
@@ -9,7 +9,7 @@ class Connection;
 class Query;
 
 
-/** Базовый класс для UseQueryResult и StoreQueryResult.
+/** Базовый класс для UseQueryResult.
   * Содержит общую часть реализации,
   * Ссылается на Connection. Если уничтожить Connection, то пользоваться ResultBase и любым результатом нельзя.
   * Использовать объект можно только для результата одного запроса!
diff --git a/base/mysqlxx/Row.h b/base/mysqlxx/Row.h
index a0b88638546..d668fdbd29a 100644
--- a/base/mysqlxx/Row.h
+++ b/base/mysqlxx/Row.h
@@ -35,7 +35,7 @@ public:
     {
     }
 
-    /** Для того, чтобы создать Row, используйте соответствующие методы UseQueryResult или StoreQueryResult. */
+    /** Для того, чтобы создать Row, используйте соответствующие методы UseQueryResult. */
     Row(MYSQL_ROW row_, ResultBase * res_, MYSQL_LENGTHS lengths_)
         : row(row_), res(res_), lengths(lengths_)
     {
diff --git a/base/mysqlxx/UseQueryResult.h b/base/mysqlxx/UseQueryResult.h
index 3a641020dcf..37cbbd19669 100644
--- a/base/mysqlxx/UseQueryResult.h
+++ b/base/mysqlxx/UseQueryResult.h
@@ -12,8 +12,7 @@ class Connection;
 
 /** Результат выполнения запроса, предназначенный для чтения строк, одна за другой.
   * В памяти при этом хранится только одна, текущая строка.
-  * В отличие от StoreQueryResult, произвольный доступ к строкам невозможен,
-  *  а также, при чтении следующей строки, предыдущая становится некорректной.
+  * При чтении следующей строки, предыдущая становится некорректной.
   * Вы обязаны прочитать все строки из результата
   *  (вызывать функцию fetch(), пока она не вернёт значение, преобразующееся к false),
   *  иначе при следующем запросе будет выкинуто исключение с текстом "Commands out of sync".
diff --git a/base/mysqlxx/Value.h b/base/mysqlxx/Value.h
index dfa86e8aa7d..57cfd452045 100644
--- a/base/mysqlxx/Value.h
+++ b/base/mysqlxx/Value.h
@@ -25,7 +25,7 @@ class ResultBase;
 
 /** Represents a single value read from MySQL.
   * It doesn't owns the value. It's just a wrapper of a pair (const char *, size_t).
-  * If the UseQueryResult/StoreQueryResult or Connection is destroyed,
+  * If the UseQueryResult or Connection is destroyed,
   *  or you have read the next Row while using UseQueryResult, then the object is invalidated.
   * Allows to transform (parse) the value to various data types:
   * - with getUInt(), getString(), ... (recommended);
diff --git a/base/mysqlxx/tests/mysqlxx_test.cpp b/base/mysqlxx/tests/mysqlxx_test.cpp
index cf304a5cb5f..c505d34a58d 100644
--- a/base/mysqlxx/tests/mysqlxx_test.cpp
+++ b/base/mysqlxx/tests/mysqlxx_test.cpp
@@ -38,15 +38,6 @@ int main(int, char **)
             }
         }
 
-        {
-            mysqlxx::Query query = connection.query();
-            query << "SELECT 1234567890 abc, 12345.67890 def UNION ALL SELECT 9876543210, 98765.43210";
-            mysqlxx::StoreQueryResult result = query.store();
-
-            std::cerr << result.at(0)["abc"].getUInt() << ", " << result.at(0)["def"].getDouble() << std::endl
-                << result.at(1)["abc"].getUInt() << ", " << result.at(1)["def"].getDouble() << std::endl;
-        }
-
         {
             mysqlxx::UseQueryResult result = connection.query("SELECT 'abc\\\\def' x").use();
             mysqlxx::Row row = result.fetch();
@@ -54,27 +45,6 @@ int main(int, char **)
             std::cerr << row << std::endl;
         }
 
-        {
-            mysqlxx::Query query = connection.query("SEL");
-            query << "ECT 1";
-
-            std::cerr << query.store().at(0).at(0) << std::endl;
-        }
-
-        {
-            /// Копирование Query
-            mysqlxx::Query query = connection.query("SELECT 'Ok' x");
-            using Queries = std::vector<mysqlxx::Query>;
-            Queries queries;
-            queries.push_back(query);
-
-            for (auto & q : queries)
-            {
-                std::cerr << q.str() << std::endl;
-                std::cerr << q.store().at(0) << std::endl;
-            }
-        }
-
         {
             /// Копирование Query
             mysqlxx::Query query1 = connection.query("SELECT");
@@ -84,62 +54,6 @@ int main(int, char **)
             std::cerr << query1.str() << ", " << query2.str() << std::endl;
         }
 
-        {
-            /// Копирование Query
-            using Queries = std::list<mysqlxx::Query>;
-            Queries queries;
-            queries.push_back(connection.query("SELECT"));
-            mysqlxx::Query & qref = queries.back();
-            qref << " 1";
-
-            for (auto & query : queries)
-            {
-                std::cerr << query.str() << std::endl;
-                std::cerr << query.store().at(0) << std::endl;
-            }
-        }
-
-        {
-            /// Транзакции
-            connection.query("DROP TABLE IF EXISTS tmp").execute();
-            connection.query("CREATE TABLE tmp (x INT, PRIMARY KEY (x)) ENGINE = InnoDB").execute();
-
-            mysqlxx::Transaction trans(connection);
-            connection.query("INSERT INTO tmp VALUES (1)").execute();
-
-            std::cerr << connection.query("SELECT * FROM tmp").store().size() << std::endl;
-
-            trans.rollback();
-
-            std::cerr << connection.query("SELECT * FROM tmp").store().size() << std::endl;
-        }
-
-        {
-            /// Транзакции
-            connection.query("DROP TABLE IF EXISTS tmp").execute();
-            connection.query("CREATE TABLE tmp (x INT, PRIMARY KEY (x)) ENGINE = InnoDB").execute();
-
-            {
-                mysqlxx::Transaction trans(connection);
-                connection.query("INSERT INTO tmp VALUES (1)").execute();
-                std::cerr << connection.query("SELECT * FROM tmp").store().size() << std::endl;
-            }
-
-            std::cerr << connection.query("SELECT * FROM tmp").store().size() << std::endl;
-        }
-
-        {
-            /// Транзакции
-            mysqlxx::Connection connection2("test", "127.0.0.1", "root", "qwerty", 3306);
-            connection2.query("DROP TABLE IF EXISTS tmp").execute();
-            connection2.query("CREATE TABLE tmp (x INT, PRIMARY KEY (x)) ENGINE = InnoDB").execute();
-
-            mysqlxx::Transaction trans(connection2);
-            connection2.query("INSERT INTO tmp VALUES (1)").execute();
-            std::cerr << connection2.query("SELECT * FROM tmp").store().size() << std::endl;
-        }
-        std::cerr << connection.query("SELECT * FROM tmp").store().size() << std::endl;
-
         {
             /// NULL
             mysqlxx::Null<int> x = mysqlxx::null;
@@ -152,59 +66,6 @@ int main(int, char **)
             std::cerr << (x == 1 ? "Ok" : "Fail") << std::endl;
             std::cerr << (x.isNull() ? "Fail" : "Ok") << std::endl;
         }
-
-        {
-            /// Исключения при попытке достать значение не того типа
-            try
-            {
-                connection.query("SELECT -1").store().at(0).at(0).getUInt();
-                std::cerr << "Fail" << std::endl;
-            }
-            catch (const mysqlxx::Exception & e)
-            {
-                std::cerr << "Ok, " << e.message() << std::endl;
-            }
-
-            try
-            {
-                connection.query("SELECT 'xxx'").store().at(0).at(0).getInt();
-                std::cerr << "Fail" << std::endl;
-            }
-            catch (const mysqlxx::Exception & e)
-            {
-                std::cerr << "Ok, " << e.message() << std::endl;
-            }
-
-            try
-            {
-                connection.query("SELECT NULL").store().at(0).at(0).getString();
-                std::cerr << "Fail" << std::endl;
-            }
-            catch (const mysqlxx::Exception & e)
-            {
-                std::cerr << "Ok, " << e.message() << std::endl;
-            }
-
-            try
-            {
-                connection.query("SELECT 123").store().at(0).at(0).getDate();
-                std::cerr << "Fail" << std::endl;
-            }
-            catch (const mysqlxx::Exception & e)
-            {
-                std::cerr << "Ok, " << e.message() << std::endl;
-            }
-
-            try
-            {
-                connection.query("SELECT '2011-01-01'").store().at(0).at(0).getDateTime();
-                std::cerr << "Fail" << std::endl;
-            }
-            catch (const mysqlxx::Exception & e)
-            {
-                std::cerr << "Ok, " << e.message() << std::endl;
-            }
-        }
     }
     catch (const mysqlxx::Exception & e)
     {
diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h
index 51ad1486520..1578633c13d 100644
--- a/src/Columns/ColumnDecimal.h
+++ b/src/Columns/ColumnDecimal.h
@@ -136,7 +136,7 @@ public:
     Field operator[](size_t n) const override { return DecimalField(data[n], scale); }
     void get(size_t n, Field & res) const override { res = (*this)[n]; }
     bool getBool(size_t n) const override { return bool(data[n].value); }
-    Int64 getInt(size_t n) const override { return Int64(data[n].value * scale); }
+    Int64 getInt(size_t n) const override { return Int64(data[n].value) * scale; }
     UInt64 get64(size_t n) const override;
     bool isDefaultAt(size_t n) const override { return data[n].value == 0; }
 
diff --git a/src/Parsers/ASTWithElement.cpp b/src/Parsers/ASTWithElement.cpp
index 3f67722a5c7..ce39086eb4a 100644
--- a/src/Parsers/ASTWithElement.cpp
+++ b/src/Parsers/ASTWithElement.cpp
@@ -18,7 +18,7 @@ void ASTWithElement::formatImpl(const FormatSettings & settings, FormatState & s
 {
     settings.writeIdentifier(name);
     settings.ostr << (settings.hilite ? hilite_keyword : "") << " AS " << (settings.hilite ? hilite_none : "");
-    dynamic_cast<const ASTWithAlias *>(subquery.get())->formatImplWithoutAlias(settings, state, frame);
+    dynamic_cast<const ASTWithAlias &>(*subquery).formatImplWithoutAlias(settings, state, frame);
 }
 
 }
diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp
index 019ffc73d81..ec6a7a65573 100644
--- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp
+++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp
@@ -133,112 +133,112 @@ ORC_UNIQUE_PTR<orc::Type> ORCBlockOutputFormat::getORCType(const DataTypePtr & t
 
 template <typename NumberType, typename NumberVectorBatch, typename ConvertFunc>
 void ORCBlockOutputFormat::writeNumbers(
-        orc::ColumnVectorBatch * orc_column,
+        orc::ColumnVectorBatch & orc_column,
         const IColumn & column,
         const PaddedPODArray<UInt8> * null_bytemap,
         ConvertFunc convert)
 {
-    NumberVectorBatch * number_orc_column = dynamic_cast<NumberVectorBatch *>(orc_column);
+    NumberVectorBatch & number_orc_column = dynamic_cast<NumberVectorBatch &>(orc_column);
     const auto & number_column = assert_cast<const ColumnVector<NumberType> &>(column);
-    number_orc_column->resize(number_column.size());
+    number_orc_column.resize(number_column.size());
 
     for (size_t i = 0; i != number_column.size(); ++i)
     {
         if (null_bytemap && (*null_bytemap)[i])
         {
-            number_orc_column->notNull[i] = 0;
+            number_orc_column.notNull[i] = 0;
             continue;
         }
-        number_orc_column->data[i] = convert(number_column.getElement(i));
+        number_orc_column.data[i] = convert(number_column.getElement(i));
     }
-    number_orc_column->numElements = number_column.size();
+    number_orc_column.numElements = number_column.size();
 }
 
 template <typename Decimal, typename DecimalVectorBatch, typename ConvertFunc>
 void ORCBlockOutputFormat::writeDecimals(
-        orc::ColumnVectorBatch * orc_column,
+        orc::ColumnVectorBatch & orc_column,
         const IColumn & column,
         DataTypePtr & type,
         const PaddedPODArray<UInt8> * null_bytemap,
         ConvertFunc convert)
 {
-    DecimalVectorBatch *decimal_orc_column = dynamic_cast<DecimalVectorBatch *>(orc_column);
+    DecimalVectorBatch & decimal_orc_column = dynamic_cast<DecimalVectorBatch &>(orc_column);
     const auto & decimal_column = assert_cast<const ColumnDecimal<Decimal> &>(column);
     const auto * decimal_type = typeid_cast<const DataTypeDecimal<Decimal> *>(type.get());
-    decimal_orc_column->precision = decimal_type->getPrecision();
-    decimal_orc_column->scale = decimal_type->getScale();
-    decimal_orc_column->resize(decimal_column.size());
+    decimal_orc_column.precision = decimal_type->getPrecision();
+    decimal_orc_column.scale = decimal_type->getScale();
+    decimal_orc_column.resize(decimal_column.size());
     for (size_t i = 0; i != decimal_column.size(); ++i)
     {
         if (null_bytemap && (*null_bytemap)[i])
         {
-            decimal_orc_column->notNull[i] = 0;
+            decimal_orc_column.notNull[i] = 0;
             continue;
         }
-        decimal_orc_column->values[i] = convert(decimal_column.getElement(i).value);
+        decimal_orc_column.values[i] = convert(decimal_column.getElement(i).value);
     }
-    decimal_orc_column->numElements = decimal_column.size();
+    decimal_orc_column.numElements = decimal_column.size();
 }
 
 template <typename ColumnType>
 void ORCBlockOutputFormat::writeStrings(
-        orc::ColumnVectorBatch * orc_column,
+        orc::ColumnVectorBatch & orc_column,
         const IColumn & column,
         const PaddedPODArray<UInt8> * null_bytemap)
 {
-    orc::StringVectorBatch * string_orc_column = dynamic_cast<orc::StringVectorBatch *>(orc_column);
+    orc::StringVectorBatch & string_orc_column = dynamic_cast<orc::StringVectorBatch &>(orc_column);
     const auto & string_column = assert_cast<const ColumnType &>(column);
-    string_orc_column->resize(string_column.size());
+    string_orc_column.resize(string_column.size());
 
     for (size_t i = 0; i != string_column.size(); ++i)
     {
         if (null_bytemap && (*null_bytemap)[i])
         {
-            string_orc_column->notNull[i] = 0;
+            string_orc_column.notNull[i] = 0;
             continue;
         }
         const StringRef & string = string_column.getDataAt(i);
-        string_orc_column->data[i] = const_cast<char *>(string.data);
-        string_orc_column->length[i] = string.size;
+        string_orc_column.data[i] = const_cast<char *>(string.data);
+        string_orc_column.length[i] = string.size;
     }
-    string_orc_column->numElements = string_column.size();
+    string_orc_column.numElements = string_column.size();
 }
 
 template <typename ColumnType, typename GetSecondsFunc, typename GetNanosecondsFunc>
 void ORCBlockOutputFormat::writeDateTimes(
-        orc::ColumnVectorBatch * orc_column,
+        orc::ColumnVectorBatch & orc_column,
         const IColumn & column,
         const PaddedPODArray<UInt8> * null_bytemap,
         GetSecondsFunc get_seconds,
         GetNanosecondsFunc get_nanoseconds)
 {
-    orc::TimestampVectorBatch * timestamp_orc_column = dynamic_cast<orc::TimestampVectorBatch *>(orc_column);
+    orc::TimestampVectorBatch & timestamp_orc_column = dynamic_cast<orc::TimestampVectorBatch &>(orc_column);
     const auto & timestamp_column = assert_cast<const ColumnType &>(column);
-    timestamp_orc_column->resize(timestamp_column.size());
+    timestamp_orc_column.resize(timestamp_column.size());
 
     for (size_t i = 0; i != timestamp_column.size(); ++i)
     {
         if (null_bytemap && (*null_bytemap)[i])
         {
-            timestamp_orc_column->notNull[i] = 0;
+            timestamp_orc_column.notNull[i] = 0;
             continue;
         }
-        timestamp_orc_column->data[i] = get_seconds(timestamp_column.getElement(i));
-        timestamp_orc_column->nanoseconds[i] = get_nanoseconds(timestamp_column.getElement(i));
+        timestamp_orc_column.data[i] = get_seconds(timestamp_column.getElement(i));
+        timestamp_orc_column.nanoseconds[i] = get_nanoseconds(timestamp_column.getElement(i));
     }
-    timestamp_orc_column->numElements = timestamp_column.size();
+    timestamp_orc_column.numElements = timestamp_column.size();
 }
 
 void ORCBlockOutputFormat::writeColumn(
-        orc::ColumnVectorBatch * orc_column,
-        const IColumn & column,
-        DataTypePtr & type,
-        const PaddedPODArray<UInt8> * null_bytemap)
+    orc::ColumnVectorBatch & orc_column,
+    const IColumn & column,
+    DataTypePtr & type,
+    const PaddedPODArray<UInt8> * null_bytemap)
 {
     if (null_bytemap)
     {
-        orc_column->hasNulls = true;
-        orc_column->notNull.resize(column.size());
+        orc_column.hasNulls = true;
+        orc_column.notNull.resize(column.size());
     }
     switch (type->getTypeId())
     {
@@ -364,20 +364,20 @@ void ORCBlockOutputFormat::writeColumn(
         }
         case TypeIndex::Array:
         {
-            orc::ListVectorBatch * list_orc_column = dynamic_cast<orc::ListVectorBatch *>(orc_column);
+            orc::ListVectorBatch & list_orc_column = dynamic_cast<orc::ListVectorBatch &>(orc_column);
             const auto & list_column = assert_cast<const ColumnArray &>(column);
             auto nested_type = assert_cast<const DataTypeArray &>(*type).getNestedType();
             const ColumnArray::Offsets & offsets = list_column.getOffsets();
-            list_orc_column->resize(list_column.size());
+            list_orc_column.resize(list_column.size());
             /// The length of list i in ListVectorBatch is offsets[i+1] - offsets[i].
-            list_orc_column->offsets[0] = 0;
+            list_orc_column.offsets[0] = 0;
             for (size_t i = 0; i != list_column.size(); ++i)
             {
-                list_orc_column->offsets[i + 1] = offsets[i];
+                list_orc_column.offsets[i + 1] = offsets[i];
             }
-            orc::ColumnVectorBatch * nested_orc_column = list_orc_column->elements.get();
+            orc::ColumnVectorBatch & nested_orc_column = *list_orc_column.elements;
             writeColumn(nested_orc_column, list_column.getData(), nested_type, null_bytemap);
-            list_orc_column->numElements = list_column.size();
+            list_orc_column.numElements = list_column.size();
             break;
         }
         default:
@@ -414,12 +414,12 @@ void ORCBlockOutputFormat::consume(Chunk chunk)
     /// getMaxColumnSize is needed to write arrays.
     /// The size of the batch must be no less than total amount of array elements.
     ORC_UNIQUE_PTR<orc::ColumnVectorBatch> batch = writer->createRowBatch(getMaxColumnSize(chunk));
-    orc::StructVectorBatch *root = dynamic_cast<orc::StructVectorBatch *>(batch.get());
+    orc::StructVectorBatch & root = dynamic_cast<orc::StructVectorBatch &>(*batch);
     for (size_t i = 0; i != columns_num; ++i)
     {
-        writeColumn(root->fields[i], *chunk.getColumns()[i], data_types[i], nullptr);
+        writeColumn(*root.fields[i], *chunk.getColumns()[i], data_types[i], nullptr);
     }
-    root->numElements = rows_num;
+    root.numElements = rows_num;
     writer->add(*batch);
 }
 
diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.h b/src/Processors/Formats/Impl/ORCBlockOutputFormat.h
index ce599dabe23..05053317533 100644
--- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.h
+++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.h
@@ -48,23 +48,23 @@ private:
     /// ConvertFunc is needed for type UInt8, because firstly UInt8 (char8_t) must be
     /// converted to unsigned char (bugprone-signed-char-misuse in clang).
     template <typename NumberType, typename NumberVectorBatch, typename ConvertFunc>
-    void writeNumbers(orc::ColumnVectorBatch * orc_column, const IColumn & column, const PaddedPODArray<UInt8> * null_bytemap, ConvertFunc convert);
+    void writeNumbers(orc::ColumnVectorBatch & orc_column, const IColumn & column, const PaddedPODArray<UInt8> * null_bytemap, ConvertFunc convert);
 
     /// ConvertFunc is needed to convert ClickHouse Int128 to ORC Int128.
     template <typename Decimal, typename DecimalVectorBatch, typename ConvertFunc>
-    void writeDecimals(orc::ColumnVectorBatch * orc_column, const IColumn & column, DataTypePtr & type,
+    void writeDecimals(orc::ColumnVectorBatch & orc_column, const IColumn & column, DataTypePtr & type,
                         const PaddedPODArray<UInt8> * null_bytemap, ConvertFunc convert);
 
     template <typename ColumnType>
-    void writeStrings(orc::ColumnVectorBatch * orc_column, const IColumn & column, const PaddedPODArray<UInt8> * null_bytemap);
+    void writeStrings(orc::ColumnVectorBatch & orc_column, const IColumn & column, const PaddedPODArray<UInt8> * null_bytemap);
 
     /// ORC column TimestampVectorBatch stores only seconds and nanoseconds,
     /// GetSecondsFunc and GetNanosecondsFunc are needed to extract them from DataTime type.
     template <typename ColumnType, typename GetSecondsFunc, typename GetNanosecondsFunc>
-    void writeDateTimes(orc::ColumnVectorBatch * orc_column, const IColumn & column, const PaddedPODArray<UInt8> * null_bytemap,
+    void writeDateTimes(orc::ColumnVectorBatch & orc_column, const IColumn & column, const PaddedPODArray<UInt8> * null_bytemap,
                         GetSecondsFunc get_seconds, GetNanosecondsFunc get_nanoseconds);
 
-    void writeColumn(orc::ColumnVectorBatch * orc_column, const IColumn & column, DataTypePtr & type, const PaddedPODArray<UInt8> * null_bytemap);
+    void writeColumn(orc::ColumnVectorBatch & orc_column, const IColumn & column, DataTypePtr & type, const PaddedPODArray<UInt8> * null_bytemap);
 
     /// These two functions are needed to know maximum nested size of arrays to
     /// create an ORC Batch with the appropriate size
diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
index c140ebafec0..3e8b9cc704b 100644
--- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
@@ -357,7 +357,7 @@ bool MergeTreeConditionFullText::atomFromAST(
             return false;
         }
 
-        if (key_arg_pos == 1 && (func_name != "equals" || func_name != "notEquals"))
+        if (key_arg_pos == 1 && (func_name != "equals" && func_name != "notEquals"))
             return false;
         else if (!token_extractor->supportLike() && (func_name == "like" || func_name == "notLike"))
             return false;

From 191389d040b822956a02775b7b36b4a2327f1acd Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Tue, 2 Feb 2021 22:08:32 +0300
Subject: [PATCH 0547/1238] Fix some of the issues found by Coverity

---
 base/common/ReplxxLineReader.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/base/common/ReplxxLineReader.cpp b/base/common/ReplxxLineReader.cpp
index 920d86849f9..fcd1610e589 100644
--- a/base/common/ReplxxLineReader.cpp
+++ b/base/common/ReplxxLineReader.cpp
@@ -192,7 +192,7 @@ void ReplxxLineReader::openEditor()
     }
 
     const char * editor = std::getenv("EDITOR");
-    if (!editor)
+    if (!editor || !*editor)
         editor = "vim";
 
     replxx::Replxx::State state(rx.get_state());

From 9cb0b36249d9a2053dbefb37f2f75f423ac51f67 Mon Sep 17 00:00:00 2001
From: sevirov <72220289+sevirov@users.noreply.github.com>
Date: Tue, 2 Feb 2021 22:17:52 +0300
Subject: [PATCH 0548/1238] Update docs/ru/operations/system-tables/part_log.md

Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com>
---
 docs/ru/operations/system-tables/part_log.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/operations/system-tables/part_log.md b/docs/ru/operations/system-tables/part_log.md
index 923e8579e20..166ee96c867 100644
--- a/docs/ru/operations/system-tables/part_log.md
+++ b/docs/ru/operations/system-tables/part_log.md
@@ -20,7 +20,7 @@
 -   `database` ([String](../../sql-reference/data-types/string.md)) — имя базы данных, в которой находится кусок.
 -   `table` ([String](../../sql-reference/data-types/string.md)) — имя таблицы, в которой находится кусок.
 -   `part_name` ([String](../../sql-reference/data-types/string.md)) — имя куска.
--   `partition_id` ([String](../../sql-reference/data-types/string.md)) — идентификатор партиции, в которую был добавлен кусок. В столбце будет значение ‘all’, если таблица партициируется по выражению `tuple()`.
+-   `partition_id` ([String](../../sql-reference/data-types/string.md)) — идентификатор партиции, в которую был добавлен кусок. В столбце будет значение `all`, если таблица партициируется по выражению `tuple()`.
 -   `path_on_disk` ([String](../../sql-reference/data-types/string.md)) — абсолютный путь к папке с файлами кусков данных.
 -   `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — число строк в куске.
 -   `size_in_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — размер куска данных в байтах.

From d4b3e3018f54ee4f410ff6943178cfc582c57113 Mon Sep 17 00:00:00 2001
From: sevirov <72220289+sevirov@users.noreply.github.com>
Date: Tue, 2 Feb 2021 22:20:43 +0300
Subject: [PATCH 0549/1238] Update docs/ru/operations/system-tables/part_log.md

Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com>
---
 docs/ru/operations/system-tables/part_log.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/operations/system-tables/part_log.md b/docs/ru/operations/system-tables/part_log.md
index 166ee96c867..1dcebd0fc3f 100644
--- a/docs/ru/operations/system-tables/part_log.md
+++ b/docs/ru/operations/system-tables/part_log.md
@@ -25,7 +25,7 @@
 -   `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — число строк в куске.
 -   `size_in_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — размер куска данных в байтах.
 -   `merged_from` ([Array(String)](../../sql-reference/data-types/array.md)) — массив имён кусков, из которых образован текущий кусок в результате слияния (также столбец заполняется в случае скачивания уже смерженного куска).
--   `bytes_uncompressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — количество прочитанных разжатых байт.
+-   `bytes_uncompressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — количество прочитанных не сжатых байт.
 -   `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — сколько было прочитано строк при слиянии кусков.
 -   `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — сколько было прочитано байт при слиянии кусков.
 -   `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — максимальная разница между выделенной и освобождённой памятью в контексте потока.

From 698d5ac022e8ec66a44933aaca8510f3c281eb18 Mon Sep 17 00:00:00 2001
From: Dmitriy <sevirov@yandex-team.ru>
Date: Tue, 2 Feb 2021 22:27:06 +0300
Subject: [PATCH 0550/1238] Fix the description of the system.part_log system
 table
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Поправил на основании комментарий в PR.
---
 docs/en/operations/system-tables/part_log.md | 4 ++--
 docs/ru/operations/system-tables/part_log.md | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/en/operations/system-tables/part_log.md b/docs/en/operations/system-tables/part_log.md
index 1d2543a2be2..a85be5b210e 100644
--- a/docs/en/operations/system-tables/part_log.md
+++ b/docs/en/operations/system-tables/part_log.md
@@ -20,7 +20,7 @@ The `system.part_log` table contains the following columns:
 -   `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database the data part is in.
 -   `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table the data part is in.
 -   `part_name` ([String](../../sql-reference/data-types/string.md)) — Name of the data part.
--   `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition that the data part was inserted to. The column takes the ‘all’ value if the partitioning is by `tuple()`.
+-   `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition that the data part was inserted to. The column takes the `all` value if the partitioning is by `tuple()`.
 -   `path_on_disk` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the folder with data part files.
 -   `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows in the data part.
 -   `size_in_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of the data part in bytes.
@@ -37,7 +37,7 @@ The `system.part_log` table is created after the first inserting data to the `Me
 **Example**
 
 ``` sql
-SELECT * FROM system.part_log LIMIT 1 \G
+SELECT * FROM system.part_log LIMIT 1 FORMAT Vertical;
 ```
 
 ``` text
diff --git a/docs/ru/operations/system-tables/part_log.md b/docs/ru/operations/system-tables/part_log.md
index 1dcebd0fc3f..d214cc8f899 100644
--- a/docs/ru/operations/system-tables/part_log.md
+++ b/docs/ru/operations/system-tables/part_log.md
@@ -37,7 +37,7 @@
 **Пример**
 
 ``` sql
-SELECT * FROM system.part_log LIMIT 1 \G
+SELECT * FROM system.part_log LIMIT 1 FORMAT Vertical;
 ```
 
 ``` text

From 1e0a528bac833fd9aef353483af3b4739aa3e3cf Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Tue, 2 Feb 2021 22:33:57 +0300
Subject: [PATCH 0551/1238] Fix possible test flakyness

---
 tests/integration/test_testkeeper_multinode/test.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/integration/test_testkeeper_multinode/test.py b/tests/integration/test_testkeeper_multinode/test.py
index 16ca00124a5..0a28b76750b 100644
--- a/tests/integration/test_testkeeper_multinode/test.py
+++ b/tests/integration/test_testkeeper_multinode/test.py
@@ -228,6 +228,7 @@ def test_blocade_leader_twice(started_cluster):
     for node in [node1, node2, node3]:
         for i in range(100):
             try:
+                node.query("SYSTEM RESTART REPLICA t2", timeout=10)
                 node.query("SYSTEM SYNC REPLICA t2", timeout=10)
                 break
             except Exception as ex:

From 6456ccf0da4ae12568c559b40015459da07fb6d6 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Tue, 2 Feb 2021 22:39:04 +0300
Subject: [PATCH 0552/1238] better test

---
 src/Databases/DatabaseReplicatedWorker.h      |  2 +-
 src/Interpreters/DatabaseCatalog.cpp          | 18 +++--
 src/Interpreters/DatabaseCatalog.h            |  7 +-
 src/Interpreters/InterpreterAlterQuery.cpp    | 10 +--
 src/Interpreters/InterpreterCreateQuery.cpp   |  2 +-
 src/Interpreters/InterpreterDropQuery.cpp     |  2 +-
 src/Interpreters/InterpreterRenameQuery.cpp   | 12 +++-
 src/Interpreters/InterpreterRenameQuery.h     |  5 +-
 .../MergeTree/registerStorageMergeTree.cpp    |  8 ++-
 .../configs/config.xml                        | 31 ++++++++
 .../test_replicated_database/test.py          | 71 +++++++++++--------
 11 files changed, 112 insertions(+), 56 deletions(-)

diff --git a/src/Databases/DatabaseReplicatedWorker.h b/src/Databases/DatabaseReplicatedWorker.h
index 7e6d64dab0b..6e29e48469b 100644
--- a/src/Databases/DatabaseReplicatedWorker.h
+++ b/src/Databases/DatabaseReplicatedWorker.h
@@ -21,7 +21,7 @@ private:
 
     DDLTaskPtr initAndCheckTask(const String & entry_name, String & out_reason, const ZooKeeperPtr & zookeeper) override;
 
-    DatabaseReplicated * database;
+    DatabaseReplicated * const database;
     mutable std::mutex mutex;
     std::condition_variable wait_current_task_change;
     String current_task;
diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp
index 18cf69675ba..4ab3fb28785 100644
--- a/src/Interpreters/DatabaseCatalog.cpp
+++ b/src/Interpreters/DatabaseCatalog.cpp
@@ -956,21 +956,25 @@ DDLGuard::DDLGuard(Map & map_, std::shared_mutex & db_mutex_, std::unique_lock<s
     ++it->second.counter;
     guards_lock.unlock();
     table_lock = std::unique_lock(*it->second.mutex);
-    bool is_database = elem.empty();
-    if (!is_database)
+    is_database_guard = elem.empty();
+    if (!is_database_guard)
     {
 
         bool locked_database_for_read = db_mutex.try_lock_shared();
         if (!locked_database_for_read)
         {
-            removeTableLock();
+            releaseTableLock();
             throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} is currently dropped or renamed", database_name);
         }
     }
 }
 
-void DDLGuard::removeTableLock()
+void DDLGuard::releaseTableLock() noexcept
 {
+    if (table_lock_removed)
+        return;
+
+    table_lock_removed = true;
     guards_lock.lock();
     --it->second.counter;
     if (!it->second.counter)
@@ -978,14 +982,14 @@ void DDLGuard::removeTableLock()
         table_lock.unlock();
         map.erase(it);
     }
+    guards_lock.unlock();
 }
 
 DDLGuard::~DDLGuard()
 {
-    bool is_database = it->first.empty();
-    if (!is_database)
+    if (!is_database_guard)
         db_mutex.unlock_shared();
-    removeTableLock();
+    releaseTableLock();
 }
 
 }
diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h
index 5146c786f64..c9f031ef678 100644
--- a/src/Interpreters/DatabaseCatalog.h
+++ b/src/Interpreters/DatabaseCatalog.h
@@ -54,14 +54,17 @@ public:
     DDLGuard(Map & map_, std::shared_mutex & db_mutex_, std::unique_lock<std::mutex> guards_lock_, const String & elem, const String & database_name);
     ~DDLGuard();
 
+    /// Unlocks table name, keeps holding read lock for database name
+    void releaseTableLock() noexcept;
+
 private:
     Map & map;
     std::shared_mutex & db_mutex;
     Map::iterator it;
     std::unique_lock<std::mutex> guards_lock;
     std::unique_lock<std::mutex> table_lock;
-
-    void removeTableLock();
+    bool table_lock_removed = false;
+    bool is_database_guard = false;
 };
 
 
diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp
index 0edd1a401b3..612f9833af5 100644
--- a/src/Interpreters/InterpreterAlterQuery.cpp
+++ b/src/Interpreters/InterpreterAlterQuery.cpp
@@ -47,17 +47,19 @@ BlockIO InterpreterAlterQuery::execute()
 
     context.checkAccess(getRequiredAccess());
     auto table_id = context.resolveStorageID(alter, Context::ResolveOrdinary);
-    StoragePtr table = DatabaseCatalog::instance().getTable(table_id, context);
-    auto alter_lock = table->lockForAlter(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
-    auto metadata_snapshot = table->getInMemoryMetadataPtr();
 
     DatabasePtr database = DatabaseCatalog::instance().getDatabase(table_id.database_name);
     if (typeid_cast<DatabaseReplicated *>(database.get()) && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
     {
-        alter_lock.reset();
+        auto guard = DatabaseCatalog::instance().getDDLGuard(table_id.database_name, table_id.table_name);
+        guard->releaseTableLock();
         return typeid_cast<DatabaseReplicated *>(database.get())->propose(query_ptr);
     }
 
+    StoragePtr table = DatabaseCatalog::instance().getTable(table_id, context);
+    auto alter_lock = table->lockForAlter(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
+    auto metadata_snapshot = table->getInMemoryMetadataPtr();
+
     //FIXME commit MetadataTransaction for all ALTER kinds. Now its' implemented only for metadata alter.
 
     /// Add default database to table identifiers that we can encounter in e.g. default expressions,
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index d91f3140a96..8d344545c8a 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -885,7 +885,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
         if (typeid_cast<DatabaseReplicated *>(database.get()) && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
         {
             assertOrSetUUID(create, database);
-            guard.reset();
+            guard->releaseTableLock();
             return typeid_cast<DatabaseReplicated *>(database.get())->propose(query_ptr);
         }
     }
diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp
index 68680f27ea4..db2f463893e 100644
--- a/src/Interpreters/InterpreterDropQuery.cpp
+++ b/src/Interpreters/InterpreterDropQuery.cpp
@@ -137,7 +137,7 @@ BlockIO InterpreterDropQuery::executeToTableImpl(const ASTDropQuery & query, Dat
                 throw Exception(ErrorCodes::INCORRECT_QUERY, "DETACH TABLE is not allowed for Replicated databases. "
                                                              "Use DETACH TABLE PERMANENTLY or SYSTEM RESTART REPLICA");
 
-            ddl_guard.reset();
+            ddl_guard->releaseTableLock();
             table.reset();
             return typeid_cast<DatabaseReplicated *>(database.get())->propose(query.clone());
         }
diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp
index 52faa89eff1..d2f79ba071c 100644
--- a/src/Interpreters/InterpreterRenameQuery.cpp
+++ b/src/Interpreters/InterpreterRenameQuery.cpp
@@ -43,6 +43,9 @@ BlockIO InterpreterRenameQuery::execute()
     RenameDescriptions descriptions;
     descriptions.reserve(rename.elements.size());
 
+    /// Don't allow to drop tables (that we are renaming); don't allow to create tables in places where tables will be renamed.
+    TableGuards table_guards;
+
     for (const auto & elem : rename.elements)
     {
         descriptions.emplace_back(elem, current_database);
@@ -64,10 +67,10 @@ BlockIO InterpreterRenameQuery::execute()
     if (rename.database)
         return executeToDatabase(rename, descriptions);
     else
-        return executeToTables(rename, descriptions);
+        return executeToTables(rename, descriptions, table_guards);
 }
 
-BlockIO InterpreterRenameQuery::executeToTables(const ASTRenameQuery & rename, const RenameDescriptions & descriptions)
+BlockIO InterpreterRenameQuery::executeToTables(const ASTRenameQuery & rename, const RenameDescriptions & descriptions, TableGuards & ddl_guards)
 {
     auto & database_catalog = DatabaseCatalog::instance();
 
@@ -83,7 +86,10 @@ BlockIO InterpreterRenameQuery::executeToTables(const ASTRenameQuery & rename, c
                 throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Database {} is Replicated, "
                                 "it does not support renaming of multiple tables in single query.", elem.from_database_name);
 
-            table_guards.clear();
+            UniqueTableName from(elem.from_database_name, elem.from_table_name);
+            UniqueTableName to(elem.to_database_name, elem.to_table_name);
+            ddl_guards[from]->releaseTableLock();
+            ddl_guards[to]->releaseTableLock();
             return typeid_cast<DatabaseReplicated *>(database.get())->propose(query_ptr);
         }
         else
diff --git a/src/Interpreters/InterpreterRenameQuery.h b/src/Interpreters/InterpreterRenameQuery.h
index 2bc84514b4c..0da25f63e8d 100644
--- a/src/Interpreters/InterpreterRenameQuery.h
+++ b/src/Interpreters/InterpreterRenameQuery.h
@@ -57,16 +57,13 @@ public:
     void extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr & ast, const Context &) const override;
 
 private:
-    BlockIO executeToTables(const ASTRenameQuery & rename, const RenameDescriptions & descriptions);
+    BlockIO executeToTables(const ASTRenameQuery & rename, const RenameDescriptions & descriptions, TableGuards & ddl_guards);
     static BlockIO executeToDatabase(const ASTRenameQuery & rename, const RenameDescriptions & descriptions);
 
     AccessRightsElements getRequiredAccess() const;
 
     ASTPtr query_ptr;
     Context & context;
-
-    /// Don't allow to drop tables (that we are renaming); don't allow to create tables in places where tables will be renamed.
-    TableGuards table_guards;
 };
 
 }
diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp
index 9a881a60a69..1d68f788a42 100644
--- a/src/Storages/MergeTree/registerStorageMergeTree.cpp
+++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp
@@ -450,17 +450,21 @@ static StoragePtr create(const StorageFactory::Arguments & args)
             arg_cnt += 2;
         }
         else
-            throw Exception("Expected two string literal arguments: zookeper_path and replica_name", ErrorCodes::BAD_ARGUMENTS);
+            throw Exception("Expected two string literal arguments: zookeeper_path and replica_name", ErrorCodes::BAD_ARGUMENTS);
 
         /// Allow implicit {uuid} macros only for zookeeper_path in ON CLUSTER queries
         bool is_on_cluster = args.local_context.getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY;
-        bool is_replicated_database = args.local_context.getClientInfo().query_kind == ClientInfo::QueryKind::REPLICATED_LOG_QUERY;
+        bool is_replicated_database = args.local_context.getClientInfo().query_kind == ClientInfo::QueryKind::REPLICATED_LOG_QUERY &&
+                                      DatabaseCatalog::instance().getDatabase(args.table_id.database_name)->getEngineName() == "Replicated";
         bool allow_uuid_macro = is_on_cluster || is_replicated_database || args.query.attach;
 
         /// Unfold {database} and {table} macro on table creation, so table can be renamed.
         /// We also unfold {uuid} macro, so path will not be broken after moving table from Atomic to Ordinary database.
         if (!args.attach)
         {
+            if (is_replicated_database && !is_extended_storage_def)
+                throw Exception(ErrorCodes::BAD_ARGUMENTS, "Old syntax is not allowed for ReplicatedMergeTree tables in Replicated databases");
+
             Macros::MacroExpansionInfo info;
             /// NOTE: it's not recursive
             info.expand_special_macros_only = true;
diff --git a/tests/integration/test_replicated_database/configs/config.xml b/tests/integration/test_replicated_database/configs/config.xml
index d751454437c..ebceee3aa5c 100644
--- a/tests/integration/test_replicated_database/configs/config.xml
+++ b/tests/integration/test_replicated_database/configs/config.xml
@@ -1,3 +1,34 @@
 <yandex>
     <database_atomic_delay_before_drop_table_sec>10</database_atomic_delay_before_drop_table_sec>
+
+    <remote_servers>
+        <cluster>
+            <shard>
+                <internal_replication>true</internal_replication>
+                <replica>
+                    <host>main_node</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>dummy_node</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>competing_node</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+            <shard>
+                <internal_replication>true</internal_replication>
+                <replica>
+                    <host>snapshotting_node</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>snapshot_recovering_node</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+        </cluster>
+    </remote_servers>
 </yandex>
diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py
index f99f4517e5a..2471228b55e 100644
--- a/tests/integration/test_replicated_database/test.py
+++ b/tests/integration/test_replicated_database/test.py
@@ -13,6 +13,8 @@ competing_node = cluster.add_instance('competing_node', main_configs=['configs/c
 snapshotting_node = cluster.add_instance('snapshotting_node', main_configs=['configs/config.xml'], with_zookeeper=True, macros={"shard": 2, "replica": 1})
 snapshot_recovering_node = cluster.add_instance('snapshot_recovering_node', main_configs=['configs/config.xml'], with_zookeeper=True, macros={"shard": 2, "replica": 2})
 
+all_nodes = [main_node, dummy_node, competing_node, snapshotting_node, snapshot_recovering_node]
+
 uuid_regex = re.compile("[0-9a-f]{8}\-[0-9a-f]{4}\-[0-9a-f]{4}\-[0-9a-f]{4}\-[0-9a-f]{12}")
 def assert_create_query(nodes, table_name, expected):
     replace_uuid = lambda x: re.sub(uuid_regex, "uuid", x)
@@ -31,11 +33,10 @@ def started_cluster():
     finally:
         cluster.shutdown()
 
-#TODO better tests
-
 def test_create_replicated_table(started_cluster):
-    #FIXME should fail (replicated with old syntax)
-    #main_node.query("CREATE TABLE testdb.replicated_table (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree(d, k, 8192);")
+    assert "Old syntax is not allowed" in \
+           main_node.query_and_get_error("CREATE TABLE testdb.replicated_table (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree('/test/tmp', 'r', d, k, 8192);")
+
     main_node.query("CREATE TABLE testdb.replicated_table (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree ORDER BY k PARTITION BY toYYYYMM(d);")
 
     expected = "CREATE TABLE testdb.replicated_table\\n(\\n    `d` Date,\\n    `k` UInt64,\\n    `i32` Int32\\n)\\n" \
@@ -47,6 +48,7 @@ def test_create_replicated_table(started_cluster):
 
 @pytest.mark.parametrize("engine", ['MergeTree', 'ReplicatedMergeTree'])
 def test_simple_alter_table(started_cluster, engine):
+    # test_simple_alter_table
     name  = "testdb.alter_test_{}".format(engine)
     main_node.query("CREATE TABLE {} "
                     "(CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) "
@@ -69,10 +71,7 @@ def test_simple_alter_table(started_cluster, engine):
 
     assert_create_query([main_node, dummy_node], name, expected)
 
-
-@pytest.mark.dependency(depends=['test_simple_alter_table'])
-@pytest.mark.parametrize("engine", ['MergeTree', 'ReplicatedMergeTree'])
-def test_create_replica_after_delay(started_cluster, engine):
+    # test_create_replica_after_delay
     competing_node.query("CREATE DATABASE IF NOT EXISTS testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica3');")
 
     name  = "testdb.alter_test_{}".format(engine)
@@ -90,13 +89,17 @@ def test_create_replica_after_delay(started_cluster, engine):
 
     assert_create_query([main_node, dummy_node, competing_node], name, expected)
 
-@pytest.mark.dependency(depends=['test_create_replica_after_delay'])
+
 def test_alters_from_different_replicas(started_cluster):
+    # test_alters_from_different_replicas
+    competing_node.query("CREATE DATABASE IF NOT EXISTS testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica3');")
+
     main_node.query("CREATE TABLE testdb.concurrent_test "
                     "(CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) "
                     "ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192);")
 
-    time.sleep(1)   #FIXME
+    main_node.query("CREATE TABLE testdb.dist AS testdb.concurrent_test ENGINE = Distributed(cluster, testdb, concurrent_test, CounterID)")
+
     dummy_node.kill_clickhouse(stop_start_wait_sec=0)
 
     competing_node.query("ALTER TABLE testdb.concurrent_test ADD COLUMN Added0 UInt32;")
@@ -115,50 +118,56 @@ def test_alters_from_different_replicas(started_cluster):
 
     assert_create_query([main_node, competing_node], "testdb.concurrent_test", expected)
 
-@pytest.mark.dependency(depends=['test_alters_from_different_replicas'])
-def test_drop_and_create_table(started_cluster):
+    # test_create_replica_after_delay
     main_node.query("DROP TABLE testdb.concurrent_test")
     main_node.query("CREATE TABLE testdb.concurrent_test "
                     "(CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) "
-                    "ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192);")
+                    "ENGINE = ReplicatedMergeTree ORDER BY CounterID;")
 
     expected = "CREATE TABLE testdb.concurrent_test\\n(\\n    `CounterID` UInt32,\\n    `StartDate` Date,\\n    `UserID` UInt32,\\n" \
                "    `VisitID` UInt32,\\n    `NestedColumn.A` Array(UInt8),\\n    `NestedColumn.S` Array(String),\\n    `ToDrop` UInt32\\n)\\n" \
-               "ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192)"
+               "ENGINE = ReplicatedMergeTree(\\'/clickhouse/tables/uuid/{shard}\\', \\'{replica}\\')\\nORDER BY CounterID\\nSETTINGS index_granularity = 8192"
 
     assert_create_query([main_node, competing_node], "testdb.concurrent_test", expected)
 
-@pytest.mark.dependency(depends=['test_drop_and_create_table'])
-def test_replica_restart(started_cluster):
+    main_node.query("INSERT INTO testdb.dist (CounterID, StartDate, UserID) SELECT number, addDays(toDate('2020-02-02'), number), intHash32(number) FROM numbers(10)")
+
+    # test_replica_restart
     main_node.restart_clickhouse()
 
     expected = "CREATE TABLE testdb.concurrent_test\\n(\\n    `CounterID` UInt32,\\n    `StartDate` Date,\\n    `UserID` UInt32,\\n" \
                "    `VisitID` UInt32,\\n    `NestedColumn.A` Array(UInt8),\\n    `NestedColumn.S` Array(String),\\n    `ToDrop` UInt32\\n)\\n" \
-               "ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192)"
-
-    assert_create_query([main_node, competing_node], "testdb.concurrent_test", expected)
+               "ENGINE = ReplicatedMergeTree(\\'/clickhouse/tables/uuid/{shard}\\', \\'{replica}\\')\\nORDER BY CounterID\\nSETTINGS index_granularity = 8192"
 
 
-@pytest.mark.dependency(depends=['test_replica_restart'])
-def test_snapshot_and_snapshot_recover(started_cluster):
-    snapshotting_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica4');")
-    snapshot_recovering_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica5');")
+    # test_snapshot_and_snapshot_recover
+    snapshotting_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard2', 'replica1');")
+    snapshot_recovering_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard2', 'replica2');")
+    assert_create_query(all_nodes, "testdb.concurrent_test", expected)
 
-    assert_eq_with_retry(snapshotting_node, "select count() from system.tables where name like 'alter_test_%'", "2\n")
-    assert_eq_with_retry(snapshot_recovering_node, "select count() from system.tables where name like 'alter_test_%'", "2\n")
-    assert snapshotting_node.query("desc table testdb.alter_test_MergeTree") == snapshot_recovering_node.query("desc table testdb.alter_test_MergeTree")
-    assert snapshotting_node.query("desc table testdb.alter_test_ReplicatedMergeTree") == snapshot_recovering_node.query("desc table testdb.alter_test_ReplicatedMergeTree")
+    main_node.query("SYSTEM FLUSH DISTRIBUTED testdb.dist")
+    main_node.query("ALTER TABLE testdb.concurrent_test UPDATE StartDate = addYears(StartDate, 1) WHERE 1")
+    main_node.query("ALTER TABLE testdb.concurrent_test DELETE WHERE UserID % 2")
 
-@pytest.mark.dependency(depends=['test_replica_restart'])
-def test_drop_and_create_replica(started_cluster):
+    # test_drop_and_create_replica
     main_node.query("DROP DATABASE testdb")
     main_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica1');")
 
     expected = "CREATE TABLE testdb.concurrent_test\\n(\\n    `CounterID` UInt32,\\n    `StartDate` Date,\\n    `UserID` UInt32,\\n" \
                "    `VisitID` UInt32,\\n    `NestedColumn.A` Array(UInt8),\\n    `NestedColumn.S` Array(String),\\n    `ToDrop` UInt32\\n)\\n" \
-               "ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192)"
+               "ENGINE = ReplicatedMergeTree(\\'/clickhouse/tables/uuid/{shard}\\', \\'{replica}\\')\\nORDER BY CounterID\\nSETTINGS index_granularity = 8192"
 
     assert_create_query([main_node, competing_node], "testdb.concurrent_test", expected)
+    assert_create_query(all_nodes, "testdb.concurrent_test", expected)
 
-#TODO tests with Distributed
+    for node in all_nodes:
+        node.query("SYSTEM SYNC REPLICA testdb.concurrent_test")
+
+    expected = "0\t2021-02-02\t4249604106\n" \
+               "1\t2021-02-03\t1343103100\n" \
+               "4\t2021-02-06\t3902320246\n" \
+               "7\t2021-02-09\t3844986530\n" \
+               "9\t2021-02-11\t1241149650\n"
+
+    assert_eq_with_retry(dummy_node, "SELECT CounterID, StartDate, UserID FROM testdb.dist ORDER BY CounterID", expected)
 

From 6743dd46562b43570fe1c57dafb59547c1d5ed89 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Tue, 2 Feb 2021 22:56:07 +0300
Subject: [PATCH 0553/1238] Same for the first test

---
 tests/integration/test_testkeeper_multinode/test.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/integration/test_testkeeper_multinode/test.py b/tests/integration/test_testkeeper_multinode/test.py
index 0a28b76750b..cb457e24435 100644
--- a/tests/integration/test_testkeeper_multinode/test.py
+++ b/tests/integration/test_testkeeper_multinode/test.py
@@ -119,6 +119,7 @@ def test_blocade_leader(started_cluster):
     for n, node in enumerate([node1, node2, node3]):
         for i in range(100):
             try:
+                node.query("SYSTEM RESTART REPLICA t1", timeout=10)
                 node.query("SYSTEM SYNC REPLICA t1", timeout=10)
                 break
             except Exception as ex:

From fabace394d163817674283c82179e8e56b6e3a60 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Wed, 3 Feb 2021 00:31:49 +0300
Subject: [PATCH 0554/1238] Cleanup message brokers output streams

---
 src/Storages/Kafka/KafkaBlockOutputStream.cpp           | 7 -------
 src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp     | 9 ---------
 src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp | 2 +-
 3 files changed, 1 insertion(+), 17 deletions(-)

diff --git a/src/Storages/Kafka/KafkaBlockOutputStream.cpp b/src/Storages/Kafka/KafkaBlockOutputStream.cpp
index e1742741670..cfbb7ad2523 100644
--- a/src/Storages/Kafka/KafkaBlockOutputStream.cpp
+++ b/src/Storages/Kafka/KafkaBlockOutputStream.cpp
@@ -6,11 +6,6 @@
 namespace DB
 {
 
-namespace ErrorCodes
-{
-    extern const int CANNOT_CREATE_IO_BUFFER;
-}
-
 KafkaBlockOutputStream::KafkaBlockOutputStream(
     StorageKafka & storage_,
     const StorageMetadataPtr & metadata_snapshot_,
@@ -29,8 +24,6 @@ Block KafkaBlockOutputStream::getHeader() const
 void KafkaBlockOutputStream::writePrefix()
 {
     buffer = storage.createWriteBuffer(getHeader());
-    if (!buffer)
-        throw Exception("Failed to create Kafka producer!", ErrorCodes::CANNOT_CREATE_IO_BUFFER);
 
     auto format_settings = getFormatSettings(*context);
     format_settings.protobuf.allow_many_rows_no_delimiters = true;
diff --git a/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp
index 289f0c61b7d..d239586bb65 100644
--- a/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp
+++ b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp
@@ -8,12 +8,6 @@
 namespace DB
 {
 
-namespace ErrorCodes
-{
-    extern const int CANNOT_CREATE_IO_BUFFER;
-}
-
-
 RabbitMQBlockOutputStream::RabbitMQBlockOutputStream(
     StorageRabbitMQ & storage_,
     const StorageMetadataPtr & metadata_snapshot_,
@@ -37,9 +31,6 @@ void RabbitMQBlockOutputStream::writePrefix()
         storage.unbindExchange();
 
     buffer = storage.createWriteBuffer();
-    if (!buffer)
-        throw Exception("Failed to create RabbitMQ producer!", ErrorCodes::CANNOT_CREATE_IO_BUFFER);
-
     buffer->activateWriting();
 
     auto format_settings = getFormatSettings(context);
diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp
index ac94659d321..8bf979755d2 100644
--- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp
+++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp
@@ -321,7 +321,7 @@ void WriteBufferToRabbitMQProducer::writingFunc()
             setupChannel();
     }
 
-    LOG_DEBUG(log, "Prodcuer on channel {} completed", channel_id);
+    LOG_DEBUG(log, "Producer on channel {} completed", channel_id);
 }
 
 
From 5e350bb7619bcac0d07c728a0ff4797128255d7c Mon Sep 17 00:00:00 2001
From: filimonov <1549571+filimonov@users.noreply.github.com>
Date: Tue, 2 Feb 2021 23:36:09 +0100
Subject: [PATCH 0555/1238] Update entrypoint.sh

Fix for #20002
---
 docker/server/entrypoint.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/server/entrypoint.sh b/docker/server/entrypoint.sh
index 8a4d02a6014..549ff601c59 100755
--- a/docker/server/entrypoint.sh
+++ b/docker/server/entrypoint.sh
@@ -120,7 +120,7 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
         sleep 1
     done
 
-    clickhouseclient=( clickhouse-client --multiquery -u "$CLICKHOUSE_USER" --password "$CLICKHOUSE_PASSWORD" )
+    clickhouseclient=( clickhouse-client --multiquery --host "127.0.0.1" -u "$CLICKHOUSE_USER" --password "$CLICKHOUSE_PASSWORD" )
 
     echo
 

From d05c6446b9187411f29cd9cf052e99152502eda8 Mon Sep 17 00:00:00 2001
From: Aleksei Semiglazov <asemiglazov@cloudflare.com>
Date: Tue, 2 Feb 2021 23:21:07 +0000
Subject: [PATCH 0556/1238] Send cancel packet and cancel read_context before
 retrying the query

---
 src/DataStreams/RemoteQueryExecutor.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/DataStreams/RemoteQueryExecutor.cpp b/src/DataStreams/RemoteQueryExecutor.cpp
index ce7db264eef..27b3de66497 100644
--- a/src/DataStreams/RemoteQueryExecutor.cpp
+++ b/src/DataStreams/RemoteQueryExecutor.cpp
@@ -244,7 +244,7 @@ std::variant<Block, int> RemoteQueryExecutor::read(std::unique_ptr<ReadContext>
             return Block();
     }
 
-    if (!read_context)
+    if (!read_context || resent_query)
     {
         std::lock_guard lock(was_cancelled_mutex);
         if (was_cancelled)
@@ -270,7 +270,7 @@ std::variant<Block, int> RemoteQueryExecutor::read(std::unique_ptr<ReadContext>
                 if (got_duplicated_part_uuids)
                 {
                     /// Cancel previous query and disconnect before retry.
-                    cancel();
+                    cancel(&read_context);
                     multiplexed_connections->disconnect();
 
                     /// Only resend once, otherwise throw an exception
@@ -523,7 +523,7 @@ bool RemoteQueryExecutor::isQueryPending() const
 
 bool RemoteQueryExecutor::hasThrownException() const
 {
-    return got_exception_from_replica || got_unknown_packet_from_replica || got_duplicated_part_uuids;
+    return got_exception_from_replica || got_unknown_packet_from_replica;
 }
 
 }

From 76cd42f485960cba1f0a0590565f3e038a7f879a Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Tue, 2 Feb 2021 18:15:24 -0800
Subject: [PATCH 0557/1238] Docs - HTTPS interserver replication

---
 .../settings.md                               | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md
index a1ed34f10bb..6d9e41016cc 100644
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@@ -296,6 +296,27 @@ Useful for breaking away from a specific network interface.
 <interserver_http_host>example.yandex.ru</interserver_http_host>
 ```
 
+## interserver_https_port {#interserver-https-port}
+
+Port for exchanging data between ClickHouse servers over `HTTPS`.
+
+**Example**
+
+``` xml
+<interserver_https_port>9010</interserver_https_port>
+```
+
+## interserver_https_host {#interserver-https-host}
+
+Similar to `interserver_http_host`, except that this hostname can be used by other servers to access this server over `HTTPS`.
+
+
+**Example**
+
+``` xml
+<interserver_https_host>example.yandex.ru</interserver_https_host>
+```
+
 ## interserver_http_credentials {#server-settings-interserver-http-credentials}
 
 The username and password used to authenticate during [replication](../../engines/table-engines/mergetree-family/replication.md) with the Replicated\* engines. These credentials are used only for communication between replicas and are unrelated to credentials for ClickHouse clients. The server is checking these credentials for connecting replicas and use the same credentials when connecting to other replicas. So, these credentials should be set the same for all replicas in a cluster.

From c60c0664b7bca51104a406b2557de731eb0b5a6d Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Tue, 2 Feb 2021 18:20:16 -0800
Subject: [PATCH 0558/1238] Docs - add minor note about
 htt_interserver_credentials

---
 docs/en/operations/server-configuration-parameters/settings.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md
index 6d9e41016cc..9d544a0d587 100644
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@@ -322,6 +322,8 @@ Similar to `interserver_http_host`, except that this hostname can be used by oth
 The username and password used to authenticate during [replication](../../engines/table-engines/mergetree-family/replication.md) with the Replicated\* engines. These credentials are used only for communication between replicas and are unrelated to credentials for ClickHouse clients. The server is checking these credentials for connecting replicas and use the same credentials when connecting to other replicas. So, these credentials should be set the same for all replicas in a cluster.
 By default, the authentication is not used.
 
+**Note:** These credentials are common for replication through `HTTP` and `HTTPS`.
+
 This section contains the following parameters:
 
 -   `user` — username.

From 73fe3264afffbf6ffbbe6ff9b26a50624a997bae Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Tue, 2 Feb 2021 18:26:36 -0800
Subject: [PATCH 0559/1238] Fix spacing

---
 docs/en/operations/server-configuration-parameters/settings.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md
index 9d544a0d587..89fcbafe663 100644
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@@ -310,7 +310,6 @@ Port for exchanging data between ClickHouse servers over `HTTPS`.
 
 Similar to `interserver_http_host`, except that this hostname can be used by other servers to access this server over `HTTPS`.
 
-
 **Example**
 
 ``` xml

From 85f8918753110ec2d7f8a2a9a2757e5638bc2984 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Wed, 3 Feb 2021 07:47:53 +0300
Subject: [PATCH 0560/1238] fix the row order to be like in the original test

---
 tests/performance/group_by_sundy_li.xml | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/tests/performance/group_by_sundy_li.xml b/tests/performance/group_by_sundy_li.xml
index 762a07b8702..a022692a017 100644
--- a/tests/performance/group_by_sundy_li.xml
+++ b/tests/performance/group_by_sundy_li.xml
@@ -4,20 +4,21 @@
     </settings>
 
     <create_query>
-CREATE TABLE a
-(
-    d Date,
-    os String
-)
-ENGINE = MergeTree
-PARTITION BY d
-ORDER BY d
+        CREATE TABLE a
+        (
+            d Date,
+            os String,
+            n UInt64
+        )
+        ENGINE = MergeTree
+        PARTITION BY d
+        ORDER BY d, n
     </create_query>
 
-    <fill_query>insert into a select '2000-01-01', ['aa','bb','cc','dd'][number % 4 + 1] from  numbers_mt(100000000)</fill_query>
-    <fill_query>insert into a select '2000-01-02', ['aa','bb','cc','dd'][number % 4 + 1] from  numbers_mt(100000000)</fill_query>
-    <fill_query>insert into a select '2000-01-03', ['aa','bb','cc','dd'][number % 4 + 1] from  numbers_mt(100000000)</fill_query>
-    <fill_query>insert into a select '2000-01-04', ['aa','bb','cc','dd'][number % 4 + 1] from  numbers_mt(100000000)</fill_query>
+    <fill_query>insert into a select '2000-01-01', ['aa','bb','cc','dd'][number % 4 + 1], number from  numbers_mt(100000000)</fill_query>
+    <fill_query>insert into a select '2000-01-02', ['aa','bb','cc','dd'][number % 4 + 1], number from  numbers_mt(100000000)</fill_query>
+    <fill_query>insert into a select '2000-01-03', ['aa','bb','cc','dd'][number % 4 + 1], number from  numbers_mt(100000000)</fill_query>
+    <fill_query>insert into a select '2000-01-04', ['aa','bb','cc','dd'][number % 4 + 1], number from  numbers_mt(100000000)</fill_query>
 
     <fill_query>OPTIMIZE TABLE a FINAL</fill_query>
 

From 09c49d38421e10af5a334f15df1ce15ca56742d8 Mon Sep 17 00:00:00 2001
From: PHO <pho@cielonegro.org>
Date: Mon, 21 Dec 2020 12:08:37 +0900
Subject: [PATCH 0561/1238] Add function runningConcurrency()

Given a series of beginning time and ending time of events, this function calculates concurrency of the events at each of the data point, that is, the beginning time.
---
 .../functions/other-functions.md              |  60 +++++
 .../registerFunctionsMiscellaneous.cpp        |   2 +
 src/Functions/runningConcurrency.cpp          | 223 ++++++++++++++++++
 src/Functions/ya.make                         |   1 +
 .../01602_runningConcurrency.reference        |  19 ++
 .../0_stateless/01602_runningConcurrency.sql  |  49 ++++
 6 files changed, 354 insertions(+)
 create mode 100644 src/Functions/runningConcurrency.cpp
 create mode 100644 tests/queries/0_stateless/01602_runningConcurrency.reference
 create mode 100644 tests/queries/0_stateless/01602_runningConcurrency.sql

diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md
index 08d34770f57..dae6670dc14 100644
--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@@ -820,6 +820,66 @@ WHERE diff != 1
 
 Same as for [runningDifference](../../sql-reference/functions/other-functions.md#other_functions-runningdifference), the difference is the value of the first row, returned the value of the first row, and each subsequent row returns the difference from the previous row.
 
+## runningConcurrency {#runningconcurrency}
+
+Given a series of beginning time and ending time of events, this function calculates concurrency of the events at each of the data point, that is, the beginning time.
+
+!!! warning "Warning"
+    Events spanning multiple data blocks will not be processed correctly. The function resets its state for each new data block.
+
+The result of the function depends on the order of data in the block. It assumes the beginning time is sorted in ascending order.
+
+**Syntax**
+
+``` sql
+runningConcurrency(begin, end)
+```
+
+**Parameters**
+
+-   `begin` — A column for the beginning time of events (inclusive). [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md).
+-   `end` — A column for the ending time of events (exclusive).  [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md).
+
+Note that two columns `begin` and `end` must have the same type.
+
+**Returned values**
+
+-   The concurrency of events at the data point.
+
+Type: [UInt32](../../sql-reference/data-types/int-uint.md)
+
+**Example**
+
+Input table:
+
+``` text
+┌───────────────begin─┬─────────────────end─┐
+│ 2020-12-01 00:00:00 │ 2020-12-01 00:59:59 │
+│ 2020-12-01 00:30:00 │ 2020-12-01 00:59:59 │
+│ 2020-12-01 00:40:00 │ 2020-12-01 01:30:30 │
+│ 2020-12-01 01:10:00 │ 2020-12-01 01:30:30 │
+│ 2020-12-01 01:50:00 │ 2020-12-01 01:59:59 │
+└─────────────────────┴─────────────────────┘
+```
+
+Query:
+
+``` sql
+SELECT runningConcurrency(begin, end) FROM example
+```
+
+Result:
+
+``` text
+┌─runningConcurrency(begin, end)─┐
+│                              1 │
+│                              2 │
+│                              3 │
+│                              2 │
+│                              1 │
+└────────────────────────────────┘
+```
+
 ## MACNumToString(num) {#macnumtostringnum}
 
 Accepts a UInt64 number. Interprets it as a MAC address in big endian. Returns a string containing the corresponding MAC address in the format AA:BB:CC:DD:EE:FF (colon-separated numbers in hexadecimal form).
diff --git a/src/Functions/registerFunctionsMiscellaneous.cpp b/src/Functions/registerFunctionsMiscellaneous.cpp
index 653922bbced..8d8af4fcbf1 100644
--- a/src/Functions/registerFunctionsMiscellaneous.cpp
+++ b/src/Functions/registerFunctionsMiscellaneous.cpp
@@ -45,6 +45,7 @@ void registerFunctionTimeZone(FunctionFactory &);
 void registerFunctionRunningAccumulate(FunctionFactory &);
 void registerFunctionRunningDifference(FunctionFactory &);
 void registerFunctionRunningDifferenceStartingWithFirstValue(FunctionFactory &);
+void registerFunctionRunningConcurrency(FunctionFactory &);
 void registerFunctionFinalizeAggregation(FunctionFactory &);
 void registerFunctionToLowCardinality(FunctionFactory &);
 void registerFunctionLowCardinalityIndices(FunctionFactory &);
@@ -112,6 +113,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory)
     registerFunctionRunningAccumulate(factory);
     registerFunctionRunningDifference(factory);
     registerFunctionRunningDifferenceStartingWithFirstValue(factory);
+    registerFunctionRunningConcurrency(factory);
     registerFunctionFinalizeAggregation(factory);
     registerFunctionToLowCardinality(factory);
     registerFunctionLowCardinalityIndices(factory);
diff --git a/src/Functions/runningConcurrency.cpp b/src/Functions/runningConcurrency.cpp
new file mode 100644
index 00000000000..a225e3152e7
--- /dev/null
+++ b/src/Functions/runningConcurrency.cpp
@@ -0,0 +1,223 @@
+#include <Columns/ColumnVector.h>
+#include <Core/callOnTypeIndex.h>
+#include <DataTypes/IDataType.h>
+#include <DataTypes/DataTypesNumber.h>
+#include <DataTypes/DataTypeDate.h>
+#include <DataTypes/DataTypeDateTime.h>
+#include <DataTypes/DataTypeDateTime64.h>
+#include <DataTypes/DataTypeNullable.h>
+#include <Formats/FormatSettings.h>
+#include <Functions/FunctionFactory.h>
+#include <Functions/IFunctionImpl.h>
+#include <IO/WriteBufferFromString.h>
+#include <common/defines.h>
+#include <set>
+
+namespace DB
+{
+    namespace ErrorCodes
+    {
+        extern const int ILLEGAL_COLUMN;
+        extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+        extern const int INCORRECT_DATA;
+    }
+
+    template <typename Name, typename ArgDataType, typename ConcurrencyDataType>
+    class ExecutableFunctionRunningConcurrency : public IExecutableFunctionImpl
+    {
+    public:
+        String getName() const override
+        {
+            return Name::name;
+        }
+
+        ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
+        {
+            using ColVecArg = typename ArgDataType::ColumnType;
+            const ColVecArg * col_begin = checkAndGetColumn<ColVecArg>(arguments[0].column.get());
+            const ColVecArg * col_end   = checkAndGetColumn<ColVecArg>(arguments[1].column.get());
+            if (!col_begin || !col_end)
+                throw Exception("Constant columns are not supported at the moment",
+                                ErrorCodes::ILLEGAL_COLUMN);
+            const typename ColVecArg::Container & vec_begin = col_begin->getData();
+            const typename ColVecArg::Container & vec_end   = col_end->getData();
+
+            using ColVecConc = typename ConcurrencyDataType::ColumnType;
+            typename ColVecConc::MutablePtr col_concurrency = ColVecConc::create(input_rows_count);
+            typename ColVecConc::Container & vec_concurrency = col_concurrency->getData();
+
+            std::multiset<typename ArgDataType::FieldType> ongoing_until;
+            for (size_t i = 0; i < input_rows_count; ++i)
+            {
+                const auto begin = vec_begin[i];
+                const auto end   = vec_end[i];
+
+                if (unlikely(begin > end))
+                {
+                    const FormatSettings default_format;
+                    WriteBufferFromOwnString buf_begin, buf_end;
+                    arguments[0].type->serializeAsTextQuoted(*(arguments[0].column), i, buf_begin, default_format);
+                    arguments[1].type->serializeAsTextQuoted(*(arguments[1].column), i, buf_end, default_format);
+                    throw Exception(
+                        "Incorrect order of events: " + buf_begin.str() + " > " + buf_end.str(),
+                        ErrorCodes::INCORRECT_DATA);
+                }
+
+                ongoing_until.insert(end);
+
+                // Erase all the elements from "ongoing_until" which
+                // are less than or equal to "begin", i.e. durations
+                // that have already ended. We consider "begin" to be
+                // inclusive, and "end" to be exclusive.
+                ongoing_until.erase(
+                    ongoing_until.begin(), ongoing_until.upper_bound(begin));
+
+                vec_concurrency[i] = ongoing_until.size();
+            }
+
+            return col_concurrency;
+        }
+
+        bool useDefaultImplementationForConstants() const override
+        {
+            return true;
+        }
+    };
+
+    template <typename Name, typename ArgDataType, typename ConcurrencyDataType>
+    class FunctionBaseRunningConcurrency : public IFunctionBaseImpl
+    {
+    public:
+        explicit FunctionBaseRunningConcurrency(DataTypes argument_types_, DataTypePtr return_type_)
+            : argument_types(std::move(argument_types_))
+            , return_type(std::move(return_type_)) {}
+
+        String getName() const override
+        {
+            return Name::name;
+        }
+
+        const DataTypes & getArgumentTypes() const override
+        {
+            return argument_types;
+        }
+
+        const DataTypePtr & getResultType() const override
+        {
+            return return_type;
+        }
+
+        ExecutableFunctionImplPtr prepare(const ColumnsWithTypeAndName &) const override
+        {
+            return std::make_unique<ExecutableFunctionRunningConcurrency<Name, ArgDataType, ConcurrencyDataType>>();
+        }
+
+        bool isStateful() const override
+        {
+            return true;
+        }
+
+    private:
+        DataTypes argument_types;
+        DataTypePtr return_type;
+    };
+
+    template <typename Name, typename ConcurrencyDataType>
+    class RunningConcurrencyOverloadResolver : public IFunctionOverloadResolverImpl
+    {
+        template <typename T>
+        struct TypeTag
+        {
+            using Type = T;
+        };
+
+        /// Call a polymorphic lambda with a type tag of src_type.
+        template <typename F>
+        void dispatchForSourceType(const IDataType & src_type, F && f) const
+        {
+            WhichDataType which(src_type);
+
+            switch (which.idx)
+            {
+            case TypeIndex::Date:       f(TypeTag<DataTypeDate>());       break;
+            case TypeIndex::DateTime:   f(TypeTag<DataTypeDateTime>());   break;
+            case TypeIndex::DateTime64: f(TypeTag<DataTypeDateTime64>()); break;
+            default:
+                throw Exception(
+                    "Arguments for function " + getName() + " must be Date, DateTime, or DateTime64.",
+                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+            }
+        }
+
+    public:
+        static constexpr auto name = Name::name;
+
+        static FunctionOverloadResolverImplPtr create(const Context &)
+        {
+            return std::make_unique<RunningConcurrencyOverloadResolver<Name, ConcurrencyDataType>>();
+        }
+
+        String getName() const override
+        {
+            return Name::name;
+        }
+
+        FunctionBaseImplPtr build(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override
+        {
+            // The type of the second argument must match with that of the first one.
+            if (unlikely(!arguments[1].type->equals(*(arguments[0].type))))
+            {
+                throw Exception(
+                    "Function " + getName() + " must be called with two arguments having the same type.",
+                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+            }
+
+            DataTypes argument_types = { arguments[0].type, arguments[1].type };
+            FunctionBaseImplPtr base;
+            dispatchForSourceType(*(arguments[0].type), [&](auto arg_type_tag) // Throws when the type is inappropriate.
+            {
+                using Tag = decltype(arg_type_tag);
+                using ArgDataType = typename Tag::Type;
+
+                base = std::make_unique<FunctionBaseRunningConcurrency<Name, ArgDataType, ConcurrencyDataType>>(argument_types, return_type);
+            });
+
+            return base;
+        }
+
+        DataTypePtr getReturnType(const DataTypes &) const override
+        {
+            return std::make_shared<ConcurrencyDataType>();
+        }
+
+        size_t getNumberOfArguments() const override
+        {
+            return 2;
+        }
+
+        bool isInjective(const ColumnsWithTypeAndName &) const override
+        {
+            return false;
+        }
+
+        bool isStateful() const override
+        {
+            return true;
+        }
+
+        bool useDefaultImplementationForNulls() const override
+        {
+            return false;
+        }
+    };
+
+    struct NameRunningConcurrency
+    {
+        static constexpr auto name = "runningConcurrency";
+    };
+
+    void registerFunctionRunningConcurrency(FunctionFactory & factory)
+    {
+        factory.registerFunction<RunningConcurrencyOverloadResolver<NameRunningConcurrency, DataTypeUInt32>>();
+    }
+}
diff --git a/src/Functions/ya.make b/src/Functions/ya.make
index 9488c9d7d4e..f567c70eec4 100644
--- a/src/Functions/ya.make
+++ b/src/Functions/ya.make
@@ -425,6 +425,7 @@ SRCS(
     rowNumberInAllBlocks.cpp
     rowNumberInBlock.cpp
     runningAccumulate.cpp
+    runningConcurrency.cpp
     runningDifference.cpp
     runningDifferenceStartingWithFirstValue.cpp
     sigmoid.cpp
diff --git a/tests/queries/0_stateless/01602_runningConcurrency.reference b/tests/queries/0_stateless/01602_runningConcurrency.reference
new file mode 100644
index 00000000000..1bd238ccde8
--- /dev/null
+++ b/tests/queries/0_stateless/01602_runningConcurrency.reference
@@ -0,0 +1,19 @@
+Invocation with Date columns
+1
+2
+3
+2
+1
+Invocation with DateTime
+1
+2
+3
+2
+1
+Invocation with DateTime64
+1
+2
+3
+2
+1
+Erroneous cases
diff --git a/tests/queries/0_stateless/01602_runningConcurrency.sql b/tests/queries/0_stateless/01602_runningConcurrency.sql
new file mode 100644
index 00000000000..40fdc54ba7a
--- /dev/null
+++ b/tests/queries/0_stateless/01602_runningConcurrency.sql
@@ -0,0 +1,49 @@
+--
+SELECT 'Invocation with Date columns';
+
+DROP TABLE IF EXISTS runningConcurrency_test;
+CREATE TABLE runningConcurrency_test(begin Date, end Date) ENGINE = Memory;
+
+INSERT INTO runningConcurrency_test VALUES ('2020-12-01', '2020-12-10'), ('2020-12-02', '2020-12-10'), ('2020-12-03', '2020-12-12'), ('2020-12-10', '2020-12-12'), ('2020-12-13', '2020-12-20');
+SELECT runningConcurrency(begin, end) FROM runningConcurrency_test;
+
+DROP TABLE runningConcurrency_test;
+
+--
+SELECT 'Invocation with DateTime';
+
+DROP TABLE IF EXISTS runningConcurrency_test;
+CREATE TABLE runningConcurrency_test(begin DateTime, end DateTime) ENGINE = Memory;
+
+INSERT INTO runningConcurrency_test VALUES ('2020-12-01 00:00:00', '2020-12-01 00:59:59'), ('2020-12-01 00:30:00', '2020-12-01 00:59:59'), ('2020-12-01 00:40:00', '2020-12-01 01:30:30'), ('2020-12-01 01:10:00', '2020-12-01 01:30:30'), ('2020-12-01 01:50:00', '2020-12-01 01:59:59');
+SELECT runningConcurrency(begin, end) FROM runningConcurrency_test;
+
+DROP TABLE runningConcurrency_test;
+
+--
+SELECT 'Invocation with DateTime64';
+
+DROP TABLE IF EXISTS runningConcurrency_test;
+CREATE TABLE runningConcurrency_test(begin DateTime64(3), end DateTime64(3)) ENGINE = Memory;
+
+INSERT INTO runningConcurrency_test VALUES ('2020-12-01 00:00:00.000', '2020-12-01 00:00:00.100'), ('2020-12-01 00:00:00.010', '2020-12-01 00:00:00.100'), ('2020-12-01 00:00:00.020', '2020-12-01 00:00:00.200'), ('2020-12-01 00:00:00.150', '2020-12-01 00:00:00.200'), ('2020-12-01 00:00:00.250', '2020-12-01 00:00:00.300');
+SELECT runningConcurrency(begin, end) FROM runningConcurrency_test;
+
+DROP TABLE runningConcurrency_test;
+
+--
+SELECT 'Erroneous cases';
+
+-- Constant columns are currently not supported.
+SELECT runningConcurrency(toDate(arrayJoin([1, 2])), toDate('2000-01-01')); -- { serverError 44 }
+
+-- Unsupported data types
+SELECT runningConcurrency('strings are', 'not supported'); -- { serverError 43 }
+SELECT runningConcurrency(NULL, NULL); -- { serverError 43 }
+SELECT runningConcurrency(CAST(NULL, 'Nullable(DateTime)'), CAST(NULL, 'Nullable(DateTime)')); -- { serverError 43 }
+
+-- Mismatching data types
+SELECT runningConcurrency(toDate('2000-01-01'), toDateTime('2000-01-01 00:00:00')); -- { serverError 43 }
+
+-- begin > end
+SELECT runningConcurrency(toDate('2000-01-02'), toDate('2000-01-01')); -- { serverError 117 }

From 7e945bab03bef9260bfc4776bdffe44fb7f8c99f Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Wed, 3 Feb 2021 08:53:21 +0300
Subject: [PATCH 0562/1238] fix the calculation for moving frame start

---
 src/Common/StackTrace.h                       |  10 +-
 src/Processors/Transforms/WindowTransform.cpp | 164 ++++++++++--------
 src/Processors/Transforms/WindowTransform.h   |   9 +-
 tests/performance/window_functions.xml        |  26 ++-
 4 files changed, 130 insertions(+), 79 deletions(-)

diff --git a/src/Common/StackTrace.h b/src/Common/StackTrace.h
index 3ae4b964838..b2e14a01f03 100644
--- a/src/Common/StackTrace.h
+++ b/src/Common/StackTrace.h
@@ -34,7 +34,15 @@ public:
         std::optional<std::string> file;
         std::optional<UInt64> line;
     };
-    static constexpr size_t capacity = 32;
+
+    static constexpr size_t capacity =
+#ifndef NDEBUG
+        /* The stacks are normally larger in debug version due to less inlining. */
+        64
+#else
+        32
+#endif
+        ;
     using FramePointers = std::array<void *, capacity>;
     using Frames = std::array<Frame, capacity>;
 
diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index 23acc85aef0..cc526eba8c4 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -348,49 +348,6 @@ void WindowTransform::advanceFrameStart()
         // further.
         assert(frame_started);
     }
-
-    // We're very dumb and have to reinitialize aggregate functions if the frame
-    // start changed. No point in doing it if we don't yet know where the frame
-    // starts.
-    if (!frame_started)
-    {
-        return;
-    }
-
-    // frame_end value might not be valid yet, but we know that it is greater or
-    // equal than frame_start. If it's less than the new frame_start, we have to
-    // skip rows between frame_end and frame_start, because they are not in the
-    // frame and must not contribute to the value of aggregate functions.
-    if (frame_end < frame_start)
-    {
-        frame_end = frame_start;
-    }
-
-    for (auto & ws : workspaces)
-    {
-        const auto & f = ws.window_function;
-        const auto * a = f.aggregate_function.get();
-        auto * buf = ws.aggregate_function_state.data();
-
-        a->destroy(buf);
-        a->create(buf);
-
-        for (auto row = frame_start; row < frame_end; advanceRowNumber(row))
-        {
-            if (row.block != ws.cached_block_number)
-            {
-                ws.argument_columns.clear();
-                for (const auto i : ws.argument_column_indices)
-                {
-                    ws.argument_columns.push_back(inputAt(row)[i].get());
-                }
-                ws.cached_block_number = row.block;
-            }
-
-            a->add(buf, ws.argument_columns.data(), row.row, arena.get());
-//            fmt::print(stderr, "(1) add row {}\n", row.row);
-        }
-    }
 }
 
 bool WindowTransform::arePeers(const RowNumber & x, const RowNumber & y) const
@@ -516,7 +473,6 @@ void WindowTransform::advanceFrameEnd()
     switch (window_description.frame.end_type)
     {
         case WindowFrame::BoundaryType::Current:
-            // The only frame end we have for now is CURRENT ROW.
             advanceFrameEndCurrentRow();
             break;
         case WindowFrame::BoundaryType::Unbounded:
@@ -536,45 +492,81 @@ void WindowTransform::advanceFrameEnd()
     {
         return;
     }
+}
 
-    // Add the rows over which we advanced the frame to the aggregate function
-    // states. We could have advanced over at most the entire last block.
-    uint64_t rows_end = frame_end.row;
-    if (frame_end.row == 0)
+// Update the aggregation states after the frame has changed.
+void WindowTransform::updateAggregationState()
+{
+//    fmt::print(stderr, "update agg states [{}, {}) -> [{}, {})\n",
+//        prev_frame_start, prev_frame_end, frame_start, frame_end);
+
+    // Assert that the frame boundaries are known, have proper order wrt each
+    // other, and have not gone back wrt the previous frame.
+    assert(frame_started);
+    assert(frame_ended);
+    assert(frame_start <= frame_end);
+    assert(prev_frame_start <= prev_frame_end);
+    assert(prev_frame_start <= frame_start);
+    assert(prev_frame_end <= frame_end);
+
+    // We might have to reset aggregation state and/or add some rows to it.
+    // Figure out what to do.
+    bool reset_aggregation = false;
+    RowNumber rows_to_add_start;
+    RowNumber rows_to_add_end;
+    if (frame_start == prev_frame_start)
     {
-        assert(frame_end == blocksEnd());
-        rows_end = blockRowsNumber(frame_end_before);
+        // The frame start didn't change, add the tail rows.
+        reset_aggregation = false;
+        rows_to_add_start = prev_frame_end;
+        rows_to_add_end = frame_end;
     }
     else
     {
-        assert(frame_end_before.block == frame_end.block);
+        // The frame start changed, reset the state and aggregate over the
+        // entire frame. This can be made per-function after we learn to
+        // subtract rows from some types of aggregation states, but for now we
+        // always have to reset when the frame start changes.
+        reset_aggregation = true;
+        rows_to_add_start = frame_start;
+        rows_to_add_end = frame_end;
     }
-    // Equality would mean "no data to process", for which we checked above.
-    assert(frame_end_before.row < rows_end);
 
     for (auto & ws : workspaces)
     {
-        if (frame_end_before.block != ws.cached_block_number)
-        {
-            const auto & block
-                = blocks[frame_end_before.block - first_block_number];
-            ws.argument_columns.clear();
-            for (const auto i : ws.argument_column_indices)
-            {
-                ws.argument_columns.push_back(block.input_columns[i].get());
-            }
-            ws.cached_block_number = frame_end_before.block;
-        }
-
         const auto * a = ws.window_function.aggregate_function.get();
         auto * buf = ws.aggregate_function_state.data();
-        auto * columns = ws.argument_columns.data();
-        for (auto row = frame_end_before.row; row < rows_end; ++row)
+
+        if (reset_aggregation)
         {
+//            fmt::print(stderr, "(2) reset aggregation\n");
+            a->destroy(buf);
+            a->create(buf);
+        }
+
+        for (auto row = rows_to_add_start; row < rows_to_add_end;
+            advanceRowNumber(row))
+        {
+            if (row.block != ws.cached_block_number)
+            {
+                const auto & block
+                    = blocks[row.block - first_block_number];
+                ws.argument_columns.clear();
+                for (const auto i : ws.argument_column_indices)
+                {
+                    ws.argument_columns.push_back(block.input_columns[i].get());
+                }
+                ws.cached_block_number = row.block;
+            }
+
 //            fmt::print(stderr, "(2) add row {}\n", row);
-            a->add(buf, columns, row, arena.get());
+            auto * columns = ws.argument_columns.data();
+            a->add(buf, columns, row.row, arena.get());
         }
     }
+
+    prev_frame_start = frame_start;
+    prev_frame_end = frame_end;
 }
 
 void WindowTransform::writeOutCurrentRow()
@@ -646,8 +638,11 @@ void WindowTransform::appendChunk(Chunk & chunk)
         // which is precisely the definition of `partition_end`.
         while (current_row < partition_end)
         {
-            // Advance the frame start, updating the state of the aggregate
-            // functions.
+//            fmt::print(stderr, "(1) row {} frame [{}, {}) {}, {}\n",
+//                current_row, frame_start, frame_end,
+//                frame_started, frame_ended);
+
+            // Advance the frame start.
             advanceFrameStart();
 
             if (!frame_started)
@@ -655,15 +650,19 @@ void WindowTransform::appendChunk(Chunk & chunk)
                 // Wait for more input data to find the start of frame.
                 assert(!input_is_finished);
                 assert(!partition_ended);
+                return;
             }
 
-            // Advance the frame end, updating the state of the aggregate
-            // functions.
-            advanceFrameEnd();
+            // frame_end must be greater or equal than frame_start, so if the
+            // frame_start is already past the current frame_end, we can start
+            // from it to save us some work.
+            if (frame_end < frame_start)
+            {
+                frame_end = frame_start;
+            }
 
-//            fmt::print(stderr, "row {} frame [{}, {}) {}, {}\n",
-//                current_row, frame_start, frame_end,
-//                frame_started, frame_ended);
+            // Advance the frame end.
+            advanceFrameEnd();
 
             if (!frame_ended)
             {
@@ -673,6 +672,10 @@ void WindowTransform::appendChunk(Chunk & chunk)
                 return;
             }
 
+//            fmt::print(stderr, "(2) row {} frame [{}, {}) {}, {}\n",
+//                current_row, frame_start, frame_end,
+//                frame_started, frame_ended);
+
             // The frame can be empty sometimes, e.g. the boundaries coincide
             // or the start is after the partition end. But hopefully start is
             // not after end.
@@ -680,6 +683,13 @@ void WindowTransform::appendChunk(Chunk & chunk)
             assert(frame_ended);
             assert(frame_start <= frame_end);
 
+            // Now that we know the new frame boundaries, update the aggregation
+            // states. Theoretically we could do this simultaneously with moving
+            // the frame boundaries, but it would require some care not to
+            // perform unnecessary work while we are still looking for the frame
+            // start, so do it the simple way for now.
+            updateAggregationState();
+
             // Write out the aggregation results.
             writeOutCurrentRow();
 
@@ -716,6 +726,8 @@ void WindowTransform::appendChunk(Chunk & chunk)
         // for now.
         frame_start = partition_start;
         frame_end = partition_start;
+        prev_frame_start = partition_start;
+        prev_frame_end = partition_start;
         assert(current_row == partition_start);
 
 //        fmt::print(stderr, "reinitialize agg data at start of {}\n",
diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h
index afc44b2f706..5ad1132bfab 100644
--- a/src/Processors/Transforms/WindowTransform.h
+++ b/src/Processors/Transforms/WindowTransform.h
@@ -113,6 +113,7 @@ private:
     void advanceFrameEndCurrentRow();
     void advanceFrameEndUnbounded();
     bool arePeers(const RowNumber & x, const RowNumber & y) const;
+    void updateAggregationState();
     void writeOutCurrentRow();
 
     Columns & inputAt(const RowNumber & x)
@@ -254,7 +255,7 @@ public:
     RowNumber partition_end;
     bool partition_ended = false;
 
-    // This is the row for which we are computing the window functions now.
+    // The row for which we are now computing the window functions.
     RowNumber current_row;
 
     // The frame is [frame_start, frame_end) if frame_ended && frame_started,
@@ -270,6 +271,12 @@ public:
     RowNumber frame_end;
     bool frame_ended = false;
     bool frame_started = false;
+
+    // The previous frame boundaries that correspond to the current state of the
+    // aggregate function. We use them to determine how to update the aggregation
+    // state after we find the new frame.
+    RowNumber prev_frame_start;
+    RowNumber prev_frame_end;
 };
 
 }
diff --git a/tests/performance/window_functions.xml b/tests/performance/window_functions.xml
index f42345d0696..93983e9b1bf 100644
--- a/tests/performance/window_functions.xml
+++ b/tests/performance/window_functions.xml
@@ -25,7 +25,31 @@
         select *
         from (
             select CounterID, UserID, count(*) user_hits,
-                count() over (partition by CounterID order by user_hits desc)
+                count()
+                    over (partition by CounterID order by user_hits desc
+                        rows unbounded preceding)
+                    user_rank
+            from hits_100m_single
+            where CounterID < 10000
+            group by CounterID, UserID
+        )
+        where user_rank <= 10
+        format Null
+    ]]></query>
+
+    <!--
+        The RANGE version should give (almost) the same result, because counts
+        for the top ranking users are probably different, so the ranks won't be
+        influenced by grouping. But it is going to be slower than ROWS because
+        of the additional work of finding the group boundaries.
+    -->
+    <query><![CDATA[
+        select *
+        from (
+            select CounterID, UserID, count(*) user_hits,
+                count()
+                    over (partition by CounterID order by user_hits desc
+                        range unbounded preceding)
                     user_rank
             from hits_100m_single
             where CounterID < 10000

From a164abf23e70e9b6bc4064c0fd4431664ba3a747 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Wed, 3 Feb 2021 09:42:54 +0300
Subject: [PATCH 0563/1238] add frame fuzzing and one query that fails under
 msan

---
 programs/client/QueryFuzzer.cpp               | 44 +++++++++++++++++++
 programs/client/QueryFuzzer.h                 |  2 +
 .../01591_window_functions.reference          |  2 +
 .../0_stateless/01591_window_functions.sql    |  4 ++
 4 files changed, 52 insertions(+)

diff --git a/programs/client/QueryFuzzer.cpp b/programs/client/QueryFuzzer.cpp
index 3892e8e5732..e07d06e6080 100644
--- a/programs/client/QueryFuzzer.cpp
+++ b/programs/client/QueryFuzzer.cpp
@@ -325,6 +325,49 @@ void QueryFuzzer::fuzzColumnLikeExpressionList(IAST * ast)
     // the generic recursion into IAST.children.
 }
 
+void QueryFuzzer::fuzzWindowFrame(WindowFrame & frame)
+{
+    switch (fuzz_rand() % 40)
+    {
+        case 0:
+        {
+            const auto r = fuzz_rand() % 3;
+            frame.type = r == 0 ? WindowFrame::FrameType::Rows
+                : r == 1 ? WindowFrame::FrameType::Range
+                    : WindowFrame::FrameType::Groups;
+            break;
+        }
+        case 1:
+        {
+            const auto r = fuzz_rand() % 3;
+            frame.begin_type = r == 0 ? WindowFrame::BoundaryType::Unbounded
+                : r == 1 ? WindowFrame::BoundaryType::Current
+                    : WindowFrame::BoundaryType::Offset;
+            break;
+        }
+        case 2:
+        {
+            const auto r = fuzz_rand() % 3;
+            frame.end_type = r == 0 ? WindowFrame::BoundaryType::Unbounded
+                : r == 1 ? WindowFrame::BoundaryType::Current
+                    : WindowFrame::BoundaryType::Offset;
+            break;
+        }
+        case 3:
+        {
+            frame.begin_offset = getRandomField(0).get<Int64>();
+            break;
+        }
+        case 4:
+        {
+            frame.end_offset = getRandomField(0).get<Int64>();
+            break;
+        }
+        default:
+            break;
+    }
+}
+
 void QueryFuzzer::fuzz(ASTs & asts)
 {
     for (auto & ast : asts)
@@ -409,6 +452,7 @@ void QueryFuzzer::fuzz(ASTPtr & ast)
             auto & def = fn->window_definition->as<ASTWindowDefinition &>();
             fuzzColumnLikeExpressionList(def.partition_by.get());
             fuzzOrderByList(def.order_by.get());
+            fuzzWindowFrame(def.frame);
         }
 
         fuzz(fn->children);
diff --git a/programs/client/QueryFuzzer.h b/programs/client/QueryFuzzer.h
index e9d3f150283..38714205967 100644
--- a/programs/client/QueryFuzzer.h
+++ b/programs/client/QueryFuzzer.h
@@ -14,6 +14,7 @@ namespace DB
 
 class ASTExpressionList;
 class ASTOrderByElement;
+struct WindowFrame;
 
 /*
  * This is an AST-based query fuzzer that makes random modifications to query
@@ -65,6 +66,7 @@ struct QueryFuzzer
     void fuzzOrderByElement(ASTOrderByElement * elem);
     void fuzzOrderByList(IAST * ast);
     void fuzzColumnLikeExpressionList(IAST * ast);
+    void fuzzWindowFrame(WindowFrame & frame);
     void fuzz(ASTs & asts);
     void fuzz(ASTPtr & ast);
     void collectFuzzInfoMain(const ASTPtr ast);
diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference
index 7e286f753e5..bd1a954ddc4 100644
--- a/tests/queries/0_stateless/01591_window_functions.reference
+++ b/tests/queries/0_stateless/01591_window_functions.reference
@@ -558,3 +558,5 @@ settings max_block_size = 2;
 28	5	3	2	1
 29	5	2	1	0
 30	6	1	1	0
+-- seen a use-after-free under MSan in this query once
+SELECT number, max(number) OVER (PARTITION BY intDiv(number, 7) ORDER BY number ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM numbers(1024) SETTINGS max_block_size = 2 FORMAT Null;
diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql
index 3b4bdd03724..3a6d2f3d18a 100644
--- a/tests/queries/0_stateless/01591_window_functions.sql
+++ b/tests/queries/0_stateless/01591_window_functions.sql
@@ -174,3 +174,7 @@ select number, p,
 from (select number, intDiv(number, 5) p from numbers(31))
 order by p, number
 settings max_block_size = 2;
+
+-- seen a use-after-free under MSan in this query once
+SELECT number, max(number) OVER (PARTITION BY intDiv(number, 7) ORDER BY number ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM numbers(1024) SETTINGS max_block_size = 2 FORMAT Null;
+

From f8592f45393a0b6f642de9f7fc6cf068a0e7ad0c Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Wed, 3 Feb 2021 10:03:13 +0300
Subject: [PATCH 0564/1238] Remove useless files

---
 base/mysqlxx/StoreQueryResult.cpp | 30 ---------------------
 base/mysqlxx/StoreQueryResult.h   | 45 -------------------------------
 2 files changed, 75 deletions(-)
 delete mode 100644 base/mysqlxx/StoreQueryResult.cpp
 delete mode 100644 base/mysqlxx/StoreQueryResult.h

diff --git a/base/mysqlxx/StoreQueryResult.cpp b/base/mysqlxx/StoreQueryResult.cpp
deleted file mode 100644
index 620ed8def56..00000000000
--- a/base/mysqlxx/StoreQueryResult.cpp
+++ /dev/null
@@ -1,30 +0,0 @@
-#if __has_include(<mysql.h>)
-#include <mysql.h>
-#else
-#include <mysql/mysql.h>
-#endif
-
-#include <mysqlxx/Connection.h>
-#include <mysqlxx/StoreQueryResult.h>
-
-
-namespace mysqlxx
-{
-
-StoreQueryResult::StoreQueryResult(MYSQL_RES * res_, Connection * conn_, const Query * query_) : ResultBase(res_, conn_, query_)
-{
-    UInt64 rows = mysql_num_rows(res);
-    reserve(rows);
-    lengths.resize(rows * num_fields);
-
-    for (UInt64 i = 0; MYSQL_ROW row = mysql_fetch_row(res); ++i)
-    {
-        MYSQL_LENGTHS lengths_for_row = mysql_fetch_lengths(res);
-        memcpy(&lengths[i * num_fields], lengths_for_row, sizeof(lengths[0]) * num_fields);
-
-        push_back(Row(row, this, &lengths[i * num_fields]));
-    }
-    checkError(conn->getDriver());
-}
-
-}
diff --git a/base/mysqlxx/StoreQueryResult.h b/base/mysqlxx/StoreQueryResult.h
deleted file mode 100644
index 9c242d2782f..00000000000
--- a/base/mysqlxx/StoreQueryResult.h
+++ /dev/null
@@ -1,45 +0,0 @@
-#pragma once
-
-#include <vector>
-
-#include <mysqlxx/ResultBase.h>
-#include <mysqlxx/Row.h>
-
-
-namespace mysqlxx
-{
-
-class Connection;
-
-
-/** Результат выполнения запроса, загруженный полностью на клиента.
-  * Это требует оперативку, чтобы вместить весь результат,
-  *  но зато реализует произвольный доступ к строкам по индексу.
-  * Если размер результата большой - используйте лучше UseQueryResult.
-  * Объект содержит ссылку на Connection.
-  * Если уничтожить Connection, то объект становится некорректным и все строки результата - тоже.
-  * Если задать следующий запрос в соединении, то объект и все строки тоже становятся некорректными.
-  * Использовать объект можно только для результата одного запроса!
-  * (При попытке присвоить объекту результат следующего запроса - UB.)
-  */
-class StoreQueryResult : public std::vector<Row>, public ResultBase
-{
-public:
-    StoreQueryResult(MYSQL_RES * res_, Connection * conn_, const Query * query_);
-
-    size_t num_rows() const { return size(); }
-
-private:
-
-    /** Не смотря на то, что весь результат выполнения запроса загружается на клиента,
-      *  и все указатели MYSQL_ROW на отдельные строки различные,
-      *  при этом функция mysql_fetch_lengths() возвращает длины
-      *  для текущей строки по одному и тому же адресу.
-      * То есть, чтобы можно было пользоваться несколькими Row одновременно,
-      *  необходимо заранее куда-то сложить все длины.
-      */
-    using Lengths = std::vector<MYSQL_LENGTH>;
-    Lengths lengths;
-};
-
-}

From 940b337eec4bad040f38e656ba288d3a787be038 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Wed, 3 Feb 2021 10:03:32 +0300
Subject: [PATCH 0565/1238] Remove useless files

---
 base/mysqlxx/CMakeLists.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/base/mysqlxx/CMakeLists.txt b/base/mysqlxx/CMakeLists.txt
index b410c38cfad..849c58a8527 100644
--- a/base/mysqlxx/CMakeLists.txt
+++ b/base/mysqlxx/CMakeLists.txt
@@ -3,7 +3,6 @@ add_library (mysqlxx
     Exception.cpp
     Query.cpp
     ResultBase.cpp
-    StoreQueryResult.cpp
     UseQueryResult.cpp
     Row.cpp
     Value.cpp

From c334bdca1f3a865425d4886bf3c543fbeb6f77d1 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Wed, 3 Feb 2021 10:11:19 +0300
Subject: [PATCH 0566/1238] Fix NuKeeper server parameters

---
 src/Coordination/NuKeeperServer.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/Coordination/NuKeeperServer.cpp b/src/Coordination/NuKeeperServer.cpp
index bb74ea19aa7..1d99bf54ec8 100644
--- a/src/Coordination/NuKeeperServer.cpp
+++ b/src/Coordination/NuKeeperServer.cpp
@@ -45,9 +45,9 @@ void NuKeeperServer::addServer(int server_id_, const std::string & server_uri_,
 void NuKeeperServer::startup()
 {
     nuraft::raft_params params;
-    params.heart_beat_interval_ = 1000;
-    params.election_timeout_lower_bound_ = 500;
-    params.election_timeout_upper_bound_ = 1000;
+    params.heart_beat_interval_ = 500;
+    params.election_timeout_lower_bound_ = 1000;
+    params.election_timeout_upper_bound_ = 2000;
     params.reserved_log_items_ = 5000;
     params.snapshot_distance_ = 5000;
     params.client_req_timeout_ = 10000;
@@ -184,7 +184,7 @@ NuKeeperStorage::ResponsesForSessions NuKeeperServer::putRequests(const NuKeeper
                 auto response = request->makeResponse();
                 response->xid = request->xid;
                 response->zxid = 0; /// FIXME what we can do with it?
-                response->error = Coordination::Error::ZSESSIONEXPIRED;
+                response->error = Coordination::Error::ZOPERATIONTIMEOUT;
                 responses.push_back(DB::NuKeeperStorage::ResponseForSession{session_id, response});
             }
             return responses;

From 0c3ef018bbd62f8c8570bb6649427d716bc8af88 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Wed, 3 Feb 2021 10:15:57 +0300
Subject: [PATCH 0567/1238] Fix ya.make

---
 src/Coordination/ya.make | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/Coordination/ya.make b/src/Coordination/ya.make
index 470fe8c75be..f3eae68806c 100644
--- a/src/Coordination/ya.make
+++ b/src/Coordination/ya.make
@@ -8,8 +8,6 @@ PEERDIR(
 )
 
 SRCS(
-    NuKeeperStorageDispatcher.cpp
-
 )
 
 END()

From 3d522e5da9e1681180cbdf97bcc4e3785b08af2b Mon Sep 17 00:00:00 2001
From: George <gyuton@yandex-team.ru>
Date: Wed, 3 Feb 2021 10:56:53 +0300
Subject: [PATCH 0568/1238] Updated description

---
 docs/en/engines/database-engines/materialize-mysql.md | 1 +
 docs/en/operations/settings/settings.md               | 6 ++++--
 docs/en/sql-reference/statements/create/view.md       | 2 +-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/docs/en/engines/database-engines/materialize-mysql.md b/docs/en/engines/database-engines/materialize-mysql.md
index 89fe9304c4c..f78b6efba33 100644
--- a/docs/en/engines/database-engines/materialize-mysql.md
+++ b/docs/en/engines/database-engines/materialize-mysql.md
@@ -93,6 +93,7 @@ ClickHouse has only one physical order, which is determined by `ORDER BY` clause
 - Cascade `UPDATE/DELETE` queries are not supported by the `MaterializeMySQL` engine.
 - Replication can be easily broken.
 - Manual operations on database and tables are forbidden.
+- `MaterializeMySQL` is influenced by [optimize_on_insert](../../../operations/settings/settings.md#optimize-on-insert) settings. There is a merge of data in the corresponding table in `MaterializeMySQL` database when a table in MySQL server changes.
 
 ## Examples of Use {#examples-of-use}
 
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 09328015712..99bd8d879d1 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -2594,7 +2594,7 @@ Default value: `16`.
 
 ## optimize_on_insert {#optimize-on-insert}
 
-Do the same transformation for inserted block of data as if merge was done on this block.
+Do the same transformation for inserted block of data as if merge was done on this block (e.g. replacing, collapsing, aggregating...).
 
 Possible values:
 
@@ -2605,7 +2605,7 @@ Default value: 1.
 
 **Example**
 
-The difference enabled and disabled:
+The difference between enabled and disabled:
 
 Query:
 
@@ -2644,4 +2644,6 @@ Result:
 └─────────────┘
 ```
 
+Note that this setting influences [Materialized view](../../sql-reference/statements/create/view.md#materialized) and [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md) behaviour.
+
 [Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->
diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md
index e5b92bf187a..bca06ff87f9 100644
--- a/docs/en/sql-reference/statements/create/view.md
+++ b/docs/en/sql-reference/statements/create/view.md
@@ -59,7 +59,7 @@ A `SELECT` query can contain `DISTINCT`, `GROUP BY`, `ORDER BY`, `LIMIT`… Note
 
 The execution of [ALTER](../../../sql-reference/statements/alter/index.md) queries on materialized views has limitations, so they might be inconvenient. If the materialized view uses the construction `TO [db.]name`, you can `DETACH` the view, run `ALTER` for the target table, and then `ATTACH` the previously detached (`DETACH`) view.
 
-Note that Materialized view is influenced by [optimize_on_insert](../../../operations/settings/settings.md#optimize-on-insert) settings, because there is a merge of data before the insertion in a view.
+Note that materialized view is influenced by [optimize_on_insert](../../../operations/settings/settings.md#optimize-on-insert) settings. There is a merge of data before the insertion in a view.
 
 Views look the same as normal tables. For example, they are listed in the result of the `SHOW TABLES` query.
 

From 678f53eaef9987f1411a8558636c170645977412 Mon Sep 17 00:00:00 2001
From: George <gyuton@yandex-team.ru>
Date: Wed, 3 Feb 2021 11:15:25 +0300
Subject: [PATCH 0569/1238] Fixed a link

---
 docs/en/engines/database-engines/materialize-mysql.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/engines/database-engines/materialize-mysql.md b/docs/en/engines/database-engines/materialize-mysql.md
index f78b6efba33..6f92e484a28 100644
--- a/docs/en/engines/database-engines/materialize-mysql.md
+++ b/docs/en/engines/database-engines/materialize-mysql.md
@@ -157,4 +157,4 @@ SELECT * FROM mysql.test;
 └───┴─────┴──────┘
 ```
 
-[Original article](https://clickhouse.tech/docs/en/database_engines/materialize-mysql/) <!--hide-->
+[Original article](https://clickhouse.tech/docs/en/engines/database-engines/materialize-mysql/) <!--hide-->

From 004c33847e20c653783a48792bd77d249516be4b Mon Sep 17 00:00:00 2001
From: George <gyuton@yandex-team.ru>
Date: Wed, 3 Feb 2021 11:17:33 +0300
Subject: [PATCH 0570/1238] Added original article

---
 docs/en/sql-reference/statements/create/view.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md
index bca06ff87f9..596f52202e2 100644
--- a/docs/en/sql-reference/statements/create/view.md
+++ b/docs/en/sql-reference/statements/create/view.md
@@ -64,3 +64,5 @@ Note that materialized view is influenced by [optimize_on_insert](../../../opera
 Views look the same as normal tables. For example, they are listed in the result of the `SHOW TABLES` query.
 
 There isn’t a separate query for deleting views. To delete a view, use [DROP TABLE](../../../sql-reference/statements/drop.md).
+
+[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/create/view/) <!--hide-->

From b1a43b9d853a445124a548d6a8b412da0b85d792 Mon Sep 17 00:00:00 2001
From: George <gyuton@yandex-team.ru>
Date: Wed, 3 Feb 2021 11:20:32 +0300
Subject: [PATCH 0571/1238] fixed link

---
 docs/en/engines/database-engines/materialize-mysql.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/engines/database-engines/materialize-mysql.md b/docs/en/engines/database-engines/materialize-mysql.md
index 6f92e484a28..e459132c0ae 100644
--- a/docs/en/engines/database-engines/materialize-mysql.md
+++ b/docs/en/engines/database-engines/materialize-mysql.md
@@ -93,7 +93,7 @@ ClickHouse has only one physical order, which is determined by `ORDER BY` clause
 - Cascade `UPDATE/DELETE` queries are not supported by the `MaterializeMySQL` engine.
 - Replication can be easily broken.
 - Manual operations on database and tables are forbidden.
-- `MaterializeMySQL` is influenced by [optimize_on_insert](../../../operations/settings/settings.md#optimize-on-insert) settings. There is a merge of data in the corresponding table in `MaterializeMySQL` database when a table in MySQL server changes.
+- `MaterializeMySQL` is influenced by [optimize_on_insert](../../operations/settings/settings.md#optimize-on-insert) settings. There is a merge of data in the corresponding table in `MaterializeMySQL` database when a table in MySQL server changes.
 
 ## Examples of Use {#examples-of-use}
 

From 5e11e13351c194bc88e2e7f0686e7f2ebf377c19 Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@yandex-team.ru>
Date: Wed, 3 Feb 2021 12:05:47 +0300
Subject: [PATCH 0572/1238] Update version_date.tsv after release 20.11.7.16

---
 utils/list-versions/version_date.tsv | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index fc1cf7c1b67..9c4b9097622 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -3,6 +3,7 @@ v20.12.5.14-stable	2020-12-28
 v20.12.4.5-stable	2020-12-24
 v20.12.3.3-stable	2020-12-09
 v20.12.2.1-stable	2020-12-09
+v20.11.7.16-stable	2021-02-03
 v20.11.6.6-stable	2020-12-24
 v20.11.5.18-stable	2020-12-06
 v20.11.4.13-stable	2020-11-20

From ebdea09ec07540df4db62019468a86d2af3e69c9 Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@yandex-team.ru>
Date: Wed, 3 Feb 2021 12:08:04 +0300
Subject: [PATCH 0573/1238] Update version_date.tsv after release 21.1.3.32

---
 utils/list-versions/version_date.tsv | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index 9c4b9097622..3b498530b37 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -1,3 +1,4 @@
+v21.1.3.32-stable	2021-02-03
 v21.1.2.15-stable	2021-01-18
 v20.12.5.14-stable	2020-12-28
 v20.12.4.5-stable	2020-12-24

From dad4f82e0e53ab687fa2583ae5b4c75651de05b1 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Wed, 3 Feb 2021 12:08:46 +0300
Subject: [PATCH 0574/1238] fix invalid iterator addition

---
 src/Processors/Transforms/WindowTransform.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index cc526eba8c4..3dcd0a91bca 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -910,7 +910,7 @@ void WindowTransform::work()
 //            first_used_block);
 
         blocks.erase(blocks.begin(),
-            blocks.begin() + first_used_block - first_block_number);
+            blocks.begin() + (first_used_block - first_block_number));
         first_block_number = first_used_block;
 
         assert(next_output_block_number >= first_block_number);

From 3c0a5d222663fb9df136633a303845bcdaf60600 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Wed, 3 Feb 2021 12:08:46 +0300
Subject: [PATCH 0575/1238] fix invalid iterator addition

---
 src/Processors/Transforms/WindowTransform.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index 388e7a4af3b..fe55fe6bcad 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -659,7 +659,7 @@ void WindowTransform::work()
 //            first_used_block);
 
         blocks.erase(blocks.begin(),
-            blocks.begin() + first_used_block - first_block_number);
+            blocks.begin() + (first_used_block - first_block_number));
         first_block_number = first_used_block;
 
         assert(next_output_block_number >= first_block_number);

From ebab5df236a2e800f8995e43c847f96158dab7ad Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Wed, 3 Feb 2021 12:13:44 +0300
Subject: [PATCH 0576/1238] Update ya.make

---
 src/Functions/ya.make | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/Functions/ya.make b/src/Functions/ya.make
index b97a4a187e9..647bbbb47cb 100644
--- a/src/Functions/ya.make
+++ b/src/Functions/ya.make
@@ -452,6 +452,7 @@ SRCS(
     timeSlot.cpp
     timeSlots.cpp
     timezone.cpp
+    timezoneOffset.cpp
     toColumnTypeName.cpp
     toCustomWeek.cpp
     toDayOfMonth.cpp

From 53052f70403ce8f86ac7ec14947b575455a0a28e Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com>
Date: Wed, 3 Feb 2021 12:22:02 +0300
Subject: [PATCH 0577/1238] Update group_by_sundy_li.xml

---
 tests/performance/group_by_sundy_li.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/performance/group_by_sundy_li.xml b/tests/performance/group_by_sundy_li.xml
index a022692a017..c49712a8519 100644
--- a/tests/performance/group_by_sundy_li.xml
+++ b/tests/performance/group_by_sundy_li.xml
@@ -12,7 +12,7 @@
         )
         ENGINE = MergeTree
         PARTITION BY d
-        ORDER BY d, n
+        ORDER BY (d, n)
     </create_query>
 
     <fill_query>insert into a select '2000-01-01', ['aa','bb','cc','dd'][number % 4 + 1], number from  numbers_mt(100000000)</fill_query>

From def3c8fef7fe40077d9a4fed9be1fe8e474c8a76 Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@yandex-team.ru>
Date: Wed, 3 Feb 2021 12:26:09 +0300
Subject: [PATCH 0578/1238] Update version_date.tsv after release 20.12.5.18

---
 utils/list-versions/version_date.tsv | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index 3b498530b37..c4b27f3199d 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -1,5 +1,6 @@
 v21.1.3.32-stable	2021-02-03
 v21.1.2.15-stable	2021-01-18
+v20.12.5.18-stable	2021-02-03
 v20.12.5.14-stable	2020-12-28
 v20.12.4.5-stable	2020-12-24
 v20.12.3.3-stable	2020-12-09

From 02da869d10e04f0031a2a28e4128a4730e555325 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Wed, 3 Feb 2021 12:46:41 +0300
Subject: [PATCH 0579/1238] Remove useless code

---
 base/common/LocalDate.h     |  8 --------
 base/common/LocalDateTime.h | 14 --------------
 2 files changed, 22 deletions(-)

diff --git a/base/common/LocalDate.h b/base/common/LocalDate.h
index a063d6e98a3..e5ebe877bc5 100644
--- a/base/common/LocalDate.h
+++ b/base/common/LocalDate.h
@@ -168,14 +168,6 @@ public:
 static_assert(sizeof(LocalDate) == 4);
 
 
-inline std::ostream & operator<< (std::ostream & ostr, const LocalDate & date)
-{
-    return ostr << date.year()
-        << '-' << (date.month() / 10) << (date.month() % 10)
-        << '-' << (date.day() / 10) << (date.day() % 10);
-}
-
-
 namespace std
 {
 inline string to_string(const LocalDate & date)
diff --git a/base/common/LocalDateTime.h b/base/common/LocalDateTime.h
index d19d862f2ca..0e237789bd1 100644
--- a/base/common/LocalDateTime.h
+++ b/base/common/LocalDateTime.h
@@ -169,20 +169,6 @@ public:
 static_assert(sizeof(LocalDateTime) == 8);
 
 
-inline std::ostream & operator<< (std::ostream & ostr, const LocalDateTime & datetime)
-{
-    ostr << std::setfill('0') << std::setw(4) << datetime.year();
-
-    ostr << '-' << (datetime.month() / 10) << (datetime.month() % 10)
-        << '-' << (datetime.day() / 10) << (datetime.day() % 10)
-        << ' ' << (datetime.hour() / 10) << (datetime.hour() % 10)
-        << ':' << (datetime.minute() / 10) << (datetime.minute() % 10)
-        << ':' << (datetime.second() / 10) << (datetime.second() % 10);
-
-    return ostr;
-}
-
-
 namespace std
 {
 inline string to_string(const LocalDateTime & datetime)

From 5796b6fbf9da32b1e377dc0a8e5eada3f4adde31 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Wed, 3 Feb 2021 13:22:08 +0300
Subject: [PATCH 0580/1238] update changelog

---
 CHANGELOG.md                        |  32 ++++++
 docs/en/whats-new/changelog/2020.md | 151 ++++++++++++++++++++++++++++
 2 files changed, 183 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b328dcf5c88..0fcd75639ce 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,37 @@
 ## ClickHouse release 21.1
 
+### ClickHouse release v21.1.3.32-stable, 2021-02-03
+
+#### Bug Fix
+
+* BloomFilter index crash fix. Fixes [#19757](https://github.com/ClickHouse/ClickHouse/issues/19757). [#19884](https://github.com/ClickHouse/ClickHouse/pull/19884) ([Maksim Kita](https://github.com/kitaisreal)).
+* Deadlock was possible if system.text_log is enabled. This fixes [#19874](https://github.com/ClickHouse/ClickHouse/issues/19874). [#19875](https://github.com/ClickHouse/ClickHouse/pull/19875) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix crash when pushing down predicates to union distinct subquery. This fixes [#19855](https://github.com/ClickHouse/ClickHouse/issues/19855). [#19861](https://github.com/ClickHouse/ClickHouse/pull/19861) ([Amos Bird](https://github.com/amosbird)).
+* Fix filtering by UInt8 greater than 127. [#19799](https://github.com/ClickHouse/ClickHouse/pull/19799) ([Anton Popov](https://github.com/CurtizJ)).
+* In previous versions, unusual arguments for function arrayEnumerateUniq may cause crash or infinite loop. This closes [#19787](https://github.com/ClickHouse/ClickHouse/issues/19787). [#19788](https://github.com/ClickHouse/ClickHouse/pull/19788) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fixed stack overflow when using accurate comparison of arithmetic type with string type. [#19773](https://github.com/ClickHouse/ClickHouse/pull/19773) ([tavplubix](https://github.com/tavplubix)).
+* Fix crash when nested column name was used in `WHERE` or `PREWHERE`. Fixes [#19755](https://github.com/ClickHouse/ClickHouse/issues/19755). [#19763](https://github.com/ClickHouse/ClickHouse/pull/19763) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix a segmentation fault in `bitmapAndnot` function. Fixes [#19668](https://github.com/ClickHouse/ClickHouse/issues/19668). [#19713](https://github.com/ClickHouse/ClickHouse/pull/19713) ([Maksim Kita](https://github.com/kitaisreal)).
+* Some functions with big integers may cause segfault. Big integers is experimental feature. This closes [#19667](https://github.com/ClickHouse/ClickHouse/issues/19667). [#19672](https://github.com/ClickHouse/ClickHouse/pull/19672) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix wrong result of function `neighbor` for `LowCardinality` argument. Fixes [#10333](https://github.com/ClickHouse/ClickHouse/issues/10333). [#19617](https://github.com/ClickHouse/ClickHouse/pull/19617) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix use-after-free of the CompressedWriteBuffer in Connection after disconnect. [#19599](https://github.com/ClickHouse/ClickHouse/pull/19599) ([Azat Khuzhin](https://github.com/azat)).
+* `DROP/DETACH TABLE table ON CLUSTER cluster SYNC` query might hang, it's fixed. Fixes [#19568](https://github.com/ClickHouse/ClickHouse/issues/19568). [#19572](https://github.com/ClickHouse/ClickHouse/pull/19572) ([tavplubix](https://github.com/tavplubix)).
+* Query CREATE DICTIONARY id expression fix. [#19571](https://github.com/ClickHouse/ClickHouse/pull/19571) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fix SIGSEGV with merge_tree_min_rows_for_concurrent_read/merge_tree_min_bytes_for_concurrent_read=0/UINT64_MAX. [#19528](https://github.com/ClickHouse/ClickHouse/pull/19528) ([Azat Khuzhin](https://github.com/azat)).
+* Buffer overflow (on memory read) was possible if `addMonth` function was called with specifically crafted arguments. This fixes [#19441](https://github.com/ClickHouse/ClickHouse/issues/19441). This fixes [#19413](https://github.com/ClickHouse/ClickHouse/issues/19413). [#19472](https://github.com/ClickHouse/ClickHouse/pull/19472) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Uninitialized memory read was possible in encrypt/decrypt functions if empty string was passed as IV. This closes [#19391](https://github.com/ClickHouse/ClickHouse/issues/19391). [#19397](https://github.com/ClickHouse/ClickHouse/pull/19397) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix possible buffer overflow in Uber H3 library. See https://github.com/uber/h3/issues/392. This closes [#19219](https://github.com/ClickHouse/ClickHouse/issues/19219). [#19383](https://github.com/ClickHouse/ClickHouse/pull/19383) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Added `cast`, `accurateCast`, `accurateCastOrNull` performance tests. [#19354](https://github.com/ClickHouse/ClickHouse/pull/19354) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fix system.parts _state column (LOGICAL_ERROR when querying this column, due to incorrect order). [#19346](https://github.com/ClickHouse/ClickHouse/pull/19346) ([Azat Khuzhin](https://github.com/azat)).
+* Fixed possible wrong result or segfault on aggregation when Materialized View and its target table have different structure. Fixes [#18063](https://github.com/ClickHouse/ClickHouse/issues/18063). [#19322](https://github.com/ClickHouse/ClickHouse/pull/19322) ([tavplubix](https://github.com/tavplubix)).
+* Fix error `Cannot convert column now64() because it is constant but values of constants are different in source and result`. Continuation of [#7156](https://github.com/ClickHouse/ClickHouse/issues/7156). [#19316](https://github.com/ClickHouse/ClickHouse/pull/19316) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix bug when concurrent `ALTER` and `DROP` queries may hang while processing ReplicatedMergeTree table. [#19237](https://github.com/ClickHouse/ClickHouse/pull/19237) ([alesapin](https://github.com/alesapin)).
+* Fixed `There is no checkpoint` error when inserting data through http interface using `Template` or `CustomSeparated` format. Fixes [#19021](https://github.com/ClickHouse/ClickHouse/issues/19021). [#19072](https://github.com/ClickHouse/ClickHouse/pull/19072) ([tavplubix](https://github.com/tavplubix)).
+* Disable constant folding for subqueries on the analysis stage, when the result cannot be calculated. [#18446](https://github.com/ClickHouse/ClickHouse/pull/18446) ([Azat Khuzhin](https://github.com/azat)).
+* Mutation might hang waiting for some non-existent part after `MOVE` or `REPLACE PARTITION` or, in rare cases, after `DETACH` or `DROP PARTITION`. It's fixed. [#15537](https://github.com/ClickHouse/ClickHouse/pull/15537) ([tavplubix](https://github.com/tavplubix)).
+
+
+
 ### ClickHouse release v21.1.2.15-stable 2021-01-18
 
 #### Backward Incompatible Change
diff --git a/docs/en/whats-new/changelog/2020.md b/docs/en/whats-new/changelog/2020.md
index 5975edd3c6c..bf4e4fb0fcc 100644
--- a/docs/en/whats-new/changelog/2020.md
+++ b/docs/en/whats-new/changelog/2020.md
@@ -5,6 +5,22 @@ toc_title: '2020'
 
 ### ClickHouse release 20.12
 
+### ClickHouse release v20.12.5.14-stable, 2020-12-28
+
+#### Bug Fix
+
+* Disable write with AIO during merges because it can lead to extremely rare data corruption of primary key columns during merge. [#18481](https://github.com/ClickHouse/ClickHouse/pull/18481) ([alesapin](https://github.com/alesapin)).
+* Fixed `value is too short` error when executing `toType(...)` functions (`toDate`, `toUInt32`, etc) with argument of type `Nullable(String)`. Now such functions return `NULL` on parsing errors instead of throwing exception. Fixes [#7673](https://github.com/ClickHouse/ClickHouse/issues/7673). [#18445](https://github.com/ClickHouse/ClickHouse/pull/18445) ([tavplubix](https://github.com/tavplubix)).
+* Restrict merges from wide to compact parts. In case of vertical merge it led to broken result part. [#18381](https://github.com/ClickHouse/ClickHouse/pull/18381) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix filling table `system.settings_profile_elements`. This PR fixes [#18231](https://github.com/ClickHouse/ClickHouse/issues/18231). [#18379](https://github.com/ClickHouse/ClickHouse/pull/18379) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix possible crashes in aggregate functions with combinator `Distinct`, while using two-level aggregation. Fixes [#17682](https://github.com/ClickHouse/ClickHouse/issues/17682). [#18365](https://github.com/ClickHouse/ClickHouse/pull/18365) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix error when query `MODIFY COLUMN ... REMOVE TTL` doesn't actually remove column TTL. [#18130](https://github.com/ClickHouse/ClickHouse/pull/18130) ([alesapin](https://github.com/alesapin)).
+
+#### Build/Testing/Packaging Improvement
+
+* Update timezones info to 2020e. [#18531](https://github.com/ClickHouse/ClickHouse/pull/18531) ([alesapin](https://github.com/alesapin)).
+
+
 ### ClickHouse release v20.12.4.5-stable, 2020-12-24
 
 #### Bug Fix
@@ -142,6 +158,70 @@ toc_title: '2020'
 
 ## ClickHouse release 20.11
 
+### ClickHouse release v20.11.7.16-stable, 2021-03-02
+
+#### Improvement
+
+* Explicitly set uid / gid of clickhouse user & group to the fixed values (101) in clickhouse-server images. [#19096](https://github.com/ClickHouse/ClickHouse/pull/19096) ([filimonov](https://github.com/filimonov)).
+
+#### Bug Fix
+
+* BloomFilter index crash fix. Fixes [#19757](https://github.com/ClickHouse/ClickHouse/issues/19757). [#19884](https://github.com/ClickHouse/ClickHouse/pull/19884) ([Maksim Kita](https://github.com/kitaisreal)).
+* Deadlock was possible if system.text_log is enabled. This fixes [#19874](https://github.com/ClickHouse/ClickHouse/issues/19874). [#19875](https://github.com/ClickHouse/ClickHouse/pull/19875) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* In previous versions, unusual arguments for function arrayEnumerateUniq may cause crash or infinite loop. This closes [#19787](https://github.com/ClickHouse/ClickHouse/issues/19787). [#19788](https://github.com/ClickHouse/ClickHouse/pull/19788) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fixed stack overflow when using accurate comparison of arithmetic type with string type. [#19773](https://github.com/ClickHouse/ClickHouse/pull/19773) ([tavplubix](https://github.com/tavplubix)).
+* Fix a segmentation fault in `bitmapAndnot` function. Fixes [#19668](https://github.com/ClickHouse/ClickHouse/issues/19668). [#19713](https://github.com/ClickHouse/ClickHouse/pull/19713) ([Maksim Kita](https://github.com/kitaisreal)).
+* Some functions with big integers may cause segfault. Big integers is experimental feature. This closes [#19667](https://github.com/ClickHouse/ClickHouse/issues/19667). [#19672](https://github.com/ClickHouse/ClickHouse/pull/19672) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix wrong result of function `neighbor` for `LowCardinality` argument. Fixes [#10333](https://github.com/ClickHouse/ClickHouse/issues/10333). [#19617](https://github.com/ClickHouse/ClickHouse/pull/19617) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix use-after-free of the CompressedWriteBuffer in Connection after disconnect. [#19599](https://github.com/ClickHouse/ClickHouse/pull/19599) ([Azat Khuzhin](https://github.com/azat)).
+* `DROP/DETACH TABLE table ON CLUSTER cluster SYNC` query might hang, it's fixed. Fixes [#19568](https://github.com/ClickHouse/ClickHouse/issues/19568). [#19572](https://github.com/ClickHouse/ClickHouse/pull/19572) ([tavplubix](https://github.com/tavplubix)).
+* Query CREATE DICTIONARY id expression fix. [#19571](https://github.com/ClickHouse/ClickHouse/pull/19571) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fix SIGSEGV with merge_tree_min_rows_for_concurrent_read/merge_tree_min_bytes_for_concurrent_read=0/UINT64_MAX. [#19528](https://github.com/ClickHouse/ClickHouse/pull/19528) ([Azat Khuzhin](https://github.com/azat)).
+* Buffer overflow (on memory read) was possible if `addMonth` function was called with specifically crafted arguments. This fixes [#19441](https://github.com/ClickHouse/ClickHouse/issues/19441). This fixes [#19413](https://github.com/ClickHouse/ClickHouse/issues/19413). [#19472](https://github.com/ClickHouse/ClickHouse/pull/19472) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Mark distributed batch as broken in case of empty data block in one of files. [#19449](https://github.com/ClickHouse/ClickHouse/pull/19449) ([Azat Khuzhin](https://github.com/azat)).
+* Fix possible buffer overflow in Uber H3 library. See https://github.com/uber/h3/issues/392. This closes [#19219](https://github.com/ClickHouse/ClickHouse/issues/19219). [#19383](https://github.com/ClickHouse/ClickHouse/pull/19383) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix system.parts _state column (LOGICAL_ERROR when querying this column, due to incorrect order). [#19346](https://github.com/ClickHouse/ClickHouse/pull/19346) ([Azat Khuzhin](https://github.com/azat)).
+* Fix error `Cannot convert column now64() because it is constant but values of constants are different in source and result`. Continuation of [#7156](https://github.com/ClickHouse/ClickHouse/issues/7156). [#19316](https://github.com/ClickHouse/ClickHouse/pull/19316) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix bug when concurrent `ALTER` and `DROP` queries may hang while processing ReplicatedMergeTree table. [#19237](https://github.com/ClickHouse/ClickHouse/pull/19237) ([alesapin](https://github.com/alesapin)).
+* Fix infinite reading from file in `ORC` format (was introduced in [#10580](https://github.com/ClickHouse/ClickHouse/issues/10580)). Fixes [#19095](https://github.com/ClickHouse/ClickHouse/issues/19095). [#19134](https://github.com/ClickHouse/ClickHouse/pull/19134) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix startup bug when clickhouse was not able to read compression codec from `LowCardinality(Nullable(...))` and throws exception `Attempt to read after EOF`. Fixes [#18340](https://github.com/ClickHouse/ClickHouse/issues/18340). [#19101](https://github.com/ClickHouse/ClickHouse/pull/19101) ([alesapin](https://github.com/alesapin)).
+* Fixed `There is no checkpoint` error when inserting data through http interface using `Template` or `CustomSeparated` format. Fixes [#19021](https://github.com/ClickHouse/ClickHouse/issues/19021). [#19072](https://github.com/ClickHouse/ClickHouse/pull/19072) ([tavplubix](https://github.com/tavplubix)).
+* Restrict `MODIFY TTL` queries for `MergeTree` tables created in old syntax. Previously the query succeeded, but actually it had no effect. [#19064](https://github.com/ClickHouse/ClickHouse/pull/19064) ([Anton Popov](https://github.com/CurtizJ)).
+* Make sure `groupUniqArray` returns correct type for argument of Enum type. This closes [#17875](https://github.com/ClickHouse/ClickHouse/issues/17875). [#19019](https://github.com/ClickHouse/ClickHouse/pull/19019) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix possible error `Expected single dictionary argument for function` if use function `ignore` with `LowCardinality` argument. Fixes [#14275](https://github.com/ClickHouse/ClickHouse/issues/14275). [#19016](https://github.com/ClickHouse/ClickHouse/pull/19016) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix inserting of `LowCardinality` column to table with `TinyLog` engine. Fixes [#18629](https://github.com/ClickHouse/ClickHouse/issues/18629). [#19010](https://github.com/ClickHouse/ClickHouse/pull/19010) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Disable `optimize_move_functions_out_of_any` because optimization is not always correct. This closes [#18051](https://github.com/ClickHouse/ClickHouse/issues/18051). This closes [#18973](https://github.com/ClickHouse/ClickHouse/issues/18973). [#18981](https://github.com/ClickHouse/ClickHouse/pull/18981) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fixed very rare deadlock at shutdown. [#18977](https://github.com/ClickHouse/ClickHouse/pull/18977) ([tavplubix](https://github.com/tavplubix)).
+* Fix bug when mutation with some escaped text (like `ALTER ... UPDATE e = CAST('foo', 'Enum8(\'foo\' = 1')` serialized incorrectly. Fixes [#18878](https://github.com/ClickHouse/ClickHouse/issues/18878). [#18944](https://github.com/ClickHouse/ClickHouse/pull/18944) ([alesapin](https://github.com/alesapin)).
+* Attach partition should reset the mutation. [#18804](https://github.com/ClickHouse/ClickHouse/issues/18804). [#18935](https://github.com/ClickHouse/ClickHouse/pull/18935) ([fastio](https://github.com/fastio)).
+* Fix possible hang at shutdown in clickhouse-local. This fixes [#18891](https://github.com/ClickHouse/ClickHouse/issues/18891). [#18893](https://github.com/ClickHouse/ClickHouse/pull/18893) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix *If combinator with unary function and Nullable types. [#18806](https://github.com/ClickHouse/ClickHouse/pull/18806) ([Azat Khuzhin](https://github.com/azat)).
+* Asynchronous distributed INSERTs can be rejected by the server if the setting `network_compression_method` is globally set to non-default value. This fixes [#18741](https://github.com/ClickHouse/ClickHouse/issues/18741). [#18776](https://github.com/ClickHouse/ClickHouse/pull/18776) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fixed `Attempt to read after eof` error when trying to `CAST` `NULL` from `Nullable(String)` to `Nullable(Decimal(P, S))`. Now function `CAST` returns `NULL` when it cannot parse decimal from nullable string. Fixes [#7690](https://github.com/ClickHouse/ClickHouse/issues/7690). [#18718](https://github.com/ClickHouse/ClickHouse/pull/18718) ([Winter Zhang](https://github.com/zhang2014)).
+* Fix Logger with unmatched arg size. [#18717](https://github.com/ClickHouse/ClickHouse/pull/18717) ([sundyli](https://github.com/sundy-li)).
+* Add FixedString Data type support. I'll get this exception "Code: 50, e.displayText() = DB::Exception: Unsupported type FixedString(1)" when replicating data from MySQL to ClickHouse. This patch fixes bug [#18450](https://github.com/ClickHouse/ClickHouse/issues/18450) Also fixes [#6556](https://github.com/ClickHouse/ClickHouse/issues/6556). [#18553](https://github.com/ClickHouse/ClickHouse/pull/18553) ([awesomeleo](https://github.com/awesomeleo)).
+* Fix possible `Pipeline stuck` error while using `ORDER BY` after subquery with `RIGHT` or `FULL` join. [#18550](https://github.com/ClickHouse/ClickHouse/pull/18550) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix bug which may lead to `ALTER` queries hung after corresponding mutation kill. Found by thread fuzzer. [#18518](https://github.com/ClickHouse/ClickHouse/pull/18518) ([alesapin](https://github.com/alesapin)).
+* Disable write with AIO during merges because it can lead to extremely rare data corruption of primary key columns during merge. [#18481](https://github.com/ClickHouse/ClickHouse/pull/18481) ([alesapin](https://github.com/alesapin)).
+* Disable constant folding for subqueries on the analysis stage, when the result cannot be calculated. [#18446](https://github.com/ClickHouse/ClickHouse/pull/18446) ([Azat Khuzhin](https://github.com/azat)).
+* Fixed `value is too short` error when executing `toType(...)` functions (`toDate`, `toUInt32`, etc) with argument of type `Nullable(String)`. Now such functions return `NULL` on parsing errors instead of throwing exception. Fixes [#7673](https://github.com/ClickHouse/ClickHouse/issues/7673). [#18445](https://github.com/ClickHouse/ClickHouse/pull/18445) ([tavplubix](https://github.com/tavplubix)).
+* Restrict merges from wide to compact parts. In case of vertical merge it led to broken result part. [#18381](https://github.com/ClickHouse/ClickHouse/pull/18381) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix filling table `system.settings_profile_elements`. This PR fixes [#18231](https://github.com/ClickHouse/ClickHouse/issues/18231). [#18379](https://github.com/ClickHouse/ClickHouse/pull/18379) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix index analysis of binary functions with constant argument which leads to wrong query results. This fixes [#18364](https://github.com/ClickHouse/ClickHouse/issues/18364). [#18373](https://github.com/ClickHouse/ClickHouse/pull/18373) ([Amos Bird](https://github.com/amosbird)).
+* Fix possible crashes in aggregate functions with combinator `Distinct`, while using two-level aggregation. Fixes [#17682](https://github.com/ClickHouse/ClickHouse/issues/17682). [#18365](https://github.com/ClickHouse/ClickHouse/pull/18365) ([Anton Popov](https://github.com/CurtizJ)).
+* `SELECT count() FROM table` now can be executed if only one any column can be selected from the `table`. This PR fixes [#10639](https://github.com/ClickHouse/ClickHouse/issues/10639). [#18233](https://github.com/ClickHouse/ClickHouse/pull/18233) ([Vitaly Baranov](https://github.com/vitlibar)).
+* `SELECT JOIN` now requires the `SELECT` privilege on each of the joined tables. This PR fixes [#17654](https://github.com/ClickHouse/ClickHouse/issues/17654). [#18232](https://github.com/ClickHouse/ClickHouse/pull/18232) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix possible incomplete query result while reading from `MergeTree*` in case of read backoff (message `<Debug> MergeTreeReadPool: Will lower number of threads` in logs). Was introduced in [#16423](https://github.com/ClickHouse/ClickHouse/issues/16423). Fixes [#18137](https://github.com/ClickHouse/ClickHouse/issues/18137). [#18216](https://github.com/ClickHouse/ClickHouse/pull/18216) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix error when query `MODIFY COLUMN ... REMOVE TTL` doesn't actually remove column TTL. [#18130](https://github.com/ClickHouse/ClickHouse/pull/18130) ([alesapin](https://github.com/alesapin)).
+* Fix indeterministic functions with predicate optimizer. This fixes [#17244](https://github.com/ClickHouse/ClickHouse/issues/17244). [#17273](https://github.com/ClickHouse/ClickHouse/pull/17273) ([Winter Zhang](https://github.com/zhang2014)).
+* Mutation might hang waiting for some non-existent part after `MOVE` or `REPLACE PARTITION` or, in rare cases, after `DETACH` or `DROP PARTITION`. It's fixed. [#15537](https://github.com/ClickHouse/ClickHouse/pull/15537) ([tavplubix](https://github.com/tavplubix)).
+
+#### Build/Testing/Packaging Improvement
+
+* Update timezones info to 2020e. [#18531](https://github.com/ClickHouse/ClickHouse/pull/18531) ([alesapin](https://github.com/alesapin)).
+
+
+
 ### ClickHouse release v20.11.6.6-stable, 2020-12-24
 
 #### Bug Fix
@@ -588,6 +668,60 @@ toc_title: '2020'
 
 ## ClickHouse release 20.9
 
+### ClickHouse release v20.9.7.11-stable, 2020-12-07
+
+#### Performance Improvement
+
+* Fix performance of reading from `Merge` tables over huge number of `MergeTree` tables. Fixes [#7748](https://github.com/ClickHouse/ClickHouse/issues/7748). [#16988](https://github.com/ClickHouse/ClickHouse/pull/16988) ([Anton Popov](https://github.com/CurtizJ)).
+
+#### Bug Fix
+
+* Do not restore parts from WAL if `in_memory_parts_enable_wal` is disabled. [#17802](https://github.com/ClickHouse/ClickHouse/pull/17802) ([detailyang](https://github.com/detailyang)).
+* Fixed segfault when there is not enough space when inserting into `Distributed` table. [#17737](https://github.com/ClickHouse/ClickHouse/pull/17737) ([tavplubix](https://github.com/tavplubix)).
+* Fixed problem when ClickHouse fails to resume connection to MySQL servers. [#17681](https://github.com/ClickHouse/ClickHouse/pull/17681) ([Alexander Kazakov](https://github.com/Akazz)).
+* Fixed `Function not implemented` error when executing `RENAME` query in `Atomic` database with ClickHouse running on Windows Subsystem for Linux. Fixes [#17661](https://github.com/ClickHouse/ClickHouse/issues/17661). [#17664](https://github.com/ClickHouse/ClickHouse/pull/17664) ([tavplubix](https://github.com/tavplubix)).
+* When clickhouse-client is used in interactive mode with multiline queries, single line comment was erronously extended till the end of query. This fixes [#13654](https://github.com/ClickHouse/ClickHouse/issues/13654). [#17565](https://github.com/ClickHouse/ClickHouse/pull/17565) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix the issue when server can stop accepting connections in very rare cases. [#17542](https://github.com/ClickHouse/ClickHouse/pull/17542) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix alter query hang when the corresponding mutation was killed on the different replica. Fixes [#16953](https://github.com/ClickHouse/ClickHouse/issues/16953). [#17499](https://github.com/ClickHouse/ClickHouse/pull/17499) ([alesapin](https://github.com/alesapin)).
+* Fix bug when mark cache size was underestimated by clickhouse. It may happen when there are a lot of tiny files with marks. [#17496](https://github.com/ClickHouse/ClickHouse/pull/17496) ([alesapin](https://github.com/alesapin)).
+* Fix `ORDER BY` with enabled setting `optimize_redundant_functions_in_order_by`. [#17471](https://github.com/ClickHouse/ClickHouse/pull/17471) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix duplicates after `DISTINCT` which were possible because of incorrect optimization. Fixes [#17294](https://github.com/ClickHouse/ClickHouse/issues/17294). [#17296](https://github.com/ClickHouse/ClickHouse/pull/17296) ([li chengxiang](https://github.com/chengxianglibra)). [#17439](https://github.com/ClickHouse/ClickHouse/pull/17439) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix crash while reading from `JOIN` table with `LowCardinality` types. Fixes [#17228](https://github.com/ClickHouse/ClickHouse/issues/17228). [#17397](https://github.com/ClickHouse/ClickHouse/pull/17397) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix set index invalidation when there are const columns in the subquery. This fixes [#17246](https://github.com/ClickHouse/ClickHouse/issues/17246) . [#17249](https://github.com/ClickHouse/ClickHouse/pull/17249) ([Amos Bird](https://github.com/amosbird)).
+* Fix ColumnConst comparison which leads to crash. This fixed [#17088](https://github.com/ClickHouse/ClickHouse/issues/17088) . [#17135](https://github.com/ClickHouse/ClickHouse/pull/17135) ([Amos Bird](https://github.com/amosbird)).
+* Fixed crash on `CREATE TABLE ... AS some_table` query when `some_table` was created `AS table_function()` Fixes [#16944](https://github.com/ClickHouse/ClickHouse/issues/16944). [#17072](https://github.com/ClickHouse/ClickHouse/pull/17072) ([tavplubix](https://github.com/tavplubix)).
+* Bug fix for funciton fuzzBits, related issue: [#16980](https://github.com/ClickHouse/ClickHouse/issues/16980). [#17051](https://github.com/ClickHouse/ClickHouse/pull/17051) ([hexiaoting](https://github.com/hexiaoting)).
+* Avoid unnecessary network errors for remote queries which may be cancelled while execution, like queries with `LIMIT`. [#17006](https://github.com/ClickHouse/ClickHouse/pull/17006) ([Azat Khuzhin](https://github.com/azat)).
+* TODO. [#16866](https://github.com/ClickHouse/ClickHouse/pull/16866) ([tavplubix](https://github.com/tavplubix)).
+* Return number of affected rows for INSERT queries via MySQL protocol. Previously ClickHouse used to always return 0, it's fixed. Fixes [#16605](https://github.com/ClickHouse/ClickHouse/issues/16605). [#16715](https://github.com/ClickHouse/ClickHouse/pull/16715) ([Winter Zhang](https://github.com/zhang2014)).
+
+#### Build/Testing/Packaging Improvement
+
+* Update embedded timezone data to version 2020d (also update cctz to the latest master). [#17204](https://github.com/ClickHouse/ClickHouse/pull/17204) ([filimonov](https://github.com/filimonov)).
+
+
+### ClickHouse release v20.9.6.14-stable, 2020-11-20
+
+#### Improvement
+
+* Make it possible to connect to `clickhouse-server` secure endpoint which requires SNI. This is possible when `clickhouse-server` is hosted behind TLS proxy. [#16938](https://github.com/ClickHouse/ClickHouse/pull/16938) ([filimonov](https://github.com/filimonov)).
+* Conditional aggregate functions (for example: `avgIf`, `sumIf`, `maxIf`) should return `NULL` when miss rows and use nullable arguments. [#13964](https://github.com/ClickHouse/ClickHouse/pull/13964) ([Winter Zhang](https://github.com/zhang2014)).
+
+#### Bug Fix
+
+* Fix bug when `ON CLUSTER` queries may hang forever for non-leader ReplicatedMergeTreeTables. [#17089](https://github.com/ClickHouse/ClickHouse/pull/17089) ([alesapin](https://github.com/alesapin)).
+* Reresolve the IP of the `format_avro_schema_registry_url` in case of errors. [#16985](https://github.com/ClickHouse/ClickHouse/pull/16985) ([filimonov](https://github.com/filimonov)).
+* Fix possible server crash after `ALTER TABLE ... MODIFY COLUMN ... NewType` when `SELECT` have `WHERE` expression on altering column and alter doesn't finished yet. [#16968](https://github.com/ClickHouse/ClickHouse/pull/16968) ([Amos Bird](https://github.com/amosbird)).
+* Install script should always create subdirs in config folders. This is only relevant for Docker build with custom config. [#16936](https://github.com/ClickHouse/ClickHouse/pull/16936) ([filimonov](https://github.com/filimonov)).
+* Fix possible error `Illegal type of argument` for queries with `ORDER BY`. Fixes [#16580](https://github.com/ClickHouse/ClickHouse/issues/16580). [#16928](https://github.com/ClickHouse/ClickHouse/pull/16928) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Abort multipart upload if no data was written to WriteBufferFromS3. [#16840](https://github.com/ClickHouse/ClickHouse/pull/16840) ([Pavel Kovalenko](https://github.com/Jokser)).
+* Fix crash when using `any` without any arguments. This is for [#16803](https://github.com/ClickHouse/ClickHouse/issues/16803) . cc @azat. [#16826](https://github.com/ClickHouse/ClickHouse/pull/16826) ([Amos Bird](https://github.com/amosbird)).
+* Fix `IN` operator over several columns and tuples with enabled `transform_null_in` setting. Fixes [#15310](https://github.com/ClickHouse/ClickHouse/issues/15310). [#16722](https://github.com/ClickHouse/ClickHouse/pull/16722) ([Anton Popov](https://github.com/CurtizJ)).
+* This will fix optimize_read_in_order/optimize_aggregation_in_order with max_threads>0 and expression in ORDER BY. [#16637](https://github.com/ClickHouse/ClickHouse/pull/16637) ([Azat Khuzhin](https://github.com/azat)).
+* fixes [#16574](https://github.com/ClickHouse/ClickHouse/issues/16574) fixes [#16231](https://github.com/ClickHouse/ClickHouse/issues/16231) fix remote query failure when using 'if' suffix aggregate function. [#16610](https://github.com/ClickHouse/ClickHouse/pull/16610) ([Winter Zhang](https://github.com/zhang2014)).
+* Query is finished faster in case of exception. Cancel execution on remote replicas if exception happens. [#15578](https://github.com/ClickHouse/ClickHouse/pull/15578) ([Azat Khuzhin](https://github.com/azat)).
+
+
 ### ClickHouse release v20.9.5.5-stable, 2020-11-13
 
 #### Bug Fix
@@ -744,6 +878,23 @@ toc_title: '2020'
 
 ## ClickHouse release 20.8
 
+### ClickHouse release v20.8.12.2-lts, 2021-01-16
+
+#### Bug Fix
+
+* Fix *If combinator with unary function and Nullable types. [#18806](https://github.com/ClickHouse/ClickHouse/pull/18806) ([Azat Khuzhin](https://github.com/azat)).
+* Restrict merges from wide to compact parts. In case of vertical merge it led to broken result part. [#18381](https://github.com/ClickHouse/ClickHouse/pull/18381) ([Anton Popov](https://github.com/CurtizJ)).
+
+
+### ClickHouse release v20.8.11.17-lts, 2020-12-25
+
+#### Bug Fix
+
+* Disable write with AIO during merges because it can lead to extremely rare data corruption of primary key columns during merge. [#18481](https://github.com/ClickHouse/ClickHouse/pull/18481) ([alesapin](https://github.com/alesapin)).
+* Fixed `value is too short` error when executing `toType(...)` functions (`toDate`, `toUInt32`, etc) with argument of type `Nullable(String)`. Now such functions return `NULL` on parsing errors instead of throwing exception. Fixes [#7673](https://github.com/ClickHouse/ClickHouse/issues/7673). [#18445](https://github.com/ClickHouse/ClickHouse/pull/18445) ([tavplubix](https://github.com/tavplubix)).
+* Fix possible crashes in aggregate functions with combinator `Distinct`, while using two-level aggregation. Fixes [#17682](https://github.com/ClickHouse/ClickHouse/issues/17682). [#18365](https://github.com/ClickHouse/ClickHouse/pull/18365) ([Anton Popov](https://github.com/CurtizJ)).
+
+
 ### ClickHouse release v20.8.10.13-lts, 2020-12-24
 
 #### Bug Fix

From b24f73fd886cbca37b522649cbc1d2604935be9c Mon Sep 17 00:00:00 2001
From: tavplubix <avtokmakov@yandex-team.ru>
Date: Wed, 3 Feb 2021 13:27:41 +0300
Subject: [PATCH 0581/1238] remove outdated comment

---
 src/IO/PeekableReadBuffer.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/src/IO/PeekableReadBuffer.h b/src/IO/PeekableReadBuffer.h
index c914ff2d6d3..e425f9bc953 100644
--- a/src/IO/PeekableReadBuffer.h
+++ b/src/IO/PeekableReadBuffer.h
@@ -38,11 +38,6 @@ public:
             peeked_size = 0;
         }
         checkpoint.emplace(pos);
-
-        // FIXME: we are checking checkpoint existence in few places (rollbackToCheckpoint/dropCheckpoint)
-        // by simple if(checkpoint) but checkpoint can be nullptr after
-        // setCheckpoint called on empty (non initialized/eof) buffer
-        // and we can't just use simple if(checkpoint)
     }
 
     /// Forget checkpoint and all data between checkpoint and position

From 73002eb3b7927cd9b7d97d62e1f833de2a217f09 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Wed, 3 Feb 2021 13:29:44 +0300
Subject: [PATCH 0582/1238] Fix constant propagation.

---
 src/Interpreters/ActionsDAG.cpp | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp
index 4bed322076a..167e7a0a2ec 100644
--- a/src/Interpreters/ActionsDAG.cpp
+++ b/src/Interpreters/ActionsDAG.cpp
@@ -126,7 +126,6 @@ ActionsDAG::Node & ActionsDAG::addAlias(Node & child, std::string alias, bool ca
     node.result_type = child.result_type;
     node.result_name = std::move(alias);
     node.column = child.column;
-    node.allow_constant_folding = child.allow_constant_folding;
     node.children.emplace_back(&child);
 
     return addNode(std::move(node), can_replace);
@@ -194,7 +193,6 @@ ActionsDAG::Node & ActionsDAG::addFunction(
     for (size_t i = 0; i < num_arguments; ++i)
     {
         auto & child = *node.children[i];
-        node.allow_constant_folding = node.allow_constant_folding && child.allow_constant_folding;
 
         ColumnWithTypeAndName argument;
         argument.column = child.column;
@@ -359,10 +357,15 @@ void ActionsDAG::removeUnusedActions()
         stack.push(node);
     }
 
-    /// We cannot remove arrayJoin because it changes the number of rows.
     for (auto & node : nodes)
     {
-        if (node.type == ActionType::ARRAY_JOIN && visited_nodes.count(&node) == 0)
+        /// We cannot remove function with side effects even if it returns constant (e.g. ignore(...)).
+        bool prevent_constant_folding = node.column && isColumnConst(*node.column) && !node.allow_constant_folding;
+        /// We cannot remove arrayJoin because it changes the number of rows.
+        bool is_array_join = node.type == ActionType::ARRAY_JOIN;
+
+        bool must_keep_node = is_array_join || prevent_constant_folding;
+        if (must_keep_node && visited_nodes.count(&node) == 0)
         {
             visited_nodes.insert(&node);
             stack.push(&node);
@@ -420,7 +423,6 @@ void ActionsDAG::addAliases(const NamesWithAliases & aliases, std::vector<Node *
             node.result_type = child->result_type;
             node.result_name = std::move(item.second);
             node.column = child->column;
-            node.allow_constant_folding = child->allow_constant_folding;
             node.children.emplace_back(child);
 
             auto & alias = addNode(std::move(node), true);

From 45aee71fffea2268dcb611b8a6aadaf098c16425 Mon Sep 17 00:00:00 2001
From: hexiaoting <hewenting_ict@163.com>
Date: Wed, 3 Feb 2021 18:52:20 +0800
Subject: [PATCH 0583/1238] Modified some implementation

---
 src/Interpreters/CollectJoinOnKeysVisitor.cpp | 29 ++++++++++---------
 src/Interpreters/CollectJoinOnKeysVisitor.h   |  3 +-
 src/Interpreters/TreeRewriter.cpp             |  6 ++--
 ...conditions_from_join_on_to_where.reference | 16 ++++++++++
 ..._move_conditions_from_join_on_to_where.sql |  9 ++++++
 5 files changed, 44 insertions(+), 19 deletions(-)

diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.cpp b/src/Interpreters/CollectJoinOnKeysVisitor.cpp
index a17f68fbf75..99b8e24ff59 100644
--- a/src/Interpreters/CollectJoinOnKeysVisitor.cpp
+++ b/src/Interpreters/CollectJoinOnKeysVisitor.cpp
@@ -78,9 +78,11 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as
     {
         ASTPtr left = func.arguments->children.at(0);
         ASTPtr right = func.arguments->children.at(1);
-        auto table_numbers = getTableNumbers(left, right, data);
-        if (table_numbers.first != 0)
+        bool need_optimize = false;
+        auto table_numbers = getTableNumbers(left, right, data, &need_optimize);
+        if (!need_optimize)
         {
+            // related to two different tables
             data.addJoinKeys(left, right, table_numbers);
             if (!data.new_on_expression)
                 data.new_on_expression = ast->clone();
@@ -93,8 +95,6 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as
                 data.new_where_conditions = ast->clone();
             else
                 data.new_where_conditions = makeASTFunction("and", data.new_where_conditions, ast->clone());
-
-            data.move_to_where = true;
         }
 
     }
@@ -104,7 +104,8 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as
         {
             ASTPtr left = func.arguments->children.at(0);
             ASTPtr right = func.arguments->children.at(1);
-            auto table_numbers = getTableNumbers(left, right, data);
+            bool need_optimize_unused = false;
+            auto table_numbers = getTableNumbers(left, right, data, &need_optimize_unused);
             if (table_numbers.first != 0)
             {
                 throw Exception("JOIN ON inequalities are not supported. Unexpected '" + queryToString(ast) + "'",
@@ -116,8 +117,6 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as
                     data.new_where_conditions = ast->clone();
                 else
                     data.new_where_conditions = makeASTFunction("and", data.new_where_conditions, ast->clone());
-
-                data.move_to_where = true;
             }
         }
 
@@ -127,7 +126,8 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as
 
         ASTPtr left = func.arguments->children.at(0);
         ASTPtr right = func.arguments->children.at(1);
-        auto table_numbers = getTableNumbers(left, right, data);
+        bool need_optimize_unused;
+        auto table_numbers = getTableNumbers(left, right, data, &need_optimize_unused);
 
         data.addAsofJoinKeys(left, right, table_numbers, inequality);
     }
@@ -153,7 +153,7 @@ void CollectJoinOnKeysMatcher::getIdentifiers(const ASTPtr & ast, std::vector<co
 }
 
 std::pair<size_t, size_t> CollectJoinOnKeysMatcher::getTableNumbers(const ASTPtr & left_ast, const ASTPtr & right_ast,
-                                                                    Data & data)
+                                                                    Data & data, bool *need_optimize)
 {
     std::vector<const ASTIdentifier *> left_identifiers;
     std::vector<const ASTIdentifier *> right_identifiers;
@@ -162,17 +162,18 @@ std::pair<size_t, size_t> CollectJoinOnKeysMatcher::getTableNumbers(const ASTPtr
     getIdentifiers(right_ast, right_identifiers);
 
     if (left_identifiers.empty() || right_identifiers.empty())
-        return std::make_pair(0, 0);
+    {
+        *need_optimize = true;
+        return {0, 0};
+    }
 
     size_t left_idents_table = getTableForIdentifiers(left_identifiers, data);
     size_t right_idents_table = getTableForIdentifiers(right_identifiers, data);
 
     if (left_idents_table && left_idents_table == right_idents_table)
     {
-        auto left_name = queryToString(*left_identifiers[0]);
-        auto right_name = queryToString(*right_identifiers[0]);
-
-        return std::make_pair(0, 0);
+        *need_optimize = true;
+        return {0, 0};
     }
 
     return std::make_pair(left_idents_table, right_idents_table);
diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.h b/src/Interpreters/CollectJoinOnKeysVisitor.h
index 2c2d731a4d7..050acb87ae2 100644
--- a/src/Interpreters/CollectJoinOnKeysVisitor.h
+++ b/src/Interpreters/CollectJoinOnKeysVisitor.h
@@ -34,7 +34,6 @@ public:
         ASTPtr asof_right_key{};
         ASTPtr new_on_expression{};
         ASTPtr new_where_conditions{};
-        bool move_to_where{false};
         bool has_some{false};
 
         void addJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, const std::pair<size_t, size_t> & table_no);
@@ -60,7 +59,7 @@ private:
     static void visit(const ASTFunction & func, const ASTPtr & ast, Data & data);
 
     static void getIdentifiers(const ASTPtr & ast, std::vector<const ASTIdentifier *> & out);
-    static std::pair<size_t, size_t> getTableNumbers(const ASTPtr & left_ast, const ASTPtr & right_ast, Data & data);
+    static std::pair<size_t, size_t> getTableNumbers(const ASTPtr & left_ast, const ASTPtr & right_ast, Data & data, bool *need_optimize);
     static const ASTIdentifier * unrollAliases(const ASTIdentifier * identifier, const Aliases & aliases);
     static size_t getTableForIdentifiers(std::vector<const ASTIdentifier *> & identifiers, const Data & data);
 };
diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp
index fdb78aad021..7a194df8f30 100644
--- a/src/Interpreters/TreeRewriter.cpp
+++ b/src/Interpreters/TreeRewriter.cpp
@@ -425,9 +425,9 @@ void collectJoinedColumns(TableJoin & analyzed_join, const ASTSelectQuery & sele
                             ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
         if (is_asof)
             data.asofToJoinKeys();
-        else if (data.move_to_where)
+        else if (data.new_where_conditions != nullptr)
         {
-            table_join.on_expression = (data.new_on_expression)->clone();
+            table_join.on_expression = data.new_on_expression;
             new_where_conditions = data.new_where_conditions;
         }
     }
@@ -438,7 +438,7 @@ void moveJoinedKeyToWhere(ASTSelectQuery * select_query, ASTPtr & new_where_cond
 {
     if (select_query->where())
         select_query->setExpression(ASTSelectQuery::Expression::WHERE,
-            makeASTFunction("and", new_where_conditions->clone(), select_query->where()->clone()));
+            makeASTFunction("and", new_where_conditions, select_query->where()));
     else
         select_query->setExpression(ASTSelectQuery::Expression::WHERE, new_where_conditions->clone());
 }
diff --git a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference
index a58aa254891..4f4909a0cb5 100644
--- a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference
+++ b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference
@@ -60,3 +60,19 @@ ALL INNER JOIN
 ) AS table2 ON a = table2.a
 WHERE 0
 ---------Q6----------
+---------Q7----------
+0	0	0	0
+SELECT
+    a,
+    b,
+    table2.a,
+    table2.b
+FROM table1
+ALL INNER JOIN 
+(
+    SELECT
+        a,
+        b
+    FROM table2
+) AS table2 ON a = table2.a
+WHERE (table2.b < toUInt32(40)) AND (b < 1)
diff --git a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql
index 5b861ecfe82..9ec8f0fe156 100644
--- a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql
+++ b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql
@@ -1,3 +1,6 @@
+DROP DATABASE IF EXISTS test_01653;
+CREATE DATABASE test_01653;
+USE test_01653;
 DROP TABLE IF EXISTS table1;
 DROP TABLE IF EXISTS table2;
 
@@ -29,5 +32,11 @@ EXPLAIN SYNTAX SELECT table1.a, table2.b FROM table1 JOIN table2 ON (table1.a =
 SELECT '---------Q6----------';
 SELECT table1.a, table2.b FROM table1 JOIN table2 ON (table1.b = 6) AND (table2.b > 20); -- { serverError 403 } 
 
+SELECT '---------Q7----------';
+SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.b < toUInt32(40)) where table1.b < 1;
+EXPLAIN SYNTAX SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.b < toUInt32(40)) where table1.b < 1;
+SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.b < toUInt32(40)) where table1.b > 10;
+
 DROP TABLE table1;
 DROP TABLE table2;
+DROP DATABASE test_01653;

From e5fb59851deccd0d07e9643997540fa7e3b6fd9d Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Wed, 3 Feb 2021 14:06:57 +0300
Subject: [PATCH 0584/1238] Update string-search-functions.md

---
 docs/en/sql-reference/functions/string-search-functions.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md
index 4036974dd37..92591c89a37 100644
--- a/docs/en/sql-reference/functions/string-search-functions.md
+++ b/docs/en/sql-reference/functions/string-search-functions.md
@@ -14,8 +14,6 @@ The search is case-sensitive by default in all these functions. There are separa
 
 Returns the position (in bytes) of the found substring in the string, starting from 1.
 
-Works under the assumption that the string contains a set of bytes representing a single-byte encoded text. If this assumption is not met and a character can’t be represented using a single byte, the function doesn’t throw an exception and returns some unexpected result. If character can be represented using two bytes, it will use two bytes and so on.
-
 For a case-insensitive search, use the function [positionCaseInsensitive](#positioncaseinsensitive).
 
 **Syntax**

From cd1f11c85ac85a14dee55d29e623c4e923562347 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Wed, 3 Feb 2021 14:07:14 +0300
Subject: [PATCH 0585/1238] Update string-search-functions.md

---
 docs/ru/sql-reference/functions/string-search-functions.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/docs/ru/sql-reference/functions/string-search-functions.md b/docs/ru/sql-reference/functions/string-search-functions.md
index e8cbb8deec4..b7193da6f33 100644
--- a/docs/ru/sql-reference/functions/string-search-functions.md
+++ b/docs/ru/sql-reference/functions/string-search-functions.md
@@ -13,8 +13,6 @@ toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u043f\u043e\u0438\u
 
 Возвращает позицию (в байтах) найденной подстроки в строке, начиная с 1, или 0, если подстрока не найдена.
 
-Работает при допущении, что строка содержит набор байт, представляющий текст в однобайтовой кодировке. Если допущение не выполнено — то возвращает неопределенный результат (не кидает исключение). Если символ может быть представлен с помощью двух байтов, он будет представлен двумя байтами и так далее.
-
 Для поиска без учета регистра используйте функцию [positionCaseInsensitive](#positioncaseinsensitive).
 
 **Синтаксис**

From a976d682a9ea0f6fabab1d2e63da414c52580fb3 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Wed, 3 Feb 2021 14:09:23 +0300
Subject: [PATCH 0586/1238] Update CHANGELOG.md

---
 CHANGELOG.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0fcd75639ce..f39d1ac697a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,7 +5,6 @@
 #### Bug Fix
 
 * BloomFilter index crash fix. Fixes [#19757](https://github.com/ClickHouse/ClickHouse/issues/19757). [#19884](https://github.com/ClickHouse/ClickHouse/pull/19884) ([Maksim Kita](https://github.com/kitaisreal)).
-* Deadlock was possible if system.text_log is enabled. This fixes [#19874](https://github.com/ClickHouse/ClickHouse/issues/19874). [#19875](https://github.com/ClickHouse/ClickHouse/pull/19875) ([alexey-milovidov](https://github.com/alexey-milovidov)).
 * Fix crash when pushing down predicates to union distinct subquery. This fixes [#19855](https://github.com/ClickHouse/ClickHouse/issues/19855). [#19861](https://github.com/ClickHouse/ClickHouse/pull/19861) ([Amos Bird](https://github.com/amosbird)).
 * Fix filtering by UInt8 greater than 127. [#19799](https://github.com/ClickHouse/ClickHouse/pull/19799) ([Anton Popov](https://github.com/CurtizJ)).
 * In previous versions, unusual arguments for function arrayEnumerateUniq may cause crash or infinite loop. This closes [#19787](https://github.com/ClickHouse/ClickHouse/issues/19787). [#19788](https://github.com/ClickHouse/ClickHouse/pull/19788) ([alexey-milovidov](https://github.com/alexey-milovidov)).

From 61df1dd872e2484fe95edfd68d9a541b6fcc0db5 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Wed, 3 Feb 2021 14:11:13 +0300
Subject: [PATCH 0587/1238] Update CHANGELOG.md

---
 CHANGELOG.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f39d1ac697a..d2cc3e51997 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -20,7 +20,6 @@
 * Buffer overflow (on memory read) was possible if `addMonth` function was called with specifically crafted arguments. This fixes [#19441](https://github.com/ClickHouse/ClickHouse/issues/19441). This fixes [#19413](https://github.com/ClickHouse/ClickHouse/issues/19413). [#19472](https://github.com/ClickHouse/ClickHouse/pull/19472) ([alexey-milovidov](https://github.com/alexey-milovidov)).
 * Uninitialized memory read was possible in encrypt/decrypt functions if empty string was passed as IV. This closes [#19391](https://github.com/ClickHouse/ClickHouse/issues/19391). [#19397](https://github.com/ClickHouse/ClickHouse/pull/19397) ([alexey-milovidov](https://github.com/alexey-milovidov)).
 * Fix possible buffer overflow in Uber H3 library. See https://github.com/uber/h3/issues/392. This closes [#19219](https://github.com/ClickHouse/ClickHouse/issues/19219). [#19383](https://github.com/ClickHouse/ClickHouse/pull/19383) ([alexey-milovidov](https://github.com/alexey-milovidov)).
-* Added `cast`, `accurateCast`, `accurateCastOrNull` performance tests. [#19354](https://github.com/ClickHouse/ClickHouse/pull/19354) ([Maksim Kita](https://github.com/kitaisreal)).
 * Fix system.parts _state column (LOGICAL_ERROR when querying this column, due to incorrect order). [#19346](https://github.com/ClickHouse/ClickHouse/pull/19346) ([Azat Khuzhin](https://github.com/azat)).
 * Fixed possible wrong result or segfault on aggregation when Materialized View and its target table have different structure. Fixes [#18063](https://github.com/ClickHouse/ClickHouse/issues/18063). [#19322](https://github.com/ClickHouse/ClickHouse/pull/19322) ([tavplubix](https://github.com/tavplubix)).
 * Fix error `Cannot convert column now64() because it is constant but values of constants are different in source and result`. Continuation of [#7156](https://github.com/ClickHouse/ClickHouse/issues/7156). [#19316](https://github.com/ClickHouse/ClickHouse/pull/19316) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).

From b6094859c7b643514c82f2c81a86400a9b79108f Mon Sep 17 00:00:00 2001
From: Pavel Kovalenko <jokserfn@yandex-team.ru>
Date: Wed, 3 Feb 2021 14:26:24 +0300
Subject: [PATCH 0588/1238] Throw exception during restore DiskS3 if object
 doesn't have path in metadata.

---
 src/Disks/S3/DiskS3.cpp | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp
index f63ca4117db..f861cff2424 100644
--- a/src/Disks/S3/DiskS3.cpp
+++ b/src/Disks/S3/DiskS3.cpp
@@ -1149,10 +1149,7 @@ void DiskS3::processRestoreFiles(const String & source_bucket, const String & so
         /// Restore file if object has 'path' in metadata.
         auto path_entry = object_metadata.find("path");
         if (path_entry == object_metadata.end())
-        {
-            LOG_WARNING(&Poco::Logger::get("DiskS3"), "Skip key {} because it doesn't have 'path' in metadata", key);
-            continue;
-        }
+            throw Exception("Failed to restore key " + key + " because it doesn't have 'path' in metadata", ErrorCodes::S3_ERROR);
 
         const auto & path = path_entry->second;
 

From b5b68f66a9cb32ad019d02e89ab632842ce574ac Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Wed, 3 Feb 2021 14:57:22 +0300
Subject: [PATCH 0589/1238] Colse input ports in DelayedPortsProcessor as soon
 as all outputs are finished.

---
 src/Processors/DelayedPortsProcessor.cpp | 59 ++++++++++++++++--------
 src/Processors/DelayedPortsProcessor.h   |  6 ++-
 2 files changed, 44 insertions(+), 21 deletions(-)

diff --git a/src/Processors/DelayedPortsProcessor.cpp b/src/Processors/DelayedPortsProcessor.cpp
index cb181a4e4ac..d740ef08e5a 100644
--- a/src/Processors/DelayedPortsProcessor.cpp
+++ b/src/Processors/DelayedPortsProcessor.cpp
@@ -12,7 +12,7 @@ DelayedPortsProcessor::DelayedPortsProcessor(
     const Block & header, size_t num_ports, const PortNumbers & delayed_ports, bool assert_main_ports_empty)
     : IProcessor(InputPorts(num_ports, header),
                  OutputPorts((assert_main_ports_empty ? delayed_ports.size() : num_ports), header))
-    , num_delayed(delayed_ports.size())
+    , num_delayed_ports(delayed_ports.size())
 {
     port_pairs.resize(num_ports);
     output_to_pair.reserve(outputs.size());
@@ -36,21 +36,24 @@ DelayedPortsProcessor::DelayedPortsProcessor(
     }
 }
 
+void DelayedPortsProcessor::finishPair(PortsPair & pair)
+{
+    if (!pair.is_finished)
+    {
+        pair.is_finished = true;
+        ++num_finished_pairs;
+
+        if (pair.output_port)
+            ++num_finished_outputs;
+    }
+}
+
 bool DelayedPortsProcessor::processPair(PortsPair & pair)
 {
-    auto finish = [&]()
-    {
-        if (!pair.is_finished)
-        {
-            pair.is_finished = true;
-            ++num_finished;
-        }
-    };
-
     if (pair.output_port && pair.output_port->isFinished())
     {
         pair.input_port->close();
-        finish();
+        finishPair(pair);
         return false;
     }
 
@@ -58,7 +61,7 @@ bool DelayedPortsProcessor::processPair(PortsPair & pair)
     {
         if (pair.output_port)
             pair.output_port->finish();
-        finish();
+        finishPair(pair);
         return false;
     }
 
@@ -72,7 +75,7 @@ bool DelayedPortsProcessor::processPair(PortsPair & pair)
             throw Exception(ErrorCodes::LOGICAL_ERROR,
                             "Input port for DelayedPortsProcessor is assumed to have no data, but it has one");
 
-        pair.output_port->pushData(pair.input_port->pullData());
+        pair.output_port->pushData(pair.input_port->pullData(true));
     }
 
     return true;
@@ -80,7 +83,7 @@ bool DelayedPortsProcessor::processPair(PortsPair & pair)
 
 IProcessor::Status DelayedPortsProcessor::prepare(const PortNumbers & updated_inputs, const PortNumbers & updated_outputs)
 {
-    bool skip_delayed = (num_finished + num_delayed) < port_pairs.size();
+    bool skip_delayed = (num_finished_pairs + num_delayed_ports) < port_pairs.size();
     bool need_data = false;
 
     if (!are_inputs_initialized && !updated_outputs.empty())
@@ -95,9 +98,27 @@ IProcessor::Status DelayedPortsProcessor::prepare(const PortNumbers & updated_in
 
     for (const auto & output_number : updated_outputs)
     {
-        auto pair_num = output_to_pair[output_number];
-        if (!skip_delayed || !port_pairs[pair_num].is_delayed)
-            need_data = processPair(port_pairs[pair_num]) || need_data;
+        auto & pair = port_pairs[output_to_pair[output_number]];
+
+        /// Finish pair of ports earlier if possible.
+        if (!pair.is_finished && pair.output_port && pair.output_port->isFinished())
+            finishPair(pair);
+        else if (!skip_delayed || !pair.is_delayed)
+            need_data = processPair(pair) || need_data;
+    }
+
+    /// Do not wait for delayed ports if all output ports are finished.
+    if (num_finished_outputs == outputs.size())
+    {
+        for (auto & pair : port_pairs)
+        {
+            if (pair.output_port)
+                pair.output_port->finish();
+
+            pair.input_port->close();
+        }
+
+        return Status::Finished;
     }
 
     for (const auto & input_number : updated_inputs)
@@ -107,14 +128,14 @@ IProcessor::Status DelayedPortsProcessor::prepare(const PortNumbers & updated_in
     }
 
     /// In case if main streams are finished at current iteration, start processing delayed streams.
-    if (skip_delayed && (num_finished + num_delayed) >= port_pairs.size())
+    if (skip_delayed && (num_finished_pairs + num_delayed_ports) >= port_pairs.size())
     {
         for (auto & pair : port_pairs)
             if (pair.is_delayed)
                 need_data = processPair(pair) || need_data;
     }
 
-    if (num_finished == port_pairs.size())
+    if (num_finished_pairs == port_pairs.size())
         return Status::Finished;
 
     if (need_data)
diff --git a/src/Processors/DelayedPortsProcessor.h b/src/Processors/DelayedPortsProcessor.h
index 3e44c945bd4..a6a9590e0c8 100644
--- a/src/Processors/DelayedPortsProcessor.h
+++ b/src/Processors/DelayedPortsProcessor.h
@@ -28,13 +28,15 @@ private:
     };
 
     std::vector<PortsPair> port_pairs;
-    size_t num_delayed;
-    size_t num_finished = 0;
+    const size_t num_delayed_ports;
+    size_t num_finished_pairs = 0;
+    size_t num_finished_outputs = 0;
 
     std::vector<size_t> output_to_pair;
     bool are_inputs_initialized = false;
 
     bool processPair(PortsPair & pair);
+    void finishPair(PortsPair & pair);
 };
 
 }

From 62e96c138e59449764b2e2572790cdd910ec5d89 Mon Sep 17 00:00:00 2001
From: Pavel Kovalenko <jokserfn@yandex-team.ru>
Date: Wed, 3 Feb 2021 15:01:43 +0300
Subject: [PATCH 0590/1238] Remove unused error code in DiskS3

---
 src/Disks/S3/DiskS3.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp
index f861cff2424..42c022a3714 100644
--- a/src/Disks/S3/DiskS3.cpp
+++ b/src/Disks/S3/DiskS3.cpp
@@ -39,7 +39,6 @@ namespace ErrorCodes
     extern const int CANNOT_SEEK_THROUGH_FILE;
     extern const int UNKNOWN_FORMAT;
     extern const int INCORRECT_DISK_INDEX;
-    extern const int NOT_IMPLEMENTED;
     extern const int BAD_ARGUMENTS;
     extern const int PATH_ACCESS_DENIED;
     extern const int CANNOT_DELETE_DIRECTORY;

From b0f400ca1b63c30ba5c2bc57f95aec241fb07056 Mon Sep 17 00:00:00 2001
From: Ivan Lezhankin <ilezhankin@yandex-team.ru>
Date: Wed, 3 Feb 2021 15:31:45 +0300
Subject: [PATCH 0591/1238] Reset internal buffer position on next()

---
 src/IO/BufferBase.h       |  1 +
 src/IO/ConcatReadBuffer.h | 21 ++++++++++++---------
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/IO/BufferBase.h b/src/IO/BufferBase.h
index c22dcbecf7b..198441d8bc1 100644
--- a/src/IO/BufferBase.h
+++ b/src/IO/BufferBase.h
@@ -40,6 +40,7 @@ public:
         inline Position end() const { return end_pos; }
         inline size_t size() const { return size_t(end_pos - begin_pos); }
         inline void resize(size_t size) { end_pos = begin_pos + size; }
+        inline bool empty() const { return size() == 0; }
 
         inline void swap(Buffer & other)
         {
diff --git a/src/IO/ConcatReadBuffer.h b/src/IO/ConcatReadBuffer.h
index 1df99429e93..c416b0fd892 100644
--- a/src/IO/ConcatReadBuffer.h
+++ b/src/IO/ConcatReadBuffer.h
@@ -25,11 +25,16 @@ protected:
             return false;
 
         /// First reading
-        if (working_buffer.size() == 0 && (*current)->hasPendingData())
+        if (working_buffer.empty())
         {
-            working_buffer = Buffer((*current)->position(), (*current)->buffer().end());
-            return true;
+            if ((*current)->hasPendingData())
+            {
+                working_buffer = Buffer((*current)->position(), (*current)->buffer().end());
+                return true;
+            }
         }
+        else
+            (*current)->position() = position();
 
         if (!(*current)->next())
         {
@@ -51,14 +56,12 @@ protected:
     }
 
 public:
-    ConcatReadBuffer(const ReadBuffers & buffers_) : ReadBuffer(nullptr, 0), buffers(buffers_), current(buffers.begin()) {}
-
-    ConcatReadBuffer(ReadBuffer & buf1, ReadBuffer & buf2) : ReadBuffer(nullptr, 0)
+    explicit ConcatReadBuffer(const ReadBuffers & buffers_) : ReadBuffer(nullptr, 0), buffers(buffers_), current(buffers.begin())
     {
-        buffers.push_back(&buf1);
-        buffers.push_back(&buf2);
-        current = buffers.begin();
+        assert(!buffers.empty());
     }
+
+    ConcatReadBuffer(ReadBuffer & buf1, ReadBuffer & buf2) : ConcatReadBuffer({&buf1, &buf2}) {}
 };
 
 }

From 72b0a18503569d240bd2e41355efd83120576d46 Mon Sep 17 00:00:00 2001
From: Ivan Lezhankin <ilezhankin@yandex-team.ru>
Date: Wed, 3 Feb 2021 15:37:32 +0300
Subject: [PATCH 0592/1238] Check for unread data on next()

---
 src/IO/ReadBuffer.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/IO/ReadBuffer.h b/src/IO/ReadBuffer.h
index 3d6eb6970ce..0b39a595075 100644
--- a/src/IO/ReadBuffer.h
+++ b/src/IO/ReadBuffer.h
@@ -55,6 +55,8 @@ public:
       */
     bool next()
     {
+        assert(!hasPendingData());
+
         bytes += offset();
         bool res = nextImpl();
         if (!res)
@@ -72,7 +74,7 @@ public:
             next();
     }
 
-    virtual ~ReadBuffer() {}
+    virtual ~ReadBuffer() = default;
 
 
     /** Unlike std::istream, it returns true if all data was read
@@ -192,7 +194,7 @@ private:
       */
     virtual bool nextImpl() { return false; }
 
-    [[noreturn]] void throwReadAfterEOF()
+    [[noreturn]] static inline void throwReadAfterEOF()
     {
         throw Exception("Attempt to read after eof", ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF);
     }

From f31d2206a74a7eb0afc784c0fd99367ac583ad1b Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Wed, 3 Feb 2021 15:50:25 +0300
Subject: [PATCH 0593/1238] more fuzzing and less bugs

---
 programs/client/QueryFuzzer.cpp               | 19 ++++++
 src/Interpreters/WindowDescription.cpp        | 68 +++++++++++++++++++
 src/Interpreters/WindowDescription.h          |  7 ++
 src/Parsers/ExpressionElementParsers.cpp      |  1 +
 src/Processors/QueryPlan/WindowStep.cpp       |  1 +
 src/Processors/Transforms/WindowTransform.cpp | 21 ++++--
 .../01591_window_functions.reference          |  5 ++
 .../0_stateless/01591_window_functions.sql    |  2 +
 8 files changed, 118 insertions(+), 6 deletions(-)

diff --git a/programs/client/QueryFuzzer.cpp b/programs/client/QueryFuzzer.cpp
index d569a185dba..05c20434820 100644
--- a/programs/client/QueryFuzzer.cpp
+++ b/programs/client/QueryFuzzer.cpp
@@ -366,6 +366,8 @@ void QueryFuzzer::fuzzWindowFrame(WindowFrame & frame)
         default:
             break;
     }
+
+    frame.is_default = (frame == WindowFrame{});
 }
 
 void QueryFuzzer::fuzz(ASTs & asts)
@@ -465,6 +467,23 @@ void QueryFuzzer::fuzz(ASTPtr & ast)
 
         fuzz(select->children);
     }
+    /*
+     * The time to fuzz the settings has not yet come.
+     * Apparently we don't have any infractructure to validate the values of
+     * the settings, and the first query with max_block_size = -1 breaks
+     * because of overflows here and there.
+     *//*
+     * else if (auto * set = typeid_cast<ASTSetQuery *>(ast.get()))
+     * {
+     *      for (auto & c : set->changes)
+     *      {
+     *          if (fuzz_rand() % 50 == 0)
+     *          {
+     *              c.value = fuzzField(c.value);
+     *          }
+     *      }
+     * }
+     */
     else if (auto * literal = typeid_cast<ASTLiteral *>(ast.get()))
     {
         // There is a caveat with fuzzing the children: many ASTs also keep the
diff --git a/src/Interpreters/WindowDescription.cpp b/src/Interpreters/WindowDescription.cpp
index bfb53ebb79f..3569df6fd17 100644
--- a/src/Interpreters/WindowDescription.cpp
+++ b/src/Interpreters/WindowDescription.cpp
@@ -33,4 +33,72 @@ std::string WindowDescription::dump() const
     return ss.str();
 }
 
+std::string WindowFrame::toString() const
+{
+    WriteBufferFromOwnString buf;
+    toString(buf);
+    return buf.str();
+}
+
+void WindowFrame::toString(WriteBuffer & buf) const
+{
+    buf << toString(type) << " BETWEEN ";
+    if (begin_type == BoundaryType::Current)
+    {
+        buf << "CURRENT ROW";
+    }
+    else if (begin_type == BoundaryType::Unbounded)
+    {
+        buf << "UNBOUNDED PRECEDING";
+    }
+    else
+    {
+        buf << abs(begin_offset);
+        buf << " "
+            << (begin_offset > 0 ? "FOLLOWING" : "PRECEDING");
+    }
+    buf << " AND ";
+    if (end_type == BoundaryType::Current)
+    {
+        buf << "CURRENT ROW";
+    }
+    else if (end_type == BoundaryType::Unbounded)
+    {
+        buf << "UNBOUNDED PRECEDING";
+    }
+    else
+    {
+        buf << abs(end_offset);
+        buf << " "
+            << (end_offset > 0 ? "FOLLOWING" : "PRECEDING");
+    }
+}
+
+void WindowFrame::checkValid() const
+{
+    if (begin_type == BoundaryType::Unbounded
+        || end_type == BoundaryType::Unbounded)
+    {
+        return;
+    }
+
+    if (begin_type == BoundaryType::Current
+        && end_type == BoundaryType::Offset
+        && end_offset > 0)
+    {
+        return;
+    }
+
+    if (end_type == BoundaryType::Current
+        && begin_type == BoundaryType::Offset
+        && begin_offset < 0)
+    {
+        return;
+    }
+
+    throw Exception(ErrorCodes::BAD_ARGUMENTS,
+        "Window frame '{}' is invalid",
+        toString());
+}
+
 }
diff --git a/src/Interpreters/WindowDescription.h b/src/Interpreters/WindowDescription.h
index d34b7721a5e..447352f7a83 100644
--- a/src/Interpreters/WindowDescription.h
+++ b/src/Interpreters/WindowDescription.h
@@ -53,6 +53,13 @@ struct WindowFrame
     int64_t end_offset = 0;
 
 
+    // Throws BAD_ARGUMENTS exception if the frame definition is incorrect, e.g.
+    // the frame start comes later than the frame end.
+    void checkValid() const;
+
+    std::string toString() const;
+    void toString(WriteBuffer & buf) const;
+
     bool operator == (const WindowFrame & other) const
     {
         // We don't compare is_default because it's not a real property of the
diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp
index e4a9c285223..c129c312d11 100644
--- a/src/Parsers/ExpressionElementParsers.cpp
+++ b/src/Parsers/ExpressionElementParsers.cpp
@@ -640,6 +640,7 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p
             }
             else if (keyword_following.ignore(pos, expected))
             {
+                // Positive offset or UNBOUNDED FOLLOWING.
             }
             else
             {
diff --git a/src/Processors/QueryPlan/WindowStep.cpp b/src/Processors/QueryPlan/WindowStep.cpp
index 82c589b8b20..1a71ca0adc7 100644
--- a/src/Processors/QueryPlan/WindowStep.cpp
+++ b/src/Processors/QueryPlan/WindowStep.cpp
@@ -57,6 +57,7 @@ WindowStep::WindowStep(const DataStream & input_stream_,
 {
     // We don't remove any columns, only add, so probably we don't have to update
     // the output DataStream::distinct_columns.
+    window_description.frame.checkValid();
 }
 
 void WindowStep::transformPipeline(QueryPipeline & pipeline)
diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index 3dcd0a91bca..b7b0c72eb94 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -22,6 +22,8 @@ WindowTransform::WindowTransform(const Block & input_header_,
     , input_header(input_header_)
     , window_description(window_description_)
 {
+    window_description.frame.checkValid();
+
     workspaces.reserve(functions.size());
     for (const auto & f : functions)
     {
@@ -210,14 +212,21 @@ auto WindowTransform::moveRowNumberNoCheck(const RowNumber & _x, int offset) con
                 break;
             }
 
+            // Move to the first row in current block. Note that the offset is
+            // negative.
+            offset += x.row;
+            x.row = 0;
+
+            // Move to the last row of the previous block, if we are not at the
+            // first one. Offset also is incremented by one, because we pass over
+            // the first row of this block.
             if (x.block == first_block_number)
             {
                 break;
             }
 
-            // offset is negative
-            offset += (x.row + 1);
             --x.block;
+            offset += 1;
             x.row = blockRowsNumber(x) - 1;
         }
     }
@@ -253,10 +262,10 @@ void WindowTransform::advanceFrameStartRowsOffset()
 
     assertValid(frame_start);
 
-//    fmt::print(stderr, "frame start {} partition start {}\n", frame_start,
-//        partition_start);
+//    fmt::print(stderr, "frame start {} left {} partition start {}\n",
+//        frame_start, offset_left, partition_start);
 
-    if (moved_row <= partition_start)
+    if (frame_start <= partition_start)
     {
         // Got to the beginning of partition and can't go further back.
         frame_start = partition_start;
@@ -269,10 +278,10 @@ void WindowTransform::advanceFrameStartRowsOffset()
     {
         // A FOLLOWING frame start ran into the end of partition.
         frame_started = true;
+        return;
     }
 
     assert(partition_start < frame_start);
-    frame_start = moved_row;
     frame_started = offset_left == 0;
 }
 
diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference
index bd1a954ddc4..c128aae7796 100644
--- a/tests/queries/0_stateless/01591_window_functions.reference
+++ b/tests/queries/0_stateless/01591_window_functions.reference
@@ -558,5 +558,10 @@ settings max_block_size = 2;
 28	5	3	2	1
 29	5	2	1	0
 30	6	1	1	0
+SELECT count(*) OVER (ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) FROM numbers(4);
+1
+2
+3
+3
 -- seen a use-after-free under MSan in this query once
 SELECT number, max(number) OVER (PARTITION BY intDiv(number, 7) ORDER BY number ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM numbers(1024) SETTINGS max_block_size = 2 FORMAT Null;
diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql
index 3a6d2f3d18a..6c2883eae26 100644
--- a/tests/queries/0_stateless/01591_window_functions.sql
+++ b/tests/queries/0_stateless/01591_window_functions.sql
@@ -175,6 +175,8 @@ from (select number, intDiv(number, 5) p from numbers(31))
 order by p, number
 settings max_block_size = 2;
 
+SELECT count(*) OVER (ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) FROM numbers(4);
+
 -- seen a use-after-free under MSan in this query once
 SELECT number, max(number) OVER (PARTITION BY intDiv(number, 7) ORDER BY number ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM numbers(1024) SETTINGS max_block_size = 2 FORMAT Null;
 

From bd6d7facf1d7045798cb455c83c70c2059b0df97 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Wed, 3 Feb 2021 16:33:50 +0300
Subject: [PATCH 0594/1238] style

---
 src/Interpreters/WindowDescription.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/Interpreters/WindowDescription.cpp b/src/Interpreters/WindowDescription.cpp
index 3569df6fd17..93a75c07161 100644
--- a/src/Interpreters/WindowDescription.cpp
+++ b/src/Interpreters/WindowDescription.cpp
@@ -6,6 +6,11 @@
 namespace DB
 {
 
+namespace ErrorCodes
+{
+    extern const int BAD_ARGUMENTS;
+}
+
 std::string WindowFunctionDescription::dump() const
 {
     WriteBufferFromOwnString ss;

From 7c55ecf67db755d15714cea06adfb28f35ac3da9 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Wed, 3 Feb 2021 16:41:59 +0300
Subject: [PATCH 0595/1238] cleanup

---
 src/Processors/Transforms/WindowTransform.h                | 2 +-
 tests/queries/0_stateless/01591_window_functions.reference | 3 +++
 tests/queries/0_stateless/01591_window_functions.sql       | 2 ++
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h
index 5ad1132bfab..869a6fbfee2 100644
--- a/src/Processors/Transforms/WindowTransform.h
+++ b/src/Processors/Transforms/WindowTransform.h
@@ -246,7 +246,7 @@ public:
 
     // Boundaries of the current partition.
     // partition_start doesn't point to a valid block, because we want to drop
-    // the blocks early to save memory. We still have track it so that we can
+    // the blocks early to save memory. We still have to track it so that we can
     // cut off a PRECEDING frame at the partition start.
     // The `partition_end` is past-the-end, as usual. When
     // partition_ended = false, it still haven't ended, and partition_end is the
diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference
index c128aae7796..e8db9ee7725 100644
--- a/tests/queries/0_stateless/01591_window_functions.reference
+++ b/tests/queries/0_stateless/01591_window_functions.reference
@@ -565,3 +565,6 @@ SELECT count(*) OVER (ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) FROM numbers(4);
 3
 -- seen a use-after-free under MSan in this query once
 SELECT number, max(number) OVER (PARTITION BY intDiv(number, 7) ORDER BY number ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM numbers(1024) SETTINGS max_block_size = 2 FORMAT Null;
+-- a corner case
+select count() over ();
+1
diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql
index 6c2883eae26..486c8f871b7 100644
--- a/tests/queries/0_stateless/01591_window_functions.sql
+++ b/tests/queries/0_stateless/01591_window_functions.sql
@@ -180,3 +180,5 @@ SELECT count(*) OVER (ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) FROM numbers(4);
 -- seen a use-after-free under MSan in this query once
 SELECT number, max(number) OVER (PARTITION BY intDiv(number, 7) ORDER BY number ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM numbers(1024) SETTINGS max_block_size = 2 FORMAT Null;
 
+-- a corner case
+select count() over ();

From 35754abb4a9ed4d5a01ef5ab0c203a95f7272329 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Wed, 3 Feb 2021 17:22:37 +0300
Subject: [PATCH 0596/1238] CURRENT ROW frame start for RANGE frame

---
 src/Processors/Transforms/WindowTransform.cpp | 31 +++++++++------
 src/Processors/Transforms/WindowTransform.h   |  4 ++
 .../01591_window_functions.reference          | 39 +++++++++++++++++++
 .../0_stateless/01591_window_functions.sql    |  9 +++++
 4 files changed, 71 insertions(+), 12 deletions(-)

diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index b7b0c72eb94..634c588beb0 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -295,18 +295,14 @@ void WindowTransform::advanceFrameStartChoose()
             frame_started = true;
             return;
         case WindowFrame::BoundaryType::Current:
-            switch (window_description.frame.type)
-            {
-                case WindowFrame::FrameType::Rows:
-                    // CURRENT ROW
-                    frame_start = current_row;
-                    frame_started = true;
-                    return;
-                default:
-                    // Fallthrough to the "not implemented" error.
-                    break;
-            }
-            break;
+            // CURRENT ROW differs between frame types only in how the peer
+            // groups are accounted.
+            assert(partition_start <= peer_group_start);
+            assert(peer_group_start < partition_end);
+            assert(peer_group_start <= current_row);
+            frame_start = peer_group_start;
+            frame_started = true;
+            return;
         case WindowFrame::BoundaryType::Offset:
             switch (window_description.frame.type)
             {
@@ -651,6 +647,13 @@ void WindowTransform::appendChunk(Chunk & chunk)
 //                current_row, frame_start, frame_end,
 //                frame_started, frame_ended);
 
+            // We now know that the current row is valid, so we can update the
+            // peer group start.
+            if (!arePeers(peer_group_start, current_row))
+            {
+                peer_group_start = current_row;
+            }
+
             // Advance the frame start.
             advanceFrameStart();
 
@@ -703,6 +706,8 @@ void WindowTransform::appendChunk(Chunk & chunk)
             writeOutCurrentRow();
 
             // Move to the next row. The frame will have to be recalculated.
+            // The peer group start is updated at the beginning of the loop,
+            // because current_row might now be past-the-end.
             advanceRowNumber(current_row);
             first_not_ready_row = current_row;
             frame_ended = false;
@@ -738,6 +743,7 @@ void WindowTransform::appendChunk(Chunk & chunk)
         prev_frame_start = partition_start;
         prev_frame_end = partition_start;
         assert(current_row == partition_start);
+        peer_group_start = partition_start;
 
 //        fmt::print(stderr, "reinitialize agg data at start of {}\n",
 //            new_partition_start);
@@ -925,6 +931,7 @@ void WindowTransform::work()
         assert(next_output_block_number >= first_block_number);
         assert(frame_start.block >= first_block_number);
         assert(current_row.block >= first_block_number);
+        assert(peer_group_start.block >= first_block_number);
     }
 }
 
diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h
index 869a6fbfee2..c5e1c8b3653 100644
--- a/src/Processors/Transforms/WindowTransform.h
+++ b/src/Processors/Transforms/WindowTransform.h
@@ -257,6 +257,10 @@ public:
 
     // The row for which we are now computing the window functions.
     RowNumber current_row;
+    // The start of current peer group, needed for CURRENT ROW frame start.
+    // For ROWS frame, always equal to the current row, and for RANGE and GROUP
+    // frames may be earlier.
+    RowNumber peer_group_start;
 
     // The frame is [frame_start, frame_end) if frame_ended && frame_started,
     // and unknown otherwise. Note that when we move to the next row, both the
diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference
index e8db9ee7725..e93d0eaead3 100644
--- a/tests/queries/0_stateless/01591_window_functions.reference
+++ b/tests/queries/0_stateless/01591_window_functions.reference
@@ -568,3 +568,42 @@ SELECT number, max(number) OVER (PARTITION BY intDiv(number, 7) ORDER BY number
 -- a corner case
 select count() over ();
 1
+-- RANGE CURRENT ROW frame start
+select number, p, o,
+    count(*) over (partition by p order by o
+        range between current row and unbounded following)
+from (select number, intDiv(number, 5) p, mod(number, 3) o
+    from numbers(31))
+order by p, o, number
+settings max_block_size = 2;
+0	0	0	5
+3	0	0	5
+1	0	1	3
+4	0	1	3
+2	0	2	1
+6	1	0	5
+9	1	0	5
+7	1	1	3
+5	1	2	2
+8	1	2	2
+12	2	0	5
+10	2	1	4
+13	2	1	4
+11	2	2	2
+14	2	2	2
+15	3	0	5
+18	3	0	5
+16	3	1	3
+19	3	1	3
+17	3	2	1
+21	4	0	5
+24	4	0	5
+22	4	1	3
+20	4	2	2
+23	4	2	2
+27	5	0	5
+25	5	1	4
+28	5	1	4
+26	5	2	2
+29	5	2	2
+30	6	0	1
diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql
index 486c8f871b7..d8e38a04eee 100644
--- a/tests/queries/0_stateless/01591_window_functions.sql
+++ b/tests/queries/0_stateless/01591_window_functions.sql
@@ -182,3 +182,12 @@ SELECT number, max(number) OVER (PARTITION BY intDiv(number, 7) ORDER BY number
 
 -- a corner case
 select count() over ();
+
+-- RANGE CURRENT ROW frame start
+select number, p, o,
+    count(*) over (partition by p order by o
+        range between current row and unbounded following)
+from (select number, intDiv(number, 5) p, mod(number, 3) o
+    from numbers(31))
+order by p, o, number
+settings max_block_size = 2;

From ec382d881262093ce87b8693bae851fdbc42b075 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Wed, 3 Feb 2021 17:55:40 +0300
Subject: [PATCH 0597/1238] BETWEEN CURRENT ROW AND CURRENT ROW

---
 src/Interpreters/WindowDescription.cpp                    | 8 ++++++++
 .../queries/0_stateless/01591_window_functions.reference  | 7 +++++++
 tests/queries/0_stateless/01591_window_functions.sql      | 5 +++++
 3 files changed, 20 insertions(+)

diff --git a/src/Interpreters/WindowDescription.cpp b/src/Interpreters/WindowDescription.cpp
index 93a75c07161..12f4bdd4124 100644
--- a/src/Interpreters/WindowDescription.cpp
+++ b/src/Interpreters/WindowDescription.cpp
@@ -101,6 +101,14 @@ void WindowFrame::checkValid() const
         return;
     }
 
+    if (end_type == BoundaryType::Current
+        && begin_type == BoundaryType::Current)
+    {
+        // BETWEEN CURRENT ROW AND CURRENT ROW makes some sense for RANGE or
+        // GROUP frames, and is technically valid for ROWS frame.
+        return;
+    }
+
     throw Exception(ErrorCodes::BAD_ARGUMENTS,
         "Window frame '{}' is invalid",
         toString());
diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference
index e93d0eaead3..2e9c659e0af 100644
--- a/tests/queries/0_stateless/01591_window_functions.reference
+++ b/tests/queries/0_stateless/01591_window_functions.reference
@@ -607,3 +607,10 @@ settings max_block_size = 2;
 26	5	2	2
 29	5	2	2
 30	6	0	1
+select
+    count(*) over (rows between  current row and current row),
+    count(*) over (range between  current row and current row)
+from numbers(3);
+1	3
+1	3
+1	3
diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql
index d8e38a04eee..6c4190b47d3 100644
--- a/tests/queries/0_stateless/01591_window_functions.sql
+++ b/tests/queries/0_stateless/01591_window_functions.sql
@@ -191,3 +191,8 @@ from (select number, intDiv(number, 5) p, mod(number, 3) o
     from numbers(31))
 order by p, o, number
 settings max_block_size = 2;
+
+select
+    count(*) over (rows between  current row and current row),
+    count(*) over (range between  current row and current row)
+from numbers(3);

From 82ab793731929e3739c5641a1d29ac0af20d4c60 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Tue, 2 Feb 2021 10:56:22 +0300
Subject: [PATCH 0598/1238] Fix missing type check in StorageEmbeddedRocksDB

---
 docker/test/fasttest/run.sh                   |   1 +
 .../RocksDB/StorageEmbeddedRocksDB.cpp        | 119 +++++++++---------
 .../0_stateless/01686_rocksdb.reference       |  15 +++
 tests/queries/0_stateless/01686_rocksdb.sql   |  27 ++++
 .../queries/0_stateless/arcadia_skip_list.txt |   1 +
 5 files changed, 103 insertions(+), 60 deletions(-)
 create mode 100644 tests/queries/0_stateless/01686_rocksdb.reference
 create mode 100644 tests/queries/0_stateless/01686_rocksdb.sql

diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh
index 17cec7ae286..bb29959acd2 100755
--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@@ -319,6 +319,7 @@ function run_tests
 
          # In fasttest, ENABLE_LIBRARIES=0, so rocksdb engine is not enabled by default
         01504_rocksdb
+        01686_rocksdb
 
         # Look at DistributedFilesToInsert, so cannot run in parallel.
         01460_DistributedFilesToInsert
diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp
index 249026d1011..d7456966467 100644
--- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp
+++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp
@@ -24,6 +24,7 @@
 #include <Interpreters/Set.h>
 #include <Interpreters/PreparedSets.h>
 #include <Interpreters/TreeRewriter.h>
+#include <Interpreters/convertFieldToType.h>
 
 #include <Poco/File.h>
 #include <Poco/Path.h>
@@ -44,9 +45,12 @@ namespace ErrorCodes
     extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
 }
 
+using FieldVectorPtr = std::shared_ptr<FieldVector>;
+
 
 // returns keys may be filter by condition
-static bool traverseASTFilter(const String & primary_key, const DataTypePtr & primary_key_type, const ASTPtr & elem, const PreparedSets & sets, FieldVector & res)
+static bool traverseASTFilter(
+    const String & primary_key, const DataTypePtr & primary_key_type, const ASTPtr & elem, const PreparedSets & sets, FieldVectorPtr & res)
 {
     const auto * function = elem->as<ASTFunction>();
     if (!function)
@@ -63,13 +67,9 @@ static bool traverseASTFilter(const String & primary_key, const DataTypePtr & pr
     else if (function->name == "or")
     {
         // make sure every child has the key filter condition
-        FieldVector child_res;
         for (const auto & child : function->arguments->children)
-        {
-            if (!traverseASTFilter(primary_key, primary_key_type, child, sets, child_res))
+            if (!traverseASTFilter(primary_key, primary_key_type, child, sets, res))
                 return false;
-        }
-        res.insert(res.end(), child_res.begin(), child_res.end());
         return true;
     }
     else if (function->name == "equals" || function->name == "in")
@@ -108,9 +108,7 @@ static bool traverseASTFilter(const String & primary_key, const DataTypePtr & pr
             prepared_set->checkColumnsNumber(1);
             const auto & set_column = *prepared_set->getSetElements()[0];
             for (size_t row = 0; row < set_column.size(); ++row)
-            {
-                res.push_back(set_column[row]);
-            }
+                res->push_back(set_column[row]);
             return true;
         }
         else
@@ -125,10 +123,12 @@ static bool traverseASTFilter(const String & primary_key, const DataTypePtr & pr
             if (ident->name() != primary_key)
                 return false;
 
-            //function->name == "equals"
+            /// function->name == "equals"
             if (const auto * literal = value->as<ASTLiteral>())
             {
-                res.push_back(literal->value);
+                auto converted_field = convertFieldToType(literal->value, *primary_key_type);
+                if (!converted_field.isNull())
+                    res->push_back(converted_field);
                 return true;
             }
         }
@@ -140,14 +140,14 @@ static bool traverseASTFilter(const String & primary_key, const DataTypePtr & pr
 /** Retrieve from the query a condition of the form `key = 'key'`, `key in ('xxx_'), from conjunctions in the WHERE clause.
   * TODO support key like search
   */
-static std::pair<FieldVector, bool> getFilterKeys(const String & primary_key, const DataTypePtr & primary_key_type, const SelectQueryInfo & query_info)
+static std::pair<FieldVectorPtr, bool> getFilterKeys(
+    const String & primary_key, const DataTypePtr & primary_key_type, const SelectQueryInfo & query_info)
 {
     const auto & select = query_info.query->as<ASTSelectQuery &>();
     if (!select.where())
-    {
-        return std::make_pair(FieldVector{}, true);
-    }
-    FieldVector res;
+        return {{}, true};
+
+    FieldVectorPtr res = std::make_shared<FieldVector>();
     auto matched_keys = traverseASTFilter(primary_key, primary_key_type, select.where(), query_info.sets, res);
     return std::make_pair(res, !matched_keys);
 }
@@ -159,23 +159,19 @@ public:
     EmbeddedRocksDBSource(
         const StorageEmbeddedRocksDB & storage_,
         const StorageMetadataPtr & metadata_snapshot_,
-        const FieldVector & keys_,
-        const size_t start_,
-        const size_t end_,
+        FieldVectorPtr keys_,
+        FieldVector::const_iterator begin_,
+        FieldVector::const_iterator end_,
         const size_t max_block_size_)
         : SourceWithProgress(metadata_snapshot_->getSampleBlock())
         , storage(storage_)
         , metadata_snapshot(metadata_snapshot_)
-        , start(start_)
+        , keys(std::move(keys_))
+        , begin(begin_)
         , end(end_)
+        , it(begin)
         , max_block_size(max_block_size_)
     {
-        // slice the keys
-        if (end > start)
-        {
-            keys.resize(end - start);
-            std::copy(keys_.begin() + start, keys_.begin() + end, keys.begin());
-        }
     }
 
     String getName() const override
@@ -185,27 +181,34 @@ public:
 
     Chunk generate() override
     {
-        if (processed_keys >= keys.size() || (start == end))
+        if (it >= end)
             return {};
 
-        std::vector<rocksdb::Slice> slices_keys;
-        slices_keys.reserve(keys.size());
-        std::vector<String> values;
-        std::vector<WriteBufferFromOwnString> wbs(keys.size());
+        size_t num_keys = end - begin;
+
+        std::vector<std::string> serialized_keys(num_keys);
+        std::vector<rocksdb::Slice> slices_keys(num_keys);
 
         const auto & sample_block = metadata_snapshot->getSampleBlock();
         const auto & key_column = sample_block.getByName(storage.primary_key);
         auto columns = sample_block.cloneEmptyColumns();
         size_t primary_key_pos = sample_block.getPositionByName(storage.primary_key);
 
-        for (size_t i = processed_keys; i < std::min(keys.size(), processed_keys + max_block_size); ++i)
+        size_t rows_processed = 0;
+        while (it < end && rows_processed < max_block_size)
         {
-            key_column.type->serializeBinary(keys[i], wbs[i]);
-            auto str_ref = wbs[i].stringRef();
-            slices_keys.emplace_back(str_ref.data, str_ref.size);
+            WriteBufferFromString wb(serialized_keys[rows_processed]);
+            key_column.type->serializeBinary(*it, wb);
+            wb.finalize();
+            slices_keys[rows_processed] = std::move(serialized_keys[rows_processed]);
+
+            ++it;
+            ++rows_processed;
         }
 
+        std::vector<String> values;
         auto statuses = storage.rocksdb_ptr->MultiGet(rocksdb::ReadOptions(), slices_keys, &values);
+
         for (size_t i = 0; i < statuses.size(); ++i)
         {
             if (statuses[i].ok())
@@ -221,7 +224,6 @@ public:
                 }
             }
         }
-        processed_keys += max_block_size;
 
         UInt64 num_rows = columns.at(0)->size();
         return Chunk(std::move(columns), num_rows);
@@ -231,12 +233,11 @@ private:
     const StorageEmbeddedRocksDB & storage;
 
     const StorageMetadataPtr metadata_snapshot;
-    const size_t start;
-    const size_t end;
+    FieldVectorPtr keys;
+    FieldVector::const_iterator begin;
+    FieldVector::const_iterator end;
+    FieldVector::const_iterator it;
     const size_t max_block_size;
-    FieldVector keys;
-
-    size_t processed_keys = 0;
 };
 
 
@@ -289,7 +290,8 @@ Pipe StorageEmbeddedRocksDB::read(
         unsigned num_streams)
 {
     metadata_snapshot->check(column_names, getVirtuals(), getStorageID());
-    FieldVector keys;
+
+    FieldVectorPtr keys;
     bool all_scan = false;
 
     auto primary_key_data_type = metadata_snapshot->getSampleBlock().getByName(primary_key).type;
@@ -302,37 +304,34 @@ Pipe StorageEmbeddedRocksDB::read(
     }
     else
     {
-        if (keys.empty())
+        if (keys->empty())
             return {};
 
-        std::sort(keys.begin(), keys.end());
-        auto unique_iter = std::unique(keys.begin(), keys.end());
-        if (unique_iter != keys.end())
-            keys.erase(unique_iter, keys.end());
+        std::sort(keys->begin(), keys->end());
+        keys->erase(std::unique(keys->begin(), keys->end()), keys->end());
 
         Pipes pipes;
-        size_t start = 0;
-        size_t end;
 
-        const size_t num_threads = std::min(size_t(num_streams), keys.size());
-        const size_t batch_per_size = ceil(keys.size() * 1.0 / num_threads);
+        size_t num_keys = keys->size();
+        size_t num_threads = std::min(size_t(num_streams), keys->size());
 
-        for (size_t t = 0; t < num_threads; ++t)
+        assert(num_keys <= std::numeric_limits<uint32_t>::max());
+        assert(num_threads <= std::numeric_limits<uint32_t>::max());
+
+        for (size_t thread_idx = 0; thread_idx < num_threads; ++thread_idx)
         {
-            if (start >= keys.size())
-                start = end = 0;
-            else
-                end = start + batch_per_size > keys.size() ? keys.size() : start + batch_per_size;
+            size_t begin = num_keys * thread_idx / num_threads;
+            size_t end = num_keys * (thread_idx + 1) / num_threads;
 
-            pipes.emplace_back(
-                std::make_shared<EmbeddedRocksDBSource>(*this, metadata_snapshot, keys, start, end, max_block_size));
-            start += batch_per_size;
+            pipes.emplace_back(std::make_shared<EmbeddedRocksDBSource>(
+                    *this, metadata_snapshot, keys, keys->begin() + begin, keys->begin() + end, max_block_size));
         }
         return Pipe::unitePipes(std::move(pipes));
     }
 }
 
-BlockOutputStreamPtr StorageEmbeddedRocksDB::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, const Context & /*context*/)
+BlockOutputStreamPtr StorageEmbeddedRocksDB::write(
+    const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, const Context & /*context*/)
 {
     return std::make_shared<EmbeddedRocksDBBlockOutputStream>(*this, metadata_snapshot);
 }
diff --git a/tests/queries/0_stateless/01686_rocksdb.reference b/tests/queries/0_stateless/01686_rocksdb.reference
new file mode 100644
index 00000000000..fa4e12d51ff
--- /dev/null
+++ b/tests/queries/0_stateless/01686_rocksdb.reference
@@ -0,0 +1,15 @@
+123	Hello, world (123)
+--
+--
+123	Hello, world (123)
+4567	Hello, world (4567)
+--
+--
+0	Hello, world (0)
+--
+123	Hello, world (123)
+456	Hello, world (456)
+--
+99	Hello, world (99)
+999	Hello, world (999)
+9999	Hello, world (9999)
diff --git a/tests/queries/0_stateless/01686_rocksdb.sql b/tests/queries/0_stateless/01686_rocksdb.sql
new file mode 100644
index 00000000000..c9b133acff3
--- /dev/null
+++ b/tests/queries/0_stateless/01686_rocksdb.sql
@@ -0,0 +1,27 @@
+DROP TABLE IF EXISTS test;
+
+CREATE TABLE test (key UInt64, value String) Engine=EmbeddedRocksDB PRIMARY KEY(key);
+
+INSERT INTO test SELECT number, format('Hello, world ({})', toString(number)) FROM numbers(10000);
+
+SELECT * FROM test WHERE key = 123;
+SELECT '--';
+SELECT * FROM test WHERE key = -123;
+SELECT '--';
+SELECT * FROM test WHERE key = 123 OR key = 4567 ORDER BY key;
+SELECT '--';
+SELECT * FROM test WHERE key = NULL;
+SELECT '--';
+SELECT * FROM test WHERE key = NULL OR key = 0;
+SELECT '--';
+SELECT * FROM test WHERE key IN (123, 456, -123) ORDER BY key;
+SELECT '--';
+SELECT * FROM test WHERE key = 'Hello'; -- { serverError 53 }
+
+DETACH TABLE test NO DELAY;
+ATTACH TABLE test;
+
+SELECT * FROM test WHERE key IN (99, 999, 9999, -123) ORDER BY key;
+
+DROP TABLE IF EXISTS test;
+
diff --git a/tests/queries/0_stateless/arcadia_skip_list.txt b/tests/queries/0_stateless/arcadia_skip_list.txt
index a33ff98032b..d262fb9058e 100644
--- a/tests/queries/0_stateless/arcadia_skip_list.txt
+++ b/tests/queries/0_stateless/arcadia_skip_list.txt
@@ -200,3 +200,4 @@
 01676_clickhouse_client_autocomplete
 01671_aggregate_function_group_bitmap_data
 01674_executable_dictionary_implicit_key
+01686_rocksdb

From dc6a9ab9f99ba41ebce0289aa78b33a228f5f2fc Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Wed, 3 Feb 2021 09:38:02 -0800
Subject: [PATCH 0599/1238] Docs - fix unit for event_time_microseconds
 system.trace_log

---
 docs/en/operations/system-tables/trace_log.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/operations/system-tables/trace_log.md b/docs/en/operations/system-tables/trace_log.md
index 8107f60b808..2903e0d3bd7 100644
--- a/docs/en/operations/system-tables/trace_log.md
+++ b/docs/en/operations/system-tables/trace_log.md
@@ -12,7 +12,7 @@ Columns:
 
 -   `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Timestamp of the sampling moment.
 
--   `event_time_microseconds` ([DateTime](../../sql-reference/data-types/datetime.md)) — Timestamp of the sampling moment with microseconds precision.
+-   `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Timestamp of the sampling moment with microseconds precision.
 
 -   `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Timestamp of the sampling moment in nanoseconds.
 

From 40939ff6ba1cef544840d65d49f36becf627dd3a Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Wed, 3 Feb 2021 20:45:50 +0300
Subject: [PATCH 0600/1238] Fix data race in executable dictionary

---
 src/Dictionaries/CacheDictionary.cpp            | 1 -
 src/Dictionaries/ExecutableDictionarySource.cpp | 3 +++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp
index ad98d69fdf9..67bcab109ea 100644
--- a/src/Dictionaries/CacheDictionary.cpp
+++ b/src/Dictionaries/CacheDictionary.cpp
@@ -1291,7 +1291,6 @@ void CacheDictionary::update(UpdateUnitPtr & update_unit_ptr)
             BlockInputStreamPtr stream = current_source_ptr->loadIds(update_unit_ptr->requested_ids);
             stream->readPrefix();
 
-
             while (true)
             {
                 Block block = stream->read();
diff --git a/src/Dictionaries/ExecutableDictionarySource.cpp b/src/Dictionaries/ExecutableDictionarySource.cpp
index 42dac540f09..37dde600adf 100644
--- a/src/Dictionaries/ExecutableDictionarySource.cpp
+++ b/src/Dictionaries/ExecutableDictionarySource.cpp
@@ -186,6 +186,9 @@ namespace
             if (!err.empty())
                 LOG_ERROR(log, "Having stderr: {}", err);
 
+            if (thread.joinable())
+                thread.join();
+
             command->wait();
         }
 

From dc2afc4795dfdecbe675caffd34ab346d21d79ee Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Wed, 3 Feb 2021 21:12:14 +0300
Subject: [PATCH 0601/1238] Fix double whitespace #18383

---
 src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index 457c9c04aa9..924c7dbe185 100644
--- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -1266,7 +1266,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder(
             }
         }
 
-        auto plan = createPlanFromPipe(Pipe::unitePipes(std::move(pipes)), query_id, data, " with order");
+        auto plan = createPlanFromPipe(Pipe::unitePipes(std::move(pipes)), query_id, data, "with order");
 
         if (input_order_info->direction != 1)
         {

From 05c5c8ed80e7895e1d3507ee089f1456f0bf5686 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Wed, 3 Feb 2021 21:23:54 +0300
Subject: [PATCH 0602/1238] Avoid UBSan report in pointInPolygon

---
 src/Functions/CMakeLists.txt | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt
index 321aa5e2196..1c3beb2e47d 100644
--- a/src/Functions/CMakeLists.txt
+++ b/src/Functions/CMakeLists.txt
@@ -117,3 +117,6 @@ target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_array)
 if (USE_STATS)
     target_link_libraries(clickhouse_functions PRIVATE stats)
 endif()
+
+# Signed integer overflow on user-provided data inside boost::geometry - ignore.
+set_source_files_properties("pointInPolygon.cpp" PROPERTIES COMPILE_FLAGS -fno-sanitize=signed-integer-overflow)

From eb8b2e883aae2257d926f4894f16fa22f86ad170 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Wed, 3 Feb 2021 21:25:04 +0300
Subject: [PATCH 0603/1238] Add a test

---
 tests/queries/0_stateless/01700_point_in_polygon_ubsan.reference | 0
 tests/queries/0_stateless/01700_point_in_polygon_ubsan.sql       | 1 +
 2 files changed, 1 insertion(+)
 create mode 100644 tests/queries/0_stateless/01700_point_in_polygon_ubsan.reference
 create mode 100644 tests/queries/0_stateless/01700_point_in_polygon_ubsan.sql

diff --git a/tests/queries/0_stateless/01700_point_in_polygon_ubsan.reference b/tests/queries/0_stateless/01700_point_in_polygon_ubsan.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/01700_point_in_polygon_ubsan.sql b/tests/queries/0_stateless/01700_point_in_polygon_ubsan.sql
new file mode 100644
index 00000000000..d7859bdc5a9
--- /dev/null
+++ b/tests/queries/0_stateless/01700_point_in_polygon_ubsan.sql
@@ -0,0 +1 @@
+SELECT pointInPolygon((0, 0), [[(0, 0), (10, 10), (256, -9223372036854775808)]]) FORMAT Null;
\ No newline at end of file

From 0aca4a740ccd6462fbe0360f3aa6ea6db9344a5c Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Wed, 3 Feb 2021 21:26:03 +0300
Subject: [PATCH 0604/1238] Update 01700_point_in_polygon_ubsan.sql

---
 tests/queries/0_stateless/01700_point_in_polygon_ubsan.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/01700_point_in_polygon_ubsan.sql b/tests/queries/0_stateless/01700_point_in_polygon_ubsan.sql
index d7859bdc5a9..97db40ab65e 100644
--- a/tests/queries/0_stateless/01700_point_in_polygon_ubsan.sql
+++ b/tests/queries/0_stateless/01700_point_in_polygon_ubsan.sql
@@ -1 +1 @@
-SELECT pointInPolygon((0, 0), [[(0, 0), (10, 10), (256, -9223372036854775808)]]) FORMAT Null;
\ No newline at end of file
+SELECT pointInPolygon((0, 0), [[(0, 0), (10, 10), (256, -9223372036854775808)]]) FORMAT Null;

From cc43f92719070ef44b34610e05ee5ba09475030d Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Wed, 3 Feb 2021 22:45:54 +0300
Subject: [PATCH 0605/1238] Fix tests *.reference files

Using:

    sed -i 's/MergeTree  with order/MergeTree with order/g' tests/queries/0_stateless/*.reference

Fixes: dc2afc4795 ("Fix double whitespace #18383")
---
 ...monotonous_functions_in_order_by.reference |  4 ++--
 .../01576_alias_column_rewrite.reference      | 24 +++++++++----------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference b/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference
index feca2cae5ea..a1a1814a581 100644
--- a/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference
+++ b/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference
@@ -23,7 +23,7 @@ Expression (Projection)
     FinishSorting
       Expression (Before ORDER BY)
         SettingQuotaAndLimits (Set limits and quota after reading from storage)
-          ReadFromStorage (MergeTree  with order)
+          ReadFromStorage (MergeTree with order)
 SELECT
     timestamp,
     key
@@ -37,7 +37,7 @@ Expression (Projection)
     FinishSorting
       Expression (Before ORDER BY)
         SettingQuotaAndLimits (Set limits and quota after reading from storage)
-          ReadFromStorage (MergeTree  with order)
+          ReadFromStorage (MergeTree with order)
 SELECT
     timestamp,
     key
diff --git a/tests/queries/0_stateless/01576_alias_column_rewrite.reference b/tests/queries/0_stateless/01576_alias_column_rewrite.reference
index 679695dd6db..334ebc7eb1f 100644
--- a/tests/queries/0_stateless/01576_alias_column_rewrite.reference
+++ b/tests/queries/0_stateless/01576_alias_column_rewrite.reference
@@ -35,18 +35,18 @@ Expression (Projection)
       Expression ((Before ORDER BY + Add table aliases))
         SettingQuotaAndLimits (Set limits and quota after reading from storage)
           Union
-            ReadFromStorage (MergeTree  with order)
-            ReadFromStorage (MergeTree  with order)
-            ReadFromStorage (MergeTree  with order)
+            ReadFromStorage (MergeTree with order)
+            ReadFromStorage (MergeTree with order)
+            ReadFromStorage (MergeTree with order)
 Expression (Projection)
   Limit (preliminary LIMIT)
     FinishSorting
       Expression (Before ORDER BY)
         SettingQuotaAndLimits (Set limits and quota after reading from storage)
           Union
-            ReadFromStorage (MergeTree  with order)
-            ReadFromStorage (MergeTree  with order)
-            ReadFromStorage (MergeTree  with order)
+            ReadFromStorage (MergeTree with order)
+            ReadFromStorage (MergeTree with order)
+            ReadFromStorage (MergeTree with order)
 optimize_aggregation_in_order
 Expression ((Projection + Before ORDER BY))
   Aggregating
@@ -58,17 +58,17 @@ Expression ((Projection + Before ORDER BY))
     Expression ((Before GROUP BY + Add table aliases))
       SettingQuotaAndLimits (Set limits and quota after reading from storage)
         Union
-          ReadFromStorage (MergeTree  with order)
-          ReadFromStorage (MergeTree  with order)
-          ReadFromStorage (MergeTree  with order)
+          ReadFromStorage (MergeTree with order)
+          ReadFromStorage (MergeTree with order)
+          ReadFromStorage (MergeTree with order)
 Expression ((Projection + Before ORDER BY))
   Aggregating
     Expression (Before GROUP BY)
       SettingQuotaAndLimits (Set limits and quota after reading from storage)
         Union
-          ReadFromStorage (MergeTree  with order)
-          ReadFromStorage (MergeTree  with order)
-          ReadFromStorage (MergeTree  with order)
+          ReadFromStorage (MergeTree with order)
+          ReadFromStorage (MergeTree with order)
+          ReadFromStorage (MergeTree with order)
 second-index
 1
 1

From 066fb4c82bd33744dc8a99d34d88674d83764ba1 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Wed, 3 Feb 2021 23:02:37 +0300
Subject: [PATCH 0606/1238] fix

---
 src/Databases/DatabaseReplicatedWorker.cpp    |   2 +-
 src/Interpreters/DDLWorker.cpp                |   9 +-
 src/Interpreters/DDLWorker.h                  |   2 +-
 src/Interpreters/DatabaseCatalog.cpp          |   8 +-
 src/Interpreters/executeDDLQueryOnCluster.cpp |   2 +-
 .../0_stateless/01238_http_memory_tracking.sh |   3 +
 .../01281_group_by_limit_memory_tracking.sh   |   3 +
 .../01541_max_memory_usage_for_user.sh        |   3 +
 tests/queries/skip_list.json                  | 128 +++++++++++++++++-
 9 files changed, 147 insertions(+), 13 deletions(-)

diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp
index a1cdff204c7..5af216c3d0d 100644
--- a/src/Databases/DatabaseReplicatedWorker.cpp
+++ b/src/Databases/DatabaseReplicatedWorker.cpp
@@ -93,7 +93,7 @@ String DatabaseReplicatedDDLWorker::tryEnqueueAndExecuteEntry(DDLLogEntry & entr
     LOG_DEBUG(log, "Waiting for worker thread to process all entries before {}", entry_name);
     {
         std::unique_lock lock{mutex};
-        wait_current_task_change.wait(lock, [&]() { assert(current_task <= entry_name); return zookeeper->expired() || current_task == entry_name; });
+        wait_current_task_change.wait(lock, [&]() { assert(zookeeper->expired() || current_task <= entry_name); return zookeeper->expired() || current_task == entry_name; });
     }
 
     if (zookeeper->expired())
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 4470a3649c5..545e00296e8 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -357,7 +357,7 @@ void DDLWorker::scheduleTasks()
         if (!task)
         {
             LOG_DEBUG(log, "Will not execute task {}: {}", entry_name, reason);
-            updateMaxDDLEntryID(*task);
+            updateMaxDDLEntryID(entry_name);
             continue;
         }
 
@@ -449,9 +449,9 @@ bool DDLWorker::tryExecuteQuery(const String & query, DDLTaskBase & task)
     return true;
 }
 
-void DDLWorker::updateMaxDDLEntryID(const DDLTaskBase & task)
+void DDLWorker::updateMaxDDLEntryID(const String & entry_name)
 {
-    DB::ReadBufferFromString in(task.entry_name);
+    DB::ReadBufferFromString in(entry_name);
     DB::assertString("query-", in);
     UInt64 id;
     readText(id, in);
@@ -511,6 +511,7 @@ void DDLWorker::processTask(DDLTaskBase & task)
 
             if (task.execute_on_leader)
             {
+                tryExecuteQueryOnLeaderReplica(task, storage, rewritten_query, task.entry_path, zookeeper);
             }
             else
             {
@@ -549,7 +550,7 @@ void DDLWorker::processTask(DDLTaskBase & task)
         task.was_executed = true;
     }
 
-    updateMaxDDLEntryID(task);
+    updateMaxDDLEntryID(task.entry_name);
 
     /// FIXME: if server fails right here, the task will be executed twice. We need WAL here.
     /// If ZooKeeper connection is lost here, we will try again to write query status.
diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h
index 6124e5ee8ec..d9fd4e58cb6 100644
--- a/src/Interpreters/DDLWorker.h
+++ b/src/Interpreters/DDLWorker.h
@@ -73,7 +73,7 @@ protected:
     virtual DDLTaskPtr initAndCheckTask(const String & entry_name, String & out_reason, const ZooKeeperPtr & zookeeper);
 
     void processTask(DDLTaskBase & task);
-    void updateMaxDDLEntryID(const DDLTaskBase & task);
+    void updateMaxDDLEntryID(const String & entry_name);
 
     /// Check that query should be executed on leader replica only
     static bool taskShouldBeExecutedOnLeader(const ASTPtr ast_ddl, StoragePtr storage);
diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp
index 4ab3fb28785..6313da7132d 100644
--- a/src/Interpreters/DatabaseCatalog.cpp
+++ b/src/Interpreters/DatabaseCatalog.cpp
@@ -976,12 +976,10 @@ void DDLGuard::releaseTableLock() noexcept
 
     table_lock_removed = true;
     guards_lock.lock();
-    --it->second.counter;
-    if (!it->second.counter)
-    {
-        table_lock.unlock();
+    UInt32 counter = --it->second.counter;
+    table_lock.unlock();
+    if (counter == 0)
         map.erase(it);
-    }
     guards_lock.unlock();
 }
 
diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp
index fb155e82926..a0148316610 100644
--- a/src/Interpreters/executeDDLQueryOnCluster.cpp
+++ b/src/Interpreters/executeDDLQueryOnCluster.cpp
@@ -33,7 +33,7 @@ bool isSupportedAlterType(int type)
 {
     assert(type != ASTAlterCommand::NO_TYPE);
     static const std::unordered_set<int> unsupported_alter_types{
-        /// It's dangerous, because it may duplicate data if executed on multiple replicas
+        /// It's dangerous, because it may duplicate data if executed on multiple replicas. We can allow it after #18978
         ASTAlterCommand::ATTACH_PARTITION,
         /// Usually followed by ATTACH PARTITION
         ASTAlterCommand::FETCH_PARTITION,
diff --git a/tests/queries/0_stateless/01238_http_memory_tracking.sh b/tests/queries/0_stateless/01238_http_memory_tracking.sh
index 90a7611c7c7..8c900e4c208 100755
--- a/tests/queries/0_stateless/01238_http_memory_tracking.sh
+++ b/tests/queries/0_stateless/01238_http_memory_tracking.sh
@@ -18,3 +18,6 @@ yes 'SELECT 1' 2>/dev/null | {
 } | grep -x -c 1
 
 wait
+
+# Reset max_memory_usage_for_user, so it will not affect other tests
+${CLICKHOUSE_CLIENT} --max_memory_usage_for_user=0 -q "SELECT 1 FORMAT Null"
diff --git a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh
index 285e2ab8dad..222f7edd787 100755
--- a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh
+++ b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh
@@ -42,3 +42,6 @@ execute_group_by
 # if memory accounting will be incorrect, the second query will be failed with MEMORY_LIMIT_EXCEEDED
 execute_group_by
 wait
+
+# Reset max_memory_usage_for_user, so it will not affect other tests
+${CLICKHOUSE_CLIENT} --max_memory_usage_for_user=0 -q "SELECT 1 FORMAT Null"
diff --git a/tests/queries/0_stateless/01541_max_memory_usage_for_user.sh b/tests/queries/0_stateless/01541_max_memory_usage_for_user.sh
index c81bd1a6ce4..32877bfd0fe 100755
--- a/tests/queries/0_stateless/01541_max_memory_usage_for_user.sh
+++ b/tests/queries/0_stateless/01541_max_memory_usage_for_user.sh
@@ -66,4 +66,7 @@ echo 'OK'
 
 ${CLICKHOUSE_CLIENT} --query "DROP USER test_01541";
 
+# Reset max_memory_usage_for_user, so it will not affect other tests
+${CLICKHOUSE_CLIENT} --max_memory_usage_for_user=0 -q "SELECT 1 FORMAT Null"
+
 exit 0
diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json
index 3311eb3882d..273e00c8a23 100644
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@@ -100,7 +100,133 @@
         "00604_show_create_database",
         "00609_mv_index_in_in",
         "00510_materizlized_view_and_deduplication_zookeeper",
-        "00738_lock_for_inner_table"
+        "memory_tracking",     /// FIXME remove it before merge
+        "memory_tracking",
+        "memory_usage",
+        "00738_lock_for_inner_table",
+        "01666_blns",
+        "01652_ignore_and_low_cardinality",
+        "01651_map_functions",
+        "01650_fetch_patition_with_macro_in_zk_path",
+        "01648_mutations_and_escaping",
+        "01640_marks_corruption_regression",
+        "01622_byte_size",
+        "01611_string_to_low_cardinality_key_alter",
+        "01602_show_create_view",
+        "01600_log_queries_with_extensive_info",
+        "01560_ttl_remove_empty_parts",
+        "01554_bloom_filter_index_big_integer_uuid",
+        "01550_type_map_formats_input",
+        "01550_type_map_formats",
+        "01550_create_map_type",
+        "01532_primary_key_without_order_by_zookeeper",
+        "01511_alter_version_versioned_collapsing_merge_tree_zookeeper",
+        "01509_parallel_quorum_insert_no_replicas",
+        "01504_compression_multiple_streams",
+        "01494_storage_join_persistency",
+        "01493_storage_set_persistency",
+        "01493_alter_remove_properties_zookeeper",
+        "01475_read_subcolumns_storages",
+        "01475_read_subcolumns",
+        "01463_test_alter_live_view_refresh",
+        "01451_replicated_detach_drop_part",
+        "01451_detach_drop_part",
+        "01440_big_int_exotic_casts",
+        "01430_modify_sample_by_zookeeper",
+        "01417_freeze_partition_verbose_zookeeper",
+        "01417_freeze_partition_verbose",
+        "01396_inactive_replica_cleanup_nodes_zookeeper",
+        "01375_compact_parts_codecs",
+        "01357_version_collapsing_attach_detach_zookeeper",
+        "01355_alter_column_with_order",
+        "01291_geo_types",
+        "01270_optimize_skip_unused_shards_low_cardinality",
+        "01237_live_view_over_distributed_with_subquery_select_table_alias",
+        "01236_distributed_over_live_view_over_distributed",
+        "01235_live_view_over_distributed",
+        "01182_materialized_view_different_structure",
+        "01150_ddl_guard_rwr",
+        "01148_zookeeper_path_macros_unfolding",
+        "01135_default_and_alter_zookeeper",
+        "01130_in_memory_parts_partitons",
+        "01127_month_partitioning_consistency_select",
+        "01114_database_atomic",
+        "01083_expressions_in_engine_arguments",
+        "01073_attach_if_not_exists",
+        "01072_optimize_skip_unused_shards_const_expr_eval",
+        "01071_prohibition_secondary_index_with_old_format_merge_tree",
+        "01071_live_view_detach_dependency",
+        "01062_alter_on_mutataion_zookeeper",
+        "01060_shutdown_table_after_detach",
+        "01056_create_table_as",
+        "01035_avg",
+        "01021_only_tuple_columns",
+        "01019_alter_materialized_view_query",
+        "01019_alter_materialized_view_consistent",
+        "01019_alter_materialized_view_atomic",
+        "01015_attach_part",
+        "00989_parallel_parts_loading",
+        "00980_zookeeper_merge_tree_alter_settings",
+        "00980_merge_alter_settings",
+        "00980_create_temporary_live_view",
+        "00978_live_view_watch",
+        "00977_live_view_watch_events",
+        "00976_live_view_select_version",
+        "00975_live_view_create",
+        "00974_live_view_select_with_aggregation",
+        "00973_live_view_with_subquery_select_with_aggregation_in_subquery",
+        "00973_live_view_with_subquery_select_with_aggregation",
+        "00973_live_view_with_subquery_select_table_alias",
+        "00973_live_view_with_subquery_select_nested_with_aggregation_table_alias",
+        "00973_live_view_with_subquery_select_nested_with_aggregation",
+        "00973_live_view_with_subquery_select_nested",
+        "00973_live_view_with_subquery_select_join_no_alias",
+        "00973_live_view_with_subquery_select_join",
+        "00973_live_view_with_subquery_select",
+        "00973_live_view_select_prewhere",
+        "00973_live_view_select",
+        "00972_live_view_select_1",
+        "00969_live_view_watch_format_jsoneachrowwithprogress",
+        "00968_live_view_select_format_jsoneachrowwithprogress",
+        "00961_temporary_live_view_watch",
+        "00955_test_final_mark",
+        "00933_reserved_word",
+        "00926_zookeeper_adaptive_index_granularity_replicated_merge_tree",
+        "00926_adaptive_index_granularity_replacing_merge_tree",
+        "00926_adaptive_index_granularity_merge_tree",
+        "00925_zookeeper_empty_replicated_merge_tree_optimize_final",
+        "00800_low_cardinality_distinct_numeric",
+        "00754_alter_modify_order_by_replicated_zookeeper",
+        "00751_low_cardinality_nullable_group_by",
+        "00751_default_databasename_for_view",
+        "00719_parallel_ddl_table",
+        "00718_low_cardinaliry_alter",
+        "00717_low_cardinaliry_distributed_group_by",
+        "00688_low_cardinality_syntax",
+        "00688_low_cardinality_nullable_cast",
+        "00688_low_cardinality_in",
+        "00652_replicated_mutations_zookeeper",
+        "00634_rename_view",
+        "00626_replace_partition_from_table",
+        "00625_arrays_in_nested",
+        "00623_replicated_truncate_table_zookeeper",
+        "00619_union_highlite",
+        "00599_create_view_with_subquery",
+        "00571_non_exist_database_when_create_materializ_view",
+        "00553_buff_exists_materlized_column",
+        "00516_deduplication_after_drop_partition_zookeeper",
+        "00508_materialized_view_to",
+        "00446_clear_column_in_partition_concurrent_zookeeper",
+        "00423_storage_log_single_thread",
+        "00311_array_primary_key",
+        "00236_replicated_drop_on_non_leader_zookeeper",
+        "00226_zookeeper_deduplication_and_unexpected_parts",
+        "00215_primary_key_order_zookeeper",
+        "00180_attach_materialized_view",
+        "00121_drop_column_zookeeper",
+        "00116_storage_set",
+        "00083_create_merge_tree_zookeeper",
+        "00062_replicated_merge_tree_alter_zookeeper"
     ],
     "polymorphic-parts": [
         "01508_partition_pruning", /// bug, shoud be fixed

From 1ff87ac6f90452d4a71494c2327d4a6781a55b37 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Wed, 3 Feb 2021 23:32:15 +0300
Subject: [PATCH 0607/1238] Add background session lifetime control

---
 src/Common/ZooKeeper/ZooKeeperCommon.h        |  2 +-
 src/Coordination/NuKeeperServer.cpp           | 13 +++-
 src/Coordination/NuKeeperServer.h             |  4 +-
 src/Coordination/NuKeeperStateMachine.cpp     | 23 ++++--
 src/Coordination/NuKeeperStateMachine.h       |  4 +-
 src/Coordination/NuKeeperStorage.cpp          | 15 +++-
 src/Coordination/NuKeeperStorage.h            | 18 ++++-
 .../NuKeeperStorageDispatcher.cpp             | 42 +++++++++-
 src/Coordination/NuKeeperStorageDispatcher.h  |  9 ++-
 src/Coordination/SessionExpiryQueue.cpp       | 77 +++++++++++++++++++
 src/Coordination/SessionExpiryQueue.h         | 43 +++++++++++
 src/Coordination/ya.make.in                   | 12 +++
 src/Server/NuKeeperTCPHandler.cpp             | 36 ++++-----
 src/Server/NuKeeperTCPHandler.h               |  3 +-
 .../configs/enable_test_keeper1.xml           |  4 +-
 .../configs/enable_test_keeper2.xml           |  4 +-
 .../configs/enable_test_keeper3.xml           |  4 +-
 17 files changed, 261 insertions(+), 52 deletions(-)
 create mode 100644 src/Coordination/SessionExpiryQueue.cpp
 create mode 100644 src/Coordination/SessionExpiryQueue.h
 create mode 100644 src/Coordination/ya.make.in

diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.h b/src/Common/ZooKeeper/ZooKeeperCommon.h
index b2c18c31798..84d7a0823ec 100644
--- a/src/Common/ZooKeeper/ZooKeeperCommon.h
+++ b/src/Common/ZooKeeper/ZooKeeperCommon.h
@@ -72,7 +72,7 @@ struct ZooKeeperHeartbeatRequest final : ZooKeeperRequest
     void writeImpl(WriteBuffer &) const override {}
     void readImpl(ReadBuffer &) override {}
     ZooKeeperResponsePtr makeResponse() const override;
-    bool isReadRequest() const override { return true; }
+    bool isReadRequest() const override { return false; }
 };
 
 struct ZooKeeperHeartbeatResponse final : ZooKeeperResponse
diff --git a/src/Coordination/NuKeeperServer.cpp b/src/Coordination/NuKeeperServer.cpp
index 1d99bf54ec8..335f577beeb 100644
--- a/src/Coordination/NuKeeperServer.cpp
+++ b/src/Coordination/NuKeeperServer.cpp
@@ -24,7 +24,7 @@ NuKeeperServer::NuKeeperServer(int server_id_, const std::string & hostname_, in
     , hostname(hostname_)
     , port(port_)
     , endpoint(hostname + ":" + std::to_string(port))
-    , state_machine(nuraft::cs_new<NuKeeperStateMachine>())
+    , state_machine(nuraft::cs_new<NuKeeperStateMachine>(500 /* FIXME */))
     , state_manager(nuraft::cs_new<InMemoryStateManager>(server_id, endpoint))
 {
 }
@@ -214,12 +214,12 @@ NuKeeperStorage::ResponsesForSessions NuKeeperServer::putRequests(const NuKeeper
     }
 }
 
-int64_t NuKeeperServer::getSessionID()
+int64_t NuKeeperServer::getSessionID(long session_timeout_ms)
 {
-    auto entry = nuraft::buffer::alloc(sizeof(int64_t));
+    auto entry = nuraft::buffer::alloc(sizeof(long));
     /// Just special session request
     nuraft::buffer_serializer bs(entry);
-    bs.put_i64(0);
+    bs.put_i64(session_timeout_ms);
 
     std::lock_guard lock(append_entries_mutex);
 
@@ -275,4 +275,9 @@ void NuKeeperServer::waitForCatchUp() const
     }
 }
 
+std::unordered_set<int64_t> NuKeeperServer::getDeadSessions()
+{
+    return state_machine->getDeadSessions();
+}
+
 }
diff --git a/src/Coordination/NuKeeperServer.h b/src/Coordination/NuKeeperServer.h
index 352836dfc27..962863f591e 100644
--- a/src/Coordination/NuKeeperServer.h
+++ b/src/Coordination/NuKeeperServer.h
@@ -46,7 +46,9 @@ public:
 
     NuKeeperStorage::ResponsesForSessions putRequests(const NuKeeperStorage::RequestsForSessions & requests);
 
-    int64_t getSessionID();
+    int64_t getSessionID(long session_timeout_ms);
+
+    std::unordered_set<int64_t> getDeadSessions();
 
     void addServer(int server_id_, const std::string & server_uri, bool can_become_leader_, int32_t priority);
 
diff --git a/src/Coordination/NuKeeperStateMachine.cpp b/src/Coordination/NuKeeperStateMachine.cpp
index b6521e1d648..8e22da81081 100644
--- a/src/Coordination/NuKeeperStateMachine.cpp
+++ b/src/Coordination/NuKeeperStateMachine.cpp
@@ -43,8 +43,9 @@ nuraft::ptr<nuraft::buffer> writeResponses(NuKeeperStorage::ResponsesForSessions
 }
 
 
-NuKeeperStateMachine::NuKeeperStateMachine()
-    : last_committed_idx(0)
+NuKeeperStateMachine::NuKeeperStateMachine(long tick_time)
+    : storage(tick_time)
+    , last_committed_idx(0)
     , log(&Poco::Logger::get("NuRaftStateMachine"))
 {
     LOG_DEBUG(log, "Created nukeeper state machine");
@@ -52,15 +53,19 @@ NuKeeperStateMachine::NuKeeperStateMachine()
 
 nuraft::ptr<nuraft::buffer> NuKeeperStateMachine::commit(const size_t log_idx, nuraft::buffer & data)
 {
-    if (data.size() == sizeof(size_t))
+    if (data.size() == sizeof(long))
     {
-        LOG_DEBUG(log, "Session ID response {}", log_idx);
+        nuraft::buffer_serializer timeout_data(data);
+        long session_timeout_ms = timeout_data.get_i64();
         auto response = nuraft::buffer::alloc(sizeof(size_t));
+        int64_t session_id;
         nuraft::buffer_serializer bs(response);
         {
             std::lock_guard lock(storage_lock);
-            bs.put_i64(storage.getSessionID());
+            session_id = storage.getSessionID(session_timeout_ms);
+            bs.put_i64(session_id);
         }
+        LOG_DEBUG(log, "Session ID response {} with timeout {}", session_id, session_timeout_ms);
         last_committed_idx = log_idx;
         return response;
     }
@@ -121,7 +126,7 @@ NuKeeperStateMachine::StorageSnapshotPtr NuKeeperStateMachine::readSnapshot(nura
     NuKeeperStorageSerializer serializer;
 
     ReadBufferFromNuraftBuffer reader(in);
-    NuKeeperStorage new_storage;
+    NuKeeperStorage new_storage(500 /*FIXME*/);
     serializer.deserialize(new_storage, reader);
     return std::make_shared<StorageSnapshot>(ss, new_storage);
 }
@@ -229,4 +234,10 @@ NuKeeperStorage::ResponsesForSessions NuKeeperStateMachine::processReadRequest(c
     return storage.processRequest(request_for_session.request, request_for_session.session_id);
 }
 
+std::unordered_set<int64_t> NuKeeperStateMachine::getDeadSessions()
+{
+    std::lock_guard lock(storage_lock);
+    return storage.getDeadSessions();
+}
+
 }
diff --git a/src/Coordination/NuKeeperStateMachine.h b/src/Coordination/NuKeeperStateMachine.h
index 41c28caa76c..380588a39f0 100644
--- a/src/Coordination/NuKeeperStateMachine.h
+++ b/src/Coordination/NuKeeperStateMachine.h
@@ -10,7 +10,7 @@ namespace DB
 class NuKeeperStateMachine : public nuraft::state_machine
 {
 public:
-    NuKeeperStateMachine();
+    NuKeeperStateMachine(long tick_time);
 
     nuraft::ptr<nuraft::buffer> pre_commit(const size_t /*log_idx*/, nuraft::buffer & /*data*/) override { return nullptr; }
 
@@ -49,6 +49,8 @@ public:
 
     NuKeeperStorage::ResponsesForSessions processReadRequest(const NuKeeperStorage::RequestForSession & request_for_session);
 
+    std::unordered_set<int64_t> getDeadSessions();
+
 private:
     struct StorageSnapshot
     {
diff --git a/src/Coordination/NuKeeperStorage.cpp b/src/Coordination/NuKeeperStorage.cpp
index 9a8b96d63a3..3b52b47c4bf 100644
--- a/src/Coordination/NuKeeperStorage.cpp
+++ b/src/Coordination/NuKeeperStorage.cpp
@@ -67,7 +67,8 @@ static NuKeeperStorage::ResponsesForSessions processWatchesImpl(const String & p
     return result;
 }
 
-NuKeeperStorage::NuKeeperStorage()
+NuKeeperStorage::NuKeeperStorage(long tick_time_ms)
+    : session_expiry_queue(tick_time_ms)
 {
     container.emplace("/", Node());
 }
@@ -638,6 +639,18 @@ NuKeeperStorage::ResponsesForSessions NuKeeperStorage::processRequest(const Coor
         auto response = std::make_shared<Coordination::ZooKeeperCloseResponse>();
         response->xid = zk_request->xid;
         response->zxid = getZXID();
+        session_expiry_queue.remove(session_id);
+        session_and_timeout.erase(session_id);
+        results.push_back(ResponseForSession{session_id, response});
+    }
+    else if (zk_request->getOpNum() == Coordination::OpNum::Heartbeat)
+    {
+        session_expiry_queue.update(session_id, session_and_timeout[session_id]);
+        NuKeeperStorageRequestPtr storage_request = NuKeeperWrapperFactory::instance().get(zk_request);
+        auto [response, _] = storage_request->process(container, ephemerals, zxid, session_id);
+        response->xid = zk_request->xid;
+        response->zxid = getZXID();
+
         results.push_back(ResponseForSession{session_id, response});
     }
     else
diff --git a/src/Coordination/NuKeeperStorage.h b/src/Coordination/NuKeeperStorage.h
index dce00391bce..cf881687dcb 100644
--- a/src/Coordination/NuKeeperStorage.h
+++ b/src/Coordination/NuKeeperStorage.h
@@ -4,6 +4,7 @@
 #include <Common/ZooKeeper/IKeeper.h>
 #include <Common/ConcurrentBoundedQueue.h>
 #include <Common/ZooKeeper/ZooKeeperCommon.h>
+#include <Coordination/SessionExpiryQueue.h>
 #include <unordered_map>
 #include <unordered_set>
 #include <vector>
@@ -50,6 +51,7 @@ public:
     using Container = std::map<std::string, Node>;
     using Ephemerals = std::unordered_map<int64_t, std::unordered_set<String>>;
     using SessionAndWatcher = std::unordered_map<int64_t, std::unordered_set<String>>;
+    using SessionAndTimeout = std::unordered_map<int64_t, long>;
     using SessionIDs = std::vector<int64_t>;
 
     using Watches = std::map<String /* path, relative of root_path */, SessionIDs>;
@@ -57,6 +59,8 @@ public:
     Container container;
     Ephemerals ephemerals;
     SessionAndWatcher sessions_and_watchers;
+    SessionExpiryQueue session_expiry_queue;
+    SessionAndTimeout session_and_timeout;
 
     int64_t zxid{0};
     bool finalized{false};
@@ -72,15 +76,23 @@ public:
     }
 
 public:
-    NuKeeperStorage();
+    NuKeeperStorage(long tick_time_ms);
 
-    int64_t getSessionID()
+    int64_t getSessionID(long session_timeout_ms)
     {
-        return session_id_counter++;
+        auto result = session_id_counter++;
+        session_and_timeout.emplace(result, session_timeout_ms);
+        session_expiry_queue.update(result, session_timeout_ms);
+        return result;
     }
 
     ResponsesForSessions processRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id);
     ResponsesForSessions finalize(const RequestsForSessions & expired_requests);
+
+    std::unordered_set<int64_t> getDeadSessions()
+    {
+        return session_expiry_queue.getExpiredSessions();
+    }
 };
 
 }
diff --git a/src/Coordination/NuKeeperStorageDispatcher.cpp b/src/Coordination/NuKeeperStorageDispatcher.cpp
index 9988e0ac476..cf36fd40bc3 100644
--- a/src/Coordination/NuKeeperStorageDispatcher.cpp
+++ b/src/Coordination/NuKeeperStorageDispatcher.cpp
@@ -59,7 +59,6 @@ void NuKeeperStorageDispatcher::setResponse(int64_t session_id, const Coordinati
 
 bool NuKeeperStorageDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id)
 {
-
     {
         std::lock_guard lock(session_to_response_callback_mutex);
         if (session_to_response_callback.count(session_id) == 0)
@@ -171,6 +170,7 @@ void NuKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfigurati
     }
 
     processing_thread = ThreadFromGlobalPool([this] { processingThread(); });
+    session_cleaner_thread = ThreadFromGlobalPool([this] { sessionCleanerTask(); });
 
     LOG_DEBUG(log, "Dispatcher initialized");
 }
@@ -188,6 +188,9 @@ void NuKeeperStorageDispatcher::shutdown()
             LOG_DEBUG(log, "Shutting down storage dispatcher");
             shutdown_called = true;
 
+            if (session_cleaner_thread.joinable())
+                session_cleaner_thread.join();
+
             if (processing_thread.joinable())
                 processing_thread.join();
         }
@@ -225,6 +228,43 @@ void NuKeeperStorageDispatcher::registerSession(int64_t session_id, ZooKeeperRes
         throw Exception(DB::ErrorCodes::LOGICAL_ERROR, "Session with id {} already registered in dispatcher", session_id);
 }
 
+void NuKeeperStorageDispatcher::sessionCleanerTask()
+{
+    while (true)
+    {
+        if (shutdown_called)
+            return;
+
+        try
+        {
+            if (isLeader())
+            {
+                auto dead_sessions = server->getDeadSessions();
+                for (int64_t dead_session : dead_sessions)
+                {
+                    LOG_INFO(log, "Found dead session {}, will try to close it", dead_session);
+                    Coordination::ZooKeeperRequestPtr request = Coordination::ZooKeeperRequestFactory::instance().get(Coordination::OpNum::Close);
+                    request->xid = Coordination::CLOSE_XID;
+                    putRequest(request, dead_session);
+                    {
+                        std::lock_guard lock(session_to_response_callback_mutex);
+                        auto session_it = session_to_response_callback.find(dead_session);
+                        if (session_it != session_to_response_callback.end())
+                            session_to_response_callback.erase(session_it);
+                    }
+                }
+            }
+        }
+        catch (...)
+        {
+            tryLogCurrentException(__PRETTY_FUNCTION__);
+        }
+
+        /*FIXME*/
+        std::this_thread::sleep_for(std::chrono::milliseconds(500));
+    }
+}
+
 void NuKeeperStorageDispatcher::finishSession(int64_t session_id)
 {
     std::lock_guard lock(session_to_response_callback_mutex);
diff --git a/src/Coordination/NuKeeperStorageDispatcher.h b/src/Coordination/NuKeeperStorageDispatcher.h
index c292cd99c4f..dfd36b39537 100644
--- a/src/Coordination/NuKeeperStorageDispatcher.h
+++ b/src/Coordination/NuKeeperStorageDispatcher.h
@@ -27,7 +27,6 @@ class NuKeeperStorageDispatcher
 private:
     Poco::Timespan operation_timeout{0, Coordination::DEFAULT_OPERATION_TIMEOUT_MS * 1000};
 
-
     std::mutex push_request_mutex;
 
     using RequestsQueue = ConcurrentBoundedQueue<NuKeeperStorage::RequestForSession>;
@@ -40,12 +39,15 @@ private:
 
     ThreadFromGlobalPool processing_thread;
 
+    ThreadFromGlobalPool session_cleaner_thread;
+
     std::unique_ptr<NuKeeperServer> server;
 
     Poco::Logger * log;
 
 private:
     void processingThread();
+    void sessionCleanerTask();
     void setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response);
 
 public:
@@ -69,15 +71,14 @@ public:
         return server->isLeaderAlive();
     }
 
-    int64_t getSessionID()
+    int64_t getSessionID(long session_timeout_ms)
     {
-        return server->getSessionID();
+        return server->getSessionID(session_timeout_ms);
     }
 
     void registerSession(int64_t session_id, ZooKeeperResponseCallback callback);
     /// Call if we don't need any responses for this session no more (session was expired)
     void finishSession(int64_t session_id);
-
 };
 
 }
diff --git a/src/Coordination/SessionExpiryQueue.cpp b/src/Coordination/SessionExpiryQueue.cpp
new file mode 100644
index 00000000000..45ceaee52fe
--- /dev/null
+++ b/src/Coordination/SessionExpiryQueue.cpp
@@ -0,0 +1,77 @@
+#include <Coordination/SessionExpiryQueue.h>
+#include <common/logger_useful.h>
+namespace DB
+{
+
+bool SessionExpiryQueue::remove(int64_t session_id)
+{
+    auto session_it = session_to_timeout.find(session_id);
+    if (session_it != session_to_timeout.end())
+    {
+        auto set_it = expiry_to_sessions.find(session_it->second);
+        if (set_it != expiry_to_sessions.end())
+            set_it->second.erase(session_id);
+
+        return true;
+    }
+
+    return false;
+}
+
+bool SessionExpiryQueue::update(int64_t session_id, long timeout_ms)
+{
+    auto session_it = session_to_timeout.find(session_id);
+    long now = getNowMilliseconds();
+    long new_expiry_time = roundToNextInterval(now + timeout_ms);
+
+    if (session_it != session_to_timeout.end())
+    {
+        if (new_expiry_time == session_it->second)
+            return false;
+
+        auto set_it = expiry_to_sessions.find(new_expiry_time);
+        if (set_it == expiry_to_sessions.end())
+            std::tie(set_it, std::ignore) = expiry_to_sessions.emplace(new_expiry_time, std::unordered_set<int64_t>());
+
+        set_it->second.insert(session_id);
+        long prev_expiry_time = session_it->second;
+
+        if (prev_expiry_time != new_expiry_time)
+        {
+            auto prev_set_it = expiry_to_sessions.find(prev_expiry_time);
+            if (prev_set_it != expiry_to_sessions.end())
+                prev_set_it->second.erase(session_id);
+        }
+        session_it->second = new_expiry_time;
+        return true;
+    }
+    else
+    {
+        session_to_timeout[session_id] = new_expiry_time;
+        auto set_it = expiry_to_sessions.find(new_expiry_time);
+        if (set_it == expiry_to_sessions.end())
+            std::tie(set_it, std::ignore) = expiry_to_sessions.emplace(new_expiry_time, std::unordered_set<int64_t>());
+        set_it->second.insert(session_id);
+        return false;
+    }
+}
+
+std::unordered_set<int64_t> SessionExpiryQueue::getExpiredSessions()
+{
+    long now = getNowMilliseconds();
+    if (now < next_expiration_time)
+        return {};
+
+    auto set_it = expiry_to_sessions.find(next_expiration_time);
+    long new_expiration_time = next_expiration_time + expiration_interval;
+    next_expiration_time = new_expiration_time;
+    if (set_it != expiry_to_sessions.end())
+    {
+        auto result = set_it->second;
+        expiry_to_sessions.erase(set_it);
+        return result;
+    }
+    return {};
+}
+
+}
diff --git a/src/Coordination/SessionExpiryQueue.h b/src/Coordination/SessionExpiryQueue.h
new file mode 100644
index 00000000000..4fb254526e7
--- /dev/null
+++ b/src/Coordination/SessionExpiryQueue.h
@@ -0,0 +1,43 @@
+#pragma once
+#include <unordered_map>
+#include <unordered_set>
+#include <chrono>
+
+namespace DB
+{
+
+class SessionExpiryQueue
+{
+private:
+    std::unordered_map<int64_t, long> session_to_timeout;
+    std::unordered_map<long, std::unordered_set<int64_t>> expiry_to_sessions;
+
+    long expiration_interval;
+    long next_expiration_time;
+
+    static long getNowMilliseconds()
+    {
+        using namespace std::chrono;
+        return duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
+    }
+
+    long roundToNextInterval(long time) const
+    {
+        return (time / expiration_interval + 1) * expiration_interval;
+    }
+
+public:
+    explicit SessionExpiryQueue(long expiration_interval_)
+        : expiration_interval(expiration_interval_)
+        , next_expiration_time(roundToNextInterval(getNowMilliseconds()))
+    {
+    }
+
+    bool remove(int64_t session_id);
+
+    bool update(int64_t session_id, long timeout_ms);
+
+    std::unordered_set<int64_t> getExpiredSessions();
+};
+
+}
diff --git a/src/Coordination/ya.make.in b/src/Coordination/ya.make.in
new file mode 100644
index 00000000000..ba5f8bcbea4
--- /dev/null
+++ b/src/Coordination/ya.make.in
@@ -0,0 +1,12 @@
+OWNER(g:clickhouse)
+
+LIBRARY()
+
+PEERDIR(
+    clickhouse/src/Common
+)
+
+SRCS(
+)
+
+END()
diff --git a/src/Server/NuKeeperTCPHandler.cpp b/src/Server/NuKeeperTCPHandler.cpp
index 6deee5094ca..9d39c317356 100644
--- a/src/Server/NuKeeperTCPHandler.cpp
+++ b/src/Server/NuKeeperTCPHandler.cpp
@@ -230,8 +230,8 @@ NuKeeperTCPHandler::NuKeeperTCPHandler(IServer & server_, const Poco::Net::Strea
     , log(&Poco::Logger::get("NuKeeperTCPHandler"))
     , global_context(server.context())
     , nu_keeper_storage_dispatcher(global_context.getNuKeeperStorageDispatcher())
-    , operation_timeout(0, global_context.getConfigRef().getUInt("nu_keeper_server.operation_timeout_ms", Coordination::DEFAULT_OPERATION_TIMEOUT_MS) * 1000)
-    , session_timeout(0, global_context.getConfigRef().getUInt("nu_keeper_server.session_timeout_ms", Coordination::DEFAULT_SESSION_TIMEOUT_MS) * 1000)
+    , operation_timeout(0, global_context.getConfigRef().getUInt("test_keeper_server.operation_timeout_ms", Coordination::DEFAULT_OPERATION_TIMEOUT_MS) * 1000)
+    , session_timeout(0, global_context.getConfigRef().getUInt("test_keeper_server.session_timeout_ms", Coordination::DEFAULT_SESSION_TIMEOUT_MS) * 1000)
     , poll_wrapper(std::make_unique<SocketInterruptablePollWrapper>(socket_))
     , responses(std::make_unique<ThreadSafeResponseQueue>())
 {
@@ -245,7 +245,7 @@ void NuKeeperTCPHandler::sendHandshake(bool has_leader)
     else /// Specially ignore connections if we are not leader, client will throw exception
         Coordination::write(42, *out);
 
-    Coordination::write(Coordination::DEFAULT_SESSION_TIMEOUT_MS, *out);
+    Coordination::write(static_cast<int32_t>(session_timeout.totalMilliseconds()), *out);
     Coordination::write(session_id, *out);
     std::array<char, Coordination::PASSWORD_LENGTH> passwd{};
     Coordination::write(passwd, *out);
@@ -257,15 +257,14 @@ void NuKeeperTCPHandler::run()
     runImpl();
 }
 
-void NuKeeperTCPHandler::receiveHandshake()
+Poco::Timespan NuKeeperTCPHandler::receiveHandshake()
 {
     int32_t handshake_length;
     int32_t protocol_version;
     int64_t last_zxid_seen;
-    int32_t timeout;
+    int32_t timeout_ms;
     int64_t previous_session_id = 0;    /// We don't support session restore. So previous session_id is always zero.
     std::array<char, Coordination::PASSWORD_LENGTH> passwd {};
-
     Coordination::read(handshake_length, *in);
     if (handshake_length != Coordination::CLIENT_HANDSHAKE_LENGTH && handshake_length != Coordination::CLIENT_HANDSHAKE_LENGTH_WITH_READONLY)
         throw Exception("Unexpected handshake length received: " + toString(handshake_length), ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT);
@@ -280,7 +279,7 @@ void NuKeeperTCPHandler::receiveHandshake()
     if (last_zxid_seen != 0)
         throw Exception("Non zero last_zxid_seen is not supported", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT);
 
-    Coordination::read(timeout, *in);
+    Coordination::read(timeout_ms, *in);
     Coordination::read(previous_session_id, *in);
 
     if (previous_session_id != 0)
@@ -291,6 +290,8 @@ void NuKeeperTCPHandler::receiveHandshake()
     int8_t readonly;
     if (handshake_length == Coordination::CLIENT_HANDSHAKE_LENGTH_WITH_READONLY)
         Coordination::read(readonly, *in);
+
+    return Poco::Timespan(0, timeout_ms * 1000);
 }
 
 
@@ -316,7 +317,9 @@ void NuKeeperTCPHandler::runImpl()
 
     try
     {
-        receiveHandshake();
+        auto client_timeout = receiveHandshake();
+        if (client_timeout != 0)
+            session_timeout = std::min(client_timeout, session_timeout);
     }
     catch (const Exception & e) /// Typical for an incorrect username, password, or address.
     {
@@ -328,7 +331,7 @@ void NuKeeperTCPHandler::runImpl()
     {
         try
         {
-            session_id = nu_keeper_storage_dispatcher->getSessionID();
+            session_id = nu_keeper_storage_dispatcher->getSessionID(session_timeout.totalMilliseconds());
         }
         catch (const Exception & e)
         {
@@ -416,7 +419,7 @@ void NuKeeperTCPHandler::runImpl()
             if (session_stopwatch.elapsedMicroseconds() > static_cast<UInt64>(session_timeout.totalMicroseconds()))
             {
                 LOG_DEBUG(log, "Session #{} expired", session_id);
-                finish();
+                nu_keeper_storage_dispatcher->finishSession(session_id);
                 break;
             }
         }
@@ -424,21 +427,10 @@ void NuKeeperTCPHandler::runImpl()
     catch (const Exception & ex)
     {
         LOG_INFO(log, "Got exception processing session #{}: {}", session_id, getExceptionMessage(ex, true));
-        finish();
+        nu_keeper_storage_dispatcher->finishSession(session_id);
     }
 }
 
-void NuKeeperTCPHandler::finish()
-{
-    Coordination::ZooKeeperRequestPtr request = Coordination::ZooKeeperRequestFactory::instance().get(Coordination::OpNum::Close);
-    request->xid = close_xid;
-    /// Put close request (so storage will remove all info about session)
-    nu_keeper_storage_dispatcher->putRequest(request, session_id);
-    /// We don't need any callbacks because session can be already dead and
-    /// nobody wait for response
-    nu_keeper_storage_dispatcher->finishSession(session_id);
-}
-
 std::pair<Coordination::OpNum, Coordination::XID> NuKeeperTCPHandler::receiveRequest()
 {
     int32_t length;
diff --git a/src/Server/NuKeeperTCPHandler.h b/src/Server/NuKeeperTCPHandler.h
index 1874b8cd309..641d2f78e1f 100644
--- a/src/Server/NuKeeperTCPHandler.h
+++ b/src/Server/NuKeeperTCPHandler.h
@@ -53,10 +53,9 @@ private:
     void runImpl();
 
     void sendHandshake(bool has_leader);
-    void receiveHandshake();
+    Poco::Timespan receiveHandshake();
 
     std::pair<Coordination::OpNum, Coordination::XID> receiveRequest();
-    void finish();
 };
 
 }
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
index 81f68f50c7c..e1b6da40338 100644
--- a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
@@ -1,8 +1,8 @@
 <yandex>
     <test_keeper_server>
         <tcp_port>9181</tcp_port>
-        <operation_timeout_ms>10000</operation_timeout_ms>
-        <session_timeout_ms>30000</session_timeout_ms>
+        <operation_timeout_ms>5000</operation_timeout_ms>
+        <session_timeout_ms>10000</session_timeout_ms>
         <server_id>1</server_id>
         <raft_configuration>
             <server>
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
index 73340973367..7622aa164da 100644
--- a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
@@ -1,8 +1,8 @@
 <yandex>
     <test_keeper_server>
         <tcp_port>9181</tcp_port>
-        <operation_timeout_ms>10000</operation_timeout_ms>
-        <session_timeout_ms>30000</session_timeout_ms>
+        <operation_timeout_ms>5000</operation_timeout_ms>
+        <session_timeout_ms>10000</session_timeout_ms>
         <server_id>2</server_id>
         <raft_configuration>
             <server>
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
index fbc51489d11..1edbfa7271e 100644
--- a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
@@ -1,8 +1,8 @@
 <yandex>
     <test_keeper_server>
         <tcp_port>9181</tcp_port>
-        <operation_timeout_ms>10000</operation_timeout_ms>
-        <session_timeout_ms>30000</session_timeout_ms>
+        <operation_timeout_ms>5000</operation_timeout_ms>
+        <session_timeout_ms>10000</session_timeout_ms>
         <server_id>3</server_id>
         <raft_configuration>
             <server>

From 46f5268135ec92b248f5246b2a7a574fdb49f7b7 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Wed, 3 Feb 2021 23:46:28 +0300
Subject: [PATCH 0608/1238] Fix incorrect WriteBuffer interface usage in
 message brokers producers

WriteBufferToKafkaProducer and WriteBufferToRabbitMQProducer uses
WriteBuffer::set(nullptr, 0) which will leave the WriteBuffer in the
invalid state, since after this available() is 0 and next() will not
call nextImpl().

Stacktrace:

    2021.02.02 05:27:16.248056 [ 97 ] {} <Fatal> BaseDaemon: ########################################
    2021.02.02 05:27:16.248697 [ 97 ] {} <Fatal> BaseDaemon: (version 21.3.1.5880, build id: 03F265087EF651DC4D8E569775FE9E91FFA5DE76) (from thread 93) (query_id: 8c34c220-f66a-45d2-8f4f-01193cede98b) Received signal Segmentation fault (11)
    2021.02.02 05:27:16.249030 [ 97 ] {} <Fatal> BaseDaemon: Address: NULL pointer. Access: write. Address not mapped to object.
    2021.02.02 05:27:16.249623 [ 97 ] {} <Fatal> BaseDaemon: Stack trace: 0x12f3dbeb 0x12efd075 0x12efc90e 0x12ebb0e9 0x12ebe0cb 0x12e1da18 0x12a8c857 0x1219aa62 0x1218fc82 0x1218fdaa 0x11e3f120 0x12dfa3df 0x12df42a4 0x12e00fa8 0x158cdc63 0x158ce38f 0x15a36b72 0x15a35110 0x15a33918 0x8adf12d 0x7fb5e0fe3609 0x7fb5e0f0a293
    2021.02.02 05:27:16.408985 [ 97 ] {} <Fatal> BaseDaemon: 5. ./obj-x86_64-linux-gnu/../src/IO/WriteHelpers.h:64: DB::JSONEachRowRowOutputFormat::writeRowStartDelimiter() @ 0x12f3dbeb in /usr/bin/clickhouse
    2021.02.02 05:27:16.534797 [ 97 ] {} <Fatal> BaseDaemon: 6. ./obj-x86_64-linux-gnu/../src/Processors/Formats/IRowOutputFormat.cpp:80: DB::IRowOutputFormat::write(std::__1::vector<COW<DB::IColumn>::immutable_ptr<DB::IColumn>, std::__1::allocator<COW<DB::IColumn>::immutable_ptr<DB::IColumn> > > const&, unsigned long) @ 0x12efd075 in /usr/bin/clickhouse
    2021.02.02 05:27:16.599210 [ 97 ] {} <Fatal> BaseDaemon: 7. ./obj-x86_64-linux-gnu/../src/Processors/Formats/IRowOutputFormat.cpp:0: DB::IRowOutputFormat::consume(DB::Chunk) @ 0x12efc90e in /usr/bin/clickhouse
    2021.02.02 05:27:16.680733 [ 97 ] {} <Fatal> BaseDaemon: 8. ./obj-x86_64-linux-gnu/../contrib/libcxx/include/memory:3211: DB::IOutputFormat::write(DB::Block const&) @ 0x12ebb0e9 in /usr/bin/clickhouse
    2021.02.02 05:27:16.798768 [ 97 ] {} <Fatal> BaseDaemon: 9. ./obj-x86_64-linux-gnu/../src/Processors/Formats/OutputStreamToOutputFormat.cpp:15: DB::OutputStreamToOutputFormat::write(DB::Block const&) @ 0x12ebe0cb in /usr/bin/clickhouse
    2021.02.02 05:27:16.981593 [ 97 ] {} <Fatal> BaseDaemon: 10. ./obj-x86_64-linux-gnu/../src/DataStreams/MaterializingBlockOutputStream.h:0: DB::MaterializingBlockOutputStream::write(DB::Block const&) @ 0x12e1da18 in /usr/bin/clickhouse
    2021.02.02 05:27:17.090383 [ 97 ] {} <Fatal> BaseDaemon: 11. ./obj-x86_64-linux-gnu/../src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp:61: DB::RabbitMQBlockOutputStream::write(DB::Block const&) @ 0x12a8c857 in /usr/bin/clickhouse
    2021.02.02 05:27:17.140425 [ 97 ] {} <Fatal> BaseDaemon: 12. ./obj-x86_64-linux-gnu/../src/DataStreams/AddingDefaultBlockOutputStream.cpp:0: DB::AddingDefaultBlockOutputStream::write(DB::Block const&) @ 0x1219aa62 in /usr/bin/clickhouse
    2021.02.02 05:27:17.190282 [ 97 ] {} <Fatal> BaseDaemon: 13. ./obj-x86_64-linux-gnu/../src/DataStreams/SquashingBlockOutputStream.cpp:0: DB::SquashingBlockOutputStream::finalize() @ 0x1218fc82 in /usr/bin/clickhouse
    2021.02.02 05:27:17.240052 [ 97 ] {} <Fatal> BaseDaemon: 14. ./obj-x86_64-linux-gnu/../contrib/libcxx/include/memory:2844: DB::SquashingBlockOutputStream::writeSuffix() @ 0x1218fdaa in /usr/bin/clickhouse
    2021.02.02 05:27:17.294527 [ 97 ] {} <Fatal> BaseDaemon: 15. ./obj-x86_64-linux-gnu/../src/DataStreams/CountingBlockOutputStream.h:37: DB::CountingBlockOutputStream::writeSuffix() @ 0x11e3f120 in /usr/bin/clickhouse
    2021.02.02 05:27:17.405442 [ 97 ] {} <Fatal> BaseDaemon: 16. ./obj-x86_64-linux-gnu/../src/Server/TCPHandler.cpp:521: DB::TCPHandler::processInsertQuery(DB::Settings const&) @ 0x12dfa3df in /usr/bin/clickhouse
    2021.02.02 05:27:17.475822 [ 97 ] {} <Fatal> BaseDaemon: 17. ./obj-x86_64-linux-gnu/../src/Server/TCPHandler.cpp:0: DB::TCPHandler::runImpl() @ 0x12df42a4 in /usr/bin/clickhouse
    2021.02.02 05:27:17.609845 [ 97 ] {} <Fatal> BaseDaemon: 18. ./obj-x86_64-linux-gnu/../src/Server/TCPHandler.cpp:1419: DB::TCPHandler::run() @ 0x12e00fa8 in /usr/bin/clickhouse
    2021.02.02 05:27:17.695292 [ 97 ] {} <Fatal> BaseDaemon: 19. ./obj-x86_64-linux-gnu/../contrib/poco/Net/src/TCPServerConnection.cpp:57: Poco::Net::TCPServerConnection::start() @ 0x158cdc63 in /usr/bin/clickhouse
    2021.02.02 05:27:17.833612 [ 97 ] {} <Fatal> BaseDaemon: 20. ./obj-x86_64-linux-gnu/../contrib/poco/Net/src/TCPServerDispatcher.cpp:0: Poco::Net::TCPServerDispatcher::run() @ 0x158ce38f in /usr/bin/clickhouse
    2021.02.02 05:27:17.907391 [ 97 ] {} <Fatal> BaseDaemon: 21. ./obj-x86_64-linux-gnu/../contrib/poco/Foundation/include/Poco/ScopedLock.h:36: Poco::PooledThread::run() @ 0x15a36b72 in /usr/bin/clickhouse
    2021.02.02 05:27:18.033949 [ 97 ] {} <Fatal> BaseDaemon: 22. ./obj-x86_64-linux-gnu/../contrib/poco/Foundation/src/Thread.cpp:56: Poco::(anonymous namespace)::RunnableHolder::run() @ 0x15a35110 in /usr/bin/clickhouse
    2021.02.02 05:27:18.122761 [ 97 ] {} <Fatal> BaseDaemon: 23. ./obj-x86_64-linux-gnu/../contrib/poco/Foundation/include/Poco/SharedPtr.h:277: Poco::ThreadImpl::runnableEntry(void*) @ 0x15a33918 in /usr/bin/clickhouse
    2021.02.02 05:27:18.283663 [ 97 ] {} <Fatal> BaseDaemon: 24. __tsan_thread_start_func @ 0x8adf12d in /usr/bin/clickhouse
    2021.02.02 05:27:18.284153 [ 97 ] {} <Fatal> BaseDaemon: 25. start_thread @ 0x9609 in /usr/lib/x86_64-linux-gnu/libpthread-2.31.so
    2021.02.02 05:27:18.284544 [ 97 ] {} <Fatal> BaseDaemon: 26. __clone @ 0x122293 in /usr/lib/x86_64-linux-gnu/libc-2.31.so
    2021.02.02 05:27:21.675458 [ 97 ] {} <Fatal> BaseDaemon: Calculated checksum of the binary: B53B58DB6CF8186EEC9EA9273F135E44. There is no information about the reference checksum.
    2021.02.02 05:27:28.078805 [ 1 ] {} <Fatal> Application: Child process was terminated by signal 11.
---
 .../Kafka/WriteBufferToKafkaProducer.cpp         | 16 +++++++++++++---
 src/Storages/Kafka/WriteBufferToKafkaProducer.h  |  1 +
 .../RabbitMQ/WriteBufferToRabbitMQProducer.cpp   | 16 ++++++++++++----
 .../RabbitMQ/WriteBufferToRabbitMQProducer.h     |  2 ++
 4 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/src/Storages/Kafka/WriteBufferToKafkaProducer.cpp b/src/Storages/Kafka/WriteBufferToKafkaProducer.cpp
index c6d365ce2fe..99e53f3b133 100644
--- a/src/Storages/Kafka/WriteBufferToKafkaProducer.cpp
+++ b/src/Storages/Kafka/WriteBufferToKafkaProducer.cpp
@@ -42,6 +42,8 @@ WriteBufferToKafkaProducer::WriteBufferToKafkaProducer(
             timestamp_column_index = column_index;
         }
     }
+
+    reinitializeChunks();
 }
 
 WriteBufferToKafkaProducer::~WriteBufferToKafkaProducer()
@@ -108,9 +110,7 @@ void WriteBufferToKafkaProducer::countRow(const Columns & columns, size_t curren
             break;
         }
 
-        rows = 0;
-        chunks.clear();
-        set(nullptr, 0);
+        reinitializeChunks();
     }
 }
 
@@ -141,4 +141,14 @@ void WriteBufferToKafkaProducer::nextImpl()
     set(chunks.back().data(), chunk_size);
 }
 
+void WriteBufferToKafkaProducer::reinitializeChunks()
+{
+    rows = 0;
+    chunks.clear();
+    /// We cannot leave the buffer in the undefined state (i.e. without any
+    /// underlying buffer), since in this case the WriteBuffeR::next() will
+    /// not call our nextImpl() (due to available() == 0)
+    nextImpl();
+}
+
 }
diff --git a/src/Storages/Kafka/WriteBufferToKafkaProducer.h b/src/Storages/Kafka/WriteBufferToKafkaProducer.h
index 76859c4e33f..ee25f54c437 100644
--- a/src/Storages/Kafka/WriteBufferToKafkaProducer.h
+++ b/src/Storages/Kafka/WriteBufferToKafkaProducer.h
@@ -30,6 +30,7 @@ public:
 
 private:
     void nextImpl() override;
+    void reinitializeChunks();
 
     ProducerPtr producer;
     const std::string topic;
diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp
index 8bf979755d2..91ba132d289 100644
--- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp
+++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp
@@ -55,7 +55,6 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer(
         , max_rows(rows_per_message)
         , chunk_size(chunk_size_)
 {
-
     loop = std::make_unique<uv_loop_t>();
     uv_loop_init(loop.get());
     event_handler = std::make_unique<RabbitMQHandler>(loop.get(), log);
@@ -85,6 +84,8 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer(
             key_arguments[matching[0]] = matching[1];
         }
     }
+
+    reinitializeChunks();
 }
 
 
@@ -122,9 +123,7 @@ void WriteBufferToRabbitMQProducer::countRow()
 
         payload.append(last_chunk, 0, last_chunk_size);
 
-        rows = 0;
-        chunks.clear();
-        set(nullptr, 0);
+        reinitializeChunks();
 
         ++payload_counter;
         payloads.push(std::make_pair(payload_counter, payload));
@@ -331,6 +330,15 @@ void WriteBufferToRabbitMQProducer::nextImpl()
     chunks.back().resize(chunk_size);
     set(chunks.back().data(), chunk_size);
 }
+void WriteBufferToRabbitMQProducer::reinitializeChunks()
+{
+    rows = 0;
+    chunks.clear();
+    /// We cannot leave the buffer in the undefined state (i.e. without any
+    /// underlying buffer), since in this case the WriteBuffeR::next() will
+    /// not call our nextImpl() (due to available() == 0)
+    nextImpl();
+}
 
 
 void WriteBufferToRabbitMQProducer::iterateEventLoop()
diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h
index 6fa4ca9587f..8b9d2b99d4c 100644
--- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h
+++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h
@@ -41,6 +41,8 @@ public:
 
 private:
     void nextImpl() override;
+    void reinitializeChunks();
+
     void iterateEventLoop();
     void writingFunc();
     bool setupConnection(bool reconnecting);

From 50d8a87c278bbcb1ee5c9c3f177bc6acd2c80a8a Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Wed, 3 Feb 2021 21:13:24 +0300
Subject: [PATCH 0609/1238] Fix logging of elapsed time while pushing to views

Logging from PushingToViewsBlockOutputStream::write() is incorrect,
since it does not takes into account squashing (default to 1<<20 rows
and 256<<20), whle write will be done from
PushingToViewsBlockOutputStream::writeSuffix().

Fixes: #19378
---
 .../PushingToViewsBlockOutputStream.cpp       | 44 +++++++++++--------
 .../PushingToViewsBlockOutputStream.h         |  3 ++
 2 files changed, 29 insertions(+), 18 deletions(-)

diff --git a/src/DataStreams/PushingToViewsBlockOutputStream.cpp b/src/DataStreams/PushingToViewsBlockOutputStream.cpp
index a6e0dcd7356..d2d30262dcb 100644
--- a/src/DataStreams/PushingToViewsBlockOutputStream.cpp
+++ b/src/DataStreams/PushingToViewsBlockOutputStream.cpp
@@ -121,7 +121,7 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
             out = std::make_shared<PushingToViewsBlockOutputStream>(
                 dependent_table, dependent_metadata_snapshot, *insert_context, ASTPtr());
 
-        views.emplace_back(ViewInfo{std::move(query), database_table, std::move(out), nullptr});
+        views.emplace_back(ViewInfo{std::move(query), database_table, std::move(out), nullptr, 0 /* elapsed_ms */});
     }
 
     /// Do not push to destination table if the flag is set
@@ -146,8 +146,6 @@ Block PushingToViewsBlockOutputStream::getHeader() const
 
 void PushingToViewsBlockOutputStream::write(const Block & block)
 {
-    Stopwatch watch;
-
     /** Throw an exception if the sizes of arrays - elements of nested data structures doesn't match.
       * We have to make this assertion before writing to table, because storage engine may assume that they have equal sizes.
       * NOTE It'd better to do this check in serialization of nested structures (in place when this assumption is required),
@@ -202,14 +200,6 @@ void PushingToViewsBlockOutputStream::write(const Block & block)
                 std::rethrow_exception(views[view_num].exception);
         }
     }
-
-    UInt64 milliseconds = watch.elapsedMilliseconds();
-    if (views.size() > 1)
-    {
-        LOG_TRACE(log, "Pushing from {} to {} views took {} ms.",
-            storage->getStorageID().getNameForLogs(), views.size(),
-            milliseconds);
-    }
 }
 
 void PushingToViewsBlockOutputStream::writePrefix()
@@ -257,12 +247,13 @@ void PushingToViewsBlockOutputStream::writeSuffix()
             if (view.exception)
                 continue;
 
-            pool.scheduleOrThrowOnError([thread_group, &view]
+            pool.scheduleOrThrowOnError([thread_group, &view, this]
             {
                 setThreadName("PushingToViews");
                 if (thread_group)
                     CurrentThread::attachToIfDetached(thread_group);
 
+                Stopwatch watch;
                 try
                 {
                     view.out->writeSuffix();
@@ -271,6 +262,12 @@ void PushingToViewsBlockOutputStream::writeSuffix()
                 {
                     view.exception = std::current_exception();
                 }
+                view.elapsed_ms += watch.elapsedMilliseconds();
+
+                LOG_TRACE(log, "Pushing from {} to {} took {} ms.",
+                    storage->getStorageID().getNameForLogs(),
+                    view.table_id.getNameForLogs(),
+                    view.elapsed_ms);
             });
         }
         // Wait for concurrent view processing
@@ -290,6 +287,7 @@ void PushingToViewsBlockOutputStream::writeSuffix()
         if (parallel_processing)
             continue;
 
+        Stopwatch watch;
         try
         {
             view.out->writeSuffix();
@@ -299,10 +297,24 @@ void PushingToViewsBlockOutputStream::writeSuffix()
             ex.addMessage("while write prefix to view " + view.table_id.getNameForLogs());
             throw;
         }
+        view.elapsed_ms += watch.elapsedMilliseconds();
+
+        LOG_TRACE(log, "Pushing from {} to {} took {} ms.",
+            storage->getStorageID().getNameForLogs(),
+            view.table_id.getNameForLogs(),
+            view.elapsed_ms);
     }
 
     if (first_exception)
         std::rethrow_exception(first_exception);
+
+    UInt64 milliseconds = main_watch.elapsedMilliseconds();
+    if (views.size() > 1)
+    {
+        LOG_TRACE(log, "Pushing from {} to {} views took {} ms.",
+            storage->getStorageID().getNameForLogs(), views.size(),
+            milliseconds);
+    }
 }
 
 void PushingToViewsBlockOutputStream::flush()
@@ -316,8 +328,8 @@ void PushingToViewsBlockOutputStream::flush()
 
 void PushingToViewsBlockOutputStream::process(const Block & block, size_t view_num)
 {
-    Stopwatch watch;
     auto & view = views[view_num];
+    Stopwatch watch;
 
     try
     {
@@ -379,11 +391,7 @@ void PushingToViewsBlockOutputStream::process(const Block & block, size_t view_n
         view.exception = std::current_exception();
     }
 
-    UInt64 milliseconds = watch.elapsedMilliseconds();
-    LOG_TRACE(log, "Pushing from {} to {} took {} ms.",
-        storage->getStorageID().getNameForLogs(),
-        view.table_id.getNameForLogs(),
-        milliseconds);
+    view.elapsed_ms += watch.elapsedMilliseconds();
 }
 
 }
diff --git a/src/DataStreams/PushingToViewsBlockOutputStream.h b/src/DataStreams/PushingToViewsBlockOutputStream.h
index 30a223d26a3..d0d5eb0ea70 100644
--- a/src/DataStreams/PushingToViewsBlockOutputStream.h
+++ b/src/DataStreams/PushingToViewsBlockOutputStream.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <DataStreams/IBlockOutputStream.h>
+#include <Common/Stopwatch.h>
 #include <Parsers/IAST_fwd.h>
 #include <Storages/IStorage.h>
 
@@ -44,6 +45,7 @@ private:
 
     const Context & context;
     ASTPtr query_ptr;
+    Stopwatch main_watch;
 
     struct ViewInfo
     {
@@ -51,6 +53,7 @@ private:
         StorageID table_id;
         BlockOutputStreamPtr out;
         std::exception_ptr exception;
+        UInt64 elapsed_ms = 0;
     };
 
     std::vector<ViewInfo> views;

From c59f22d7b4823875a4d5f021329b05be81a1fd69 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Wed, 3 Feb 2021 21:13:24 +0300
Subject: [PATCH 0610/1238] Change PushingToViewsBlockOutputStream::process, to
 accept ref to ViewInfo

It is safe, no need to use quirks with passing view_num.
---
 .../PushingToViewsBlockOutputStream.cpp         | 17 ++++++++---------
 .../PushingToViewsBlockOutputStream.h           |  2 +-
 2 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/src/DataStreams/PushingToViewsBlockOutputStream.cpp b/src/DataStreams/PushingToViewsBlockOutputStream.cpp
index d2d30262dcb..4d1990ffe18 100644
--- a/src/DataStreams/PushingToViewsBlockOutputStream.cpp
+++ b/src/DataStreams/PushingToViewsBlockOutputStream.cpp
@@ -175,15 +175,15 @@ void PushingToViewsBlockOutputStream::write(const Block & block)
     {
         // Push to views concurrently if enabled and more than one view is attached
         ThreadPool pool(std::min(size_t(settings.max_threads), views.size()));
-        for (size_t view_num = 0; view_num < views.size(); ++view_num)
+        for (auto & view : views)
         {
             auto thread_group = CurrentThread::getGroup();
-            pool.scheduleOrThrowOnError([=, this]
+            pool.scheduleOrThrowOnError([=, &view, this]
             {
                 setThreadName("PushingToViews");
                 if (thread_group)
                     CurrentThread::attachToIfDetached(thread_group);
-                process(block, view_num);
+                process(block, view);
             });
         }
         // Wait for concurrent view processing
@@ -192,12 +192,12 @@ void PushingToViewsBlockOutputStream::write(const Block & block)
     else
     {
         // Process sequentially
-        for (size_t view_num = 0; view_num < views.size(); ++view_num)
+        for (auto & view : views)
         {
-            process(block, view_num);
+            process(block, view);
 
-            if (views[view_num].exception)
-                std::rethrow_exception(views[view_num].exception);
+            if (view.exception)
+                std::rethrow_exception(view.exception);
         }
     }
 }
@@ -326,9 +326,8 @@ void PushingToViewsBlockOutputStream::flush()
         view.out->flush();
 }
 
-void PushingToViewsBlockOutputStream::process(const Block & block, size_t view_num)
+void PushingToViewsBlockOutputStream::process(const Block & block, ViewInfo & view)
 {
-    auto & view = views[view_num];
     Stopwatch watch;
 
     try
diff --git a/src/DataStreams/PushingToViewsBlockOutputStream.h b/src/DataStreams/PushingToViewsBlockOutputStream.h
index d0d5eb0ea70..6b32607b294 100644
--- a/src/DataStreams/PushingToViewsBlockOutputStream.h
+++ b/src/DataStreams/PushingToViewsBlockOutputStream.h
@@ -60,7 +60,7 @@ private:
     std::unique_ptr<Context> select_context;
     std::unique_ptr<Context> insert_context;
 
-    void process(const Block & block, size_t view_num);
+    void process(const Block & block, ViewInfo & view);
 };
 
 
From da79469092cdead7f2ace1fd19e606afc137fa1f Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 4 Feb 2021 00:38:06 +0300
Subject: [PATCH 0611/1238] Add quantilesTimingWeighted into documentation

---
 .../reference/quantiletimingweighted.md       | 34 +++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md
index 0f8606986c8..817cd831d85 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md
@@ -79,6 +79,40 @@ Result:
 └───────────────────────────────────────────────┘
 ```
 
+# quantilesTimingWeighted {#quantilestimingweighted}
+
+Same as `quantileTimingWeighted`, but accept multiple parameters with quantile levels and return an Array filled with many values of that quantiles.
+
+
+**Example**
+
+Input table:
+
+``` text
+┌─response_time─┬─weight─┐
+│            68 │      1 │
+│           104 │      2 │
+│           112 │      3 │
+│           126 │      2 │
+│           138 │      1 │
+│           162 │      1 │
+└───────────────┴────────┘
+```
+
+Query:
+
+``` sql
+SELECT quantilesTimingWeighted(0,5, 0.99)(response_time, weight) FROM t
+```
+
+Result:
+
+``` text
+┌─quantilesTimingWeighted(0.5, 0.99)(response_time, weight)─┐
+│ [112,162]                                                 │
+└───────────────────────────────────────────────────────────┘
+```
+
 **See Also**
 
 -   [median](../../../sql-reference/aggregate-functions/reference/median.md#median)

From 984445b44ba8c1601c5638c4a1d452462563a9d2 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Tue, 2 Feb 2021 23:31:03 +0300
Subject: [PATCH 0612/1238] Fix UBsan reports in quantileTiming

UBsan reports [1]:

    ../src/AggregateFunctions/QuantileTiming.h:442:27: runtime error: 1.84467e+19 is outside the range of representable values of type 'unsigned long' Received signal -3 Received signal Unknown signal (-3)

  [1]: https://clickhouse-test-reports.s3.yandex.net/19971/e15f5d9cb5b36482d1ae9ca069074fb200f2ab37/fuzzer_ubsan/report.html#fail1

Follow-up for: #19394
---
 src/AggregateFunctions/QuantileTiming.h       | 10 +++---
 .../01690_quantilesTiming_ubsan.reference     |  2 ++
 .../01690_quantilesTiming_ubsan.sql           | 31 +++++++++++++++++++
 3 files changed, 39 insertions(+), 4 deletions(-)
 create mode 100644 tests/queries/0_stateless/01690_quantilesTiming_ubsan.reference
 create mode 100644 tests/queries/0_stateless/01690_quantilesTiming_ubsan.sql

diff --git a/src/AggregateFunctions/QuantileTiming.h b/src/AggregateFunctions/QuantileTiming.h
index 6070f264ad6..dd6d923a5a0 100644
--- a/src/AggregateFunctions/QuantileTiming.h
+++ b/src/AggregateFunctions/QuantileTiming.h
@@ -32,6 +32,8 @@ namespace ErrorCodes
   * - a histogram (that is, value -> number), consisting of two parts
   * -- for values from 0 to 1023 - in increments of 1;
   * -- for values from 1024 to 30,000 - in increments of 16;
+  *
+  * NOTE: 64-bit integer weight can overflow, see also QantileExactWeighted.h::get()
   */
 
 #define TINY_MAX_ELEMS 31
@@ -396,9 +398,9 @@ namespace detail
         /// Get the value of the `level` quantile. The level must be between 0 and 1.
         UInt16 get(double level) const
         {
-            UInt64 pos = std::ceil(count * level);
+            double pos = std::ceil(count * level);
 
-            UInt64 accumulated = 0;
+            double accumulated = 0;
             Iterator it(*this);
 
             while (it.isValid())
@@ -422,9 +424,9 @@ namespace detail
             const auto * indices_end = indices + size;
             const auto * index = indices;
 
-            UInt64 pos = std::ceil(count * levels[*index]);
+            double pos = std::ceil(count * levels[*index]);
 
-            UInt64 accumulated = 0;
+            double accumulated = 0;
             Iterator it(*this);
 
             while (it.isValid())
diff --git a/tests/queries/0_stateless/01690_quantilesTiming_ubsan.reference b/tests/queries/0_stateless/01690_quantilesTiming_ubsan.reference
new file mode 100644
index 00000000000..b3c946cbaec
--- /dev/null
+++ b/tests/queries/0_stateless/01690_quantilesTiming_ubsan.reference
@@ -0,0 +1,2 @@
+[0]
+[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
diff --git a/tests/queries/0_stateless/01690_quantilesTiming_ubsan.sql b/tests/queries/0_stateless/01690_quantilesTiming_ubsan.sql
new file mode 100644
index 00000000000..b2a5ab61efc
--- /dev/null
+++ b/tests/queries/0_stateless/01690_quantilesTiming_ubsan.sql
@@ -0,0 +1,31 @@
+-- NOTE: that due to overflows it may give different result before
+-- quantilesTimingWeighted() had been converted to double:
+--
+-- Before:
+--
+--     SELECT quantilesTimingWeighted(1)(number, 9223372036854775807)
+--     FROM numbers(2)
+--
+--     ┌─quantilesTimingWeighted(1)(number, 9223372036854775807)─┐
+--     │ [1]                                                     │
+--     └─────────────────────────────────────────────────────────┘
+--
+-- After:
+--
+--     SELECT quantilesTimingWeighted(1)(number, 9223372036854775807)
+--     FROM numbers(2)
+--
+--     ┌─quantilesTimingWeighted(1)(number, 9223372036854775807)─┐
+--     │ [0]                                                     │
+--     └─────────────────────────────────────────────────────────┘
+
+SELECT quantilesTimingWeighted(0.1)(number, 9223372036854775807) FROM numbers(2);
+
+-- same UB, but in the inner loop
+SELECT quantilesTimingWeighted(0, 0.001, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99, 0.999, 1)(number, 9223372036854775807)
+FROM
+(
+    SELECT number
+    FROM system.numbers
+    LIMIT 100
+);

From fb4596f433366105064ec2b8bc42cb7b32f88205 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 4 Feb 2021 02:08:41 +0300
Subject: [PATCH 0613/1238] Fix clang-analyzer-optin.cplusplus.VirtualCall

---
 src/Storages/Kafka/WriteBufferToKafkaProducer.cpp       | 7 ++++++-
 src/Storages/Kafka/WriteBufferToKafkaProducer.h         | 1 +
 src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp | 8 +++++++-
 src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h   | 1 +
 4 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/Storages/Kafka/WriteBufferToKafkaProducer.cpp b/src/Storages/Kafka/WriteBufferToKafkaProducer.cpp
index 99e53f3b133..dbb18b56769 100644
--- a/src/Storages/Kafka/WriteBufferToKafkaProducer.cpp
+++ b/src/Storages/Kafka/WriteBufferToKafkaProducer.cpp
@@ -135,6 +135,11 @@ void WriteBufferToKafkaProducer::flush()
 }
 
 void WriteBufferToKafkaProducer::nextImpl()
+{
+    addChunk();
+}
+
+void WriteBufferToKafkaProducer::addChunk()
 {
     chunks.push_back(std::string());
     chunks.back().resize(chunk_size);
@@ -148,7 +153,7 @@ void WriteBufferToKafkaProducer::reinitializeChunks()
     /// We cannot leave the buffer in the undefined state (i.e. without any
     /// underlying buffer), since in this case the WriteBuffeR::next() will
     /// not call our nextImpl() (due to available() == 0)
-    nextImpl();
+    addChunk();
 }
 
 }
diff --git a/src/Storages/Kafka/WriteBufferToKafkaProducer.h b/src/Storages/Kafka/WriteBufferToKafkaProducer.h
index ee25f54c437..15881b7a8e5 100644
--- a/src/Storages/Kafka/WriteBufferToKafkaProducer.h
+++ b/src/Storages/Kafka/WriteBufferToKafkaProducer.h
@@ -30,6 +30,7 @@ public:
 
 private:
     void nextImpl() override;
+    void addChunk();
     void reinitializeChunks();
 
     ProducerPtr producer;
diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp
index 91ba132d289..08b95d46115 100644
--- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp
+++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp
@@ -325,11 +325,17 @@ void WriteBufferToRabbitMQProducer::writingFunc()
 
 
 void WriteBufferToRabbitMQProducer::nextImpl()
+{
+    addChunk();
+}
+
+void WriteBufferToRabbitMQProducer::addChunk()
 {
     chunks.push_back(std::string());
     chunks.back().resize(chunk_size);
     set(chunks.back().data(), chunk_size);
 }
+
 void WriteBufferToRabbitMQProducer::reinitializeChunks()
 {
     rows = 0;
@@ -337,7 +343,7 @@ void WriteBufferToRabbitMQProducer::reinitializeChunks()
     /// We cannot leave the buffer in the undefined state (i.e. without any
     /// underlying buffer), since in this case the WriteBuffeR::next() will
     /// not call our nextImpl() (due to available() == 0)
-    nextImpl();
+    addChunk();
 }
 
 
diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h
index 8b9d2b99d4c..2897e20b21d 100644
--- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h
+++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h
@@ -41,6 +41,7 @@ public:
 
 private:
     void nextImpl() override;
+    void addChunk();
     void reinitializeChunks();
 
     void iterateEventLoop();

From 1795735950f7a1d223fcb164089e04df2fc682a7 Mon Sep 17 00:00:00 2001
From: hexiaoting <hewenting_ict@163.com>
Date: Thu, 4 Feb 2021 10:23:03 +0800
Subject: [PATCH 0614/1238] Remove create-db sql in test case

---
 .../01653_move_conditions_from_join_on_to_where.sql           | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql
index 9ec8f0fe156..259ff822f3f 100644
--- a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql
+++ b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql
@@ -1,6 +1,3 @@
-DROP DATABASE IF EXISTS test_01653;
-CREATE DATABASE test_01653;
-USE test_01653;
 DROP TABLE IF EXISTS table1;
 DROP TABLE IF EXISTS table2;
 
@@ -39,4 +36,3 @@ SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.b < toUInt
 
 DROP TABLE table1;
 DROP TABLE table2;
-DROP DATABASE test_01653;

From 3913f39211e4015eabb472d4ae630c1002d9f37f Mon Sep 17 00:00:00 2001
From: Russ Frank <rf@oden.io>
Date: Wed, 3 Feb 2021 22:49:28 -0500
Subject: [PATCH 0615/1238] Add `deltaSum` aggregate function, docs & test

---
 .../aggregate-functions/reference/deltasum.md | 19 ++++
 .../AggregateFunctionDeltaSum.cpp             | 42 ++++++++
 .../AggregateFunctionDeltaSum.h               | 95 +++++++++++++++++++
 .../registerAggregateFunctions.cpp            |  2 +
 src/AggregateFunctions/ya.make                |  1 +
 .../0_stateless/01700_deltasum.reference      |  5 +
 tests/queries/0_stateless/01700_deltasum.sql  |  5 +
 7 files changed, 169 insertions(+)
 create mode 100644 docs/en/sql-reference/aggregate-functions/reference/deltasum.md
 create mode 100644 src/AggregateFunctions/AggregateFunctionDeltaSum.cpp
 create mode 100644 src/AggregateFunctions/AggregateFunctionDeltaSum.h
 create mode 100644 tests/queries/0_stateless/01700_deltasum.reference
 create mode 100644 tests/queries/0_stateless/01700_deltasum.sql

diff --git a/docs/en/sql-reference/aggregate-functions/reference/deltasum.md b/docs/en/sql-reference/aggregate-functions/reference/deltasum.md
new file mode 100644
index 00000000000..1a21564d5b7
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/deltasum.md
@@ -0,0 +1,19 @@
+---
+toc_priority: 141
+---
+
+# deltaSum {#agg_functions-deltasum}
+
+Syntax: `deltaSum(value)`
+
+Adds the differences between consecutive rows. If the difference is negative, it is ignored. 
+`value` must be some integer, floating point or decimal type.
+
+
+Example:
+
+```sql
+select deltaSum(arrayJoin([1, 2, 3]));                  -- => 3
+select deltaSum(arrayJoin([1, 2, 3, 0, 3, 4, 2, 3]));   -- => 8
+```
+
diff --git a/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp b/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp
new file mode 100644
index 00000000000..239c5bf6535
--- /dev/null
+++ b/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp
@@ -0,0 +1,42 @@
+#include <AggregateFunctions/AggregateFunctionDeltaSum.h>
+
+#include <AggregateFunctions/AggregateFunctionFactory.h>
+#include <AggregateFunctions/FactoryHelpers.h>
+#include <AggregateFunctions/Helpers.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+}
+
+namespace
+{
+
+AggregateFunctionPtr createAggregateFunctionDeltaSum(
+    const String & name,
+    const DataTypes & arguments,
+    const Array & params)
+{
+    assertNoParameters(name, params);
+
+    if (arguments.size() != 1)
+        throw Exception("Incorrect number of arguments for aggregate function " + name,
+            ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+
+    return AggregateFunctionPtr(createWithNumericType<AggregationFunctionDeltaSum>(*arguments[0], arguments, params));
+}
+
+}
+
+void registerAggregateFunctionDeltaSum(AggregateFunctionFactory & factory)
+{
+    AggregateFunctionProperties properties = { .returns_default_when_only_null = true, .is_order_dependent = true };
+
+    factory.registerFunction("deltaSum", { createAggregateFunctionDeltaSum, properties });
+}
+
+}
diff --git a/src/AggregateFunctions/AggregateFunctionDeltaSum.h b/src/AggregateFunctions/AggregateFunctionDeltaSum.h
new file mode 100644
index 00000000000..95f97d079fd
--- /dev/null
+++ b/src/AggregateFunctions/AggregateFunctionDeltaSum.h
@@ -0,0 +1,95 @@
+#pragma once
+
+#include <experimental/type_traits>
+#include <type_traits>
+
+#include <IO/WriteHelpers.h>
+#include <IO/ReadHelpers.h>
+
+#include <DataTypes/DataTypesNumber.h>
+#include <DataTypes/DataTypesDecimal.h>
+#include <Columns/ColumnVector.h>
+
+#include <AggregateFunctions/IAggregateFunction.h>
+
+
+namespace DB
+{
+
+template <typename T>
+struct AggregationFunctionDeltaSumData
+{
+    T sum{};
+    T last{};
+    T first{};
+};
+
+template <typename T>
+class AggregationFunctionDeltaSum final : public IAggregateFunctionDataHelper<
+    AggregationFunctionDeltaSumData<T>, AggregationFunctionDeltaSum<T>>
+{
+public:
+    AggregationFunctionDeltaSum(const DataTypes & arguments, const Array & params)
+        : IAggregateFunctionDataHelper<
+          AggregationFunctionDeltaSumData<T>, AggregationFunctionDeltaSum<T>> {arguments, params}
+    {
+        // empty constructor
+    }
+
+    String getName() const override 
+    {
+        return "deltaSum";
+    }
+
+    DataTypePtr getReturnType() const override
+    {
+        return std::make_shared<DataTypeNumber<T>>();
+    }
+
+    void ALWAYS_INLINE add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
+    {
+        const T & value = (*columns[0])[row_num].get<T>();
+
+        if (this->data(place).last < value) {
+            this->data(place).sum += (value - this->data(place).last);
+        }
+
+        this->data(place).last = value;
+
+        if (this->data(place).first == 0) {
+            this->data(place).first = value;
+        }
+    }
+
+    void ALWAYS_INLINE merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    {
+        if (this->data(place).last < this->data(rhs).first) {
+            this->data(place).sum += this->data(rhs).sum + (this->data(rhs).first - this->data(place).last);
+        } else {
+            this->data(place).sum += this->data(rhs).sum;
+        }
+
+        this->data(place).last = this->data(rhs).last;
+    }
+
+        void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    {
+        writeIntBinary(this->data(place).sum, buf);
+        writeIntBinary(this->data(place).first, buf);
+        writeIntBinary(this->data(place).last, buf);
+    }
+
+    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
+    {
+        readIntBinary(this->data(place).sum, buf);
+        readIntBinary(this->data(place).first, buf);
+        readIntBinary(this->data(place).last, buf);
+    }
+
+    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    {
+        assert_cast<ColumnVector<T> &>(to).getData().push_back(this->data(place).sum);
+    }
+};
+
+}
diff --git a/src/AggregateFunctions/registerAggregateFunctions.cpp b/src/AggregateFunctions/registerAggregateFunctions.cpp
index d8e4eb7ba98..1900d5d46c6 100644
--- a/src/AggregateFunctions/registerAggregateFunctions.cpp
+++ b/src/AggregateFunctions/registerAggregateFunctions.cpp
@@ -11,6 +11,7 @@ class AggregateFunctionFactory;
 void registerAggregateFunctionAvg(AggregateFunctionFactory &);
 void registerAggregateFunctionAvgWeighted(AggregateFunctionFactory &);
 void registerAggregateFunctionCount(AggregateFunctionFactory &);
+void registerAggregateFunctionDeltaSum(AggregateFunctionFactory &);
 void registerAggregateFunctionGroupArray(AggregateFunctionFactory &);
 void registerAggregateFunctionGroupUniqArray(AggregateFunctionFactory &);
 void registerAggregateFunctionGroupArrayInsertAt(AggregateFunctionFactory &);
@@ -66,6 +67,7 @@ void registerAggregateFunctions()
         registerAggregateFunctionAvg(factory);
         registerAggregateFunctionAvgWeighted(factory);
         registerAggregateFunctionCount(factory);
+        registerAggregateFunctionDeltaSum(factory);
         registerAggregateFunctionGroupArray(factory);
         registerAggregateFunctionGroupUniqArray(factory);
         registerAggregateFunctionGroupArrayInsertAt(factory);
diff --git a/src/AggregateFunctions/ya.make b/src/AggregateFunctions/ya.make
index f2105688feb..3a8f0ad9fba 100644
--- a/src/AggregateFunctions/ya.make
+++ b/src/AggregateFunctions/ya.make
@@ -19,6 +19,7 @@ SRCS(
     AggregateFunctionCategoricalInformationValue.cpp
     AggregateFunctionCombinatorFactory.cpp
     AggregateFunctionCount.cpp
+    AggregateFunctionDeltaSum.cpp
     AggregateFunctionDistinct.cpp
     AggregateFunctionEntropy.cpp
     AggregateFunctionFactory.cpp
diff --git a/tests/queries/0_stateless/01700_deltasum.reference b/tests/queries/0_stateless/01700_deltasum.reference
new file mode 100644
index 00000000000..2995ee8627d
--- /dev/null
+++ b/tests/queries/0_stateless/01700_deltasum.reference
@@ -0,0 +1,5 @@
+3
+7
+8
+8
+8
diff --git a/tests/queries/0_stateless/01700_deltasum.sql b/tests/queries/0_stateless/01700_deltasum.sql
new file mode 100644
index 00000000000..6a818ce02d8
--- /dev/null
+++ b/tests/queries/0_stateless/01700_deltasum.sql
@@ -0,0 +1,5 @@
+select deltaSum(arrayJoin([1, 2, 3]));
+select deltaSum(arrayJoin([1, 2, 3, 0, 3, 4]));
+select deltaSum(arrayJoin([1, 2, 3, 0, 3, 4, 2, 3]));
+select deltaSum(arrayJoin([1, 2, 3, 0, 3, 3, 3, 3, 3, 4, 2, 3]));
+select deltaSum(arrayJoin([1, 2, 3, 0, 0, 0, 0, 3, 3, 3, 3, 3, 4, 2, 3]));

From b36d20ef6e5b24edf32bc74e59e91e6e84b6cbf5 Mon Sep 17 00:00:00 2001
From: Russ Frank <rf@oden.io>
Date: Thu, 4 Feb 2021 00:11:43 -0500
Subject: [PATCH 0616/1238] record seen first and last

---
 .../AggregateFunctionDeltaSum.h               | 21 +++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/src/AggregateFunctions/AggregateFunctionDeltaSum.h b/src/AggregateFunctions/AggregateFunctionDeltaSum.h
index 95f97d079fd..e83ff6ea140 100644
--- a/src/AggregateFunctions/AggregateFunctionDeltaSum.h
+++ b/src/AggregateFunctions/AggregateFunctionDeltaSum.h
@@ -19,9 +19,11 @@ namespace DB
 template <typename T>
 struct AggregationFunctionDeltaSumData
 {
-    T sum{};
-    T last{};
-    T first{};
+    T sum = 0;
+    bool seen_last = false;
+    T last;
+    bool seen_first = false;
+    T first;
 };
 
 template <typename T>
@@ -50,26 +52,29 @@ public:
     {
         const T & value = (*columns[0])[row_num].get<T>();
 
-        if (this->data(place).last < value) {
+        if (this->data(place).last < value && this->data(place).seen_last) {
             this->data(place).sum += (value - this->data(place).last);
         }
 
         this->data(place).last = value;
+        this->data(place).seen_last = true;
 
-        if (this->data(place).first == 0) {
+        if (this->data(place).seen_first == false) {
             this->data(place).first = value;
+            this->data(place).seen_first = true;
         }
     }
 
     void ALWAYS_INLINE merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
     {
-        if (this->data(place).last < this->data(rhs).first) {
+        if ((this->data(place).last < this->data(rhs).first) && this->data(place).seen_last && this->data(place).seen_first) {
             this->data(place).sum += this->data(rhs).sum + (this->data(rhs).first - this->data(place).last);
         } else {
             this->data(place).sum += this->data(rhs).sum;
         }
 
         this->data(place).last = this->data(rhs).last;
+        this->data(place).first = this->data(rhs).first;
     }
 
         void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
@@ -77,6 +82,8 @@ public:
         writeIntBinary(this->data(place).sum, buf);
         writeIntBinary(this->data(place).first, buf);
         writeIntBinary(this->data(place).last, buf);
+        writePODBinary<bool>(this->data(place).seen_first, buf);
+        writePODBinary<bool>(this->data(place).seen_last, buf);
     }
 
     void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
@@ -84,6 +91,8 @@ public:
         readIntBinary(this->data(place).sum, buf);
         readIntBinary(this->data(place).first, buf);
         readIntBinary(this->data(place).last, buf);
+        readPODBinary<bool>(this->data(place).seen_first, buf);
+        readPODBinary<bool>(this->data(place).seen_last, buf);
     }
 
     void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override

From 8a931799be4d2e8570ef6933bfcb7921f5f4b24d Mon Sep 17 00:00:00 2001
From: Russ Frank <rf@oden.io>
Date: Thu, 4 Feb 2021 00:14:15 -0500
Subject: [PATCH 0617/1238] fix tests

---
 tests/queries/0_stateless/01700_deltasum.reference | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/queries/0_stateless/01700_deltasum.reference b/tests/queries/0_stateless/01700_deltasum.reference
index 2995ee8627d..27d687d57d0 100644
--- a/tests/queries/0_stateless/01700_deltasum.reference
+++ b/tests/queries/0_stateless/01700_deltasum.reference
@@ -1,5 +1,5 @@
-3
+2
+6
+7
+7
 7
-8
-8
-8

From 75ab05f80a95db9190e5ac9cfb3304dfa51d2116 Mon Sep 17 00:00:00 2001
From: Russ Frank <rf@oden.io>
Date: Thu, 4 Feb 2021 00:19:24 -0500
Subject: [PATCH 0618/1238] correct conditional

---
 src/AggregateFunctions/AggregateFunctionDeltaSum.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/AggregateFunctions/AggregateFunctionDeltaSum.h b/src/AggregateFunctions/AggregateFunctionDeltaSum.h
index e83ff6ea140..c98d939cc6b 100644
--- a/src/AggregateFunctions/AggregateFunctionDeltaSum.h
+++ b/src/AggregateFunctions/AggregateFunctionDeltaSum.h
@@ -67,7 +67,7 @@ public:
 
     void ALWAYS_INLINE merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
     {
-        if ((this->data(place).last < this->data(rhs).first) && this->data(place).seen_last && this->data(place).seen_first) {
+        if ((this->data(place).last < this->data(rhs).first) && this->data(place).seen_last && this->data(rhs).seen_first) {
             this->data(place).sum += this->data(rhs).sum + (this->data(rhs).first - this->data(place).last);
         } else {
             this->data(place).sum += this->data(rhs).sum;

From 474f151707ef13cc80bc38ed756729db36cb8a51 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 4 Feb 2021 08:26:22 +0300
Subject: [PATCH 0619/1238] More docs for MergeTreeSettings

---
 src/Storages/MergeTree/MergeTreeSettings.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h
index 713bfffde05..53388617a07 100644
--- a/src/Storages/MergeTree/MergeTreeSettings.h
+++ b/src/Storages/MergeTree/MergeTreeSettings.h
@@ -16,6 +16,10 @@ class ASTStorage;
 struct Settings;
 
 
+/** These settings represent fine tunes for internal details of MergeTree storages
+  * and should not be changed by the user without a reason.
+  */
+
 #define LIST_OF_MERGE_TREE_SETTINGS(M) \
     M(UInt64, min_compress_block_size, 0, "When granule is written, compress the data in buffer if the size of pending uncompressed data is larger or equal than the specified threshold. If this setting is not set, the corresponding global setting is used.", 0) \
     M(UInt64, max_compress_block_size, 0, "Compress the pending uncompressed data in buffer if its size is larger or equal than the specified threshold. Block of data will be compressed even if the current granule is not finished. If this setting is not set, the corresponding global setting is used.", 0) \
@@ -40,7 +44,7 @@ struct Settings;
     M(UInt64, number_of_free_entries_in_pool_to_execute_mutation, 10, "When there is less than specified number of free entries in pool, do not execute part mutations. This is to leave free threads for regular merges and avoid \"Too many parts\"", 0) \
     M(UInt64, max_number_of_merges_with_ttl_in_pool, 2, "When there is more than specified number of merges with TTL entries in pool, do not assign new merge with TTL. This is to leave free threads for regular merges and avoid \"Too many parts\"", 0) \
     M(Seconds, old_parts_lifetime, 8 * 60, "How many seconds to keep obsolete parts.", 0) \
-    M(Seconds, temporary_directories_lifetime, 86400, "How many seconds to keep tmp_-directories.", 0) \
+    M(Seconds, temporary_directories_lifetime, 86400, "How many seconds to keep tmp_-directories. You should not lower this value because merges and mutations may not be able to work with low value of this setting.", 0) \
     M(Seconds, lock_acquire_timeout_for_background_operations, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "For background operations like merges, mutations etc. How many seconds before failing to acquire table locks.", 0) \
     M(UInt64, min_rows_to_fsync_after_merge, 0, "Minimal number of rows to do fsync for part after merge (0 - disabled)", 0) \
     M(UInt64, min_compressed_bytes_to_fsync_after_merge, 0, "Minimal number of compressed bytes to do fsync for part after merge (0 - disabled)", 0) \

From f47ea31ef1264b1799b991175bbf75b0299353cc Mon Sep 17 00:00:00 2001
From: Russ Frank <rf@oden.io>
Date: Thu, 4 Feb 2021 00:26:23 -0500
Subject: [PATCH 0620/1238] clang-format

---
 .../AggregateFunctionDeltaSum.h               | 41 +++++++++----------
 1 file changed, 19 insertions(+), 22 deletions(-)

diff --git a/src/AggregateFunctions/AggregateFunctionDeltaSum.h b/src/AggregateFunctions/AggregateFunctionDeltaSum.h
index c98d939cc6b..4227a0dd6c3 100644
--- a/src/AggregateFunctions/AggregateFunctionDeltaSum.h
+++ b/src/AggregateFunctions/AggregateFunctionDeltaSum.h
@@ -1,21 +1,20 @@
 #pragma once
 
-#include <experimental/type_traits>
 #include <type_traits>
+#include <experimental/type_traits>
 
-#include <IO/WriteHelpers.h>
 #include <IO/ReadHelpers.h>
+#include <IO/WriteHelpers.h>
 
-#include <DataTypes/DataTypesNumber.h>
-#include <DataTypes/DataTypesDecimal.h>
 #include <Columns/ColumnVector.h>
+#include <DataTypes/DataTypesDecimal.h>
+#include <DataTypes/DataTypesNumber.h>
 
 #include <AggregateFunctions/IAggregateFunction.h>
 
 
 namespace DB
 {
-
 template <typename T>
 struct AggregationFunctionDeltaSumData
 {
@@ -27,39 +26,34 @@ struct AggregationFunctionDeltaSumData
 };
 
 template <typename T>
-class AggregationFunctionDeltaSum final : public IAggregateFunctionDataHelper<
-    AggregationFunctionDeltaSumData<T>, AggregationFunctionDeltaSum<T>>
+class AggregationFunctionDeltaSum final
+    : public IAggregateFunctionDataHelper<AggregationFunctionDeltaSumData<T>, AggregationFunctionDeltaSum<T>>
 {
 public:
     AggregationFunctionDeltaSum(const DataTypes & arguments, const Array & params)
-        : IAggregateFunctionDataHelper<
-          AggregationFunctionDeltaSumData<T>, AggregationFunctionDeltaSum<T>> {arguments, params}
+        : IAggregateFunctionDataHelper<AggregationFunctionDeltaSumData<T>, AggregationFunctionDeltaSum<T>>{arguments, params}
     {
         // empty constructor
     }
 
-    String getName() const override 
-    {
-        return "deltaSum";
-    }
+    String getName() const override { return "deltaSum"; }
 
-    DataTypePtr getReturnType() const override
-    {
-        return std::make_shared<DataTypeNumber<T>>();
-    }
+    DataTypePtr getReturnType() const override { return std::make_shared<DataTypeNumber<T>>(); }
 
     void ALWAYS_INLINE add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
     {
         const T & value = (*columns[0])[row_num].get<T>();
 
-        if (this->data(place).last < value && this->data(place).seen_last) {
+        if (this->data(place).last < value && this->data(place).seen_last)
+        {
             this->data(place).sum += (value - this->data(place).last);
         }
 
         this->data(place).last = value;
         this->data(place).seen_last = true;
 
-        if (this->data(place).seen_first == false) {
+        if (this->data(place).seen_first == false)
+        {
             this->data(place).first = value;
             this->data(place).seen_first = true;
         }
@@ -67,9 +61,12 @@ public:
 
     void ALWAYS_INLINE merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
     {
-        if ((this->data(place).last < this->data(rhs).first) && this->data(place).seen_last && this->data(rhs).seen_first) {
+        if ((this->data(place).last < this->data(rhs).first) && this->data(place).seen_last && this->data(rhs).seen_first)
+        {
             this->data(place).sum += this->data(rhs).sum + (this->data(rhs).first - this->data(place).last);
-        } else {
+        }
+        else
+        {
             this->data(place).sum += this->data(rhs).sum;
         }
 
@@ -77,7 +74,7 @@ public:
         this->data(place).first = this->data(rhs).first;
     }
 
-        void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
     {
         writeIntBinary(this->data(place).sum, buf);
         writeIntBinary(this->data(place).first, buf);

From a403047581d33cea09b63851c4d477c21b48aeb6 Mon Sep 17 00:00:00 2001
From: Russ Frank <rf@oden.io>
Date: Thu, 4 Feb 2021 00:50:35 -0500
Subject: [PATCH 0621/1238] correct docs

---
 .../sql-reference/aggregate-functions/reference/deltasum.md   | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/en/sql-reference/aggregate-functions/reference/deltasum.md b/docs/en/sql-reference/aggregate-functions/reference/deltasum.md
index 1a21564d5b7..59af4e44120 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/deltasum.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/deltasum.md
@@ -13,7 +13,7 @@ Adds the differences between consecutive rows. If the difference is negative, it
 Example:
 
 ```sql
-select deltaSum(arrayJoin([1, 2, 3]));                  -- => 3
-select deltaSum(arrayJoin([1, 2, 3, 0, 3, 4, 2, 3]));   -- => 8
+select deltaSum(arrayJoin([1, 2, 3]));                  -- => 2
+select deltaSum(arrayJoin([1, 2, 3, 0, 3, 4, 2, 3]));   -- => 7
 ```
 

From 227698ea1cd67974f1594f2596fe17c3e92b23be Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Thu, 4 Feb 2021 09:21:36 +0300
Subject: [PATCH 0622/1238] Update adopters.md

---
 docs/en/introduction/adopters.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md
index 707a05b63e5..ca16119f460 100644
--- a/docs/en/introduction/adopters.md
+++ b/docs/en/introduction/adopters.md
@@ -46,7 +46,7 @@ toc_title: Adopters
 | <a href="https://www.exness.com" class="favicon">Exness</a> | Trading | Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) |
 | <a href="https://fastnetmon.com/" class="favicon">FastNetMon</a> | DDoS Protection | Main Product | | — | [Official website](https://fastnetmon.com/docs-fnm-advanced/fastnetmon-advanced-traffic-persistency/) |
 | <a href="https://www.flipkart.com/" class="favicon">Flipkart</a> | e-Commerce | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=239) |
-| <a href="https://fun.co/rp" class="favicon">FunCorp</a> | Games | | — | — | [Article](https://www.altinity.com/blog/migrating-from-redshift-to-clickhouse) |
+| <a href="https://fun.co/rp" class="favicon">FunCorp</a> | Games | | — | 14 bn records/day as of Jan 2021 | [Article](https://www.altinity.com/blog/migrating-from-redshift-to-clickhouse) |
 | <a href="https://geniee.co.jp" class="favicon">Geniee</a> | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) |
 | <a href="https://www.genotek.ru/" class="favicon">Genotek</a> | Bioinformatics | Main product | — | — | [Video, August 2020](https://youtu.be/v3KyZbz9lEE) |
 | <a href="https://www.huya.com/" class="favicon">HUYA</a> | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) |

From d6372bd3d91408140337bd77b4b22e9c133eb7ce Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Thu, 4 Feb 2021 09:38:42 +0300
Subject: [PATCH 0623/1238] linker woes

---
 src/Processors/Transforms/WindowTransform.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index b7b0c72eb94..c5c4432e886 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -22,8 +22,6 @@ WindowTransform::WindowTransform(const Block & input_header_,
     , input_header(input_header_)
     , window_description(window_description_)
 {
-    window_description.frame.checkValid();
-
     workspaces.reserve(functions.size());
     for (const auto & f : functions)
     {

From b281d39036b8c7098bb6b98e217ee331278c428f Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Thu, 4 Feb 2021 09:40:09 +0300
Subject: [PATCH 0624/1238] yamake

---
 src/Interpreters/ya.make | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/Interpreters/ya.make b/src/Interpreters/ya.make
index 1cadc447e59..6a155749ddf 100644
--- a/src/Interpreters/ya.make
+++ b/src/Interpreters/ya.make
@@ -145,6 +145,7 @@ SRCS(
     TranslateQualifiedNamesVisitor.cpp
     TreeOptimizer.cpp
     TreeRewriter.cpp
+    WindowDescription.cpp
     addMissingDefaults.cpp
     addTypeConversionToAST.cpp
     castColumn.cpp

From 773b364fe43b39df53e8ffad220c301d379446b2 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Thu, 4 Feb 2021 09:49:11 +0300
Subject: [PATCH 0625/1238] check some bounds

---
 src/Parsers/ExpressionElementParsers.cpp | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp
index c129c312d11..2434f7dbc42 100644
--- a/src/Parsers/ExpressionElementParsers.cpp
+++ b/src/Parsers/ExpressionElementParsers.cpp
@@ -568,6 +568,15 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p
             }
             node->frame.begin_offset = value.get<Int64>();
             node->frame.begin_type = WindowFrame::BoundaryType::Offset;
+            // We can easily get a UINT64_MAX here, which doesn't even fit into
+            // int64_t. Not sure what checks we are going to need here after we
+            // support floats and dates.
+            if (node->frame.begin_offset > INT_MAX || node->frame.begin_offset < INT_MIN)
+            {
+                throw Exception(ErrorCodes::BAD_ARGUMENTS,
+                    "Frame offset must be between {} and {}, but {} is given",
+                    INT_MAX, INT_MIN, node->frame.begin_offset);
+            }
         }
         else
         {
@@ -622,6 +631,13 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p
                 }
                 node->frame.end_offset = value.get<Int64>();
                 node->frame.end_type = WindowFrame::BoundaryType::Offset;
+
+                if (node->frame.end_offset > INT_MAX || node->frame.end_offset < INT_MIN)
+                {
+                    throw Exception(ErrorCodes::BAD_ARGUMENTS,
+                        "Frame offset must be between {} and {}, but {} is given",
+                        INT_MAX, INT_MIN, node->frame.end_offset);
+                }
             }
             else
             {

From 1b6262f874bfa26870bf404e2149698016ef75d3 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Thu, 4 Feb 2021 09:51:46 +0300
Subject: [PATCH 0626/1238] cleanup

---
 src/Parsers/ExpressionElementParsers.cpp      | 2 +-
 src/Processors/Transforms/WindowTransform.cpp | 7 ++-----
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp
index 2434f7dbc42..3f4403bc264 100644
--- a/src/Parsers/ExpressionElementParsers.cpp
+++ b/src/Parsers/ExpressionElementParsers.cpp
@@ -585,7 +585,7 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p
 
         if (keyword_preceding.ignore(pos, expected))
         {
-            node->frame.begin_offset = - node->frame.begin_offset;
+            node->frame.begin_offset = -node->frame.begin_offset;
         }
         else if (keyword_following.ignore(pos, expected))
         {
diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index 634c588beb0..775a9e23191 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -434,18 +434,15 @@ void WindowTransform::advanceFrameEndCurrentRow()
 
 //    fmt::print(stderr, "first row {} last {}\n", frame_end.row, rows_end);
 
-    // We could retreat the frame_end here, but for some reason I am reluctant
-    // to do this... It would have better data locality.
-    auto reference = current_row;
+    // Advance frame_end while it is still peers with the current row.
     for (; frame_end.row < rows_end; ++frame_end.row)
     {
-        if (!arePeers(reference, frame_end))
+        if (!arePeers(current_row, frame_end))
         {
 //            fmt::print(stderr, "{} and {} don't match\n", reference, frame_end);
             frame_ended = true;
             return;
         }
-        reference = frame_end;
     }
 
     // Might have gotten to the end of the current block, have to properly

From ab30c95b4112f84755aecb449501872cd63ecba4 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 4 Feb 2021 10:35:30 +0300
Subject: [PATCH 0627/1238] Adjust perf test

---
 tests/performance/reinterpret_as.xml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/performance/reinterpret_as.xml b/tests/performance/reinterpret_as.xml
index 50cf0cb2278..6e1a0385319 100644
--- a/tests/performance/reinterpret_as.xml
+++ b/tests/performance/reinterpret_as.xml
@@ -230,11 +230,11 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(200000000)
+        FROM numbers_mt(20000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
-    
+
     <query>
          SELECT
             reinterpretAsFixedString(a),
@@ -249,7 +249,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(200000000)
+        FROM numbers_mt(100000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>

From 15a529f6f76a6c66f3df46fef274ebc4fa5297dc Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 4 Feb 2021 10:35:49 +0300
Subject: [PATCH 0628/1238] Whitespace

---
 src/AggregateFunctions/AggregateFunctionWindowFunnel.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/AggregateFunctions/AggregateFunctionWindowFunnel.h b/src/AggregateFunctions/AggregateFunctionWindowFunnel.h
index de8f0f1e2e9..c765024507e 100644
--- a/src/AggregateFunctions/AggregateFunctionWindowFunnel.h
+++ b/src/AggregateFunctions/AggregateFunctionWindowFunnel.h
@@ -149,7 +149,6 @@ private:
     UInt8 strict_order; // When the 'strict_order' is set, it doesn't allow interventions of other events.
                         // In the case of 'A->B->D->C', it stops finding 'A->B->C' at the 'D' and the max event level is 2.
 
-
     // Loop through the entire events_list, update the event timestamp value
     // The level path must be 1---2---3---...---check_events_size, find the max event level that satisfied the path in the sliding window.
     // If found, returns the max event level, else return 0.

From c1c71fc8e9bb23b48fac327c52b1c1ce77400e2a Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Thu, 4 Feb 2021 10:41:09 +0300
Subject: [PATCH 0629/1238] ROWS OFFSET frame end

---
 src/Interpreters/WindowDescription.cpp        | 15 +++
 src/Processors/Transforms/WindowTransform.cpp | 59 ++++++++++--
 src/Processors/Transforms/WindowTransform.h   |  3 +-
 .../01591_window_functions.reference          | 95 +++++++++++++++++++
 .../0_stateless/01591_window_functions.sql    | 16 ++++
 5 files changed, 181 insertions(+), 7 deletions(-)

diff --git a/src/Interpreters/WindowDescription.cpp b/src/Interpreters/WindowDescription.cpp
index 12f4bdd4124..6e72f056b16 100644
--- a/src/Interpreters/WindowDescription.cpp
+++ b/src/Interpreters/WindowDescription.cpp
@@ -109,6 +109,21 @@ void WindowFrame::checkValid() const
         return;
     }
 
+    if (end_type == BoundaryType::Offset
+        && begin_type == BoundaryType::Offset)
+    {
+        if (type == FrameType::Rows)
+        {
+            if (end_offset >= begin_offset)
+            {
+                return;
+            }
+        }
+
+        // For RANGE and GROUPS, we must check that end follows begin if sorted
+        // according to ORDER BY (we don't support them yet).
+    }
+
     throw Exception(ErrorCodes::BAD_ARGUMENTS,
         "Window frame '{}' is invalid",
         toString());
diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index 2eaabaf1dc5..474d1a3c452 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -271,16 +271,22 @@ void WindowTransform::advanceFrameStartRowsOffset()
         return;
     }
 
-    assert(frame_start <= partition_end);
-    if (frame_start == partition_end && partition_ended)
+    if (partition_end <= frame_start)
     {
         // A FOLLOWING frame start ran into the end of partition.
-        frame_started = true;
+        frame_start = partition_end;
+        frame_started = partition_ended;
         return;
     }
 
+    // Handled the equality case above. Now the frame start is inside the
+    // partition, if we walked all the offset, it's final.
     assert(partition_start < frame_start);
     frame_started = offset_left == 0;
+
+    // If we ran into the start of data (offset left is negative), we won't be
+    // able to make progress. Should have handled this case above.
+    assert(offset_left >= 0);
 }
 
 void WindowTransform::advanceFrameStartChoose()
@@ -463,6 +469,39 @@ void WindowTransform::advanceFrameEndUnbounded()
     frame_ended = partition_ended;
 }
 
+void WindowTransform::advanceFrameEndRowsOffset()
+{
+    // Walk the specified offset from the current row. The "+1" is needed
+    // because the frame_end is a past-the-end pointer.
+    const auto [moved_row, offset_left] = moveRowNumber(current_row,
+        window_description.frame.end_offset + 1);
+
+    if (partition_end <= moved_row)
+    {
+        // Clamp to the end of partition. It might not have ended yet, in which
+        // case wait for more data.
+        frame_end = partition_end;
+        frame_ended = partition_ended;
+        return;
+    }
+
+    if (moved_row <= partition_start)
+    {
+        // Clamp to the start of partition.
+        frame_end = partition_start;
+        frame_ended = true;
+        return;
+    }
+
+    // Frame end inside partition, if we walked all the offset, it's final.
+    frame_end = moved_row;
+    frame_ended = offset_left == 0;
+
+    // If we ran into the start of data (offset left is negative), we won't be
+    // able to make progress. Should have handled this case above.
+    assert(offset_left >= 0);
+}
+
 void WindowTransform::advanceFrameEnd()
 {
     // No reason for this function to be called again after it succeeded.
@@ -479,9 +518,17 @@ void WindowTransform::advanceFrameEnd()
             advanceFrameEndUnbounded();
             break;
         case WindowFrame::BoundaryType::Offset:
-            throw Exception(ErrorCodes::NOT_IMPLEMENTED,
-                "The frame end type '{}' is not implemented",
-                WindowFrame::toString(window_description.frame.end_type));
+            switch (window_description.frame.type)
+            {
+                case WindowFrame::FrameType::Rows:
+                    advanceFrameEndRowsOffset();
+                    break;
+                default:
+                    throw Exception(ErrorCodes::NOT_IMPLEMENTED,
+                        "The frame end type '{}' is not implemented",
+                        WindowFrame::toString(window_description.frame.end_type));
+            }
+            break;
     }
 
 //    fmt::print(stderr, "frame_end {} -> {}\n", frame_end_before, frame_end);
diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h
index c5e1c8b3653..bb1a9aefd64 100644
--- a/src/Processors/Transforms/WindowTransform.h
+++ b/src/Processors/Transforms/WindowTransform.h
@@ -109,9 +109,10 @@ private:
     void advanceFrameStart();
     void advanceFrameStartChoose();
     void advanceFrameStartRowsOffset();
-    void advanceFrameEnd();
     void advanceFrameEndCurrentRow();
     void advanceFrameEndUnbounded();
+    void advanceFrameEndRowsOffset();
+    void advanceFrameEnd();
     bool arePeers(const RowNumber & x, const RowNumber & y) const;
     void updateAggregationState();
     void writeOutCurrentRow();
diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference
index 2e9c659e0af..1993c59fc8b 100644
--- a/tests/queries/0_stateless/01591_window_functions.reference
+++ b/tests/queries/0_stateless/01591_window_functions.reference
@@ -558,11 +558,106 @@ settings max_block_size = 2;
 28	5	3	2	1
 29	5	2	1	0
 30	6	1	1	0
+-- ROWS offset frame start and end
+select number, p,
+    count(*) over (partition by p order by number
+        rows between 2 preceding and 2 following)
+from (select number, intDiv(number, 7) p from numbers(71))
+order by p, number
+settings max_block_size = 2;
+0	0	3
+1	0	4
+2	0	5
+3	0	5
+4	0	5
+5	0	4
+6	0	3
+7	1	3
+8	1	4
+9	1	5
+10	1	5
+11	1	5
+12	1	4
+13	1	3
+14	2	3
+15	2	4
+16	2	5
+17	2	5
+18	2	5
+19	2	4
+20	2	3
+21	3	3
+22	3	4
+23	3	5
+24	3	5
+25	3	5
+26	3	4
+27	3	3
+28	4	3
+29	4	4
+30	4	5
+31	4	5
+32	4	5
+33	4	4
+34	4	3
+35	5	3
+36	5	4
+37	5	5
+38	5	5
+39	5	5
+40	5	4
+41	5	3
+42	6	3
+43	6	4
+44	6	5
+45	6	5
+46	6	5
+47	6	4
+48	6	3
+49	7	3
+50	7	4
+51	7	5
+52	7	5
+53	7	5
+54	7	4
+55	7	3
+56	8	3
+57	8	4
+58	8	5
+59	8	5
+60	8	5
+61	8	4
+62	8	3
+63	9	3
+64	9	4
+65	9	5
+66	9	5
+67	9	5
+68	9	4
+69	9	3
+70	10	1
 SELECT count(*) OVER (ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) FROM numbers(4);
 1
 2
 3
 3
+-- frame boundaries that runs into the partition end
+select
+    count() over (partition by intDiv(number, 3)
+        rows between 100 following and unbounded following),
+    count() over (partition by intDiv(number, 3)
+        rows between current row and 100 following)
+from numbers(10);
+0	3
+0	2
+0	1
+0	3
+0	2
+0	1
+0	3
+0	2
+0	1
+0	1
 -- seen a use-after-free under MSan in this query once
 SELECT number, max(number) OVER (PARTITION BY intDiv(number, 7) ORDER BY number ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM numbers(1024) SETTINGS max_block_size = 2 FORMAT Null;
 -- a corner case
diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql
index 6c4190b47d3..400d4832144 100644
--- a/tests/queries/0_stateless/01591_window_functions.sql
+++ b/tests/queries/0_stateless/01591_window_functions.sql
@@ -175,8 +175,24 @@ from (select number, intDiv(number, 5) p from numbers(31))
 order by p, number
 settings max_block_size = 2;
 
+-- ROWS offset frame start and end
+select number, p,
+    count(*) over (partition by p order by number
+        rows between 2 preceding and 2 following)
+from (select number, intDiv(number, 7) p from numbers(71))
+order by p, number
+settings max_block_size = 2;
+
 SELECT count(*) OVER (ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) FROM numbers(4);
 
+-- frame boundaries that runs into the partition end
+select
+    count() over (partition by intDiv(number, 3)
+        rows between 100 following and unbounded following),
+    count() over (partition by intDiv(number, 3)
+        rows between current row and 100 following)
+from numbers(10);
+
 -- seen a use-after-free under MSan in this query once
 SELECT number, max(number) OVER (PARTITION BY intDiv(number, 7) ORDER BY number ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM numbers(1024) SETTINGS max_block_size = 2 FORMAT Null;
 

From 1cc9e03d0cca12bedd0559e30843b1356d29da51 Mon Sep 17 00:00:00 2001
From: kreuzerkrieg <kreuzerkrieg@gmail.com>
Date: Tue, 2 Feb 2021 16:09:43 +0200
Subject: [PATCH 0630/1238] return `DiskType` instead of `String`
 IDisk::getType() as in the rest of storage interfaces

---
 src/Disks/DiskDecorator.h                  |  2 +-
 src/Disks/DiskLocal.h                      |  2 +-
 src/Disks/DiskMemory.h                     |  2 +-
 src/Disks/IDisk.h                          | 25 +++++++++++++++++++++-
 src/Disks/S3/DiskS3.h                      |  2 +-
 src/Storages/System/StorageSystemDisks.cpp |  2 +-
 6 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/src/Disks/DiskDecorator.h b/src/Disks/DiskDecorator.h
index b50252c2c97..0f66ef63800 100644
--- a/src/Disks/DiskDecorator.h
+++ b/src/Disks/DiskDecorator.h
@@ -48,7 +48,7 @@ public:
     void setReadOnly(const String & path) override;
     void createHardLink(const String & src_path, const String & dst_path) override;
     void truncateFile(const String & path, size_t size) override;
-    const String getType() const override { return delegate->getType(); }
+    DiskType::Type getType() const override { return delegate->getType(); }
     Executor & getExecutor() override;
     SyncGuardPtr getDirectorySyncGuard(const String & path) const override;
 
diff --git a/src/Disks/DiskLocal.h b/src/Disks/DiskLocal.h
index d8d45290986..495e511f07e 100644
--- a/src/Disks/DiskLocal.h
+++ b/src/Disks/DiskLocal.h
@@ -100,7 +100,7 @@ public:
 
     void truncateFile(const String & path, size_t size) override;
 
-    const String getType() const override { return "local"; }
+    DiskType::Type getType() const override { return DiskType::Type::Local; }
 
     SyncGuardPtr getDirectorySyncGuard(const String & path) const override;
 
diff --git a/src/Disks/DiskMemory.h b/src/Disks/DiskMemory.h
index 3ebc76661d4..1ef38801a6c 100644
--- a/src/Disks/DiskMemory.h
+++ b/src/Disks/DiskMemory.h
@@ -91,7 +91,7 @@ public:
 
     void truncateFile(const String & path, size_t size) override;
 
-    const String getType() const override { return "memory"; }
+    DiskType::Type getType() const override { return DiskType::Type::RAM; }
 
 private:
     void createDirectoriesImpl(const String & path);
diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h
index f41490a0807..3bbe553ba59 100644
--- a/src/Disks/IDisk.h
+++ b/src/Disks/IDisk.h
@@ -57,6 +57,29 @@ public:
 
 using SpacePtr = std::shared_ptr<Space>;
 
+struct DiskType
+{
+    enum class Type
+    {
+        Local,
+        RAM,
+        S3
+    };
+    static String toString(Type disk_type)
+    {
+        switch (disk_type)
+        {
+            case Type::Local:
+                return "local";
+            case Type::RAM:
+                return "memory";
+            case Type::S3:
+                return "s3";
+        }
+        __builtin_unreachable();
+    }
+};
+
 /**
  * A guard, that should synchronize file's or directory's state
  * with storage device (e.g. fsync in POSIX) in its destructor.
@@ -191,7 +214,7 @@ public:
     virtual void truncateFile(const String & path, size_t size);
 
     /// Return disk type - "local", "s3", etc.
-    virtual const String getType() const = 0;
+    virtual DiskType::Type getType() const = 0;
 
     /// Invoked when Global Context is shutdown.
     virtual void shutdown() { }
diff --git a/src/Disks/S3/DiskS3.h b/src/Disks/S3/DiskS3.h
index 3dbd9029fb2..4447d49b0ed 100644
--- a/src/Disks/S3/DiskS3.h
+++ b/src/Disks/S3/DiskS3.h
@@ -105,7 +105,7 @@ public:
 
     void setReadOnly(const String & path) override;
 
-    const String getType() const override { return "s3"; }
+    DiskType::Type getType() const override { return DiskType::Type::S3; }
 
     void shutdown() override;
 
diff --git a/src/Storages/System/StorageSystemDisks.cpp b/src/Storages/System/StorageSystemDisks.cpp
index fbbee51e34e..b04d24cc705 100644
--- a/src/Storages/System/StorageSystemDisks.cpp
+++ b/src/Storages/System/StorageSystemDisks.cpp
@@ -51,7 +51,7 @@ Pipe StorageSystemDisks::read(
         col_free->insert(disk_ptr->getAvailableSpace());
         col_total->insert(disk_ptr->getTotalSpace());
         col_keep->insert(disk_ptr->getKeepingFreeSpace());
-        col_type->insert(disk_ptr->getType());
+        col_type->insert(DiskType::toString(disk_ptr->getType()));
     }
 
     Columns res_columns;

From e7a83868dd16b279f6736a827eb4519fce7b0fb1 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Thu, 4 Feb 2021 11:28:11 +0300
Subject: [PATCH 0631/1238] Fix build

---
 src/Coordination/NuKeeperServer.cpp              |  4 ++--
 src/Coordination/NuKeeperServer.h                |  2 +-
 src/Coordination/NuKeeperStateMachine.cpp        |  6 +++---
 src/Coordination/NuKeeperStorage.cpp             |  2 +-
 src/Coordination/NuKeeperStorage.h               |  4 ++--
 src/Coordination/SessionExpiryQueue.cpp          | 12 ++++++------
 src/Coordination/SessionExpiryQueue.h            | 16 ++++++++--------
 src/Coordination/tests/gtest_for_build.cpp       |  4 ++--
 .../test_testkeeper_multinode/test.py            |  5 ++++-
 9 files changed, 29 insertions(+), 26 deletions(-)

diff --git a/src/Coordination/NuKeeperServer.cpp b/src/Coordination/NuKeeperServer.cpp
index 335f577beeb..d700956c522 100644
--- a/src/Coordination/NuKeeperServer.cpp
+++ b/src/Coordination/NuKeeperServer.cpp
@@ -214,9 +214,9 @@ NuKeeperStorage::ResponsesForSessions NuKeeperServer::putRequests(const NuKeeper
     }
 }
 
-int64_t NuKeeperServer::getSessionID(long session_timeout_ms)
+int64_t NuKeeperServer::getSessionID(int64_t session_timeout_ms)
 {
-    auto entry = nuraft::buffer::alloc(sizeof(long));
+    auto entry = nuraft::buffer::alloc(sizeof(int64_t));
     /// Just special session request
     nuraft::buffer_serializer bs(entry);
     bs.put_i64(session_timeout_ms);
diff --git a/src/Coordination/NuKeeperServer.h b/src/Coordination/NuKeeperServer.h
index 962863f591e..32ca61e924f 100644
--- a/src/Coordination/NuKeeperServer.h
+++ b/src/Coordination/NuKeeperServer.h
@@ -46,7 +46,7 @@ public:
 
     NuKeeperStorage::ResponsesForSessions putRequests(const NuKeeperStorage::RequestsForSessions & requests);
 
-    int64_t getSessionID(long session_timeout_ms);
+    int64_t getSessionID(int64_t session_timeout_ms);
 
     std::unordered_set<int64_t> getDeadSessions();
 
diff --git a/src/Coordination/NuKeeperStateMachine.cpp b/src/Coordination/NuKeeperStateMachine.cpp
index 8e22da81081..f7b7ba3c567 100644
--- a/src/Coordination/NuKeeperStateMachine.cpp
+++ b/src/Coordination/NuKeeperStateMachine.cpp
@@ -53,11 +53,11 @@ NuKeeperStateMachine::NuKeeperStateMachine(long tick_time)
 
 nuraft::ptr<nuraft::buffer> NuKeeperStateMachine::commit(const size_t log_idx, nuraft::buffer & data)
 {
-    if (data.size() == sizeof(long))
+    if (data.size() == sizeof(int64_t))
     {
         nuraft::buffer_serializer timeout_data(data);
-        long session_timeout_ms = timeout_data.get_i64();
-        auto response = nuraft::buffer::alloc(sizeof(size_t));
+        int64_t session_timeout_ms = timeout_data.get_i64();
+        auto response = nuraft::buffer::alloc(sizeof(int64_t));
         int64_t session_id;
         nuraft::buffer_serializer bs(response);
         {
diff --git a/src/Coordination/NuKeeperStorage.cpp b/src/Coordination/NuKeeperStorage.cpp
index 3b52b47c4bf..45701b63b8b 100644
--- a/src/Coordination/NuKeeperStorage.cpp
+++ b/src/Coordination/NuKeeperStorage.cpp
@@ -67,7 +67,7 @@ static NuKeeperStorage::ResponsesForSessions processWatchesImpl(const String & p
     return result;
 }
 
-NuKeeperStorage::NuKeeperStorage(long tick_time_ms)
+NuKeeperStorage::NuKeeperStorage(int64_t tick_time_ms)
     : session_expiry_queue(tick_time_ms)
 {
     container.emplace("/", Node());
diff --git a/src/Coordination/NuKeeperStorage.h b/src/Coordination/NuKeeperStorage.h
index cf881687dcb..6f709a6f480 100644
--- a/src/Coordination/NuKeeperStorage.h
+++ b/src/Coordination/NuKeeperStorage.h
@@ -76,9 +76,9 @@ public:
     }
 
 public:
-    NuKeeperStorage(long tick_time_ms);
+    NuKeeperStorage(int64_t tick_time_ms);
 
-    int64_t getSessionID(long session_timeout_ms)
+    int64_t getSessionID(int64_t session_timeout_ms)
     {
         auto result = session_id_counter++;
         session_and_timeout.emplace(result, session_timeout_ms);
diff --git a/src/Coordination/SessionExpiryQueue.cpp b/src/Coordination/SessionExpiryQueue.cpp
index 45ceaee52fe..f90cd089be8 100644
--- a/src/Coordination/SessionExpiryQueue.cpp
+++ b/src/Coordination/SessionExpiryQueue.cpp
@@ -18,11 +18,11 @@ bool SessionExpiryQueue::remove(int64_t session_id)
     return false;
 }
 
-bool SessionExpiryQueue::update(int64_t session_id, long timeout_ms)
+bool SessionExpiryQueue::update(int64_t session_id, int64_t timeout_ms)
 {
     auto session_it = session_to_timeout.find(session_id);
-    long now = getNowMilliseconds();
-    long new_expiry_time = roundToNextInterval(now + timeout_ms);
+    int64_t now = getNowMilliseconds();
+    int64_t new_expiry_time = roundToNextInterval(now + timeout_ms);
 
     if (session_it != session_to_timeout.end())
     {
@@ -34,7 +34,7 @@ bool SessionExpiryQueue::update(int64_t session_id, long timeout_ms)
             std::tie(set_it, std::ignore) = expiry_to_sessions.emplace(new_expiry_time, std::unordered_set<int64_t>());
 
         set_it->second.insert(session_id);
-        long prev_expiry_time = session_it->second;
+        int64_t prev_expiry_time = session_it->second;
 
         if (prev_expiry_time != new_expiry_time)
         {
@@ -58,12 +58,12 @@ bool SessionExpiryQueue::update(int64_t session_id, long timeout_ms)
 
 std::unordered_set<int64_t> SessionExpiryQueue::getExpiredSessions()
 {
-    long now = getNowMilliseconds();
+    int64_t now = getNowMilliseconds();
     if (now < next_expiration_time)
         return {};
 
     auto set_it = expiry_to_sessions.find(next_expiration_time);
-    long new_expiration_time = next_expiration_time + expiration_interval;
+    int64_t new_expiration_time = next_expiration_time + expiration_interval;
     next_expiration_time = new_expiration_time;
     if (set_it != expiry_to_sessions.end())
     {
diff --git a/src/Coordination/SessionExpiryQueue.h b/src/Coordination/SessionExpiryQueue.h
index 4fb254526e7..3b4ad6dde88 100644
--- a/src/Coordination/SessionExpiryQueue.h
+++ b/src/Coordination/SessionExpiryQueue.h
@@ -9,25 +9,25 @@ namespace DB
 class SessionExpiryQueue
 {
 private:
-    std::unordered_map<int64_t, long> session_to_timeout;
-    std::unordered_map<long, std::unordered_set<int64_t>> expiry_to_sessions;
+    std::unordered_map<int64_t, int64_t> session_to_timeout;
+    std::unordered_map<int64_t, std::unordered_set<int64_t>> expiry_to_sessions;
 
-    long expiration_interval;
-    long next_expiration_time;
+    int64_t expiration_interval;
+    int64_t next_expiration_time;
 
-    static long getNowMilliseconds()
+    static int64_t getNowMilliseconds()
     {
         using namespace std::chrono;
         return duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
     }
 
-    long roundToNextInterval(long time) const
+    int64_t roundToNextInterval(int64_t time) const
     {
         return (time / expiration_interval + 1) * expiration_interval;
     }
 
 public:
-    explicit SessionExpiryQueue(long expiration_interval_)
+    explicit SessionExpiryQueue(int64_t expiration_interval_)
         : expiration_interval(expiration_interval_)
         , next_expiration_time(roundToNextInterval(getNowMilliseconds()))
     {
@@ -35,7 +35,7 @@ public:
 
     bool remove(int64_t session_id);
 
-    bool update(int64_t session_id, long timeout_ms);
+    bool update(int64_t session_id, int64_t timeout_ms);
 
     std::unordered_set<int64_t> getExpiredSessions();
 };
diff --git a/src/Coordination/tests/gtest_for_build.cpp b/src/Coordination/tests/gtest_for_build.cpp
index d2f4938dfd3..956b12d6e08 100644
--- a/src/Coordination/tests/gtest_for_build.cpp
+++ b/src/Coordination/tests/gtest_for_build.cpp
@@ -310,7 +310,7 @@ DB::NuKeeperStorage::ResponsesForSessions getZooKeeperResponses(nuraft::ptr<nura
 
 TEST(CoordinationTest, TestStorageSerialization)
 {
-    DB::NuKeeperStorage storage;
+    DB::NuKeeperStorage storage(500);
     storage.container["/hello"] = DB::NuKeeperStorage::Node{.data="world"};
     storage.container["/hello/somepath"] =  DB::NuKeeperStorage::Node{.data="somedata"};
     storage.session_id_counter = 5;
@@ -324,7 +324,7 @@ TEST(CoordinationTest, TestStorageSerialization)
     std::string serialized = buffer.str();
     EXPECT_NE(serialized.size(), 0);
     DB::ReadBufferFromString read(serialized);
-    DB::NuKeeperStorage new_storage;
+    DB::NuKeeperStorage new_storage(500);
     serializer.deserialize(new_storage, read);
 
     EXPECT_EQ(new_storage.container.size(), 3);
diff --git a/tests/integration/test_testkeeper_multinode/test.py b/tests/integration/test_testkeeper_multinode/test.py
index cb457e24435..d815af7a63e 100644
--- a/tests/integration/test_testkeeper_multinode/test.py
+++ b/tests/integration/test_testkeeper_multinode/test.py
@@ -42,7 +42,8 @@ def test_simple_replicated_table(started_cluster):
     assert node3.query("SELECT COUNT() FROM t") == "10\n"
 
 
-
+# in extremely rare case it can take more than 5 minutes in debug build with sanitizer
+@pytest.mark.timeout(600)
 def test_blocade_leader(started_cluster):
     for i, node in enumerate([node1, node2, node3]):
         node.query("CREATE TABLE t1 (value UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/t1', '{}') ORDER BY tuple()".format(i + 1))
@@ -133,6 +134,8 @@ def test_blocade_leader(started_cluster):
     assert node3.query("SELECT COUNT() FROM t1") == "310\n"
 
 
+# in extremely rare case it can take more than 5 minutes in debug build with sanitizer
+@pytest.mark.timeout(600)
 def test_blocade_leader_twice(started_cluster):
     for i, node in enumerate([node1, node2, node3]):
         node.query("CREATE TABLE t2 (value UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/t2', '{}') ORDER BY tuple()".format(i + 1))

From d6d7365cdd230a5035d8b8340abf2944f18c1562 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Thu, 4 Feb 2021 12:19:02 +0300
Subject: [PATCH 0632/1238] Update test

---
 tests/queries/0_stateless/01671_merge_join_and_constants.sql | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/queries/0_stateless/01671_merge_join_and_constants.sql b/tests/queries/0_stateless/01671_merge_join_and_constants.sql
index 8611a96a723..c34f8e6705d 100644
--- a/tests/queries/0_stateless/01671_merge_join_and_constants.sql
+++ b/tests/queries/0_stateless/01671_merge_join_and_constants.sql
@@ -11,5 +11,7 @@ set join_algorithm = 'partial_merge';
 
 SELECT * FROM table1 AS t1 ALL LEFT JOIN (SELECT *, '0.10', c, d AS b FROM table2) AS t2 USING (a, b) ORDER BY d ASC FORMAT PrettyCompact settings max_rows_in_join = 1;
 
+SELECT pow('0.0000000257', NULL), pow(pow(NULL, NULL), NULL) - NULL, (val + NULL) = (rval * 0), * FROM (SELECT (val + 256) = (NULL * NULL), toLowCardinality(toNullable(dummy)) AS val FROM system.one) AS s1 ANY LEFT JOIN (SELECT toLowCardinality(dummy) AS rval FROM system.one) AS s2 ON (val + 0) = (rval * 255) settings max_rows_in_join = 1;
+
 DROP TABLE IF EXISTS table1;
 DROP TABLE IF EXISTS table2;

From aead46c1463ed2be80b1bd735dd0d46fa9d0e051 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Thu, 4 Feb 2021 12:20:57 +0300
Subject: [PATCH 0633/1238] Update test.

---
 .../queries/0_stateless/01671_merge_join_and_constants.reference | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/queries/0_stateless/01671_merge_join_and_constants.reference b/tests/queries/0_stateless/01671_merge_join_and_constants.reference
index 114fc9ff91f..efd814df893 100644
--- a/tests/queries/0_stateless/01671_merge_join_and_constants.reference
+++ b/tests/queries/0_stateless/01671_merge_join_and_constants.reference
@@ -3,3 +3,4 @@
 │ b │ 2018-01-01 │ B │ 2018-01-01 │ 0.10      │
 │ c │ 2018-01-01 │ C │ 2018-01-01 │ 0.10      │
 └───┴────────────┴───┴────────────┴───────────┘
+\N	\N	\N	\N	0	0

From 81b0cc381da8366fe6356a852c6b372ed078dc6c Mon Sep 17 00:00:00 2001
From: hexiaoting <hewenting_ict@163.com>
Date: Thu, 4 Feb 2021 17:36:09 +0800
Subject: [PATCH 0634/1238] enlarge signed type for modulo function

---
 src/DataTypes/NumberTraits.h                               | 5 ++++-
 tests/queries/0_stateless/01692_mod_enlarge_type.reference | 2 ++
 tests/queries/0_stateless/01692_mod_enlarge_type.sql       | 2 ++
 3 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/01692_mod_enlarge_type.reference
 create mode 100644 tests/queries/0_stateless/01692_mod_enlarge_type.sql

diff --git a/src/DataTypes/NumberTraits.h b/src/DataTypes/NumberTraits.h
index 3aa00c68274..c68a73eeb17 100644
--- a/src/DataTypes/NumberTraits.h
+++ b/src/DataTypes/NumberTraits.h
@@ -108,7 +108,10 @@ template <typename A, typename B> struct ResultOfIntegerDivision
     */
 template <typename A, typename B> struct ResultOfModulo
 {
-    using Type0 = typename Construct<is_signed_v<A> || is_signed_v<B>, false, sizeof(B)>::Type;
+    using Type0 = typename Construct<
+        is_signed_v<A> || is_signed_v<B>,
+	false,
+        (is_signed_v<A> || is_signed_v<B>) ? std::max(sizeof(A), sizeof(B)) : sizeof(B)>::Type;
     using Type = std::conditional_t<std::is_floating_point_v<A> || std::is_floating_point_v<B>, Float64, Type0>;
 };
 
diff --git a/tests/queries/0_stateless/01692_mod_enlarge_type.reference b/tests/queries/0_stateless/01692_mod_enlarge_type.reference
new file mode 100644
index 00000000000..6d962821ad6
--- /dev/null
+++ b/tests/queries/0_stateless/01692_mod_enlarge_type.reference
@@ -0,0 +1,2 @@
+-199
+146	Int32
diff --git a/tests/queries/0_stateless/01692_mod_enlarge_type.sql b/tests/queries/0_stateless/01692_mod_enlarge_type.sql
new file mode 100644
index 00000000000..1a97d084a4f
--- /dev/null
+++ b/tests/queries/0_stateless/01692_mod_enlarge_type.sql
@@ -0,0 +1,2 @@
+SELECT toInt32(-199) % 200;
+select toInt32(441746) % 150 as a , toTypeName(a);

From 3ddca91a5b5606b7f5b735c6caa8ec96a18cf5fc Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Thu, 4 Feb 2021 12:39:07 +0300
Subject: [PATCH 0635/1238] Fix build one more time

---
 src/Coordination/NuKeeperStateMachine.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Coordination/NuKeeperStateMachine.h b/src/Coordination/NuKeeperStateMachine.h
index 380588a39f0..bfb67f10a67 100644
--- a/src/Coordination/NuKeeperStateMachine.h
+++ b/src/Coordination/NuKeeperStateMachine.h
@@ -10,7 +10,7 @@ namespace DB
 class NuKeeperStateMachine : public nuraft::state_machine
 {
 public:
-    NuKeeperStateMachine(long tick_time);
+    NuKeeperStateMachine(long tick_time = 500);
 
     nuraft::ptr<nuraft::buffer> pre_commit(const size_t /*log_idx*/, nuraft::buffer & /*data*/) override { return nullptr; }
 

From 41698d65f556a0b93333520d80a95000519e7cc5 Mon Sep 17 00:00:00 2001
From: hexiaoting <hewenting_ict@163.com>
Date: Thu, 4 Feb 2021 17:50:59 +0800
Subject: [PATCH 0636/1238] fix style error

---
 src/DataTypes/NumberTraits.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/DataTypes/NumberTraits.h b/src/DataTypes/NumberTraits.h
index c68a73eeb17..4d534df0b42 100644
--- a/src/DataTypes/NumberTraits.h
+++ b/src/DataTypes/NumberTraits.h
@@ -110,7 +110,7 @@ template <typename A, typename B> struct ResultOfModulo
 {
     using Type0 = typename Construct<
         is_signed_v<A> || is_signed_v<B>,
-	false,
+        false,
         (is_signed_v<A> || is_signed_v<B>) ? std::max(sizeof(A), sizeof(B)) : sizeof(B)>::Type;
     using Type = std::conditional_t<std::is_floating_point_v<A> || std::is_floating_point_v<B>, Float64, Type0>;
 };

From 7a2279d06707ea3a4a99842b10048cc3fd2c31e9 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Thu, 4 Feb 2021 14:44:00 +0300
Subject: [PATCH 0637/1238] Fix removing filter column from expression after
 Filter actions split

---
 src/Interpreters/ActionsDAG.cpp               | 49 +++++++++++--------
 src/Interpreters/ActionsDAG.h                 | 15 +++---
 .../QueryPlan/Optimizations/splitFilter.cpp   |  5 +-
 3 files changed, 40 insertions(+), 29 deletions(-)

diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp
index d8c40ffda2f..becd3f4f4a2 100644
--- a/src/Interpreters/ActionsDAG.cpp
+++ b/src/Interpreters/ActionsDAG.cpp
@@ -454,36 +454,42 @@ bool ActionsDAG::tryRestoreColumn(const std::string & column_name)
     return false;
 }
 
-void ActionsDAG::removeUnusedInput(const std::string & column_name)
+bool ActionsDAG::removeUnusedResult(const std::string & column_name)
 {
+    /// Find column in index and remove.
+    const Node * col;
+    {
+        auto it = index.begin();
+        for (; it != index.end(); ++it)
+            if ((*it)->result_name == column_name)
+                break;
+
+        if (it == index.end())
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Not found result {} in ActionsDAG\n{}", column_name, dumpDAG());
+
+        col = *it;
+        index.remove(it);
+    }
+
+    /// Check if column is in input.
     auto it = inputs.begin();
     for (; it != inputs.end(); ++it)
-        if ((*it)->result_name == column_name)
+        if (*it == col)
             break;
 
     if (it == inputs.end())
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Not found input {} in ActionsDAG\n{}", column_name, dumpDAG());
+        return false;
 
-    auto * input = *it;
+    /// Check column has no dependent.
     for (const auto & node : nodes)
         for (const auto * child : node.children)
-            if (input == child)
-                throw Exception(ErrorCodes::LOGICAL_ERROR,
-                                "Cannot remove input {} because it has dependent nodes in ActionsDAG\n{}",
-                                column_name, dumpDAG());
-
-    for (auto jt = index.begin(); jt != index.end(); ++jt)
-    {
-        if (*jt == input)
-        {
-            index.remove(jt);
-            break;
-        }
-    }
+            if (col == child)
+                return false;
 
+    /// Remove from nodes and inputs.
     for (auto jt = nodes.begin(); jt != nodes.end(); ++jt)
     {
-        if (&(*jt) == input)
+        if (&(*jt) == *it)
         {
             nodes.erase(jt);
             break;
@@ -491,6 +497,7 @@ void ActionsDAG::removeUnusedInput(const std::string & column_name)
     }
 
     inputs.erase(it);
+    return true;
 }
 
 ActionsDAGPtr ActionsDAG::clone() const
@@ -844,7 +851,7 @@ ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second)
     return std::make_shared<ActionsDAG>(std::move(first));
 }
 
-std::pair<ActionsDAGPtr, ActionsDAGPtr> ActionsDAG::split(std::unordered_set<const Node *> split_nodes) const
+ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split_nodes) const
 {
     /// Split DAG into two parts.
     /// (first_nodes, first_index) is a part which will have split_list in result.
@@ -1045,7 +1052,7 @@ std::pair<ActionsDAGPtr, ActionsDAGPtr> ActionsDAG::split(std::unordered_set<con
     return {std::move(first_actions), std::move(second_actions)};
 }
 
-std::pair<ActionsDAGPtr, ActionsDAGPtr>  ActionsDAG::splitActionsBeforeArrayJoin(const NameSet & array_joined_columns) const
+ActionsDAG::SplitResult ActionsDAG::splitActionsBeforeArrayJoin(const NameSet & array_joined_columns) const
 {
 
     struct Frame
@@ -1113,7 +1120,7 @@ std::pair<ActionsDAGPtr, ActionsDAGPtr>  ActionsDAG::splitActionsBeforeArrayJoin
     return res;
 }
 
-std::pair<ActionsDAGPtr, ActionsDAGPtr> ActionsDAG::splitActionsForFilter(const std::string & column_name) const
+ActionsDAG::SplitResult ActionsDAG::splitActionsForFilter(const std::string & column_name) const
 {
     auto it = index.begin();
     for (; it != index.end(); ++it)
diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h
index b12da30e24f..fa5ae2ac83f 100644
--- a/src/Interpreters/ActionsDAG.h
+++ b/src/Interpreters/ActionsDAG.h
@@ -214,9 +214,10 @@ public:
 
     /// If column is not in index, try to find it in nodes and insert back into index.
     bool tryRestoreColumn(const std::string & column_name);
-    /// Find column in input. Remove it from input and index.
-    /// Checks that column in inputs and has not dependent nodes.
-    void removeUnusedInput(const std::string & column_name);
+    /// Find column in result. Remove it from index.
+    /// If columns is in inputs and has no dependent nodes, remove it from inputs too.
+    /// Return true if column was removed from inputs.
+    bool removeUnusedResult(const std::string & column_name);
 
     void projectInput() { settings.project_input = true; }
     void removeUnusedActions(const Names & required_names);
@@ -255,18 +256,20 @@ public:
     /// Otherwise, any two actions may be combined.
     static ActionsDAGPtr merge(ActionsDAG && first, ActionsDAG && second);
 
+    using SplitResult = std::pair<ActionsDAGPtr, ActionsDAGPtr>;
+
     /// Split ActionsDAG into two DAGs, where first part contains all nodes from split_nodes and their children.
     /// Execution of first then second parts on block is equivalent to execution of initial DAG.
     /// First DAG and initial DAG have equal inputs, second DAG and initial DAG has equal index (outputs).
     /// Second DAG inputs may contain less inputs then first DAG (but also include other columns).
-    std::pair<ActionsDAGPtr, ActionsDAGPtr> split(std::unordered_set<const Node *> split_nodes) const;
+    SplitResult split(std::unordered_set<const Node *> split_nodes) const;
 
     /// Splits actions into two parts. Returned first half may be swapped with ARRAY JOIN.
-    std::pair<ActionsDAGPtr, ActionsDAGPtr> splitActionsBeforeArrayJoin(const NameSet & array_joined_columns) const;
+    SplitResult splitActionsBeforeArrayJoin(const NameSet & array_joined_columns) const;
 
     /// Splits actions into two parts. First part has minimal size sufficient for calculation of column_name.
     /// Index of initial actions must contain column_name.
-    std::pair<ActionsDAGPtr, ActionsDAGPtr> splitActionsForFilter(const std::string & column_name) const;
+    SplitResult splitActionsForFilter(const std::string & column_name) const;
 
 private:
     Node & addNode(Node node, bool can_replace = false);
diff --git a/src/Processors/QueryPlan/Optimizations/splitFilter.cpp b/src/Processors/QueryPlan/Optimizations/splitFilter.cpp
index 38ba8f25b24..8c212936195 100644
--- a/src/Processors/QueryPlan/Optimizations/splitFilter.cpp
+++ b/src/Processors/QueryPlan/Optimizations/splitFilter.cpp
@@ -24,8 +24,9 @@ size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes)
     if (split.second->trivial())
         return 0;
 
+    bool remove_filter = false;
     if (filter_step->removesFilterColumn())
-        split.second->removeUnusedInput(filter_step->getFilterColumnName());
+        remove_filter = split.second->removeUnusedResult(filter_step->getFilterColumnName());
 
     auto description = filter_step->getStepDescription();
 
@@ -37,7 +38,7 @@ size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes)
             filter_node.children.at(0)->step->getOutputStream(),
             std::move(split.first),
             filter_step->getFilterColumnName(),
-            filter_step->removesFilterColumn());
+            remove_filter);
 
     node->step = std::make_unique<ExpressionStep>(filter_node.step->getOutputStream(), std::move(split.second));
 

From e6f1ce48fe50c83a88f8c8feb985c1a0ea6180d3 Mon Sep 17 00:00:00 2001
From: Haavard Kvaalen <havardk@kvaalen.no>
Date: Thu, 4 Feb 2021 11:37:12 +0100
Subject: [PATCH 0638/1238] Fix handling of TABLE_MAP_EVENT from MySQL

The MySQL replication code assumed that row update events would be
preceded by a single TABLE_MAP_EVENT.  However, if a single SQL
statement modifies rows in multiple tables, MySQL will first send
table map events for all involved tables, and then row update events.

Depending on circumstances, this could lead to an exception when the row
update was processed, the update could be incorrectly dropped, or the
update could be applied to the wrong table.
---
 src/Core/MySQL/MySQLReplication.cpp           | 46 +++++++++++++++----
 src/Core/MySQL/MySQLReplication.h             | 36 +++++++++++----
 .../materialize_with_ddl.py                   | 15 ++++++
 .../test_materialize_mysql_database/test.py   |  5 ++
 4 files changed, 86 insertions(+), 16 deletions(-)

diff --git a/src/Core/MySQL/MySQLReplication.cpp b/src/Core/MySQL/MySQLReplication.cpp
index b86d6447e26..8e1e0cd7646 100644
--- a/src/Core/MySQL/MySQLReplication.cpp
+++ b/src/Core/MySQL/MySQLReplication.cpp
@@ -136,6 +136,7 @@ namespace MySQLReplication
         out << "XID: " << this->xid << '\n';
     }
 
+    /// https://dev.mysql.com/doc/internals/en/table-map-event.html
     void TableMapEvent::parseImpl(ReadBuffer & payload)
     {
         payload.readStrict(reinterpret_cast<char *>(&table_id), 6);
@@ -257,15 +258,19 @@ namespace MySQLReplication
         out << "Null Bitmap: " << bitmap_str << '\n';
     }
 
-    void RowsEvent::parseImpl(ReadBuffer & payload)
+    void RowsEventHeader::parse(ReadBuffer & payload)
     {
         payload.readStrict(reinterpret_cast<char *>(&table_id), 6);
         payload.readStrict(reinterpret_cast<char *>(&flags), 2);
 
+        UInt16 extra_data_len;
         /// This extra_data_len contains the 2 bytes length.
         payload.readStrict(reinterpret_cast<char *>(&extra_data_len), 2);
         payload.ignore(extra_data_len - 2);
+    }
 
+    void RowsEvent::parseImpl(ReadBuffer & payload)
+    {
         number_columns = readLengthEncodedNumber(payload);
         size_t columns_bitmap_size = (number_columns + 7) / 8;
         switch (header.type)
@@ -795,37 +800,50 @@ namespace MySQLReplication
             {
                 event = std::make_shared<TableMapEvent>(std::move(event_header));
                 event->parseEvent(event_payload);
-                table_map = std::static_pointer_cast<TableMapEvent>(event);
+                auto table_map = std::static_pointer_cast<TableMapEvent>(event);
+                table_maps[table_map->table_id] = table_map;
                 break;
             }
             case WRITE_ROWS_EVENT_V1:
             case WRITE_ROWS_EVENT_V2: {
-                if (doReplicate())
-                    event = std::make_shared<WriteRowsEvent>(table_map, std::move(event_header));
+                RowsEventHeader rows_header(event_header.type);
+                rows_header.parse(event_payload);
+                if (doReplicate(rows_header.table_id))
+                    event = std::make_shared<WriteRowsEvent>(table_maps.at(rows_header.table_id), std::move(event_header), rows_header);
                 else
                     event = std::make_shared<DryRunEvent>(std::move(event_header));
 
                 event->parseEvent(event_payload);
+                if (rows_header.flags & ROWS_END_OF_STATEMENT)
+                    table_maps.clear();
                 break;
             }
             case DELETE_ROWS_EVENT_V1:
             case DELETE_ROWS_EVENT_V2: {
-                if (doReplicate())
-                    event = std::make_shared<DeleteRowsEvent>(table_map, std::move(event_header));
+                RowsEventHeader rows_header(event_header.type);
+                rows_header.parse(event_payload);
+                if (doReplicate(rows_header.table_id))
+                    event = std::make_shared<DeleteRowsEvent>(table_maps.at(rows_header.table_id), std::move(event_header), rows_header);
                 else
                     event = std::make_shared<DryRunEvent>(std::move(event_header));
 
                 event->parseEvent(event_payload);
+                if (rows_header.flags & ROWS_END_OF_STATEMENT)
+                    table_maps.clear();
                 break;
             }
             case UPDATE_ROWS_EVENT_V1:
             case UPDATE_ROWS_EVENT_V2: {
-                if (doReplicate())
-                    event = std::make_shared<UpdateRowsEvent>(table_map, std::move(event_header));
+                RowsEventHeader rows_header(event_header.type);
+                rows_header.parse(event_payload);
+                if (doReplicate(rows_header.table_id))
+                    event = std::make_shared<UpdateRowsEvent>(table_maps.at(rows_header.table_id), std::move(event_header), rows_header);
                 else
                     event = std::make_shared<DryRunEvent>(std::move(event_header));
 
                 event->parseEvent(event_payload);
+                if (rows_header.flags & ROWS_END_OF_STATEMENT)
+                    table_maps.clear();
                 break;
             }
             case GTID_EVENT:
@@ -843,6 +861,18 @@ namespace MySQLReplication
             }
         }
     }
+
+    bool MySQLFlavor::doReplicate(UInt64 table_id)
+    {
+        if (replicate_do_db.empty())
+            return false;
+        if (table_id == 0x00ffffff) {
+            // Special "dummy event"
+            return false;
+        }
+        auto table_map = table_maps.at(table_id);
+        return table_map->schema == replicate_do_db;
+    }
 }
 
 }
diff --git a/src/Core/MySQL/MySQLReplication.h b/src/Core/MySQL/MySQLReplication.h
index 7c7604cad58..ae8dc80f673 100644
--- a/src/Core/MySQL/MySQLReplication.h
+++ b/src/Core/MySQL/MySQLReplication.h
@@ -430,6 +430,22 @@ namespace MySQLReplication
         void parseMeta(String meta);
     };
 
+    enum RowsEventFlags
+    {
+        ROWS_END_OF_STATEMENT = 1
+    };
+
+    class RowsEventHeader
+    {
+    public:
+        EventType type;
+        UInt64 table_id;
+        UInt16 flags;
+
+        RowsEventHeader(EventType type_) : type(type_), table_id(0), flags(0) {};
+        void parse(ReadBuffer & payload);
+    };
+
     class RowsEvent : public EventBase
     {
     public:
@@ -438,9 +454,11 @@ namespace MySQLReplication
         String table;
         std::vector<Field> rows;
 
-        RowsEvent(std::shared_ptr<TableMapEvent> table_map_, EventHeader && header_)
-            : EventBase(std::move(header_)), number_columns(0), table_id(0), flags(0), extra_data_len(0), table_map(table_map_)
+        RowsEvent(std::shared_ptr<TableMapEvent> table_map_, EventHeader && header_, const RowsEventHeader & rows_header)
+            : EventBase(std::move(header_)), number_columns(0), table_map(table_map_)
         {
+            table_id = rows_header.table_id;
+            flags = rows_header.flags;
             schema = table_map->schema;
             table = table_map->table;
         }
@@ -450,7 +468,6 @@ namespace MySQLReplication
     protected:
         UInt64 table_id;
         UInt16 flags;
-        UInt16 extra_data_len;
         Bitmap columns_present_bitmap1;
         Bitmap columns_present_bitmap2;
 
@@ -464,21 +481,24 @@ namespace MySQLReplication
     class WriteRowsEvent : public RowsEvent
     {
     public:
-        WriteRowsEvent(std::shared_ptr<TableMapEvent> table_map_, EventHeader && header_) : RowsEvent(table_map_, std::move(header_)) {}
+        WriteRowsEvent(std::shared_ptr<TableMapEvent> table_map_, EventHeader && header_, const RowsEventHeader & rows_header)
+            : RowsEvent(table_map_, std::move(header_), rows_header) {}
         MySQLEventType type() const override { return MYSQL_WRITE_ROWS_EVENT; }
     };
 
     class DeleteRowsEvent : public RowsEvent
     {
     public:
-        DeleteRowsEvent(std::shared_ptr<TableMapEvent> table_map_, EventHeader && header_) : RowsEvent(table_map_, std::move(header_)) {}
+        DeleteRowsEvent(std::shared_ptr<TableMapEvent> table_map_, EventHeader && header_, const RowsEventHeader & rows_header)
+            : RowsEvent(table_map_, std::move(header_), rows_header) {}
         MySQLEventType type() const override { return MYSQL_DELETE_ROWS_EVENT; }
     };
 
     class UpdateRowsEvent : public RowsEvent
     {
     public:
-        UpdateRowsEvent(std::shared_ptr<TableMapEvent> table_map_, EventHeader && header_) : RowsEvent(table_map_, std::move(header_)) {}
+        UpdateRowsEvent(std::shared_ptr<TableMapEvent> table_map_, EventHeader && header_, const RowsEventHeader & rows_header)
+            : RowsEvent(table_map_, std::move(header_), rows_header) {}
         MySQLEventType type() const override { return MYSQL_UPDATE_ROWS_EVENT; }
     };
 
@@ -546,10 +566,10 @@ namespace MySQLReplication
         Position position;
         BinlogEventPtr event;
         String replicate_do_db;
-        std::shared_ptr<TableMapEvent> table_map;
+        std::map<UInt64, std::shared_ptr<TableMapEvent> > table_maps;
         size_t checksum_signature_length = 4;
 
-        inline bool doReplicate() { return (replicate_do_db.empty() || table_map->schema == replicate_do_db); }
+        bool doReplicate(UInt64 table_id);
     };
 }
 
diff --git a/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py b/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py
index 38ff8fd752b..b7f432d963b 100644
--- a/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py
+++ b/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py
@@ -757,3 +757,18 @@ def system_parts_test(clickhouse_node, mysql_node, service_name):
     check_active_parts(2)
     clickhouse_node.query("OPTIMIZE TABLE system_parts_test.test")
     check_active_parts(1)
+
+def multi_table_update_test(clickhouse_node, mysql_node, service_name):
+    mysql_node.query("DROP DATABASE IF EXISTS multi_table_update")
+    clickhouse_node.query("DROP DATABASE IF EXISTS multi_table_update")
+    mysql_node.query("CREATE DATABASE multi_table_update")
+    mysql_node.query("CREATE TABLE multi_table_update.a (id INT(11) NOT NULL PRIMARY KEY, value VARCHAR(255))")
+    mysql_node.query("CREATE TABLE multi_table_update.b (id INT(11) NOT NULL PRIMARY KEY, othervalue VARCHAR(255))")
+    mysql_node.query("INSERT INTO multi_table_update.a VALUES(1, 'foo')")
+    mysql_node.query("INSERT INTO multi_table_update.b VALUES(1, 'bar')")
+    clickhouse_node.query("CREATE DATABASE multi_table_update ENGINE = MaterializeMySQL('{}:3306', 'multi_table_update', 'root', 'clickhouse')".format(service_name))
+    check_query(clickhouse_node, "SHOW TABLES FROM multi_table_update", "a\nb\n")
+    mysql_node.query("UPDATE multi_table_update.a, multi_table_update.b SET value='baz', othervalue='quux' where a.id=b.id")
+
+    check_query(clickhouse_node, "SELECT * FROM multi_table_update.a", "1\tbaz\n")
+    check_query(clickhouse_node, "SELECT * FROM multi_table_update.b", "1\tquux\n")
diff --git a/tests/integration/test_materialize_mysql_database/test.py b/tests/integration/test_materialize_mysql_database/test.py
index 8cd2f7def07..32316901dce 100644
--- a/tests/integration/test_materialize_mysql_database/test.py
+++ b/tests/integration/test_materialize_mysql_database/test.py
@@ -237,3 +237,8 @@ def test_utf8mb4(started_cluster, started_mysql_8_0, started_mysql_5_7, clickhou
 @pytest.mark.parametrize(('clickhouse_node'), [node_db_ordinary, node_db_ordinary])
 def test_system_parts_table(started_cluster, started_mysql_8_0, clickhouse_node):
     materialize_with_ddl.system_parts_test(clickhouse_node, started_mysql_8_0, "mysql8_0")
+
+@pytest.mark.parametrize(('clickhouse_node'), [node_db_ordinary, node_db_ordinary])
+def test_multi_table_update(started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node):
+    materialize_with_ddl.multi_table_update_test(clickhouse_node, started_mysql_5_7, "mysql1")
+    materialize_with_ddl.multi_table_update_test(clickhouse_node, started_mysql_8_0, "mysql8_0")

From a3ac27674134a743b16e963aa5490e8077cbb297 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 4 Feb 2021 14:56:04 +0300
Subject: [PATCH 0639/1238] Add type promotion for modulo of division of
 negative number

---
 src/DataTypes/NumberTraits.h                          | 11 ++++++++---
 .../01700_mod_negative_type_promotion.reference       |  1 +
 .../0_stateless/01700_mod_negative_type_promotion.sql |  1 +
 3 files changed, 10 insertions(+), 3 deletions(-)
 create mode 100644 tests/queries/0_stateless/01700_mod_negative_type_promotion.reference
 create mode 100644 tests/queries/0_stateless/01700_mod_negative_type_promotion.sql

diff --git a/src/DataTypes/NumberTraits.h b/src/DataTypes/NumberTraits.h
index 3aa00c68274..479fc37c795 100644
--- a/src/DataTypes/NumberTraits.h
+++ b/src/DataTypes/NumberTraits.h
@@ -104,11 +104,16 @@ template <typename A, typename B> struct ResultOfIntegerDivision
         sizeof(A)>::Type;
 };
 
-/** Division with remainder you get a number with the same number of bits as in divisor.
-    */
+/** Division with remainder you get a number with the same number of bits as in divisor,
+  * or larger in case of signed type.
+  */
 template <typename A, typename B> struct ResultOfModulo
 {
-    using Type0 = typename Construct<is_signed_v<A> || is_signed_v<B>, false, sizeof(B)>::Type;
+    static constexpr bool result_is_signed = is_signed_v<A>;
+    /// If modulo of division can yield negative number, we need larger type to accomodate it.
+    /// Example: toInt32(-199) % toUInt8(200) will return -199 that does not fit in Int8, only in Int16.
+    static constexpr size_t size_of_result = result_is_signed ? nextSize(sizeof(B)) : sizeof(B);
+    using Type0 = typename Construct<result_is_signed, false, size_of_result>::Type;
     using Type = std::conditional_t<std::is_floating_point_v<A> || std::is_floating_point_v<B>, Float64, Type0>;
 };
 
diff --git a/tests/queries/0_stateless/01700_mod_negative_type_promotion.reference b/tests/queries/0_stateless/01700_mod_negative_type_promotion.reference
new file mode 100644
index 00000000000..b8d2624b7fe
--- /dev/null
+++ b/tests/queries/0_stateless/01700_mod_negative_type_promotion.reference
@@ -0,0 +1 @@
+-199
diff --git a/tests/queries/0_stateless/01700_mod_negative_type_promotion.sql b/tests/queries/0_stateless/01700_mod_negative_type_promotion.sql
new file mode 100644
index 00000000000..db850ba5c80
--- /dev/null
+++ b/tests/queries/0_stateless/01700_mod_negative_type_promotion.sql
@@ -0,0 +1 @@
+SELECT toInt32(-199) % 200;

From 858043cd538fb21fbfff69e78b3d3a07b996b100 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Thu, 4 Feb 2021 15:06:48 +0300
Subject: [PATCH 0640/1238] detect unmarked long tests in flaky check

---
 tests/clickhouse-test                         | 26 ++++++++++++-------
 ...ong_concurrent_select_and_drop_deadlock.sh |  2 ++
 .../01232_preparing_sets_race_condition.sh    |  2 ++
 .../0_stateless/01443_merge_truncate.sh       |  2 ++
 4 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/tests/clickhouse-test b/tests/clickhouse-test
index 0c49a3670a0..74f5f07eb9d 100755
--- a/tests/clickhouse-test
+++ b/tests/clickhouse-test
@@ -428,15 +428,23 @@ def run_tests_array(all_tests_with_params):
                                 status += print_test_time(total_time)
                                 status += " - result differs with reference:\n{}\n".format(diff)
                             else:
-                                passed_total += 1
-                                failures_chain = 0
-                                status += MSG_OK
-                                status += print_test_time(total_time)
-                                status += "\n"
-                                if os.path.exists(stdout_file):
-                                    os.remove(stdout_file)
-                                if os.path.exists(stderr_file):
-                                    os.remove(stderr_file)
+                                if args.test_runs > 1 and total_time > 30 and 'long' not in name:
+                                    # We're in Flaky Check mode, check the run time as well while we're at it.
+                                    failures += 1
+                                    failures_chain += 1
+                                    status += MSG_FAIL
+                                    status += print_test_time(total_time)
+                                    status += " - Long test not marked as 'long'"
+                                else:
+                                    passed_total += 1
+                                    failures_chain = 0
+                                    status += MSG_OK
+                                    status += print_test_time(total_time)
+                                    status += "\n"
+                                    if os.path.exists(stdout_file):
+                                        os.remove(stdout_file)
+                                    if os.path.exists(stderr_file):
+                                        os.remove(stderr_file)
 
             if status and not status.endswith('\n'):
                 status += '\n'
diff --git a/tests/queries/0_stateless/00840_long_concurrent_select_and_drop_deadlock.sh b/tests/queries/0_stateless/00840_long_concurrent_select_and_drop_deadlock.sh
index 60a2d8eb9a0..f7659bc3728 100755
--- a/tests/queries/0_stateless/00840_long_concurrent_select_and_drop_deadlock.sh
+++ b/tests/queries/0_stateless/00840_long_concurrent_select_and_drop_deadlock.sh
@@ -1,5 +1,7 @@
 #!/usr/bin/env bash
 
+# remove this comment before merge
+
 set -e
 
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
diff --git a/tests/queries/0_stateless/01232_preparing_sets_race_condition.sh b/tests/queries/0_stateless/01232_preparing_sets_race_condition.sh
index e42e68a6589..5aaac7cd86e 100755
--- a/tests/queries/0_stateless/01232_preparing_sets_race_condition.sh
+++ b/tests/queries/0_stateless/01232_preparing_sets_race_condition.sh
@@ -1,5 +1,7 @@
 #!/usr/bin/env bash
 
+# remove this comment before merge
+
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
diff --git a/tests/queries/0_stateless/01443_merge_truncate.sh b/tests/queries/0_stateless/01443_merge_truncate.sh
index ffd5f225ffe..538e457a5d8 100755
--- a/tests/queries/0_stateless/01443_merge_truncate.sh
+++ b/tests/queries/0_stateless/01443_merge_truncate.sh
@@ -1,5 +1,7 @@
 #!/usr/bin/env bash
 
+# remove this comment before merge
+
 set -e
 
 CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)

From da51ea179464ea96156f8205312a202f9956db9e Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Thu, 4 Feb 2021 15:07:41 +0300
Subject: [PATCH 0641/1238] Simplify shutdown and requests processing

---
 src/Common/ZooKeeper/ZooKeeperCommon.cpp      |  7 ++
 src/Common/ZooKeeper/ZooKeeperCommon.h        |  4 +-
 src/Coordination/NuKeeperCommon.h             | 24 ++++++
 src/Coordination/NuKeeperServer.cpp           | 83 +++++++------------
 src/Coordination/NuKeeperServer.h             | 12 +--
 src/Coordination/NuKeeperStateMachine.cpp     |  8 +-
 src/Coordination/NuKeeperStateMachine.h       |  2 +
 src/Coordination/NuKeeperStorage.cpp          | 48 ++---------
 src/Coordination/NuKeeperStorage.h            |  3 +-
 .../NuKeeperStorageDispatcher.cpp             | 19 ++---
 src/Coordination/SessionExpiryQueue.cpp       |  6 ++
 src/Coordination/SessionExpiryQueue.h         |  2 +
 src/Server/NuKeeperTCPHandler.cpp             | 13 +--
 13 files changed, 105 insertions(+), 126 deletions(-)
 create mode 100644 src/Coordination/NuKeeperCommon.h

diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.cpp b/src/Common/ZooKeeper/ZooKeeperCommon.cpp
index 278d36f9245..2d32cd75624 100644
--- a/src/Common/ZooKeeper/ZooKeeperCommon.cpp
+++ b/src/Common/ZooKeeper/ZooKeeperCommon.cpp
@@ -51,6 +51,13 @@ void ZooKeeperWatchResponse::writeImpl(WriteBuffer & out) const
     Coordination::write(path, out);
 }
 
+void ZooKeeperWatchResponse::write(WriteBuffer & out) const
+{
+    if (error == Error::ZOK)
+        ZooKeeperResponse::write(out);
+    /// skip bad responses for watches
+}
+
 void ZooKeeperAuthRequest::writeImpl(WriteBuffer & out) const
 {
     Coordination::write(type, out);
diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.h b/src/Common/ZooKeeper/ZooKeeperCommon.h
index 84d7a0823ec..8bc1cde8cd7 100644
--- a/src/Common/ZooKeeper/ZooKeeperCommon.h
+++ b/src/Common/ZooKeeper/ZooKeeperCommon.h
@@ -30,7 +30,7 @@ struct ZooKeeperResponse : virtual Response
     virtual ~ZooKeeperResponse() override = default;
     virtual void readImpl(ReadBuffer &) = 0;
     virtual void writeImpl(WriteBuffer &) const = 0;
-    void write(WriteBuffer & out) const;
+    virtual void write(WriteBuffer & out) const;
     virtual OpNum getOpNum() const = 0;
 };
 
@@ -88,6 +88,8 @@ struct ZooKeeperWatchResponse final : WatchResponse, ZooKeeperResponse
 
     void writeImpl(WriteBuffer & out) const override;
 
+    void write(WriteBuffer & out) const override;
+
     OpNum getOpNum() const override
     {
         throw Exception("OpNum for watch response doesn't exist", Error::ZRUNTIMEINCONSISTENCY);
diff --git a/src/Coordination/NuKeeperCommon.h b/src/Coordination/NuKeeperCommon.h
new file mode 100644
index 00000000000..14fc612093c
--- /dev/null
+++ b/src/Coordination/NuKeeperCommon.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include <Common/ZooKeeper/ZooKeeperCommon.h>
+
+namespace DB
+{
+
+struct NuKeeperRequest
+{
+    int64_t session_id;
+    Coordination::ZooKeeperRequestPtr request;
+};
+
+using NuKeeperRequests = std::vector<NuKeeperRequest>;
+
+struct NuKeeperResponse
+{
+    int64_t session_id;
+    Coordination::ZooKeeperRequestPtr response;
+};
+
+using NuKeeperResponses = std::vector<NuKeeperResponse>;
+
+}
diff --git a/src/Coordination/NuKeeperServer.cpp b/src/Coordination/NuKeeperServer.cpp
index d700956c522..3910376ebda 100644
--- a/src/Coordination/NuKeeperServer.cpp
+++ b/src/Coordination/NuKeeperServer.cpp
@@ -17,6 +17,7 @@ namespace ErrorCodes
 {
     extern const int TIMEOUT_EXCEEDED;
     extern const int RAFT_ERROR;
+    extern const int LOGICAL_ERROR;
 }
 
 NuKeeperServer::NuKeeperServer(int server_id_, const std::string & hostname_, int port_)
@@ -75,24 +76,11 @@ void NuKeeperServer::startup()
     throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Cannot start RAFT server within startup timeout");
 }
 
-NuKeeperStorage::ResponsesForSessions NuKeeperServer::shutdown(const NuKeeperStorage::RequestsForSessions & expired_requests)
+void NuKeeperServer::shutdown()
 {
-    NuKeeperStorage::ResponsesForSessions responses;
-    if (isLeader())
-    {
-        try
-        {
-            responses = putRequests(expired_requests);
-        }
-        catch (...)
-        {
-            tryLogCurrentException(__PRETTY_FUNCTION__);
-        }
-    }
-
+    state_machine->shutdownStorage();
     if (!launcher.shutdown(5))
         LOG_WARNING(&Poco::Logger::get("NuKeeperServer"), "Failed to shutdown RAFT server in {} seconds", 5);
-    return responses;
 }
 
 namespace
@@ -106,12 +94,11 @@ nuraft::ptr<nuraft::buffer> getZooKeeperLogEntry(int64_t session_id, const Coord
     return buf.getBuffer();
 }
 
-}
-
-NuKeeperStorage::ResponsesForSessions NuKeeperServer::readZooKeeperResponses(nuraft::ptr<nuraft::buffer> & buffer)
+NuKeeperStorage::ResponsesForSessions readZooKeeperResponses(nuraft::ptr<nuraft::buffer> & buffer, const Coordination::ZooKeeperRequestPtr & request)
 {
     DB::NuKeeperStorage::ResponsesForSessions results;
     DB::ReadBufferFromNuraftBuffer buf(buffer);
+    bool response_found = false;
 
     while (!buf.eof())
     {
@@ -122,7 +109,6 @@ NuKeeperStorage::ResponsesForSessions NuKeeperServer::readZooKeeperResponses(nur
         int64_t zxid;
         Coordination::Error err;
 
-        /// FIXME (alesap) We don't need to parse responses here
         Coordination::read(length, buf);
         Coordination::read(xid, buf);
         Coordination::read(zxid, buf);
@@ -133,17 +119,11 @@ NuKeeperStorage::ResponsesForSessions NuKeeperServer::readZooKeeperResponses(nur
             response = std::make_shared<Coordination::ZooKeeperWatchResponse>();
         else
         {
-            auto session_xids = ops_mapping.find(session_id);
-            if (session_xids == ops_mapping.end())
-                throw Exception(ErrorCodes::RAFT_ERROR, "Unknown session id {}", session_id);
-            auto response_it = session_xids->second.find(xid);
-            if (response_it == session_xids->second.end())
-                throw Exception(ErrorCodes::RAFT_ERROR, "Unknown xid {} for session id {}", xid, session_id);
+            if (response_found)
+                throw Exception(ErrorCodes::LOGICAL_ERROR, "More than one non-watch response for single request with xid {}, response xid {}", request->xid, xid);
 
-            response = response_it->second;
-            ops_mapping[session_id].erase(response_it);
-            if (ops_mapping[session_id].empty())
-                ops_mapping.erase(session_xids);
+            response_found = true;
+            response = request->makeResponse();
         }
 
         if (err == Coordination::Error::ZOK && (xid == Coordination::WATCH_XID || response->getOpNum() != Coordination::OpNum::Close))
@@ -158,20 +138,19 @@ NuKeeperStorage::ResponsesForSessions NuKeeperServer::readZooKeeperResponses(nur
     return results;
 }
 
-NuKeeperStorage::ResponsesForSessions NuKeeperServer::putRequests(const NuKeeperStorage::RequestsForSessions & requests)
+}
+
+NuKeeperStorage::ResponsesForSessions NuKeeperServer::putRequest(const NuKeeperStorage::RequestForSession & request_for_session)
 {
-    if (isLeaderAlive() && requests.size() == 1 && requests[0].request->isReadRequest())
+    auto [session_id, request] = request_for_session;
+    if (isLeaderAlive() && request_for_session.request->isReadRequest())
     {
-        return state_machine->processReadRequest(requests[0]);
+        return state_machine->processReadRequest(request_for_session);
     }
     else
     {
         std::vector<nuraft::ptr<nuraft::buffer>> entries;
-        for (const auto & [session_id, request] : requests)
-        {
-            ops_mapping[session_id][request->xid] = request->makeResponse();
-            entries.push_back(getZooKeeperLogEntry(session_id, request));
-        }
+        entries.push_back(getZooKeeperLogEntry(session_id, request));
 
         std::lock_guard lock(append_entries_mutex);
 
@@ -179,28 +158,22 @@ NuKeeperStorage::ResponsesForSessions NuKeeperServer::putRequests(const NuKeeper
         if (!result->get_accepted())
         {
             NuKeeperStorage::ResponsesForSessions responses;
-            for (const auto & [session_id, request] : requests)
-            {
-                auto response = request->makeResponse();
-                response->xid = request->xid;
-                response->zxid = 0; /// FIXME what we can do with it?
-                response->error = Coordination::Error::ZOPERATIONTIMEOUT;
-                responses.push_back(DB::NuKeeperStorage::ResponseForSession{session_id, response});
-            }
+            auto response = request->makeResponse();
+            response->xid = request->xid;
+            response->zxid = 0;
+            response->error = Coordination::Error::ZOPERATIONTIMEOUT;
+            responses.push_back(DB::NuKeeperStorage::ResponseForSession{session_id, response});
             return responses;
         }
 
         if (result->get_result_code() == nuraft::cmd_result_code::TIMEOUT)
         {
             NuKeeperStorage::ResponsesForSessions responses;
-            for (const auto & [session_id, request] : requests)
-            {
-                auto response = request->makeResponse();
-                response->xid = request->xid;
-                response->zxid = 0; /// FIXME what we can do with it?
-                response->error = Coordination::Error::ZOPERATIONTIMEOUT;
-                responses.push_back(DB::NuKeeperStorage::ResponseForSession{session_id, response});
-            }
+            auto response = request->makeResponse();
+            response->xid = request->xid;
+            response->zxid = 0;
+            response->error = Coordination::Error::ZOPERATIONTIMEOUT;
+            responses.push_back(DB::NuKeeperStorage::ResponseForSession{session_id, response});
             return responses;
         }
         else if (result->get_result_code() != nuraft::cmd_result_code::OK)
@@ -210,7 +183,7 @@ NuKeeperStorage::ResponsesForSessions NuKeeperServer::putRequests(const NuKeeper
         if (result_buf == nullptr)
             throw Exception(ErrorCodes::RAFT_ERROR, "Received nullptr from RAFT leader");
 
-        return readZooKeeperResponses(result_buf);
+        return readZooKeeperResponses(result_buf, request);
     }
 }
 
@@ -250,7 +223,7 @@ bool NuKeeperServer::isLeaderAlive() const
 
 bool NuKeeperServer::waitForServer(int32_t id) const
 {
-    for (size_t i = 0; i < 10; ++i)
+    for (size_t i = 0; i < 50; ++i)
     {
         if (raft_instance->get_srv_config(id) != nullptr)
             return true;
diff --git a/src/Coordination/NuKeeperServer.h b/src/Coordination/NuKeeperServer.h
index 32ca61e924f..358a4212967 100644
--- a/src/Coordination/NuKeeperServer.h
+++ b/src/Coordination/NuKeeperServer.h
@@ -29,14 +29,6 @@ private:
 
     nuraft::ptr<nuraft::raft_server> raft_instance;
 
-    using XIDToOp = std::unordered_map<Coordination::XID, Coordination::ZooKeeperResponsePtr>;
-
-    using SessionIDOps = std::unordered_map<int64_t, XIDToOp>;
-
-    SessionIDOps ops_mapping;
-
-    NuKeeperStorage::ResponsesForSessions readZooKeeperResponses(nuraft::ptr<nuraft::buffer> & buffer);
-
     std::mutex append_entries_mutex;
 
 public:
@@ -44,7 +36,7 @@ public:
 
     void startup();
 
-    NuKeeperStorage::ResponsesForSessions putRequests(const NuKeeperStorage::RequestsForSessions & requests);
+    NuKeeperStorage::ResponsesForSessions putRequest(const NuKeeperStorage::RequestForSession & request);
 
     int64_t getSessionID(int64_t session_timeout_ms);
 
@@ -60,7 +52,7 @@ public:
     void waitForServers(const std::vector<int32_t> & ids) const;
     void waitForCatchUp() const;
 
-    NuKeeperStorage::ResponsesForSessions shutdown(const NuKeeperStorage::RequestsForSessions & expired_requests);
+    void shutdown();
 };
 
 }
diff --git a/src/Coordination/NuKeeperStateMachine.cpp b/src/Coordination/NuKeeperStateMachine.cpp
index f7b7ba3c567..092b2b0580f 100644
--- a/src/Coordination/NuKeeperStateMachine.cpp
+++ b/src/Coordination/NuKeeperStateMachine.cpp
@@ -43,7 +43,7 @@ nuraft::ptr<nuraft::buffer> writeResponses(NuKeeperStorage::ResponsesForSessions
 }
 
 
-NuKeeperStateMachine::NuKeeperStateMachine(long tick_time)
+NuKeeperStateMachine::NuKeeperStateMachine(int64_t tick_time)
     : storage(tick_time)
     , last_committed_idx(0)
     , log(&Poco::Logger::get("NuRaftStateMachine"))
@@ -240,4 +240,10 @@ std::unordered_set<int64_t> NuKeeperStateMachine::getDeadSessions()
     return storage.getDeadSessions();
 }
 
+void NuKeeperStateMachine::shutdownStorage()
+{
+    std::lock_guard lock(storage_lock);
+    storage.finalize();
+}
+
 }
diff --git a/src/Coordination/NuKeeperStateMachine.h b/src/Coordination/NuKeeperStateMachine.h
index bfb67f10a67..e45c197db8c 100644
--- a/src/Coordination/NuKeeperStateMachine.h
+++ b/src/Coordination/NuKeeperStateMachine.h
@@ -51,6 +51,8 @@ public:
 
     std::unordered_set<int64_t> getDeadSessions();
 
+    void shutdownStorage();
+
 private:
     struct StorageSnapshot
     {
diff --git a/src/Coordination/NuKeeperStorage.cpp b/src/Coordination/NuKeeperStorage.cpp
index 45701b63b8b..679426a1a64 100644
--- a/src/Coordination/NuKeeperStorage.cpp
+++ b/src/Coordination/NuKeeperStorage.cpp
@@ -513,50 +513,23 @@ struct NuKeeperStorageCloseRequest final : public NuKeeperStorageRequest
     }
 };
 
-NuKeeperStorage::ResponsesForSessions NuKeeperStorage::finalize(const RequestsForSessions & expired_requests)
+void NuKeeperStorage::finalize()
 {
     if (finalized)
         throw DB::Exception("Testkeeper storage already finalized", ErrorCodes::LOGICAL_ERROR);
 
     finalized = true;
 
-    /// TODO delete ephemerals
-    ResponsesForSessions finalize_results;
-    auto finish_watch = [] (const auto & watch_pair) -> ResponsesForSessions
-    {
-        ResponsesForSessions results;
-        std::shared_ptr<Coordination::ZooKeeperWatchResponse> response = std::make_shared<Coordination::ZooKeeperWatchResponse>();
-        response->type = Coordination::SESSION;
-        response->state = Coordination::EXPIRED_SESSION;
-        response->error = Coordination::Error::ZSESSIONEXPIRED;
+    for (const auto & [session_id, ephemerals] : ephemerals)
+        for (const String & ephemeral_path : ephemerals)
+            container.erase(ephemeral_path);
 
-        for (auto & watcher_session : watch_pair.second)
-            results.push_back(ResponseForSession{watcher_session, response});
-        return results;
-    };
-
-    for (auto & path_watch : watches)
-    {
-        auto watch_responses = finish_watch(path_watch);
-        finalize_results.insert(finalize_results.end(), watch_responses.begin(), watch_responses.end());
-    }
+    ephemerals.clear();
 
     watches.clear();
-    for (auto & path_watch : list_watches)
-    {
-        auto list_watch_responses = finish_watch(path_watch);
-        finalize_results.insert(finalize_results.end(), list_watch_responses.begin(), list_watch_responses.end());
-    }
     list_watches.clear();
     sessions_and_watchers.clear();
-
-    for (const auto & [session_id, zk_request] : expired_requests)
-    {
-        auto response = zk_request->makeResponse();
-        response->error = Coordination::Error::ZSESSIONEXPIRED;
-        finalize_results.push_back(ResponseForSession{session_id, response});
-    }
-    return finalize_results;
+    session_expiry_queue.clear();
 }
 
 
@@ -675,15 +648,6 @@ NuKeeperStorage::ResponsesForSessions NuKeeperStorage::processRequest(const Coor
                 watches[zk_request->getPath()].emplace_back(session_id);
                 sessions_and_watchers[session_id].emplace(zk_request->getPath());
             }
-            else
-            {
-                std::shared_ptr<Coordination::ZooKeeperWatchResponse> watch_response = std::make_shared<Coordination::ZooKeeperWatchResponse>();
-                watch_response->path = zk_request->getPath();
-                watch_response->xid = -1;
-                watch_response->error = response->error;
-                watch_response->type = Coordination::Event::NOTWATCHING;
-                results.push_back(ResponseForSession{session_id, watch_response});
-            }
         }
 
         if (response->error == Coordination::Error::ZOK)
diff --git a/src/Coordination/NuKeeperStorage.h b/src/Coordination/NuKeeperStorage.h
index 6f709a6f480..20ab1982b4e 100644
--- a/src/Coordination/NuKeeperStorage.h
+++ b/src/Coordination/NuKeeperStorage.h
@@ -87,7 +87,8 @@ public:
     }
 
     ResponsesForSessions processRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id);
-    ResponsesForSessions finalize(const RequestsForSessions & expired_requests);
+
+    void finalize();
 
     std::unordered_set<int64_t> getDeadSessions()
     {
diff --git a/src/Coordination/NuKeeperStorageDispatcher.cpp b/src/Coordination/NuKeeperStorageDispatcher.cpp
index cf36fd40bc3..fbf54106316 100644
--- a/src/Coordination/NuKeeperStorageDispatcher.cpp
+++ b/src/Coordination/NuKeeperStorageDispatcher.cpp
@@ -32,7 +32,7 @@ void NuKeeperStorageDispatcher::processingThread()
 
             try
             {
-                auto responses = server->putRequests({request});
+                auto responses = server->putRequest(request);
                 for (const auto & response_for_session : responses)
                     setResponse(response_for_session.session_id, response_for_session.response);
             }
@@ -196,17 +196,16 @@ void NuKeeperStorageDispatcher::shutdown()
         }
 
         if (server)
+            server->shutdown();
+
+        NuKeeperStorage::RequestForSession request_for_session;
+        while (requests_queue.tryPop(request_for_session))
         {
-            NuKeeperStorage::RequestsForSessions expired_requests;
-            NuKeeperStorage::RequestForSession request;
-            while (requests_queue.tryPop(request))
-                expired_requests.push_back(NuKeeperStorage::RequestForSession{request});
-
-            auto expired_responses = server->shutdown(expired_requests);
-
-            for (const auto & response_for_session : expired_responses)
-                setResponse(response_for_session.session_id, response_for_session.response);
+            auto response = request_for_session.request->makeResponse();
+            response->error = Coordination::Error::ZSESSIONEXPIRED;
+            setResponse(request_for_session.session_id, response);
         }
+        session_to_response_callback.clear();
     }
     catch (...)
     {
diff --git a/src/Coordination/SessionExpiryQueue.cpp b/src/Coordination/SessionExpiryQueue.cpp
index f90cd089be8..51837087af5 100644
--- a/src/Coordination/SessionExpiryQueue.cpp
+++ b/src/Coordination/SessionExpiryQueue.cpp
@@ -74,4 +74,10 @@ std::unordered_set<int64_t> SessionExpiryQueue::getExpiredSessions()
     return {};
 }
 
+void SessionExpiryQueue::clear()
+{
+    session_to_timeout.clear();
+    expiry_to_sessions.clear();
+}
+
 }
diff --git a/src/Coordination/SessionExpiryQueue.h b/src/Coordination/SessionExpiryQueue.h
index 3b4ad6dde88..dff629a2432 100644
--- a/src/Coordination/SessionExpiryQueue.h
+++ b/src/Coordination/SessionExpiryQueue.h
@@ -38,6 +38,8 @@ public:
     bool update(int64_t session_id, int64_t timeout_ms);
 
     std::unordered_set<int64_t> getExpiredSessions();
+
+    void clear();
 };
 
 }
diff --git a/src/Server/NuKeeperTCPHandler.cpp b/src/Server/NuKeeperTCPHandler.cpp
index 9d39c317356..706b57ee71d 100644
--- a/src/Server/NuKeeperTCPHandler.cpp
+++ b/src/Server/NuKeeperTCPHandler.cpp
@@ -404,12 +404,13 @@ void NuKeeperTCPHandler::runImpl()
                     LOG_DEBUG(log, "Session #{} successfully closed", session_id);
                     return;
                 }
-
-                if (response->error == Coordination::Error::ZOK)
-                    response->write(*out);
-                else if (response->xid != Coordination::WATCH_XID)
-                    response->write(*out);
-                /// skipping bad response for watch
+                response->write(*out);
+                if (response->error == Coordination::Error::ZSESSIONEXPIRED)
+                {
+                    LOG_DEBUG(log, "Session #{} expired because server shutting down or quorum is not alive", session_id);
+                    nu_keeper_storage_dispatcher->finishSession(session_id);
+                    return;
+                }
                 result.ready_responses_count--;
             }
 

From 4d66bc413cacd43bca352ea3ce5c912975283ac2 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Thu, 4 Feb 2021 16:19:20 +0300
Subject: [PATCH 0642/1238] Update test.

---
 tests/queries/0_stateless/00597_push_down_predicate.sql | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/queries/0_stateless/00597_push_down_predicate.sql b/tests/queries/0_stateless/00597_push_down_predicate.sql
index ea01bba9f4d..ec306ac6792 100644
--- a/tests/queries/0_stateless/00597_push_down_predicate.sql
+++ b/tests/queries/0_stateless/00597_push_down_predicate.sql
@@ -8,6 +8,8 @@ DROP TABLE IF EXISTS test_view_00597;
 CREATE TABLE test_00597(date Date, id Int8, name String, value Int64) ENGINE = MergeTree(date, (id, date), 8192);
 CREATE VIEW test_view_00597 AS SELECT * FROM test_00597;
 
+SELECT * FROM (SELECT floor(floor(1, floor(NULL), id = 257), floor(floor(floor(floor(NULL), '10485.76', '9223372036854775807', NULL), floor(10, floor(65535, NULL), 100.0000991821289), NULL)), '2.56'), b.* FROM (SELECT floor(floor(floor(floor(NULL), 1000.0001220703125))), * FROM test_00597) AS b) WHERE id = 257;
+
 INSERT INTO test_00597 VALUES('2000-01-01', 1, 'test string 1', 1);
 INSERT INTO test_00597 VALUES('2000-01-01', 2, 'test string 2', 2);
 

From d85e9b496c0292675778f88dbddaa99dc030de52 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Thu, 4 Feb 2021 16:22:30 +0300
Subject: [PATCH 0643/1238] Fix gcc-10 build

---
 src/Coordination/NuKeeperStorage.cpp                |  4 ++--
 tests/integration/test_testkeeper_multinode/test.py | 12 +++++++++++-
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/src/Coordination/NuKeeperStorage.cpp b/src/Coordination/NuKeeperStorage.cpp
index 679426a1a64..ef59e717b4c 100644
--- a/src/Coordination/NuKeeperStorage.cpp
+++ b/src/Coordination/NuKeeperStorage.cpp
@@ -520,8 +520,8 @@ void NuKeeperStorage::finalize()
 
     finalized = true;
 
-    for (const auto & [session_id, ephemerals] : ephemerals)
-        for (const String & ephemeral_path : ephemerals)
+    for (const auto & [session_id, ephemerals_paths] : ephemerals)
+        for (const String & ephemeral_path : ephemerals_paths)
             container.erase(ephemeral_path);
 
     ephemerals.clear();
diff --git a/tests/integration/test_testkeeper_multinode/test.py b/tests/integration/test_testkeeper_multinode/test.py
index d815af7a63e..caba7ecddd9 100644
--- a/tests/integration/test_testkeeper_multinode/test.py
+++ b/tests/integration/test_testkeeper_multinode/test.py
@@ -124,6 +124,11 @@ def test_blocade_leader(started_cluster):
                 node.query("SYSTEM SYNC REPLICA t1", timeout=10)
                 break
             except Exception as ex:
+                try:
+                    node.query("ATTACH TABLE t1")
+                except Exception as attach_ex:
+                    print("Got exception node{}".format(n + 1), smaller_exception(attach_ex))
+
                 print("Got exception node{}".format(n + 1), smaller_exception(ex))
                 time.sleep(0.5)
         else:
@@ -229,13 +234,18 @@ def test_blocade_leader_twice(started_cluster):
         else:
             assert False, "Cannot reconnect for node{}".format(n + 1)
 
-    for node in [node1, node2, node3]:
+    for n, node in enumerate([node1, node2, node3]):
         for i in range(100):
             try:
                 node.query("SYSTEM RESTART REPLICA t2", timeout=10)
                 node.query("SYSTEM SYNC REPLICA t2", timeout=10)
                 break
             except Exception as ex:
+                try:
+                    node.query("ATTACH TABLE t2")
+                except Exception as attach_ex:
+                    print("Got exception node{}".format(n + 1), smaller_exception(attach_ex))
+
                 print("Got exception node{}".format(n + 1), smaller_exception(ex))
                 time.sleep(0.5)
         else:

From 933105a6678f7db1e520f77434acf03c013dce7f Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Thu, 4 Feb 2021 16:31:38 +0300
Subject: [PATCH 0644/1238] Fix session timeout

---
 tests/integration/test_testkeeper_back_to_back/test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/test_testkeeper_back_to_back/test.py b/tests/integration/test_testkeeper_back_to_back/test.py
index d3a9b742cdd..0f2c1ed19a5 100644
--- a/tests/integration/test_testkeeper_back_to_back/test.py
+++ b/tests/integration/test_testkeeper_back_to_back/test.py
@@ -25,7 +25,7 @@ def get_fake_zk():
     global _fake_zk_instance
     if not _fake_zk_instance:
         print("node", cluster.get_instance_ip("node"))
-        _fake_zk_instance = KazooClient(hosts=cluster.get_instance_ip("node") + ":9181")
+        _fake_zk_instance = KazooClient(hosts=cluster.get_instance_ip("node") + ":9181", timeout=30.0)
         def reset_last_zxid_listener(state):
             print("Fake zk callback called for state", state)
             global _fake_zk_instance

From a161969b50b8a167a93d352b8f1b1e9a004155f2 Mon Sep 17 00:00:00 2001
From: Haavard Kvaalen <havardk@kvaalen.no>
Date: Thu, 4 Feb 2021 14:43:18 +0100
Subject: [PATCH 0645/1238] Fix incorrectly placed brace

---
 src/Core/MySQL/MySQLReplication.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Core/MySQL/MySQLReplication.cpp b/src/Core/MySQL/MySQLReplication.cpp
index 8e1e0cd7646..8fdf337c849 100644
--- a/src/Core/MySQL/MySQLReplication.cpp
+++ b/src/Core/MySQL/MySQLReplication.cpp
@@ -866,7 +866,8 @@ namespace MySQLReplication
     {
         if (replicate_do_db.empty())
             return false;
-        if (table_id == 0x00ffffff) {
+        if (table_id == 0x00ffffff)
+        {
             // Special "dummy event"
             return false;
         }

From e3d6ffd9dc67221ab257a569f5c6e2ff05b0641d Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Thu, 4 Feb 2021 17:25:11 +0300
Subject: [PATCH 0646/1238] Remove AddingConstColumn step and transform.

---
 src/Interpreters/ActionsDAG.cpp               | 17 ++++++++
 src/Interpreters/ActionsDAG.h                 |  3 ++
 .../QueryPlan/AddingConstColumnStep.cpp       | 41 ------------------
 .../QueryPlan/AddingConstColumnStep.h         | 22 ----------
 .../Transforms/AddingConstColumnTransform.h   | 43 -------------------
 src/Processors/ya.make                        |  1 -
 .../MergeTree/MergeTreeDataSelectExecutor.cpp |  7 ++-
 src/Storages/StorageMerge.cpp                 |  7 ++-
 8 files changed, 28 insertions(+), 113 deletions(-)
 delete mode 100644 src/Processors/QueryPlan/AddingConstColumnStep.cpp
 delete mode 100644 src/Processors/QueryPlan/AddingConstColumnStep.h
 delete mode 100644 src/Processors/Transforms/AddingConstColumnTransform.h

diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp
index d8c40ffda2f..bc6f48b530d 100644
--- a/src/Interpreters/ActionsDAG.cpp
+++ b/src/Interpreters/ActionsDAG.cpp
@@ -727,6 +727,23 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions(
     return actions_dag;
 }
 
+ActionsDAGPtr ActionsDAG::makeAddingColumnActions(ColumnWithTypeAndName column)
+{
+    auto adding_column_action = std::make_shared<ActionsDAG>();
+    FunctionOverloadResolverPtr func_builder_materialize =
+            std::make_shared<FunctionOverloadResolverAdaptor>(
+                    std::make_unique<DefaultOverloadResolver>(
+                            std::make_shared<FunctionMaterialize>()));
+
+    auto column_name = column.name;
+    const auto & column_node = adding_column_action->addColumn(std::move(column));
+    Inputs inputs = {const_cast<Node *>(&column_node)};
+    auto & function_node = adding_column_action->addFunction(func_builder_materialize, std::move(inputs), {}, true);
+    adding_column_action->addAlias(function_node, std::move(column_name), true);
+
+    return adding_column_action;
+}
+
 ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second)
 {
     /// first: x (1), x (2), y ==> x (2), z, x (3)
diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h
index b12da30e24f..e1677384943 100644
--- a/src/Interpreters/ActionsDAG.h
+++ b/src/Interpreters/ActionsDAG.h
@@ -249,6 +249,9 @@ public:
         MatchColumnsMode mode,
         bool ignore_constant_values = false); /// Do not check that constants are same. Use value from result_header.
 
+    /// Create expression which add const column and then materialize it.
+    static ActionsDAGPtr makeAddingColumnActions(ColumnWithTypeAndName column);
+
     /// Create ActionsDAG which represents expression equivalent to applying first and second actions consequently.
     /// Is used to replace `(first -> second)` expression chain to single `merge(first, second)` expression.
     /// If first.settings.project_input is set, then outputs of `first` must include inputs of `second`.
diff --git a/src/Processors/QueryPlan/AddingConstColumnStep.cpp b/src/Processors/QueryPlan/AddingConstColumnStep.cpp
deleted file mode 100644
index 27c7720e58e..00000000000
--- a/src/Processors/QueryPlan/AddingConstColumnStep.cpp
+++ /dev/null
@@ -1,41 +0,0 @@
-#include <Processors/QueryPlan/AddingConstColumnStep.h>
-#include <Processors/QueryPipeline.h>
-#include <Processors/Transforms/AddingConstColumnTransform.h>
-#include <IO/Operators.h>
-
-namespace DB
-{
-
-static ITransformingStep::Traits getTraits()
-{
-    return ITransformingStep::Traits
-    {
-        {
-            .preserves_distinct_columns = true,
-            .returns_single_stream = false,
-            .preserves_number_of_streams = true,
-            .preserves_sorting = true,
-        },
-        {
-            .preserves_number_of_rows = true,
-        }
-    };
-}
-
-AddingConstColumnStep::AddingConstColumnStep(const DataStream & input_stream_, ColumnWithTypeAndName column_)
-    : ITransformingStep(input_stream_,
-                        AddingConstColumnTransform::transformHeader(input_stream_.header, column_),
-                        getTraits())
-    , column(std::move(column_))
-{
-}
-
-void AddingConstColumnStep::transformPipeline(QueryPipeline & pipeline)
-{
-    pipeline.addSimpleTransform([&](const Block & header)
-    {
-        return std::make_shared<AddingConstColumnTransform>(header, column);
-    });
-}
-
-}
diff --git a/src/Processors/QueryPlan/AddingConstColumnStep.h b/src/Processors/QueryPlan/AddingConstColumnStep.h
deleted file mode 100644
index baa63873f21..00000000000
--- a/src/Processors/QueryPlan/AddingConstColumnStep.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#pragma once
-#include <Processors/QueryPlan/ITransformingStep.h>
-
-namespace DB
-{
-
-/// Adds a materialized const column with a specified value.
-class AddingConstColumnStep : public ITransformingStep
-{
-public:
-    AddingConstColumnStep(const DataStream & input_stream_, ColumnWithTypeAndName column_);
-
-    String getName() const override { return "AddingConstColumn"; }
-
-    void transformPipeline(QueryPipeline & pipeline) override;
-
-private:
-    ColumnWithTypeAndName column;
-};
-
-}
-
diff --git a/src/Processors/Transforms/AddingConstColumnTransform.h b/src/Processors/Transforms/AddingConstColumnTransform.h
deleted file mode 100644
index 15e9addd924..00000000000
--- a/src/Processors/Transforms/AddingConstColumnTransform.h
+++ /dev/null
@@ -1,43 +0,0 @@
-#pragma once
-#include <Processors/ISimpleTransform.h>
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
-    extern const int LOGICAL_ERROR;
-}
-
-/// Adds a materialized const column to the chunk with a specified value.
-class AddingConstColumnTransform : public ISimpleTransform
-{
-public:
-    AddingConstColumnTransform(const Block & header, ColumnWithTypeAndName column_)
-        : ISimpleTransform(header, transformHeader(header, column_), false)
-        , column(std::move(column_))
-    {
-        if (!column.column || !isColumnConst(*column.column) || !column.column->empty())
-            throw Exception("AddingConstColumnTransform expected empty const column", ErrorCodes::LOGICAL_ERROR);
-    }
-
-    String getName() const override { return "AddingConstColumnTransform"; }
-
-    static Block transformHeader(Block header, ColumnWithTypeAndName & column_)
-    {
-        header.insert(column_);
-        return header;
-    }
-
-protected:
-    void transform(Chunk & chunk) override
-    {
-        auto num_rows = chunk.getNumRows();
-        chunk.addColumn(column.column->cloneResized(num_rows)->convertToFullColumnIfConst());
-    }
-
-private:
-    ColumnWithTypeAndName column;
-};
-
-}
diff --git a/src/Processors/ya.make b/src/Processors/ya.make
index d42746791fb..2862873a920 100644
--- a/src/Processors/ya.make
+++ b/src/Processors/ya.make
@@ -92,7 +92,6 @@ SRCS(
     Pipe.cpp
     Port.cpp
     QueryPipeline.cpp
-    QueryPlan/AddingConstColumnStep.cpp
     QueryPlan/AddingDelayedSourceStep.cpp
     QueryPlan/AddingMissedStep.cpp
     QueryPlan/AggregatingStep.cpp
diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index 924c7dbe185..430437ccbcd 100644
--- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -28,15 +28,12 @@
 #include <Processors/QueryPlan/FilterStep.h>
 #include <Processors/QueryPlan/ExpressionStep.h>
 #include <Processors/QueryPlan/ReadFromPreparedSource.h>
-#include <Processors/QueryPlan/AddingConstColumnStep.h>
 #include <Processors/QueryPlan/ReverseRowsStep.h>
 #include <Processors/QueryPlan/MergingSortedStep.h>
 #include <Processors/QueryPlan/UnionStep.h>
 #include <Processors/QueryPlan/MergingFinal.h>
-#include <Processors/QueryPlan/ReadNothingStep.h>
 
 #include <DataTypes/DataTypeDate.h>
-#include <DataTypes/DataTypeEnum.h>
 #include <DataTypes/DataTypesNumber.h>
 #include <Storages/VirtualColumnUtils.h>
 
@@ -846,7 +843,9 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
         column.type = std::make_shared<DataTypeFloat64>();
         column.column = column.type->createColumnConst(0, Field(used_sample_factor));
 
-        auto adding_column = std::make_unique<AddingConstColumnStep>(plan->getCurrentDataStream(), std::move(column));
+        auto adding_column_action = ActionsDAG::makeAddingColumnActions(std::move(column));
+
+        auto adding_column = std::make_unique<ExpressionStep>(plan->getCurrentDataStream(), std::move(adding_column_action));
         adding_column->setStepDescription("Add _sample_factor column");
         plan->addStep(std::move(adding_column));
     }
diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp
index 74df6dd185b..91ebfaa3a27 100644
--- a/src/Storages/StorageMerge.cpp
+++ b/src/Storages/StorageMerge.cpp
@@ -24,7 +24,6 @@
 #include <Parsers/queryToString.h>
 #include <Processors/Transforms/MaterializingTransform.h>
 #include <Processors/ConcatProcessor.h>
-#include <Processors/Transforms/AddingConstColumnTransform.h>
 #include <Processors/Transforms/ExpressionTransform.h>
 
 
@@ -364,9 +363,13 @@ Pipe StorageMerge::createSources(
             column.name = "_table";
             column.type = std::make_shared<DataTypeString>();
             column.column = column.type->createColumnConst(0, Field(table_name));
+
+            auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column));
+            auto adding_column_actions = std::make_shared<ExpressionActions>(std::move(adding_column_dag));
+
             pipe.addSimpleTransform([&](const Block & stream_header)
             {
-                return std::make_shared<AddingConstColumnTransform>(stream_header, column);
+                return std::make_shared<ExpressionTransform>(stream_header, adding_column_actions);
             });
         }
 

From ee0ff755e2a8ad105b00fcb305a1a41447af5e87 Mon Sep 17 00:00:00 2001
From: Ivan Lezhankin <ilezhankin@yandex-team.ru>
Date: Thu, 4 Feb 2021 17:46:46 +0300
Subject: [PATCH 0647/1238] Check that position always advances

---
 programs/client/Client.cpp      | 2 +-
 src/IO/LimitReadBuffer.cpp      | 6 ++++++
 src/IO/ReadBuffer.h             | 6 +++---
 src/Parsers/ParserInsertQuery.h | 2 +-
 4 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp
index 9a8b580407a..f52b9b7a0da 100644
--- a/programs/client/Client.cpp
+++ b/programs/client/Client.cpp
@@ -1719,7 +1719,7 @@ private:
             }
             // Remember where the data ended. We use this info later to determine
             // where the next query begins.
-            parsed_insert_query->end = data_in.buffer().begin() + data_in.count();
+            parsed_insert_query->end = parsed_insert_query->data + data_in.count();
         }
         else if (!is_interactive)
         {
diff --git a/src/IO/LimitReadBuffer.cpp b/src/IO/LimitReadBuffer.cpp
index f36facfdd99..a2c93642833 100644
--- a/src/IO/LimitReadBuffer.cpp
+++ b/src/IO/LimitReadBuffer.cpp
@@ -1,4 +1,5 @@
 #include <IO/LimitReadBuffer.h>
+
 #include <Common/Exception.h>
 
 
@@ -13,6 +14,8 @@ namespace ErrorCodes
 
 bool LimitReadBuffer::nextImpl()
 {
+    assert(position() >= in.position());
+
     /// Let underlying buffer calculate read bytes in `next()` call.
     in.position() = position();
 
@@ -25,7 +28,10 @@ bool LimitReadBuffer::nextImpl()
     }
 
     if (!in.next())
+    {
+        working_buffer = in.buffer();
         return false;
+    }
 
     working_buffer = in.buffer();
 
diff --git a/src/IO/ReadBuffer.h b/src/IO/ReadBuffer.h
index 3d6eb6970ce..df21fc9bfb2 100644
--- a/src/IO/ReadBuffer.h
+++ b/src/IO/ReadBuffer.h
@@ -58,9 +58,9 @@ public:
         bytes += offset();
         bool res = nextImpl();
         if (!res)
-            working_buffer.resize(0);
-
-        pos = working_buffer.begin() + nextimpl_working_buffer_offset;
+            working_buffer = Buffer(pos, pos);
+        else
+            pos = working_buffer.begin() + nextimpl_working_buffer_offset;
         nextimpl_working_buffer_offset = 0;
         return res;
     }
diff --git a/src/Parsers/ParserInsertQuery.h b/src/Parsers/ParserInsertQuery.h
index b6a199c9d71..f98e433551d 100644
--- a/src/Parsers/ParserInsertQuery.h
+++ b/src/Parsers/ParserInsertQuery.h
@@ -30,7 +30,7 @@ private:
     const char * getName() const override { return "INSERT query"; }
     bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
 public:
-    ParserInsertQuery(const char * end_) : end(end_) {}
+    explicit ParserInsertQuery(const char * end_) : end(end_) {}
 };
 
 /** Insert accepts an identifier and an asterisk with variants.

From d219540a9cef940d7f07f1802f9abc44f71027c0 Mon Sep 17 00:00:00 2001
From: Denny Crane <deniszhuravlov@gmail.com>
Date: Thu, 4 Feb 2021 11:13:32 -0400
Subject: [PATCH 0648/1238] max_array_size = 1mil

---
 docs/ru/sql-reference/data-types/array.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/ru/sql-reference/data-types/array.md b/docs/ru/sql-reference/data-types/array.md
index 906246b66ee..86a23ed041b 100644
--- a/docs/ru/sql-reference/data-types/array.md
+++ b/docs/ru/sql-reference/data-types/array.md
@@ -47,6 +47,8 @@ SELECT [1, 2] AS x, toTypeName(x)
 
 ## Особенности работы с типами данных {#osobennosti-raboty-s-tipami-dannykh}
 
+Максимальный размер массива ограничен одним миллионом элементов.
+
 При создании массива «на лету» ClickHouse автоматически определяет тип аргументов как наиболее узкий тип данных, в котором можно хранить все перечисленные аргументы. Если среди аргументов есть [NULL](../../sql-reference/data-types/array.md#null-literal) или аргумент типа [Nullable](nullable.md#data_type-nullable), то тип элементов массива — [Nullable](nullable.md).
 
 Если ClickHouse не смог подобрать тип данных, то он сгенерирует исключение. Это произойдёт, например, при попытке создать массив одновременно со строками и числами `SELECT array(1, 'a')`.

From 0dc12ebfe149d1a7d6c8baccf26600126cdcf427 Mon Sep 17 00:00:00 2001
From: Denny Crane <deniszhuravlov@gmail.com>
Date: Thu, 4 Feb 2021 11:15:33 -0400
Subject: [PATCH 0649/1238] max_array_size = 1mil

---
 docs/en/sql-reference/data-types/array.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/en/sql-reference/data-types/array.md b/docs/en/sql-reference/data-types/array.md
index 48957498d63..41e35aaa96f 100644
--- a/docs/en/sql-reference/data-types/array.md
+++ b/docs/en/sql-reference/data-types/array.md
@@ -45,6 +45,8 @@ SELECT [1, 2] AS x, toTypeName(x)
 
 ## Working with Data Types {#working-with-data-types}
 
+The maximum size of an array is limited to one million elements. 
+
 When creating an array on the fly, ClickHouse automatically defines the argument type as the narrowest data type that can store all the listed arguments. If there are any [Nullable](../../sql-reference/data-types/nullable.md#data_type-nullable) or literal [NULL](../../sql-reference/syntax.md#null-literal) values, the type of an array element also becomes [Nullable](../../sql-reference/data-types/nullable.md).
 
 If ClickHouse couldn’t determine the data type, it generates an exception. For instance, this happens when trying to create an array with strings and numbers simultaneously (`SELECT array(1, 'a')`).

From e49a051092260e72973ef57b436b5c773731d633 Mon Sep 17 00:00:00 2001
From: Haavard Kvaalen <havardk@kvaalen.no>
Date: Thu, 4 Feb 2021 16:21:28 +0100
Subject: [PATCH 0650/1238] Remove superfluous semicolon

---
 src/Core/MySQL/MySQLReplication.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Core/MySQL/MySQLReplication.h b/src/Core/MySQL/MySQLReplication.h
index ae8dc80f673..d415bdda70d 100644
--- a/src/Core/MySQL/MySQLReplication.h
+++ b/src/Core/MySQL/MySQLReplication.h
@@ -442,7 +442,7 @@ namespace MySQLReplication
         UInt64 table_id;
         UInt16 flags;
 
-        RowsEventHeader(EventType type_) : type(type_), table_id(0), flags(0) {};
+        RowsEventHeader(EventType type_) : type(type_), table_id(0), flags(0) {}
         void parse(ReadBuffer & payload);
     };
 

From c2312bd72e617b54251b7100a35e9b189fa98509 Mon Sep 17 00:00:00 2001
From: George <gyuton@yandex-team.ru>
Date: Thu, 4 Feb 2021 18:31:00 +0300
Subject: [PATCH 0651/1238] updated description and added translation

---
 .../functions/ip-address-functions.md         | 80 +++++++++++++++---
 .../functions/ip-address-functions.md         | 82 +++++++++++++++++++
 2 files changed, 149 insertions(+), 13 deletions(-)

diff --git a/docs/en/sql-reference/functions/ip-address-functions.md b/docs/en/sql-reference/functions/ip-address-functions.md
index 1361eb65a56..b7a47c09d8f 100644
--- a/docs/en/sql-reference/functions/ip-address-functions.md
+++ b/docs/en/sql-reference/functions/ip-address-functions.md
@@ -265,32 +265,86 @@ SELECT toIPv6('127.0.0.1')
 └─────────────────────┘
 ```
 
-## isIPv4String
+## isIPv4String {#isIPv4String}
 
-Determines if the input string is an IPv4 address or not. Returns `1` if true `0` otherwise.
+Determines whether the input string is an IPv4 address or not.
 
-``` sql
-SELECT isIPv4String('127.0.0.1')
+**Syntax**
+
+```sql
+isIPv4String(string)
 ```
 
+**Parameters**
+
+-   `string` — String. [String](../../sql-reference/data-types/string.md).
+
+**Returned value**
+
+-   `1` if `string` is IPv4 address, `0` if not.
+
+Type: [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Examples**
+
+Query:
+
+```sql
+SELECT isIPv4String('0.0.0.0');
+
+SELECT isIPv4String('Hello');
+```
+
+Result:
+
 ``` text
-┌─isIPv4String('127.0.0.1')─┐
-│                         1 │
-└───────────────────────────┘
+┌─isIPv4String('0.0.0.0')─┐
+│                       1 │
+└─────────────────────────┘
+┌─isIPv4String('Hello')─┐
+│                     0 │
+└───────────────────────┘
 ```
 
-## isIPv6String
+## isIPv6String {#isIPv4String}
 
-Determines if the input string is an IPv6 address or not. Returns `1` if true `0` otherwise.
+Determines whether the input string is an IPv6 address or not. 
+
+**Syntax**
+
+```sql
+isIPv6String(string)
+```
+
+**Parameters**
+
+-   `string` — String. [String](../../sql-reference/data-types/string.md).
+
+**Returned value**
+
+-   `1` if `string` is IPv6 address, `0` if not.
+
+Type: [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Examples**
+
+Query:
 
 ``` sql
-SELECT isIPv6String('2001:438:ffff::407d:1bc1')
+SELECT isIPv6String('::ffff:127.0.0.1');
+
+SELECT isIPv6String('Hello');
 ```
 
+Result:
+
 ``` text
-┌─isIPv6String('2001:438:ffff::407d:1bc1')─┐
-│                                        1 │
-└──────────────────────────────────────────┘
+┌─isIPv6String('::ffff:127.0.0.1')─┐
+│                                1 │
+└──────────────────────────────────┘
+┌─isIPv6String('Hello')─┐
+│                     0 │
+└───────────────────────┘
 ```
 
 [Original article](https://clickhouse.tech/docs/en/query_language/functions/ip_address_functions/) <!--hide-->
diff --git a/docs/ru/sql-reference/functions/ip-address-functions.md b/docs/ru/sql-reference/functions/ip-address-functions.md
index 724fb97c0d5..640d6d0e4fd 100644
--- a/docs/ru/sql-reference/functions/ip-address-functions.md
+++ b/docs/ru/sql-reference/functions/ip-address-functions.md
@@ -243,4 +243,86 @@ SELECT
 └───────────────────────────────────┴──────────────────────────────────┘
 ```
 
+## isIPv4String {#isIPv4String}
+
+Определяет, является ли строка адресом IPv4 или нет.
+
+**Синтаксис**
+
+```sql
+isIPv4String(string)
+```
+
+**Параметры**
+
+-   `string` — строка. [String](../../sql-reference/data-types/string.md).
+
+**Возвращаемое значение**
+
+-   `1` если `string` является адресом IPv4 , `0` если нет.
+
+Тип: [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Примеры**
+
+Запрос:
+
+```sql
+SELECT isIPv4String('0.0.0.0');
+
+SELECT isIPv4String('Hello');
+```
+
+Результат:
+
+``` text
+┌─isIPv4String('0.0.0.0')─┐
+│                       1 │
+└─────────────────────────┘
+┌─isIPv4String('Hello')─┐
+│                     0 │
+└───────────────────────┘
+```
+
+## isIPv6String {#isIPv4String}
+
+Определяет, является ли строка адресом IPv6 или нет.
+
+**Синтаксис**
+
+```sql
+isIPv6String(string)
+```
+
+**Параметры**
+
+-   `string` — строка. [String](../../sql-reference/data-types/string.md).
+
+**Возвращаемое значение**
+
+-   `1` если `string` является адресом IPv6 , `0` если нет.
+
+Тип: [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Примеры**
+
+Запрос:
+
+``` sql
+SELECT isIPv6String('::ffff:127.0.0.1');
+
+SELECT isIPv6String('Hello');
+```
+
+Результат:
+
+``` text
+┌─isIPv6String('::ffff:127.0.0.1')─┐
+│                                1 │
+└──────────────────────────────────┘
+┌─isIPv6String('Hello')─┐
+│                     0 │
+└───────────────────────┘
+```
+
 [Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/ip_address_functions/) <!--hide-->

From 237ee39228607a39b13e1b2836ec8b6ca6647bc8 Mon Sep 17 00:00:00 2001
From: "Anthony N. Simon" <anthonynsimon@users.noreply.github.com>
Date: Thu, 4 Feb 2021 16:34:32 +0100
Subject: [PATCH 0652/1238] Add Panelbear to adopters

---
 docs/en/introduction/adopters.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md
index ca16119f460..c7230f2f080 100644
--- a/docs/en/introduction/adopters.md
+++ b/docs/en/introduction/adopters.md
@@ -74,6 +74,7 @@ toc_title: Adopters
 | <a href="https://getnoc.com/" class="favicon">NOC Project</a> | Network Monitoring | Analytics | Main Product | — | [Official Website](https://getnoc.com/features/big-data/) |
 | <a href="https://www.nuna.com/" class="favicon">Nuna Inc.</a> | Health Data Analytics | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=170) |
 | <a href="https://www.oneapm.com/" class="favicon">OneAPM</a> | Monitorings and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) |
+| <a href="https://panelbear.com/" class="favicon">Panelbear | Analytics | Monitoring and Analytics | — | — | [Tech Stack, November 2020](https://panelbear.com/blog/tech-stack/) |
 | <a href="https://www.percent.cn/" class="favicon">Percent 百分点</a> | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) |
 | <a href="https://www.percona.com/" class="favicon">Percona</a> | Performance analysis | Percona Monitoring and Management | — | — | [Official website, Mar 2020](https://www.percona.com/blog/2020/03/30/advanced-query-analysis-in-percona-monitoring-and-management-with-direct-clickhouse-access/) |
 | <a href="https://plausible.io/" class="favicon">Plausible</a> | Analytics | Main Product | — | — | [Blog post, June 2020](https://twitter.com/PlausibleHQ/status/1273889629087969280) |

From ac4c3a6b27156f0528a1a0d2179b5c7e8d0a66fc Mon Sep 17 00:00:00 2001
From: Ivan Lezhankin <ilezhankin@yandex-team.ru>
Date: Thu, 4 Feb 2021 18:59:54 +0300
Subject: [PATCH 0653/1238] Don't pass empty vector to ConcatReadBuffer

---
 src/Server/HTTPHandler.cpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp
index e161b5752ae..5e0d1f0ac66 100644
--- a/src/Server/HTTPHandler.cpp
+++ b/src/Server/HTTPHandler.cpp
@@ -219,8 +219,11 @@ void HTTPHandler::pushDelayedResults(Output & used_output)
         }
     }
 
-    ConcatReadBuffer concat_read_buffer(read_buffers_raw_ptr);
-    copyData(concat_read_buffer, *used_output.out_maybe_compressed);
+    if (!read_buffers_raw_ptr.empty())
+    {
+        ConcatReadBuffer concat_read_buffer(read_buffers_raw_ptr);
+        copyData(concat_read_buffer, *used_output.out_maybe_compressed);
+    }
 }
 
 
From 85b5805c3cc0c20286030cd505356c9b28c25047 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 4 Feb 2021 19:36:31 +0300
Subject: [PATCH 0654/1238] Fix tests

---
 src/DataTypes/NumberTraits.h                | 2 +-
 src/Functions/tests/gtest_number_traits.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/DataTypes/NumberTraits.h b/src/DataTypes/NumberTraits.h
index 479fc37c795..c3b0d956ef5 100644
--- a/src/DataTypes/NumberTraits.h
+++ b/src/DataTypes/NumberTraits.h
@@ -110,7 +110,7 @@ template <typename A, typename B> struct ResultOfIntegerDivision
 template <typename A, typename B> struct ResultOfModulo
 {
     static constexpr bool result_is_signed = is_signed_v<A>;
-    /// If modulo of division can yield negative number, we need larger type to accomodate it.
+    /// If modulo of division can yield negative number, we need larger type to accommodate it.
     /// Example: toInt32(-199) % toUInt8(200) will return -199 that does not fit in Int8, only in Int16.
     static constexpr size_t size_of_result = result_is_signed ? nextSize(sizeof(B)) : sizeof(B);
     using Type0 = typename Construct<result_is_signed, false, size_of_result>::Type;
diff --git a/src/Functions/tests/gtest_number_traits.cpp b/src/Functions/tests/gtest_number_traits.cpp
index 7664b4fcbdc..7f25c6cbeb7 100644
--- a/src/Functions/tests/gtest_number_traits.cpp
+++ b/src/Functions/tests/gtest_number_traits.cpp
@@ -258,7 +258,7 @@ TEST(NumberTraits, Others)
     ASSERT_EQ(getTypeString(DB::NumberTraits::ResultOfFloatingPointDivision<DB::UInt16, DB::Int16>::Type()), "Float64");
     ASSERT_EQ(getTypeString(DB::NumberTraits::ResultOfFloatingPointDivision<DB::UInt32, DB::Int16>::Type()), "Float64");
     ASSERT_EQ(getTypeString(DB::NumberTraits::ResultOfIntegerDivision<DB::UInt8, DB::Int16>::Type()), "Int8");
-    ASSERT_EQ(getTypeString(DB::NumberTraits::ResultOfModulo<DB::UInt32, DB::Int8>::Type()), "Int8");
+    ASSERT_EQ(getTypeString(DB::NumberTraits::ResultOfModulo<DB::UInt32, DB::Int8>::Type()), "UInt8");
 }
 
 
From 2bfeef23cf90df057b1e188c11a62b524634cfd4 Mon Sep 17 00:00:00 2001
From: Russ Frank <rf@oden.io>
Date: Thu, 4 Feb 2021 11:55:19 -0500
Subject: [PATCH 0655/1238] correct merge logic

---
 .../AggregateFunctionDeltaSum.h               | 23 ++++++++++++-------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/src/AggregateFunctions/AggregateFunctionDeltaSum.h b/src/AggregateFunctions/AggregateFunctionDeltaSum.h
index 4227a0dd6c3..5fb6063d7e8 100644
--- a/src/AggregateFunctions/AggregateFunctionDeltaSum.h
+++ b/src/AggregateFunctions/AggregateFunctionDeltaSum.h
@@ -20,9 +20,9 @@ struct AggregationFunctionDeltaSumData
 {
     T sum = 0;
     bool seen_last = false;
-    T last;
+    T last = 0;
     bool seen_first = false;
-    T first;
+    T first = 0;
 };
 
 template <typename T>
@@ -42,9 +42,9 @@ public:
 
     void ALWAYS_INLINE add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
     {
-        const T & value = (*columns[0])[row_num].get<T>();
+        auto value = static_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num];
 
-        if (this->data(place).last < value && this->data(place).seen_last)
+        if ((this->data(place).last < value) && this->data(place).seen_last)
         {
             this->data(place).sum += (value - this->data(place).last);
         }
@@ -52,7 +52,7 @@ public:
         this->data(place).last = value;
         this->data(place).seen_last = true;
 
-        if (this->data(place).seen_first == false)
+        if (!this->data(place).seen_first)
         {
             this->data(place).first = value;
             this->data(place).seen_first = true;
@@ -64,14 +64,21 @@ public:
         if ((this->data(place).last < this->data(rhs).first) && this->data(place).seen_last && this->data(rhs).seen_first)
         {
             this->data(place).sum += this->data(rhs).sum + (this->data(rhs).first - this->data(place).last);
+            this->data(place).last = this->data(rhs).last;
+        }
+        else if ((this->data(rhs).last < this->data(place).first && this->data(rhs).seen_last && this->data(place).seen_first))
+        {
+            this->data(place).sum += this->data(rhs).sum + (this->data(place).first - this->data(rhs).last);
+            this->data(place).first = this->data(rhs).first;
         }
         else
         {
             this->data(place).sum += this->data(rhs).sum;
+            this->data(place).first = this->data(rhs).first;
+            this->data(place).seen_first = this->data(rhs).seen_first;
+            this->data(place).last = this->data(rhs).last;
+            this->data(place).seen_last = this->data(rhs).seen_last;
         }
-
-        this->data(place).last = this->data(rhs).last;
-        this->data(place).first = this->data(rhs).first;
     }
 
     void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override

From 43f9bfd1feb4b94db8884a3d919ff51fd00bae88 Mon Sep 17 00:00:00 2001
From: Russ Frank <rf@oden.io>
Date: Thu, 4 Feb 2021 11:55:37 -0500
Subject: [PATCH 0656/1238] merge tests

---
 tests/queries/0_stateless/01700_deltasum.reference | 2 ++
 tests/queries/0_stateless/01700_deltasum.sql       | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/tests/queries/0_stateless/01700_deltasum.reference b/tests/queries/0_stateless/01700_deltasum.reference
index 27d687d57d0..d442bc1de2e 100644
--- a/tests/queries/0_stateless/01700_deltasum.reference
+++ b/tests/queries/0_stateless/01700_deltasum.reference
@@ -3,3 +3,5 @@
 7
 7
 7
+5
+5
diff --git a/tests/queries/0_stateless/01700_deltasum.sql b/tests/queries/0_stateless/01700_deltasum.sql
index 6a818ce02d8..a1447cd3c7c 100644
--- a/tests/queries/0_stateless/01700_deltasum.sql
+++ b/tests/queries/0_stateless/01700_deltasum.sql
@@ -3,3 +3,5 @@ select deltaSum(arrayJoin([1, 2, 3, 0, 3, 4]));
 select deltaSum(arrayJoin([1, 2, 3, 0, 3, 4, 2, 3]));
 select deltaSum(arrayJoin([1, 2, 3, 0, 3, 3, 3, 3, 3, 4, 2, 3]));
 select deltaSum(arrayJoin([1, 2, 3, 0, 0, 0, 0, 3, 3, 3, 3, 3, 4, 2, 3]));
+select deltaSumMerge(rows) from (select deltaSumState(arrayJoin([0, 1])) as rows union all select deltaSumState(arrayJoin([4, 5])) as rows);
+select deltaSumMerge(rows) from (select deltaSumState(arrayJoin([4, 5])) as rows union all select deltaSumState(arrayJoin([0, 1])) as rows);

From b0fba3decef216e32c8813c0fe321210915159c6 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Thu, 4 Feb 2021 20:46:15 +0300
Subject: [PATCH 0657/1238] mark as long

---
 ...ference => 01232_preparing_sets_race_condition_long.reference} | 0
 ...e_condition.sh => 01232_preparing_sets_race_condition_long.sh} | 0
 ...rge_truncate.reference => 01443_merge_truncate_long.reference} | 0
 .../{01443_merge_truncate.sh => 01443_merge_truncate_long.sh}     | 0
 4 files changed, 0 insertions(+), 0 deletions(-)
 rename tests/queries/0_stateless/{01232_preparing_sets_race_condition.reference => 01232_preparing_sets_race_condition_long.reference} (100%)
 rename tests/queries/0_stateless/{01232_preparing_sets_race_condition.sh => 01232_preparing_sets_race_condition_long.sh} (100%)
 rename tests/queries/0_stateless/{01443_merge_truncate.reference => 01443_merge_truncate_long.reference} (100%)
 rename tests/queries/0_stateless/{01443_merge_truncate.sh => 01443_merge_truncate_long.sh} (100%)

diff --git a/tests/queries/0_stateless/01232_preparing_sets_race_condition.reference b/tests/queries/0_stateless/01232_preparing_sets_race_condition_long.reference
similarity index 100%
rename from tests/queries/0_stateless/01232_preparing_sets_race_condition.reference
rename to tests/queries/0_stateless/01232_preparing_sets_race_condition_long.reference
diff --git a/tests/queries/0_stateless/01232_preparing_sets_race_condition.sh b/tests/queries/0_stateless/01232_preparing_sets_race_condition_long.sh
similarity index 100%
rename from tests/queries/0_stateless/01232_preparing_sets_race_condition.sh
rename to tests/queries/0_stateless/01232_preparing_sets_race_condition_long.sh
diff --git a/tests/queries/0_stateless/01443_merge_truncate.reference b/tests/queries/0_stateless/01443_merge_truncate_long.reference
similarity index 100%
rename from tests/queries/0_stateless/01443_merge_truncate.reference
rename to tests/queries/0_stateless/01443_merge_truncate_long.reference
diff --git a/tests/queries/0_stateless/01443_merge_truncate.sh b/tests/queries/0_stateless/01443_merge_truncate_long.sh
similarity index 100%
rename from tests/queries/0_stateless/01443_merge_truncate.sh
rename to tests/queries/0_stateless/01443_merge_truncate_long.sh

From a500bd70a489fbce3a4968fb4ec3b31db5e5cab0 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com>
Date: Thu, 4 Feb 2021 21:14:02 +0300
Subject: [PATCH 0658/1238] Update index.md

---
 docs/en/sql-reference/window-functions/index.md | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md
index a79328ade32..22b40585452 100644
--- a/docs/en/sql-reference/window-functions/index.md
+++ b/docs/en/sql-reference/window-functions/index.md
@@ -1,4 +1,9 @@
-# [development] Window Functions
+---
+toc_priority: 62
+toc_title: Window Functions
+---
+
+# [experimental] Window Functions
 
 !!! warning "Warning"
 This is an experimental feature that is currently in development and is not ready
@@ -11,9 +16,7 @@ Pure window functions such as `rank`, `lag`, `lead` and so on are not yet suppor
 The window can be specified either with an `OVER` clause or with a separate
 `WINDOW` clause.
 
-Only two variants of frame are supported, `ROWS` and `RANGE`. The only supported
-frame boundaries are `ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW`.
-
+Only two variants of frame are supported, `ROWS` and `RANGE`. Offsets for the `RANGE` frame are not yet supported.
 
 ## References
 

From 61605026658e24c218c8bb3b9a531883a7d27bda Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com>
Date: Thu, 4 Feb 2021 21:16:26 +0300
Subject: [PATCH 0659/1238] Update index.md

---
 docs/en/sql-reference/window-functions/index.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md
index 22b40585452..211656ed07f 100644
--- a/docs/en/sql-reference/window-functions/index.md
+++ b/docs/en/sql-reference/window-functions/index.md
@@ -31,6 +31,7 @@ https://github.com/ClickHouse/ClickHouse/blob/master/tests/performance/window_fu
 https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/01591_window_functions.sql
 
 ### Postgres Docs
+https://www.postgresql.org/docs/current/sql-select.html#SQL-WINDOW
 https://www.postgresql.org/docs/devel/sql-expressions.html#SYNTAX-WINDOW-FUNCTIONS
 https://www.postgresql.org/docs/devel/functions-window.html
 https://www.postgresql.org/docs/devel/tutorial-window.html

From cea2fcb18ca578b4ac1ac8bd8c57633da92ee501 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com>
Date: Thu, 4 Feb 2021 21:17:25 +0300
Subject: [PATCH 0660/1238] Update index.md

---
 docs/en/sql-reference/window-functions/index.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md
index 211656ed07f..5a6f13226a5 100644
--- a/docs/en/sql-reference/window-functions/index.md
+++ b/docs/en/sql-reference/window-functions/index.md
@@ -8,7 +8,7 @@ toc_title: Window Functions
 !!! warning "Warning"
 This is an experimental feature that is currently in development and is not ready
 for general use. It will change in unpredictable backwards-incompatible ways in
-the future releases.
+the future releases. Set `allow_experimental_window_functions = 1` to enable it.
 
 ClickHouse currently supports calculation of aggregate functions over a window.
 Pure window functions such as `rank`, `lag`, `lead` and so on are not yet supported.

From 610d7b755b9eb80abcd2696dd130ec57f14d34e5 Mon Sep 17 00:00:00 2001
From: Ivan Lezhankin <ilezhankin@yandex-team.ru>
Date: Thu, 4 Feb 2021 21:50:15 +0300
Subject: [PATCH 0661/1238] Don't forget to update position of inner buffer

---
 src/IO/ReadWriteBufferFromHTTP.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h
index de10f268dc3..9c77fc3a517 100644
--- a/src/IO/ReadWriteBufferFromHTTP.h
+++ b/src/IO/ReadWriteBufferFromHTTP.h
@@ -205,6 +205,8 @@ namespace detail
         {
             if (next_callback)
                 next_callback(count());
+            if (working_buffer.size())
+                impl->position() = position();
             if (!impl->next())
                 return false;
             internal_buffer = impl->buffer();

From 19c38c61e6baad8d81ec52837d8ff18f5cdfc8b5 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 4 Feb 2021 22:03:56 +0300
Subject: [PATCH 0662/1238] Add fuzzer for ColumnsDescription

---
 src/Storages/tests/CMakeLists.txt                 |  3 +++
 src/Storages/tests/columns_description_fuzzer.cpp | 14 ++++++++++++++
 2 files changed, 17 insertions(+)
 create mode 100644 src/Storages/tests/columns_description_fuzzer.cpp

diff --git a/src/Storages/tests/CMakeLists.txt b/src/Storages/tests/CMakeLists.txt
index 292f7603838..b58fed9edf5 100644
--- a/src/Storages/tests/CMakeLists.txt
+++ b/src/Storages/tests/CMakeLists.txt
@@ -29,4 +29,7 @@ target_link_libraries (transform_part_zk_nodes
 if (ENABLE_FUZZING)
     add_executable (mergetree_checksum_fuzzer mergetree_checksum_fuzzer.cpp)
     target_link_libraries (mergetree_checksum_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE})
+
+    add_executable (columns_description_fuzzer columns_description_fuzzer.cpp)
+    target_link_libraries (columns_description_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE})
 endif ()
diff --git a/src/Storages/tests/columns_description_fuzzer.cpp b/src/Storages/tests/columns_description_fuzzer.cpp
new file mode 100644
index 00000000000..b0f1c0a14f9
--- /dev/null
+++ b/src/Storages/tests/columns_description_fuzzer.cpp
@@ -0,0 +1,14 @@
+#include <Storages/ColumnsDescription.h>
+
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
+try
+{
+    using namespace DB;
+    ColumnsDescription columns = ColumnsDescription::parse(std::string(reinterpret_cast<const char *>(data), size));
+    return 0;
+}
+catch (...)
+{
+    return 1;
+}

From 84b88c68025e1d3237b1b8f850255a6023487505 Mon Sep 17 00:00:00 2001
From: Ivan Lezhankin <ilezhankin@yandex-team.ru>
Date: Thu, 4 Feb 2021 22:06:43 +0300
Subject: [PATCH 0663/1238] CC

---
 src/Disks/S3/DiskS3.cpp          | 4 ++--
 src/IO/ReadWriteBufferFromHTTP.h | 6 ++----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp
index 238db98c9cc..89413660e35 100644
--- a/src/Disks/S3/DiskS3.cpp
+++ b/src/Disks/S3/DiskS3.cpp
@@ -244,7 +244,7 @@ public:
         if (whence == SEEK_CUR)
         {
             /// If position within current working buffer - shift pos.
-            if (working_buffer.size() && size_t(getPosition() + offset_) < absolute_position)
+            if (!working_buffer.empty() && size_t(getPosition() + offset_) < absolute_position)
             {
                 pos += offset_;
                 return getPosition();
@@ -257,7 +257,7 @@ public:
         else if (whence == SEEK_SET)
         {
             /// If position within current working buffer - shift pos.
-            if (working_buffer.size() && size_t(offset_) >= absolute_position - working_buffer.size()
+            if (!working_buffer.empty() && size_t(offset_) >= absolute_position - working_buffer.size()
                 && size_t(offset_) < absolute_position)
             {
                 pos = working_buffer.end() - (absolute_position - offset_);
diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h
index 9c77fc3a517..9cd37bd00f8 100644
--- a/src/IO/ReadWriteBufferFromHTTP.h
+++ b/src/IO/ReadWriteBufferFromHTTP.h
@@ -76,9 +76,7 @@ public:
         }
     }
 
-    virtual ~UpdatableSessionBase()
-    {
-    }
+    virtual ~UpdatableSessionBase() = default;
 };
 
 
@@ -205,7 +203,7 @@ namespace detail
         {
             if (next_callback)
                 next_callback(count());
-            if (working_buffer.size())
+            if (!working_buffer.empty())
                 impl->position() = position();
             if (!impl->next())
                 return false;

From 409ff2f6b3f7b16cd9c15cca48b3332574bd8cd5 Mon Sep 17 00:00:00 2001
From: Dmitriy <sevirov@yandex-team.ru>
Date: Thu, 4 Feb 2021 22:13:55 +0300
Subject: [PATCH 0664/1238] Document system.opentelemetry_span_log system table
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Задокументировал системную таблицу system.opentelemetry_span_log.
---
 .../system-tables/opentelemetry_span_log.md   | 49 +++++++++++++++++++
 .../system-tables/opentelemetry_span_log.md   | 45 +++++++++++++++++
 2 files changed, 94 insertions(+)
 create mode 100644 docs/en/operations/system-tables/opentelemetry_span_log.md
 create mode 100644 docs/ru/operations/system-tables/opentelemetry_span_log.md

diff --git a/docs/en/operations/system-tables/opentelemetry_span_log.md b/docs/en/operations/system-tables/opentelemetry_span_log.md
new file mode 100644
index 00000000000..64fd549458a
--- /dev/null
+++ b/docs/en/operations/system-tables/opentelemetry_span_log.md
@@ -0,0 +1,49 @@
+# system.opentelemetry_span_log {#system_tables-opentelemetry_span_log}
+
+Contains information about [trace spans](https://opentracing.io/docs/overview/spans/) for executed queries.
+
+Columns:
+
+-   `trace_id` ([UUID](../../sql-reference/data-types/uuid.md) — ID of the trace for executed query.
+
+-   `span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the `trace span`.
+
+-   `parent_span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the parent `trace span`.
+
+-   `operation_name` ([String](../../sql-reference/data-types/string.md)) — The name of the operation.
+
+-   `start_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The start time of the `trace span` (in microseconds).
+
+-   `finish_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The finish time of the `trace span` (in microseconds).
+
+-   `finish_date` ([Date](../../sql-reference/data-types/date.md)) — The finish date of the `trace span`.
+
+-   `attribute.names` ([Array(String)](../../sql-reference/data-types/array.md)) — [Attribute](https://opentelemetry.io/docs/go/instrumentation/#attributes) names depending on the `trace span`. They are filled in according to the recommendations in the [OpenTelemetry](https://opentelemetry.io/) standard.
+
+-   `attribute.values` ([Array(String)](../../sql-reference/data-types/array.md)) — Attribute values depending on the `trace span`. They are filled in according to the recommendations in the `OpenTelemetry` standard.
+
+**Example**
+
+``` sql
+SELECT * FROM system.opentelemetry_span_log LIMIT 1 FORMAT Vertical;
+```
+
+``` text
+Row 1:
+──────
+trace_id:         cdab0847-0d62-61d5-4d38-dd65b19a1914
+span_id:          701487461015578150
+parent_span_id:   2991972114672045096
+operation_name:   DB::Block DB::InterpreterSelectQuery::getSampleBlockImpl()
+start_time_us:    1612374594529090
+finish_time_us:   1612374594529108
+finish_date:      2021-02-03
+attribute.names:  []
+attribute.values: []
+```
+
+**See Also**
+
+-   [OpenTelemetry](../../operations/opentelemetry.md)
+
+[Original article](https://clickhouse.tech/docs/en/operations/system_tables/opentelemetry_span_log) <!--hide-->
diff --git a/docs/ru/operations/system-tables/opentelemetry_span_log.md b/docs/ru/operations/system-tables/opentelemetry_span_log.md
new file mode 100644
index 00000000000..5c577eb691d
--- /dev/null
+++ b/docs/ru/operations/system-tables/opentelemetry_span_log.md
@@ -0,0 +1,45 @@
+# system.opentelemetry_span_log {#system_tables-opentelemetry_span_log}
+
+Содержит информацию о [trace spans](https://opentracing.io/docs/overview/spans/) для выполненных запросов.
+
+Столбцы:
+
+-   `trace_id` ([UUID](../../sql-reference/data-types/uuid.md) — идентификатор трассировки для выполненного запроса.
+
+-   `span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — идентификатор `trace span`.
+
+-   `parent_span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — идентификатор родительского `trace span`.
+
+-   `operation_name` ([String](../../sql-reference/data-types/string.md)) — имя операции.
+
+-   `start_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — время начала `trace span` (в микросекундах).
+
+-   `finish_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — время окончания `trace span` (в микросекундах).
+
+-   `finish_date` ([Date](../../sql-reference/data-types/date.md)) — дата окончания `trace span`.
+
+-   `attribute.names` ([Array(String)](../../sql-reference/data-types/array.md)) — имена [атрибутов](https://opentelemetry.io/docs/go/instrumentation/#attributes) в зависимости от `trace span`. Заполняются согласно рекомендациям в стандарте [OpenTelemetry](https://opentelemetry.io/).
+
+-   `attribute.values` ([Array(String)](../../sql-reference/data-types/array.md)) — значения атрибутов в зависимости от `trace span`. Заполняются согласно рекомендациям в стандарте `OpenTelemetry`.
+
+**Пример**
+
+``` sql
+SELECT * FROM system.opentelemetry_span_log LIMIT 1 FORMAT Vertical;
+```
+
+``` text
+Row 1:
+──────
+trace_id:         cdab0847-0d62-61d5-4d38-dd65b19a1914
+span_id:          701487461015578150
+parent_span_id:   2991972114672045096
+operation_name:   DB::Block DB::InterpreterSelectQuery::getSampleBlockImpl()
+start_time_us:    1612374594529090
+finish_time_us:   1612374594529108
+finish_date:      2021-02-03
+attribute.names:  []
+attribute.values: []
+```
+
+[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/system_tables/opentelemetry_span_log) <!--hide-->

From bcf5a70e4f1f210d551cc187a7b345e33ca3a9bb Mon Sep 17 00:00:00 2001
From: Russ Frank <rf@oden.io>
Date: Thu, 4 Feb 2021 14:28:45 -0500
Subject: [PATCH 0665/1238] check arg type

---
 src/AggregateFunctions/AggregateFunctionDeltaSum.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp b/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp
index 239c5bf6535..aeb2549e826 100644
--- a/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp
+++ b/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp
@@ -11,6 +11,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
 }
 
 namespace
@@ -27,6 +28,12 @@ AggregateFunctionPtr createAggregateFunctionDeltaSum(
         throw Exception("Incorrect number of arguments for aggregate function " + name,
             ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
 
+    DataTypePtr data_type = arguments[0];
+
+    if (!isNumber(data_type))
+        throw Exception("Illegal type " + arguments[0]->getName() + " of argument for aggregate function " + name,
+                        ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+
     return AggregateFunctionPtr(createWithNumericType<AggregationFunctionDeltaSum>(*arguments[0], arguments, params));
 }
 

From c5312bf362929d95b2269c9c7c707adda20a5f84 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Thu, 4 Feb 2021 22:29:46 +0300
Subject: [PATCH 0666/1238] Trying to disable suspicious parameter

---
 src/Coordination/NuKeeperServer.cpp            | 7 +++++--
 src/Coordination/NuKeeperServer.h              | 2 +-
 src/Coordination/NuKeeperStorageDispatcher.cpp | 3 ++-
 src/Server/NuKeeperTCPHandler.cpp              | 2 ++
 4 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/Coordination/NuKeeperServer.cpp b/src/Coordination/NuKeeperServer.cpp
index 3910376ebda..aa1747ca3e6 100644
--- a/src/Coordination/NuKeeperServer.cpp
+++ b/src/Coordination/NuKeeperServer.cpp
@@ -43,7 +43,7 @@ void NuKeeperServer::addServer(int server_id_, const std::string & server_uri_,
 }
 
 
-void NuKeeperServer::startup()
+void NuKeeperServer::startup(int64_t operation_timeout_ms)
 {
     nuraft::raft_params params;
     params.heart_beat_interval_ = 500;
@@ -51,8 +51,10 @@ void NuKeeperServer::startup()
     params.election_timeout_upper_bound_ = 2000;
     params.reserved_log_items_ = 5000;
     params.snapshot_distance_ = 5000;
-    params.client_req_timeout_ = 10000;
+    params.client_req_timeout_ = operation_timeout_ms;
     params.auto_forwarding_ = true;
+    /// For some reason may lead to a very long timeouts
+    params.use_bg_thread_for_urgent_commit_ = false;
     params.return_method_ = nuraft::raft_params::blocking;
 
     nuraft::asio_service::options asio_opts{};
@@ -197,6 +199,7 @@ int64_t NuKeeperServer::getSessionID(int64_t session_timeout_ms)
     std::lock_guard lock(append_entries_mutex);
 
     auto result = raft_instance->append_entries({entry});
+
     if (!result->get_accepted())
         throw Exception(ErrorCodes::RAFT_ERROR, "Cannot send session_id request to RAFT");
 
diff --git a/src/Coordination/NuKeeperServer.h b/src/Coordination/NuKeeperServer.h
index 358a4212967..6151cd095e0 100644
--- a/src/Coordination/NuKeeperServer.h
+++ b/src/Coordination/NuKeeperServer.h
@@ -34,7 +34,7 @@ private:
 public:
     NuKeeperServer(int server_id_, const std::string & hostname_, int port_);
 
-    void startup();
+    void startup(int64_t operation_timeout_ms);
 
     NuKeeperStorage::ResponsesForSessions putRequest(const NuKeeperStorage::RequestForSession & request);
 
diff --git a/src/Coordination/NuKeeperStorageDispatcher.cpp b/src/Coordination/NuKeeperStorageDispatcher.cpp
index fbf54106316..e327272cab1 100644
--- a/src/Coordination/NuKeeperStorageDispatcher.cpp
+++ b/src/Coordination/NuKeeperStorageDispatcher.cpp
@@ -111,6 +111,7 @@ void NuKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfigurati
     int myport;
     int32_t my_priority = 1;
 
+    operation_timeout = Poco::Timespan(0, config.getUInt("test_keeper_server.operation_timeout_ms", Coordination::DEFAULT_OPERATION_TIMEOUT_MS) * 1000);
     Poco::Util::AbstractConfiguration::Keys keys;
     config.keys("test_keeper_server.raft_configuration", keys);
     bool my_can_become_leader = true;
@@ -141,7 +142,7 @@ void NuKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfigurati
     server = std::make_unique<NuKeeperServer>(myid, myhostname, myport);
     try
     {
-        server->startup();
+        server->startup(operation_timeout.totalMilliseconds());
         if (shouldBuildQuorum(myid, my_priority, my_can_become_leader, server_configs))
         {
             for (const auto & [id, hostname, port, can_become_leader, priority] : server_configs)
diff --git a/src/Server/NuKeeperTCPHandler.cpp b/src/Server/NuKeeperTCPHandler.cpp
index 706b57ee71d..31ffc744aaa 100644
--- a/src/Server/NuKeeperTCPHandler.cpp
+++ b/src/Server/NuKeeperTCPHandler.cpp
@@ -331,7 +331,9 @@ void NuKeeperTCPHandler::runImpl()
     {
         try
         {
+            LOG_INFO(log, "Requesting session ID for the new client");
             session_id = nu_keeper_storage_dispatcher->getSessionID(session_timeout.totalMilliseconds());
+            LOG_INFO(log, "Received session ID {}", session_id);
         }
         catch (const Exception & e)
         {

From d2c1a97d8618bb94b28cabb7abaee0de187e2b4f Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 4 Feb 2021 22:41:12 +0300
Subject: [PATCH 0667/1238] Minor modification

---
 src/Storages/tests/columns_description_fuzzer.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/Storages/tests/columns_description_fuzzer.cpp b/src/Storages/tests/columns_description_fuzzer.cpp
index b0f1c0a14f9..44fd667ff1c 100644
--- a/src/Storages/tests/columns_description_fuzzer.cpp
+++ b/src/Storages/tests/columns_description_fuzzer.cpp
@@ -6,6 +6,7 @@ try
 {
     using namespace DB;
     ColumnsDescription columns = ColumnsDescription::parse(std::string(reinterpret_cast<const char *>(data), size));
+    std::cerr << columns.toString() << "\n";
     return 0;
 }
 catch (...)

From 18f6b5bbad353431e5f7494103756264b0f2ca79 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Thu, 4 Feb 2021 22:41:44 +0300
Subject: [PATCH 0668/1238] add timeouts

---
 src/Databases/DatabaseReplicated.cpp        | 40 +++++----
 src/Databases/DatabaseReplicated.h          |  3 +-
 src/Databases/DatabaseReplicatedWorker.cpp  | 90 ++++++++++++++++-----
 src/Databases/DatabaseReplicatedWorker.h    |  2 +-
 src/Interpreters/DDLTask.cpp                |  4 -
 src/Interpreters/DDLTask.h                  |  2 +-
 src/Interpreters/DDLWorker.cpp              | 18 ++---
 src/Interpreters/DDLWorker.h                |  2 +-
 src/Interpreters/DatabaseCatalog.cpp        |  2 +-
 src/Interpreters/DatabaseCatalog.h          |  4 +-
 src/Interpreters/InterpreterAlterQuery.cpp  |  2 +-
 src/Interpreters/InterpreterCreateQuery.cpp |  2 +-
 src/Interpreters/InterpreterDropQuery.cpp   |  4 +-
 src/Interpreters/InterpreterRenameQuery.cpp |  2 +-
 tests/queries/skip_list.json                | 21 +++++
 15 files changed, 139 insertions(+), 59 deletions(-)

diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index 44746cd5716..5a11787331c 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -35,6 +35,7 @@ namespace ErrorCodes
     extern const int DATABASE_REPLICATION_FAILED;
     extern const int UNKNOWN_DATABASE;
     extern const int NOT_IMPLEMENTED;
+    extern const int INCORRECT_QUERY;
 }
 
 zkutil::ZooKeeperPtr DatabaseReplicated::getZooKeeper() const
@@ -121,8 +122,8 @@ bool DatabaseReplicated::createDatabaseNodesInZooKeeper(const zkutil::ZooKeeperP
     ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/counter/cnt-", "", zkutil::CreateMode::Persistent));
     ops.emplace_back(zkutil::makeRemoveRequest(zookeeper_path + "/counter/cnt-", -1));
     ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/metadata", "", zkutil::CreateMode::Persistent));
-    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/min_log_ptr", "1", zkutil::CreateMode::Persistent));
     ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/max_log_ptr", "1", zkutil::CreateMode::Persistent));
+    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/logs_to_keep", "1000", zkutil::CreateMode::Persistent));
 
     Coordination::Responses responses;
     auto res = current_zookeeper->tryMulti(ops, responses);
@@ -194,7 +195,7 @@ void DatabaseReplicated::onUnexpectedLogEntry(const String & entry_name, const Z
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Entry {} already executed, current pointer is {}", entry_number, log_entry_to_execute);
 
     /// Entry name is valid. Let's get min log pointer to check if replica is staled.
-    UInt32 min_snapshot = parse<UInt32>(zookeeper->get(zookeeper_path + "/min_log_ptr"));
+    UInt32 min_snapshot = parse<UInt32>(zookeeper->get(zookeeper_path + "/min_log_ptr"));   // FIXME
 
     if (log_entry_to_execute < min_snapshot)
     {
@@ -207,13 +208,15 @@ void DatabaseReplicated::onUnexpectedLogEntry(const String & entry_name, const Z
 }
 
 
-BlockIO DatabaseReplicated::propose(const ASTPtr & query)
+BlockIO DatabaseReplicated::propose(const ASTPtr & query, const Context & query_context)
 {
+    if (query_context.getClientInfo().query_kind != ClientInfo::QueryKind::INITIAL_QUERY)
+        throw Exception(ErrorCodes::INCORRECT_QUERY, "It's not initial query. ON CLUSTER is not allowed for Replicated database.");
+
     if (const auto * query_alter = query->as<ASTAlterQuery>())
     {
         for (const auto & command : query_alter->command_list->children)
         {
-            //FIXME allow all types of queries (maybe we should execute ATTACH an similar queries on leader)
             if (!isSupportedAlterType(command->as<ASTAlterCommand&>().type))
                 throw Exception("Unsupported type of ALTER query", ErrorCodes::NOT_IMPLEMENTED);
         }
@@ -225,17 +228,16 @@ BlockIO DatabaseReplicated::propose(const ASTPtr & query)
     DDLLogEntry entry;
     entry.query = queryToString(query);
     entry.initiator = ddl_worker->getCommonHostID();
-    String node_path = ddl_worker->tryEnqueueAndExecuteEntry(entry);
+    String node_path = ddl_worker->tryEnqueueAndExecuteEntry(entry, query_context);
 
     BlockIO io;
-    //FIXME use query context
-    if (global_context.getSettingsRef().distributed_ddl_task_timeout == 0)
+    if (query_context.getSettingsRef().distributed_ddl_task_timeout == 0)
         return io;
 
     //FIXME need list of all replicas, we can obtain it from zk
     Strings hosts_to_wait;
     hosts_to_wait.emplace_back(getFullReplicaName());
-    auto stream = std::make_shared<DDLQueryStatusInputStream>(node_path, entry, global_context, hosts_to_wait);
+    auto stream = std::make_shared<DDLQueryStatusInputStream>(node_path, entry, query_context, hosts_to_wait);
     io.in = std::move(stream);
     return io;
 }
@@ -295,17 +297,20 @@ void DatabaseReplicated::drop(const Context & context_)
 {
     auto current_zookeeper = getZooKeeper();
     current_zookeeper->set(replica_path, "DROPPED");
-    current_zookeeper->tryRemoveRecursive(replica_path);
     DatabaseAtomic::drop(context_);
+    current_zookeeper->tryRemoveRecursive(replica_path);
+}
+
+void DatabaseReplicated::stopReplication()
+{
+    if (ddl_worker)
+        ddl_worker->shutdown();
 }
 
 void DatabaseReplicated::shutdown()
 {
-    if (ddl_worker)
-    {
-        ddl_worker->shutdown();
-        ddl_worker = nullptr;
-    }
+    stopReplication();
+    ddl_worker = nullptr;
     DatabaseAtomic::shutdown();
 }
 
@@ -330,10 +335,15 @@ void DatabaseReplicated::renameTable(const Context & context, const String & tab
 
     if (txn->is_initial_query)
     {
+        if (!isTableExist(table_name, context))
+            throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} does not exist", table_name);
+        if (exchange && !to_database.isTableExist(to_table_name, context))
+            throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} does not exist", to_table_name);
+
         String statement;
         String statement_to;
         {
-            //FIXME It's not atomic (however we have only one thread)
+            /// NOTE It's not atomic (however, we have only one thread)
             ReadBufferFromFile in(getObjectMetadataPath(table_name), 4096);
             readStringUntilEOF(statement, in);
             if (exchange)
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index 586f381c962..a866a61558c 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -60,8 +60,9 @@ public:
 
     String getEngineName() const override { return "Replicated"; }
 
-    BlockIO propose(const ASTPtr & query);
+    BlockIO propose(const ASTPtr & query, const Context & query_context);
 
+    void stopReplication();
     void shutdown() override;
 
     void loadStoredObjects(Context & context, bool has_force_restore_data_flag, bool force_attach) override;
diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp
index 5af216c3d0d..1c000a8f0a7 100644
--- a/src/Databases/DatabaseReplicatedWorker.cpp
+++ b/src/Databases/DatabaseReplicatedWorker.cpp
@@ -9,6 +9,8 @@ namespace ErrorCodes
 {
     extern const int LOGICAL_ERROR;
     extern const int DATABASE_REPLICATION_FAILED;
+    extern const int NOT_A_LEADER;
+    extern const int UNFINISHED;
 }
 
 DatabaseReplicatedDDLWorker::DatabaseReplicatedDDLWorker(DatabaseReplicated * db, const Context & context_)
@@ -22,7 +24,7 @@ DatabaseReplicatedDDLWorker::DatabaseReplicatedDDLWorker(DatabaseReplicated * db
 
 void DatabaseReplicatedDDLWorker::initializeMainThread()
 {
-    do
+    while (!initialized && !stop_flag)
     {
         try
         {
@@ -36,17 +38,17 @@ void DatabaseReplicatedDDLWorker::initializeMainThread()
             sleepForSeconds(5);
         }
     }
-    while (!initialized && !stop_flag);
 }
 
 void DatabaseReplicatedDDLWorker::initializeReplication()
 {
     /// Check if we need to recover replica.
-    /// Invariant: replica is lost if it's log_ptr value is less then min_log_ptr value.
+    /// Invariant: replica is lost if it's log_ptr value is less then max_log_ptr - logs_to_keep.
 
     UInt32 our_log_ptr = parse<UInt32>(current_zookeeper->get(database->replica_path + "/log_ptr"));
-    UInt32 min_log_ptr = parse<UInt32>(current_zookeeper->get(database->zookeeper_path + "/min_log_ptr"));
-    if (our_log_ptr < min_log_ptr)
+    UInt32 max_log_ptr = parse<UInt32>(current_zookeeper->get(database->zookeeper_path + "/max_log_ptr"));
+    UInt32 logs_to_keep = parse<UInt32>(current_zookeeper->get(database->zookeeper_path + "/logs_to_keep"));
+    if (our_log_ptr + logs_to_keep < max_log_ptr)
         database->recoverLostReplica(current_zookeeper, 0);
 }
 
@@ -75,10 +77,19 @@ String DatabaseReplicatedDDLWorker::enqueueQuery(DDLLogEntry & entry)
     return node_path;
 }
 
-String DatabaseReplicatedDDLWorker::tryEnqueueAndExecuteEntry(DDLLogEntry & entry)
+String DatabaseReplicatedDDLWorker::tryEnqueueAndExecuteEntry(DDLLogEntry & entry, const Context & query_context)
 {
+    /// NOTE Possibly it would be better to execute initial query on the most up-to-date node,
+    /// but it requires more complex logic around /try node.
+
     auto zookeeper = getAndSetZooKeeper();
-    // TODO do not enqueue query if we have big replication lag
+    UInt32 our_log_ptr = parse<UInt32>(zookeeper->get(database->replica_path + "/log_ptr"));
+    UInt32 max_log_ptr = parse<UInt32>(zookeeper->get(database->zookeeper_path + "/max_log_ptr"));
+    assert(our_log_ptr <= max_log_ptr);
+    constexpr UInt32 max_replication_lag = 16;
+    if (max_replication_lag < max_log_ptr - our_log_ptr)
+        throw Exception(ErrorCodes::NOT_A_LEADER, "Cannot enqueue query on this replica, "
+                        "because it has replication lag of {} queries. Try other replica.", max_log_ptr - our_log_ptr);
 
     String entry_path = enqueueQuery(entry);
     auto try_node = zkutil::EphemeralNodeHolder::existing(entry_path + "/try", *zookeeper);
@@ -91,9 +102,18 @@ String DatabaseReplicatedDDLWorker::tryEnqueueAndExecuteEntry(DDLLogEntry & entr
     task->is_initial_query = true;
 
     LOG_DEBUG(log, "Waiting for worker thread to process all entries before {}", entry_name);
+    UInt64 timeout = query_context.getSettingsRef().distributed_ddl_task_timeout;
     {
         std::unique_lock lock{mutex};
-        wait_current_task_change.wait(lock, [&]() { assert(zookeeper->expired() || current_task <= entry_name); return zookeeper->expired() || current_task == entry_name; });
+        bool processed = wait_current_task_change.wait_for(lock, std::chrono::seconds(timeout), [&]()
+        {
+            assert(zookeeper->expired() || current_task <= entry_name);
+            return zookeeper->expired() || current_task == entry_name || stop_flag;
+        });
+
+        if (!processed)
+            throw Exception(ErrorCodes::UNFINISHED, "Timeout: Cannot enqueue query on this replica,"
+                            "most likely because replica is busy with previous queue entries");
     }
 
     if (zookeeper->expired())
@@ -116,8 +136,11 @@ DDLTaskPtr DatabaseReplicatedDDLWorker::initAndCheckTask(const String & entry_na
 {
     {
         std::lock_guard lock{mutex};
-        current_task = entry_name;
-        wait_current_task_change.notify_all();
+        if (current_task < entry_name)
+        {
+            current_task = entry_name;
+            wait_current_task_change.notify_all();
+        }
     }
 
     UInt32 our_log_ptr = parse<UInt32>(current_zookeeper->get(database->replica_path + "/log_ptr"));
@@ -135,18 +158,50 @@ DDLTaskPtr DatabaseReplicatedDDLWorker::initAndCheckTask(const String & entry_na
     String initiator_name;
     zkutil::EventPtr wait_committed_or_failed = std::make_shared<Poco::Event>();
 
-    if (zookeeper->tryGet(entry_path + "/try", initiator_name, nullptr, wait_committed_or_failed))
+    String try_node_path = entry_path + "/try";
+    if (zookeeper->tryGet(try_node_path, initiator_name, nullptr, wait_committed_or_failed))
     {
         task->is_initial_query = initiator_name == task->host_id_str;
+
         /// Query is not committed yet. We cannot just skip it and execute next one, because reordering may break replication.
-        //FIXME add some timeouts
         LOG_TRACE(log, "Waiting for initiator {} to commit or rollback entry {}", initiator_name, entry_path);
-        wait_committed_or_failed->wait();
+        constexpr size_t wait_time_ms = 1000;
+        constexpr size_t max_iterations = 3600;
+        size_t iteration = 0;
+
+        while (!wait_committed_or_failed->tryWait(wait_time_ms))
+        {
+            if (stop_flag)
+            {
+                /// We cannot return task to process and we cannot return nullptr too,
+                /// because nullptr means "task should not be executed".
+                /// We can only exit by exception.
+                throw Exception(ErrorCodes::UNFINISHED, "Replication was stopped");
+            }
+
+            if (max_iterations <= ++iteration)
+            {
+                /// What can we do if initiator hangs for some reason? Seems like we can remove /try node.
+                /// Initiator will fail to commit entry to ZK (including ops for replicated table) if /try does not exist.
+                /// But it's questionable.
+
+                /// We use tryRemove(...) because multiple hosts (including initiator) may try to do it concurrently.
+                auto code = zookeeper->tryRemove(try_node_path);
+                if (code != Coordination::Error::ZOK && code != Coordination::Error::ZNONODE)
+                    throw Coordination::Exception(code, try_node_path);
+
+                if (!zookeeper->exists(entry_path + "/committed"))
+                {
+                    out_reason = fmt::format("Entry {} was forcefully cancelled due to timeout", entry_name);
+                    return {};
+                }
+            }
+        }
     }
 
     if (!zookeeper->exists(entry_path + "/committed"))
     {
-        out_reason = "Entry " + entry_name + " hasn't been committed";
+        out_reason = fmt::format("Entry {} hasn't been committed", entry_name);
         return {};
     }
 
@@ -154,7 +209,7 @@ DDLTaskPtr DatabaseReplicatedDDLWorker::initAndCheckTask(const String & entry_na
     {
         assert(!zookeeper->exists(entry_path + "/try"));
         assert(zookeeper->exists(entry_path + "/committed") == (zookeeper->get(task->getFinishedNodePath()) == "0"));
-        out_reason = "Entry " + entry_name + " has been executed as initial query";
+        out_reason = fmt::format("Entry {} has been executed as initial query", entry_name);
         return {};
     }
 
@@ -169,8 +224,7 @@ DDLTaskPtr DatabaseReplicatedDDLWorker::initAndCheckTask(const String & entry_na
 
     if (task->entry.query.empty())
     {
-        //TODO better way to determine special entries
-        out_reason = "It's dummy task";
+        out_reason = fmt::format("Entry {} is a dummy task", entry_name);
         return {};
     }
 
@@ -178,7 +232,7 @@ DDLTaskPtr DatabaseReplicatedDDLWorker::initAndCheckTask(const String & entry_na
 
     if (zookeeper->exists(task->getFinishedNodePath()))
     {
-        out_reason = "Task has been already processed";
+        out_reason = fmt::format("Task {} has been already processed", entry_name);
         return {};
     }
 
diff --git a/src/Databases/DatabaseReplicatedWorker.h b/src/Databases/DatabaseReplicatedWorker.h
index 6e29e48469b..e3fd58c4305 100644
--- a/src/Databases/DatabaseReplicatedWorker.h
+++ b/src/Databases/DatabaseReplicatedWorker.h
@@ -13,7 +13,7 @@ public:
 
     String enqueueQuery(DDLLogEntry & entry) override;
 
-    String tryEnqueueAndExecuteEntry(DDLLogEntry & entry);
+    String tryEnqueueAndExecuteEntry(DDLLogEntry & entry, const Context & query_context);
 
 private:
     void initializeMainThread() override;
diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp
index 55e613648ae..9737167fa4c 100644
--- a/src/Interpreters/DDLTask.cpp
+++ b/src/Interpreters/DDLTask.cpp
@@ -309,13 +309,9 @@ std::unique_ptr<Context> DatabaseReplicatedTask::makeQueryContext(Context & from
     {
         txn->ops.emplace_back(zkutil::makeRemoveRequest(entry_path + "/try", -1));
         txn->ops.emplace_back(zkutil::makeCreateRequest(entry_path + "/committed", host_id_str, zkutil::CreateMode::Persistent));
-        //txn->ops.emplace_back(zkutil::makeRemoveRequest(getActiveNodePath(), -1));
         txn->ops.emplace_back(zkutil::makeSetRequest(database->zookeeper_path + "/max_log_ptr", toString(getLogEntryNumber(entry_name)), -1));
     }
 
-    //if (execute_on_leader)
-    //    txn->ops.emplace_back(zkutil::makeCreateRequest(getShardNodePath() + "/executed", host_id_str, zkutil::CreateMode::Persistent));
-    //txn->ops.emplace_back(zkutil::makeCreateRequest(getFinishedNodePath(), execution_status.serializeText(), zkutil::CreateMode::Persistent));
     txn->ops.emplace_back(zkutil::makeSetRequest(database->replica_path + "/log_ptr", toString(getLogEntryNumber(entry_name)), -1));
 
     std::move(ops.begin(), ops.end(), std::back_inserter(txn->ops));
diff --git a/src/Interpreters/DDLTask.h b/src/Interpreters/DDLTask.h
index 49f6d74a931..552f4919765 100644
--- a/src/Interpreters/DDLTask.h
+++ b/src/Interpreters/DDLTask.h
@@ -81,7 +81,6 @@ struct DDLTaskBase
     bool is_circular_replicated = false;
     bool execute_on_leader = false;
 
-    //MetadataTransactionPtr txn;
     Coordination::Requests ops;
     ExecutionStatus execution_status;
     bool was_executed = false;
@@ -163,6 +162,7 @@ struct MetadataTransaction
 
     void commit();
 
+    ~MetadataTransaction() { assert(state != CREATED || std::uncaught_exception()); }
 };
 
 }
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 545e00296e8..da2e878541d 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -341,7 +341,8 @@ void DDLWorker::scheduleTasks()
     {
         /// We will recheck status of last executed tasks. It's useful if main thread was just restarted.
         auto & min_task = *std::min_element(current_tasks.begin(), current_tasks.end());
-        begin_node = std::upper_bound(queue_nodes.begin(), queue_nodes.end(), min_task->entry_name);
+        String min_entry_name = last_skipped_entry_name ? std::min(min_task->entry_name, *last_skipped_entry_name) : min_task->entry_name;
+        begin_node = std::upper_bound(queue_nodes.begin(), queue_nodes.end(), min_entry_name);
         current_tasks.clear();
     }
 
@@ -358,6 +359,7 @@ void DDLWorker::scheduleTasks()
         {
             LOG_DEBUG(log, "Will not execute task {}: {}", entry_name, reason);
             updateMaxDDLEntryID(entry_name);
+            last_skipped_entry_name.emplace(entry_name);
             continue;
         }
 
@@ -500,10 +502,7 @@ void DDLWorker::processTask(DDLTaskBase & task)
                 {
                     /// It's not CREATE DATABASE
                     auto table_id = context.tryResolveStorageID(*query_with_table, Context::ResolveOrdinary);
-                    DatabasePtr database;
-                    std::tie(database, storage) = DatabaseCatalog::instance().tryGetDatabaseAndTable(table_id, context);
-                    if (database && database->getEngineName() == "Replicated" && !typeid_cast<const DatabaseReplicatedTask *>(&task))
-                        throw Exception(ErrorCodes::INCORRECT_QUERY, "ON CLUSTER queries are not allowed for Replicated databases");
+                    storage = DatabaseCatalog::instance().tryGetTable(table_id, context);
                 }
 
                 task.execute_on_leader = storage && taskShouldBeExecutedOnLeader(task.query, storage) && !task.is_circular_replicated;
@@ -553,7 +552,8 @@ void DDLWorker::processTask(DDLTaskBase & task)
     updateMaxDDLEntryID(task.entry_name);
 
     /// FIXME: if server fails right here, the task will be executed twice. We need WAL here.
-    /// If ZooKeeper connection is lost here, we will try again to write query status.
+    /// NOTE: If ZooKeeper connection is lost here, we will try again to write query status.
+    /// NOTE: If both table and database are replicated, task is executed in single ZK transaction.
 
     bool status_written = task.ops.empty();
     if (!status_written)
@@ -959,12 +959,6 @@ void DDLWorker::runMainThread()
                 initialized = false;
                 LOG_INFO(log, "Lost ZooKeeper connection, will try to connect again: {}", getCurrentExceptionMessage(true));
             }
-            else if (e.code == Coordination::Error::ZNONODE)
-            {
-                // TODO add comment: when it happens and why it's expected?
-                // maybe because cleanup thread may remove nodes inside queue entry which are currently processed
-                LOG_ERROR(log, "ZooKeeper error: {}", getCurrentExceptionMessage(true));
-            }
             else
             {
                 LOG_ERROR(log, "Unexpected ZooKeeper error, will try to restart main thread: {}", getCurrentExceptionMessage(true));
diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h
index d9fd4e58cb6..706face3885 100644
--- a/src/Interpreters/DDLWorker.h
+++ b/src/Interpreters/DDLWorker.h
@@ -115,7 +115,7 @@ protected:
     ZooKeeperPtr current_zookeeper;
 
     /// Save state of executed task to avoid duplicate execution on ZK error
-    //std::optional<String> last_entry_name;
+    std::optional<String> last_skipped_entry_name;
     std::list<DDLTaskPtr> current_tasks;
 
     std::shared_ptr<Poco::Event> queue_updated_event = std::make_shared<Poco::Event>();
diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp
index 6313da7132d..f27fb93b2d4 100644
--- a/src/Interpreters/DatabaseCatalog.cpp
+++ b/src/Interpreters/DatabaseCatalog.cpp
@@ -609,7 +609,7 @@ DatabaseCatalog::updateDependency(const StorageID & old_from, const StorageID &
         view_dependencies[{new_from.getDatabaseName(), new_from.getTableName()}].insert(new_where);
 }
 
-std::unique_ptr<DDLGuard> DatabaseCatalog::getDDLGuard(const String & database, const String & table)
+DDLGuardPtr DatabaseCatalog::getDDLGuard(const String & database, const String & table)
 {
     std::unique_lock lock(ddl_guards_mutex);
     auto db_guard_iter = ddl_guards.try_emplace(database).first;
diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h
index c9f031ef678..bb82dbfc440 100644
--- a/src/Interpreters/DatabaseCatalog.h
+++ b/src/Interpreters/DatabaseCatalog.h
@@ -67,6 +67,8 @@ private:
     bool is_database_guard = false;
 };
 
+using DDLGuardPtr = std::unique_ptr<DDLGuard>;
+
 
 /// Creates temporary table in `_temporary_and_external_tables` with randomly generated unique StorageID.
 /// Such table can be accessed from everywhere by its ID.
@@ -120,7 +122,7 @@ public:
     void loadDatabases();
 
     /// Get an object that protects the table from concurrently executing multiple DDL operations.
-    std::unique_ptr<DDLGuard> getDDLGuard(const String & database, const String & table);
+    DDLGuardPtr getDDLGuard(const String & database, const String & table);
     /// Get an object that protects the database from concurrent DDL queries all tables in the database
     std::unique_lock<std::shared_mutex> getExclusiveDDLGuardForDatabase(const String & database);
 
diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp
index 612f9833af5..cee9b9083ea 100644
--- a/src/Interpreters/InterpreterAlterQuery.cpp
+++ b/src/Interpreters/InterpreterAlterQuery.cpp
@@ -53,7 +53,7 @@ BlockIO InterpreterAlterQuery::execute()
     {
         auto guard = DatabaseCatalog::instance().getDDLGuard(table_id.database_name, table_id.table_name);
         guard->releaseTableLock();
-        return typeid_cast<DatabaseReplicated *>(database.get())->propose(query_ptr);
+        return typeid_cast<DatabaseReplicated *>(database.get())->propose(query_ptr, context);
     }
 
     StoragePtr table = DatabaseCatalog::instance().getTable(table_id, context);
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index 8d344545c8a..6af212172b2 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -886,7 +886,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
         {
             assertOrSetUUID(create, database);
             guard->releaseTableLock();
-            return typeid_cast<DatabaseReplicated *>(database.get())->propose(query_ptr);
+            return typeid_cast<DatabaseReplicated *>(database.get())->propose(query_ptr, context);
         }
     }
 
diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp
index db2f463893e..b22d46358f9 100644
--- a/src/Interpreters/InterpreterDropQuery.cpp
+++ b/src/Interpreters/InterpreterDropQuery.cpp
@@ -139,7 +139,7 @@ BlockIO InterpreterDropQuery::executeToTableImpl(const ASTDropQuery & query, Dat
 
             ddl_guard->releaseTableLock();
             table.reset();
-            return typeid_cast<DatabaseReplicated *>(database.get())->propose(query.clone());
+            return typeid_cast<DatabaseReplicated *>(database.get())->propose(query.clone(), context);
         }
 
         if (query.kind == ASTDropQuery::Kind::Detach)
@@ -325,6 +325,8 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query,
             if (database->getEngineName() == "MaterializeMySQL")
                 stopDatabaseSynchronization(database);
 #endif
+            if (auto * replicated = typeid_cast<DatabaseReplicated *>(database.get()))
+                replicated->stopReplication();
 
             if (database->shouldBeEmptyOnDetach())
             {
diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp
index d2f79ba071c..5bfc144e014 100644
--- a/src/Interpreters/InterpreterRenameQuery.cpp
+++ b/src/Interpreters/InterpreterRenameQuery.cpp
@@ -90,7 +90,7 @@ BlockIO InterpreterRenameQuery::executeToTables(const ASTRenameQuery & rename, c
             UniqueTableName to(elem.to_database_name, elem.to_table_name);
             ddl_guards[from]->releaseTableLock();
             ddl_guards[to]->releaseTableLock();
-            return typeid_cast<DatabaseReplicated *>(database.get())->propose(query_ptr);
+            return typeid_cast<DatabaseReplicated *>(database.get())->propose(query_ptr, context);
         }
         else
         {
diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json
index 273e00c8a23..adee777f900 100644
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@@ -103,6 +103,27 @@
         "memory_tracking",     /// FIXME remove it before merge
         "memory_tracking",
         "memory_usage",
+        "01533_multiple_nested",
+        "01575_disable_detach_table_of_dictionary",
+        "01457_create_as_table_function_structure",
+        "01415_inconsistent_merge_tree_settings",
+        "01413_allow_non_metadata_alters",
+        "01378_alter_rename_with_ttl_zookeeper",
+        "01349_mutation_datetime_key",
+        "01325_freeze_mutation_stuck",
+        "01272_suspicious_codecs",
+        "01181_db_atomic_drop_on_cluster",
+        "00957_delta_diff_bug",
+        "00910_zookeeper_custom_compression_codecs_replicated",
+        "00899_long_attach_memory_limit",
+        "00804_test_custom_compression_codes_log_storages",
+        "00804_test_alter_compression_codecs",
+        "00804_test_delta_codec_no_type_alter",
+        "00804_test_custom_compression_codecs",
+        "00753_alter_attach",
+        "00715_fetch_merged_or_mutated_part_zookeeper",
+        "00688_low_cardinality_serialization",
+        "01575_disable_detach_table_of_dictionary",
         "00738_lock_for_inner_table",
         "01666_blns",
         "01652_ignore_and_low_cardinality",

From d9aa1096cfc49c78839701a843238d0f721c2e28 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Thu, 4 Feb 2021 23:36:50 +0300
Subject: [PATCH 0669/1238] Build actions dag to evaluate missing defaults.

---
 .../AddingDefaultsBlockInputStream.cpp        |  7 +-
 src/Functions/replicate.cpp                   | 48 ++++++++---
 src/Interpreters/ActionsDAG.cpp               | 19 +++++
 src/Interpreters/ActionsDAG.h                 |  3 +
 src/Interpreters/addMissingDefaults.cpp       |  8 +-
 src/Interpreters/inplaceBlockConversions.cpp  | 83 ++++++-------------
 src/Interpreters/inplaceBlockConversions.h    |  6 +-
 .../Optimizations/filterPushDown.cpp          | 22 +++++
 src/Storages/MergeTree/IMergeTreeReader.cpp   |  8 +-
 9 files changed, 131 insertions(+), 73 deletions(-)
 create mode 100644 src/Processors/QueryPlan/Optimizations/filterPushDown.cpp

diff --git a/src/DataStreams/AddingDefaultsBlockInputStream.cpp b/src/DataStreams/AddingDefaultsBlockInputStream.cpp
index 160d1b4fb76..045151465b8 100644
--- a/src/DataStreams/AddingDefaultsBlockInputStream.cpp
+++ b/src/DataStreams/AddingDefaultsBlockInputStream.cpp
@@ -171,7 +171,12 @@ Block AddingDefaultsBlockInputStream::readImpl()
     if (!evaluate_block.columns())
         evaluate_block.insert({ColumnConst::create(ColumnUInt8::create(1, 0), res.rows()), std::make_shared<DataTypeUInt8>(), "_dummy"});
 
-    evaluateMissingDefaults(evaluate_block, header.getNamesAndTypesList(), columns, context, false);
+    auto dag = createFillingMissingDefaultsExpression(evaluate_block, header.getNamesAndTypesList(), columns, context, false);
+    if (dag)
+    {
+        auto actions = std::make_shared<ExpressionActions>(std::move(dag));
+        actions->execute(evaluate_block);
+    }
 
     std::unordered_map<size_t, MutableColumnPtr> mixed_columns;
 
diff --git a/src/Functions/replicate.cpp b/src/Functions/replicate.cpp
index adbb37a7c91..aa4a7d42c6c 100644
--- a/src/Functions/replicate.cpp
+++ b/src/Functions/replicate.cpp
@@ -11,6 +11,7 @@ namespace ErrorCodes
 {
     extern const int ILLEGAL_TYPE_OF_ARGUMENT;
     extern const int ILLEGAL_COLUMN;
+    extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
 }
 
 namespace
@@ -35,33 +36,54 @@ public:
 
     size_t getNumberOfArguments() const override
     {
-        return 2;
+        return 0;
     }
 
+    bool isVariadic() const override { return true; }
+
     bool useDefaultImplementationForNulls() const override { return false; }
 
     DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
     {
-        const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[1].get());
-        if (!array_type)
-            throw Exception("Second argument for function " + getName() + " must be array.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+        if (arguments.size() < 2)
+            throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION,
+                            "Function {} expect at leas two arguments, got {}", getName(), arguments.size());
+
+        for (size_t i = 1; i < arguments.size(); ++i)
+        {
+            const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[i].get());
+            if (!array_type)
+                throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                                "Argument {} for function {} must be array.",
+                                i + 1, getName());
+        }
         return std::make_shared<DataTypeArray>(arguments[0]);
     }
 
     ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override
     {
         ColumnPtr first_column = arguments[0].column;
-        const ColumnArray * array_column = checkAndGetColumn<ColumnArray>(arguments[1].column.get());
-        ColumnPtr temp_column;
-        if (!array_column)
+        ColumnPtr offsets;
+
+        for (size_t i = 1; i < arguments.size(); ++i)
         {
-            const auto * const_array_column = checkAndGetColumnConst<ColumnArray>(arguments[1].column.get());
-            if (!const_array_column)
-                throw Exception("Unexpected column for replicate", ErrorCodes::ILLEGAL_COLUMN);
-            temp_column = const_array_column->convertToFullColumn();
-            array_column = checkAndGetColumn<ColumnArray>(temp_column.get());
+            const ColumnArray * array_column = checkAndGetColumn<ColumnArray>(arguments[i].column.get());
+            ColumnPtr temp_column;
+            if (!array_column)
+            {
+                const auto * const_array_column = checkAndGetColumnConst<ColumnArray>(arguments[i].column.get());
+                if (!const_array_column)
+                    throw Exception("Unexpected column for replicate", ErrorCodes::ILLEGAL_COLUMN);
+                temp_column = const_array_column->convertToFullColumn();
+                array_column = checkAndGetColumn<ColumnArray>(temp_column.get());
+            }
+
+            if (!offsets || offsets->empty())
+                offsets = array_column->getOffsetsPtr();
         }
-        return ColumnArray::create(first_column->replicate(array_column->getOffsets())->convertToFullColumnIfConst(), array_column->getOffsetsPtr());
+
+        const auto & offsets_data = assert_cast<const ColumnArray::ColumnOffsets &>(*offsets).getData();
+        return ColumnArray::create(first_column->replicate(offsets_data)->convertToFullColumnIfConst(), offsets);
     }
 };
 
diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp
index d8c40ffda2f..ed6f8527bc5 100644
--- a/src/Interpreters/ActionsDAG.cpp
+++ b/src/Interpreters/ActionsDAG.cpp
@@ -618,6 +618,25 @@ bool ActionsDAG::trivial() const
     return true;
 }
 
+void ActionsDAG::addMaterializingOutputActions()
+{
+    FunctionOverloadResolverPtr func_builder_materialize =
+            std::make_shared<FunctionOverloadResolverAdaptor>(
+                    std::make_unique<DefaultOverloadResolver>(
+                            std::make_shared<FunctionMaterialize>()));
+
+    Index new_index;
+    for (auto * node : index)
+    {
+        auto & name = node->result_name;
+        node = &addFunction(func_builder_materialize, {node}, {}, true);
+        node = &addAlias(*node, name, true);
+        new_index.insert(node);
+    }
+
+    index.swap(new_index);
+}
+
 ActionsDAGPtr ActionsDAG::makeConvertingActions(
     const ColumnsWithTypeAndName & source,
     const ColumnsWithTypeAndName & result,
diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h
index b12da30e24f..c929f1a4a59 100644
--- a/src/Interpreters/ActionsDAG.h
+++ b/src/Interpreters/ActionsDAG.h
@@ -231,6 +231,9 @@ public:
 
     ActionsDAGPtr clone() const;
 
+    /// For apply materialize() function for every output.
+    /// Also add aliases so the result names remain unchanged.
+    void addMaterializingOutputActions();
 
     enum class MatchColumnsMode
     {
diff --git a/src/Interpreters/addMissingDefaults.cpp b/src/Interpreters/addMissingDefaults.cpp
index 37a0812826b..7935ca04b13 100644
--- a/src/Interpreters/addMissingDefaults.cpp
+++ b/src/Interpreters/addMissingDefaults.cpp
@@ -7,6 +7,7 @@
 #include <Interpreters/inplaceBlockConversions.h>
 #include <Core/Block.h>
 #include <Storages/ColumnsDescription.h>
+#include <Interpreters/ExpressionActions.h>
 
 
 namespace DB
@@ -74,7 +75,12 @@ Block addMissingDefaults(
     }
 
     /// Computes explicitly specified values by default and materialized columns.
-    evaluateMissingDefaults(res, required_columns, columns, context);
+    auto dag = createFillingMissingDefaultsExpression(res, required_columns, columns, context);
+    if (dag)
+    {
+        auto actions = std::make_shared<ExpressionActions>(std::move(dag));
+        actions->execute(res);
+    }
     return res;
 }
 
diff --git a/src/Interpreters/inplaceBlockConversions.cpp b/src/Interpreters/inplaceBlockConversions.cpp
index ab74aa7d631..3e2f055307a 100644
--- a/src/Interpreters/inplaceBlockConversions.cpp
+++ b/src/Interpreters/inplaceBlockConversions.cpp
@@ -24,7 +24,7 @@ namespace
 {
 
 /// Add all required expressions for missing columns calculation
-void addDefaultRequiredExpressionsRecursively(Block & block, const String & required_column, const ColumnsDescription & columns, ASTPtr default_expr_list_accum, NameSet & added_columns)
+void addDefaultRequiredExpressionsRecursively(const Block & block, const String & required_column, const ColumnsDescription & columns, ASTPtr default_expr_list_accum, NameSet & added_columns)
 {
     checkStackSize();
     if (block.has(required_column) || added_columns.count(required_column))
@@ -52,7 +52,7 @@ void addDefaultRequiredExpressionsRecursively(Block & block, const String & requ
     }
 }
 
-ASTPtr defaultRequiredExpressions(Block & block, const NamesAndTypesList & required_columns, const ColumnsDescription & columns)
+ASTPtr defaultRequiredExpressions(const Block & block, const NamesAndTypesList & required_columns, const ColumnsDescription & columns)
 {
     ASTPtr default_expr_list = std::make_shared<ASTExpressionList>();
 
@@ -87,67 +87,32 @@ ASTPtr convertRequiredExpressions(Block & block, const NamesAndTypesList & requi
     return conversion_expr_list;
 }
 
-void executeExpressionsOnBlock(
-    Block & block,
+ActionsDAGPtr createFillingMissingDefaultsExpression(
+    const Block & header,
     ASTPtr expr_list,
     bool save_unneeded_columns,
     const NamesAndTypesList & required_columns,
     const Context & context)
 {
     if (!expr_list)
-        return;
+        return nullptr;
 
-    if (!save_unneeded_columns)
-    {
-        auto syntax_result = TreeRewriter(context).analyze(expr_list, block.getNamesAndTypesList());
-        ExpressionAnalyzer{expr_list, syntax_result, context}.getActions(true)->execute(block);
-        return;
-    }
-
-    /** ExpressionAnalyzer eliminates "unused" columns, in order to ensure their safety
-      * we are going to operate on a copy instead of the original block */
-    Block copy_block{block};
-
-    auto syntax_result = TreeRewriter(context).analyze(expr_list, block.getNamesAndTypesList());
+    auto syntax_result = TreeRewriter(context).analyze(expr_list, header.getNamesAndTypesList());
     auto expression_analyzer = ExpressionAnalyzer{expr_list, syntax_result, context};
-    auto required_source_columns = syntax_result->requiredSourceColumns();
-    auto rows_was = copy_block.rows();
+    auto dag = expression_analyzer.getActionsDAG(true, !save_unneeded_columns);
 
-    // Delete all not needed columns in DEFAULT expression.
-    // They can intersect with columns added in PREWHERE
-    // test 00950_default_prewhere
-    // CLICKHOUSE-4523
-    for (const auto & delete_column : copy_block.getNamesAndTypesList())
+    if (save_unneeded_columns)
     {
-        if (std::find(required_source_columns.begin(), required_source_columns.end(), delete_column.name) == required_source_columns.end())
-        {
-            copy_block.erase(delete_column.name);
-        }
+        Names required_names;
+        required_names.resize(required_columns.size());
+        for (const auto & column : required_columns)
+            required_names.push_back(column.name);
+
+        dag->removeUnusedActions(required_names);
+        dag->addMaterializingOutputActions();
     }
 
-    if (copy_block.columns() == 0)
-    {
-        // Add column to indicate block size in execute()
-        copy_block.insert({DataTypeUInt8().createColumnConst(rows_was, 0u), std::make_shared<DataTypeUInt8>(), "__dummy"});
-    }
-
-    expression_analyzer.getActions(true)->execute(copy_block);
-
-    /// move evaluated columns to the original block, materializing them at the same time
-    size_t pos = 0;
-    for (auto col = required_columns.begin(); col != required_columns.end(); ++col, ++pos)
-    {
-        if (copy_block.has(col->name))
-        {
-            auto evaluated_col = copy_block.getByName(col->name);
-            evaluated_col.column = evaluated_col.column->convertToFullColumnIfConst();
-
-            if (block.has(col->name))
-                block.getByName(col->name) = std::move(evaluated_col);
-            else
-                block.insert(pos, std::move(evaluated_col));
-        }
-    }
+    return dag;
 }
 
 }
@@ -157,19 +122,25 @@ void performRequiredConversions(Block & block, const NamesAndTypesList & require
     ASTPtr conversion_expr_list = convertRequiredExpressions(block, required_columns);
     if (conversion_expr_list->children.empty())
         return;
-    executeExpressionsOnBlock(block, conversion_expr_list, true, required_columns, context);
+
+    if (auto dag = createFillingMissingDefaultsExpression(block, conversion_expr_list, true, required_columns, context))
+    {
+        auto expression = std::make_shared<ExpressionActions>(std::move(dag));
+        expression->execute(block);
+    }
 }
 
-void evaluateMissingDefaults(Block & block,
+ActionsDAGPtr createFillingMissingDefaultsExpression(
+    const Block & header,
     const NamesAndTypesList & required_columns,
     const ColumnsDescription & columns,
     const Context & context, bool save_unneeded_columns)
 {
     if (!columns.hasDefaults())
-        return;
+        return nullptr;
 
-    ASTPtr default_expr_list = defaultRequiredExpressions(block, required_columns, columns);
-    executeExpressionsOnBlock(block, default_expr_list, save_unneeded_columns, required_columns, context);
+    ASTPtr expr_list = defaultRequiredExpressions(header, required_columns, columns);
+    return createFillingMissingDefaultsExpression(header, expr_list, save_unneeded_columns, required_columns, context);
 }
 
 }
diff --git a/src/Interpreters/inplaceBlockConversions.h b/src/Interpreters/inplaceBlockConversions.h
index 066975ab4bc..ffba4a45ba1 100644
--- a/src/Interpreters/inplaceBlockConversions.h
+++ b/src/Interpreters/inplaceBlockConversions.h
@@ -12,9 +12,13 @@ class Context;
 class NamesAndTypesList;
 class ColumnsDescription;
 
+class ActionsDAG;
+using ActionsDAGPtr = std::shared_ptr<ActionsDAG>;
+
 /// Adds missing defaults to block according to required_columns
 /// using columns description
-void evaluateMissingDefaults(Block & block,
+ActionsDAGPtr createFillingMissingDefaultsExpression(
+    const Block & header,
     const NamesAndTypesList & required_columns,
     const ColumnsDescription & columns,
     const Context & context, bool save_unneeded_columns = true);
diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp
new file mode 100644
index 00000000000..0d651897bf8
--- /dev/null
+++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp
@@ -0,0 +1,22 @@
+#include <Processors/QueryPlan/Optimizations/Optimizations.h>
+#include <Processors/QueryPlan/FilterStep.h>
+
+namespace DB::QueryPlanOptimizations
+{
+
+size_t tryPushDownLimit(QueryPlan::Node * node, QueryPlan::Nodes &)
+{
+    auto * filter_step = typeid_cast<FilterStep *>(node->step.get());
+    if (!filter_step)
+        return 0;
+
+    QueryPlan::Node * child_node = node->children.front();
+    auto & child = child_node->step;
+
+    if (const auto * adding_const_column = typeid_cast<const AddingConstColumnStep *>(child.get()))
+    {
+
+    }
+}
+
+}
diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp
index 0140b32e12c..052f6922cac 100644
--- a/src/Storages/MergeTree/IMergeTreeReader.cpp
+++ b/src/Storages/MergeTree/IMergeTreeReader.cpp
@@ -186,7 +186,13 @@ void IMergeTreeReader::evaluateMissingDefaults(Block additional_columns, Columns
             additional_columns.insert({res_columns[pos], name_and_type->type, name_and_type->name});
         }
 
-        DB::evaluateMissingDefaults(additional_columns, columns, metadata_snapshot->getColumns(), storage.global_context);
+        auto dag = DB::createFillingMissingDefaultsExpression(
+                additional_columns, columns, metadata_snapshot->getColumns(), storage.global_context);
+        if (dag)
+        {
+            auto actions = std::make_shared<ExpressionActions>(std::move(dag));
+            actions->execute(additional_columns);
+        }
 
         /// Move columns from block.
         name_and_type = columns.begin();

From 0db2aa3c2e8aa39eb814b8c8a22e904cb8528025 Mon Sep 17 00:00:00 2001
From: Ivan <5627721+abyss7@users.noreply.github.com>
Date: Fri, 5 Feb 2021 01:21:24 +0300
Subject: [PATCH 0670/1238] Check that position doesn't go beyond end (#20039)

---
 src/IO/ReadBuffer.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/IO/ReadBuffer.h b/src/IO/ReadBuffer.h
index 3d6eb6970ce..68ebf154597 100644
--- a/src/IO/ReadBuffer.h
+++ b/src/IO/ReadBuffer.h
@@ -55,6 +55,8 @@ public:
       */
     bool next()
     {
+        assert(position() <= working_buffer.end());
+
         bytes += offset();
         bool res = nextImpl();
         if (!res)
@@ -62,6 +64,9 @@ public:
 
         pos = working_buffer.begin() + nextimpl_working_buffer_offset;
         nextimpl_working_buffer_offset = 0;
+
+        assert(position() <= working_buffer.end());
+
         return res;
     }
 

From 48b415d3e4eac5a1741a87ad36e788fb7420654b Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Fri, 5 Feb 2021 01:26:40 +0300
Subject: [PATCH 0671/1238] Remove never existing
 insert_in_memory_parts_timeout

Fixes: a43cb93be5 ("remove questionable functionality")
Cc: @CurtizJ
---
 src/Core/Settings.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index c4cf3803913..797483d94d9 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -86,8 +86,6 @@ class IColumn;
     \
     M(Bool, optimize_move_to_prewhere, true, "Allows disabling WHERE to PREWHERE optimization in SELECT queries from MergeTree.", 0) \
     \
-    M(Milliseconds, insert_in_memory_parts_timeout, 600000, "", 0) \
-    \
     M(UInt64, replication_alter_partitions_sync, 1, "Wait for actions to manipulate the partitions. 0 - do not wait, 1 - wait for execution only of itself, 2 - wait for everyone.", 0) \
     M(UInt64, replication_alter_columns_timeout, 60, "Wait for actions to change the table structure within the specified number of seconds. 0 - wait unlimited time.", 0) \
     \

From a46d65f99d959c273856b00cf3178af946461abc Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Fri, 22 Jan 2021 22:07:47 +0300
Subject: [PATCH 0672/1238] Fix typo in comment for memoryTrackerCanThrow()

---
 src/Common/MemoryTracker.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp
index d037142fbfb..a584885cf0f 100644
--- a/src/Common/MemoryTracker.cpp
+++ b/src/Common/MemoryTracker.cpp
@@ -24,8 +24,8 @@ namespace
 ///
 /// - when it is explicitly blocked with LockExceptionInThread
 ///
-/// - to avoid std::terminate(), when stack unwinding is current in progress in
-///   this thread.
+/// - to avoid std::terminate(), when stack unwinding is currently in progress
+///   in this thread.
 ///
 ///   NOTE: that since C++11 destructor marked with noexcept by default, and
 ///   this means that any throw from destructor (that is not marked with

From 4beb5c1b8ab0bc8620685ccf967ef31a566ca19c Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 28 Jan 2021 07:04:07 +0300
Subject: [PATCH 0673/1238] TCPHandler: Move constructor into the module and
 add missing headers

---
 src/Server/TCPHandler.cpp | 11 +++++++++++
 src/Server/TCPHandler.h   | 12 ++----------
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp
index 12d1a0249b7..d8c0a48bc32 100644
--- a/src/Server/TCPHandler.cpp
+++ b/src/Server/TCPHandler.cpp
@@ -1,6 +1,7 @@
 #include <iomanip>
 #include <ext/scope_guard.h>
 #include <Poco/Net/NetException.h>
+#include <Poco/Util/LayeredConfiguration.h>
 #include <Common/CurrentThread.h>
 #include <Common/Stopwatch.h>
 #include <Common/NetException.h>
@@ -56,6 +57,16 @@ namespace ErrorCodes
     extern const int SUPPORT_IS_DISABLED;
 }
 
+TCPHandler::TCPHandler(IServer & server_, const Poco::Net::StreamSocket & socket_, bool parse_proxy_protocol_, std::string server_display_name_)
+    : Poco::Net::TCPServerConnection(socket_)
+    , server(server_)
+    , parse_proxy_protocol(parse_proxy_protocol_)
+    , log(&Poco::Logger::get("TCPHandler"))
+    , connection_context(server.context())
+    , query_context(server.context())
+    , server_display_name(std::move(server_display_name_))
+{
+}
 
 void TCPHandler::runImpl()
 {
diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h
index 0d3109a6591..c650c997657 100644
--- a/src/Server/TCPHandler.h
+++ b/src/Server/TCPHandler.h
@@ -10,6 +10,7 @@
 #include <IO/Progress.h>
 #include <DataStreams/BlockIO.h>
 #include <Interpreters/InternalTextLogsQueue.h>
+#include <Interpreters/Context.h>
 #include <Client/TimeoutSetter.h>
 
 #include "IServer.h"
@@ -110,16 +111,7 @@ public:
       * Proxy-forwarded (original client) IP address is used for quota accounting if quota is keyed by forwarded IP.
       */
     TCPHandler(IServer & server_, const Poco::Net::StreamSocket & socket_, bool parse_proxy_protocol_,
-        std::string server_display_name_)
-        : Poco::Net::TCPServerConnection(socket_)
-        , server(server_)
-        , parse_proxy_protocol(parse_proxy_protocol_)
-        , log(&Poco::Logger::get("TCPHandler"))
-        , connection_context(server.context())
-        , query_context(server.context())
-        , server_display_name(std::move(server_display_name_))
-    {
-    }
+        std::string server_display_name_);
 
     void run() override;
 

From 98e3a99a88cfdb220189f41d8579d94ea48ddcd5 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Fri, 22 Jan 2021 21:56:50 +0300
Subject: [PATCH 0674/1238] Do not catch exceptions during final flush in
 writers destructors

Since this hides real problems, since destructor does final flush and if
it fails, then data will be lost.

One of such examples if MEMORY_LIMIT_EXCEEDED exception, so lock
exceptions from destructors, by using
MemoryTracker::LockExceptionInThread to block these exception, and allow
others (so std::terminate will be called, since this is c++11 with
noexcept for destructors by default).

Here is an example, that leads to empty block in the distributed batch:

    2021.01.21 12:43:18.619739 [ 46468 ] {7bd60d75-ebcb-45d2-874d-260df9a4ddac} <Error> virtual DB::CompressedWriteBuffer::~CompressedWriteBuffer(): Code: 241, e.displayText() = DB::Exception: Memory limit (for user) exceeded: would use 332.07 GiB (attempt to allocate chunk of 4355342 bytes), maximum: 256.00 GiB, Stack trace (when copying this message, always include the lines below):

    0. DB::Exception::Exception<>() @ 0x86f7b88 in /usr/bin/clickhouse
    ...
    4. void DB::PODArrayBase<>::resize<>(unsigned long) @ 0xe9e878d in /usr/bin/clickhouse
    5. DB::CompressedWriteBuffer::nextImpl() @ 0xe9f0296 in /usr/bin/clickhouse
    6. DB::CompressedWriteBuffer::~CompressedWriteBuffer() @ 0xe9f0415 in /usr/bin/clickhouse
    7. DB::DistributedBlockOutputStream::writeToShard() @ 0xf6bed4a in /usr/bin/clickhouse
---
 src/Common/ZooKeeper/IKeeper.h               |  2 +-
 src/Compression/CompressedWriteBuffer.cpp    | 12 ++++-------
 src/DataStreams/IBlockOutputStream.h         |  2 +-
 src/IO/AsynchronousWriteBuffer.h             | 22 +++++++-------------
 src/IO/BrotliWriteBuffer.cpp                 | 13 +++++-------
 src/IO/HexWriteBuffer.cpp                    | 13 ++++--------
 src/IO/IReadableWriteBuffer.h                |  2 +-
 src/IO/LZMADeflatingWriteBuffer.cpp          | 14 +++++--------
 src/IO/WriteBufferFromFile.cpp               | 13 +++++-------
 src/IO/WriteBufferFromFileDescriptor.cpp     | 17 +++++++--------
 src/IO/WriteBufferFromHTTPServerResponse.cpp | 12 ++++-------
 src/IO/WriteBufferFromOStream.cpp            | 13 ++++--------
 src/IO/WriteBufferFromPocoSocket.cpp         | 12 ++++-------
 src/IO/WriteBufferFromS3.cpp                 | 12 ++++-------
 src/IO/WriteBufferFromVector.h               | 12 ++++-------
 src/IO/WriteBufferValidUTF8.cpp              | 12 ++++-------
 src/IO/ZlibDeflatingWriteBuffer.cpp          | 11 ++++++++--
 src/IO/ZstdDeflatingWriteBuffer.cpp          | 16 +++++++++++---
 18 files changed, 88 insertions(+), 122 deletions(-)

diff --git a/src/Common/ZooKeeper/IKeeper.h b/src/Common/ZooKeeper/IKeeper.h
index 9d4a2ebb16a..c53ea60ec7c 100644
--- a/src/Common/ZooKeeper/IKeeper.h
+++ b/src/Common/ZooKeeper/IKeeper.h
@@ -331,7 +331,7 @@ public:
 class IKeeper
 {
 public:
-    virtual ~IKeeper() {}
+    virtual ~IKeeper() = default;
 
     /// If expired, you can only destroy the object. All other methods will throw exception.
     virtual bool isExpired() const = 0;
diff --git a/src/Compression/CompressedWriteBuffer.cpp b/src/Compression/CompressedWriteBuffer.cpp
index 02f418dcdf7..8d146e8de23 100644
--- a/src/Compression/CompressedWriteBuffer.cpp
+++ b/src/Compression/CompressedWriteBuffer.cpp
@@ -8,6 +8,7 @@
 #include <Compression/CompressionFactory.h>
 
 #include <Common/MemorySanitizer.h>
+#include <Common/MemoryTracker.h>
 
 
 namespace DB
@@ -49,14 +50,9 @@ CompressedWriteBuffer::CompressedWriteBuffer(
 
 CompressedWriteBuffer::~CompressedWriteBuffer()
 {
-    try
-    {
-        next();
-    }
-    catch (...)
-    {
-        tryLogCurrentException(__PRETTY_FUNCTION__);
-    }
+    /// FIXME move final flush into the caller
+    MemoryTracker::LockExceptionInThread lock;
+    next();
 }
 
 }
diff --git a/src/DataStreams/IBlockOutputStream.h b/src/DataStreams/IBlockOutputStream.h
index 4cc1257e955..79c13b6fa47 100644
--- a/src/DataStreams/IBlockOutputStream.h
+++ b/src/DataStreams/IBlockOutputStream.h
@@ -57,7 +57,7 @@ public:
       */
     virtual std::string getContentType() const { return "text/plain; charset=UTF-8"; }
 
-    virtual ~IBlockOutputStream() {}
+    virtual ~IBlockOutputStream() = default;
 
     /** Don't let to alter table while instance of stream is alive.
       */
diff --git a/src/IO/AsynchronousWriteBuffer.h b/src/IO/AsynchronousWriteBuffer.h
index 74b5804691b..8c44f8c7d4a 100644
--- a/src/IO/AsynchronousWriteBuffer.h
+++ b/src/IO/AsynchronousWriteBuffer.h
@@ -1,10 +1,8 @@
 #pragma once
 
-#include <math.h>
-
 #include <vector>
-
 #include <Common/ThreadPool.h>
+#include <Common/MemoryTracker.h>
 #include <IO/WriteBuffer.h>
 
 
@@ -53,18 +51,14 @@ public:
 
     ~AsynchronousWriteBuffer() override
     {
-        try
-        {
-            if (started)
-                pool.wait();
+        /// FIXME move final flush into the caller
+        MemoryTracker::LockExceptionInThread lock;
 
-            swapBuffers();
-            out.next();
-        }
-        catch (...)
-        {
-            tryLogCurrentException(__PRETTY_FUNCTION__);
-        }
+        if (started)
+            pool.wait();
+
+        swapBuffers();
+        out.next();
     }
 
     /// That is executed in a separate thread
diff --git a/src/IO/BrotliWriteBuffer.cpp b/src/IO/BrotliWriteBuffer.cpp
index e4e3713d379..d14c94ca43d 100644
--- a/src/IO/BrotliWriteBuffer.cpp
+++ b/src/IO/BrotliWriteBuffer.cpp
@@ -6,6 +6,8 @@
 #    include <IO/BrotliWriteBuffer.h>
 #    include <brotli/encode.h>
 
+#include <Common/MemoryTracker.h>
+
 namespace DB
 {
 
@@ -47,14 +49,9 @@ BrotliWriteBuffer::BrotliWriteBuffer(std::unique_ptr<WriteBuffer> out_, int comp
 
 BrotliWriteBuffer::~BrotliWriteBuffer()
 {
-    try
-    {
-        finish();
-    }
-    catch (...)
-    {
-        tryLogCurrentException(__PRETTY_FUNCTION__);
-    }
+    /// FIXME move final flush into the caller
+    MemoryTracker::LockExceptionInThread lock;
+    finish();
 }
 
 void BrotliWriteBuffer::nextImpl()
diff --git a/src/IO/HexWriteBuffer.cpp b/src/IO/HexWriteBuffer.cpp
index d7b8a993ce5..4e3403ba74b 100644
--- a/src/IO/HexWriteBuffer.cpp
+++ b/src/IO/HexWriteBuffer.cpp
@@ -1,6 +1,6 @@
 #include <common/types.h>
 #include <Common/hex.h>
-#include <Common/Exception.h>
+#include <Common/MemoryTracker.h>
 #include <IO/HexWriteBuffer.h>
 
 
@@ -22,14 +22,9 @@ void HexWriteBuffer::nextImpl()
 
 HexWriteBuffer::~HexWriteBuffer()
 {
-    try
-    {
-        nextImpl();
-    }
-    catch (...)
-    {
-        tryLogCurrentException(__PRETTY_FUNCTION__);
-    }
+    /// FIXME move final flush into the caller
+    MemoryTracker::LockExceptionInThread lock;
+    nextImpl();
 }
 
 }
diff --git a/src/IO/IReadableWriteBuffer.h b/src/IO/IReadableWriteBuffer.h
index a02dd4e23cb..539825e3a85 100644
--- a/src/IO/IReadableWriteBuffer.h
+++ b/src/IO/IReadableWriteBuffer.h
@@ -17,7 +17,7 @@ struct IReadableWriteBuffer
         return getReadBufferImpl();
     }
 
-    virtual ~IReadableWriteBuffer() {}
+    virtual ~IReadableWriteBuffer() = default;
 
 protected:
 
diff --git a/src/IO/LZMADeflatingWriteBuffer.cpp b/src/IO/LZMADeflatingWriteBuffer.cpp
index e3051f1de65..5803bc1e9f1 100644
--- a/src/IO/LZMADeflatingWriteBuffer.cpp
+++ b/src/IO/LZMADeflatingWriteBuffer.cpp
@@ -1,4 +1,5 @@
 #include <IO/LZMADeflatingWriteBuffer.h>
+#include <Common/MemoryTracker.h>
 
 #if !defined(ARCADIA_BUILD)
 
@@ -48,16 +49,11 @@ LZMADeflatingWriteBuffer::LZMADeflatingWriteBuffer(
 
 LZMADeflatingWriteBuffer::~LZMADeflatingWriteBuffer()
 {
-    try
-    {
-        finish();
+    /// FIXME move final flush into the caller
+    MemoryTracker::LockExceptionInThread lock;
 
-        lzma_end(&lstr);
-    }
-    catch (...)
-    {
-        tryLogCurrentException(__PRETTY_FUNCTION__);
-    }
+    finish();
+    lzma_end(&lstr);
 }
 
 void LZMADeflatingWriteBuffer::nextImpl()
diff --git a/src/IO/WriteBufferFromFile.cpp b/src/IO/WriteBufferFromFile.cpp
index aeed4862fba..b3a63842326 100644
--- a/src/IO/WriteBufferFromFile.cpp
+++ b/src/IO/WriteBufferFromFile.cpp
@@ -3,6 +3,7 @@
 #include <errno.h>
 
 #include <Common/ProfileEvents.h>
+#include <Common/MemoryTracker.h>
 
 #include <IO/WriteBufferFromFile.h>
 #include <IO/WriteHelpers.h>
@@ -77,14 +78,10 @@ WriteBufferFromFile::~WriteBufferFromFile()
     if (fd < 0)
         return;
 
-    try
-    {
-        next();
-    }
-    catch (...)
-    {
-        tryLogCurrentException(__PRETTY_FUNCTION__);
-    }
+    /// FIXME move final flush into the caller
+    MemoryTracker::LockExceptionInThread lock;
+
+    next();
 
     ::close(fd);
 }
diff --git a/src/IO/WriteBufferFromFileDescriptor.cpp b/src/IO/WriteBufferFromFileDescriptor.cpp
index a59ae20c588..bfd874ee396 100644
--- a/src/IO/WriteBufferFromFileDescriptor.cpp
+++ b/src/IO/WriteBufferFromFileDescriptor.cpp
@@ -8,6 +8,7 @@
 #include <Common/ProfileEvents.h>
 #include <Common/CurrentMetrics.h>
 #include <Common/Stopwatch.h>
+#include <Common/MemoryTracker.h>
 
 #include <IO/WriteBufferFromFileDescriptor.h>
 #include <IO/WriteHelpers.h>
@@ -90,17 +91,15 @@ WriteBufferFromFileDescriptor::WriteBufferFromFileDescriptor(
 
 WriteBufferFromFileDescriptor::~WriteBufferFromFileDescriptor()
 {
-    try
+    if (fd < 0)
     {
-        if (fd >= 0)
-            next();
-        else
-            assert(!offset() && "attempt to write after close");
-    }
-    catch (...)
-    {
-        tryLogCurrentException(__PRETTY_FUNCTION__);
+        assert(!offset() && "attempt to write after close");
+        return;
     }
+
+    /// FIXME move final flush into the caller
+    MemoryTracker::LockExceptionInThread lock;
+    next();
 }
 
 
diff --git a/src/IO/WriteBufferFromHTTPServerResponse.cpp b/src/IO/WriteBufferFromHTTPServerResponse.cpp
index 0f30f1352e3..fb9a6a99d2b 100644
--- a/src/IO/WriteBufferFromHTTPServerResponse.cpp
+++ b/src/IO/WriteBufferFromHTTPServerResponse.cpp
@@ -7,6 +7,7 @@
 #include <Common/Exception.h>
 #include <Common/NetException.h>
 #include <Common/Stopwatch.h>
+#include <Common/MemoryTracker.h>
 
 #if !defined(ARCADIA_BUILD)
 #    include <Common/config.h>
@@ -206,14 +207,9 @@ void WriteBufferFromHTTPServerResponse::finalize()
 
 WriteBufferFromHTTPServerResponse::~WriteBufferFromHTTPServerResponse()
 {
-    try
-    {
-        finalize();
-    }
-    catch (...)
-    {
-        tryLogCurrentException(__PRETTY_FUNCTION__);
-    }
+    /// FIXME move final flush into the caller
+    MemoryTracker::LockExceptionInThread lock;
+    finalize();
 }
 
 }
diff --git a/src/IO/WriteBufferFromOStream.cpp b/src/IO/WriteBufferFromOStream.cpp
index 2c45a21a0a3..cf731934c93 100644
--- a/src/IO/WriteBufferFromOStream.cpp
+++ b/src/IO/WriteBufferFromOStream.cpp
@@ -1,5 +1,5 @@
 #include <IO/WriteBufferFromOStream.h>
-#include <Common/Exception.h>
+#include <Common/MemoryTracker.h>
 
 
 namespace DB
@@ -42,14 +42,9 @@ WriteBufferFromOStream::WriteBufferFromOStream(
 
 WriteBufferFromOStream::~WriteBufferFromOStream()
 {
-    try
-    {
-        next();
-    }
-    catch (...)
-    {
-        tryLogCurrentException(__PRETTY_FUNCTION__);
-    }
+    /// FIXME move final flush into the caller
+    MemoryTracker::LockExceptionInThread lock;
+    next();
 }
 
 }
diff --git a/src/IO/WriteBufferFromPocoSocket.cpp b/src/IO/WriteBufferFromPocoSocket.cpp
index c05dc11e330..284fa5dbd97 100644
--- a/src/IO/WriteBufferFromPocoSocket.cpp
+++ b/src/IO/WriteBufferFromPocoSocket.cpp
@@ -5,6 +5,7 @@
 #include <Common/Exception.h>
 #include <Common/NetException.h>
 #include <Common/Stopwatch.h>
+#include <Common/MemoryTracker.h>
 
 
 namespace ProfileEvents
@@ -70,14 +71,9 @@ WriteBufferFromPocoSocket::WriteBufferFromPocoSocket(Poco::Net::Socket & socket_
 
 WriteBufferFromPocoSocket::~WriteBufferFromPocoSocket()
 {
-    try
-    {
-        next();
-    }
-    catch (...)
-    {
-        tryLogCurrentException(__PRETTY_FUNCTION__);
-    }
+    /// FIXME move final flush into the caller
+    MemoryTracker::LockExceptionInThread lock;
+    next();
 }
 
 }
diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp
index 09aabb1b21d..a6ec60b295f 100644
--- a/src/IO/WriteBufferFromS3.cpp
+++ b/src/IO/WriteBufferFromS3.cpp
@@ -4,6 +4,7 @@
 
 #    include <IO/WriteBufferFromS3.h>
 #    include <IO/WriteHelpers.h>
+#    include <Common/MemoryTracker.h>
 
 #    include <aws/s3/S3Client.h>
 #    include <aws/s3/model/CreateMultipartUploadRequest.h>
@@ -78,6 +79,8 @@ void WriteBufferFromS3::nextImpl()
 
 void WriteBufferFromS3::finalize()
 {
+    /// FIXME move final flush into the caller
+    MemoryTracker::LockExceptionInThread lock;
     finalizeImpl();
 }
 
@@ -104,14 +107,7 @@ void WriteBufferFromS3::finalizeImpl()
 
 WriteBufferFromS3::~WriteBufferFromS3()
 {
-    try
-    {
-        finalizeImpl();
-    }
-    catch (...)
-    {
-        tryLogCurrentException(__PRETTY_FUNCTION__);
-    }
+    finalizeImpl();
 }
 
 void WriteBufferFromS3::createMultipartUpload()
diff --git a/src/IO/WriteBufferFromVector.h b/src/IO/WriteBufferFromVector.h
index 2a9810f3461..1dcf2c3f327 100644
--- a/src/IO/WriteBufferFromVector.h
+++ b/src/IO/WriteBufferFromVector.h
@@ -3,6 +3,7 @@
 #include <vector>
 
 #include <IO/WriteBuffer.h>
+#include <Common/MemoryTracker.h>
 
 
 namespace DB
@@ -93,14 +94,9 @@ public:
 
     ~WriteBufferFromVector() override
     {
-        try
-        {
-            finalize();
-        }
-        catch (...)
-        {
-            tryLogCurrentException(__PRETTY_FUNCTION__);
-        }
+        /// FIXME move final flush into the caller
+        MemoryTracker::LockExceptionInThread lock;
+        finalize();
     }
 };
 
diff --git a/src/IO/WriteBufferValidUTF8.cpp b/src/IO/WriteBufferValidUTF8.cpp
index f1f04e9805b..1071ac1078d 100644
--- a/src/IO/WriteBufferValidUTF8.cpp
+++ b/src/IO/WriteBufferValidUTF8.cpp
@@ -1,5 +1,6 @@
 #include <Poco/UTF8Encoding.h>
 #include <IO/WriteBufferValidUTF8.h>
+#include <Common/MemoryTracker.h>
 #include <common/types.h>
 
 #ifdef __SSE2__
@@ -136,14 +137,9 @@ void WriteBufferValidUTF8::finish()
 
 WriteBufferValidUTF8::~WriteBufferValidUTF8()
 {
-    try
-    {
-        finish();
-    }
-    catch (...)
-    {
-        tryLogCurrentException(__PRETTY_FUNCTION__);
-    }
+    /// FIXME move final flush into the caller
+    MemoryTracker::LockExceptionInThread lock;
+    finish();
 }
 
 }
diff --git a/src/IO/ZlibDeflatingWriteBuffer.cpp b/src/IO/ZlibDeflatingWriteBuffer.cpp
index 8efe96877e4..4b838ac6d0a 100644
--- a/src/IO/ZlibDeflatingWriteBuffer.cpp
+++ b/src/IO/ZlibDeflatingWriteBuffer.cpp
@@ -1,5 +1,7 @@
 #include <IO/ZlibDeflatingWriteBuffer.h>
 #include <Common/MemorySanitizer.h>
+#include <Common/MemoryTracker.h>
+#include <Common/Exception.h>
 
 
 namespace DB
@@ -46,16 +48,21 @@ ZlibDeflatingWriteBuffer::ZlibDeflatingWriteBuffer(
 
 ZlibDeflatingWriteBuffer::~ZlibDeflatingWriteBuffer()
 {
+    /// FIXME move final flush into the caller
+    MemoryTracker::LockExceptionInThread lock;
+
+    finish();
+
     try
     {
-        finish();
-
         int rc = deflateEnd(&zstr);
         if (rc != Z_OK)
             throw Exception(std::string("deflateEnd failed: ") + zError(rc), ErrorCodes::ZLIB_DEFLATE_FAILED);
     }
     catch (...)
     {
+        /// It is OK not to terminate under an error from deflateEnd()
+        /// since all data already written to the stream.
         tryLogCurrentException(__PRETTY_FUNCTION__);
     }
 }
diff --git a/src/IO/ZstdDeflatingWriteBuffer.cpp b/src/IO/ZstdDeflatingWriteBuffer.cpp
index df28820e382..9b79d5ae513 100644
--- a/src/IO/ZstdDeflatingWriteBuffer.cpp
+++ b/src/IO/ZstdDeflatingWriteBuffer.cpp
@@ -1,4 +1,6 @@
 #include <IO/ZstdDeflatingWriteBuffer.h>
+#include <Common/MemoryTracker.h>
+#include <Common/Exception.h>
 
 namespace DB
 {
@@ -28,14 +30,22 @@ ZstdDeflatingWriteBuffer::ZstdDeflatingWriteBuffer(
 
 ZstdDeflatingWriteBuffer::~ZstdDeflatingWriteBuffer()
 {
+    /// FIXME move final flush into the caller
+    MemoryTracker::LockExceptionInThread lock;
+
+    finish();
+
     try
     {
-        finish();
-
-        ZSTD_freeCCtx(cctx);
+        int err = ZSTD_freeCCtx(cctx);
+        /// This is just in case, since it is impossible to get an error by using this wrapper.
+        if (unlikely(err))
+            throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, "ZSTD_freeCCtx failed: error code: {}; zstd version: {}", err, ZSTD_VERSION_STRING);
     }
     catch (...)
     {
+        /// It is OK not to terminate under an error from ZSTD_freeCCtx()
+        /// since all data already written to the stream.
         tryLogCurrentException(__PRETTY_FUNCTION__);
     }
 }

From 64c0bf98290362fa216c05b070aa122a12af3c25 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 28 Jan 2021 07:07:51 +0300
Subject: [PATCH 0675/1238] TCPHandler: catch exceptions from the WriteBuffer
 in destructor

For TCPHandler it is safe thing todo.

Otherwise *San will report [1]:

    2021.01.24 15:33:40.103996 [ 270 ] {} <Trace> BaseDaemon: Received signal -1
    2021.01.24 15:33:40.110693 [ 270 ] {} <Fatal> BaseDaemon: (version 21.2.1.5789, build id: FF421B087D1E2EAA19FA17B5AB3AE413832744E0) (from thread 48318) Terminate called for uncaught exception:
    2021.01.24 15:33:40.114845 [ 270 ] {} <Trace> BaseDaemon: Received signal 6
    2021.01.24 15:33:40.138738 [ 218027 ] {} <Fatal> BaseDaemon: ########################################
    2021.01.24 15:33:40.138838 [ 218027 ] {} <Fatal> BaseDaemon: (version 21.2.1.5789, build id: FF421B087D1E2EAA19FA17B5AB3AE413832744E0) (from thread 48318) (no query) Received signal Aborted (6)
    2021.01.24 15:33:40.138912 [ 218027 ] {} <Fatal> BaseDaemon:
    2021.01.24 15:33:40.139277 [ 218027 ] {} <Fatal> BaseDaemon: Stack trace: 0x7f185474118b 0x7f1854720859 0xaddc0cc 0x2af9fab8 0x2af9fa04 0xa91758b 0x1e418bb5 0x20725b4f 0x20725d9e 0x266b47a3 0x269772f5 0x26971847 0x7f18548f6609 0x7f185481d293
    2021.01.24 15:33:40.139637 [ 218027 ] {} <Fatal> BaseDaemon: 3. raise @ 0x4618b in /usr/lib/x86_64-linux-gnu/libc-2.31.so
    2021.01.24 15:33:40.140113 [ 218027 ] {} <Fatal> BaseDaemon: 4. abort @ 0x25859 in /usr/lib/x86_64-linux-gnu/libc-2.31.so
    2021.01.24 15:33:40.144121 [ 218027 ] {} <Fatal> BaseDaemon: 5. ./obj-x86_64-linux-gnu/../base/daemon/BaseDaemon.cpp:0: terminate_handler() @ 0xaddc0cc in /usr/bin/clickhouse
    2021.01.24 15:33:40.151208 [ 218027 ] {} <Fatal> BaseDaemon: 6. ./obj-x86_64-linux-gnu/../contrib/libcxxabi/src/cxa_handlers.cpp:61: std::__terminate(void (*)()) @ 0x2af9fab8 in /usr/bin/clickhouse
    2021.01.24 15:33:40.153085 [ 218027 ] {} <Fatal> BaseDaemon: 7. ./obj-x86_64-linux-gnu/../contrib/libcxxabi/src/cxa_handlers.cpp:0: std::terminate() @ 0x2af9fa04 in /usr/bin/clickhouse
    2021.01.24 15:33:40.155209 [ 218027 ] {} <Fatal> BaseDaemon: 8. ? @ 0xa91758b in /usr/bin/clickhouse
    2021.01.24 15:33:40.156621 [ 218027 ] {} <Fatal> BaseDaemon: 9. ./obj-x86_64-linux-gnu/../src/IO/WriteBufferFromPocoSocket.cpp:0: DB::WriteBufferFromPocoSocket::~WriteBufferFromPocoSocket() @ 0x1e418bb5 in /usr/bin/clickhouse
    2021.01.24 15:33:40.161041 [ 218027 ] {} <Fatal> BaseDaemon: 10. ./obj-x86_64-linux-gnu/../contrib/libcxx/include/memory:2518: DB::TCPHandler::~TCPHandler() @ 0x20725b4f in /usr/bin/clickhouse
    2021.01.24 15:33:40.164557 [ 218027 ] {} <Fatal> BaseDaemon: 11. ./obj-x86_64-linux-gnu/../src/Server/TCPHandler.h:101: DB::TCPHandler::~TCPHandler() @ 0x20725d9e in /usr/bin/clickhouse
    2021.01.24 15:33:40.165921 [ 218027 ] {} <Fatal> BaseDaemon: 12. ./obj-x86_64-linux-gnu/../contrib/poco/Foundation/include/Poco/AtomicCounter.h:314: Poco::Net::TCPServerDispatcher::run() @ 0x266b47a3 in /usr/bin/clickhouse
    2021.01.24 15:33:40.167347 [ 218027 ] {} <Fatal> BaseDaemon: 13. ./obj-x86_64-linux-gnu/../contrib/poco/Foundation/src/ThreadPool.cpp:0: Poco::PooledThread::run() @ 0x269772f5 in /usr/bin/clickhouse
    2021.01.24 15:33:40.169401 [ 218027 ] {} <Fatal> BaseDaemon: 14. ./obj-x86_64-linux-gnu/../contrib/poco/Foundation/src/Thread_POSIX.cpp:0: Poco::ThreadImpl::runnableEntry(void*) @ 0x26971847 in /usr/bin/clickhouse
    2021.01.24 15:33:40.169498 [ 218027 ] {} <Fatal> BaseDaemon: 15. start_thread @ 0x9609 in /usr/lib/x86_64-linux-gnu/libpthread-2.31.so
    2021.01.24 15:33:40.169566 [ 218027 ] {} <Fatal> BaseDaemon: 16. __clone @ 0x122293 in /usr/lib/x86_64-linux-gnu/libc-2.31.so
    2021.01.24 15:33:41.027601 [ 218027 ] {} <Fatal> BaseDaemon: Calculated checksum of the binary: 63D7491B39260494BA0D785E1860B427. There is no information about the reference checksum.

  [1]: https://clickhouse-test-reports.s3.yandex.net/19451/1e16bd6f337985a82fbdf4eded695dc6e663af58/stress_test_(address).html#fail1

v2: Fix catching errors in WriteBufferFromPocoSocket destructor
---
 src/Server/TCPHandler.cpp | 12 ++++++++++++
 src/Server/TCPHandler.h   |  1 +
 2 files changed, 13 insertions(+)

diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp
index d8c0a48bc32..f48e3507b63 100644
--- a/src/Server/TCPHandler.cpp
+++ b/src/Server/TCPHandler.cpp
@@ -67,6 +67,18 @@ TCPHandler::TCPHandler(IServer & server_, const Poco::Net::StreamSocket & socket
     , server_display_name(std::move(server_display_name_))
 {
 }
+TCPHandler::~TCPHandler()
+{
+    try
+    {
+        state.reset();
+        out->next();
+    }
+    catch (...)
+    {
+        tryLogCurrentException(__PRETTY_FUNCTION__);
+    }
+}
 
 void TCPHandler::runImpl()
 {
diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h
index c650c997657..463900c18b3 100644
--- a/src/Server/TCPHandler.h
+++ b/src/Server/TCPHandler.h
@@ -112,6 +112,7 @@ public:
       */
     TCPHandler(IServer & server_, const Poco::Net::StreamSocket & socket_, bool parse_proxy_protocol_,
         std::string server_display_name_);
+    ~TCPHandler() override;
 
     void run() override;
 

From d59b45e4961be70a794ee790f340034589f72683 Mon Sep 17 00:00:00 2001
From: Ivan Lezhankin <ilezhankin@yandex-team.ru>
Date: Fri, 5 Feb 2021 02:14:17 +0300
Subject: [PATCH 0676/1238] Fix build

---
 src/IO/BrotliReadBuffer.cpp        |  2 +-
 src/IO/HashingReadBuffer.h         | 11 ++++++-----
 src/IO/LZMAInflatingReadBuffer.cpp |  2 +-
 src/IO/LimitReadBuffer.cpp         |  2 +-
 src/IO/WriteBuffer.h               |  4 ++--
 src/IO/ZlibInflatingReadBuffer.cpp |  2 +-
 src/IO/ZstdInflatingReadBuffer.cpp |  2 +-
 7 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/src/IO/BrotliReadBuffer.cpp b/src/IO/BrotliReadBuffer.cpp
index 70d3a76e629..41991ad0516 100644
--- a/src/IO/BrotliReadBuffer.cpp
+++ b/src/IO/BrotliReadBuffer.cpp
@@ -77,7 +77,7 @@ bool BrotliReadBuffer::nextImpl()
         if (in->eof())
         {
             eof = true;
-            return working_buffer.size() != 0;
+            return !working_buffer.empty();
         }
         else
         {
diff --git a/src/IO/HashingReadBuffer.h b/src/IO/HashingReadBuffer.h
index 9fcd6dc6b41..08b6de69dcb 100644
--- a/src/IO/HashingReadBuffer.h
+++ b/src/IO/HashingReadBuffer.h
@@ -1,10 +1,11 @@
 #pragma once
 
-#include <IO/ReadBuffer.h>
 #include <IO/HashingWriteBuffer.h>
+#include <IO/ReadBuffer.h>
 
 namespace DB
 {
+
 /*
  * Calculates the hash from the read data. When reading, the data is read from the nested ReadBuffer.
  * Small pieces are copied into its own memory.
@@ -12,14 +13,14 @@ namespace DB
 class HashingReadBuffer : public IHashingBuffer<ReadBuffer>
 {
 public:
-    HashingReadBuffer(ReadBuffer & in_, size_t block_size_ = DBMS_DEFAULT_HASHING_BLOCK_SIZE) :
-        IHashingBuffer<ReadBuffer>(block_size_), in(in_)
+    explicit HashingReadBuffer(ReadBuffer & in_, size_t block_size_ = DBMS_DEFAULT_HASHING_BLOCK_SIZE)
+        : IHashingBuffer<ReadBuffer>(block_size_), in(in_)
     {
         working_buffer = in.buffer();
         pos = in.position();
 
         /// calculate hash from the data already read
-        if (working_buffer.size())
+        if (!working_buffer.empty())
         {
             calculateHash(pos, working_buffer.end() - pos);
         }
@@ -39,7 +40,7 @@ private:
         return res;
     }
 
-private:
     ReadBuffer & in;
 };
+
 }
diff --git a/src/IO/LZMAInflatingReadBuffer.cpp b/src/IO/LZMAInflatingReadBuffer.cpp
index e30e8df5f9d..6a0a7e5ee31 100644
--- a/src/IO/LZMAInflatingReadBuffer.cpp
+++ b/src/IO/LZMAInflatingReadBuffer.cpp
@@ -66,7 +66,7 @@ bool LZMAInflatingReadBuffer::nextImpl()
         if (in->eof())
         {
             eof = true;
-            return working_buffer.size() != 0;
+            return !working_buffer.empty();
         }
         else
         {
diff --git a/src/IO/LimitReadBuffer.cpp b/src/IO/LimitReadBuffer.cpp
index f36facfdd99..b0d734c9ca0 100644
--- a/src/IO/LimitReadBuffer.cpp
+++ b/src/IO/LimitReadBuffer.cpp
@@ -50,7 +50,7 @@ LimitReadBuffer::LimitReadBuffer(ReadBuffer & in_, UInt64 limit_, bool throw_exc
 LimitReadBuffer::~LimitReadBuffer()
 {
     /// Update underlying buffer's position in case when limit wasn't reached.
-    if (working_buffer.size() != 0)
+    if (!working_buffer.empty())
         in.position() = position();
 }
 
diff --git a/src/IO/WriteBuffer.h b/src/IO/WriteBuffer.h
index 6abcc1c8eed..d425f813d7b 100644
--- a/src/IO/WriteBuffer.h
+++ b/src/IO/WriteBuffer.h
@@ -61,7 +61,7 @@ public:
     /** it is desirable in the derived classes to place the next() call in the destructor,
       * so that the last data is written
       */
-    virtual ~WriteBuffer() {}
+    virtual ~WriteBuffer() = default;
 
     inline void nextIfAtEnd()
     {
@@ -75,7 +75,7 @@ public:
         size_t bytes_copied = 0;
 
         /// Produces endless loop
-        assert(working_buffer.size() > 0);
+        assert(!working_buffer.empty());
 
         while (bytes_copied < n)
         {
diff --git a/src/IO/ZlibInflatingReadBuffer.cpp b/src/IO/ZlibInflatingReadBuffer.cpp
index 0b23bef1b10..bea83c74e21 100644
--- a/src/IO/ZlibInflatingReadBuffer.cpp
+++ b/src/IO/ZlibInflatingReadBuffer.cpp
@@ -70,7 +70,7 @@ bool ZlibInflatingReadBuffer::nextImpl()
         if (in->eof())
         {
             eof = true;
-            return working_buffer.size() != 0;
+            return !working_buffer.empty();
         }
         else
         {
diff --git a/src/IO/ZstdInflatingReadBuffer.cpp b/src/IO/ZstdInflatingReadBuffer.cpp
index 94a0b56fc6d..b441a6a7210 100644
--- a/src/IO/ZstdInflatingReadBuffer.cpp
+++ b/src/IO/ZstdInflatingReadBuffer.cpp
@@ -54,7 +54,7 @@ bool ZstdInflatingReadBuffer::nextImpl()
     if (in->eof())
     {
         eof = true;
-        return working_buffer.size() != 0;
+        return !working_buffer.empty();
     }
 
     return true;

From 2f31eabc13b052feb163cbe9ba13267f9bf784d7 Mon Sep 17 00:00:00 2001
From: Russ Frank <rf@oden.io>
Date: Thu, 4 Feb 2021 21:22:03 -0500
Subject: [PATCH 0677/1238] turn off ubsan for deltasum functions

---
 src/AggregateFunctions/AggregateFunctionDeltaSum.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/AggregateFunctions/AggregateFunctionDeltaSum.h b/src/AggregateFunctions/AggregateFunctionDeltaSum.h
index 5fb6063d7e8..7d384438912 100644
--- a/src/AggregateFunctions/AggregateFunctionDeltaSum.h
+++ b/src/AggregateFunctions/AggregateFunctionDeltaSum.h
@@ -40,7 +40,7 @@ public:
 
     DataTypePtr getReturnType() const override { return std::make_shared<DataTypeNumber<T>>(); }
 
-    void ALWAYS_INLINE add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
+    void NO_SANITIZE_UNDEFINED ALWAYS_INLINE add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
     {
         auto value = static_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num];
 
@@ -59,7 +59,7 @@ public:
         }
     }
 
-    void ALWAYS_INLINE merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    void NO_SANITIZE_UNDEFINED ALWAYS_INLINE merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         if ((this->data(place).last < this->data(rhs).first) && this->data(place).seen_last && this->data(rhs).seen_first)
         {

From b892fff406b2a69ed372acad528505f6f4314544 Mon Sep 17 00:00:00 2001
From: hexiaoting <hewenting_ict@163.com>
Date: Fri, 5 Feb 2021 10:31:16 +0800
Subject: [PATCH 0678/1238] Add alexey's fix: using another logic and  more
 comments

---
 src/DataTypes/NumberTraits.h                         | 12 +++++++-----
 src/Functions/tests/gtest_number_traits.cpp          |  2 +-
 .../0_stateless/01692_mod_enlarge_type.reference     |  2 +-
 3 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/src/DataTypes/NumberTraits.h b/src/DataTypes/NumberTraits.h
index 4d534df0b42..14bd32c87a3 100644
--- a/src/DataTypes/NumberTraits.h
+++ b/src/DataTypes/NumberTraits.h
@@ -104,14 +104,16 @@ template <typename A, typename B> struct ResultOfIntegerDivision
         sizeof(A)>::Type;
 };
 
-/** Division with remainder you get a number with the same number of bits as in divisor.
+/** Division with remainder you get a number with the same number of bits as in divisor,
+  * or larger in case of signed type.
     */
 template <typename A, typename B> struct ResultOfModulo
 {
-    using Type0 = typename Construct<
-        is_signed_v<A> || is_signed_v<B>,
-        false,
-        (is_signed_v<A> || is_signed_v<B>) ? std::max(sizeof(A), sizeof(B)) : sizeof(B)>::Type;
+    static constexpr bool result_is_signed = is_signed_v<A>;
+    /// If modulo of division can yield negative number, we need larger type to accommodate it.
+    /// Example: toInt32(-199) % toUInt8(200) will return -199 that does not fit in Int8, only in Int16.
+    static constexpr size_t size_of_result = result_is_signed ? nextSize(sizeof(B)) : sizeof(B);
+    using Type0 = typename Construct<result_is_signed, false, size_of_result>::Type;
     using Type = std::conditional_t<std::is_floating_point_v<A> || std::is_floating_point_v<B>, Float64, Type0>;
 };
 
diff --git a/src/Functions/tests/gtest_number_traits.cpp b/src/Functions/tests/gtest_number_traits.cpp
index 7664b4fcbdc..7f25c6cbeb7 100644
--- a/src/Functions/tests/gtest_number_traits.cpp
+++ b/src/Functions/tests/gtest_number_traits.cpp
@@ -258,7 +258,7 @@ TEST(NumberTraits, Others)
     ASSERT_EQ(getTypeString(DB::NumberTraits::ResultOfFloatingPointDivision<DB::UInt16, DB::Int16>::Type()), "Float64");
     ASSERT_EQ(getTypeString(DB::NumberTraits::ResultOfFloatingPointDivision<DB::UInt32, DB::Int16>::Type()), "Float64");
     ASSERT_EQ(getTypeString(DB::NumberTraits::ResultOfIntegerDivision<DB::UInt8, DB::Int16>::Type()), "Int8");
-    ASSERT_EQ(getTypeString(DB::NumberTraits::ResultOfModulo<DB::UInt32, DB::Int8>::Type()), "Int8");
+    ASSERT_EQ(getTypeString(DB::NumberTraits::ResultOfModulo<DB::UInt32, DB::Int8>::Type()), "UInt8");
 }
 
 
diff --git a/tests/queries/0_stateless/01692_mod_enlarge_type.reference b/tests/queries/0_stateless/01692_mod_enlarge_type.reference
index 6d962821ad6..fe7df569ea2 100644
--- a/tests/queries/0_stateless/01692_mod_enlarge_type.reference
+++ b/tests/queries/0_stateless/01692_mod_enlarge_type.reference
@@ -1,2 +1,2 @@
 -199
-146	Int32
+146	Int16

From e1359b01a1cc34c7a6e5fead6568b6ecae5ba0a9 Mon Sep 17 00:00:00 2001
From: hexiaoting <hewenting_ict@163.com>
Date: Fri, 5 Feb 2021 11:11:27 +0800
Subject: [PATCH 0679/1238] Remove unnecessary codes

---
 src/Interpreters/CollectJoinOnKeysVisitor.cpp | 26 ++++++-------------
 src/Interpreters/CollectJoinOnKeysVisitor.h   |  2 +-
 2 files changed, 9 insertions(+), 19 deletions(-)

diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.cpp b/src/Interpreters/CollectJoinOnKeysVisitor.cpp
index 99b8e24ff59..29e3ebc52b0 100644
--- a/src/Interpreters/CollectJoinOnKeysVisitor.cpp
+++ b/src/Interpreters/CollectJoinOnKeysVisitor.cpp
@@ -78,9 +78,8 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as
     {
         ASTPtr left = func.arguments->children.at(0);
         ASTPtr right = func.arguments->children.at(1);
-        bool need_optimize = false;
-        auto table_numbers = getTableNumbers(left, right, data, &need_optimize);
-        if (!need_optimize)
+        auto table_numbers = getTableNumbers(left, right, data);
+        if (table_numbers.first != table_numbers.second)
         {
             // related to two different tables
             data.addJoinKeys(left, right, table_numbers);
@@ -104,9 +103,8 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as
         {
             ASTPtr left = func.arguments->children.at(0);
             ASTPtr right = func.arguments->children.at(1);
-            bool need_optimize_unused = false;
-            auto table_numbers = getTableNumbers(left, right, data, &need_optimize_unused);
-            if (table_numbers.first != 0)
+            auto table_numbers = getTableNumbers(left, right, data);
+            if (table_numbers.first != table_numbers.second)
             {
                 throw Exception("JOIN ON inequalities are not supported. Unexpected '" + queryToString(ast) + "'",
                     ErrorCodes::NOT_IMPLEMENTED);
@@ -126,8 +124,7 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as
 
         ASTPtr left = func.arguments->children.at(0);
         ASTPtr right = func.arguments->children.at(1);
-        bool need_optimize_unused;
-        auto table_numbers = getTableNumbers(left, right, data, &need_optimize_unused);
+        auto table_numbers = getTableNumbers(left, right, data);
 
         data.addAsofJoinKeys(left, right, table_numbers, inequality);
     }
@@ -152,8 +149,9 @@ void CollectJoinOnKeysMatcher::getIdentifiers(const ASTPtr & ast, std::vector<co
         getIdentifiers(child, out);
 }
 
+
 std::pair<size_t, size_t> CollectJoinOnKeysMatcher::getTableNumbers(const ASTPtr & left_ast, const ASTPtr & right_ast,
-                                                                    Data & data, bool *need_optimize)
+                                                                    Data & data)
 {
     std::vector<const ASTIdentifier *> left_identifiers;
     std::vector<const ASTIdentifier *> right_identifiers;
@@ -162,20 +160,11 @@ std::pair<size_t, size_t> CollectJoinOnKeysMatcher::getTableNumbers(const ASTPtr
     getIdentifiers(right_ast, right_identifiers);
 
     if (left_identifiers.empty() || right_identifiers.empty())
-    {
-        *need_optimize = true;
         return {0, 0};
-    }
 
     size_t left_idents_table = getTableForIdentifiers(left_identifiers, data);
     size_t right_idents_table = getTableForIdentifiers(right_identifiers, data);
 
-    if (left_idents_table && left_idents_table == right_idents_table)
-    {
-        *need_optimize = true;
-        return {0, 0};
-    }
-
     return std::make_pair(left_idents_table, right_idents_table);
 }
 
@@ -260,6 +249,7 @@ size_t CollectJoinOnKeysMatcher::getTableForIdentifiers(std::vector<const ASTIde
                         + " are from different tables.", ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
         }
     }
+    assert(table_number != 0);
 
     return table_number;
 }
diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.h b/src/Interpreters/CollectJoinOnKeysVisitor.h
index 050acb87ae2..42133cf0b6e 100644
--- a/src/Interpreters/CollectJoinOnKeysVisitor.h
+++ b/src/Interpreters/CollectJoinOnKeysVisitor.h
@@ -59,7 +59,7 @@ private:
     static void visit(const ASTFunction & func, const ASTPtr & ast, Data & data);
 
     static void getIdentifiers(const ASTPtr & ast, std::vector<const ASTIdentifier *> & out);
-    static std::pair<size_t, size_t> getTableNumbers(const ASTPtr & left_ast, const ASTPtr & right_ast, Data & data, bool *need_optimize);
+    static std::pair<size_t, size_t> getTableNumbers(const ASTPtr & left_ast, const ASTPtr & right_ast, Data & data);
     static const ASTIdentifier * unrollAliases(const ASTIdentifier * identifier, const Aliases & aliases);
     static size_t getTableForIdentifiers(std::vector<const ASTIdentifier *> & identifiers, const Data & data);
 };

From 581495ae235a46be9942819ab5b2cc72afc945e8 Mon Sep 17 00:00:00 2001
From: zhangxiao871 <zhangxiao871@ZBMAC-C02DN6312.local>
Date: Fri, 5 Feb 2021 14:45:28 +0800
Subject: [PATCH 0680/1238] Supports system.zookeeper path IN query.

---
 docs/en/operations/system-tables/zookeeper.md |   2 +-
 docs/zh/operations/system-tables/zookeeper.md |   2 +-
 .../System/StorageSystemZooKeeper.cpp         | 179 ++++++++++++------
 3 files changed, 126 insertions(+), 57 deletions(-)

diff --git a/docs/en/operations/system-tables/zookeeper.md b/docs/en/operations/system-tables/zookeeper.md
index ddb4d305964..c63434877d3 100644
--- a/docs/en/operations/system-tables/zookeeper.md
+++ b/docs/en/operations/system-tables/zookeeper.md
@@ -1,7 +1,7 @@
 # system.zookeeper {#system-zookeeper}
 
 The table does not exist if ZooKeeper is not configured. Allows reading data from the ZooKeeper cluster defined in the config.
-The query must have a ‘path’ equality condition in the WHERE clause. This is the path in ZooKeeper for the children that you want to get data for.
+The query must have a ‘path’ equality condition in the WHERE clause, or ‘path’ condition in the set in the WHERE clause. This is the path in ZooKeeper for the children that you want to get data for.
 
 The query `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` outputs data for all children on the `/clickhouse` node.
 To output data for all root nodes, write path = ‘/’.
diff --git a/docs/zh/operations/system-tables/zookeeper.md b/docs/zh/operations/system-tables/zookeeper.md
index b66e5262df3..930148ba639 100644
--- a/docs/zh/operations/system-tables/zookeeper.md
+++ b/docs/zh/operations/system-tables/zookeeper.md
@@ -6,7 +6,7 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
 # 系统。动物园管理员 {#system-zookeeper}
 
 如果未配置ZooKeeper，则表不存在。 允许从配置中定义的ZooKeeper集群读取数据。
-查询必须具有 ‘path’ WHERE子句中的平等条件。 这是ZooKeeper中您想要获取数据的孩子的路径。
+查询必须具有 ‘path’ WHERE子句中的相等条件或者在某个集合中的条件。 这是ZooKeeper中您想要获取数据的孩子的路径。
 
 查询 `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` 输出对所有孩子的数据 `/clickhouse` 节点。
 要输出所有根节点的数据，write path= ‘/’.
diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp
index 287650ef86c..8fa5ccbd630 100644
--- a/src/Storages/System/StorageSystemZooKeeper.cpp
+++ b/src/Storages/System/StorageSystemZooKeeper.cpp
@@ -12,6 +12,9 @@
 #include <Interpreters/evaluateConstantExpression.h>
 #include <Common/ZooKeeper/ZooKeeper.h>
 #include <Common/typeid_cast.h>
+#include <Parsers/ASTSubquery.h>
+#include <Interpreters/Set.h>
+#include <Interpreters/interpretSubquery.h>
 
 
 namespace DB
@@ -43,8 +46,24 @@ NamesAndTypesList StorageSystemZooKeeper::getNamesAndTypes()
     };
 }
 
+using Paths = Strings;
 
-static bool extractPathImpl(const IAST & elem, String & res, const Context & context)
+static String pathCorrected(const String & path)
+{
+    String path_corrected;
+    /// path should starts with '/', otherwise ZBADARGUMENTS will be thrown in
+    /// ZooKeeper::sendThread and the session will fail.
+    if (path[0] != '/')
+        path_corrected = '/';
+    path_corrected += path;
+    /// In all cases except the root, path must not end with a slash.
+    if (path_corrected != "/" && path_corrected.back() == '/')
+        path_corrected.resize(path_corrected.size() - 1);
+    return path_corrected;
+}
+
+
+static bool extractPathImpl(const IAST & elem, Paths & res, const Context & context)
 {
     const auto * function = elem.as<ASTFunction>();
     if (!function)
@@ -59,15 +78,65 @@ static bool extractPathImpl(const IAST & elem, String & res, const Context & con
         return false;
     }
 
-    if (function->name == "equals")
-    {
-        const auto & args = function->arguments->as<ASTExpressionList &>();
-        ASTPtr value;
+    const auto & args = function->arguments->as<ASTExpressionList &>();
+    if (args.children.size() != 2)
+        return false;
 
-        if (args.children.size() != 2)
+    if (function->name == "in")
+    {
+        const ASTIdentifier * ident = args.children.at(0)->as<ASTIdentifier>();
+        if (!ident || ident->name() != "path")
             return false;
 
+        ASTPtr value = args.children.at(1);
+
+        if (value->as<ASTSubquery>())
+        {
+            auto interpreter_subquery = interpretSubquery(value, context, {}, {});
+            auto stream = interpreter_subquery->execute().getInputStream();
+            SizeLimits limites(context.getSettingsRef().max_rows_in_set, context.getSettingsRef().max_bytes_in_set, OverflowMode::THROW);
+            Set set(limites, true, context.getSettingsRef().transform_null_in);
+            set.setHeader(stream->getHeader());
+
+            stream->readPrefix();
+            while (Block block = stream->read())
+            {
+                set.insertFromBlock(block);
+            }
+            set.finishInsert();
+            stream->readSuffix();
+
+            set.checkColumnsNumber(1);
+            const auto & set_column = *set.getSetElements()[0];
+            for (size_t row = 0; row < set_column.size(); ++row)
+                res.emplace_back(set_column[row].safeGet<String>());
+        }
+        else
+        {
+            auto evaluated = evaluateConstantExpressionAsLiteral(value, context);
+            const auto * literal = evaluated->as<ASTLiteral>();
+            if (!literal)
+                return false;
+
+            if (String str; literal->value.tryGet(str))
+            {
+                res.emplace_back(str);
+            }
+            else if (Tuple tuple; literal->value.tryGet(tuple))
+            {
+                for (auto element : tuple)
+                    res.emplace_back(element.safeGet<String>());
+            }
+            else
+                return false;
+        }
+
+        return true;
+    }
+    else if (function->name == "equals")
+    {
         const ASTIdentifier * ident;
+        ASTPtr value;
         if ((ident = args.children.at(0)->as<ASTIdentifier>()))
             value = args.children.at(1);
         else if ((ident = args.children.at(1)->as<ASTIdentifier>()))
@@ -86,7 +155,7 @@ static bool extractPathImpl(const IAST & elem, String & res, const Context & con
         if (literal->value.getType() != Field::Types::String)
             return false;
 
-        res = literal->value.safeGet<String>();
+        res.emplace_back(literal->value.safeGet<String>());
         return true;
     }
 
@@ -96,69 +165,69 @@ static bool extractPathImpl(const IAST & elem, String & res, const Context & con
 
 /** Retrieve from the query a condition of the form `path = 'path'`, from conjunctions in the WHERE clause.
   */
-static String extractPath(const ASTPtr & query, const Context & context)
+static Paths extractPath(const ASTPtr & query, const Context & context)
 {
     const auto & select = query->as<ASTSelectQuery &>();
     if (!select.where())
-        return "";
+        return Paths();
 
-    String res;
-    return extractPathImpl(*select.where(), res, context) ? res : "";
+    Paths res;
+    return extractPathImpl(*select.where(), res, context) ? res : Paths();
 }
 
 
 void StorageSystemZooKeeper::fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo & query_info) const
 {
-    String path = extractPath(query_info.query, context);
-    if (path.empty())
-        throw Exception("SELECT from system.zookeeper table must contain condition like path = 'path' in WHERE clause.", ErrorCodes::BAD_ARGUMENTS);
+    const Paths & paths = extractPath(query_info.query, context);
+    if (paths.empty())
+        throw Exception("SELECT from system.zookeeper table must contain condition like path = 'path' or path IN ('path1','path2'...) or path IN (subquery) in WHERE clause.", ErrorCodes::BAD_ARGUMENTS);
 
     zkutil::ZooKeeperPtr zookeeper = context.getZooKeeper();
 
-    String path_corrected;
-    /// path should starts with '/', otherwise ZBADARGUMENTS will be thrown in
-    /// ZooKeeper::sendThread and the session will fail.
-    if (path[0] != '/')
-        path_corrected = '/';
-    path_corrected += path;
-    /// In all cases except the root, path must not end with a slash.
-    if (path_corrected != "/" && path_corrected.back() == '/')
-        path_corrected.resize(path_corrected.size() - 1);
-
-    zkutil::Strings nodes = zookeeper->getChildren(path_corrected);
-
-    String path_part = path_corrected;
-    if (path_part == "/")
-        path_part.clear();
-
-    std::vector<std::future<Coordination::GetResponse>> futures;
-    futures.reserve(nodes.size());
-    for (const String & node : nodes)
-        futures.push_back(zookeeper->asyncTryGet(path_part + '/' + node));
-
-    for (size_t i = 0, size = nodes.size(); i < size; ++i)
+    std::unordered_set<String> paths_corrected;
+    for (const auto & path : paths)
     {
-        auto res = futures[i].get();
-        if (res.error == Coordination::Error::ZNONODE)
-            continue;   /// Node was deleted meanwhile.
+        const String & path_corrected = pathCorrected(path);
+        auto [it, inserted] = paths_corrected.emplace(path_corrected);
+        if (!inserted) /// Do not repeat processing.
+            continue;
 
-        const Coordination::Stat & stat = res.stat;
+        zkutil::Strings nodes = zookeeper->getChildren(path_corrected);
 
-        size_t col_num = 0;
-        res_columns[col_num++]->insert(nodes[i]);
-        res_columns[col_num++]->insert(res.data);
-        res_columns[col_num++]->insert(stat.czxid);
-        res_columns[col_num++]->insert(stat.mzxid);
-        res_columns[col_num++]->insert(UInt64(stat.ctime / 1000));
-        res_columns[col_num++]->insert(UInt64(stat.mtime / 1000));
-        res_columns[col_num++]->insert(stat.version);
-        res_columns[col_num++]->insert(stat.cversion);
-        res_columns[col_num++]->insert(stat.aversion);
-        res_columns[col_num++]->insert(stat.ephemeralOwner);
-        res_columns[col_num++]->insert(stat.dataLength);
-        res_columns[col_num++]->insert(stat.numChildren);
-        res_columns[col_num++]->insert(stat.pzxid);
-        res_columns[col_num++]->insert(path);          /// This is the original path. In order to process the request, condition in WHERE should be triggered.
+        String path_part = path_corrected;
+        if (path_part == "/")
+            path_part.clear();
+
+        std::vector<std::future<Coordination::GetResponse>> futures;
+        futures.reserve(nodes.size());
+        for (const String & node : nodes)
+            futures.push_back(zookeeper->asyncTryGet(path_part + '/' + node));
+
+        for (size_t i = 0, size = nodes.size(); i < size; ++i)
+        {
+            auto res = futures[i].get();
+            if (res.error == Coordination::Error::ZNONODE)
+                continue; /// Node was deleted meanwhile.
+
+            const Coordination::Stat & stat = res.stat;
+
+            size_t col_num = 0;
+            res_columns[col_num++]->insert(nodes[i]);
+            res_columns[col_num++]->insert(res.data);
+            res_columns[col_num++]->insert(stat.czxid);
+            res_columns[col_num++]->insert(stat.mzxid);
+            res_columns[col_num++]->insert(UInt64(stat.ctime / 1000));
+            res_columns[col_num++]->insert(UInt64(stat.mtime / 1000));
+            res_columns[col_num++]->insert(stat.version);
+            res_columns[col_num++]->insert(stat.cversion);
+            res_columns[col_num++]->insert(stat.aversion);
+            res_columns[col_num++]->insert(stat.ephemeralOwner);
+            res_columns[col_num++]->insert(stat.dataLength);
+            res_columns[col_num++]->insert(stat.numChildren);
+            res_columns[col_num++]->insert(stat.pzxid);
+            res_columns[col_num++]->insert(
+                path); /// This is the original path. In order to process the request, condition in WHERE should be triggered.
+        }
     }
 }
 

From 52b52ede226282fe609c766566b806206c5985cb Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com>
Date: Fri, 5 Feb 2021 10:06:17 +0300
Subject: [PATCH 0681/1238] Update
 00840_long_concurrent_select_and_drop_deadlock.sh

---
 .../00840_long_concurrent_select_and_drop_deadlock.sh           | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/queries/0_stateless/00840_long_concurrent_select_and_drop_deadlock.sh b/tests/queries/0_stateless/00840_long_concurrent_select_and_drop_deadlock.sh
index f7659bc3728..60a2d8eb9a0 100755
--- a/tests/queries/0_stateless/00840_long_concurrent_select_and_drop_deadlock.sh
+++ b/tests/queries/0_stateless/00840_long_concurrent_select_and_drop_deadlock.sh
@@ -1,7 +1,5 @@
 #!/usr/bin/env bash
 
-# remove this comment before merge
-
 set -e
 
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)

From a1bcd4b128622ec2db79ea7564c60664784b52cc Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com>
Date: Fri, 5 Feb 2021 10:06:39 +0300
Subject: [PATCH 0682/1238] Update 01232_preparing_sets_race_condition_long.sh

---
 .../0_stateless/01232_preparing_sets_race_condition_long.sh     | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/queries/0_stateless/01232_preparing_sets_race_condition_long.sh b/tests/queries/0_stateless/01232_preparing_sets_race_condition_long.sh
index 5aaac7cd86e..e42e68a6589 100755
--- a/tests/queries/0_stateless/01232_preparing_sets_race_condition_long.sh
+++ b/tests/queries/0_stateless/01232_preparing_sets_race_condition_long.sh
@@ -1,7 +1,5 @@
 #!/usr/bin/env bash
 
-# remove this comment before merge
-
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh

From cb791dbc7fc3b71290a7f86c6e5494cbc14bd977 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com>
Date: Fri, 5 Feb 2021 10:06:53 +0300
Subject: [PATCH 0683/1238] Update 01443_merge_truncate_long.sh

---
 tests/queries/0_stateless/01443_merge_truncate_long.sh | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/queries/0_stateless/01443_merge_truncate_long.sh b/tests/queries/0_stateless/01443_merge_truncate_long.sh
index 538e457a5d8..ffd5f225ffe 100755
--- a/tests/queries/0_stateless/01443_merge_truncate_long.sh
+++ b/tests/queries/0_stateless/01443_merge_truncate_long.sh
@@ -1,7 +1,5 @@
 #!/usr/bin/env bash
 
-# remove this comment before merge
-
 set -e
 
 CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)

From ab98040003b5e6c3e324f19b6c11c26fb0c8c96e Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Fri, 5 Feb 2021 10:15:28 +0300
Subject: [PATCH 0684/1238] More logs

---
 src/Coordination/LoggerWrapper.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Coordination/LoggerWrapper.h b/src/Coordination/LoggerWrapper.h
index 00d4c6544a5..fcc24edea14 100644
--- a/src/Coordination/LoggerWrapper.h
+++ b/src/Coordination/LoggerWrapper.h
@@ -11,7 +11,7 @@ class LoggerWrapper : public nuraft::logger
 public:
     LoggerWrapper(const std::string & name)
         : log(&Poco::Logger::get(name))
-        , level(4)
+        , level(6)
     {
         log->setLevel(level);
     }

From 3becb80c13f21f54f29229f596cfa6e93e063d23 Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Thu, 4 Feb 2021 23:43:03 -0800
Subject: [PATCH 0685/1238] Docs - date_add, date_diff

---
 .../functions/date-time-functions.md          | 81 +++++++++++++++++++
 1 file changed, 81 insertions(+)

diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index 9de780fb596..967f489f2ab 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -435,6 +435,87 @@ Result:
 
 -   [toStartOfInterval](#tostartofintervaltime-or-data-interval-x-unit-time-zone)
 
+## date\_add {#date_add}
+
+Adds specified date/time interval to the provided date.
+
+**Syntax** 
+
+``` sql
+date_add(unit, value, date)
+```
+
+Aliases: `dateAdd`, `DATE_ADD`. 
+
+**Parameters**
+
+-   `unit` - The unit of time - [String](../syntax.md#syntax-string-literal).
+    Possible values:
+
+    - `second`
+    - `minute`
+    - `hour`
+    - `day`
+    - `week`
+    - `month`
+    - `quarter`
+    - `year`
+-   `value` - Amount of the specified unit of time.    
+-   `date` — Date or Date with time - [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
+
+
+**Example**
+
+```sql
+select DATE_ADD(YEAR, 3, toDate('2018-01-01'));
+```
+
+```text
+┌─plus(toDate('2018-01-01'), toIntervalYear(3))─┐
+│                                    2021-01-01 │
+└───────────────────────────────────────────────┘
+```
+
+## date\_diff {#date_diff}
+
+Returns the difference between two dates in terms of the specified unit.
+
+**Syntax** 
+
+``` sql
+date_sub(unit, date1, date2)
+```
+
+Aliases: `date_diff`, `DATE_DIFF`. 
+
+**Parameters**
+
+-   `unit` - The unit of time - [String](../syntax.md#syntax-string-literal).
+    Possible values:
+
+    - `second`
+    - `minute`
+    - `hour`
+    - `day`
+    - `week`
+    - `month`
+    - `quarter`
+    - `year`
+    
+-   `date1`, `date2` — Date or Date with time - [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
+
+**Example**
+
+```sql
+select DATE_DIFF(MONTH, toDate('2018-12-18'), toDate('2018-01-01'));
+```
+
+```text
+┌─dateDiff('month', toDate('2018-12-18'), toDate('2018-01-01'))─┐
+│                                                           -11 │
+└───────────────────────────────────────────────────────────────┘
+```
+    
 ## now {#now}
 
 Returns the current date and time. 

From c6c1541c9f8154aafdc66f1a37592454d2b565f0 Mon Sep 17 00:00:00 2001
From: vdimir <vdimir@yandex-team.ru>
Date: Fri, 5 Feb 2021 10:53:26 +0300
Subject: [PATCH 0686/1238] Remove assert from CollectJoinOnKeysVisitor.cpp

---
 src/Interpreters/CollectJoinOnKeysVisitor.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.cpp b/src/Interpreters/CollectJoinOnKeysVisitor.cpp
index 29e3ebc52b0..ba151b7f903 100644
--- a/src/Interpreters/CollectJoinOnKeysVisitor.cpp
+++ b/src/Interpreters/CollectJoinOnKeysVisitor.cpp
@@ -249,7 +249,6 @@ size_t CollectJoinOnKeysMatcher::getTableForIdentifiers(std::vector<const ASTIde
                         + " are from different tables.", ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
         }
     }
-    assert(table_number != 0);
 
     return table_number;
 }

From 606c914bb5b8dd602da08f75580d22823f8b81c0 Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Fri, 5 Feb 2021 00:03:45 -0800
Subject: [PATCH 0687/1238] Docs - timestamp_add|sub

---
 .../functions/date-time-functions.md          | 80 +++++++++++++++++++
 1 file changed, 80 insertions(+)

diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index 967f489f2ab..d0288d25074 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -515,6 +515,86 @@ select DATE_DIFF(MONTH, toDate('2018-12-18'), toDate('2018-01-01'));
 │                                                           -11 │
 └───────────────────────────────────────────────────────────────┘
 ```
+
+## timestamp\_add {#timestamp_add}
+
+Adds the specified time value with the provided date or date time value.
+
+**Syntax** 
+
+``` sql
+timestamp_add(date, INTERVAL value unit)
+```
+
+Aliases: `timeStampAdd`, `TIMESTAMP_ADD`. 
+
+**Parameters**
+    
+-   `date` — Date or Date with time - [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
+-   `value` -  Amount of the specified unit of time - [String](../syntax.md#syntax-string-literal)
+-   `unit` - The unit of time interval - [String](../syntax.md#syntax-string-literal).
+    Possible values:
+
+    - `second`
+    - `minute`
+    - `hour`
+    - `day`
+    - `week`
+    - `month`
+    - `quarter`
+    - `year`
+    
+**Example**
+
+```sql
+select timestamp_add(toDate('2018-01-01'), INTERVAL 3 MONTH);
+```
+
+```text
+┌─plus(toDate('2018-01-01'), toIntervalMonth(3))─┐
+│                                     2018-04-01 │
+└────────────────────────────────────────────────┘
+```
+
+## timestamp\_sub {#timestamp_sub}
+
+Returns the difference between two dates in terms of the specified unit.
+
+**Syntax** 
+
+``` sql
+timestamp_sub(unit, value, date)
+```
+
+Aliases: `timeStampSub`, `TIMESTAMP_SUB`. 
+
+**Parameters**
+
+-   `unit` - The unit of time - [String](../syntax.md#syntax-string-literal).
+    Possible values:
+
+    - `second`
+    - `minute`
+    - `hour`
+    - `day`
+    - `week`
+    - `month`
+    - `quarter`
+    - `year`
+- value -  Amount of the specified unit of time. [String](../syntax.md#syntax-string-literal).   
+-   `date1`, `date2` — Date or Date with time - [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
+
+**Example**
+
+```sql
+select timestamp_sub(MONTH, 5, toDateTime('2018-12-18 01:02:03'));
+```
+
+```text
+┌─minus(toDateTime('2018-12-18 01:02:03'), toIntervalMonth(5))─┐
+│                                          2018-07-18 01:02:03 │
+└──────────────────────────────────────────────────────────────┘
+```
     
 ## now {#now}
 

From 8c60e84067e1bac3f746c4c3b9b9faaaf9235bc8 Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Fri, 5 Feb 2021 00:04:56 -0800
Subject: [PATCH 0688/1238] Docs - minor fixes

---
 .../sql-reference/functions/date-time-functions.md  | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index d0288d25074..fa4ea7a739e 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -467,7 +467,7 @@ Aliases: `dateAdd`, `DATE_ADD`.
 **Example**
 
 ```sql
-select DATE_ADD(YEAR, 3, toDate('2018-01-01'));
+select date_add(YEAR, 3, toDate('2018-01-01'));
 ```
 
 ```text
@@ -501,13 +501,12 @@ Aliases: `date_diff`, `DATE_DIFF`.
     - `month`
     - `quarter`
     - `year`
-    
--   `date1`, `date2` — Date or Date with time - [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
+-   `date1`,`date2` - Dates or Dates with time - [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
 
 **Example**
 
 ```sql
-select DATE_DIFF(MONTH, toDate('2018-12-18'), toDate('2018-01-01'));
+select date_diff(MONTH, toDate('2018-12-18'), toDate('2018-01-01'));
 ```
 
 ```text
@@ -558,7 +557,7 @@ select timestamp_add(toDate('2018-01-01'), INTERVAL 3 MONTH);
 
 ## timestamp\_sub {#timestamp_sub}
 
-Returns the difference between two dates in terms of the specified unit.
+Returns the difference between two dates in the specified unit.
 
 **Syntax** 
 
@@ -581,8 +580,8 @@ Aliases: `timeStampSub`, `TIMESTAMP_SUB`.
     - `month`
     - `quarter`
     - `year`
-- value -  Amount of the specified unit of time. [String](../syntax.md#syntax-string-literal).   
--   `date1`, `date2` — Date or Date with time - [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
+- `value` -  Amount of the specified unit of time. [String](../syntax.md#syntax-string-literal).   
+- `date1`, `date2` — Date or Date with time - [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
 
 **Example**
 

From 5ca00072551b5494920c4e8885ba1923e6fe8e84 Mon Sep 17 00:00:00 2001
From: George <gyuton@yandex-team.ru>
Date: Fri, 5 Feb 2021 11:14:51 +0300
Subject: [PATCH 0689/1238] Updated description

---
 .../aggregate-functions/reference/argmax.md   | 49 +++++++++++++++++--
 .../aggregate-functions/reference/argmin.md   | 49 +++++++++++++++++--
 2 files changed, 88 insertions(+), 10 deletions(-)

diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmax.md b/docs/en/sql-reference/aggregate-functions/reference/argmax.md
index 35e87d49e60..fe30de95eab 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/argmax.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/argmax.md
@@ -4,13 +4,46 @@ toc_priority: 106
 
 # argMax {#agg-function-argmax}
 
-Syntax: `argMax(arg, val)` or `argMax(tuple(arg, val))`
+Calculates the `arg` value for a maximum `val` value. If there are several different values of `arg` for maximum values of `val`, returns the first of these values encountered.
 
-Calculates the `arg` value for a maximum `val` value. If there are several different values of `arg` for maximum values of `val`, the first of these values encountered is output.
+Tuple version of this function will return the tuple with the maximum `val` value. It is convenient for use with [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md).
 
-Tuple version of this function will return the tuple with the maximum `val` value. It is convinient for use with `SimpleAggregateFunction`.
+**Syntax**
 
-**Example:**
+``` sql
+argMax(arg, val)
+```
+
+or
+
+``` sql
+argMax(tuple(arg, val))
+```
+
+**Parameters**
+
+-   `arg` — Argument.
+-   `val` — Value.
+
+or 
+
+-   `tuple(arg, val)` — [Tuple](../../../sql-reference/data-types/tuple.md).
+
+**Returned value**
+
+-   `arg` value that corresponds to maximum `val` value.
+
+Type: matches `arg` type. 
+
+For tuple in the input:
+
+-   Tuple with maximum `val` value.
+
+Type: [Tuple](../../../sql-reference/data-types/tuple.md).
+
+**Example**
+
+Input table:
 
 ``` text
 ┌─user─────┬─salary─┐
@@ -20,12 +53,18 @@ Tuple version of this function will return the tuple with the maximum `val` valu
 └──────────┴────────┘
 ```
 
+Query:
+
 ``` sql
-SELECT argMax(user, salary), argMax(tuple(user, salary)) FROM salary
+SELECT argMax(user, salary), argMax(tuple(user, salary)) FROM salary;
 ```
 
+Result:
+
 ``` text
 ┌─argMax(user, salary)─┬─argMax(tuple(user, salary))─┐
 │ director             │ ('director',5000)           │
 └──────────────────────┴─────────────────────────────┘
 ```
+
+[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/argmax/) <!--hide-->
diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmin.md b/docs/en/sql-reference/aggregate-functions/reference/argmin.md
index 72c9bce6817..484c1c1161f 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/argmin.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/argmin.md
@@ -4,13 +4,46 @@ toc_priority: 105
 
 # argMin {#agg-function-argmin}
 
-Syntax: `argMin(arg, val)` or `argMin(tuple(arg, val))`
+Calculates the `arg` value for a minimal `val` value. If there are several different values of `arg` for minimal values of `val`, returns the first of these values encountered.
 
-Calculates the `arg` value for a minimal `val` value. If there are several different values of `arg` for minimal values of `val`, the first of these values encountered is output.
+Tuple version of this function will return the tuple with the minimal `val` value. It is convenient for use with [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md).
 
-Tuple version of this function will return the tuple with the minimal `val` value. It is convinient for use with `SimpleAggregateFunction`.
+**Syntax**
 
-**Example:**
+``` sql
+argMin(arg, val)
+```
+
+or
+
+``` sql
+argMin(tuple(arg, val))
+```
+
+**Parameters**
+
+-   `arg` — Argument.
+-   `val` — Value.
+
+or 
+
+-   `tuple(arg, val)` — [Tuple](../../../sql-reference/data-types/tuple.md).
+
+**Returned value**
+
+-   `arg` value that corresponds to minimal `val` value.
+
+Type: matches `arg` type. 
+
+For tuple in the input:
+
+-   Tuple with minimal `val` value.
+
+Type: [Tuple](../../../sql-reference/data-types/tuple.md).
+
+**Example**
+
+Input table:
 
 ``` text
 ┌─user─────┬─salary─┐
@@ -20,12 +53,18 @@ Tuple version of this function will return the tuple with the minimal `val` valu
 └──────────┴────────┘
 ```
 
+Query:
+
 ``` sql
-SELECT argMin(user, salary), argMin(tuple(user, salary)) FROM salary
+SELECT argMin(user, salary), argMin(tuple(user, salary)) FROM salary;
 ```
 
+Result:
+
 ``` text
 ┌─argMin(user, salary)─┬─argMin(tuple(user, salary))─┐
 │ worker               │ ('worker',1000)             │
 └──────────────────────┴─────────────────────────────┘
 ```
+
+[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/argmin/) <!--hide-->

From 38c2add4f71e22add7d4c9e6dd267d948fffdf68 Mon Sep 17 00:00:00 2001
From: George <gyuton@yandex-team.ru>
Date: Fri, 5 Feb 2021 11:15:11 +0300
Subject: [PATCH 0690/1238] Added translation

---
 .../aggregate-functions/reference/argmax.md   | 65 ++++++++++++++++++-
 .../aggregate-functions/reference/argmin.md   | 55 +++++++++++++---
 2 files changed, 109 insertions(+), 11 deletions(-)

diff --git a/docs/ru/sql-reference/aggregate-functions/reference/argmax.md b/docs/ru/sql-reference/aggregate-functions/reference/argmax.md
index 97edd5773c8..2ff7afb4ad6 100644
--- a/docs/ru/sql-reference/aggregate-functions/reference/argmax.md
+++ b/docs/ru/sql-reference/aggregate-functions/reference/argmax.md
@@ -4,8 +4,67 @@ toc_priority: 106
 
 # argMax {#agg-function-argmax}
 
-Синтаксис: `argMax(arg, val)`
+Вычисляет значение `arg` при максимальном значении `val`. Если есть несколько разных значений `arg` для максимальных значений `val`, возвращает первое попавшееся из таких значений.
 
-Вычисляет значение arg при максимальном значении val. Если есть несколько разных значений arg для максимальных значений val, то выдаётся первое попавшееся из таких значений.
+Если функции передан кортеж, то будет выведен кортеж с максимальным значением `val`. Удобно использовать для работы с [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md).
 
-[Оригинальная статья](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/argmax/) <!--hide-->
+**Синтаксис**
+
+``` sql
+argMax(arg, val)
+```
+
+или
+
+``` sql
+argMax(tuple(arg, val))
+```
+
+**Параметры**
+
+-   `arg` — аргумент.
+-   `val` — значение.
+
+или
+
+-   `tuple(arg, val)` — [Tuple](../../../sql-reference/data-types/tuple.md).
+
+**Возвращаемое значение**
+
+-   Значение `arg`, соответствующее максимальному значению `val`.
+
+Тип: соответствует типу `arg`. 
+
+Если передан кортеж:
+
+-   Кортеж с максимальным значением `val`.
+
+Тип: [Tuple](../../../sql-reference/data-types/tuple.md).
+
+**Пример**
+
+Таблица ввода:
+
+``` text
+┌─user─────┬─salary─┐
+│ director │   5000 │
+│ manager  │   3000 │
+│ worker   │   1000 │
+└──────────┴────────┘
+```
+
+Запрос:
+
+``` sql
+SELECT argMax(user, salary), argMax(tuple(user, salary)) FROM salary;
+```
+
+Результат:
+
+``` text
+┌─argMax(user, salary)─┬─argMax(tuple(user, salary))─┐
+│ director             │ ('director',5000)           │
+└──────────────────────┴─────────────────────────────┘
+```
+
+[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/argmax/) <!--hide-->
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/argmin.md b/docs/ru/sql-reference/aggregate-functions/reference/argmin.md
index 58161cd226a..2d4095db30b 100644
--- a/docs/ru/sql-reference/aggregate-functions/reference/argmin.md
+++ b/docs/ru/sql-reference/aggregate-functions/reference/argmin.md
@@ -4,11 +4,46 @@ toc_priority: 105
 
 # argMin {#agg-function-argmin}
 
-Синтаксис: `argMin(arg, val)`
+Вычисляет значение `arg` при минимальном значении `val`. Если есть несколько разных значений `arg` для минимальных значений `val`, возвращает первое попавшееся из таких значений.
 
-Вычисляет значение arg при минимальном значении val. Если есть несколько разных значений arg для минимальных значений val, то выдаётся первое попавшееся из таких значений.
+Если функции передан кортеж, то будет выведен кортеж с минимальным значением `val`. Удобно использовать для работы с [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md).
 
-**Пример:**
+**Синтаксис**
+
+``` sql
+argMin(arg, val)
+```
+
+или
+
+``` sql
+argMin(tuple(arg, val))
+```
+
+**Параметры**
+
+-   `arg` — аргумент.
+-   `val` — значение.
+
+или
+
+-   `tuple(arg, val)` — [Tuple](../../../sql-reference/data-types/tuple.md).
+
+**Возвращаемое значение**
+
+-   Значение `arg`, соответствующее минимальному значению `val`.
+
+Тип: соответствует типу `arg`. 
+
+Если передан кортеж:
+
+-   Кортеж с минимальным значением `val`.
+
+Тип: [Tuple](../../../sql-reference/data-types/tuple.md).
+
+**Пример**
+
+Таблица ввода:
 
 ``` text
 ┌─user─────┬─salary─┐
@@ -18,14 +53,18 @@ toc_priority: 105
 └──────────┴────────┘
 ```
 
+Запрос:
+
 ``` sql
-SELECT argMin(user, salary) FROM salary
+SELECT argMin(user, salary), argMin(tuple(user, salary)) FROM salary;
 ```
 
+Результат:
+
 ``` text
-┌─argMin(user, salary)─┐
-│ worker               │
-└──────────────────────┘
+┌─argMin(user, salary)─┬─argMin(tuple(user, salary))─┐
+│ worker               │ ('worker',1000)             │
+└──────────────────────┴─────────────────────────────┘
 ```
 
-[Оригинальная статья](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/argmin/) <!--hide-->
+[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/argmin/) <!--hide-->

From fd74aca6c52b77a2bf524f6acffce61cfd6567ae Mon Sep 17 00:00:00 2001
From: gyuton <40863448+gyuton@users.noreply.github.com>
Date: Fri, 5 Feb 2021 11:41:49 +0300
Subject: [PATCH 0691/1238] Update
 docs/en/operations/settings/merge-tree-settings.md

Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com>
---
 docs/en/operations/settings/merge-tree-settings.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md
index 88ce026476f..77b68715ba9 100644
--- a/docs/en/operations/settings/merge-tree-settings.md
+++ b/docs/en/operations/settings/merge-tree-settings.md
@@ -190,7 +190,7 @@ During startup ClickHouse reads all parts of all tables (reads files with metada
 
 Limits the maximum number of partitions that can be accessed in one query.
 
-The setting value specified when table is created can be overridden via query-level setting.
+The setting value specified when the table is created can be overridden via query-level setting.
 
 Possible values:
 

From 29c01ed9d0dedbc6566bc41ac629e06b64865af6 Mon Sep 17 00:00:00 2001
From: gyuton <40863448+gyuton@users.noreply.github.com>
Date: Fri, 5 Feb 2021 11:41:57 +0300
Subject: [PATCH 0692/1238] Update
 docs/en/engines/table-engines/mergetree-family/mergetree.md

Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com>
---
 docs/en/engines/table-engines/mergetree-family/mergetree.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md
index 6781961254e..6580448792c 100644
--- a/docs/en/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md
@@ -101,7 +101,7 @@ For a description of parameters, see the [CREATE query description](../../../sql
     -   `max_parts_in_total` — Maximum number of parts in all partitions.
 	-   `max_compress_block_size` — Maximum size of blocks of uncompressed data before compressing for writing to a table. You can also specify this setting in the global settings (see [max_compress_block_size](../../../operations/settings/settings.md#max-compress-block-size) setting). The value specified when table is created overrides the global value for this setting.
 	-   `min_compress_block_size` — Minimum size of blocks of uncompressed data required for compression when writing the next mark. You can also specify this setting in the global settings (see [min_compress_block_size](../../../operations/settings/settings.md#min-compress-block-size) setting). The value specified when table is created overrides the global value for this setting.
-    -   `max_partitions_to_read` — Limits the maximum number of partitions that can be accessed in one query. Can be overriden with query-level [max_partitions_to_read](../../../operations/settings/merge-tree-settings.md#max-partitions-to-read) setting.
+    -   `max_partitions_to_read` — Limits the maximum number of partitions that can be accessed in one query. You can also specify setting [max_partitions_to_read](../../../operations/settings/merge-tree-settings.md#max-partitions-to-read) in the global setting.
     
 **Example of Sections Setting**
 

From 1c51b4a4e4f5b87ca14238858468bec7f9ff869d Mon Sep 17 00:00:00 2001
From: Vitaly Baranov <vitbar@yandex-team.ru>
Date: Fri, 5 Feb 2021 11:47:02 +0300
Subject: [PATCH 0693/1238] MongoDB table engine now establishes connection
 only when it reads data.

---
 src/Storages/StorageMongoDB.cpp | 31 ++++++++++++++++++++++---------
 src/Storages/StorageMongoDB.h   | 17 ++++++++++-------
 2 files changed, 32 insertions(+), 16 deletions(-)

diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp
index be1159b1a63..09fd413af75 100644
--- a/src/Storages/StorageMongoDB.cpp
+++ b/src/Storages/StorageMongoDB.cpp
@@ -42,7 +42,6 @@ StorageMongoDB::StorageMongoDB(
     , collection_name(collection_name_)
     , username(username_)
     , password(password_)
-    , connection{std::make_shared<Poco::MongoDB::Connection>(host, port)}
 {
     StorageInMemoryMetadata storage_metadata;
     storage_metadata.setColumns(columns_);
@@ -51,6 +50,26 @@ StorageMongoDB::StorageMongoDB(
 }
 
 
+void StorageMongoDB::connectIfNotConnected()
+{
+    std::lock_guard lock{connection_mutex};
+    if (!connection)
+        connection = std::make_shared<Poco::MongoDB::Connection>(host, port);
+
+    if (!authentified)
+    {
+#       if POCO_VERSION >= 0x01070800
+            Poco::MongoDB::Database poco_db(database_name);
+            if (!poco_db.authenticate(*connection, username, password, Poco::MongoDB::Database::AUTH_SCRAM_SHA1))
+                throw Exception("Cannot authenticate in MongoDB, incorrect user or password", ErrorCodes::MONGODB_CANNOT_AUTHENTICATE);
+#       else
+            authenticate(*connection, database_name, username, password);
+#       endif
+        authentified = true;
+    }
+}
+
+
 Pipe StorageMongoDB::read(
     const Names & column_names,
     const StorageMetadataPtr & metadata_snapshot,
@@ -60,15 +79,9 @@ Pipe StorageMongoDB::read(
     size_t max_block_size,
     unsigned)
 {
-    metadata_snapshot->check(column_names, getVirtuals(), getStorageID());
+    connectIfNotConnected();
 
-#if POCO_VERSION >= 0x01070800
-    Poco::MongoDB::Database poco_db(database_name);
-    if (!poco_db.authenticate(*connection, username, password, Poco::MongoDB::Database::AUTH_SCRAM_SHA1))
-        throw Exception("Cannot authenticate in MongoDB, incorrect user or password", ErrorCodes::MONGODB_CANNOT_AUTHENTICATE);
-#else
-    authenticate(*connection, database_name, username, password);
-#endif
+    metadata_snapshot->check(column_names, getVirtuals(), getStorageID());
 
     Block sample_block;
     for (const String & column_name : column_names)
diff --git a/src/Storages/StorageMongoDB.h b/src/Storages/StorageMongoDB.h
index d7b71495574..54706337e3e 100644
--- a/src/Storages/StorageMongoDB.h
+++ b/src/Storages/StorageMongoDB.h
@@ -40,16 +40,19 @@ public:
         size_t max_block_size,
         unsigned num_streams) override;
 
-
 private:
-    std::string host;
-    short unsigned int port;
-    std::string database_name;
-    std::string collection_name;
-    std::string username;
-    std::string password;
+    void connectIfNotConnected();
+
+    const std::string host;
+    const short unsigned int port;
+    const std::string database_name;
+    const std::string collection_name;
+    const std::string username;
+    const std::string password;
 
     std::shared_ptr<Poco::MongoDB::Connection> connection;
+    bool authentified = false;
+    std::mutex connection_mutex;
 };
 
 }

From 7738f86ec8902fe0ab2868bedc283c643de8772d Mon Sep 17 00:00:00 2001
From: zhangxiao871 <zhangxiao871@ZBMAC-C02DN6312.local>
Date: Fri, 5 Feb 2021 17:02:27 +0800
Subject: [PATCH 0694/1238] add test.

---
 .../0_stateless/01700_system_zookeeper_path_in.reference      | 4 ++++
 tests/queries/0_stateless/01700_system_zookeeper_path_in.sql  | 3 +++
 2 files changed, 7 insertions(+)
 create mode 100644 tests/queries/0_stateless/01700_system_zookeeper_path_in.reference
 create mode 100644 tests/queries/0_stateless/01700_system_zookeeper_path_in.sql

diff --git a/tests/queries/0_stateless/01700_system_zookeeper_path_in.reference b/tests/queries/0_stateless/01700_system_zookeeper_path_in.reference
new file mode 100644
index 00000000000..9af4379148a
--- /dev/null
+++ b/tests/queries/0_stateless/01700_system_zookeeper_path_in.reference
@@ -0,0 +1,4 @@
+clickhouse
+clickhouse
+clickhouse
+task_queue
diff --git a/tests/queries/0_stateless/01700_system_zookeeper_path_in.sql b/tests/queries/0_stateless/01700_system_zookeeper_path_in.sql
new file mode 100644
index 00000000000..4ffded411ee
--- /dev/null
+++ b/tests/queries/0_stateless/01700_system_zookeeper_path_in.sql
@@ -0,0 +1,3 @@
+SELECT name FROM system.zookeeper WHERE path = '/';
+SELECT name FROM system.zookeeper WHERE path IN ('/');
+SELECT name FROM system.zookeeper WHERE path IN ('/','/clickhouse');
\ No newline at end of file

From e00e7ae56ed97c9b68ea25751c6cf3ae8d1ea0c8 Mon Sep 17 00:00:00 2001
From: zhangxiao871 <zhangxiao871@ZBMAC-C02DN6312.local>
Date: Fri, 5 Feb 2021 17:08:13 +0800
Subject: [PATCH 0695/1238] add test.

---
 .../0_stateless/01700_system_zookeeper_path_in.reference   | 7 +++++--
 .../queries/0_stateless/01700_system_zookeeper_path_in.sql | 5 ++++-
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/tests/queries/0_stateless/01700_system_zookeeper_path_in.reference b/tests/queries/0_stateless/01700_system_zookeeper_path_in.reference
index 9af4379148a..78462f9fc0e 100644
--- a/tests/queries/0_stateless/01700_system_zookeeper_path_in.reference
+++ b/tests/queries/0_stateless/01700_system_zookeeper_path_in.reference
@@ -1,4 +1,7 @@
 clickhouse
-clickhouse
-clickhouse
 task_queue
+clickhouse
+task_queue
+clickhouse
+task_queue
+ddl
diff --git a/tests/queries/0_stateless/01700_system_zookeeper_path_in.sql b/tests/queries/0_stateless/01700_system_zookeeper_path_in.sql
index 4ffded411ee..a5c7488ef97 100644
--- a/tests/queries/0_stateless/01700_system_zookeeper_path_in.sql
+++ b/tests/queries/0_stateless/01700_system_zookeeper_path_in.sql
@@ -1,3 +1,6 @@
 SELECT name FROM system.zookeeper WHERE path = '/';
+SELECT name FROM system.zookeeper WHERE path = 'clickhouse';
 SELECT name FROM system.zookeeper WHERE path IN ('/');
-SELECT name FROM system.zookeeper WHERE path IN ('/','/clickhouse');
\ No newline at end of file
+SELECT name FROM system.zookeeper WHERE path IN ('clickhouse');
+SELECT name FROM system.zookeeper WHERE path IN ('/','/clickhouse');
+SELECT name FROM system.zookeeper WHERE path IN (SELECT concat('/clickhouse/',name) FROM system.zookeeper WHERE (path = '/clickhouse/'));
\ No newline at end of file

From 6824f13a3567b00caf804475b8a1911dbcbee003 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Fri, 5 Feb 2021 12:13:19 +0300
Subject: [PATCH 0696/1238] tmp

---
 programs/client/QueryFuzzer.cpp               |  10 +
 src/Interpreters/WindowDescription.cpp        |  20 +-
 src/Interpreters/WindowDescription.h          |  12 +-
 src/Parsers/ASTWindowDefinition.cpp           |   4 +-
 src/Parsers/ExpressionElementParsers.cpp      |  19 +-
 src/Processors/QueryPlan/WindowStep.cpp       |  19 +-
 src/Processors/Transforms/WindowTransform.cpp | 187 ++++++++++++++----
 src/Processors/Transforms/WindowTransform.h   |   4 +-
 8 files changed, 222 insertions(+), 53 deletions(-)

diff --git a/programs/client/QueryFuzzer.cpp b/programs/client/QueryFuzzer.cpp
index 05c20434820..8d8d8daaf39 100644
--- a/programs/client/QueryFuzzer.cpp
+++ b/programs/client/QueryFuzzer.cpp
@@ -363,6 +363,16 @@ void QueryFuzzer::fuzzWindowFrame(WindowFrame & frame)
             frame.end_offset = getRandomField(0).get<Int64>();
             break;
         }
+        case 5:
+        {
+            frame.begin_preceding = fuzz_rand() % 2;
+            break;
+        }
+        case 6:
+        {
+            frame.end_preceding = fuzz_rand() % 2;
+            break;
+        }
         default:
             break;
     }
diff --git a/src/Interpreters/WindowDescription.cpp b/src/Interpreters/WindowDescription.cpp
index 6e72f056b16..b5fe91188fd 100644
--- a/src/Interpreters/WindowDescription.cpp
+++ b/src/Interpreters/WindowDescription.cpp
@@ -60,7 +60,7 @@ void WindowFrame::toString(WriteBuffer & buf) const
     {
         buf << abs(begin_offset);
         buf << " "
-            << (begin_offset > 0 ? "FOLLOWING" : "PRECEDING");
+            << (begin_preceding ? "PRECEDING" : "FOLLOWING");
     }
     buf << " AND ";
     if (end_type == BoundaryType::Current)
@@ -75,12 +75,17 @@ void WindowFrame::toString(WriteBuffer & buf) const
     {
         buf << abs(end_offset);
         buf << " "
-            << (end_offset > 0 ? "FOLLOWING" : "PRECEDING");
+            << (end_preceding ? "PRECEDING" : "FOLLOWING");
     }
 }
 
 void WindowFrame::checkValid() const
 {
+    // UNBOUNDED PRECEDING end and UNBOUNDED FOLLOWING start should have been
+    // forbidden at the parsing level.
+    assert(!(begin_type == BoundaryType::Unbounded && !begin_preceding));
+    assert(!(end_type == BoundaryType::Unbounded && end_preceding));
+
     if (begin_type == BoundaryType::Unbounded
         || end_type == BoundaryType::Unbounded)
     {
@@ -89,14 +94,14 @@ void WindowFrame::checkValid() const
 
     if (begin_type == BoundaryType::Current
         && end_type == BoundaryType::Offset
-        && end_offset > 0)
+        && !end_preceding)
     {
         return;
     }
 
     if (end_type == BoundaryType::Current
         && begin_type == BoundaryType::Offset
-        && begin_offset < 0)
+        && begin_preceding)
     {
         return;
     }
@@ -112,16 +117,13 @@ void WindowFrame::checkValid() const
     if (end_type == BoundaryType::Offset
         && begin_type == BoundaryType::Offset)
     {
-        if (type == FrameType::Rows)
+        if (!(end_preceding && !begin_preceding))
         {
-            if (end_offset >= begin_offset)
+            if (begin_offset <= end_offset)
             {
                 return;
             }
         }
-
-        // For RANGE and GROUPS, we must check that end follows begin if sorted
-        // according to ORDER BY (we don't support them yet).
     }
 
     throw Exception(ErrorCodes::BAD_ARGUMENTS,
diff --git a/src/Interpreters/WindowDescription.h b/src/Interpreters/WindowDescription.h
index 447352f7a83..9388a4c7cf8 100644
--- a/src/Interpreters/WindowDescription.h
+++ b/src/Interpreters/WindowDescription.h
@@ -40,17 +40,17 @@ struct WindowFrame
 
     // UNBOUNDED FOLLOWING for the frame end doesn't make much sense, so
     // Unbounded here means UNBOUNDED PRECEDING.
-    // Offset might be both preceding and following, preceding is negative
-    // (be careful, this is not symmetric w/the frame end unlike in the grammar,
-    // so a positive literal in PRECEDING will give a negative number here).
+    // Offset might be both preceding and following, controlled by begin_preceding,
+    // but the offset value must be positive.
     BoundaryType begin_type = BoundaryType::Unbounded;
     // This should have been a Field but I'm getting some crazy linker errors.
     int64_t begin_offset = 0;
+    bool begin_preceding = true;
 
-    // Here as well, Unbounded is UNBOUNDED FOLLOWING, and positive Offset is
-    // following.
+    // Here as well, Unbounded is UNBOUNDED FOLLOWING.
     BoundaryType end_type = BoundaryType::Current;
     int64_t end_offset = 0;
+    bool end_preceding = false;
 
 
     // Throws BAD_ARGUMENTS exception if the frame definition is incorrect, e.g.
@@ -67,8 +67,10 @@ struct WindowFrame
         return other.type == type
             && other.begin_type == begin_type
             && other.begin_offset == begin_offset
+            && other.begin_preceding == begin_preceding
             && other.end_type == end_type
             && other.end_offset == end_offset
+            && other.end_preceding == end_preceding
             ;
     }
 
diff --git a/src/Parsers/ASTWindowDefinition.cpp b/src/Parsers/ASTWindowDefinition.cpp
index e141ba2ff4e..dba2935e630 100644
--- a/src/Parsers/ASTWindowDefinition.cpp
+++ b/src/Parsers/ASTWindowDefinition.cpp
@@ -72,7 +72,7 @@ void ASTWindowDefinition::formatImpl(const FormatSettings & settings,
         {
             settings.ostr << abs(frame.begin_offset);
             settings.ostr << " "
-                << (frame.begin_offset > 0 ? "FOLLOWING" : "PRECEDING");
+                << (!frame.begin_preceding ? "FOLLOWING" : "PRECEDING");
         }
         settings.ostr << " AND ";
         if (frame.end_type == WindowFrame::BoundaryType::Current)
@@ -87,7 +87,7 @@ void ASTWindowDefinition::formatImpl(const FormatSettings & settings,
         {
             settings.ostr << abs(frame.end_offset);
             settings.ostr << " "
-                << (frame.end_offset > 0 ? "FOLLOWING" : "PRECEDING");
+                << (!frame.end_preceding ? "FOLLOWING" : "PRECEDING");
         }
     }
 }
diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp
index 3f4403bc264..9622d6a273b 100644
--- a/src/Parsers/ExpressionElementParsers.cpp
+++ b/src/Parsers/ExpressionElementParsers.cpp
@@ -577,6 +577,13 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p
                     "Frame offset must be between {} and {}, but {} is given",
                     INT_MAX, INT_MIN, node->frame.begin_offset);
             }
+
+            if (node->frame.begin_offset < 0)
+            {
+                throw Exception(ErrorCodes::BAD_ARGUMENTS,
+                    "Frame start offset must be greater than zero, {} given",
+                    node->frame.begin_offset);
+            }
         }
         else
         {
@@ -585,10 +592,11 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p
 
         if (keyword_preceding.ignore(pos, expected))
         {
-            node->frame.begin_offset = -node->frame.begin_offset;
+            node->frame.begin_preceding = true;
         }
         else if (keyword_following.ignore(pos, expected))
         {
+            node->frame.begin_preceding = false;
             if (node->frame.begin_type == WindowFrame::BoundaryType::Unbounded)
             {
                 throw Exception(ErrorCodes::NOT_IMPLEMENTED,
@@ -638,6 +646,13 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p
                         "Frame offset must be between {} and {}, but {} is given",
                         INT_MAX, INT_MIN, node->frame.end_offset);
                 }
+
+                if (node->frame.end_offset < 0)
+                {
+                    throw Exception(ErrorCodes::BAD_ARGUMENTS,
+                        "Frame end offset must be greater than zero, {} given",
+                        node->frame.end_offset);
+                }
             }
             else
             {
@@ -646,6 +661,7 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p
 
             if (keyword_preceding.ignore(pos, expected))
             {
+                node->frame.end_preceding = true;
                 if (node->frame.end_type == WindowFrame::BoundaryType::Unbounded)
                 {
                     throw Exception(ErrorCodes::NOT_IMPLEMENTED,
@@ -657,6 +673,7 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p
             else if (keyword_following.ignore(pos, expected))
             {
                 // Positive offset or UNBOUNDED FOLLOWING.
+                node->frame.end_preceding = false;
             }
             else
             {
diff --git a/src/Processors/QueryPlan/WindowStep.cpp b/src/Processors/QueryPlan/WindowStep.cpp
index 1a71ca0adc7..04d374963ff 100644
--- a/src/Processors/QueryPlan/WindowStep.cpp
+++ b/src/Processors/QueryPlan/WindowStep.cpp
@@ -9,6 +9,11 @@
 namespace DB
 {
 
+namespace ErrorCodes
+{
+    extern const int BAD_ARGUMENTS;
+}
+
 static ITransformingStep::Traits getTraits()
 {
     return ITransformingStep::Traits
@@ -55,9 +60,21 @@ WindowStep::WindowStep(const DataStream & input_stream_,
     , window_functions(window_functions_)
     , input_header(input_stream_.header)
 {
+    const auto & frame = window_description.frame;
     // We don't remove any columns, only add, so probably we don't have to update
     // the output DataStream::distinct_columns.
-    window_description.frame.checkValid();
+    frame.checkValid();
+
+    // RANGE OFFSET requires exactly one ORDER BY column.
+    if (frame.type == WindowFrame::FrameType::Range
+        && (frame.begin_type == WindowFrame::BoundaryType::Offset
+            || frame.end_type == WindowFrame::BoundaryType::Offset)
+        && window_description.order_by.size() != 1)
+    {
+        throw Exception(ErrorCodes::BAD_ARGUMENTS,
+            "The RANGE OFFSET window frame requires exactly one ORDER BY column, {} given",
+            window_description.order_by.size());
+    }
 }
 
 void WindowStep::transformPipeline(QueryPipeline & pipeline)
diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index 474d1a3c452..8d5f4450472 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -254,7 +254,8 @@ void WindowTransform::advanceFrameStartRowsOffset()
 {
     // Just recalculate it each time by walking blocks.
     const auto [moved_row, offset_left] = moveRowNumber(current_row,
-        window_description.frame.begin_offset);
+        window_description.frame.begin_offset
+            * (window_description.frame.begin_preceding ? -1 : 1));
 
     frame_start = moved_row;
 
@@ -289,41 +290,123 @@ void WindowTransform::advanceFrameStartRowsOffset()
     assert(offset_left >= 0);
 }
 
-void WindowTransform::advanceFrameStartChoose()
+template <typename ColumnType>
+static bool isBeforeFrameStart(const ColumnType * compared_column,
+    size_t compared_row, const ColumnType * reference_column,
+    size_t reference_row,
+    typename ColumnType::ValueType offset, bool is_preceding)
 {
-    switch (window_description.frame.begin_type)
+    // The frame is [[current_row] + begin_offset, [current_row] + end_offset]
+    // We need to return "is to the left of frame", that is,
+    // [tested_row] < [current_row] + begin_offset
+    // 1) right side overflows to positive, the condition is
+    // false.
+    // 2) [frame_start] + begin_offset overflows to negative, the condition is
+    // false.
+    // 3) no overflows, perform the comparison normally
+
+    const auto compared_value_data = compared_column->getDataAt(compared_row);
+    assert(compared_value_data.size == sizeof(typename ColumnType::ValueType));
+    auto compared_value = unalignedLoad<typename ColumnType::ValueType>(
+        compared_value_data.data);
+
+    const auto reference_value_data = reference_column->getDataAt(reference_row);
+    assert(reference_value_data.size == sizeof(typename ColumnType::ValueType));
+    auto reference_value = unalignedLoad<typename ColumnType::ValueType>(
+        reference_value_data.data);
+
+    bool is_overflow;
+    bool overflow_to_negative;
+    if (is_preceding)
     {
-        case WindowFrame::BoundaryType::Unbounded:
-            // UNBOUNDED PRECEDING, just mark it valid. It is initialized when
-            // the new partition starts.
-            frame_started = true;
-            return;
-        case WindowFrame::BoundaryType::Current:
-            // CURRENT ROW differs between frame types only in how the peer
-            // groups are accounted.
-            assert(partition_start <= peer_group_start);
-            assert(peer_group_start < partition_end);
-            assert(peer_group_start <= current_row);
-            frame_start = peer_group_start;
-            frame_started = true;
-            return;
-        case WindowFrame::BoundaryType::Offset:
-            switch (window_description.frame.type)
-            {
-                case WindowFrame::FrameType::Rows:
-                    advanceFrameStartRowsOffset();
-                    return;
-                default:
-                    // Fallthrough to the "not implemented" error.
-                    break;
-            }
-            break;
+        // The frame start is ref] - offset, inclusive. We answer
+        // "is compared row to the left of the frame?", i.e.
+        // [compared] < [ref] - offset.
+        is_overflow = __builtin_sub_overflow(reference_value, offset,
+            &reference_value);
+        overflow_to_negative = offset > 0;
+    }
+    else
+    {
+        // The frame start is [ref] + offset, inclusive. We answer
+        // "is compared row to the left of the frame?", i.e.
+        // [compared] < [ref] + offset.
+        is_overflow = __builtin_add_overflow(reference_value, offset,
+            &reference_value);
+        overflow_to_negative = offset < 0;
     }
 
-    throw Exception(ErrorCodes::NOT_IMPLEMENTED,
-        "Frame start type '{}' for frame '{}' is not implemented",
-        WindowFrame::toString(window_description.frame.begin_type),
-        WindowFrame::toString(window_description.frame.type));
+    fmt::print(stderr,
+        "compared [{}] = {}, ref [{}] = {}, offset {} overflow {} to negative {}\n",
+        compared_row, toString(compared_value),
+        reference_row, toString(reference_value),
+        toString(offset), is_overflow, overflow_to_negative);
+
+    if (is_overflow)
+    {
+        if (overflow_to_negative)
+        {
+            // Overflow to the negative, [compared] must be greater.
+            return false;
+        }
+        else
+        {
+            // Overflow to the positive, [compared] must be less.
+            return true;
+        }
+    }
+    else
+    {
+        // No overflow, compare normally.
+        return compared_value < reference_value;
+    }
+}
+
+template <typename ColumnType>
+void WindowTransform::advanceFrameStartRangeOffset()
+{
+    // [frame_start] + begin_offset < [current_row]
+    // 1) [frame_start] + begin_offset overflows to positive, the condition is
+    // false.
+    // 2) [frame_start] + begin_offset overflows to negative, the condition is
+    // false.
+    // 3) no overflows, perform the comparison normally
+
+    const auto * reference_column = assert_cast<const ColumnType *>(
+        inputAt(current_row)[order_by_indices[0]].get());
+    for (; frame_start < partition_end; advanceRowNumber(frame_start))
+    {
+        const auto * compared_column = assert_cast<const ColumnType *>(
+            inputAt(frame_start)[order_by_indices[0]].get());
+        if (!isBeforeFrameStart(compared_column, frame_start.row,
+            reference_column, current_row.row,
+            window_description.frame.begin_offset,
+            window_description.frame.begin_preceding))
+        {
+            frame_started = true;
+            return;
+        }
+    }
+
+    frame_started = partition_ended;
+}
+
+void WindowTransform::advanceFrameStartRangeOffsetDispatch()
+{
+    // Dispatch on the type of the ORDER BY column.
+    assert(order_by_indices.size() == 1);
+    const IColumn * column = inputAt(current_row)[order_by_indices[0]].get();
+
+    if (typeid_cast<const ColumnVector<UInt8> *>(column))
+    {
+        advanceFrameStartRangeOffset<ColumnVector<UInt8>>();
+    }
+    else
+    {
+        throw Exception(ErrorCodes::NOT_IMPLEMENTED,
+            "The RANGE OFFSET frame start for '{}' ORDER BY column is not implemented",
+            demangle(typeid(*column).name()));
+    }
 }
 
 void WindowTransform::advanceFrameStart()
@@ -334,7 +417,41 @@ void WindowTransform::advanceFrameStart()
     }
 
     const auto frame_start_before = frame_start;
-    advanceFrameStartChoose();
+
+    switch (window_description.frame.begin_type)
+    {
+        case WindowFrame::BoundaryType::Unbounded:
+            // UNBOUNDED PRECEDING, just mark it valid. It is initialized when
+            // the new partition starts.
+            frame_started = true;
+            break;
+        case WindowFrame::BoundaryType::Current:
+            // CURRENT ROW differs between frame types only in how the peer
+            // groups are accounted.
+            assert(partition_start <= peer_group_start);
+            assert(peer_group_start < partition_end);
+            assert(peer_group_start <= current_row);
+            frame_start = peer_group_start;
+            frame_started = true;
+            break;
+        case WindowFrame::BoundaryType::Offset:
+            switch (window_description.frame.type)
+            {
+                case WindowFrame::FrameType::Rows:
+                    advanceFrameStartRowsOffset();
+                    break;
+                case WindowFrame::FrameType::Range:
+                    advanceFrameStartRangeOffsetDispatch();
+                    break;
+                default:
+                    throw Exception(ErrorCodes::NOT_IMPLEMENTED,
+                        "Frame start type '{}' for frame '{}' is not implemented",
+                        WindowFrame::toString(window_description.frame.begin_type),
+                        WindowFrame::toString(window_description.frame.type));
+            }
+            break;
+    }
+
     assert(frame_start_before <= frame_start);
     if (frame_start == frame_start_before)
     {
@@ -474,7 +591,9 @@ void WindowTransform::advanceFrameEndRowsOffset()
     // Walk the specified offset from the current row. The "+1" is needed
     // because the frame_end is a past-the-end pointer.
     const auto [moved_row, offset_left] = moveRowNumber(current_row,
-        window_description.frame.end_offset + 1);
+        window_description.frame.end_offset
+            * (window_description.frame.end_preceding ? -1 : 1)
+            + 1);
 
     if (partition_end <= moved_row)
     {
diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h
index bb1a9aefd64..b45913965ee 100644
--- a/src/Processors/Transforms/WindowTransform.h
+++ b/src/Processors/Transforms/WindowTransform.h
@@ -107,8 +107,10 @@ public:
 private:
     void advancePartitionEnd();
     void advanceFrameStart();
-    void advanceFrameStartChoose();
     void advanceFrameStartRowsOffset();
+    void advanceFrameStartRangeOffsetDispatch();
+    template <typename T>
+    void advanceFrameStartRangeOffset();
     void advanceFrameEndCurrentRow();
     void advanceFrameEndUnbounded();
     void advanceFrameEndRowsOffset();

From a34cd32552888bfca449bfbf433a7b3997b5a2dc Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Fri, 5 Feb 2021 12:25:16 +0300
Subject: [PATCH 0697/1238] Fix build

---
 .../Optimizations/filterPushDown.cpp          | 22 -------------------
 1 file changed, 22 deletions(-)
 delete mode 100644 src/Processors/QueryPlan/Optimizations/filterPushDown.cpp

diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp
deleted file mode 100644
index 0d651897bf8..00000000000
--- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-#include <Processors/QueryPlan/Optimizations/Optimizations.h>
-#include <Processors/QueryPlan/FilterStep.h>
-
-namespace DB::QueryPlanOptimizations
-{
-
-size_t tryPushDownLimit(QueryPlan::Node * node, QueryPlan::Nodes &)
-{
-    auto * filter_step = typeid_cast<FilterStep *>(node->step.get());
-    if (!filter_step)
-        return 0;
-
-    QueryPlan::Node * child_node = node->children.front();
-    auto & child = child_node->step;
-
-    if (const auto * adding_const_column = typeid_cast<const AddingConstColumnStep *>(child.get()))
-    {
-
-    }
-}
-
-}

From b75a8a0eb3975e126d2a57bb6073e8d5f72fd094 Mon Sep 17 00:00:00 2001
From: zhangxiao871 <zhangxiao871@ZBMAC-C02DN6312.local>
Date: Fri, 5 Feb 2021 17:46:08 +0800
Subject: [PATCH 0698/1238] documentation.

---
 docs/en/operations/system-tables/zookeeper.md | 4 ++++
 docs/zh/operations/system-tables/zookeeper.md | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/docs/en/operations/system-tables/zookeeper.md b/docs/en/operations/system-tables/zookeeper.md
index c63434877d3..713cb9269d4 100644
--- a/docs/en/operations/system-tables/zookeeper.md
+++ b/docs/en/operations/system-tables/zookeeper.md
@@ -7,6 +7,10 @@ The query `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` outputs da
 To output data for all root nodes, write path = ‘/’.
 If the path specified in ‘path’ doesn’t exist, an exception will be thrown.
 
+The query `SELECT * FROM system.zookeeper WHERE path IN ('/', '/clickhouse')` outputs data for all children on the `/` and `/clickhouse` node.
+If in the specified ‘path’ collection has doesn't exist path, an exception will be thrown.
+It can be used to do a batch of ZooKeeper path queries.
+
 Columns:
 
 -   `name` (String) — The name of the node.
diff --git a/docs/zh/operations/system-tables/zookeeper.md b/docs/zh/operations/system-tables/zookeeper.md
index 930148ba639..f7e816ccee6 100644
--- a/docs/zh/operations/system-tables/zookeeper.md
+++ b/docs/zh/operations/system-tables/zookeeper.md
@@ -12,6 +12,10 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
 要输出所有根节点的数据，write path= ‘/’.
 如果在指定的路径 ‘path’ 不存在，将引发异常。
 
+查询`SELECT * FROM system.zookeeper WHERE path IN ('/', '/clickhouse')` 输出`/` 和 `/clickhouse`节点上所有子节点的数据。
+如果在指定的 ‘path’ 集合中有不存在的路径，将引发异常。
+它可以用来做一批ZooKeeper路径查询。
+
 列:
 
 -   `name` (String) — The name of the node.

From 294e68e0571600738af8be45a3aac974829941a0 Mon Sep 17 00:00:00 2001
From: George <gyuton@yandex-team.ru>
Date: Fri, 5 Feb 2021 12:46:59 +0300
Subject: [PATCH 0699/1238] Added translation

---
 .../table-engines/mergetree-family/mergetree.md    |  3 ++-
 docs/ru/operations/settings/merge-tree-settings.md | 14 +++++++++++++-
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md
index 9b2a5eafca3..57b05b259de 100644
--- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md
@@ -91,6 +91,7 @@ ORDER BY expr
 	-   `max_parts_in_total` — максимальное количество кусков во всех партициях.
 	-   `max_compress_block_size` — максимальный размер блоков несжатых данных перед сжатием для записи в таблицу. Вы также можете задать этот параметр в глобальных настройках (смотрите [max_compress_block_size](../../../operations/settings/settings.md#max-compress-block-size)). Настройка, которая задается при создании таблицы, имеет более высокий приоритет, чем глобальная.
 	-   `min_compress_block_size` — минимальный размер блоков несжатых данных, необходимых для сжатия при записи следующей засечки. Вы также можете задать этот параметр в глобальных настройках (смотрите [min_compress_block_size](../../../operations/settings/settings.md#min-compress-block-size)). Настройка, которая задается при создании таблицы, имеет более высокий приоритет, чем глобальная.
+    -   `max_partitions_to_read` — Ограничивает максимальное число партиций для чтения в одном запросе. Также возможно указать настройку [max_partitions_to_read](../../../operations/settings/merge-tree-settings.md#max-partitions-to-read) в глобальных настройках.
 
 **Пример задания секций**
 
@@ -666,4 +667,4 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
 
 После выполнения фоновых слияний или мутаций старые куски не удаляются сразу, а через некоторое время (табличная настройка `old_parts_lifetime`). Также они не перемещаются на другие тома или диски, поэтому до момента удаления они продолжают учитываться при подсчёте занятого дискового пространства.
 
-[Оригинальная статья](https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/mergetree/) <!--hide-->
+[Оригинальная статья](https://clickhouse.tech/docs/ru/engines/table-engines/mergetree-family/mergetree/) <!--hide-->
diff --git a/docs/ru/operations/settings/merge-tree-settings.md b/docs/ru/operations/settings/merge-tree-settings.md
index e78d4c98683..b8adc73c7ce 100644
--- a/docs/ru/operations/settings/merge-tree-settings.md
+++ b/docs/ru/operations/settings/merge-tree-settings.md
@@ -181,4 +181,16 @@ Eсли суммарное число активных кусков во все
 
 При старте ClickHouse читает все куски всех таблиц (читает файлы с метаданными кусков), чтобы построить в ОЗУ список всех кусков. В некоторых системах с большим количеством кусков этот процесс может занимать длительное время, и это время можно сократить, увеличив `max_part_loading_threads` (если при этом процессе есть недозагруженность CPU и диска).
 
-{## [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/merge-tree-settings/) ##}
+## max_partitions_to_read {#max-partitions-to-read}
+
+Ограничивает максимальное число партиций для чтения в одном запросе.
+
+Указанное при создании таблицы значение настройки может быть переназначено настройкой на уровне запроса.
+
+Возможные значения:
+
+-   Любое положительное целое число.
+
+Значение по умолчанию: -1 (неограничено).
+
+[Original article](https://clickhouse.tech/docs/ru/operations/settings/merge_tree_settings/) <!--hide-->

From 7cbc135e726547a5e42f6fe16ef01197f9dfd440 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Fri, 5 Feb 2021 12:54:34 +0300
Subject: [PATCH 0700/1238] More isolated code

---
 src/DataStreams/RemoteQueryExecutor.cpp       |  78 +++----
 src/DataStreams/RemoteQueryExecutor.h         |   9 +-
 .../MergeTree/MergeTreeDataSelectExecutor.cpp | 219 +++++++++++-------
 .../MergeTree/MergeTreeDataSelectExecutor.h   |  19 ++
 4 files changed, 189 insertions(+), 136 deletions(-)

diff --git a/src/DataStreams/RemoteQueryExecutor.cpp b/src/DataStreams/RemoteQueryExecutor.cpp
index 27b3de66497..fc3870b3f22 100644
--- a/src/DataStreams/RemoteQueryExecutor.cpp
+++ b/src/DataStreams/RemoteQueryExecutor.cpp
@@ -173,9 +173,7 @@ void RemoteQueryExecutor::sendQuery()
     {
         std::lock_guard lock(duplicated_part_uuids_mutex);
         if (!duplicated_part_uuids.empty())
-        {
             multiplexed_connections->sendIgnoredPartUUIDs(duplicated_part_uuids);
-        }
     }
 
     multiplexed_connections->sendQuery(timeouts, query, query_id, stage, modified_client_info, true);
@@ -206,29 +204,9 @@ Block RemoteQueryExecutor::read()
         Packet packet = multiplexed_connections->receivePacket();
 
         if (auto block = processPacket(std::move(packet)))
-        {
-            if (got_duplicated_part_uuids)
-            {
-                /// Cancel previous query and disconnect before retry.
-                cancel();
-                multiplexed_connections->disconnect();
-
-                /// Only resend once, otherwise throw an exception
-                if (!resent_query)
-                {
-                    if (log)
-                        LOG_DEBUG(log, "Found duplicate UUIDs, will retry query without those parts");
-
-                    resent_query = true;
-                    sent_query = false;
-                    got_duplicated_part_uuids = false;
-                    /// Consecutive read will implicitly send query first.
-                    return read();
-                }
-                throw Exception("Found duplicate uuids while processing query.", ErrorCodes::DUPLICATED_PART_UUIDS);
-            }
             return *block;
-        }
+        else if (got_duplicated_part_uuids)
+            return std::get<Block>(restartQueryWithoutDuplicatedUUIDs());
     }
 }
 
@@ -266,29 +244,9 @@ std::variant<Block, int> RemoteQueryExecutor::read(std::unique_ptr<ReadContext>
         else
         {
             if (auto data = processPacket(std::move(read_context->packet)))
-            {
-                if (got_duplicated_part_uuids)
-                {
-                    /// Cancel previous query and disconnect before retry.
-                    cancel(&read_context);
-                    multiplexed_connections->disconnect();
-
-                    /// Only resend once, otherwise throw an exception
-                    if (!resent_query)
-                    {
-                        if (log)
-                            LOG_DEBUG(log, "Found duplicate UUIDs, will retry query without those parts");
-
-                        resent_query = true;
-                        sent_query = false;
-                        got_duplicated_part_uuids = false;
-                        /// Consecutive read will implicitly send query first.
-                        return read(read_context);
-                    }
-                    throw Exception("Found duplicate uuids while processing query.", ErrorCodes::DUPLICATED_PART_UUIDS);
-                }
                 return std::move(*data);
-            }
+            else if (got_duplicated_part_uuids)
+                return restartQueryWithoutDuplicatedUUIDs(&read_context);
         }
     }
     while (true);
@@ -297,16 +255,38 @@ std::variant<Block, int> RemoteQueryExecutor::read(std::unique_ptr<ReadContext>
 #endif
 }
 
+
+std::variant<Block, int> RemoteQueryExecutor::restartQueryWithoutDuplicatedUUIDs(std::unique_ptr<ReadContext> * read_context)
+{
+    /// Cancel previous query and disconnect before retry.
+    cancel(read_context);
+    multiplexed_connections->disconnect();
+
+    /// Only resend once, otherwise throw an exception
+    if (!resent_query)
+    {
+        if (log)
+            LOG_DEBUG(log, "Found duplicate UUIDs, will retry query without those parts");
+
+        resent_query = true;
+        sent_query = false;
+        got_duplicated_part_uuids = false;
+        /// Consecutive read will implicitly send query first.
+        if (!read_context)
+            return read();
+        else
+            return read(*read_context);
+    }
+    throw Exception("Found duplicate uuids while processing query.", ErrorCodes::DUPLICATED_PART_UUIDS);
+}
+
 std::optional<Block> RemoteQueryExecutor::processPacket(Packet packet)
 {
     switch (packet.type)
     {
         case Protocol::Server::PartUUIDs:
             if (!setPartUUIDs(packet.part_uuids))
-            {
                 got_duplicated_part_uuids = true;
-                return Block();
-            }
             break;
         case Protocol::Server::Data:
             /// If the block is not empty and is not a header block
diff --git a/src/DataStreams/RemoteQueryExecutor.h b/src/DataStreams/RemoteQueryExecutor.h
index 843cf75f1f8..6a10627b948 100644
--- a/src/DataStreams/RemoteQueryExecutor.h
+++ b/src/DataStreams/RemoteQueryExecutor.h
@@ -174,10 +174,14 @@ private:
     /// Send all temporary tables to remote servers
     void sendExternalTables();
 
-    /** Set part uuids to a query context, collected from remote replicas.
-      */
+    /// Set part uuids to a query context, collected from remote replicas.
+    /// Return true if duplicates found.
     bool setPartUUIDs(const std::vector<UUID> & uuids);
 
+    /// Cancell query and restart it with info about duplicated UUIDs
+    /// only for `allow_experimental_query_deduplication`.
+    std::variant<Block, int> restartQueryWithoutDuplicatedUUIDs(std::unique_ptr<ReadContext> * read_context = nullptr);
+
     /// If wasn't sent yet, send request to cancel all connections to replicas
     void tryCancel(const char * reason, std::unique_ptr<ReadContext> * read_context);
 
@@ -192,6 +196,7 @@ private:
 
     /// Reads packet by packet
     Block readPackets();
+
 };
 
 }
diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index 740288e3b46..6b6098321cf 100644
--- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -6,7 +6,6 @@
 #include <Poco/File.h>
 
 #include <Common/FieldVisitors.h>
-#include <Storages/MergeTree/PartitionPruner.h>
 #include <Storages/MergeTree/MergeTreeDataSelectExecutor.h>
 #include <Storages/MergeTree/MergeTreeSelectProcessor.h>
 #include <Storages/MergeTree/MergeTreeReverseSelectProcessor.h>
@@ -267,92 +266,13 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
         }
     }
 
-    /// Select the parts in which there can be data that satisfy `minmax_idx_condition` and that match the condition on `_part`,
-    ///  as well as `max_block_number_to_read`.
-    /// Skip parts uuids if any to the query context, or skip parts which uuids marked as excluded.
-    {
-        Context & query_context
-            = context.hasQueryContext() ? const_cast<Context &>(context).getQueryContext() : const_cast<Context &>(context);
+    const Context & query_context = context.hasQueryContext() ? context.getQueryContext() : context;
 
-        /// process_parts prepare parts that have to be read for the query,
-        /// returns false if duplicated parts' UUID have been met
-        auto select_parts = [&] (MergeTreeData::DataPartsVector & selected_parts) -> bool
-        {
-            auto ignored_part_uuids = query_context.getIgnoredPartUUIDs();
-            std::unordered_set<UUID> temp_part_uuids;
+    if (query_context.getSettingsRef().allow_experimental_query_deduplication)
+        selectPartsToReadWithUUIDFilter(parts, part_values, minmax_idx_condition, partition_pruner, max_block_numbers_to_read, query_context);
+    else
+        selectPartsToRead(parts, part_values, minmax_idx_condition, partition_pruner, max_block_numbers_to_read);
 
-            auto prev_parts = selected_parts;
-            selected_parts.clear();
-
-            for (const auto & part : prev_parts)
-            {
-                if (part_values.find(part->name) == part_values.end())
-                    continue;
-
-                if (part->isEmpty())
-                    continue;
-
-                if (minmax_idx_condition
-                    && !minmax_idx_condition->checkInHyperrectangle(part->minmax_idx.hyperrectangle, data.minmax_idx_column_types)
-                            .can_be_true)
-                    continue;
-
-                if (partition_pruner)
-                {
-                    if (partition_pruner->canBePruned(part))
-                        continue;
-                }
-
-                if (max_block_numbers_to_read)
-                {
-                    auto blocks_iterator = max_block_numbers_to_read->find(part->info.partition_id);
-                    if (blocks_iterator == max_block_numbers_to_read->end() || part->info.max_block > blocks_iterator->second)
-                        continue;
-                }
-
-                /// populate UUIDs and exclude ignored parts if enabled
-                if (query_context.getSettingsRef().allow_experimental_query_deduplication && part->uuid != UUIDHelpers::Nil)
-                {
-                    /// Skip the part if its uuid is meant to be excluded
-                    if (ignored_part_uuids->has(part->uuid))
-                        continue;
-
-                    auto result = temp_part_uuids.insert(part->uuid);
-                    if (!result.second)
-                        throw Exception("Found a part with the same UUID on the same replica.", ErrorCodes::LOGICAL_ERROR);
-                }
-
-                selected_parts.push_back(part);
-            }
-
-            if (!temp_part_uuids.empty())
-            {
-                auto duplicates = query_context.getPartUUIDs()->add(std::vector<UUID>{temp_part_uuids.begin(), temp_part_uuids.end()});
-                if (!duplicates.empty())
-                {
-                    /// on a local replica with prefer_localhost_replica=1 if any duplicates appeared during the first pass,
-                    /// adding them to the exclusion, so they will be skipped on second pass
-                    query_context.getIgnoredPartUUIDs()->add(duplicates);
-                    return false;
-                }
-            }
-
-            return true;
-        };
-
-        /// Process parts that have to be read for a query.
-        auto needs_retry = !select_parts(parts);
-        /// If any duplicated part UUIDs met during the first step, try to ignore them in second pass
-        if (needs_retry)
-        {
-            if (log)
-                LOG_DEBUG(log, "Found duplicate uuids locally, will retry part selection without them");
-
-            /// Second attempt didn't help, throw an exception
-            if (!select_parts(parts))
-                throw Exception("Found duplicate UUIDs while processing query.", ErrorCodes::DUPLICATED_PART_UUIDS);
-        }
-    }
 
     /// Sampling.
     Names column_names_to_read = real_column_names;
@@ -1924,5 +1844,134 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex(
     return res;
 }
 
+void MergeTreeDataSelectExecutor::selectPartsToRead(
+    MergeTreeData::DataPartsVector & parts,
+    const std::unordered_set<String> & part_values,
+    const std::optional<KeyCondition> & minmax_idx_condition,
+    std::optional<PartitionPruner> & partition_pruner,
+    const PartitionIdToMaxBlock * max_block_numbers_to_read) const
+{
+    auto prev_parts = parts;
+    parts.clear();
+
+    for (const auto & part : prev_parts)
+    {
+        if (part_values.find(part->name) == part_values.end())
+            continue;
+
+        if (part->isEmpty())
+            continue;
+
+        if (minmax_idx_condition && !minmax_idx_condition->checkInHyperrectangle(
+                part->minmax_idx.hyperrectangle, data.minmax_idx_column_types).can_be_true)
+            continue;
+
+        if (partition_pruner)
+        {
+            if (partition_pruner->canBePruned(part))
+                continue;
+        }
+
+        if (max_block_numbers_to_read)
+        {
+            auto blocks_iterator = max_block_numbers_to_read->find(part->info.partition_id);
+            if (blocks_iterator == max_block_numbers_to_read->end() || part->info.max_block > blocks_iterator->second)
+                continue;
+        }
+
+        parts.push_back(part);
+    }
+}
+
+void MergeTreeDataSelectExecutor::selectPartsToReadWithUUIDFilter(
+    MergeTreeData::DataPartsVector & parts,
+    const std::unordered_set<String> & part_values,
+    const std::optional<KeyCondition> & minmax_idx_condition,
+    std::optional<PartitionPruner> & partition_pruner,
+    const PartitionIdToMaxBlock * max_block_numbers_to_read,
+    const Context & query_context) const
+{
+    /// const_cast to add UUIDs to context. Bad practice.
+    Context & non_const_context = const_cast<Context &>(query_context);
+
+    /// process_parts prepare parts that have to be read for the query,
+    /// returns false if duplicated parts' UUID have been met
+    auto select_parts = [&] (MergeTreeData::DataPartsVector & selected_parts) -> bool
+    {
+        auto ignored_part_uuids = non_const_context.getIgnoredPartUUIDs();
+        std::unordered_set<UUID> temp_part_uuids;
+
+        auto prev_parts = selected_parts;
+        selected_parts.clear();
+
+        for (const auto & part : prev_parts)
+        {
+            if (part_values.find(part->name) == part_values.end())
+                continue;
+
+            if (part->isEmpty())
+                continue;
+
+            if (minmax_idx_condition
+                && !minmax_idx_condition->checkInHyperrectangle(part->minmax_idx.hyperrectangle, data.minmax_idx_column_types)
+                        .can_be_true)
+                continue;
+
+            if (partition_pruner)
+            {
+                if (partition_pruner->canBePruned(part))
+                    continue;
+            }
+
+            if (max_block_numbers_to_read)
+            {
+                auto blocks_iterator = max_block_numbers_to_read->find(part->info.partition_id);
+                if (blocks_iterator == max_block_numbers_to_read->end() || part->info.max_block > blocks_iterator->second)
+                    continue;
+            }
+
+            /// populate UUIDs and exclude ignored parts if enabled
+            if (part->uuid != UUIDHelpers::Nil)
+            {
+                /// Skip the part if its uuid is meant to be excluded
+                if (ignored_part_uuids->has(part->uuid))
+                    continue;
+
+                auto result = temp_part_uuids.insert(part->uuid);
+                if (!result.second)
+                    throw Exception("Found a part with the same UUID on the same replica.", ErrorCodes::LOGICAL_ERROR);
+            }
+
+            selected_parts.push_back(part);
+        }
+
+        if (!temp_part_uuids.empty())
+        {
+            auto duplicates = non_const_context.getPartUUIDs()->add(std::vector<UUID>{temp_part_uuids.begin(), temp_part_uuids.end()});
+            if (!duplicates.empty())
+            {
+                /// on a local replica with prefer_localhost_replica=1 if any duplicates appeared during the first pass,
+                /// adding them to the exclusion, so they will be skipped on second pass
+                non_const_context.getIgnoredPartUUIDs()->add(duplicates);
+                return false;
+            }
+        }
+
+        return true;
+    };
+
+    /// Process parts that have to be read for a query.
+    auto needs_retry = !select_parts(parts);
+
+    /// If any duplicated part UUIDs met during the first step, try to ignore them in second pass
+    if (needs_retry)
+    {
+        LOG_DEBUG(log, "Found duplicate uuids locally, will retry part selection without them");
+
+        /// Second attempt didn't help, throw an exception
+        if (!select_parts(parts))
+            throw Exception("Found duplicate UUIDs while processing query.", ErrorCodes::DUPLICATED_PART_UUIDS);
+    }
+}
 
 }
diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h
index c3b3020ebf5..04a3be3d3f0 100644
--- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h
+++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h
@@ -4,6 +4,7 @@
 #include <Storages/SelectQueryInfo.h>
 #include <Storages/MergeTree/MergeTreeData.h>
 #include <Storages/MergeTree/RangesInDataPart.h>
+#include <Storages/MergeTree/PartitionPruner.h>
 
 
 namespace DB
@@ -113,6 +114,24 @@ private:
         const Settings & settings,
         const MergeTreeReaderSettings & reader_settings,
         Poco::Logger * log);
+
+    /// Select the parts in which there can be data that satisfy `minmax_idx_condition` and that match the condition on `_part`,
+    ///  as well as `max_block_number_to_read`.
+    void selectPartsToRead(
+        MergeTreeData::DataPartsVector & parts,
+        const std::unordered_set<String> & part_values,
+        const std::optional<KeyCondition> & minmax_idx_condition,
+        std::optional<PartitionPruner> & partition_pruner,
+        const PartitionIdToMaxBlock * max_block_numbers_to_read) const;
+
+    /// Same as previous but also skip parts uuids if any to the query context, or skip parts which uuids marked as excluded.
+    void selectPartsToReadWithUUIDFilter(
+        MergeTreeData::DataPartsVector & parts,
+        const std::unordered_set<String> & part_values,
+        const std::optional<KeyCondition> & minmax_idx_condition,
+        std::optional<PartitionPruner> & partition_pruner,
+        const PartitionIdToMaxBlock * max_block_numbers_to_read,
+        const Context & query_context) const;
 };
 
 }

From 449e8e3fd97ecc3c02fa64a6e763ebe8e54a019b Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Fri, 5 Feb 2021 13:15:02 +0300
Subject: [PATCH 0701/1238] More checks for setting

---
 src/Server/TCPHandler.cpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp
index 0d040652342..fa213dcdc55 100644
--- a/src/Server/TCPHandler.cpp
+++ b/src/Server/TCPHandler.cpp
@@ -535,7 +535,9 @@ void TCPHandler::processOrdinaryQuery()
     /// Pull query execution result, if exists, and send it to network.
     if (state.io.in)
     {
-        sendPartUUIDs();
+
+        if (query_context->getSettingsRef().allow_experimental_query_deduplication)
+            sendPartUUIDs();
 
         /// This allows the client to prepare output format
         if (Block header = state.io.in->getHeader())
@@ -601,7 +603,8 @@ void TCPHandler::processOrdinaryQueryWithProcessors()
 {
     auto & pipeline = state.io.pipeline;
 
-    sendPartUUIDs();
+    if (query_context->getSettingsRef().allow_experimental_query_deduplication)
+        sendPartUUIDs();
 
     /// Send header-block, to allow client to prepare output format for data to send.
     {

From 16d399aa3539be6a5f4d6b4ba3d7bb6acd542096 Mon Sep 17 00:00:00 2001
From: Ivan Lezhankin <ilezhankin@yandex-team.ru>
Date: Fri, 5 Feb 2021 13:31:18 +0300
Subject: [PATCH 0702/1238] Another build fix

---
 src/IO/MemoryReadWriteBuffer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/IO/MemoryReadWriteBuffer.cpp b/src/IO/MemoryReadWriteBuffer.cpp
index 0b0d9704de6..69bcd52a8d2 100644
--- a/src/IO/MemoryReadWriteBuffer.cpp
+++ b/src/IO/MemoryReadWriteBuffer.cpp
@@ -61,7 +61,7 @@ private:
             position() = nullptr;
         }
 
-        return buffer().size() != 0;
+        return !buffer().empty();
     }
 
     using Container = std::forward_list<BufferBase::Buffer>;

From aafadc06df5136c15f260ed6eb271f250e973571 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Fri, 5 Feb 2021 13:31:46 +0300
Subject: [PATCH 0703/1238] Better types

---
 src/Interpreters/Context.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index 4dbdf390473..ea9ea172d3f 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -266,8 +266,8 @@ private:
     using SampleBlockCache = std::unordered_map<std::string, Block>;
     mutable SampleBlockCache sample_block_cache;
 
-    std::shared_ptr<PartUUIDs> part_uuids; /// set of parts' uuids, is used for query parts deduplication
-    std::shared_ptr<PartUUIDs> ignored_part_uuids; /// set of parts' uuids are meant to be excluded from query processing
+    PartUUIDsPtr part_uuids; /// set of parts' uuids, is used for query parts deduplication
+    PartUUIDsPtr ignored_part_uuids; /// set of parts' uuids are meant to be excluded from query processing
 
     NameToNameMap query_parameters;   /// Dictionary with query parameters for prepared statements.
                                                      /// (key=name, value)

From f7dbdc623cc4122f7b03f621e2c3f4c4d745b74f Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com>
Date: Fri, 5 Feb 2021 13:45:38 +0300
Subject: [PATCH 0704/1238] Update style.md

---
 docs/ru/development/style.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/ru/development/style.md b/docs/ru/development/style.md
index 4d71dca46a7..1b211259bbb 100644
--- a/docs/ru/development/style.md
+++ b/docs/ru/development/style.md
@@ -714,6 +714,7 @@ auto s = std::string{"Hello"};
 ### Пользовательская ошибка {#error-messages-user-error}
 
 Такая ошибка вызвана действиями пользователя (неверный синтаксис запроса) или конфигурацией внешних систем (кончилось место на диске). Предполагается, что пользователь может устранить её самостоятельно. Для этого в сообщении об ошибке должна содержаться следующая информация:
+
 * что произошло. Это должно объясняться в пользовательских терминах (`Function pow() is not supported for data type UInt128`), а не загадочными конструкциями из кода (`runtime overload resolution failed in DB::BinaryOperationBuilder<FunctionAdaptor<pow>::Impl, UInt128, Int8>::kaboongleFastPath()`).
 * почему/где/когда -- любой контекст, который помогает отладить проблему. Представьте, как бы её отлаживали вы (программировать и пользоваться отладчиком нельзя).
 * что можно предпринять для устранения ошибки. Здесь можно перечислить типичные причины проблемы, настройки, влияющие на это поведение, и так далее.

From 9869f70a0d42ea7898b67bbe793f084503e4bb95 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Fri, 5 Feb 2021 14:41:44 +0300
Subject: [PATCH 0705/1238] Remove AddMissed step and transform.

---
 .../AddingDefaultBlockOutputStream.cpp        | 17 +++-
 .../AddingDefaultBlockOutputStream.h          | 14 +--
 src/Functions/replicate.cpp                   | 95 +++++++------------
 src/Functions/replicate.h                     | 38 ++++++++
 src/Interpreters/addMissingDefaults.cpp       | 77 ++++++++-------
 src/Interpreters/addMissingDefaults.h         |  7 +-
 src/Processors/QueryPlan/AddingMissedStep.cpp | 45 ---------
 src/Processors/QueryPlan/AddingMissedStep.h   | 26 -----
 .../Optimizations/filterPushDown.cpp          | 22 +++++
 .../Transforms/AddingMissedTransform.cpp      | 27 ------
 .../Transforms/AddingMissedTransform.h        | 36 -------
 src/Storages/StorageBuffer.cpp                | 13 ++-
 12 files changed, 172 insertions(+), 245 deletions(-)
 create mode 100644 src/Functions/replicate.h
 delete mode 100644 src/Processors/QueryPlan/AddingMissedStep.cpp
 delete mode 100644 src/Processors/QueryPlan/AddingMissedStep.h
 create mode 100644 src/Processors/QueryPlan/Optimizations/filterPushDown.cpp
 delete mode 100644 src/Processors/Transforms/AddingMissedTransform.cpp
 delete mode 100644 src/Processors/Transforms/AddingMissedTransform.h

diff --git a/src/DataStreams/AddingDefaultBlockOutputStream.cpp b/src/DataStreams/AddingDefaultBlockOutputStream.cpp
index 74300a371fb..23cd5e2c3c8 100644
--- a/src/DataStreams/AddingDefaultBlockOutputStream.cpp
+++ b/src/DataStreams/AddingDefaultBlockOutputStream.cpp
@@ -1,13 +1,28 @@
 #include <DataStreams/AddingDefaultBlockOutputStream.h>
 #include <Interpreters/addMissingDefaults.h>
+#include <Interpreters/ExpressionActions.h>
 
 
 namespace DB
 {
 
+AddingDefaultBlockOutputStream::AddingDefaultBlockOutputStream(
+    const BlockOutputStreamPtr & output_,
+    const Block & header_,
+    const Block & output_block_,
+    const ColumnsDescription & columns_,
+    const Context & context_)
+    : output(output_), header(header_)
+{
+    auto dag = addMissingDefaults(header_, output_block_.getNamesAndTypesList(), columns_, context_);
+    actions = std::make_shared<ExpressionActions>(std::move(dag));
+}
+
 void AddingDefaultBlockOutputStream::write(const Block & block)
 {
-    output->write(addMissingDefaults(block, output_block.getNamesAndTypesList(), columns, context));
+    auto copy = block;
+    actions->execute(copy);
+    output->write(copy);
 }
 
 void AddingDefaultBlockOutputStream::flush()
diff --git a/src/DataStreams/AddingDefaultBlockOutputStream.h b/src/DataStreams/AddingDefaultBlockOutputStream.h
index 5b46c533f7f..bff8ad7f39a 100644
--- a/src/DataStreams/AddingDefaultBlockOutputStream.h
+++ b/src/DataStreams/AddingDefaultBlockOutputStream.h
@@ -8,6 +8,9 @@
 namespace DB
 {
 
+class ExpressionActions;
+using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
+
 class Context;
 
 /** This stream adds three types of columns into block
@@ -24,11 +27,7 @@ public:
         const Block & header_,
         const Block & output_block_,
         const ColumnsDescription & columns_,
-        const Context & context_)
-        : output(output_), header(header_), output_block(output_block_),
-          columns(columns_), context(context_)
-    {
-    }
+        const Context & context_);
 
     Block getHeader() const override { return header; }
     void write(const Block & block) override;
@@ -41,10 +40,7 @@ public:
 private:
     BlockOutputStreamPtr output;
     const Block header;
-    /// Blocks after this stream should have this structure
-    const Block output_block;
-    const ColumnsDescription columns;
-    const Context & context;
+    ExpressionActionsPtr actions;
 };
 
 
diff --git a/src/Functions/replicate.cpp b/src/Functions/replicate.cpp
index aa4a7d42c6c..bc41d817326 100644
--- a/src/Functions/replicate.cpp
+++ b/src/Functions/replicate.cpp
@@ -1,3 +1,4 @@
+#include <Functions/replicate.h>
 #include <Functions/IFunctionImpl.h>
 #include <Functions/FunctionFactory.h>
 #include <Functions/FunctionHelpers.h>
@@ -14,79 +15,47 @@ namespace ErrorCodes
     extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
 }
 
-namespace
+DataTypePtr FunctionReplicate::getReturnTypeImpl(const DataTypes & arguments) const
 {
+    if (arguments.size() < 2)
+        throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION,
+                        "Function {} expect at leas two arguments, got {}", getName(), arguments.size());
 
-/** Creates an array, multiplying the column (the first argument) by the number of elements in the array (the second argument).
-  */
-class FunctionReplicate : public IFunction
+    for (size_t i = 1; i < arguments.size(); ++i)
+    {
+        const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[i].get());
+        if (!array_type)
+            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                            "Argument {} for function {} must be array.",
+                            i + 1, getName());
+    }
+    return std::make_shared<DataTypeArray>(arguments[0]);
+}
+
+ColumnPtr FunctionReplicate::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const
 {
-public:
-    static constexpr auto name = "replicate";
+    ColumnPtr first_column = arguments[0].column;
+    ColumnPtr offsets;
 
-    static FunctionPtr create(const Context &)
+    for (size_t i = 1; i < arguments.size(); ++i)
     {
-        return std::make_shared<FunctionReplicate>();
-    }
-
-    String getName() const override
-    {
-        return name;
-    }
-
-    size_t getNumberOfArguments() const override
-    {
-        return 0;
-    }
-
-    bool isVariadic() const override { return true; }
-
-    bool useDefaultImplementationForNulls() const override { return false; }
-
-    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
-    {
-        if (arguments.size() < 2)
-            throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION,
-                            "Function {} expect at leas two arguments, got {}", getName(), arguments.size());
-
-        for (size_t i = 1; i < arguments.size(); ++i)
+        const ColumnArray * array_column = checkAndGetColumn<ColumnArray>(arguments[i].column.get());
+        ColumnPtr temp_column;
+        if (!array_column)
         {
-            const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[i].get());
-            if (!array_type)
-                throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
-                                "Argument {} for function {} must be array.",
-                                i + 1, getName());
-        }
-        return std::make_shared<DataTypeArray>(arguments[0]);
-    }
-
-    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override
-    {
-        ColumnPtr first_column = arguments[0].column;
-        ColumnPtr offsets;
-
-        for (size_t i = 1; i < arguments.size(); ++i)
-        {
-            const ColumnArray * array_column = checkAndGetColumn<ColumnArray>(arguments[i].column.get());
-            ColumnPtr temp_column;
-            if (!array_column)
-            {
-                const auto * const_array_column = checkAndGetColumnConst<ColumnArray>(arguments[i].column.get());
-                if (!const_array_column)
-                    throw Exception("Unexpected column for replicate", ErrorCodes::ILLEGAL_COLUMN);
-                temp_column = const_array_column->convertToFullColumn();
-                array_column = checkAndGetColumn<ColumnArray>(temp_column.get());
-            }
-
-            if (!offsets || offsets->empty())
-                offsets = array_column->getOffsetsPtr();
+            const auto * const_array_column = checkAndGetColumnConst<ColumnArray>(arguments[i].column.get());
+            if (!const_array_column)
+                throw Exception("Unexpected column for replicate", ErrorCodes::ILLEGAL_COLUMN);
+            temp_column = const_array_column->convertToFullColumn();
+            array_column = checkAndGetColumn<ColumnArray>(temp_column.get());
         }
 
-        const auto & offsets_data = assert_cast<const ColumnArray::ColumnOffsets &>(*offsets).getData();
-        return ColumnArray::create(first_column->replicate(offsets_data)->convertToFullColumnIfConst(), offsets);
+        if (!offsets || offsets->empty())
+            offsets = array_column->getOffsetsPtr();
     }
-};
 
+    const auto & offsets_data = assert_cast<const ColumnArray::ColumnOffsets &>(*offsets).getData();
+    return ColumnArray::create(first_column->replicate(offsets_data)->convertToFullColumnIfConst(), offsets);
 }
 
 void registerFunctionReplicate(FunctionFactory & factory)
diff --git a/src/Functions/replicate.h b/src/Functions/replicate.h
new file mode 100644
index 00000000000..6dbea4bd169
--- /dev/null
+++ b/src/Functions/replicate.h
@@ -0,0 +1,38 @@
+#pragma once
+#include <Functions/IFunctionImpl.h>
+
+namespace DB
+{
+
+class Context;
+
+class FunctionReplicate : public IFunction
+{
+public:
+    static constexpr auto name = "replicate";
+
+    static FunctionPtr create(const Context &)
+    {
+        return std::make_shared<FunctionReplicate>();
+    }
+
+    String getName() const override
+    {
+        return name;
+    }
+
+    size_t getNumberOfArguments() const override
+    {
+        return 0;
+    }
+
+    bool isVariadic() const override { return true; }
+
+    bool useDefaultImplementationForNulls() const override { return false; }
+
+    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override;
+
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override;
+};
+
+}
diff --git a/src/Interpreters/addMissingDefaults.cpp b/src/Interpreters/addMissingDefaults.cpp
index 7935ca04b13..24772faa20a 100644
--- a/src/Interpreters/addMissingDefaults.cpp
+++ b/src/Interpreters/addMissingDefaults.cpp
@@ -8,80 +8,93 @@
 #include <Core/Block.h>
 #include <Storages/ColumnsDescription.h>
 #include <Interpreters/ExpressionActions.h>
+#include <Functions/IFunctionAdaptors.h>
+#include <Functions/replicate.h>
+#include <Functions/materialize.h>
 
 
 namespace DB
 {
 
-Block addMissingDefaults(
-    const Block & block,
+ActionsDAGPtr addMissingDefaults(
+    const Block & header,
     const NamesAndTypesList & required_columns,
     const ColumnsDescription & columns,
     const Context & context)
 {
     /// For missing columns of nested structure, you need to create not a column of empty arrays, but a column of arrays of correct lengths.
     /// First, remember the offset columns for all arrays in the block.
-    std::map<String, ColumnPtr> offset_columns;
+    std::map<String, Names> nested_groups;
 
-    for (size_t i = 0, size = block.columns(); i < size; ++i)
+    for (size_t i = 0, size = header.columns(); i < size; ++i)
     {
-        const auto & elem = block.getByPosition(i);
+        const auto & elem = header.getByPosition(i);
 
-        if (const ColumnArray * array = typeid_cast<const ColumnArray *>(&*elem.column))
+        if (typeid_cast<const ColumnArray *>(&*elem.column))
         {
             String offsets_name = Nested::extractTableName(elem.name);
-            auto & offsets_column = offset_columns[offsets_name];
 
-            /// If for some reason there are different offset columns for one nested structure, then we take nonempty.
-            if (!offsets_column || offsets_column->empty())
-                offsets_column = array->getOffsetsPtr();
+            auto & group = nested_groups[offsets_name];
+            if (group.empty())
+                group.push_back({});
+
+            group.push_back(elem.name);
         }
     }
 
-    const size_t rows = block.rows();
-    Block res;
+    auto actions = std::make_shared<ActionsDAG>(header.getColumnsWithTypeAndName());
+
+    FunctionOverloadResolverPtr func_builder_replicate =
+            std::make_shared<FunctionOverloadResolverAdaptor>(
+                    std::make_unique<DefaultOverloadResolver>(
+                            std::make_shared<FunctionReplicate>()));
+
+    FunctionOverloadResolverPtr func_builder_materialize =
+            std::make_shared<FunctionOverloadResolverAdaptor>(
+                    std::make_unique<DefaultOverloadResolver>(
+                            std::make_shared<FunctionMaterialize>()));
 
     /// We take given columns from input block and missed columns without default value
     /// (default and materialized will be computed later).
     for (const auto & column : required_columns)
     {
-        if (block.has(column.name))
-        {
-            res.insert(block.getByName(column.name));
+        if (header.has(column.name))
             continue;
-        }
 
         if (columns.hasDefault(column.name))
             continue;
 
         String offsets_name = Nested::extractTableName(column.name);
-        if (offset_columns.count(offsets_name))
+        if (nested_groups.count(offsets_name))
         {
-            ColumnPtr offsets_column = offset_columns[offsets_name];
-            DataTypePtr nested_type = typeid_cast<const DataTypeArray &>(*column.type).getNestedType();
-            UInt64 nested_rows = rows ? get<UInt64>((*offsets_column)[rows - 1]) : 0;
 
-            ColumnPtr nested_column = nested_type->createColumnConstWithDefaultValue(nested_rows)->convertToFullColumnIfConst();
-            auto new_column = ColumnArray::create(nested_column, offsets_column);
-            res.insert(ColumnWithTypeAndName(std::move(new_column), column.type, column.name));
+            DataTypePtr nested_type = typeid_cast<const DataTypeArray &>(*column.type).getNestedType();
+            ColumnPtr nested_column = nested_type->createColumnConstWithDefaultValue(0);
+            const auto & constant = actions->addColumn({std::move(nested_column), nested_type, column.name}, true);
+
+            auto & group = nested_groups[offsets_name];
+            group[0] = constant.result_name;
+            const auto & func = actions->addFunction(func_builder_replicate, group, {}, context);
+
+            actions->addAlias(func.result_name, column.name, true);
             continue;
         }
 
+        auto new_column = column.type->createColumnConstWithDefaultValue(0);
+        const auto * node = &actions->addColumn({std::move(new_column), column.type, column.name}, true);
+
         /** It is necessary to turn a constant column into a full column, since in part of blocks (from other parts),
         *  it can be full (or the interpreter may decide that it is constant everywhere).
         */
-        auto new_column = column.type->createColumnConstWithDefaultValue(rows)->convertToFullColumnIfConst();
-        res.insert(ColumnWithTypeAndName(std::move(new_column), column.type, column.name));
+        node = &actions->addFunction(func_builder_materialize, {node->result_name}, {}, context);
+        actions->addAlias(node->result_name, column.name, true);
     }
 
     /// Computes explicitly specified values by default and materialized columns.
-    auto dag = createFillingMissingDefaultsExpression(res, required_columns, columns, context);
-    if (dag)
-    {
-        auto actions = std::make_shared<ExpressionActions>(std::move(dag));
-        actions->execute(res);
-    }
-    return res;
+    if (auto dag = createFillingMissingDefaultsExpression(header, required_columns, columns, context))
+        actions = ActionsDAG::merge(std::move(*actions), std::move(*dag));
+
+    return actions;
 }
 
 }
diff --git a/src/Interpreters/addMissingDefaults.h b/src/Interpreters/addMissingDefaults.h
index ed5d5ce61ff..d8bed07e857 100644
--- a/src/Interpreters/addMissingDefaults.h
+++ b/src/Interpreters/addMissingDefaults.h
@@ -12,14 +12,17 @@ class Context;
 class NamesAndTypesList;
 class ColumnsDescription;
 
+class ActionsDAG;
+using ActionsDAGPtr = std::shared_ptr<ActionsDAG>;
+
 /** Adds three types of columns into block
   * 1. Columns, that are missed inside request, but present in table without defaults (missed columns)
   * 2. Columns, that are missed inside request, but present in table with defaults (columns with default values)
   * 3. Columns that materialized from other columns (materialized columns)
   * All three types of columns are materialized (not constants).
   */
-Block addMissingDefaults(
-    const Block & block,
+ActionsDAGPtr addMissingDefaults(
+    const Block & header,
     const NamesAndTypesList & required_columns,
     const ColumnsDescription & columns,
     const Context & context);
diff --git a/src/Processors/QueryPlan/AddingMissedStep.cpp b/src/Processors/QueryPlan/AddingMissedStep.cpp
deleted file mode 100644
index 359d0d46a87..00000000000
--- a/src/Processors/QueryPlan/AddingMissedStep.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-#include <Processors/QueryPlan/AddingMissedStep.h>
-#include <Processors/QueryPipeline.h>
-#include <Processors/Transforms/AddingMissedTransform.h>
-#include <IO/Operators.h>
-
-namespace DB
-{
-
-static ITransformingStep::Traits getTraits()
-{
-    return ITransformingStep::Traits
-    {
-        {
-            .preserves_distinct_columns = false, /// TODO: check if true later.
-            .returns_single_stream = false,
-            .preserves_number_of_streams = true,
-            .preserves_sorting = true,
-        },
-        {
-            .preserves_number_of_rows = true,
-        }
-    };
-}
-
-AddingMissedStep::AddingMissedStep(
-    const DataStream & input_stream_,
-    Block result_header_,
-    ColumnsDescription columns_,
-    const Context & context_)
-    : ITransformingStep(input_stream_, result_header_, getTraits())
-    , columns(std::move(columns_))
-    , context(context_)
-{
-    updateDistinctColumns(output_stream->header, output_stream->distinct_columns);
-}
-
-void AddingMissedStep::transformPipeline(QueryPipeline & pipeline)
-{
-    pipeline.addSimpleTransform([&](const Block & header)
-    {
-        return std::make_shared<AddingMissedTransform>(header, output_stream->header, columns, context);
-    });
-}
-
-}
diff --git a/src/Processors/QueryPlan/AddingMissedStep.h b/src/Processors/QueryPlan/AddingMissedStep.h
deleted file mode 100644
index ce755b79fdf..00000000000
--- a/src/Processors/QueryPlan/AddingMissedStep.h
+++ /dev/null
@@ -1,26 +0,0 @@
-#pragma once
-#include <Processors/QueryPlan/ITransformingStep.h>
-#include <Storages/ColumnsDescription.h>
-
-namespace DB
-{
-
-/// Convert one block structure to another. See ConvertingTransform.
-class AddingMissedStep : public ITransformingStep
-{
-public:
-    AddingMissedStep(const DataStream & input_stream_,
-                     Block result_header_,
-                     ColumnsDescription columns_,
-                     const Context & context_);
-
-    String getName() const override { return "AddingMissed"; }
-
-    void transformPipeline(QueryPipeline & pipeline) override;
-
-private:
-    ColumnsDescription columns;
-    const Context & context;
-};
-
-}
diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp
new file mode 100644
index 00000000000..0d651897bf8
--- /dev/null
+++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp
@@ -0,0 +1,22 @@
+#include <Processors/QueryPlan/Optimizations/Optimizations.h>
+#include <Processors/QueryPlan/FilterStep.h>
+
+namespace DB::QueryPlanOptimizations
+{
+
+size_t tryPushDownLimit(QueryPlan::Node * node, QueryPlan::Nodes &)
+{
+    auto * filter_step = typeid_cast<FilterStep *>(node->step.get());
+    if (!filter_step)
+        return 0;
+
+    QueryPlan::Node * child_node = node->children.front();
+    auto & child = child_node->step;
+
+    if (const auto * adding_const_column = typeid_cast<const AddingConstColumnStep *>(child.get()))
+    {
+
+    }
+}
+
+}
diff --git a/src/Processors/Transforms/AddingMissedTransform.cpp b/src/Processors/Transforms/AddingMissedTransform.cpp
deleted file mode 100644
index 1344cce22a7..00000000000
--- a/src/Processors/Transforms/AddingMissedTransform.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-#include <Processors/Transforms/AddingMissedTransform.h>
-#include <Interpreters/addMissingDefaults.h>
-
-
-namespace DB
-{
-
-AddingMissedTransform::AddingMissedTransform(
-    Block header_,
-    Block result_header_,
-    const ColumnsDescription & columns_,
-    const Context & context_)
-    : ISimpleTransform(std::move(header_), std::move(result_header_), false)
-    , columns(columns_), context(context_)
-{
-}
-
-void AddingMissedTransform::transform(Chunk & chunk)
-{
-    auto num_rows = chunk.getNumRows();
-    Block src = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns());
-
-    auto res = addMissingDefaults(src, getOutputPort().getHeader().getNamesAndTypesList(), columns, context);
-    chunk.setColumns(res.getColumns(), num_rows);
-}
-
-}
diff --git a/src/Processors/Transforms/AddingMissedTransform.h b/src/Processors/Transforms/AddingMissedTransform.h
deleted file mode 100644
index 561f908acef..00000000000
--- a/src/Processors/Transforms/AddingMissedTransform.h
+++ /dev/null
@@ -1,36 +0,0 @@
-#pragma once
-
-#include <Processors/ISimpleTransform.h>
-#include <Storages/ColumnsDescription.h>
-
-
-namespace DB
-{
-
-
-/** This stream adds three types of columns into block
-  * 1. Columns, that are missed inside request, but present in table without defaults (missed columns)
-  * 2. Columns, that are missed inside request, but present in table with defaults (columns with default values)
-  * 3. Columns that materialized from other columns (materialized columns)
-  * All three types of columns are materialized (not constants).
-  */
-class AddingMissedTransform : public ISimpleTransform
-{
-public:
-    AddingMissedTransform(
-        Block header_,
-        Block result_header_,
-        const ColumnsDescription & columns_,
-        const Context & context_);
-
-    String getName() const override { return "AddingMissed"; }
-
-private:
-    void transform(Chunk &) override;
-
-    const ColumnsDescription columns;
-    const Context & context;
-};
-
-
-}
diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp
index ce74567c62b..bf02a04c704 100644
--- a/src/Storages/StorageBuffer.cpp
+++ b/src/Storages/StorageBuffer.cpp
@@ -4,7 +4,7 @@
 #include <Interpreters/InterpreterAlterQuery.h>
 #include <Interpreters/castColumn.h>
 #include <Interpreters/evaluateConstantExpression.h>
-#include <Processors/QueryPlan/AddingMissedStep.h>
+#include <Interpreters/addMissingDefaults.h>
 #include <DataStreams/IBlockInputStream.h>
 #include <Storages/StorageBuffer.h>
 #include <Storages/StorageFactory.h>
@@ -246,10 +246,15 @@ void StorageBuffer::read(
                 if (query_plan.isInitialized())
                 {
 
-                    auto adding_missed = std::make_unique<AddingMissedStep>(
+                    auto actions = addMissingDefaults(
+                            query_plan.getCurrentDataStream().header,
+                            header_after_adding_defaults.getNamesAndTypesList(),
+                            metadata_snapshot->getColumns(),
+                            context);
+
+                    auto adding_missed = std::make_unique<ExpressionStep>(
                             query_plan.getCurrentDataStream(),
-                            header_after_adding_defaults,
-                            metadata_snapshot->getColumns(), context);
+                            std::move(actions));
 
                     adding_missed->setStepDescription("Add columns missing in destination table");
                     query_plan.addStep(std::move(adding_missed));

From 7b96ef61e8e3e9d9c74f850375003d025ab9f739 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Fri, 5 Feb 2021 14:48:09 +0300
Subject: [PATCH 0706/1238] Add logging if Poco cannot allocate thread in tcp
 server

---
 contrib/poco | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/poco b/contrib/poco
index e11f3c97157..fbaaba4a02e 160000
--- a/contrib/poco
+++ b/contrib/poco
@@ -1 +1 @@
-Subproject commit e11f3c971570cf6a31006cd21cadf41a259c360a
+Subproject commit fbaaba4a02e29987b8c584747a496c79528f125f

From 98cfefdcfbc2aa84422dec9c8710b2892e5f50a6 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Fri, 5 Feb 2021 14:49:58 +0300
Subject: [PATCH 0707/1238] Fix build

---
 .../Optimizations/filterPushDown.cpp          | 22 -------------------
 1 file changed, 22 deletions(-)
 delete mode 100644 src/Processors/QueryPlan/Optimizations/filterPushDown.cpp

diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp
deleted file mode 100644
index 0d651897bf8..00000000000
--- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-#include <Processors/QueryPlan/Optimizations/Optimizations.h>
-#include <Processors/QueryPlan/FilterStep.h>
-
-namespace DB::QueryPlanOptimizations
-{
-
-size_t tryPushDownLimit(QueryPlan::Node * node, QueryPlan::Nodes &)
-{
-    auto * filter_step = typeid_cast<FilterStep *>(node->step.get());
-    if (!filter_step)
-        return 0;
-
-    QueryPlan::Node * child_node = node->children.front();
-    auto & child = child_node->step;
-
-    if (const auto * adding_const_column = typeid_cast<const AddingConstColumnStep *>(child.get()))
-    {
-
-    }
-}
-
-}

From 01a6e01ad73baf05c10427b8d60075468bd8d150 Mon Sep 17 00:00:00 2001
From: vdimir <vdimir@yandex-team.ru>
Date: Thu, 4 Feb 2021 17:46:36 +0300
Subject: [PATCH 0708/1238] Store usage info flags separate from row refs in
 hash join

---
 src/Common/ColumnsHashing.h           |  41 ++++---
 src/Common/ColumnsHashingImpl.h       |  62 +++++++---
 src/Common/HashTable/FixedHashTable.h |   7 ++
 src/Common/HashTable/HashTable.h      |   7 ++
 src/Interpreters/HashJoin.cpp         | 168 +++++++++++++++++++-------
 src/Interpreters/HashJoin.h           |  91 +++++++-------
 src/Interpreters/joinDispatch.h       |  45 ++++---
 7 files changed, 270 insertions(+), 151 deletions(-)

diff --git a/src/Common/ColumnsHashing.h b/src/Common/ColumnsHashing.h
index a7fcfd4f8c0..b1d25c98955 100644
--- a/src/Common/ColumnsHashing.h
+++ b/src/Common/ColumnsHashing.h
@@ -28,12 +28,12 @@ namespace ColumnsHashing
 
 /// For the case when there is one numeric key.
 /// UInt8/16/32/64 for any type with corresponding bit width.
-template <typename Value, typename Mapped, typename FieldType, bool use_cache = true>
+template <typename Value, typename Mapped, typename FieldType, bool use_cache = true, bool need_offset = false>
 struct HashMethodOneNumber
-    : public columns_hashing_impl::HashMethodBase<HashMethodOneNumber<Value, Mapped, FieldType, use_cache>, Value, Mapped, use_cache>
+    : public columns_hashing_impl::HashMethodBase<HashMethodOneNumber<Value, Mapped, FieldType, use_cache, need_offset>, Value, Mapped, use_cache, need_offset>
 {
-    using Self = HashMethodOneNumber<Value, Mapped, FieldType, use_cache>;
-    using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
+    using Self = HashMethodOneNumber<Value, Mapped, FieldType, use_cache, need_offset>;
+    using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>;
 
     const char * vec;
 
@@ -70,12 +70,12 @@ struct HashMethodOneNumber
 
 
 /// For the case when there is one string key.
-template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true>
+template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true, bool need_offset = false>
 struct HashMethodString
-    : public columns_hashing_impl::HashMethodBase<HashMethodString<Value, Mapped, place_string_to_arena, use_cache>, Value, Mapped, use_cache>
+    : public columns_hashing_impl::HashMethodBase<HashMethodString<Value, Mapped, place_string_to_arena, use_cache, need_offset>, Value, Mapped, use_cache, need_offset>
 {
-    using Self = HashMethodString<Value, Mapped, place_string_to_arena, use_cache>;
-    using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
+    using Self = HashMethodString<Value, Mapped, place_string_to_arena, use_cache, need_offset>;
+    using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>;
 
     const IColumn::Offset * offsets;
     const UInt8 * chars;
@@ -108,12 +108,13 @@ protected:
 
 
 /// For the case when there is one fixed-length string key.
-template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true>
+template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true, bool need_offset = false>
 struct HashMethodFixedString
-    : public columns_hashing_impl::HashMethodBase<HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache>, Value, Mapped, use_cache>
+    : public columns_hashing_impl::
+          HashMethodBase<HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache, need_offset>, Value, Mapped, use_cache, need_offset>
 {
-    using Self = HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache>;
-    using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
+    using Self = HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache, need_offset>;
+    using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>;
 
     size_t n;
     const ColumnFixedString::Chars * chars;
@@ -454,13 +455,13 @@ template <>
 struct LowCardinalityKeys<false> {};
 
 /// For the case when all keys are of fixed length, and they fit in N (for example, 128) bits.
-template <typename Value, typename Key, typename Mapped, bool has_nullable_keys_ = false, bool has_low_cardinality_ = false, bool use_cache = true>
+template <typename Value, typename Key, typename Mapped, bool has_nullable_keys_ = false, bool has_low_cardinality_ = false, bool use_cache = true, bool need_offset = false>
 struct HashMethodKeysFixed
     : private columns_hashing_impl::BaseStateKeysFixed<Key, has_nullable_keys_>
-    , public columns_hashing_impl::HashMethodBase<HashMethodKeysFixed<Value, Key, Mapped, has_nullable_keys_, has_low_cardinality_, use_cache>, Value, Mapped, use_cache>
+    , public columns_hashing_impl::HashMethodBase<HashMethodKeysFixed<Value, Key, Mapped, has_nullable_keys_, has_low_cardinality_, use_cache, need_offset>, Value, Mapped, use_cache, need_offset>
 {
-    using Self = HashMethodKeysFixed<Value, Key, Mapped, has_nullable_keys_, has_low_cardinality_, use_cache>;
-    using BaseHashed = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
+    using Self = HashMethodKeysFixed<Value, Key, Mapped, has_nullable_keys_, has_low_cardinality_, use_cache, need_offset>;
+    using BaseHashed = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>;
     using Base = columns_hashing_impl::BaseStateKeysFixed<Key, has_nullable_keys_>;
 
     static constexpr bool has_nullable_keys = has_nullable_keys_;
@@ -540,13 +541,13 @@ protected:
 };
 
 /// For the case when there is one string key.
-template <typename Value, typename Mapped, bool use_cache = true>
+template <typename Value, typename Mapped, bool use_cache = true, bool need_offset = false>
 struct HashMethodHashed
-    : public columns_hashing_impl::HashMethodBase<HashMethodHashed<Value, Mapped, use_cache>, Value, Mapped, use_cache>
+    : public columns_hashing_impl::HashMethodBase<HashMethodHashed<Value, Mapped, use_cache, need_offset>, Value, Mapped, use_cache, need_offset>
 {
     using Key = UInt128;
-    using Self = HashMethodHashed<Value, Mapped, use_cache>;
-    using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
+    using Self = HashMethodHashed<Value, Mapped, use_cache, need_offset>;
+    using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>;
 
     ColumnRawPtrs key_columns;
 
diff --git a/src/Common/ColumnsHashingImpl.h b/src/Common/ColumnsHashingImpl.h
index cdd8dd20dd0..c5068175ffa 100644
--- a/src/Common/ColumnsHashingImpl.h
+++ b/src/Common/ColumnsHashingImpl.h
@@ -87,34 +87,58 @@ public:
     bool isInserted() const { return inserted; }
 };
 
-template <typename Mapped>
-class FindResultImpl
+class FindResultImplBase
 {
-    Mapped * value;
     bool found;
 
 public:
-    FindResultImpl(Mapped * value_, bool found_) : value(value_), found(found_) {}
+    explicit FindResultImplBase(bool found_) : found(found_) {}
     bool isFound() const { return found; }
-    Mapped & getMapped() const { return *value; }
+};
+
+template <bool need_offset = false>
+class FindResultImplOffsetBase
+{
+public:
+    constexpr static bool has_offset = need_offset;
+    explicit FindResultImplOffsetBase(size_t /* off */) {}
 };
 
 template <>
-class FindResultImpl<void>
+class FindResultImplOffsetBase<true>
 {
-    bool found;
-
+    size_t offset;
 public:
-    explicit FindResultImpl(bool found_) : found(found_) {}
-    bool isFound() const { return found; }
+    constexpr static bool has_offset = true;
+
+    explicit FindResultImplOffsetBase(size_t off) : offset(off) {}
+    ALWAYS_INLINE size_t getOffset() const { return offset; }
 };
 
-template <typename Derived, typename Value, typename Mapped, bool consecutive_keys_optimization>
+template <typename Mapped, bool need_offset = false>
+class FindResultImpl : public FindResultImplBase, public FindResultImplOffsetBase<need_offset>
+{
+    Mapped * value;
+
+public:
+    FindResultImpl(Mapped * value_, bool found_, size_t off)
+        : FindResultImplBase(found_), FindResultImplOffsetBase<need_offset>(off), value(value_) {}
+    Mapped & getMapped() const { return *value; }
+};
+
+template <bool need_offset>
+class FindResultImpl<void, need_offset> : public FindResultImplBase, public FindResultImplOffsetBase<need_offset>
+{
+public:
+    FindResultImpl(bool found_, size_t off) : FindResultImplBase(found_), FindResultImplOffsetBase<need_offset>(off) {}
+};
+
+template <typename Derived, typename Value, typename Mapped, bool consecutive_keys_optimization, bool need_offset = false>
 class HashMethodBase
 {
 public:
     using EmplaceResult = EmplaceResultImpl<Mapped>;
-    using FindResult = FindResultImpl<Mapped>;
+    using FindResult = FindResultImpl<Mapped, need_offset>;
     static constexpr bool has_mapped = !std::is_same<Mapped, void>::value;
     using Cache = LastElementCache<Value, consecutive_keys_optimization>;
 
@@ -217,12 +241,13 @@ protected:
     {
         if constexpr (Cache::consecutive_keys_optimization)
         {
+            static_assert(!FindResult::has_offset, "`consecutive_keys_optimization` and `has_offset` are conflicting options");
             if (cache.check(key))
             {
                 if constexpr (has_mapped)
-                    return FindResult(&cache.value.second, cache.found);
+                    return FindResult(&cache.value.second, cache.found, 0);
                 else
-                    return FindResult(cache.found);
+                    return FindResult(cache.found, 0);
             }
         }
 
@@ -247,10 +272,15 @@ protected:
             }
         }
 
+        size_t offset = 0;
+        if constexpr (FindResult::has_offset)
+        {
+            offset = it ? data.offsetInternal(it) : 0;
+        }
         if constexpr (has_mapped)
-            return FindResult(it ? &it->getMapped() : nullptr, it != nullptr);
+            return FindResult(it ? &it->getMapped() : nullptr, it != nullptr, offset);
         else
-            return FindResult(it != nullptr);
+            return FindResult(it != nullptr, offset);
     }
 };
 
diff --git a/src/Common/HashTable/FixedHashTable.h b/src/Common/HashTable/FixedHashTable.h
index b3fa5b7a26c..0f88856f83a 100644
--- a/src/Common/HashTable/FixedHashTable.h
+++ b/src/Common/HashTable/FixedHashTable.h
@@ -476,6 +476,13 @@ public:
 
     size_t getBufferSizeInCells() const { return NUM_CELLS; }
 
+    size_t offsetInternal(ConstLookupResult ptr) const
+    {
+        if (ptr->isZero(*this))
+            return 0;
+        return ptr - buf + 1;
+    }
+
     const Cell * data() const { return buf; }
     Cell * data() { return buf; }
 
diff --git a/src/Common/HashTable/HashTable.h b/src/Common/HashTable/HashTable.h
index 15fa09490e6..8b69527e090 100644
--- a/src/Common/HashTable/HashTable.h
+++ b/src/Common/HashTable/HashTable.h
@@ -1214,6 +1214,13 @@ public:
         return grower.bufSize();
     }
 
+    size_t offsetInternal(ConstLookupResult ptr) const
+    {
+        if (ptr->isZero(*this))
+            return 0;
+        return ptr - buf + 1;
+    }
+
 #ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
     size_t getCollisions() const
     {
diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp
index ad1a37c2703..8293f959cd9 100644
--- a/src/Interpreters/HashJoin.cpp
+++ b/src/Interpreters/HashJoin.cpp
@@ -55,6 +55,57 @@ struct NotProcessedCrossJoin : public ExtraBlock
 
 }
 
+namespace JoinStuff
+{
+    bool JoinUsedFlags::getUsedSafe(size_t i) const
+    {
+        if (flags.empty())
+            return !need_flags;
+        return flags[i].load();
+    }
+
+    template <ASTTableJoin::Kind KIND, ASTTableJoin::Strictness STRICTNESS>
+    void JoinUsedFlags::reinit(size_t size)
+    {
+        if constexpr (MapGetter<KIND, STRICTNESS>::flagged)
+        {
+            assert(flags.size() <= size);
+            need_flags = true;
+            flags = std::vector<std::atomic_bool>(size);
+        }
+    }
+
+    template <>
+    void JoinUsedFlags::setUsed<false>(size_t i [[maybe_unused]]) {}
+
+    template <>
+    bool JoinUsedFlags::getUsed<false>(size_t i [[maybe_unused]]) { return true; }
+
+    template <>
+    bool JoinUsedFlags::setUsedOnce<false>(size_t i [[maybe_unused]]) { return true; }
+
+    template <>
+    void JoinUsedFlags::setUsed<true>(size_t i)
+    {
+        /// Could be set simultaneously from different threads.
+        flags[i].store(true, std::memory_order_relaxed);
+    }
+
+    template <>
+    bool JoinUsedFlags::getUsed<true>(size_t i) { return flags[i].load(); }
+
+    template <>
+    bool JoinUsedFlags::setUsedOnce<true>(size_t i)
+    {
+        /// fast check to prevent heavy CAS with seq_cst order
+        if (flags[i].load(std::memory_order_relaxed))
+            return false;
+
+        bool expected = false;
+        return flags[i].compare_exchange_strong(expected, true);
+    }
+}
+
 static ColumnPtr filterWithBlanks(ColumnPtr src_column, const IColumn::Filter & filter, bool inverse_filter = false)
 {
     ColumnPtr column = src_column->convertToFullColumnIfConst();
@@ -264,8 +315,8 @@ static KeyGetter createKeyGetter(const ColumnRawPtrs & key_columns, const Sizes
 class KeyGetterForDict
 {
 public:
-    using Mapped = JoinStuff::MappedOne;
-    using FindResult = ColumnsHashing::columns_hashing_impl::FindResultImpl<Mapped>;
+    using Mapped = RowRef;
+    using FindResult = ColumnsHashing::columns_hashing_impl::FindResultImpl<Mapped, true>;
 
     KeyGetterForDict(const ColumnRawPtrs & key_columns_, const Sizes &, void *)
         : key_columns(key_columns_)
@@ -286,7 +337,7 @@ public:
         }
 
         result.row_num = positions[row];
-        return FindResult(&result, found[row]);
+        return FindResult(&result, found[row], 0);
     }
 
 private:
@@ -300,41 +351,43 @@ private:
 template <HashJoin::Type type, typename Value, typename Mapped>
 struct KeyGetterForTypeImpl;
 
+constexpr bool use_offset = true;
+
 template <typename Value, typename Mapped> struct KeyGetterForTypeImpl<HashJoin::Type::key8, Value, Mapped>
 {
-    using Type = ColumnsHashing::HashMethodOneNumber<Value, Mapped, UInt8, false>;
+    using Type = ColumnsHashing::HashMethodOneNumber<Value, Mapped, UInt8, false, use_offset>;
 };
 template <typename Value, typename Mapped> struct KeyGetterForTypeImpl<HashJoin::Type::key16, Value, Mapped>
 {
-    using Type = ColumnsHashing::HashMethodOneNumber<Value, Mapped, UInt16, false>;
+    using Type = ColumnsHashing::HashMethodOneNumber<Value, Mapped, UInt16, false, use_offset>;
 };
 template <typename Value, typename Mapped> struct KeyGetterForTypeImpl<HashJoin::Type::key32, Value, Mapped>
 {
-    using Type = ColumnsHashing::HashMethodOneNumber<Value, Mapped, UInt32, false>;
+    using Type = ColumnsHashing::HashMethodOneNumber<Value, Mapped, UInt32, false, use_offset>;
 };
 template <typename Value, typename Mapped> struct KeyGetterForTypeImpl<HashJoin::Type::key64, Value, Mapped>
 {
-    using Type = ColumnsHashing::HashMethodOneNumber<Value, Mapped, UInt64, false>;
+    using Type = ColumnsHashing::HashMethodOneNumber<Value, Mapped, UInt64, false, use_offset>;
 };
 template <typename Value, typename Mapped> struct KeyGetterForTypeImpl<HashJoin::Type::key_string, Value, Mapped>
 {
-    using Type = ColumnsHashing::HashMethodString<Value, Mapped, true, false>;
+    using Type = ColumnsHashing::HashMethodString<Value, Mapped, true, false, use_offset>;
 };
 template <typename Value, typename Mapped> struct KeyGetterForTypeImpl<HashJoin::Type::key_fixed_string, Value, Mapped>
 {
-    using Type = ColumnsHashing::HashMethodFixedString<Value, Mapped, true, false>;
+    using Type = ColumnsHashing::HashMethodFixedString<Value, Mapped, true, false, use_offset>;
 };
 template <typename Value, typename Mapped> struct KeyGetterForTypeImpl<HashJoin::Type::keys128, Value, Mapped>
 {
-    using Type = ColumnsHashing::HashMethodKeysFixed<Value, UInt128, Mapped, false, false, false>;
+    using Type = ColumnsHashing::HashMethodKeysFixed<Value, UInt128, Mapped, false, false, false, use_offset>;
 };
 template <typename Value, typename Mapped> struct KeyGetterForTypeImpl<HashJoin::Type::keys256, Value, Mapped>
 {
-    using Type = ColumnsHashing::HashMethodKeysFixed<Value, DummyUInt256, Mapped, false, false, false>;
+    using Type = ColumnsHashing::HashMethodKeysFixed<Value, DummyUInt256, Mapped, false, false, false, use_offset>;
 };
 template <typename Value, typename Mapped> struct KeyGetterForTypeImpl<HashJoin::Type::hashed, Value, Mapped>
 {
-    using Type = ColumnsHashing::HashMethodHashed<Value, Mapped, false>;
+    using Type = ColumnsHashing::HashMethodHashed<Value, Mapped, false, use_offset>;
 };
 
 template <HashJoin::Type type, typename Data>
@@ -463,12 +516,11 @@ namespace
 
 
     template <ASTTableJoin::Strictness STRICTNESS, typename KeyGetter, typename Map, bool has_null_map>
-    void NO_INLINE insertFromBlockImplTypeCase(
+    size_t NO_INLINE insertFromBlockImplTypeCase(
         HashJoin & join, Map & map, size_t rows, const ColumnRawPtrs & key_columns,
         const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, Arena & pool)
     {
-        [[maybe_unused]] constexpr bool mapped_one = std::is_same_v<typename Map::mapped_type, JoinStuff::MappedOne> ||
-                                    std::is_same_v<typename Map::mapped_type, JoinStuff::MappedOneFlagged>;
+        [[maybe_unused]] constexpr bool mapped_one = std::is_same_v<typename Map::mapped_type, RowRef>;
         constexpr bool is_asof_join = STRICTNESS == ASTTableJoin::Strictness::Asof;
 
         const IColumn * asof_column [[maybe_unused]] = nullptr;
@@ -489,35 +541,36 @@ namespace
             else
                 Inserter<Map, KeyGetter>::insertAll(join, map, key_getter, stored_block, i, pool);
         }
+        return map.getBufferSizeInCells();
     }
 
 
     template <ASTTableJoin::Strictness STRICTNESS, typename KeyGetter, typename Map>
-    void insertFromBlockImplType(
+    size_t insertFromBlockImplType(
         HashJoin & join, Map & map, size_t rows, const ColumnRawPtrs & key_columns,
         const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, Arena & pool)
     {
         if (null_map)
-            insertFromBlockImplTypeCase<STRICTNESS, KeyGetter, Map, true>(join, map, rows, key_columns, key_sizes, stored_block, null_map, pool);
+            return insertFromBlockImplTypeCase<STRICTNESS, KeyGetter, Map, true>(join, map, rows, key_columns, key_sizes, stored_block, null_map, pool);
         else
-            insertFromBlockImplTypeCase<STRICTNESS, KeyGetter, Map, false>(join, map, rows, key_columns, key_sizes, stored_block, null_map, pool);
+            return insertFromBlockImplTypeCase<STRICTNESS, KeyGetter, Map, false>(join, map, rows, key_columns, key_sizes, stored_block, null_map, pool);
     }
 
 
     template <ASTTableJoin::Strictness STRICTNESS, typename Maps>
-    void insertFromBlockImpl(
+    size_t insertFromBlockImpl(
         HashJoin & join, HashJoin::Type type, Maps & maps, size_t rows, const ColumnRawPtrs & key_columns,
         const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, Arena & pool)
     {
         switch (type)
         {
-            case HashJoin::Type::EMPTY: break;
-            case HashJoin::Type::CROSS: break; /// Do nothing. We have already saved block, and it is enough.
-            case HashJoin::Type::DICT:  break; /// No one should call it with Type::DICT.
+            case HashJoin::Type::EMPTY: return 0;
+            case HashJoin::Type::CROSS: return 0; /// Do nothing. We have already saved block, and it is enough.
+            case HashJoin::Type::DICT:  return 0; /// No one should call it with Type::DICT.
 
         #define M(TYPE) \
             case HashJoin::Type::TYPE: \
-                insertFromBlockImplType<STRICTNESS, typename KeyGetterForType<HashJoin::Type::TYPE, std::remove_reference_t<decltype(*maps.TYPE)>>::Type>(\
+                return insertFromBlockImplType<STRICTNESS, typename KeyGetterForType<HashJoin::Type::TYPE, std::remove_reference_t<decltype(*maps.TYPE)>>::Type>(\
                     join, *maps.TYPE, rows, key_columns, key_sizes, stored_block, null_map, pool); \
                     break;
             APPLY_FOR_JOIN_VARIANTS(M)
@@ -607,9 +660,11 @@ bool HashJoin::addJoinedBlock(const Block & source_block, bool check_limits)
 
         if (kind != ASTTableJoin::Kind::Cross)
         {
-            joinDispatch(kind, strictness, data->maps, [&](auto, auto strictness_, auto & map)
+            joinDispatch(kind, strictness, data->maps, [&](auto kind_, auto strictness_, auto & map)
             {
-                insertFromBlockImpl<strictness_>(*this, data->type, map, rows, key_columns, key_sizes, stored_block, null_map, data->pool);
+                size_t size = insertFromBlockImpl<strictness_>(*this, data->type, map, rows, key_columns, key_sizes, stored_block, null_map, data->pool);
+                /// Number of buckets + 1 value from zero storage
+                used_flags.reinit<kind_, strictness_>(size + 1);
             });
         }
 
@@ -770,7 +825,11 @@ void setUsed(IColumn::Filter & filter [[maybe_unused]], size_t pos [[maybe_unuse
 /// Joins right table columns which indexes are present in right_indexes using specified map.
 /// Makes filter (1 if row presented in right table) and returns offsets to replicate (for ALL JOINS).
 template <ASTTableJoin::Kind KIND, ASTTableJoin::Strictness STRICTNESS, typename KeyGetter, typename Map, bool need_filter, bool has_null_map>
-NO_INLINE IColumn::Filter joinRightColumns(const Map & map, AddedColumns & added_columns, const ConstNullMapPtr & null_map [[maybe_unused]])
+NO_INLINE IColumn::Filter joinRightColumns(
+    const Map & map,
+    AddedColumns & added_columns,
+    const ConstNullMapPtr & null_map [[maybe_unused]],
+    JoinStuff::JoinUsedFlags & used_flags [[maybe_unused]])
 {
     constexpr bool is_any_join = STRICTNESS == ASTTableJoin::Strictness::Any;
     constexpr bool is_all_join = STRICTNESS == ASTTableJoin::Strictness::All;
@@ -781,6 +840,8 @@ NO_INLINE IColumn::Filter joinRightColumns(const Map & map, AddedColumns & added
     constexpr bool right = KIND == ASTTableJoin::Kind::Right;
     constexpr bool full = KIND == ASTTableJoin::Kind::Full;
 
+    constexpr bool need_flags = MapGetter<KIND, STRICTNESS>::flagged;
+
     constexpr bool add_missing = (left || full) && !is_semi_join;
     constexpr bool need_replication = is_all_join || (is_any_join && right) || (is_semi_join && right);
 
@@ -827,7 +888,7 @@ NO_INLINE IColumn::Filter joinRightColumns(const Map & map, AddedColumns & added
                 if (const RowRef * found = mapped.findAsof(asof_type, asof_inequality, left_asof_key, i))
                 {
                     setUsed<need_filter>(filter, i);
-                    mapped.setUsed();
+                    used_flags.template setUsed<need_flags>(find_result.getOffset());
                     added_columns.appendFromBlock<add_missing>(*found->block, found->row_num);
                 }
                 else
@@ -836,13 +897,15 @@ NO_INLINE IColumn::Filter joinRightColumns(const Map & map, AddedColumns & added
             else if constexpr (is_all_join)
             {
                 setUsed<need_filter>(filter, i);
-                mapped.setUsed();
+                used_flags.template setUsed<need_flags>(find_result.getOffset());
                 addFoundRowAll<Map, add_missing>(mapped, added_columns, current_offset);
             }
             else if constexpr ((is_any_join || is_semi_join) && right)
             {
                 /// Use first appeared left key + it needs left columns replication
-                if (mapped.setUsedOnce())
+                bool used_once = used_flags.template setUsedOnce<need_flags>(find_result.getOffset());
+
+                if (used_once)
                 {
                     setUsed<need_filter>(filter, i);
                     addFoundRowAll<Map, add_missing>(mapped, added_columns, current_offset);
@@ -850,8 +913,10 @@ NO_INLINE IColumn::Filter joinRightColumns(const Map & map, AddedColumns & added
             }
             else if constexpr (is_any_join && KIND == ASTTableJoin::Kind::Inner)
             {
+                bool used_once = used_flags.template setUsedOnce<need_flags>(find_result.getOffset());
+
                 /// Use first appeared left key only
-                if (mapped.setUsedOnce())
+                if (used_once)
                 {
                     setUsed<need_filter>(filter, i);
                     added_columns.appendFromBlock<add_missing>(*mapped.block, mapped.row_num);
@@ -863,13 +928,13 @@ NO_INLINE IColumn::Filter joinRightColumns(const Map & map, AddedColumns & added
             }
             else if constexpr (is_anti_join)
             {
-                if constexpr (right)
-                    mapped.setUsed();
+                if constexpr (right && need_flags)
+                    used_flags.template setUsed<need_flags>(find_result.getOffset());
             }
             else /// ANY LEFT, SEMI LEFT, old ANY (RightAny)
             {
                 setUsed<need_filter>(filter, i);
-                mapped.setUsed();
+                used_flags.template setUsed<need_flags>(find_result.getOffset());
                 added_columns.appendFromBlock<add_missing>(*mapped.block, mapped.row_num);
             }
         }
@@ -889,26 +954,28 @@ NO_INLINE IColumn::Filter joinRightColumns(const Map & map, AddedColumns & added
 }
 
 template <ASTTableJoin::Kind KIND, ASTTableJoin::Strictness STRICTNESS, typename KeyGetter, typename Map>
-IColumn::Filter joinRightColumnsSwitchNullability(const Map & map, AddedColumns & added_columns, const ConstNullMapPtr & null_map)
+IColumn::Filter joinRightColumnsSwitchNullability(
+    const Map & map, AddedColumns & added_columns, const ConstNullMapPtr & null_map, JoinStuff::JoinUsedFlags & used_flags)
 {
     if (added_columns.need_filter)
     {
         if (null_map)
-            return joinRightColumns<KIND, STRICTNESS, KeyGetter, Map, true, true>(map, added_columns, null_map);
+            return joinRightColumns<KIND, STRICTNESS, KeyGetter, Map, true, true>(map, added_columns, null_map, used_flags);
         else
-            return joinRightColumns<KIND, STRICTNESS, KeyGetter, Map, true, false>(map, added_columns, nullptr);
+            return joinRightColumns<KIND, STRICTNESS, KeyGetter, Map, true, false>(map, added_columns, nullptr, used_flags);
     }
     else
     {
         if (null_map)
-            return joinRightColumns<KIND, STRICTNESS, KeyGetter, Map, false, true>(map, added_columns, null_map);
+            return joinRightColumns<KIND, STRICTNESS, KeyGetter, Map, false, true>(map, added_columns, null_map, used_flags);
         else
-            return joinRightColumns<KIND, STRICTNESS, KeyGetter, Map, false, false>(map, added_columns, nullptr);
+            return joinRightColumns<KIND, STRICTNESS, KeyGetter, Map, false, false>(map, added_columns, nullptr, used_flags);
     }
 }
 
 template <ASTTableJoin::Kind KIND, ASTTableJoin::Strictness STRICTNESS, typename Maps>
-IColumn::Filter switchJoinRightColumns(const Maps & maps_, AddedColumns & added_columns, HashJoin::Type type, const ConstNullMapPtr & null_map)
+IColumn::Filter switchJoinRightColumns(
+    const Maps & maps_, AddedColumns & added_columns, HashJoin::Type type, const ConstNullMapPtr & null_map, JoinStuff::JoinUsedFlags & used_flags)
 {
     switch (type)
     {
@@ -916,7 +983,7 @@ IColumn::Filter switchJoinRightColumns(const Maps & maps_, AddedColumns & added_
         case HashJoin::Type::TYPE: \
             return joinRightColumnsSwitchNullability<KIND, STRICTNESS,\
                 typename KeyGetterForType<HashJoin::Type::TYPE, const std::remove_reference_t<decltype(*maps_.TYPE)>>::Type>(\
-                *maps_.TYPE, added_columns, null_map);
+                *maps_.TYPE, added_columns, null_map, used_flags);
         APPLY_FOR_JOIN_VARIANTS(M)
     #undef M
 
@@ -933,7 +1000,8 @@ IColumn::Filter dictionaryJoinRightColumns(const TableJoin & table_join, AddedCo
         STRICTNESS == ASTTableJoin::Strictness::Semi ||
         STRICTNESS == ASTTableJoin::Strictness::Anti))
     {
-        return joinRightColumnsSwitchNullability<KIND, STRICTNESS, KeyGetterForDict>(table_join, added_columns, null_map);
+        JoinStuff::JoinUsedFlags flags;
+        return joinRightColumnsSwitchNullability<KIND, STRICTNESS, KeyGetterForDict>(table_join, added_columns, null_map, flags);
     }
 
     throw Exception("Logical error: wrong JOIN combination", ErrorCodes::LOGICAL_ERROR);
@@ -997,7 +1065,7 @@ void HashJoin::joinBlockImpl(
 
     IColumn::Filter row_filter = overDictionary() ?
         dictionaryJoinRightColumns<KIND, STRICTNESS>(*table_join, added_columns, null_map) :
-        switchJoinRightColumns<KIND, STRICTNESS>(maps_, added_columns, data->type, null_map);
+        switchJoinRightColumns<KIND, STRICTNESS>(maps_, added_columns, data->type, null_map, used_flags);
 
     for (size_t i = 0; i < added_columns.size(); ++i)
         block.insert(added_columns.moveColumn(i));
@@ -1276,8 +1344,8 @@ struct AdderNonJoined
 {
     static void add(const Mapped & mapped, size_t & rows_added, MutableColumns & columns_right)
     {
-        constexpr bool mapped_asof = std::is_same_v<Mapped, JoinStuff::MappedAsof>;
-        [[maybe_unused]] constexpr bool mapped_one = std::is_same_v<Mapped, JoinStuff::MappedOne> || std::is_same_v<Mapped, JoinStuff::MappedOneFlagged>;
+        constexpr bool mapped_asof = std::is_same_v<Mapped, AsofRowRefs>;
+        [[maybe_unused]] constexpr bool mapped_one = std::is_same_v<Mapped, RowRef>;
 
         if constexpr (mapped_asof)
         {
@@ -1404,7 +1472,8 @@ private:
         {
             const Mapped & mapped = it->getMapped();
 
-            if (mapped.getUsed())
+            size_t off = map.offsetInternal(it.getPtr());
+            if (parent.isUsed(off))
                 continue;
 
             AdderNonJoined<Mapped>::add(mapped, rows_added, columns_keys_and_right);
@@ -1456,4 +1525,13 @@ BlockInputStreamPtr HashJoin::createStreamWithNonJoinedRows(const Block & result
     return {};
 }
 
+void HashJoin::reuseJoinedData(const HashJoin & join)
+{
+    data = join.data;
+    joinDispatch(kind, strictness, data->maps, [this](auto kind_, auto strictness_, auto & map)
+    {
+        used_flags.reinit<kind_, strictness_>(map.getBufferSizeInCells(data->type) + 1);
+    });
+}
+
 }
diff --git a/src/Interpreters/HashJoin.h b/src/Interpreters/HashJoin.h
index c14ad4e5a99..d9b9d4dd9fa 100644
--- a/src/Interpreters/HashJoin.h
+++ b/src/Interpreters/HashJoin.h
@@ -33,48 +33,32 @@ class DictionaryReader;
 namespace JoinStuff
 {
 
-/// Base class with optional flag attached that's needed to implement RIGHT and FULL JOINs.
-template <typename T, bool with_used>
-struct WithFlags;
-
-template <typename T>
-struct WithFlags<T, true> : T
+/// Flags needed to implement RIGHT and FULL JOINs.
+class JoinUsedFlags
 {
-    using Base = T;
-    using T::T;
+    std::vector<std::atomic_bool> flags;
+    bool need_flags;
 
-    mutable std::atomic<bool> used {};
-    void setUsed() const { used.store(true, std::memory_order_relaxed); }    /// Could be set simultaneously from different threads.
-    bool getUsed() const { return used; }
+public:
 
-    bool setUsedOnce() const
-    {
-        /// fast check to prevent heavy CAS with seq_cst order
-        if (used.load(std::memory_order_relaxed))
-            return false;
+    /// Update size for vector with flags.
+    /// Calling this method invalidates existing flags.
+    /// It can be called several times, but all of them should happen before using this structure.
+    template <ASTTableJoin::Kind KIND, ASTTableJoin::Strictness STRICTNESS>
+    void reinit(size_t size_);
 
-        bool expected = false;
-        return used.compare_exchange_strong(expected, true);
-    }
+    bool getUsedSafe(size_t i) const;
+
+    template <bool use_flags>
+    void setUsed(size_t i);
+
+    template <bool use_flags>
+    bool getUsed(size_t i);
+
+    template <bool use_flags>
+    bool setUsedOnce(size_t i);
 };
 
-template <typename T>
-struct WithFlags<T, false> : T
-{
-    using Base = T;
-    using T::T;
-
-    void setUsed() const {}
-    bool getUsed() const { return true; }
-    bool setUsedOnce() const { return true; }
-};
-
-using MappedOne =        WithFlags<RowRef, false>;
-using MappedAll =        WithFlags<RowRefList, false>;
-using MappedOneFlagged = WithFlags<RowRef, true>;
-using MappedAllFlagged = WithFlags<RowRefList, true>;
-using MappedAsof =       WithFlags<AsofRowRefs, false>;
-
 }
 
 /** Data structure for implementation of JOIN.
@@ -294,15 +278,30 @@ public:
 
             __builtin_unreachable();
         }
+
+        size_t getBufferSizeInCells(Type which) const
+        {
+            switch (which)
+            {
+                case Type::EMPTY:            return 0;
+                case Type::CROSS:            return 0;
+                case Type::DICT:             return 0;
+
+            #define M(NAME) \
+                case Type::NAME: return NAME ? NAME->getBufferSizeInCells() : 0;
+                APPLY_FOR_JOIN_VARIANTS(M)
+            #undef M
+            }
+
+            __builtin_unreachable();
+        }
     };
 
-    using MapsOne =             MapsTemplate<JoinStuff::MappedOne>;
-    using MapsAll =             MapsTemplate<JoinStuff::MappedAll>;
-    using MapsOneFlagged =      MapsTemplate<JoinStuff::MappedOneFlagged>;
-    using MapsAllFlagged =      MapsTemplate<JoinStuff::MappedAllFlagged>;
-    using MapsAsof =            MapsTemplate<JoinStuff::MappedAsof>;
+    using MapsOne = MapsTemplate<RowRef>;
+    using MapsAll = MapsTemplate<RowRefList>;
+    using MapsAsof = MapsTemplate<AsofRowRefs>;
 
-    using MapsVariant = std::variant<MapsOne, MapsAll, MapsOneFlagged, MapsAllFlagged, MapsAsof>;
+    using MapsVariant = std::variant<MapsOne, MapsAll, MapsAsof>;
     using BlockNullmapList = std::deque<std::pair<const Block *, ColumnPtr>>;
 
     struct RightTableData
@@ -323,16 +322,15 @@ public:
         Arena pool;
     };
 
-    void reuseJoinedData(const HashJoin & join)
-    {
-        data = join.data;
-    }
+    void reuseJoinedData(const HashJoin & join);
 
     std::shared_ptr<RightTableData> getJoinedData() const
     {
         return data;
     }
 
+    bool isUsed(size_t off) const { return used_flags.getUsedSafe(off); }
+
 private:
     friend class NonJoinedBlockInputStream;
     friend class JoinSource;
@@ -352,6 +350,7 @@ private:
 
     /// Right table data. StorageJoin shares it between many Join objects.
     std::shared_ptr<RightTableData> data;
+    mutable JoinStuff::JoinUsedFlags used_flags;
     Sizes key_sizes;
 
     /// Block with columns from the right-side table.
diff --git a/src/Interpreters/joinDispatch.h b/src/Interpreters/joinDispatch.h
index af16550e17e..cac3fcd1e9a 100644
--- a/src/Interpreters/joinDispatch.h
+++ b/src/Interpreters/joinDispatch.h
@@ -15,38 +15,35 @@ namespace DB
 template <ASTTableJoin::Kind kind, typename ASTTableJoin::Strictness>
 struct MapGetter;
 
-template <> struct MapGetter<ASTTableJoin::Kind::Left, ASTTableJoin::Strictness::RightAny> { using Map = HashJoin::MapsOne; };
-template <> struct MapGetter<ASTTableJoin::Kind::Inner, ASTTableJoin::Strictness::RightAny> { using Map = HashJoin::MapsOne; };
-template <> struct MapGetter<ASTTableJoin::Kind::Right, ASTTableJoin::Strictness::RightAny> { using Map = HashJoin::MapsOneFlagged; };
-template <> struct MapGetter<ASTTableJoin::Kind::Full, ASTTableJoin::Strictness::RightAny> { using Map = HashJoin::MapsOneFlagged; };
+template <> struct MapGetter<ASTTableJoin::Kind::Left, ASTTableJoin::Strictness::RightAny>  { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; };
+template <> struct MapGetter<ASTTableJoin::Kind::Inner, ASTTableJoin::Strictness::RightAny> { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; };
+template <> struct MapGetter<ASTTableJoin::Kind::Right, ASTTableJoin::Strictness::RightAny> { using Map = HashJoin::MapsOne; static constexpr bool flagged = true; };
+template <> struct MapGetter<ASTTableJoin::Kind::Full, ASTTableJoin::Strictness::RightAny>  { using Map = HashJoin::MapsOne; static constexpr bool flagged = true; };
 
-template <> struct MapGetter<ASTTableJoin::Kind::Left, ASTTableJoin::Strictness::Any> { using Map = HashJoin::MapsOne; };
-template <> struct MapGetter<ASTTableJoin::Kind::Inner, ASTTableJoin::Strictness::Any> { using Map = HashJoin::MapsOneFlagged; };
-template <> struct MapGetter<ASTTableJoin::Kind::Right, ASTTableJoin::Strictness::Any> { using Map = HashJoin::MapsAllFlagged; };
-template <> struct MapGetter<ASTTableJoin::Kind::Full, ASTTableJoin::Strictness::Any> { using Map = HashJoin::MapsAllFlagged; };
+template <> struct MapGetter<ASTTableJoin::Kind::Left, ASTTableJoin::Strictness::Any>  { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; };
+template <> struct MapGetter<ASTTableJoin::Kind::Inner, ASTTableJoin::Strictness::Any> { using Map = HashJoin::MapsOne; static constexpr bool flagged = true; };
+template <> struct MapGetter<ASTTableJoin::Kind::Right, ASTTableJoin::Strictness::Any> { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; };
+template <> struct MapGetter<ASTTableJoin::Kind::Full, ASTTableJoin::Strictness::Any>  { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; };
 
-template <> struct MapGetter<ASTTableJoin::Kind::Left, ASTTableJoin::Strictness::All> { using Map = HashJoin::MapsAll; };
-template <> struct MapGetter<ASTTableJoin::Kind::Inner, ASTTableJoin::Strictness::All> { using Map = HashJoin::MapsAll; };
-template <> struct MapGetter<ASTTableJoin::Kind::Right, ASTTableJoin::Strictness::All> { using Map = HashJoin::MapsAllFlagged; };
-template <> struct MapGetter<ASTTableJoin::Kind::Full, ASTTableJoin::Strictness::All> { using Map = HashJoin::MapsAllFlagged; };
+template <> struct MapGetter<ASTTableJoin::Kind::Left, ASTTableJoin::Strictness::All>  { using Map = HashJoin::MapsAll; static constexpr bool flagged = false; };
+template <> struct MapGetter<ASTTableJoin::Kind::Inner, ASTTableJoin::Strictness::All> { using Map = HashJoin::MapsAll; static constexpr bool flagged = false; };
+template <> struct MapGetter<ASTTableJoin::Kind::Right, ASTTableJoin::Strictness::All> { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; };
+template <> struct MapGetter<ASTTableJoin::Kind::Full, ASTTableJoin::Strictness::All>  { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; };
 
 /// Only SEMI LEFT and SEMI RIGHT are valid. INNER and FULL are here for templates instantiation.
-template <> struct MapGetter<ASTTableJoin::Kind::Left, ASTTableJoin::Strictness::Semi> { using Map = HashJoin::MapsOne; };
-template <> struct MapGetter<ASTTableJoin::Kind::Inner, ASTTableJoin::Strictness::Semi> { using Map = HashJoin::MapsOne; };
-template <> struct MapGetter<ASTTableJoin::Kind::Right, ASTTableJoin::Strictness::Semi> { using Map = HashJoin::MapsAllFlagged; };
-template <> struct MapGetter<ASTTableJoin::Kind::Full, ASTTableJoin::Strictness::Semi> { using Map = HashJoin::MapsOne; };
+template <> struct MapGetter<ASTTableJoin::Kind::Left, ASTTableJoin::Strictness::Semi>  { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; };
+template <> struct MapGetter<ASTTableJoin::Kind::Inner, ASTTableJoin::Strictness::Semi> { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; };
+template <> struct MapGetter<ASTTableJoin::Kind::Right, ASTTableJoin::Strictness::Semi> { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; };
+template <> struct MapGetter<ASTTableJoin::Kind::Full, ASTTableJoin::Strictness::Semi>  { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; };
 
 /// Only SEMI LEFT and SEMI RIGHT are valid. INNER and FULL are here for templates instantiation.
-template <> struct MapGetter<ASTTableJoin::Kind::Left, ASTTableJoin::Strictness::Anti> { using Map = HashJoin::MapsOne; };
-template <> struct MapGetter<ASTTableJoin::Kind::Inner, ASTTableJoin::Strictness::Anti> { using Map = HashJoin::MapsOne; };
-template <> struct MapGetter<ASTTableJoin::Kind::Right, ASTTableJoin::Strictness::Anti> { using Map = HashJoin::MapsAllFlagged; };
-template <> struct MapGetter<ASTTableJoin::Kind::Full, ASTTableJoin::Strictness::Anti> { using Map = HashJoin::MapsOne; };
+template <> struct MapGetter<ASTTableJoin::Kind::Left, ASTTableJoin::Strictness::Anti>  { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; };
+template <> struct MapGetter<ASTTableJoin::Kind::Inner, ASTTableJoin::Strictness::Anti> { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; };
+template <> struct MapGetter<ASTTableJoin::Kind::Right, ASTTableJoin::Strictness::Anti> { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; };
+template <> struct MapGetter<ASTTableJoin::Kind::Full, ASTTableJoin::Strictness::Anti>  { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; };
 
 template <ASTTableJoin::Kind kind>
-struct MapGetter<kind, ASTTableJoin::Strictness::Asof>
-{
-    using Map = HashJoin::MapsAsof;
-};
+struct MapGetter<kind, ASTTableJoin::Strictness::Asof> { using Map = HashJoin::MapsAsof; static constexpr bool flagged = false; };
 
 
 static constexpr std::array<ASTTableJoin::Strictness, 6> STRICTNESSES = {

From 3fc1641d9167a1b285826e21d95305568dc57a34 Mon Sep 17 00:00:00 2001
From: Stig Bakken <stig@stigbakken.com>
Date: Thu, 4 Feb 2021 04:52:06 +0800
Subject: [PATCH 0709/1238] Show details of MaterializeMySQL tables in
 `system.tables`

---
 src/Storages/StorageMaterializeMySQL.cpp                  | 3 +--
 .../materialize_with_ddl.py                               | 8 ++++++++
 tests/integration/test_materialize_mysql_database/test.py | 4 ++++
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/Storages/StorageMaterializeMySQL.cpp b/src/Storages/StorageMaterializeMySQL.cpp
index 721221e3fdc..e59f1e22958 100644
--- a/src/Storages/StorageMaterializeMySQL.cpp
+++ b/src/Storages/StorageMaterializeMySQL.cpp
@@ -30,9 +30,8 @@ namespace DB
 StorageMaterializeMySQL::StorageMaterializeMySQL(const StoragePtr & nested_storage_, const IDatabase * database_)
     : StorageProxy(nested_storage_->getStorageID()), nested_storage(nested_storage_), database(database_)
 {
-    auto nested_memory_metadata = nested_storage->getInMemoryMetadata();
     StorageInMemoryMetadata in_memory_metadata;
-    in_memory_metadata.setColumns(nested_memory_metadata.getColumns());
+    in_memory_metadata = nested_storage->getInMemoryMetadata();
     setInMemoryMetadata(in_memory_metadata);
 }
 
diff --git a/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py b/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py
index b7f432d963b..c9be2387fc7 100644
--- a/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py
+++ b/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py
@@ -772,3 +772,11 @@ def multi_table_update_test(clickhouse_node, mysql_node, service_name):
 
     check_query(clickhouse_node, "SELECT * FROM multi_table_update.a", "1\tbaz\n")
     check_query(clickhouse_node, "SELECT * FROM multi_table_update.b", "1\tquux\n")
+
+def system_tables_test(clickhouse_node, mysql_node, service_name):
+    mysql_node.query("DROP DATABASE IF EXISTS system_tables_test")
+    clickhouse_node.query("DROP DATABASE IF EXISTS system_tables_test")
+    mysql_node.query("CREATE DATABASE system_tables_test")
+    mysql_node.query("CREATE TABLE system_tables_test.test (id int NOT NULL PRIMARY KEY) ENGINE=InnoDB")
+    clickhouse_node.query("CREATE DATABASE system_tables_test ENGINE = MaterializeMySQL('{}:3306', 'system_tables_test', 'root', 'clickhouse')".format(service_name))
+    check_query(clickhouse_node, "SELECT partition_key, sorting_key, primary_key FROM system.tables WHERE database = 'system_tables_test' AND name = 'test'", "intDiv(id, 4294967)\tid\tid\n")
diff --git a/tests/integration/test_materialize_mysql_database/test.py b/tests/integration/test_materialize_mysql_database/test.py
index 32316901dce..e55772d9e1d 100644
--- a/tests/integration/test_materialize_mysql_database/test.py
+++ b/tests/integration/test_materialize_mysql_database/test.py
@@ -242,3 +242,7 @@ def test_system_parts_table(started_cluster, started_mysql_8_0, clickhouse_node)
 def test_multi_table_update(started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node):
     materialize_with_ddl.multi_table_update_test(clickhouse_node, started_mysql_5_7, "mysql1")
     materialize_with_ddl.multi_table_update_test(clickhouse_node, started_mysql_8_0, "mysql8_0")
+
+@pytest.mark.parametrize(('clickhouse_node'), [node_db_ordinary, node_db_ordinary])
+def test_system_tables_table(started_cluster, started_mysql_8_0, clickhouse_node):
+    materialize_with_ddl.system_tables_test(clickhouse_node, started_mysql_8_0, "mysql8_0")

From 8c49e84668c2c39be8d111795c159c9db4202c21 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Fri, 5 Feb 2021 16:42:45 +0300
Subject: [PATCH 0710/1238] Fix test.

---
 src/Interpreters/inplaceBlockConversions.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Interpreters/inplaceBlockConversions.cpp b/src/Interpreters/inplaceBlockConversions.cpp
index 3e2f055307a..6a2306fdfc8 100644
--- a/src/Interpreters/inplaceBlockConversions.cpp
+++ b/src/Interpreters/inplaceBlockConversions.cpp
@@ -104,7 +104,7 @@ ActionsDAGPtr createFillingMissingDefaultsExpression(
     if (save_unneeded_columns)
     {
         Names required_names;
-        required_names.resize(required_columns.size());
+        required_names.reserve(required_columns.size());
         for (const auto & column : required_columns)
             required_names.push_back(column.name);
 

From 31384cd652861acbfc1444bbea8822472db46b46 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Fri, 5 Feb 2021 17:37:49 +0300
Subject: [PATCH 0711/1238] Fix test.

---
 src/Interpreters/ActionsDAG.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp
index ed6f8527bc5..4927544a80e 100644
--- a/src/Interpreters/ActionsDAG.cpp
+++ b/src/Interpreters/ActionsDAG.cpp
@@ -626,7 +626,8 @@ void ActionsDAG::addMaterializingOutputActions()
                             std::make_shared<FunctionMaterialize>()));
 
     Index new_index;
-    for (auto * node : index)
+    std::vector<Node *> index_nodes(index.begin(), index.end());
+    for (auto * node : index_nodes)
     {
         auto & name = node->result_name;
         node = &addFunction(func_builder_materialize, {node}, {}, true);

From 872d2a8ad53804cf279ac334918fa060fdae308e Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Fri, 5 Feb 2021 17:42:41 +0300
Subject: [PATCH 0712/1238] Fix test.

---
 src/Interpreters/inplaceBlockConversions.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/Interpreters/inplaceBlockConversions.cpp b/src/Interpreters/inplaceBlockConversions.cpp
index 6a2306fdfc8..014fe774655 100644
--- a/src/Interpreters/inplaceBlockConversions.cpp
+++ b/src/Interpreters/inplaceBlockConversions.cpp
@@ -99,7 +99,9 @@ ActionsDAGPtr createFillingMissingDefaultsExpression(
 
     auto syntax_result = TreeRewriter(context).analyze(expr_list, header.getNamesAndTypesList());
     auto expression_analyzer = ExpressionAnalyzer{expr_list, syntax_result, context};
-    auto dag = expression_analyzer.getActionsDAG(true, !save_unneeded_columns);
+    auto dag = std::make_shared<ActionsDAG>(header.getNamesAndTypesList());
+    auto actions = expression_analyzer.getActionsDAG(true, !save_unneeded_columns);
+    dag = ActionsDAG::merge(std::move(*dag), std::move(*actions));
 
     if (save_unneeded_columns)
     {

From 85c175883ef969978472bd8e094d66143723e21e Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Fri, 5 Feb 2021 18:11:26 +0300
Subject: [PATCH 0713/1238] Rename functions.

---
 src/DataStreams/AddingDefaultsBlockInputStream.cpp | 2 +-
 src/Interpreters/addMissingDefaults.cpp            | 2 +-
 src/Interpreters/inplaceBlockConversions.cpp       | 8 ++++----
 src/Interpreters/inplaceBlockConversions.h         | 5 ++---
 src/Storages/MergeTree/IMergeTreeReader.cpp        | 2 +-
 5 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/src/DataStreams/AddingDefaultsBlockInputStream.cpp b/src/DataStreams/AddingDefaultsBlockInputStream.cpp
index 045151465b8..4b8dcff1870 100644
--- a/src/DataStreams/AddingDefaultsBlockInputStream.cpp
+++ b/src/DataStreams/AddingDefaultsBlockInputStream.cpp
@@ -171,7 +171,7 @@ Block AddingDefaultsBlockInputStream::readImpl()
     if (!evaluate_block.columns())
         evaluate_block.insert({ColumnConst::create(ColumnUInt8::create(1, 0), res.rows()), std::make_shared<DataTypeUInt8>(), "_dummy"});
 
-    auto dag = createFillingMissingDefaultsExpression(evaluate_block, header.getNamesAndTypesList(), columns, context, false);
+    auto dag = evaluateMissingDefaults(evaluate_block, header.getNamesAndTypesList(), columns, context, false);
     if (dag)
     {
         auto actions = std::make_shared<ExpressionActions>(std::move(dag));
diff --git a/src/Interpreters/addMissingDefaults.cpp b/src/Interpreters/addMissingDefaults.cpp
index 24772faa20a..dbc6efc2d99 100644
--- a/src/Interpreters/addMissingDefaults.cpp
+++ b/src/Interpreters/addMissingDefaults.cpp
@@ -91,7 +91,7 @@ ActionsDAGPtr addMissingDefaults(
     }
 
     /// Computes explicitly specified values by default and materialized columns.
-    if (auto dag = createFillingMissingDefaultsExpression(header, required_columns, columns, context))
+    if (auto dag = evaluateMissingDefaults(header, required_columns, columns, context))
         actions = ActionsDAG::merge(std::move(*actions), std::move(*dag));
 
     return actions;
diff --git a/src/Interpreters/inplaceBlockConversions.cpp b/src/Interpreters/inplaceBlockConversions.cpp
index 014fe774655..61082e0894c 100644
--- a/src/Interpreters/inplaceBlockConversions.cpp
+++ b/src/Interpreters/inplaceBlockConversions.cpp
@@ -87,7 +87,7 @@ ASTPtr convertRequiredExpressions(Block & block, const NamesAndTypesList & requi
     return conversion_expr_list;
 }
 
-ActionsDAGPtr createFillingMissingDefaultsExpression(
+ActionsDAGPtr createExpressions(
     const Block & header,
     ASTPtr expr_list,
     bool save_unneeded_columns,
@@ -125,14 +125,14 @@ void performRequiredConversions(Block & block, const NamesAndTypesList & require
     if (conversion_expr_list->children.empty())
         return;
 
-    if (auto dag = createFillingMissingDefaultsExpression(block, conversion_expr_list, true, required_columns, context))
+    if (auto dag = createExpressions(block, conversion_expr_list, true, required_columns, context))
     {
         auto expression = std::make_shared<ExpressionActions>(std::move(dag));
         expression->execute(block);
     }
 }
 
-ActionsDAGPtr createFillingMissingDefaultsExpression(
+ActionsDAGPtr evaluateMissingDefaults(
     const Block & header,
     const NamesAndTypesList & required_columns,
     const ColumnsDescription & columns,
@@ -142,7 +142,7 @@ ActionsDAGPtr createFillingMissingDefaultsExpression(
         return nullptr;
 
     ASTPtr expr_list = defaultRequiredExpressions(header, required_columns, columns);
-    return createFillingMissingDefaultsExpression(header, expr_list, save_unneeded_columns, required_columns, context);
+    return createExpressions(header, expr_list, save_unneeded_columns, required_columns, context);
 }
 
 }
diff --git a/src/Interpreters/inplaceBlockConversions.h b/src/Interpreters/inplaceBlockConversions.h
index ffba4a45ba1..3c224968315 100644
--- a/src/Interpreters/inplaceBlockConversions.h
+++ b/src/Interpreters/inplaceBlockConversions.h
@@ -15,9 +15,8 @@ class ColumnsDescription;
 class ActionsDAG;
 using ActionsDAGPtr = std::shared_ptr<ActionsDAG>;
 
-/// Adds missing defaults to block according to required_columns
-/// using columns description
-ActionsDAGPtr createFillingMissingDefaultsExpression(
+/// Create actions which adds missing defaults to block according to required_columns using columns description.
+ActionsDAGPtr evaluateMissingDefaults(
     const Block & header,
     const NamesAndTypesList & required_columns,
     const ColumnsDescription & columns,
diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp
index 052f6922cac..f28ca28b124 100644
--- a/src/Storages/MergeTree/IMergeTreeReader.cpp
+++ b/src/Storages/MergeTree/IMergeTreeReader.cpp
@@ -186,7 +186,7 @@ void IMergeTreeReader::evaluateMissingDefaults(Block additional_columns, Columns
             additional_columns.insert({res_columns[pos], name_and_type->type, name_and_type->name});
         }
 
-        auto dag = DB::createFillingMissingDefaultsExpression(
+        auto dag = DB::evaluateMissingDefaults(
                 additional_columns, columns, metadata_snapshot->getColumns(), storage.global_context);
         if (dag)
         {

From 92a4c4e318ac52f0e01da2bd1f2637f93b677f9a Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Fri, 5 Feb 2021 18:28:20 +0300
Subject: [PATCH 0714/1238] Add librdkafka to integration tests runner

---
 docker/test/integration/runner/Dockerfile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile
index f353931f0a0..fb853ecf751 100644
--- a/docker/test/integration/runner/Dockerfile
+++ b/docker/test/integration/runner/Dockerfile
@@ -27,6 +27,7 @@ RUN apt-get update \
     luajit \
     libssl-dev \
     libcurl4-openssl-dev \
+    librdkafka-dev \
     gdb \
     software-properties-common \
     libkrb5-dev \

From 6247d59c32922985380be690fe134fa5bddc0748 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Fri, 5 Feb 2021 18:30:45 +0300
Subject: [PATCH 0715/1238] Use fixed version

---
 docker/test/integration/runner/Dockerfile | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile
index fb853ecf751..502dc3736b2 100644
--- a/docker/test/integration/runner/Dockerfile
+++ b/docker/test/integration/runner/Dockerfile
@@ -27,7 +27,6 @@ RUN apt-get update \
     luajit \
     libssl-dev \
     libcurl4-openssl-dev \
-    librdkafka-dev \
     gdb \
     software-properties-common \
     libkrb5-dev \
@@ -62,7 +61,7 @@ RUN python3 -m pip install \
     aerospike \
     avro \
     cassandra-driver \
-    confluent-kafka \
+    confluent-kafka==1.5.0 \
     dict2xml \
     dicttoxml \
     docker \

From 4bb38f333be993ecf8f0c5d066433359fd012b72 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Fri, 5 Feb 2021 18:34:03 +0300
Subject: [PATCH 0716/1238] some simple cases

---
 src/Processors/Transforms/WindowTransform.cpp | 132 ++++++++++++++----
 src/Processors/Transforms/WindowTransform.h   |  15 +-
 .../01591_window_functions.reference          |  62 ++++++++
 .../0_stateless/01591_window_functions.sql    |  29 ++++
 4 files changed, 204 insertions(+), 34 deletions(-)

diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index 8d5f4450472..568a1c78b0f 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -63,6 +63,35 @@ WindowTransform::WindowTransform(const Block & input_header_,
         order_by_indices.push_back(
             input_header.getPositionByName(column.column_name));
     }
+
+// FIXME this is just all wrong. Disabled desc order for now.
+    if (window_description.frame.type == WindowFrame::FrameType::Range
+        && window_description.order_by.size() == 1
+        && window_description.order_by[0].direction < 0)
+    {
+        throw Exception(ErrorCodes::NOT_IMPLEMENTED,
+            "ORDER BY DESC for RANGE frames is not implemented");
+    }
+//     // If we have at least one RANGE OFFSET frame boundary, no UNBOUNDED frame
+//     // boundaries, and the ORDER BY is DESC, we have to swap the frame end
+//     // and frame start. This is tricky and I'm not sure how to explain the
+//     // reason, so I can only suggest to draw various RANGE OFFSET frames with
+//     // ASC and DESC orders to understand...
+//     // swap is a no-op if both frames are CURRENT ROW, so use a simpler condition.
+//     auto & frame = window_description.frame;
+//     if (frame.type == WindowFrame::FrameType::Range
+//         && (frame.end_type != WindowFrame::BoundaryType::Unbounded
+//             && frame.begin_type != WindowFrame::BoundaryType::Unbounded)
+//         && window_description.order_by.size() == 1
+//         && window_description.order_by[0].direction < 0)
+//     {
+//         std::swap(frame.begin_type, frame.end_type);
+//         std::swap(frame.begin_preceding, frame.end_preceding);
+//         std::swap(frame.begin_offset, frame.end_offset);
+//
+//         fmt::print(stderr, "swapped frame boundaries\n");
+//     }
+
 }
 
 WindowTransform::~WindowTransform()
@@ -290,21 +319,16 @@ void WindowTransform::advanceFrameStartRowsOffset()
     assert(offset_left >= 0);
 }
 
+// Compares ORDER BY column values at given rows to find the boundaries of frame:
+// [compared] with [reference] +/- offset. Return value is -1/0/+1, like in
+// sorting predicates -- -1 means [compared] is less than [reference] +/- offset.
 template <typename ColumnType>
-static bool isBeforeFrameStart(const ColumnType * compared_column,
+static int compareValuesWithOffset(const ColumnType * compared_column,
     size_t compared_row, const ColumnType * reference_column,
     size_t reference_row,
-    typename ColumnType::ValueType offset, bool is_preceding)
+    typename ColumnType::ValueType offset,
+    bool offset_is_preceding)
 {
-    // The frame is [[current_row] + begin_offset, [current_row] + end_offset]
-    // We need to return "is to the left of frame", that is,
-    // [tested_row] < [current_row] + begin_offset
-    // 1) right side overflows to positive, the condition is
-    // false.
-    // 2) [frame_start] + begin_offset overflows to negative, the condition is
-    // false.
-    // 3) no overflows, perform the comparison normally
-
     const auto compared_value_data = compared_column->getDataAt(compared_row);
     assert(compared_value_data.size == sizeof(typename ColumnType::ValueType));
     auto compared_value = unalignedLoad<typename ColumnType::ValueType>(
@@ -317,20 +341,14 @@ static bool isBeforeFrameStart(const ColumnType * compared_column,
 
     bool is_overflow;
     bool overflow_to_negative;
-    if (is_preceding)
+    if (offset_is_preceding)
     {
-        // The frame start is ref] - offset, inclusive. We answer
-        // "is compared row to the left of the frame?", i.e.
-        // [compared] < [ref] - offset.
         is_overflow = __builtin_sub_overflow(reference_value, offset,
             &reference_value);
         overflow_to_negative = offset > 0;
     }
     else
     {
-        // The frame start is [ref] + offset, inclusive. We answer
-        // "is compared row to the left of the frame?", i.e.
-        // [compared] < [ref] + offset.
         is_overflow = __builtin_add_overflow(reference_value, offset,
             &reference_value);
         overflow_to_negative = offset < 0;
@@ -347,41 +365,39 @@ static bool isBeforeFrameStart(const ColumnType * compared_column,
         if (overflow_to_negative)
         {
             // Overflow to the negative, [compared] must be greater.
-            return false;
+            return 1;
         }
         else
         {
             // Overflow to the positive, [compared] must be less.
-            return true;
+            return -1;
         }
     }
     else
     {
         // No overflow, compare normally.
-        return compared_value < reference_value;
+        return compared_value < reference_value ? -1
+            : compared_value == reference_value ? 0 : 1;
     }
 }
 
 template <typename ColumnType>
 void WindowTransform::advanceFrameStartRangeOffset()
 {
-    // [frame_start] + begin_offset < [current_row]
-    // 1) [frame_start] + begin_offset overflows to positive, the condition is
-    // false.
-    // 2) [frame_start] + begin_offset overflows to negative, the condition is
-    // false.
-    // 3) no overflows, perform the comparison normally
-
+    const int direction = window_description.order_by[0].direction;
     const auto * reference_column = assert_cast<const ColumnType *>(
         inputAt(current_row)[order_by_indices[0]].get());
     for (; frame_start < partition_end; advanceRowNumber(frame_start))
     {
+        // The first frame value is [current_row] with offset, so we advance
+        // while [frames_start] < [current_row] with offset.
         const auto * compared_column = assert_cast<const ColumnType *>(
             inputAt(frame_start)[order_by_indices[0]].get());
-        if (!isBeforeFrameStart(compared_column, frame_start.row,
+        if (compareValuesWithOffset(compared_column, frame_start.row,
             reference_column, current_row.row,
             window_description.frame.begin_offset,
-            window_description.frame.begin_preceding))
+            window_description.frame.begin_preceding)
+                * direction >= 0)
         {
             frame_started = true;
             return;
@@ -401,6 +417,10 @@ void WindowTransform::advanceFrameStartRangeOffsetDispatch()
     {
         advanceFrameStartRangeOffset<ColumnVector<UInt8>>();
     }
+    else if(typeid_cast<const ColumnVector<Int8> *>(column))
+    {
+        advanceFrameStartRangeOffset<ColumnVector<Int8>>();
+    }
     else
     {
         throw Exception(ErrorCodes::NOT_IMPLEMENTED,
@@ -621,6 +641,55 @@ void WindowTransform::advanceFrameEndRowsOffset()
     assert(offset_left >= 0);
 }
 
+template <typename ColumnType>
+void WindowTransform::advanceFrameEndRangeOffset()
+{
+    const int direction = window_description.order_by[0].direction;
+    const auto * reference_column = assert_cast<const ColumnType *>(
+        inputAt(current_row)[order_by_indices[0]].get());
+    for (; frame_end < partition_end; advanceRowNumber(frame_end))
+    {
+        // The last frame value is current_row with offset, and we need a
+        // past-the-end pointer, so we advance while
+        // [frame_end] <= [current_row] with offset.
+        const auto * compared_column = assert_cast<const ColumnType *>(
+            inputAt(frame_end)[order_by_indices[0]].get());
+        if (compareValuesWithOffset(compared_column, frame_end.row,
+            reference_column, current_row.row,
+            window_description.frame.end_offset,
+            window_description.frame.end_preceding)
+                * direction > 0)
+        {
+            frame_ended = true;
+            return;
+        }
+    }
+
+    frame_ended = partition_ended;
+}
+
+void WindowTransform::advanceFrameEndRangeOffsetDispatch()
+{
+    // Dispatch on the type of the ORDER BY column.
+    assert(order_by_indices.size() == 1);
+    const IColumn * column = inputAt(current_row)[order_by_indices[0]].get();
+
+    if (typeid_cast<const ColumnVector<UInt8> *>(column))
+    {
+        advanceFrameEndRangeOffset<ColumnVector<UInt8>>();
+    }
+    else if (typeid_cast<const ColumnVector<Int8> *>(column))
+    {
+        advanceFrameEndRangeOffset<ColumnVector<Int8>>();
+    }
+    else
+    {
+        throw Exception(ErrorCodes::NOT_IMPLEMENTED,
+            "The RANGE OFFSET frame end for '{}' ORDER BY column is not implemented",
+            demangle(typeid(*column).name()));
+    }
+}
+
 void WindowTransform::advanceFrameEnd()
 {
     // No reason for this function to be called again after it succeeded.
@@ -642,6 +711,9 @@ void WindowTransform::advanceFrameEnd()
                 case WindowFrame::FrameType::Rows:
                     advanceFrameEndRowsOffset();
                     break;
+                case WindowFrame::FrameType::Range:
+                    advanceFrameEndRangeOffsetDispatch();
+                    break;
                 default:
                     throw Exception(ErrorCodes::NOT_IMPLEMENTED,
                         "The frame end type '{}' is not implemented",
diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h
index b45913965ee..b70f5877007 100644
--- a/src/Processors/Transforms/WindowTransform.h
+++ b/src/Processors/Transforms/WindowTransform.h
@@ -106,16 +106,23 @@ public:
 
 private:
     void advancePartitionEnd();
-    void advanceFrameStart();
+
+    bool arePeers(const RowNumber & x, const RowNumber & y) const;
+
     void advanceFrameStartRowsOffset();
     void advanceFrameStartRangeOffsetDispatch();
-    template <typename T>
+    template <typename ColumnType>
     void advanceFrameStartRangeOffset();
+    void advanceFrameStart();
+
+    void advanceFrameEndRowsOffset();
     void advanceFrameEndCurrentRow();
     void advanceFrameEndUnbounded();
-    void advanceFrameEndRowsOffset();
     void advanceFrameEnd();
-    bool arePeers(const RowNumber & x, const RowNumber & y) const;
+    void advanceFrameEndRangeOffsetDispatch();
+    template <typename ColumnType>
+    void advanceFrameEndRangeOffset();
+
     void updateAggregationState();
     void writeOutCurrentRow();
 
diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference
index 1993c59fc8b..b8b3ae17830 100644
--- a/tests/queries/0_stateless/01591_window_functions.reference
+++ b/tests/queries/0_stateless/01591_window_functions.reference
@@ -709,3 +709,65 @@ from numbers(3);
 1	3
 1	3
 1	3
+-- RANGE OFFSET
+-- a basic RANGE OFFSET frame
+select x, min(x) over w, max(x) over w, count(x) over w from (
+    select toUInt8(number) x from numbers(11))
+window w as (order by x asc range between 1 preceding and 2 following)
+order by x;
+0	0	2	3
+1	0	3	4
+2	1	4	4
+3	2	5	4
+4	3	6	4
+5	4	7	4
+6	5	8	4
+7	6	9	4
+8	7	10	4
+9	8	10	3
+10	9	10	2
+-- overflow conditions
+select x, min(x) over w, max(x) over w, count(x) over w
+from (
+    select toUInt8(if(mod(number, 2),
+        toInt64(255 - intDiv(number, 2)),
+        toInt64(intDiv(number, 2)))) x
+    from numbers(10)
+)
+window w as (order by x range between 1 preceding and 2 following)
+order by x;
+0	0	2	3
+1	0	3	4
+2	1	4	4
+3	2	4	3
+4	3	4	2
+251	251	253	3
+252	251	254	4
+253	252	255	4
+254	253	255	3
+255	254	255	2
+select x, min(x) over w, max(x) over w, count(x) over w
+from (
+    select toInt8(multiIf(
+        mod(number, 3) == 0, toInt64(intDiv(number, 3)),
+        mod(number, 3) == 1, toInt64(127 - intDiv(number, 3)),
+        toInt64(-128 + intDiv(number, 3)))) x
+    from numbers(15)
+)
+window w as (order by x range between 1 preceding and 2 following)
+order by x;
+-128	-128	-126	3
+-127	-128	-125	4
+-126	-127	-124	4
+-125	-126	-124	3
+-124	-125	-124	2
+0	0	2	3
+1	0	3	4
+2	1	4	4
+3	2	4	3
+4	3	4	2
+123	123	125	3
+124	123	126	4
+125	124	127	4
+126	125	127	3
+127	126	127	2
diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql
index 400d4832144..cf2dd28a54c 100644
--- a/tests/queries/0_stateless/01591_window_functions.sql
+++ b/tests/queries/0_stateless/01591_window_functions.sql
@@ -212,3 +212,32 @@ select
     count(*) over (rows between  current row and current row),
     count(*) over (range between  current row and current row)
 from numbers(3);
+
+-- RANGE OFFSET
+-- a basic RANGE OFFSET frame
+select x, min(x) over w, max(x) over w, count(x) over w from (
+    select toUInt8(number) x from numbers(11))
+window w as (order by x asc range between 1 preceding and 2 following)
+order by x;
+
+-- overflow conditions
+select x, min(x) over w, max(x) over w, count(x) over w
+from (
+    select toUInt8(if(mod(number, 2),
+        toInt64(255 - intDiv(number, 2)),
+        toInt64(intDiv(number, 2)))) x
+    from numbers(10)
+)
+window w as (order by x range between 1 preceding and 2 following)
+order by x;
+
+select x, min(x) over w, max(x) over w, count(x) over w
+from (
+    select toInt8(multiIf(
+        mod(number, 3) == 0, toInt64(intDiv(number, 3)),
+        mod(number, 3) == 1, toInt64(127 - intDiv(number, 3)),
+        toInt64(-128 + intDiv(number, 3)))) x
+    from numbers(15)
+)
+window w as (order by x range between 1 preceding and 2 following)
+order by x;

From 95e21ba093374e1b637a7189282ec38e6a7471b7 Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Fri, 5 Feb 2021 07:53:22 -0800
Subject: [PATCH 0717/1238] Docs - remove duplicate date_diff and minor fixes

---
 .../functions/date-time-functions.md          | 131 +++++-------------
 1 file changed, 34 insertions(+), 97 deletions(-)

diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index fa4ea7a739e..35c033b5a2c 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -449,17 +449,9 @@ Aliases: `dateAdd`, `DATE_ADD`.
 
 **Parameters**
 
--   `unit` - The unit of time - [String](../syntax.md#syntax-string-literal).
-    Possible values:
+-   `unit` — Time unit, in which the returned value is expressed. [String](../../sql-reference/syntax.md#syntax-string-literal).
 
-    - `second`
-    - `minute`
-    - `hour`
-    - `day`
-    - `week`
-    - `month`
-    - `quarter`
-    - `year`
+        Supported values: second, minute, hour, day, week, month, quarter, year.
 -   `value` - Amount of the specified unit of time.    
 -   `date` — Date or Date with time - [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
 
@@ -476,43 +468,48 @@ select date_add(YEAR, 3, toDate('2018-01-01'));
 └───────────────────────────────────────────────┘
 ```
 
-## date\_diff {#date_diff}
+## date\_diff {#dated_diff}
 
-Returns the difference between two dates in terms of the specified unit.
+Returns the difference between two Date or DateTime values.
 
-**Syntax** 
+**Syntax**
 
 ``` sql
-date_sub(unit, date1, date2)
+date_diff('unit', startdate, enddate, [timezone])
 ```
 
-Aliases: `date_diff`, `DATE_DIFF`. 
-
 **Parameters**
 
--   `unit` - The unit of time - [String](../syntax.md#syntax-string-literal).
-    Possible values:
+-   `unit` — Time unit, in which the returned value is expressed. [String](../../sql-reference/syntax.md#syntax-string-literal).
 
-    - `second`
-    - `minute`
-    - `hour`
-    - `day`
-    - `week`
-    - `month`
-    - `quarter`
-    - `year`
--   `date1`,`date2` - Dates or Dates with time - [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
+        Supported values: second, minute, hour, day, week, month, quarter, year.
+
+-   `startdate` — The first time value to compare. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
+
+-   `enddate` — The second time value to compare. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
+
+-   `timezone` — Optional parameter. If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified.
+
+**Returned value**
+
+Difference between `startdate` and `enddate` expressed in `unit`.
+
+Type: `int`.
 
 **Example**
 
-```sql
-select date_diff(MONTH, toDate('2018-12-18'), toDate('2018-01-01'));
+Query:
+
+``` sql
+SELECT dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'));
 ```
 
-```text
-┌─dateDiff('month', toDate('2018-12-18'), toDate('2018-01-01'))─┐
-│                                                           -11 │
-└───────────────────────────────────────────────────────────────┘
+Result:
+
+``` text
+┌─dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'))─┐
+│                                                                                     25 │
+└────────────────────────────────────────────────────────────────────────────────────────┘
 ```
 
 ## timestamp\_add {#timestamp_add}
@@ -531,17 +528,9 @@ Aliases: `timeStampAdd`, `TIMESTAMP_ADD`.
     
 -   `date` — Date or Date with time - [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
 -   `value` -  Amount of the specified unit of time - [String](../syntax.md#syntax-string-literal)
--   `unit` - The unit of time interval - [String](../syntax.md#syntax-string-literal).
-    Possible values:
+-   `unit` — Time unit, in which the returned value is expressed. [String](../../sql-reference/syntax.md#syntax-string-literal).
 
-    - `second`
-    - `minute`
-    - `hour`
-    - `day`
-    - `week`
-    - `month`
-    - `quarter`
-    - `year`
+        Supported values: second, minute, hour, day, week, month, quarter, year.
     
 **Example**
 
@@ -569,17 +558,9 @@ Aliases: `timeStampSub`, `TIMESTAMP_SUB`.
 
 **Parameters**
 
--   `unit` - The unit of time - [String](../syntax.md#syntax-string-literal).
-    Possible values:
+-   `unit` — Time unit, in which the returned value is expressed. [String](../../sql-reference/syntax.md#syntax-string-literal).
 
-    - `second`
-    - `minute`
-    - `hour`
-    - `day`
-    - `week`
-    - `month`
-    - `quarter`
-    - `year`
+        Supported values: second, minute, hour, day, week, month, quarter, year.
 - `value` -  Amount of the specified unit of time. [String](../syntax.md#syntax-string-literal).   
 - `date1`, `date2` — Date or Date with time - [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
 
@@ -710,50 +691,6 @@ SELECT
 └──────────────────────────┴───────────────────────────────┘
 ```
 
-## dateDiff {#datediff}
-
-Returns the difference between two Date or DateTime values.
-
-**Syntax**
-
-``` sql
-dateDiff('unit', startdate, enddate, [timezone])
-```
-
-**Parameters**
-
--   `unit` — Time unit, in which the returned value is expressed. [String](../../sql-reference/syntax.md#syntax-string-literal).
-
-        Supported values: second, minute, hour, day, week, month, quarter, year.
-
--   `startdate` — The first time value to compare. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
-
--   `enddate` — The second time value to compare. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
-
--   `timezone` — Optional parameter. If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified.
-
-**Returned value**
-
-Difference between `startdate` and `enddate` expressed in `unit`.
-
-Type: `int`.
-
-**Example**
-
-Query:
-
-``` sql
-SELECT dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'));
-```
-
-Result:
-
-``` text
-┌─dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'))─┐
-│                                                                                     25 │
-└────────────────────────────────────────────────────────────────────────────────────────┘
-```
-
 ## timeSlots(StartTime, Duration,\[, Size\]) {#timeslotsstarttime-duration-size}
 
 For a time interval starting at ‘StartTime’ and continuing for ‘Duration’ seconds, it returns an array of moments in time, consisting of points from this interval rounded down to the ‘Size’ in seconds. ‘Size’ is an optional parameter: a constant UInt32, set to 1800 by default.

From facdc749cb1322499f21cfd4a2147f2c8c2b53c7 Mon Sep 17 00:00:00 2001
From: Bharat Nallan <bharatnc@gmail.com>
Date: Fri, 5 Feb 2021 07:54:14 -0800
Subject: [PATCH 0718/1238] Docs - commit suggestion

Co-authored-by: vdimir <vdimir@yandex-team.ru>
---
 docs/en/sql-reference/functions/date-time-functions.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index 35c033b5a2c..3e85ff42834 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -453,7 +453,7 @@ Aliases: `dateAdd`, `DATE_ADD`.
 
         Supported values: second, minute, hour, day, week, month, quarter, year.
 -   `value` - Amount of the specified unit of time.    
--   `date` — Date or Date with time - [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
+-   `date` — [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
 
 
 **Example**

From 6eb145697e44dd78a2e80cdfdb4fae476b69855f Mon Sep 17 00:00:00 2001
From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com>
Date: Fri, 5 Feb 2021 18:56:24 +0300
Subject: [PATCH 0719/1238] Add benchmark results for Broadwell 8vCPU S3/SSD in
 Yandex.Cloud

---
 .../yandex_cloud_broadwell_8_vcpu.json        | 55 +++++++++++++++++++
 .../yandex_cloud_broadwell_8_vcpu_s3.json     | 55 +++++++++++++++++++
 2 files changed, 110 insertions(+)
 create mode 100644 website/benchmark/hardware/results/yandex_cloud_broadwell_8_vcpu.json
 create mode 100644 website/benchmark/hardware/results/yandex_cloud_broadwell_8_vcpu_s3.json

diff --git a/website/benchmark/hardware/results/yandex_cloud_broadwell_8_vcpu.json b/website/benchmark/hardware/results/yandex_cloud_broadwell_8_vcpu.json
new file mode 100644
index 00000000000..1217adbbff5
--- /dev/null
+++ b/website/benchmark/hardware/results/yandex_cloud_broadwell_8_vcpu.json
@@ -0,0 +1,55 @@
+[
+    {
+        "system":       "Yandex Cloud 8vCPU",
+        "system_full":  "Yandex Cloud Broadwell, 8 vCPU (4 threads), 64 GB RAM, 500 GB SSD",
+        "cpu_vendor":   "Intel",
+        "time":         "2021-02-05 00:00:00",
+        "kind":         "cloud",
+        "result":
+        [
+            [0.004, 0.003, 0.003],
+            [0.047, 0.030, 0.021],
+            [0.129, 0.066, 0.067],
+            [0.873, 0.098, 0.095],
+            [0.869, 0.247, 0.257],
+            [1.429, 0.818, 0.768],
+            [0.055, 0.042, 0.043],
+            [0.034, 0.025, 0.024],
+            [1.372, 1.003, 1.051],
+            [1.605, 1.281, 1.209],
+            [0.942, 0.503, 0.483],
+            [0.980, 0.537, 0.558],
+            [2.076, 1.664, 1.635],
+            [3.136, 2.235, 2.171],
+            [2.351, 1.973, 1.974],
+            [2.369, 2.170, 2.133],
+            [6.281, 5.576, 5.498],
+            [3.739, 3.481, 3.354],
+            [10.947, 10.225, 10.271],
+            [0.875, 0.111, 0.108],
+            [10.832, 1.844, 1.877],
+            [12.344, 2.330, 2.227],
+            [22.999, 5.000, 4.903],
+            [20.086, 2.390, 2.278],
+            [3.036, 0.722, 0.673],
+            [1.420, 0.602, 0.578],
+            [3.040, 0.728, 0.714],
+            [10.842, 1.874, 1.783],
+            [9.207, 2.809, 2.705],
+            [2.751, 2.703, 2.714],
+            [2.810, 1.675, 1.568],
+            [6.507, 2.449, 2.505],
+            [15.968, 15.014, 15.318],
+            [13.479, 7.951, 7.702],
+            [13.227, 7.791, 7.699],
+            [2.811, 2.723, 2.549],
+            [0.358, 0.249, 0.273],
+            [0.157, 0.099, 0.101],
+            [0.189, 0.088, 0.080],
+            [0.758, 0.544, 0.525],
+            [0.115, 0.033, 0.027],
+            [0.063, 0.048, 0.023],
+            [0.014, 0.011, 0.008]
+        ]
+    }
+]
diff --git a/website/benchmark/hardware/results/yandex_cloud_broadwell_8_vcpu_s3.json b/website/benchmark/hardware/results/yandex_cloud_broadwell_8_vcpu_s3.json
new file mode 100644
index 00000000000..ace2442c86e
--- /dev/null
+++ b/website/benchmark/hardware/results/yandex_cloud_broadwell_8_vcpu_s3.json
@@ -0,0 +1,55 @@
+[
+    {
+        "system":       "Yandex Cloud 8vCPU Object Storage",
+        "system_full":  "Yandex Cloud Broadwell, 8 vCPU (4 threads), 64 GB RAM, Object Storage",
+        "cpu_vendor":   "Intel",
+        "time":         "2021-02-05 00:00:00",
+        "kind":         "cloud",
+        "result":
+        [
+            [0.007, 0.003, 0.003],
+            [0.214, 0.111, 0.096],
+            [1.239, 1.359, 0.718],
+            [3.056, 3.366, 1.869],
+            [1.946, 1.552, 2.450],
+            [4.804, 2.307, 2.398],
+            [0.198, 0.108, 0.114],
+            [0.141, 0.104, 0.100],
+            [2.755, 2.749, 3.608],
+            [3.140, 3.905, 3.830],
+            [2.353, 4.996, 1.637],
+            [3.796, 1.536, 1.724],
+            [3.565, 3.016, 3.381],
+            [4.962, 4.263, 4.352],
+            [4.210, 3.974, 4.318],
+            [3.884, 3.434, 3.124],
+            [10.451, 9.147, 7.526],
+            [6.288, 5.882, 7.714],
+            [15.239, 33.243, 17.968],
+            [1.645, 1.870, 3.230],
+            [10.980, 8.984, 7.589],
+            [14.345, 11.503, 12.449],
+            [17.687, 17.764, 18.984],
+            [76.606, 65.179, 94.215],
+            [5.833, 3.347, 3.127],
+            [3.815, 2.574, 2.402],
+            [4.916, 6.169, 5.731],
+            [7.961, 9.930, 8.555],
+            [5.995, 7.382, 6.054],
+            [3.113, 4.176, 3.172],
+            [5.077, 5.221, 5.709],
+            [8.990, 9.598, 6.272],
+            [17.832, 17.668, 17.276],
+            [11.846, 14.692, 13.225],
+            [12.544, 12.502, 12.725],
+            [3.604, 4.811, 3.267],
+            [0.738, 0.751, 0.862],
+            [0.718, 0.611, 0.561],
+            [2.125, 0.688, 0.522],
+            [1.469, 1.546, 1.373],
+            [1.382, 1.069, 0.976],
+            [1.353, 1.212, 1.119],
+            [0.045, 0.031, 0.041]
+        ]
+    }
+]

From e8d5fbc0a2dd01e1bb762e0451c5d2a138b2e37c Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Fri, 5 Feb 2021 08:07:02 -0800
Subject: [PATCH 0720/1238] Docs - more minor fixes

---
 .../functions/date-time-functions.md             | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index 3e85ff42834..5d96fd78c11 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -456,6 +456,10 @@ Aliases: `dateAdd`, `DATE_ADD`.
 -   `date` — [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
 
 
+**Returned value**
+
+Returns Date or DateTime with `value` expressed in `unit` added to `date`. 
+
 **Example**
 
 ```sql
@@ -478,6 +482,8 @@ Returns the difference between two Date or DateTime values.
 date_diff('unit', startdate, enddate, [timezone])
 ```
 
+Aliases: `dateDiff`, `DATE_DIFF`. 
+
 **Parameters**
 
 -   `unit` — Time unit, in which the returned value is expressed. [String](../../sql-reference/syntax.md#syntax-string-literal).
@@ -531,6 +537,10 @@ Aliases: `timeStampAdd`, `TIMESTAMP_ADD`.
 -   `unit` — Time unit, in which the returned value is expressed. [String](../../sql-reference/syntax.md#syntax-string-literal).
 
         Supported values: second, minute, hour, day, week, month, quarter, year.
+
+**Returned value**
+
+Returns Date or DateTime with the specified `value`  expressed in `unit` added to `date`. 
     
 **Example**
 
@@ -562,7 +572,11 @@ Aliases: `timeStampSub`, `TIMESTAMP_SUB`.
 
         Supported values: second, minute, hour, day, week, month, quarter, year.
 - `value` -  Amount of the specified unit of time. [String](../syntax.md#syntax-string-literal).   
-- `date1`, `date2` — Date or Date with time - [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
+- `date`- [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
+
+**Returned value**
+
+Difference between `date` and the specified `value` expressed in `unit`.
 
 **Example**
 

From a7f58da87e2252fb0087b2b79ca780fe6cd8d6ff Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Fri, 5 Feb 2021 19:35:21 +0300
Subject: [PATCH 0721/1238] Fix tests.

---
 src/Interpreters/ActionsDAG.cpp         | 31 ++++++++++++++++++-------
 src/Interpreters/ActionsDAG.h           |  7 +++---
 src/Interpreters/addMissingDefaults.cpp | 10 +++-----
 3 files changed, 30 insertions(+), 18 deletions(-)

diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp
index 4927544a80e..884453034e9 100644
--- a/src/Interpreters/ActionsDAG.cpp
+++ b/src/Interpreters/ActionsDAG.cpp
@@ -45,10 +45,10 @@ ActionsDAG::ActionsDAG(const ColumnsWithTypeAndName & inputs_)
     }
 }
 
-ActionsDAG::Node & ActionsDAG::addNode(Node node, bool can_replace)
+ActionsDAG::Node & ActionsDAG::addNode(Node node, bool can_replace, bool add_to_index)
 {
     auto it = index.find(node.result_name);
-    if (it != index.end() && !can_replace)
+    if (it != index.end() && !can_replace && add_to_index)
         throw Exception("Column '" + node.result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN);
 
     auto & res = nodes.emplace_back(std::move(node));
@@ -56,7 +56,8 @@ ActionsDAG::Node & ActionsDAG::addNode(Node node, bool can_replace)
     if (res.type == ActionType::INPUT)
         inputs.emplace_back(&res);
 
-    index.replace(&res);
+    if (add_to_index)
+        index.replace(&res);
     return res;
 }
 
@@ -90,7 +91,7 @@ const ActionsDAG::Node & ActionsDAG::addInput(ColumnWithTypeAndName column, bool
     return addNode(std::move(node), can_replace);
 }
 
-const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column, bool can_replace)
+const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column, bool can_replace, bool materialize)
 {
     if (!column.column)
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add column {} because it is nullptr", column.name);
@@ -101,7 +102,20 @@ const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column, boo
     node.result_name = std::move(column.name);
     node.column = std::move(column.column);
 
-    return addNode(std::move(node), can_replace);
+    auto * res = &addNode(std::move(node), can_replace, !materialize);
+
+    if (materialize)
+    {
+        auto & name = res->result_name;
+
+        FunctionOverloadResolverPtr func_builder_materialize =
+                std::make_shared<FunctionOverloadResolverAdaptor>(
+                        std::make_unique<DefaultOverloadResolver>(
+                                std::make_shared<FunctionMaterialize>()));
+
+        res = &addFunction(func_builder_materialize, {res}, {}, true, false);
+        res = &addAlias(*res, name, true);
+    }
 }
 
 const ActionsDAG::Node & ActionsDAG::addAlias(const std::string & name, std::string alias, bool can_replace)
@@ -169,7 +183,8 @@ ActionsDAG::Node & ActionsDAG::addFunction(
         const FunctionOverloadResolverPtr & function,
         Inputs children,
         std::string result_name,
-        bool can_replace)
+        bool can_replace,
+        bool add_to_index)
 {
     size_t num_arguments = children.size();
 
@@ -250,7 +265,7 @@ ActionsDAG::Node & ActionsDAG::addFunction(
 
     node.result_name = std::move(result_name);
 
-    return addNode(std::move(node), can_replace);
+    return addNode(std::move(node), can_replace, add_to_index);
 }
 
 
@@ -630,7 +645,7 @@ void ActionsDAG::addMaterializingOutputActions()
     for (auto * node : index_nodes)
     {
         auto & name = node->result_name;
-        node = &addFunction(func_builder_materialize, {node}, {}, true);
+        node = &addFunction(func_builder_materialize, {node}, {}, true, false);
         node = &addAlias(*node, name, true);
         new_index.insert(node);
     }
diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h
index c929f1a4a59..d7463f7e2ed 100644
--- a/src/Interpreters/ActionsDAG.h
+++ b/src/Interpreters/ActionsDAG.h
@@ -198,7 +198,7 @@ public:
 
     const Node & addInput(std::string name, DataTypePtr type, bool can_replace = false);
     const Node & addInput(ColumnWithTypeAndName column, bool can_replace = false);
-    const Node & addColumn(ColumnWithTypeAndName column, bool can_replace = false);
+    const Node & addColumn(ColumnWithTypeAndName column, bool can_replace = false, bool materialize = false);
     const Node & addAlias(const std::string & name, std::string alias, bool can_replace = false);
     const Node & addArrayJoin(const std::string & source_name, std::string result_name);
     const Node & addFunction(
@@ -272,7 +272,7 @@ public:
     std::pair<ActionsDAGPtr, ActionsDAGPtr> splitActionsForFilter(const std::string & column_name) const;
 
 private:
-    Node & addNode(Node node, bool can_replace = false);
+    Node & addNode(Node node, bool can_replace = false, bool add_to_index = true);
     Node & getNode(const std::string & name);
 
     Node & addAlias(Node & child, std::string alias, bool can_replace);
@@ -280,7 +280,8 @@ private:
             const FunctionOverloadResolverPtr & function,
             Inputs children,
             std::string result_name,
-            bool can_replace);
+            bool can_replace,
+            bool add_to_index = true);
 
     ActionsDAGPtr cloneEmpty() const
     {
diff --git a/src/Interpreters/addMissingDefaults.cpp b/src/Interpreters/addMissingDefaults.cpp
index dbc6efc2d99..3de96708acb 100644
--- a/src/Interpreters/addMissingDefaults.cpp
+++ b/src/Interpreters/addMissingDefaults.cpp
@@ -74,20 +74,16 @@ ActionsDAGPtr addMissingDefaults(
 
             auto & group = nested_groups[offsets_name];
             group[0] = constant.result_name;
-            const auto & func = actions->addFunction(func_builder_replicate, group, {}, context);
+            actions->addFunction(func_builder_replicate, group, constant.result_name, context);
 
-            actions->addAlias(func.result_name, column.name, true);
             continue;
         }
 
-        auto new_column = column.type->createColumnConstWithDefaultValue(0);
-        const auto * node = &actions->addColumn({std::move(new_column), column.type, column.name}, true);
-
         /** It is necessary to turn a constant column into a full column, since in part of blocks (from other parts),
         *  it can be full (or the interpreter may decide that it is constant everywhere).
         */
-        node = &actions->addFunction(func_builder_materialize, {node->result_name}, {}, context);
-        actions->addAlias(node->result_name, column.name, true);
+        auto new_column = column.type->createColumnConstWithDefaultValue(0);
+        actions->addColumn({std::move(new_column), column.type, column.name}, true, true);
     }
 
     /// Computes explicitly specified values by default and materialized columns.

From 27933e714b956e34a404f1519b7397f3f93d2d7c Mon Sep 17 00:00:00 2001
From: Marquitos <marsanben92@gmail.com>
Date: Fri, 5 Feb 2021 17:39:05 +0100
Subject: [PATCH 0722/1238] Add 'access_management' configuration to initial
 setup

---
 docker/server/README.md     | 8 ++++----
 docker/server/entrypoint.sh | 2 ++
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/docker/server/README.md b/docker/server/README.md
index d8e9204dffa..6f799d68185 100644
--- a/docker/server/README.md
+++ b/docker/server/README.md
@@ -56,7 +56,7 @@ $ echo 'SELECT version()' | curl 'http://localhost:8123/' --data-binary @-
 20.12.3.3
 ```
 
-### Volumes 
+### Volumes
 
 Typically you may want to mount the following folders inside your container to archieve persistency:
 
@@ -76,7 +76,7 @@ You may also want to mount:
 * `/etc/clickhouse-server/usert.d/*.xml` - files with use settings adjustmenets
 * `/docker-entrypoint-initdb.d/` - folder with database initialization scripts (see below).
 
-### Linux capabilities 
+### Linux capabilities
 
 ClickHouse has some advanced functionality which requite enabling several [linux capabilities](https://man7.org/linux/man-pages/man7/capabilities.7.html).
 
@@ -113,10 +113,10 @@ $ docker run --rm -e CLICKHOUSE_UID=0 -e CLICKHOUSE_GID=0 --name clickhouse-serv
 
 ### How to create default database and user on starting
 
-Sometimes you may want to create user (user named `default` is used by default) and database on image starting. You can do it using environment variables `CLICKHOUSE_DB`, `CLICKHOUSE_USER` and `CLICKHOUSE_PASSWORD`:
+Sometimes you may want to create user (user named `default` is used by default) and database on image starting. You can do it using environment variables `CLICKHOUSE_DB`, `CLICKHOUSE_USER`, `CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT` and `CLICKHOUSE_PASSWORD`:
 
 ```
-$ docker run --rm -e CLICKHOUSE_DB=my_database -e CLICKHOUSE_USER=username -e CLICKHOUSE_PASSWORD=password -p 9000:9000/tcp yandex/clickhouse-server
+$ docker run --rm -e CLICKHOUSE_DB=my_database -e CLICKHOUSE_USER=username -e CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1 -e CLICKHOUSE_PASSWORD=password -p 9000:9000/tcp yandex/clickhouse-server
 ```
 
 ## How to extend this image
diff --git a/docker/server/entrypoint.sh b/docker/server/entrypoint.sh
index 549ff601c59..0138a165505 100755
--- a/docker/server/entrypoint.sh
+++ b/docker/server/entrypoint.sh
@@ -54,6 +54,7 @@ FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_
 CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}"
 CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}"
 CLICKHOUSE_DB="${CLICKHOUSE_DB:-}"
+CLICKHOUSE_ACCESS_MANAGEMENT="${CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT:-0}"
 
 for dir in "$DATA_DIR" \
   "$ERROR_LOG_DIR" \
@@ -97,6 +98,7 @@ if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CL
           </networks>
           <password>${CLICKHOUSE_PASSWORD}</password>
           <quota>default</quota>
+          <access_management>${CLICKHOUSE_ACCESS_MANAGEMENT}</access_management>
         </${CLICKHOUSE_USER}>
       </users>
     </yandex>

From e49315e05853e616b07abd5e05ed95b843500100 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Fri, 5 Feb 2021 19:52:50 +0300
Subject: [PATCH 0723/1238] Fix tests.

---
 src/Interpreters/ActionsDAG.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp
index 884453034e9..4f4f774a511 100644
--- a/src/Interpreters/ActionsDAG.cpp
+++ b/src/Interpreters/ActionsDAG.cpp
@@ -116,6 +116,8 @@ const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column, boo
         res = &addFunction(func_builder_materialize, {res}, {}, true, false);
         res = &addAlias(*res, name, true);
     }
+
+    return *res;
 }
 
 const ActionsDAG::Node & ActionsDAG::addAlias(const std::string & name, std::string alias, bool can_replace)

From 4906fd9c8c1cbaaed741d1dc015d50198ac093c5 Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Fri, 5 Feb 2021 08:55:30 -0800
Subject: [PATCH 0724/1238] Docs - fixes to  doc link

---
 .../functions/date-time-functions.md           | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index 5d96fd78c11..173fd8e4af0 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -380,7 +380,7 @@ Alias: `dateTrunc`.
 
 **Parameters**
 
--   `unit` — Part of date. [String](../syntax.md#syntax-string-literal).
+-   `unit` — Part of date. [String](../../sql-reference/data-types/string.md).
     Possible values:
 
     - `second`
@@ -449,10 +449,10 @@ Aliases: `dateAdd`, `DATE_ADD`.
 
 **Parameters**
 
--   `unit` — Time unit, in which the returned value is expressed. [String](../../sql-reference/syntax.md#syntax-string-literal).
+-   `unit` — Time unit, in which the returned value is expressed. [String](../../sql-reference/data-types/string.md).
 
         Supported values: second, minute, hour, day, week, month, quarter, year.
--   `value` - Amount of the specified unit of time.    
+-   `value` - Value in specified unit - [Int](../../sql-reference/data-types/int-uint.md)    
 -   `date` — [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
 
 
@@ -472,7 +472,7 @@ select date_add(YEAR, 3, toDate('2018-01-01'));
 └───────────────────────────────────────────────┘
 ```
 
-## date\_diff {#dated_diff}
+## date\_diff {#date_diff}
 
 Returns the difference between two Date or DateTime values.
 
@@ -486,7 +486,7 @@ Aliases: `dateDiff`, `DATE_DIFF`.
 
 **Parameters**
 
--   `unit` — Time unit, in which the returned value is expressed. [String](../../sql-reference/syntax.md#syntax-string-literal).
+-   `unit` — Time unit, in which the returned value is expressed. [String](../../sql-reference/data-types/string.md).
 
         Supported values: second, minute, hour, day, week, month, quarter, year.
 
@@ -533,8 +533,8 @@ Aliases: `timeStampAdd`, `TIMESTAMP_ADD`.
 **Parameters**
     
 -   `date` — Date or Date with time - [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
--   `value` -  Amount of the specified unit of time - [String](../syntax.md#syntax-string-literal)
--   `unit` — Time unit, in which the returned value is expressed. [String](../../sql-reference/syntax.md#syntax-string-literal).
+-   `value` - Value in specified unit - [Int](../../sql-reference/data-types/int-uint.md)
+-   `unit` — Time unit, in which the returned value is expressed. [String](../../sql-reference/data-types/string.md).
 
         Supported values: second, minute, hour, day, week, month, quarter, year.
 
@@ -568,10 +568,10 @@ Aliases: `timeStampSub`, `TIMESTAMP_SUB`.
 
 **Parameters**
 
--   `unit` — Time unit, in which the returned value is expressed. [String](../../sql-reference/syntax.md#syntax-string-literal).
+-   `unit` — Time unit, in which the returned value is expressed. [String](../../sql-reference/data-types/string.md).
 
         Supported values: second, minute, hour, day, week, month, quarter, year.
-- `value` -  Amount of the specified unit of time. [String](../syntax.md#syntax-string-literal).   
+- `value` - Value in specified unit - [Int](../../sql-reference/data-types/int-uint.md).   
 - `date`- [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
 
 **Returned value**

From a761143286617c2c1615d2170edfab8d3da127ba Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Fri, 5 Feb 2021 21:19:41 +0300
Subject: [PATCH 0725/1238] Fix tests.

---
 src/Interpreters/addMissingDefaults.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Interpreters/addMissingDefaults.cpp b/src/Interpreters/addMissingDefaults.cpp
index 3de96708acb..fa7ba1be550 100644
--- a/src/Interpreters/addMissingDefaults.cpp
+++ b/src/Interpreters/addMissingDefaults.cpp
@@ -59,7 +59,7 @@ ActionsDAGPtr addMissingDefaults(
     for (const auto & column : required_columns)
     {
         if (header.has(column.name))
-            continue;
+            actions->addInput(header.getByName(column.name));
 
         if (columns.hasDefault(column.name))
             continue;
@@ -87,7 +87,7 @@ ActionsDAGPtr addMissingDefaults(
     }
 
     /// Computes explicitly specified values by default and materialized columns.
-    if (auto dag = evaluateMissingDefaults(header, required_columns, columns, context))
+    if (auto dag = evaluateMissingDefaults(actions->getResultColumns(), required_columns, columns, context))
         actions = ActionsDAG::merge(std::move(*actions), std::move(*dag));
 
     return actions;

From 5aa27a6dd17afdb37987e2e95db46ce19003ec17 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Fri, 5 Feb 2021 21:21:48 +0300
Subject: [PATCH 0726/1238] Fix tests.

---
 src/Interpreters/addMissingDefaults.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Interpreters/addMissingDefaults.cpp b/src/Interpreters/addMissingDefaults.cpp
index fa7ba1be550..acf585584bc 100644
--- a/src/Interpreters/addMissingDefaults.cpp
+++ b/src/Interpreters/addMissingDefaults.cpp
@@ -59,7 +59,7 @@ ActionsDAGPtr addMissingDefaults(
     for (const auto & column : required_columns)
     {
         if (header.has(column.name))
-            actions->addInput(header.getByName(column.name));
+            continue;
 
         if (columns.hasDefault(column.name))
             continue;

From 60f2e89cfba58dbaf263a7899cd4a63fedb4b729 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Fri, 5 Feb 2021 22:21:30 +0300
Subject: [PATCH 0727/1238] Fix the case when DataType parser may have
 exponential complexity

---
 src/Parsers/ParserDataType.cpp                            | 6 ++++--
 .../01691_parser_data_type_exponential.reference          | 0
 .../0_stateless/01691_parser_data_type_exponential.sh     | 8 ++++++++
 3 files changed, 12 insertions(+), 2 deletions(-)
 create mode 100644 tests/queries/0_stateless/01691_parser_data_type_exponential.reference
 create mode 100755 tests/queries/0_stateless/01691_parser_data_type_exponential.sh

diff --git a/src/Parsers/ParserDataType.cpp b/src/Parsers/ParserDataType.cpp
index 3d3f393a300..dd495fe6d53 100644
--- a/src/Parsers/ParserDataType.cpp
+++ b/src/Parsers/ParserDataType.cpp
@@ -32,8 +32,10 @@ private:
         const char * operators[] = {"=", "equals", nullptr};
         ParserLeftAssociativeBinaryOperatorList enum_parser(operators, std::make_unique<ParserLiteral>());
 
-        return nested_parser.parse(pos, node, expected)
-            || enum_parser.parse(pos, node, expected)
+        if (pos->type == TokenType::BareWord && std::string_view(pos->begin, pos->size()) == "Nested")
+            return nested_parser.parse(pos, node, expected);
+
+        return enum_parser.parse(pos, node, expected)
             || literal_parser.parse(pos, node, expected)
             || data_type_parser.parse(pos, node, expected);
     }
diff --git a/tests/queries/0_stateless/01691_parser_data_type_exponential.reference b/tests/queries/0_stateless/01691_parser_data_type_exponential.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/01691_parser_data_type_exponential.sh b/tests/queries/0_stateless/01691_parser_data_type_exponential.sh
new file mode 100755
index 00000000000..2b1d34982a2
--- /dev/null
+++ b/tests/queries/0_stateless/01691_parser_data_type_exponential.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+# Check that DataType parser does not have exponential complexity in the case found by fuzzer.
+for _ in {1..10}; do ${CLICKHOUSE_CLIENT} -n --testmode --query "SELECT CAST(1 AS A2222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222222222222222222222ggregateFuncpion(groupBitmapp, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222223ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222222222222222222222ggregateFuncpion(groupBitmap, 00000000000000000000000000000000000000000000000000000000000000000000000000000001841416382, 222222222222222ggregateFuncpion(groupBitmap22222222222222222222222222222222222222222222222222220000000000000000000000000000000000000000000000000000000000000000000000000000002260637443813394204222222222222222222ggregateFuncpion(groupBitmapp, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222223ggregateFuncpio22222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222222ggregateFuncpion(groupBitmap2222222222222ggregateFuncpion(groupBitmapp, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222223ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222222222222222222222ggre222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 22222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 2222222222222eFuncpion(groupBitmap, 00000000000000000000000000000000000000000000000000000000000000000000000000000001841416382, 222222222222222ggregateFuncpion(groupBitmap22222222222222222222222222222222222222222222222222222222222222222222222200000000000000000000178859639454016722222222222222222222222222222222222222222222222222222222222ggregateFuncpion(groupBitmapp, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222223ggregateFuncpio22222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222222222222222222222ggregateFuncpion(groupBitmapp, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222223ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222222222222222222222ggregateFuncpion(groupBitmap, 00000000000000000000000000000000000000000000000000000000000000000000000000000001841416382, 222222222222222ggregateFuncpion(groupBitmap22222222222222222222222222222222222222222222222222222222222222222222222200000000000000000000178859639454016722222222222222222222222222222222222222222222222222222222222ggregateFuncpion(groupBitmapp, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222223ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222222222222222222222ggregateFuncpion(groupBitmap, 22222222222222222222222222222222222222222222222222222222222222222222222222222222222222222, 222222222222222ggregateFuncpion(groupBitmap222222222222222222222222222222222222222222222222222222222222222222222222000000000000000000001788596394540167623222222222222222222ggregateFu22222222222222222222222222222222222, UInt33)); -- { clientError 62 }"; done

From d563cfb2be9f361e8231bdb387da51afd43cd3fd Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Fri, 5 Feb 2021 22:22:11 +0300
Subject: [PATCH 0728/1238] Fix tests.

---
 src/Interpreters/ActionsDAG.cpp         | 5 +++--
 src/Interpreters/ActionsDAG.h           | 3 ++-
 src/Interpreters/addMissingDefaults.cpp | 2 +-
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp
index 4f4f774a511..88d79325f5c 100644
--- a/src/Interpreters/ActionsDAG.cpp
+++ b/src/Interpreters/ActionsDAG.cpp
@@ -159,7 +159,8 @@ const ActionsDAG::Node & ActionsDAG::addFunction(
         const FunctionOverloadResolverPtr & function,
         const Names & argument_names,
         std::string result_name,
-        const Context & context [[maybe_unused]])
+        const Context & context [[maybe_unused]],
+        bool can_replace)
 {
     const auto & all_settings = context.getSettingsRef();
     settings.max_temporary_columns = all_settings.max_temporary_columns;
@@ -178,7 +179,7 @@ const ActionsDAG::Node & ActionsDAG::addFunction(
     for (const auto & name : argument_names)
         children.push_back(&getNode(name));
 
-    return addFunction(function, children, std::move(result_name), false);
+    return addFunction(function, children, std::move(result_name), can_replace);
 }
 
 ActionsDAG::Node & ActionsDAG::addFunction(
diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h
index d7463f7e2ed..9023e1900d6 100644
--- a/src/Interpreters/ActionsDAG.h
+++ b/src/Interpreters/ActionsDAG.h
@@ -205,7 +205,8 @@ public:
             const FunctionOverloadResolverPtr & function,
             const Names & argument_names,
             std::string result_name,
-            const Context & context);
+            const Context & context,
+            bool can_replace = false);
 
     /// Call addAlias several times.
     void addAliases(const NamesWithAliases & aliases);
diff --git a/src/Interpreters/addMissingDefaults.cpp b/src/Interpreters/addMissingDefaults.cpp
index acf585584bc..7dfe839b564 100644
--- a/src/Interpreters/addMissingDefaults.cpp
+++ b/src/Interpreters/addMissingDefaults.cpp
@@ -74,7 +74,7 @@ ActionsDAGPtr addMissingDefaults(
 
             auto & group = nested_groups[offsets_name];
             group[0] = constant.result_name;
-            actions->addFunction(func_builder_replicate, group, constant.result_name, context);
+            actions->addFunction(func_builder_replicate, group, constant.result_name, context, true);
 
             continue;
         }

From f81a407cddc69960cbad86bd62c5b6dfac767d4a Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Fri, 5 Feb 2021 22:39:26 +0300
Subject: [PATCH 0729/1238] Fix if with tuple then/else arguments

---
 src/Functions/if.cpp                          |  7 ++--
 .../01701_if_tuple_segfault.reference         |  3 ++
 .../0_stateless/01701_if_tuple_segfault.sql   | 33 +++++++++++++++++++
 3 files changed, 40 insertions(+), 3 deletions(-)
 create mode 100644 tests/queries/0_stateless/01701_if_tuple_segfault.reference
 create mode 100644 tests/queries/0_stateless/01701_if_tuple_segfault.sql

diff --git a/src/Functions/if.cpp b/src/Functions/if.cpp
index 3be4848f1ff..614bfcf700e 100644
--- a/src/Functions/if.cpp
+++ b/src/Functions/if.cpp
@@ -532,7 +532,7 @@ private:
         return nullptr;
     }
 
-    ColumnPtr executeTuple(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const
+    ColumnPtr executeTuple(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const
     {
         /// Calculate function for each corresponding elements of tuples.
 
@@ -558,6 +558,7 @@ private:
 
         const DataTypeTuple & type1 = static_cast<const DataTypeTuple &>(*arg1.type);
         const DataTypeTuple & type2 = static_cast<const DataTypeTuple &>(*arg2.type);
+        const DataTypeTuple & tuple_result = static_cast<const DataTypeTuple &>(*result_type);
 
         ColumnsWithTypeAndName temporary_columns(3);
         temporary_columns[0] = arguments[0];
@@ -570,7 +571,7 @@ private:
             temporary_columns[1] = {col1_contents[i], type1.getElements()[i], {}};
             temporary_columns[2] = {col2_contents[i], type2.getElements()[i], {}};
 
-            tuple_columns[i] = executeImpl(temporary_columns, std::make_shared<DataTypeUInt8>(), input_rows_count);
+            tuple_columns[i] = executeImpl(temporary_columns, tuple_result.getElements()[i], input_rows_count);
         }
 
         return ColumnTuple::create(tuple_columns);
@@ -988,7 +989,7 @@ public:
             || (res = executeTyped<UInt128, UInt128>(cond_col, arguments, result_type, input_rows_count))
             || (res = executeString(cond_col, arguments, result_type))
             || (res = executeGenericArray(cond_col, arguments, result_type))
-            || (res = executeTuple(arguments, input_rows_count))))
+            || (res = executeTuple(arguments, result_type, input_rows_count))))
         {
             return executeGeneric(cond_col, arguments, input_rows_count);
         }
diff --git a/tests/queries/0_stateless/01701_if_tuple_segfault.reference b/tests/queries/0_stateless/01701_if_tuple_segfault.reference
new file mode 100644
index 00000000000..001e50da954
--- /dev/null
+++ b/tests/queries/0_stateless/01701_if_tuple_segfault.reference
@@ -0,0 +1,3 @@
+2020-10-01 19:20:30	hello	([0],45)	45	([0,1,2,3,4,5,6,7,8,9,10,11,12],[45,55,65,75,85,95,105,115,125,135,145,155,165])
+([3],4)
+2020-10-01 19:20:30	hello	([0],45)	5	([0,1,2,3,4,5,6,7,8,9,10,11,12],[22,27,32,37,42,47,52,57,62,67,72,77,82])
diff --git a/tests/queries/0_stateless/01701_if_tuple_segfault.sql b/tests/queries/0_stateless/01701_if_tuple_segfault.sql
new file mode 100644
index 00000000000..93b28c578a9
--- /dev/null
+++ b/tests/queries/0_stateless/01701_if_tuple_segfault.sql
@@ -0,0 +1,33 @@
+DROP TABLE IF EXISTS agg_table;
+
+CREATE TABLE IF NOT EXISTS agg_table
+(
+    time DateTime CODEC(DoubleDelta, LZ4),
+    xxx String,
+    two_values Tuple(Array(UInt16), UInt32),
+    agg_simple SimpleAggregateFunction(sum, UInt64),
+    agg SimpleAggregateFunction(sumMap, Tuple(Array(Int16), Array(UInt64)))
+)
+ENGINE = AggregatingMergeTree()
+ORDER BY (xxx, time);
+
+INSERT INTO agg_table SELECT toDateTime('2020-10-01 19:20:30'), 'hello', ([any(number)], sum(number)), sum(number),
+    sumMap((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))) FROM numbers(10);
+
+SELECT * FROM agg_table;
+
+SELECT if(xxx = 'x', ([2], 3), ([3], 4)) FROM agg_table;
+
+SELECT if(xxx = 'x', ([2], 3), ([3], 4, 'q', 'w', 7)) FROM agg_table; --{ serverError 386 }
+
+ALTER TABLE agg_table UPDATE two_values = (two_values.1, two_values.2) WHERE time BETWEEN toDateTime('2020-08-01 00:00:00') AND toDateTime('2020-12-01 00:00:00') SETTINGS mutations_sync = 2;
+
+ALTER TABLE agg_table UPDATE agg_simple = 5 WHERE time BETWEEN toDateTime('2020-08-01 00:00:00') AND toDateTime('2020-12-01 00:00:00') SETTINGS mutations_sync = 2;
+
+ALTER TABLE agg_table UPDATE agg = (agg.1, agg.2) WHERE time BETWEEN toDateTime('2020-08-01 00:00:00') AND toDateTime('2020-12-01 00:00:00') SETTINGS mutations_sync = 2;
+
+ALTER TABLE agg_table UPDATE agg = (agg.1, arrayMap(x -> toUInt64(x / 2), agg.2)) WHERE time BETWEEN toDateTime('2020-08-01 00:00:00') AND toDateTime('2020-12-01 00:00:00') SETTINGS mutations_sync = 2;
+
+SELECT * FROM agg_table;
+
+DROP TABLE IF EXISTS agg_table;

From 9d62842a33da9a704b00f074a2c38b05b0e6d378 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Fri, 5 Feb 2021 22:52:06 +0300
Subject: [PATCH 0730/1238] Fix tests.

---
 src/Functions/materialize.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/Functions/materialize.h b/src/Functions/materialize.h
index ccdbe455c34..5b06ac36da7 100644
--- a/src/Functions/materialize.h
+++ b/src/Functions/materialize.h
@@ -27,6 +27,8 @@ public:
         return name;
     }
 
+    bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
+
     size_t getNumberOfArguments() const override
     {
         return 1;

From 8dc3a207207a600e825088fd1cccbb67189a5248 Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Fri, 5 Feb 2021 13:33:47 -0800
Subject: [PATCH 0731/1238] Docs - improve unit description

---
 .../sql-reference/functions/date-time-functions.md | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index 173fd8e4af0..9080d191ce3 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -380,7 +380,7 @@ Alias: `dateTrunc`.
 
 **Parameters**
 
--   `unit` — Part of date. [String](../../sql-reference/data-types/string.md).
+-   `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md).
     Possible values:
 
     - `second`
@@ -449,7 +449,7 @@ Aliases: `dateAdd`, `DATE_ADD`.
 
 **Parameters**
 
--   `unit` — Time unit, in which the returned value is expressed. [String](../../sql-reference/data-types/string.md).
+-   `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md).
 
         Supported values: second, minute, hour, day, week, month, quarter, year.
 -   `value` - Value in specified unit - [Int](../../sql-reference/data-types/int-uint.md)    
@@ -486,7 +486,7 @@ Aliases: `dateDiff`, `DATE_DIFF`.
 
 **Parameters**
 
--   `unit` — Time unit, in which the returned value is expressed. [String](../../sql-reference/data-types/string.md).
+-   `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md).
 
         Supported values: second, minute, hour, day, week, month, quarter, year.
 
@@ -534,7 +534,7 @@ Aliases: `timeStampAdd`, `TIMESTAMP_ADD`.
     
 -   `date` — Date or Date with time - [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
 -   `value` - Value in specified unit - [Int](../../sql-reference/data-types/int-uint.md)
--   `unit` — Time unit, in which the returned value is expressed. [String](../../sql-reference/data-types/string.md).
+-   `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md).
 
         Supported values: second, minute, hour, day, week, month, quarter, year.
 
@@ -568,11 +568,11 @@ Aliases: `timeStampSub`, `TIMESTAMP_SUB`.
 
 **Parameters**
 
--   `unit` — Time unit, in which the returned value is expressed. [String](../../sql-reference/data-types/string.md).
+-   `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md).
 
         Supported values: second, minute, hour, day, week, month, quarter, year.
-- `value` - Value in specified unit - [Int](../../sql-reference/data-types/int-uint.md).   
-- `date`- [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
+-   `value` - Value in specified unit - [Int](../../sql-reference/data-types/int-uint.md).   
+-   `date`- [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
 
 **Returned value**
 

From a23853c1415a07cfdcb594f28ffa77f6286ae53b Mon Sep 17 00:00:00 2001
From: George <gyuton@yandex-team.ru>
Date: Sat, 6 Feb 2021 12:42:07 +0300
Subject: [PATCH 0732/1238] minor fixes

---
 docs/en/engines/database-engines/materialize-mysql.md | 2 +-
 docs/en/operations/settings/settings.md               | 2 +-
 docs/en/sql-reference/statements/create/view.md       | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/en/engines/database-engines/materialize-mysql.md b/docs/en/engines/database-engines/materialize-mysql.md
index e459132c0ae..2e361cc82f0 100644
--- a/docs/en/engines/database-engines/materialize-mysql.md
+++ b/docs/en/engines/database-engines/materialize-mysql.md
@@ -93,7 +93,7 @@ ClickHouse has only one physical order, which is determined by `ORDER BY` clause
 - Cascade `UPDATE/DELETE` queries are not supported by the `MaterializeMySQL` engine.
 - Replication can be easily broken.
 - Manual operations on database and tables are forbidden.
-- `MaterializeMySQL` is influenced by [optimize_on_insert](../../operations/settings/settings.md#optimize-on-insert) settings. There is a merge of data in the corresponding table in `MaterializeMySQL` database when a table in MySQL server changes.
+- `MaterializeMySQL` is influenced by [optimize_on_insert](../../operations/settings/settings.md#optimize-on-insert) setting. The data is merged in the corresponding table in the `MaterializeMySQL` database when a table in the MySQL server changes.
 
 ## Examples of Use {#examples-of-use}
 
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 99bd8d879d1..e5316834381 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -2594,7 +2594,7 @@ Default value: `16`.
 
 ## optimize_on_insert {#optimize-on-insert}
 
-Do the same transformation for inserted block of data as if merge was done on this block (e.g. replacing, collapsing, aggregating...).
+Enables or disables data transformation before the insertion, as if merge was done on this block (e.g. replacing, collapsing, aggregating...).
 
 Possible values:
 
diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md
index 596f52202e2..95d66175021 100644
--- a/docs/en/sql-reference/statements/create/view.md
+++ b/docs/en/sql-reference/statements/create/view.md
@@ -59,7 +59,7 @@ A `SELECT` query can contain `DISTINCT`, `GROUP BY`, `ORDER BY`, `LIMIT`… Note
 
 The execution of [ALTER](../../../sql-reference/statements/alter/index.md) queries on materialized views has limitations, so they might be inconvenient. If the materialized view uses the construction `TO [db.]name`, you can `DETACH` the view, run `ALTER` for the target table, and then `ATTACH` the previously detached (`DETACH`) view.
 
-Note that materialized view is influenced by [optimize_on_insert](../../../operations/settings/settings.md#optimize-on-insert) settings. There is a merge of data before the insertion in a view.
+Note that materialized view is influenced by [optimize_on_insert](../../../operations/settings/settings.md#optimize-on-insert) setting. The data is merged before the insertion into a view.
 
 Views look the same as normal tables. For example, they are listed in the result of the `SHOW TABLES` query.
 

From 09743bdd9f1e040f17f02783e15effce391d376f Mon Sep 17 00:00:00 2001
From: George <gyuton@yandex-team.ru>
Date: Sat, 6 Feb 2021 12:43:48 +0300
Subject: [PATCH 0733/1238] Added translation

---
 .../database-engines/materialize-mysql.md     |  3 +-
 docs/ru/operations/settings/settings.md       | 54 +++++++++++++++++++
 .../sql-reference/statements/create/view.md   |  2 +
 3 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/docs/ru/engines/database-engines/materialize-mysql.md b/docs/ru/engines/database-engines/materialize-mysql.md
index f23ac0cddd6..415760e271f 100644
--- a/docs/ru/engines/database-engines/materialize-mysql.md
+++ b/docs/ru/engines/database-engines/materialize-mysql.md
@@ -93,6 +93,7 @@ DDL-запросы в MySQL конвертируются в соответств
 - Каскадные запросы `UPDATE/DELETE` не поддерживаются движком `MaterializeMySQL`.
 - Репликация может быть легко нарушена.
 - Прямые операции изменения данных в таблицах и базах данных `MaterializeMySQL` запрещены.
+- на работу `MaterializeMySQL` влияет настройка [optimize_on_insert](../../operations/settings/settings.md#optimize-on-insert). Когда таблица на MySQL сервере меняется, происходит слияние данных в соответсвующей таблице в базе данных `MaterializeMySQL`.
 
 ## Примеры использования {#examples-of-use}
 
@@ -156,4 +157,4 @@ SELECT * FROM mysql.test;
 └───┴─────┴──────┘
 ```
 
-[Оригинальная статья](https://clickhouse.tech/docs/ru/database_engines/materialize-mysql/) <!--hide-->
+[Original article](https://clickhouse.tech/docs/ru/engines/database-engines/materialize-mysql/) <!--hide-->
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index bacc97dfd14..c77bd62b509 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -2473,4 +2473,58 @@ SELECT SUM(-1), MAX(0) FROM system.one WHERE 0;
 
 Значение по умолчанию: `16`.
 
+## optimize_on_insert {#optimize-on-insert}
+
+Включает или выключает преобразованние данных перед их вставкой, как будто над блоком было произведено слияние (например замена, сворачивание, агрегация...).
+
+Возможные значения:
+
+-   0 — выключена
+-   1 — включена.
+
+Значение по умолчанию: 1.
+
+**Пример**
+
+Разница между включенным и выключенным состояниями:
+
+Запрос:
+
+```sql
+SET optimize_on_insert = 1;
+
+CREATE TABLE test1 (`FirstTable` UInt32) ENGINE = ReplacingMergeTree ORDER BY FirstTable;
+
+INSERT INTO test1 SELECT number % 2 FROM numbers(5);
+
+SELECT * FROM test1;
+
+SET optimize_on_insert = 0;
+
+CREATE TABLE test2 (`SecondTable` UInt32) ENGINE = ReplacingMergeTree ORDER BY SecondTable;
+
+INSERT INTO test2 SELECT number % 2 FROM numbers(5);
+
+SELECT * FROM test2;
+```
+
+Результат:
+
+``` text
+┌─FirstTable─┐
+│          0 │
+│          1 │
+└────────────┘
+
+┌─SecondTable─┐
+│           0 │
+│           0 │
+│           0 │
+│           1 │
+│           1 │
+└─────────────┘
+```
+
+Обратите внимание на то, что эта настройка влияет на поведение [материализованное представление ](../../sql-reference/statements/create/view.md#materialized) и [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md).
+
 [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/settings/) <!--hide-->
diff --git a/docs/ru/sql-reference/statements/create/view.md b/docs/ru/sql-reference/statements/create/view.md
index 09026874948..fb86903c4c7 100644
--- a/docs/ru/sql-reference/statements/create/view.md
+++ b/docs/ru/sql-reference/statements/create/view.md
@@ -56,6 +56,8 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]na
 
 Недоработано выполнение запросов `ALTER` над материализованными представлениями, поэтому они могут быть неудобными для использования. Если материализованное представление использует конструкцию `TO [db.]name`, то можно выполнить `DETACH` представления, `ALTER` для целевой таблицы и последующий `ATTACH` ранее отсоединенного (`DETACH`) представления.
 
+Обратите внимание, что работа материлизованного представления находится под влиянием настройки [optimize_on_insert](../../../operations/settings/settings.md#optimize-on-insert). Перед вставкой данных в таблицу происходит слияние данных.
+ 
 Представления выглядят так же, как обычные таблицы. Например, они перечисляются в результате запроса `SHOW TABLES`.
 
 Отсутствует отдельный запрос для удаления представлений. Чтобы удалить представление, следует использовать `DROP TABLE`.

From 147cc4ae47ffa0fe3cba1a92665a52e89259981c Mon Sep 17 00:00:00 2001
From: George <gyuton@yandex-team.ru>
Date: Sat, 6 Feb 2021 12:48:53 +0300
Subject: [PATCH 0734/1238] fixed typos

---
 docs/ru/engines/database-engines/materialize-mysql.md | 2 +-
 docs/ru/operations/settings/settings.md               | 2 +-
 docs/ru/sql-reference/statements/create/view.md       | 3 +--
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/docs/ru/engines/database-engines/materialize-mysql.md b/docs/ru/engines/database-engines/materialize-mysql.md
index 415760e271f..71aaf81a5e7 100644
--- a/docs/ru/engines/database-engines/materialize-mysql.md
+++ b/docs/ru/engines/database-engines/materialize-mysql.md
@@ -157,4 +157,4 @@ SELECT * FROM mysql.test;
 └───┴─────┴──────┘
 ```
 
-[Original article](https://clickhouse.tech/docs/ru/engines/database-engines/materialize-mysql/) <!--hide-->
+[Оригинальная статья](https://clickhouse.tech/docs/ru/engines/database-engines/materialize-mysql/) <!--hide-->
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index c77bd62b509..e7ac91c79c8 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -2486,7 +2486,7 @@ SELECT SUM(-1), MAX(0) FROM system.one WHERE 0;
 
 **Пример**
 
-Разница между включенным и выключенным состояниями:
+Разница между включенным и выключенным состоянием:
 
 Запрос:
 
diff --git a/docs/ru/sql-reference/statements/create/view.md b/docs/ru/sql-reference/statements/create/view.md
index fb86903c4c7..12bc519252d 100644
--- a/docs/ru/sql-reference/statements/create/view.md
+++ b/docs/ru/sql-reference/statements/create/view.md
@@ -62,5 +62,4 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]na
 
 Отсутствует отдельный запрос для удаления представлений. Чтобы удалить представление, следует использовать `DROP TABLE`.
 
-[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/create/view) 
-<!--hide-->
+[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/create/view) <!--hide-->

From af26ad6df9dc2fd22ea989d94429f8de4f2019bb Mon Sep 17 00:00:00 2001
From: vdimir <vdimir@yandex-team.ru>
Date: Sat, 6 Feb 2021 12:49:04 +0300
Subject: [PATCH 0735/1238] Minor changes in missed out date time functions

---
 docs/en/sql-reference/functions/date-time-functions.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index 9080d191ce3..2cec116f986 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -380,7 +380,7 @@ Alias: `dateTrunc`.
 
 **Parameters**
 
--   `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md).
+-   `unit` — The type of interval to truncate the result. [String](../../sql-reference/data-types/string.md).
     Possible values:
 
     - `second`
@@ -486,19 +486,19 @@ Aliases: `dateDiff`, `DATE_DIFF`.
 
 **Parameters**
 
--   `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md).
+-   `unit` — The type of interval for result [String](../../sql-reference/data-types/string.md).
 
         Supported values: second, minute, hour, day, week, month, quarter, year.
 
--   `startdate` — The first time value to compare. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
+-   `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
 
--   `enddate` — The second time value to compare. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
+-   `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
 
 -   `timezone` — Optional parameter. If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified.
 
 **Returned value**
 
-Difference between `startdate` and `enddate` expressed in `unit`.
+Difference between `enddate` and `startdate` expressed in `unit`.
 
 Type: `int`.
 

From de30b1477c2fc8864b45e56f2968da84cf18708e Mon Sep 17 00:00:00 2001
From: gyuton <40863448+gyuton@users.noreply.github.com>
Date: Sat, 6 Feb 2021 13:00:39 +0300
Subject: [PATCH 0736/1238] Update view.md

---
 docs/ru/sql-reference/statements/create/view.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/sql-reference/statements/create/view.md b/docs/ru/sql-reference/statements/create/view.md
index 12bc519252d..f4b91b5ae17 100644
--- a/docs/ru/sql-reference/statements/create/view.md
+++ b/docs/ru/sql-reference/statements/create/view.md
@@ -56,7 +56,7 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]na
 
 Недоработано выполнение запросов `ALTER` над материализованными представлениями, поэтому они могут быть неудобными для использования. Если материализованное представление использует конструкцию `TO [db.]name`, то можно выполнить `DETACH` представления, `ALTER` для целевой таблицы и последующий `ATTACH` ранее отсоединенного (`DETACH`) представления.
 
-Обратите внимание, что работа материлизованного представления находится под влиянием настройки [optimize_on_insert](../../../operations/settings/settings.md#optimize-on-insert). Перед вставкой данных в таблицу происходит слияние данных.
+Обратите внимание, что работа материлизованного представления находится под влиянием настройки [optimize_on_insert](../../../operations/settings/settings.md#optimize-on-insert). Перед вставкой данных в таблицу происходит их слияние.
  
 Представления выглядят так же, как обычные таблицы. Например, они перечисляются в результате запроса `SHOW TABLES`.
 

From 34229bf0aa86e0faa77ee17dd06d20959f6c5cf9 Mon Sep 17 00:00:00 2001
From: gyuton <40863448+gyuton@users.noreply.github.com>
Date: Sat, 6 Feb 2021 13:01:52 +0300
Subject: [PATCH 0737/1238] Update materialize-mysql.md

---
 docs/ru/engines/database-engines/materialize-mysql.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/engines/database-engines/materialize-mysql.md b/docs/ru/engines/database-engines/materialize-mysql.md
index 71aaf81a5e7..3022542e294 100644
--- a/docs/ru/engines/database-engines/materialize-mysql.md
+++ b/docs/ru/engines/database-engines/materialize-mysql.md
@@ -93,7 +93,7 @@ DDL-запросы в MySQL конвертируются в соответств
 - Каскадные запросы `UPDATE/DELETE` не поддерживаются движком `MaterializeMySQL`.
 - Репликация может быть легко нарушена.
 - Прямые операции изменения данных в таблицах и базах данных `MaterializeMySQL` запрещены.
-- на работу `MaterializeMySQL` влияет настройка [optimize_on_insert](../../operations/settings/settings.md#optimize-on-insert). Когда таблица на MySQL сервере меняется, происходит слияние данных в соответсвующей таблице в базе данных `MaterializeMySQL`.
+- На работу `MaterializeMySQL` влияет настройка [optimize_on_insert](../../operations/settings/settings.md#optimize-on-insert). Когда таблица на MySQL сервере меняется, происходит слияние данных в соответсвующей таблице в базе данных `MaterializeMySQL`.
 
 ## Примеры использования {#examples-of-use}
 

From f4c2048bf21b467fb542ee9a897df2921b500080 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Fri, 5 Feb 2021 22:06:22 +0300
Subject: [PATCH 0738/1238] Fix toDateTime64(toDate()/toDateTime()) for
 DateTime64

Maybe this is even does not worth it, but at least there was code that
assume that this should work - ToDateTime64Transform in
FunctionsConversion.h.
---
 src/Functions/DateTimeTransforms.h                          | 6 +++++-
 .../0_stateless/01692_DateTime64_from_DateTime.reference    | 5 +++++
 .../queries/0_stateless/01692_DateTime64_from_DateTime.sql  | 3 +++
 3 files changed, 13 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/01692_DateTime64_from_DateTime.reference
 create mode 100644 tests/queries/0_stateless/01692_DateTime64_from_DateTime.sql

diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h
index b55f78e71bd..333b397312d 100644
--- a/src/Functions/DateTimeTransforms.h
+++ b/src/Functions/DateTimeTransforms.h
@@ -704,7 +704,11 @@ struct DateTimeTransformImpl
     {
         using Op = Transformer<typename FromDataType::FieldType, typename ToDataType::FieldType, Transform>;
 
-        const DateLUTImpl & time_zone = extractTimeZoneFromFunctionArguments(arguments, 1, 0);
+        size_t time_zone_argument_position = 1;
+        if constexpr (std::is_same_v<ToDataType, DataTypeDateTime64>)
+            time_zone_argument_position = 2;
+
+        const DateLUTImpl & time_zone = extractTimeZoneFromFunctionArguments(arguments, time_zone_argument_position, 0);
 
         const ColumnPtr source_col = arguments[0].column;
         if (const auto * sources = checkAndGetColumn<typename FromDataType::ColumnType>(source_col.get()))
diff --git a/tests/queries/0_stateless/01692_DateTime64_from_DateTime.reference b/tests/queries/0_stateless/01692_DateTime64_from_DateTime.reference
new file mode 100644
index 00000000000..183d6f1222c
--- /dev/null
+++ b/tests/queries/0_stateless/01692_DateTime64_from_DateTime.reference
@@ -0,0 +1,5 @@
+-- { echo }
+select toDateTime64(toDateTime(1), 2);
+1970-01-01 03:00:01.00
+select toDateTime64(toDate(1), 2);
+1970-01-02 00:00:00.00
diff --git a/tests/queries/0_stateless/01692_DateTime64_from_DateTime.sql b/tests/queries/0_stateless/01692_DateTime64_from_DateTime.sql
new file mode 100644
index 00000000000..543c6b373da
--- /dev/null
+++ b/tests/queries/0_stateless/01692_DateTime64_from_DateTime.sql
@@ -0,0 +1,3 @@
+-- { echo }
+select toDateTime64(toDateTime(1), 2);
+select toDateTime64(toDate(1), 2);

From 0627ba0e36435f73bcb30bb42db5e92e1a89b0af Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Fri, 5 Feb 2021 22:06:23 +0300
Subject: [PATCH 0739/1238] Fix timezone argument for DateTime64

It should be marked with always const, otherwise it will bail:

    Code: 44, e.displayText() = DB::Exception: Illegal column String of time zone argument of function, must be constant string: While processing toDateTime(-1, 1, 'GMT'), Stack trace (when copying this message, always include the lines below):
---
 src/Functions/FunctionsConversion.h                        | 7 ++++++-
 .../0_stateless/01692_DateTime64_from_DateTime.reference   | 4 ++++
 .../queries/0_stateless/01692_DateTime64_from_DateTime.sql | 2 ++
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h
index 96e49686526..df58e184d54 100644
--- a/src/Functions/FunctionsConversion.h
+++ b/src/Functions/FunctionsConversion.h
@@ -1294,7 +1294,12 @@ public:
     bool useDefaultImplementationForNulls() const override { return checked_return_type; }
 
     bool useDefaultImplementationForConstants() const override { return true; }
-    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
+    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override
+    {
+        if constexpr (std::is_same_v<ToDataType, DataTypeDateTime64>)
+            return {2};
+        return {1};
+    }
     bool canBeExecutedOnDefaultArguments() const override { return false; }
 
     ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
diff --git a/tests/queries/0_stateless/01692_DateTime64_from_DateTime.reference b/tests/queries/0_stateless/01692_DateTime64_from_DateTime.reference
index 183d6f1222c..a0562e40027 100644
--- a/tests/queries/0_stateless/01692_DateTime64_from_DateTime.reference
+++ b/tests/queries/0_stateless/01692_DateTime64_from_DateTime.reference
@@ -3,3 +3,7 @@ select toDateTime64(toDateTime(1), 2);
 1970-01-01 03:00:01.00
 select toDateTime64(toDate(1), 2);
 1970-01-02 00:00:00.00
+select toDateTime64(toDateTime(1), 2, 'GMT');
+1970-01-01 00:00:01.00
+select toDateTime64(toDate(1), 2, 'GMT');
+1970-01-02 00:00:00.00
diff --git a/tests/queries/0_stateless/01692_DateTime64_from_DateTime.sql b/tests/queries/0_stateless/01692_DateTime64_from_DateTime.sql
index 543c6b373da..60f76e9192c 100644
--- a/tests/queries/0_stateless/01692_DateTime64_from_DateTime.sql
+++ b/tests/queries/0_stateless/01692_DateTime64_from_DateTime.sql
@@ -1,3 +1,5 @@
 -- { echo }
 select toDateTime64(toDateTime(1), 2);
 select toDateTime64(toDate(1), 2);
+select toDateTime64(toDateTime(1), 2, 'GMT');
+select toDateTime64(toDate(1), 2, 'GMT');

From c4b5eed4ff00506b52c1280380bbf23e911359a3 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Fri, 5 Feb 2021 22:06:23 +0300
Subject: [PATCH 0740/1238] Fix DateTime64 initialization (to match DateTime
 behaviour)

There was no specializations for toDateTime64(<numeric>), and because of
this default decimal conversion was used, however this is not enough for
DateTime/DateTime64 types, since the date may overflow and the proper
check is required (like DateTime has), and this what UBsan found [1]:

    ../src/IO/WriteHelpers.h:812:33: runtime error: index 508 out of bounds for type 'const char [201]' Received signal -3 Received signal Unknown signal (-3)

Backtrace:

    (gdb) bt
    0  LocalDateTime::LocalDateTime (this=0x7fffffff8418, year_=1970, month_=1 '\001', day_=1 '\001', hour_=2 '\002', minute_=0 '\000', second_=254 '\376') at LocalDateTime.h:83
    1  0x00000000138a5edb in DB::writeDateTimeText<(char)45, (char)58, (char)32, (char)46> (datetime64=..., scale=7, buf=..., date_lut=...) at WriteHelpers.h:852
    2  0x0000000019c379b4 in DB::DataTypeDateTime64::serializeText (this=0x7ffff5c4b0d8, column=..., row_num=0, ostr=..., settings=...) at DataTypeDateTime64.cpp:66
    3  0x0000000019d297e4 in DB::IDataType::serializeAsText (this=0x7ffff5c4b0d8, column=..., row_num=0, ostr=..., settings=...) at IDataType.cpp:387

  [1]: https://clickhouse-test-reports.s3.yandex.net/19527/cea8ae162ffbf92e5ed29304ab010704c5d611c8/fuzzer_ubsan/report.html#fail1

Also fix CAST for DateTime64
---
 src/Functions/FunctionsConversion.h           | 55 ++++++++++++++++++-
 .../01691_DateTime64_clamp.reference          |  9 +++
 .../0_stateless/01691_DateTime64_clamp.sql    |  5 ++
 3 files changed, 68 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/01691_DateTime64_clamp.reference
 create mode 100644 tests/queries/0_stateless/01691_DateTime64_clamp.sql

diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h
index df58e184d54..df0cba4c844 100644
--- a/src/Functions/FunctionsConversion.h
+++ b/src/Functions/FunctionsConversion.h
@@ -477,6 +477,59 @@ template <typename Name> struct ConvertImpl<DataTypeDate, DataTypeDateTime64, Na
 template <typename Name> struct ConvertImpl<DataTypeDateTime, DataTypeDateTime64, Name, ConvertDefaultBehaviorTag>
     : DateTimeTransformImpl<DataTypeDateTime, DataTypeDateTime64, ToDateTime64Transform> {};
 
+/** Conversion of numeric to DateTime64
+  */
+
+template <typename FromType>
+struct ToDateTime64TransformUnsigned
+{
+    static constexpr auto name = "toDateTime64";
+
+    const DateTime64::NativeType scale_multiplier = 1;
+
+    ToDateTime64TransformUnsigned(UInt32 scale = 0)
+        : scale_multiplier(DecimalUtils::scaleMultiplier<DateTime64::NativeType>(scale))
+    {}
+
+    inline NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(const FromType & from, const DateLUTImpl &) const
+    {
+        return DecimalUtils::decimalFromComponentsWithMultiplier<DateTime64>(from, 0, scale_multiplier);
+    }
+};
+template <typename FromType>
+struct ToDateTime64TransformSigned
+{
+    static constexpr auto name = "toDateTime64";
+
+    const DateTime64::NativeType scale_multiplier = 1;
+
+    ToDateTime64TransformSigned(UInt32 scale = 0)
+        : scale_multiplier(DecimalUtils::scaleMultiplier<DateTime64::NativeType>(scale))
+    {}
+
+    inline NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(const FromType & from, const DateLUTImpl &) const
+    {
+        if (from < 0)
+            return 0;
+        return DecimalUtils::decimalFromComponentsWithMultiplier<DateTime64>(from, 0, scale_multiplier);
+    }
+};
+
+template <typename Name> struct ConvertImpl<DataTypeInt8, DataTypeDateTime64, Name>
+    : DateTimeTransformImpl<DataTypeInt8, DataTypeDateTime64, ToDateTime64TransformSigned<Int8>> {};
+template <typename Name> struct ConvertImpl<DataTypeInt16, DataTypeDateTime64, Name>
+    : DateTimeTransformImpl<DataTypeInt16, DataTypeDateTime64, ToDateTime64TransformSigned<Int16>> {};
+template <typename Name> struct ConvertImpl<DataTypeInt32, DataTypeDateTime64, Name>
+    : DateTimeTransformImpl<DataTypeInt32, DataTypeDateTime64, ToDateTime64TransformSigned<Int32>> {};
+template <typename Name> struct ConvertImpl<DataTypeInt64, DataTypeDateTime64, Name>
+    : DateTimeTransformImpl<DataTypeInt64, DataTypeDateTime64, ToDateTime64TransformSigned<Int64>> {};
+template <typename Name> struct ConvertImpl<DataTypeUInt64, DataTypeDateTime64, Name>
+    : DateTimeTransformImpl<DataTypeUInt64, DataTypeDateTime64, ToDateTime64TransformUnsigned<UInt64>> {};
+template <typename Name> struct ConvertImpl<DataTypeFloat32, DataTypeDateTime64, Name>
+    : DateTimeTransformImpl<DataTypeFloat32, DataTypeDateTime64, ToDateTime64TransformSigned<Float32>> {};
+template <typename Name> struct ConvertImpl<DataTypeFloat64, DataTypeDateTime64, Name>
+    : DateTimeTransformImpl<DataTypeFloat64, DataTypeDateTime64, ToDateTime64TransformSigned<Float64>> {};
+
 /** Conversion of DateTime64 to Date or DateTime: discards fractional part.
  */
 template <typename Transform>
@@ -2318,7 +2371,7 @@ private:
                 using LeftDataType = typename Types::LeftType;
                 using RightDataType = typename Types::RightType;
 
-                if constexpr (IsDataTypeDecimalOrNumber<LeftDataType> && IsDataTypeDecimalOrNumber<RightDataType>)
+                if constexpr (IsDataTypeDecimalOrNumber<LeftDataType> && IsDataTypeDecimalOrNumber<RightDataType> && !std::is_same_v<DataTypeDateTime64, RightDataType>)
                 {
                     if (wrapper_cast_type == CastType::accurate)
                     {
diff --git a/tests/queries/0_stateless/01691_DateTime64_clamp.reference b/tests/queries/0_stateless/01691_DateTime64_clamp.reference
new file mode 100644
index 00000000000..1d222e2cb21
--- /dev/null
+++ b/tests/queries/0_stateless/01691_DateTime64_clamp.reference
@@ -0,0 +1,9 @@
+-- { echo }
+SELECT toDateTime(-2, 2);
+1970-01-01 03:00:00.00
+SELECT toDateTime64(-2, 2);
+1970-01-01 03:00:00.00
+SELECT CAST(-1 AS DateTime64);
+1970-01-01 03:00:00.000
+SELECT CAST('2020-01-01 00:00:00.3' AS DateTime64);
+2020-01-01 00:00:00.300
diff --git a/tests/queries/0_stateless/01691_DateTime64_clamp.sql b/tests/queries/0_stateless/01691_DateTime64_clamp.sql
new file mode 100644
index 00000000000..7ccce597adf
--- /dev/null
+++ b/tests/queries/0_stateless/01691_DateTime64_clamp.sql
@@ -0,0 +1,5 @@
+-- { echo }
+SELECT toDateTime(-2, 2);
+SELECT toDateTime64(-2, 2);
+SELECT CAST(-1 AS DateTime64);
+SELECT CAST('2020-01-01 00:00:00.3' AS DateTime64);

From cddfc91bcccd9e3cccf77e81fbeb831382432cd6 Mon Sep 17 00:00:00 2001
From: George <gyuton@yandex-team.ru>
Date: Sat, 6 Feb 2021 13:12:17 +0300
Subject: [PATCH 0741/1238] Fixes

---
 .../functions/ip-address-functions.md          | 18 ++++--------------
 .../functions/ip-address-functions.md          | 18 ++++--------------
 2 files changed, 8 insertions(+), 28 deletions(-)

diff --git a/docs/en/sql-reference/functions/ip-address-functions.md b/docs/en/sql-reference/functions/ip-address-functions.md
index b7a47c09d8f..ab64fdc74d5 100644
--- a/docs/en/sql-reference/functions/ip-address-functions.md
+++ b/docs/en/sql-reference/functions/ip-address-functions.md
@@ -265,7 +265,7 @@ SELECT toIPv6('127.0.0.1')
 └─────────────────────┘
 ```
 
-## isIPv4String {#isIPv4String}
+## isIPv4String {#isipv4string}
 
 Determines whether the input string is an IPv4 address or not.
 
@@ -277,7 +277,7 @@ isIPv4String(string)
 
 **Parameters**
 
--   `string` — String. [String](../../sql-reference/data-types/string.md).
+-   `string` — IP address. [String](../../sql-reference/data-types/string.md).
 
 **Returned value**
 
@@ -291,8 +291,6 @@ Query:
 
 ```sql
 SELECT isIPv4String('0.0.0.0');
-
-SELECT isIPv4String('Hello');
 ```
 
 Result:
@@ -301,12 +299,9 @@ Result:
 ┌─isIPv4String('0.0.0.0')─┐
 │                       1 │
 └─────────────────────────┘
-┌─isIPv4String('Hello')─┐
-│                     0 │
-└───────────────────────┘
 ```
 
-## isIPv6String {#isIPv4String}
+## isIPv6String {#isipv4string}
 
 Determines whether the input string is an IPv6 address or not. 
 
@@ -318,7 +313,7 @@ isIPv6String(string)
 
 **Parameters**
 
--   `string` — String. [String](../../sql-reference/data-types/string.md).
+-   `string` — IP address. [String](../../sql-reference/data-types/string.md).
 
 **Returned value**
 
@@ -332,8 +327,6 @@ Query:
 
 ``` sql
 SELECT isIPv6String('::ffff:127.0.0.1');
-
-SELECT isIPv6String('Hello');
 ```
 
 Result:
@@ -342,9 +335,6 @@ Result:
 ┌─isIPv6String('::ffff:127.0.0.1')─┐
 │                                1 │
 └──────────────────────────────────┘
-┌─isIPv6String('Hello')─┐
-│                     0 │
-└───────────────────────┘
 ```
 
 [Original article](https://clickhouse.tech/docs/en/query_language/functions/ip_address_functions/) <!--hide-->
diff --git a/docs/ru/sql-reference/functions/ip-address-functions.md b/docs/ru/sql-reference/functions/ip-address-functions.md
index 640d6d0e4fd..68895aac7a6 100644
--- a/docs/ru/sql-reference/functions/ip-address-functions.md
+++ b/docs/ru/sql-reference/functions/ip-address-functions.md
@@ -243,7 +243,7 @@ SELECT
 └───────────────────────────────────┴──────────────────────────────────┘
 ```
 
-## isIPv4String {#isIPv4String}
+## isIPv4String {#isipv4string}
 
 Определяет, является ли строка адресом IPv4 или нет.
 
@@ -255,7 +255,7 @@ isIPv4String(string)
 
 **Параметры**
 
--   `string` — строка. [String](../../sql-reference/data-types/string.md).
+-   `string` — IP адрес. [String](../../sql-reference/data-types/string.md).
 
 **Возвращаемое значение**
 
@@ -269,8 +269,6 @@ isIPv4String(string)
 
 ```sql
 SELECT isIPv4String('0.0.0.0');
-
-SELECT isIPv4String('Hello');
 ```
 
 Результат:
@@ -279,12 +277,9 @@ SELECT isIPv4String('Hello');
 ┌─isIPv4String('0.0.0.0')─┐
 │                       1 │
 └─────────────────────────┘
-┌─isIPv4String('Hello')─┐
-│                     0 │
-└───────────────────────┘
 ```
 
-## isIPv6String {#isIPv4String}
+## isIPv6String {#isipv4string}
 
 Определяет, является ли строка адресом IPv6 или нет.
 
@@ -296,7 +291,7 @@ isIPv6String(string)
 
 **Параметры**
 
--   `string` — строка. [String](../../sql-reference/data-types/string.md).
+-   `string` — IP адрес. [String](../../sql-reference/data-types/string.md).
 
 **Возвращаемое значение**
 
@@ -310,8 +305,6 @@ isIPv6String(string)
 
 ``` sql
 SELECT isIPv6String('::ffff:127.0.0.1');
-
-SELECT isIPv6String('Hello');
 ```
 
 Результат:
@@ -320,9 +313,6 @@ SELECT isIPv6String('Hello');
 ┌─isIPv6String('::ffff:127.0.0.1')─┐
 │                                1 │
 └──────────────────────────────────┘
-┌─isIPv6String('Hello')─┐
-│                     0 │
-└───────────────────────┘
 ```
 
 [Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/ip_address_functions/) <!--hide-->

From 9b75e5d4119dbbc2ba8b213cf8f3ad4bba700098 Mon Sep 17 00:00:00 2001
From: gyuton <40863448+gyuton@users.noreply.github.com>
Date: Sat, 6 Feb 2021 13:17:14 +0300
Subject: [PATCH 0742/1238] Update docs/ru/operations/settings/settings.md

Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
 docs/ru/operations/settings/settings.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index e7ac91c79c8..d827fd6f3ca 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -2475,7 +2475,7 @@ SELECT SUM(-1), MAX(0) FROM system.one WHERE 0;
 
 ## optimize_on_insert {#optimize-on-insert}
 
-Включает или выключает преобразованние данных перед их вставкой, как будто над блоком было произведено слияние (например замена, сворачивание, агрегация...).
+Включает или выключает преобразование данных перед добавлением в таблицу, как будто над добавляемым блоком предварительно было произведено слияние (например, выполняются замена, сворачивание, агрегирование данных).
 
 Возможные значения:
 

From 951a18983006539a91e43dda5d38de9c8e26170a Mon Sep 17 00:00:00 2001
From: gyuton <40863448+gyuton@users.noreply.github.com>
Date: Sat, 6 Feb 2021 13:17:24 +0300
Subject: [PATCH 0743/1238] Update docs/ru/operations/settings/settings.md

Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
 docs/ru/operations/settings/settings.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index d827fd6f3ca..2d2bbb02852 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -2486,7 +2486,7 @@ SELECT SUM(-1), MAX(0) FROM system.one WHERE 0;
 
 **Пример**
 
-Разница между включенным и выключенным состоянием:
+Сравните добавление данных при включенной и выключенной настройке:
 
 Запрос:
 

From 6c91e744e4c9ec9593c5f3cff64c17b18fb50ab6 Mon Sep 17 00:00:00 2001
From: gyuton <40863448+gyuton@users.noreply.github.com>
Date: Sat, 6 Feb 2021 13:17:36 +0300
Subject: [PATCH 0744/1238] Update docs/ru/operations/settings/settings.md

Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
 docs/ru/operations/settings/settings.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index 2d2bbb02852..1d175c5fe79 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -2525,6 +2525,6 @@ SELECT * FROM test2;
 └─────────────┘
 ```
 
-Обратите внимание на то, что эта настройка влияет на поведение [материализованное представление ](../../sql-reference/statements/create/view.md#materialized) и [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md).
+Обратите внимание на то, что эта настройка влияет на поведение [материализованных представлений](../../sql-reference/statements/create/view.md#materialized) и БД [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md).
 
 [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/settings/) <!--hide-->

From 4222b5ece6903f26b9efb93b73981465cd23b924 Mon Sep 17 00:00:00 2001
From: George <gyuton@yandex-team.ru>
Date: Sat, 6 Feb 2021 13:23:57 +0300
Subject: [PATCH 0745/1238] Fixes

---
 .../aggregate-functions/reference/argmax.md          |  4 ----
 .../aggregate-functions/reference/argmin.md          | 12 ++++--------
 .../aggregate-functions/reference/argmax.md          |  6 +-----
 .../aggregate-functions/reference/argmin.md          |  6 +-----
 4 files changed, 6 insertions(+), 22 deletions(-)

diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmax.md b/docs/en/sql-reference/aggregate-functions/reference/argmax.md
index fe30de95eab..8412e00458f 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/argmax.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/argmax.md
@@ -25,10 +25,6 @@ argMax(tuple(arg, val))
 -   `arg` — Argument.
 -   `val` — Value.
 
-or 
-
--   `tuple(arg, val)` — [Tuple](../../../sql-reference/data-types/tuple.md).
-
 **Returned value**
 
 -   `arg` value that corresponds to maximum `val` value.
diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmin.md b/docs/en/sql-reference/aggregate-functions/reference/argmin.md
index 484c1c1161f..0c5b39aebe3 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/argmin.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/argmin.md
@@ -4,9 +4,9 @@ toc_priority: 105
 
 # argMin {#agg-function-argmin}
 
-Calculates the `arg` value for a minimal `val` value. If there are several different values of `arg` for minimal values of `val`, returns the first of these values encountered.
+Calculates the `arg` value for a minimum `val` value. If there are several different values of `arg` for minimum values of `val`, returns the first of these values encountered.
 
-Tuple version of this function will return the tuple with the minimal `val` value. It is convenient for use with [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md).
+Tuple version of this function will return the tuple with the minimum `val` value. It is convenient for use with [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md).
 
 **Syntax**
 
@@ -25,19 +25,15 @@ argMin(tuple(arg, val))
 -   `arg` — Argument.
 -   `val` — Value.
 
-or 
-
--   `tuple(arg, val)` — [Tuple](../../../sql-reference/data-types/tuple.md).
-
 **Returned value**
 
--   `arg` value that corresponds to minimal `val` value.
+-   `arg` value that corresponds to minimum `val` value.
 
 Type: matches `arg` type. 
 
 For tuple in the input:
 
--   Tuple with minimal `val` value.
+-   Tuple with minimum `val` value.
 
 Type: [Tuple](../../../sql-reference/data-types/tuple.md).
 
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/argmax.md b/docs/ru/sql-reference/aggregate-functions/reference/argmax.md
index 2ff7afb4ad6..35167512d36 100644
--- a/docs/ru/sql-reference/aggregate-functions/reference/argmax.md
+++ b/docs/ru/sql-reference/aggregate-functions/reference/argmax.md
@@ -25,10 +25,6 @@ argMax(tuple(arg, val))
 -   `arg` — аргумент.
 -   `val` — значение.
 
-или
-
--   `tuple(arg, val)` — [Tuple](../../../sql-reference/data-types/tuple.md).
-
 **Возвращаемое значение**
 
 -   Значение `arg`, соответствующее максимальному значению `val`.
@@ -43,7 +39,7 @@ argMax(tuple(arg, val))
 
 **Пример**
 
-Таблица ввода:
+Исходная таблица:
 
 ``` text
 ┌─user─────┬─salary─┐
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/argmin.md b/docs/ru/sql-reference/aggregate-functions/reference/argmin.md
index 2d4095db30b..63b7d5c8200 100644
--- a/docs/ru/sql-reference/aggregate-functions/reference/argmin.md
+++ b/docs/ru/sql-reference/aggregate-functions/reference/argmin.md
@@ -25,10 +25,6 @@ argMin(tuple(arg, val))
 -   `arg` — аргумент.
 -   `val` — значение.
 
-или
-
--   `tuple(arg, val)` — [Tuple](../../../sql-reference/data-types/tuple.md).
-
 **Возвращаемое значение**
 
 -   Значение `arg`, соответствующее минимальному значению `val`.
@@ -43,7 +39,7 @@ argMin(tuple(arg, val))
 
 **Пример**
 
-Таблица ввода:
+Исходная таблица:
 
 ``` text
 ┌─user─────┬─salary─┐

From 4092916db68de1264bb2afb3f1a27f681c43e28c Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 6 Feb 2021 14:56:05 +0300
Subject: [PATCH 0746/1238] Useless changes

---
 src/Common/ThreadProfileEvents.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Common/ThreadProfileEvents.cpp b/src/Common/ThreadProfileEvents.cpp
index e6336baecda..327178c92ff 100644
--- a/src/Common/ThreadProfileEvents.cpp
+++ b/src/Common/ThreadProfileEvents.cpp
@@ -68,7 +68,7 @@ TasksStatsCounters::TasksStatsCounters(const UInt64 tid, const MetricsProvider p
     case MetricsProvider::Netlink:
         stats_getter = [metrics_provider = std::make_shared<TaskStatsInfoGetter>(), tid]()
                 {
-                    ::taskstats result;
+                    ::taskstats result{};
                     metrics_provider->getStat(result, tid);
                     return result;
                 };
@@ -76,7 +76,7 @@ TasksStatsCounters::TasksStatsCounters(const UInt64 tid, const MetricsProvider p
     case MetricsProvider::Procfs:
         stats_getter = [metrics_provider = std::make_shared<ProcfsMetricsProvider>(tid)]()
                 {
-                    ::taskstats result;
+                    ::taskstats result{};
                     metrics_provider->getTaskStats(result);
                     return result;
                 };

From a790cd9bc5e96be05052201eae5fc65f91b20055 Mon Sep 17 00:00:00 2001
From: George <gyuton@yandex-team.ru>
Date: Sat, 6 Feb 2021 16:37:12 +0300
Subject: [PATCH 0747/1238] Fixes

---
 docs/en/sql-reference/functions/array-functions.md | 6 +++---
 docs/ru/sql-reference/functions/array-functions.md | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md
index be6440bbe9c..d5b357795d7 100644
--- a/docs/en/sql-reference/functions/array-functions.md
+++ b/docs/en/sql-reference/functions/array-functions.md
@@ -1311,7 +1311,7 @@ arrayMin([func,] arr)
 
 -   The minimum of function values (or the array minimum). 
 
-Type: matches the array elements type. 
+Type: if `func` is specified, matches `func` return value type, else matches the array elements type. 
 
 **Examples**
 
@@ -1366,7 +1366,7 @@ arrayMax([func,] arr)
 
 -   The maximum of function values (or the array maximum). 
 
-Type: matches the array elements type. 
+Type: if `func` is specified, matches `func` return value type, else matches the array elements type. 
 
 **Examples**
 
@@ -1421,7 +1421,7 @@ arraySum([func,] arr)
 
 -   The sum of the function values (or the array sum).
 
-Type: for decimal numbers in source array — [Decimal128](../../sql-reference/data-types/decimal.md), for floating point numbers — [Float64](../../sql-reference/data-types/float.md), for numeric unsigned — [UInt64](../../sql-reference/data-types/int-uint.md), and for numeric signed — [Int64](../../sql-reference/data-types/int-uint.md).
+Type: for decimal numbers in source array (or for converted values, if `func` is specified) — [Decimal128](../../sql-reference/data-types/decimal.md), for floating point numbers — [Float64](../../sql-reference/data-types/float.md), for numeric unsigned — [UInt64](../../sql-reference/data-types/int-uint.md), and for numeric signed — [Int64](../../sql-reference/data-types/int-uint.md).
 
 **Examples**
 
diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md
index 3bba6f799c3..82976af5fbc 100644
--- a/docs/ru/sql-reference/functions/array-functions.md
+++ b/docs/ru/sql-reference/functions/array-functions.md
@@ -1158,7 +1158,7 @@ arrayMin([func,] arr)
 
 -   Минимальное значение функции (или минимальный элемент массива).
 
-Тип: соответствует типу элементов массива.
+Тип: если передана `func`, соответствует типу ее возвращаемого значения, иначе соответствует типу элементов массива.
 
 **Примеры**
 
@@ -1213,7 +1213,7 @@ arrayMax([func,] arr)
 
 -   Максимальное значение функции (или максимальный элемент массива).
 
-Тип: соответствует типу элементов массива.
+Тип: если передана `func`, соответствует типу ее возвращаемого значения, иначе соответствует типу элементов массива.
 
 **Примеры**
 
@@ -1268,7 +1268,7 @@ arraySum([func,] arr)
 
 -   Сумма значений функции (или сумма элементов массива).
 
-Тип: для Decimal чисел в исходном массиве — [Decimal128](../../sql-reference/data-types/decimal.md), для чисел с плавающей точкой — [Float64](../../sql-reference/data-types/float.md), для беззнаковых целых чисел — [UInt64](../../sql-reference/data-types/int-uint.md), для целых чисел со знаком — [Int64](../../sql-reference/data-types/int-uint.md).
+Тип: для Decimal чисел в исходном массиве (если функция `func` была передана, то для чисел, преобразованных ею) — [Decimal128](../../sql-reference/data-types/decimal.md), для чисел с плавающей точкой — [Float64](../../sql-reference/data-types/float.md), для беззнаковых целых чисел — [UInt64](../../sql-reference/data-types/int-uint.md), для целых чисел со знаком — [Int64](../../sql-reference/data-types/int-uint.md).
 
 **Примеры**
 

From 529fb1ea49d2e0071ecaa938d04a65be42e75324 Mon Sep 17 00:00:00 2001
From: feng lv <fenglv15@mails.ucas.ac.cn>
Date: Sat, 6 Feb 2021 14:59:48 +0000
Subject: [PATCH 0748/1238] remove some useless code

---
 .../RewriteSumIfFunctionVisitor.cpp           | 20 ++++---------------
 1 file changed, 4 insertions(+), 16 deletions(-)

diff --git a/src/Interpreters/RewriteSumIfFunctionVisitor.cpp b/src/Interpreters/RewriteSumIfFunctionVisitor.cpp
index 2fb0765db13..2593c220c63 100644
--- a/src/Interpreters/RewriteSumIfFunctionVisitor.cpp
+++ b/src/Interpreters/RewriteSumIfFunctionVisitor.cpp
@@ -13,18 +13,6 @@ void RewriteSumIfFunctionMatcher::visit(ASTPtr & ast, Data & data)
         visit(*func, ast, data);
 }
 
-static ASTPtr createNewFunctionWithOneArgument(const String & func_name, const ASTPtr & argument)
-{
-    auto new_func = std::make_shared<ASTFunction>();
-    new_func->name = func_name;
-
-    auto new_arguments = std::make_shared<ASTExpressionList>();
-    new_arguments->children.push_back(argument);
-    new_func->arguments = new_arguments;
-    new_func->children.push_back(new_arguments);
-    return new_func;
-}
-
 void RewriteSumIfFunctionMatcher::visit(const ASTFunction & func, ASTPtr & ast, Data &)
 {
     if (!func.arguments || func.arguments->children.empty())
@@ -46,7 +34,7 @@ void RewriteSumIfFunctionMatcher::visit(const ASTFunction & func, ASTPtr & ast,
 
         if (func_arguments.size() == 2 && literal->value.get<UInt64>() == 1)
         {
-            auto new_func = createNewFunctionWithOneArgument("countIf", func_arguments[1]);
+            auto new_func = makeASTFunction("countIf", func_arguments[1]);
             new_func->setAlias(func.alias);
             ast = std::move(new_func);
             return;
@@ -74,7 +62,7 @@ void RewriteSumIfFunctionMatcher::visit(const ASTFunction & func, ASTPtr & ast,
             /// sum(if(cond, 1, 0)) -> countIf(cond)
             if (first_value == 1 && second_value == 0)
             {
-                auto new_func = createNewFunctionWithOneArgument("countIf", if_arguments[0]);
+                auto new_func = makeASTFunction("countIf", if_arguments[0]);
                 new_func->setAlias(func.alias);
                 ast = std::move(new_func);
                 return;
@@ -82,8 +70,8 @@ void RewriteSumIfFunctionMatcher::visit(const ASTFunction & func, ASTPtr & ast,
             /// sum(if(cond, 0, 1)) -> countIf(not(cond))
             if (first_value == 0 && second_value == 1)
             {
-                auto not_func = createNewFunctionWithOneArgument("not", if_arguments[0]);
-                auto new_func = createNewFunctionWithOneArgument("countIf", not_func);
+                auto not_func = makeASTFunction("not", if_arguments[0]);
+                auto new_func = makeASTFunction("countIf", not_func);
                 new_func->setAlias(func.alias);
                 ast = std::move(new_func);
                 return;

From 0cd36019280bc243d9b5bb926cfa2ec08ccaf623 Mon Sep 17 00:00:00 2001
From: Maxim Akhmedov <max42@yandex-team.ru>
Date: Sat, 6 Feb 2021 18:08:42 +0300
Subject: [PATCH 0749/1238] Allow using MergeTreeWhereOptimizer not only with
 MergeTree-based storages

---
 src/Interpreters/InterpreterSelectQuery.cpp    |  9 +++++++--
 .../MergeTree/MergeTreeWhereOptimizer.cpp      | 18 +++++-------------
 .../MergeTree/MergeTreeWhereOptimizer.h        |  4 +---
 3 files changed, 13 insertions(+), 18 deletions(-)

diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp
index 2ee1b3956e4..6122719d94e 100644
--- a/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/src/Interpreters/InterpreterSelectQuery.cpp
@@ -390,13 +390,18 @@ InterpreterSelectQuery::InterpreterSelectQuery(
         if (try_move_to_prewhere && storage && !row_policy_filter && query.where() && !query.prewhere() && !query.final())
         {
             /// PREWHERE optimization: transfer some condition from WHERE to PREWHERE if enabled and viable
-            if (const auto * merge_tree = dynamic_cast<const MergeTreeData *>(storage.get()))
+            if (const auto & column_sizes = storage->getColumnSizes(); !column_sizes.empty())
             {
+                /// Extract column compressed sizes.
+                std::unordered_map<std::string, UInt64> column_compressed_sizes;
+                for (const auto & [name, sizes] : column_sizes)
+                    column_compressed_sizes[name] = sizes.data_compressed;
+
                 SelectQueryInfo current_info;
                 current_info.query = query_ptr;
                 current_info.syntax_analyzer_result = syntax_analyzer_result;
 
-                MergeTreeWhereOptimizer{current_info, *context, *merge_tree, metadata_snapshot, syntax_analyzer_result->requiredSourceColumns(), log};
+                MergeTreeWhereOptimizer{current_info, *context, std::move(column_compressed_sizes), metadata_snapshot, syntax_analyzer_result->requiredSourceColumns(), log};
             }
         }
 
diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp
index 5d6b74cabe9..34cac56d74c 100644
--- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp
+++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp
@@ -30,7 +30,7 @@ static constexpr auto threshold = 2;
 MergeTreeWhereOptimizer::MergeTreeWhereOptimizer(
     SelectQueryInfo & query_info,
     const Context & context,
-    const MergeTreeData & data,
+    std::unordered_map<std::string, UInt64> column_sizes_,
     const StorageMetadataPtr & metadata_snapshot,
     const Names & queried_columns_,
     Poco::Logger * log_)
@@ -39,28 +39,20 @@ MergeTreeWhereOptimizer::MergeTreeWhereOptimizer(
     , queried_columns{queried_columns_}
     , block_with_constants{KeyCondition::getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, context)}
     , log{log_}
+    , column_sizes{std::move(column_sizes_)}
 {
     const auto & primary_key = metadata_snapshot->getPrimaryKey();
     if (!primary_key.column_names.empty())
         first_primary_key_column = primary_key.column_names[0];
 
-    calculateColumnSizes(data, queried_columns);
+    for (const auto & [_, size] : column_sizes)
+        total_size_of_queried_columns += size;
+
     determineArrayJoinedNames(query_info.query->as<ASTSelectQuery &>());
     optimize(query_info.query->as<ASTSelectQuery &>());
 }
 
 
-void MergeTreeWhereOptimizer::calculateColumnSizes(const MergeTreeData & data, const Names & column_names)
-{
-    for (const auto & column_name : column_names)
-    {
-        UInt64 size = data.getColumnCompressedSize(column_name);
-        column_sizes[column_name] = size;
-        total_size_of_queried_columns += size;
-    }
-}
-
-
 static void collectIdentifiersNoSubqueries(const ASTPtr & ast, NameSet & set)
 {
     if (auto opt_name = tryGetIdentifierName(ast))
diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h
index 939c265b3e5..cad77fb9eed 100644
--- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h
+++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h
@@ -33,7 +33,7 @@ public:
     MergeTreeWhereOptimizer(
         SelectQueryInfo & query_info,
         const Context & context,
-        const MergeTreeData & data,
+        std::unordered_map<std::string, UInt64> column_sizes_,
         const StorageMetadataPtr & metadata_snapshot,
         const Names & queried_columns_,
         Poco::Logger * log_);
@@ -75,8 +75,6 @@ private:
     /// Transform Conditions list to WHERE or PREWHERE expression.
     static ASTPtr reconstruct(const Conditions & conditions);
 
-    void calculateColumnSizes(const MergeTreeData & data, const Names & column_names);
-
     void optimizeConjunction(ASTSelectQuery & select, ASTFunction * const fun) const;
 
     void optimizeArbitrary(ASTSelectQuery & select) const;

From 8953fe1eb29774689e3163c161323109526c072c Mon Sep 17 00:00:00 2001
From: Ivan Lezhankin <ilezhankin@yandex-team.ru>
Date: Sat, 6 Feb 2021 19:30:46 +0300
Subject: [PATCH 0750/1238] Fix seekable buffer

---
 src/IO/ReadBufferFromFileDescriptor.cpp | 1 +
 src/IO/SeekableReadBuffer.h             | 6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/src/IO/ReadBufferFromFileDescriptor.cpp b/src/IO/ReadBufferFromFileDescriptor.cpp
index 0ab07b85027..dd5d9e67cd7 100644
--- a/src/IO/ReadBufferFromFileDescriptor.cpp
+++ b/src/IO/ReadBufferFromFileDescriptor.cpp
@@ -90,6 +90,7 @@ bool ReadBufferFromFileDescriptor::nextImpl()
     if (bytes_read)
     {
         ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorReadBytes, bytes_read);
+        working_buffer = internal_buffer;
         working_buffer.resize(bytes_read);
     }
     else
diff --git a/src/IO/SeekableReadBuffer.h b/src/IO/SeekableReadBuffer.h
index f7a468b0490..f8e6d817fb1 100644
--- a/src/IO/SeekableReadBuffer.h
+++ b/src/IO/SeekableReadBuffer.h
@@ -21,6 +21,12 @@ public:
      */
     virtual off_t seek(off_t off, int whence) = 0;
 
+    /**
+     * Keep in mind that seekable buffer may encounter eof() once and the working buffer
+     * may get into inconsistent state. Don't forget to reset it on the first nextImpl()
+     * after seek().
+     */
+
     /**
      * @return Offset from the begin of the underlying buffer / file corresponds to the buffer current position.
      */

From f0370b241c341ce961bac516afbd909631ec6b3d Mon Sep 17 00:00:00 2001
From: Dmitriy <sevirov@yandex-team.ru>
Date: Sat, 6 Feb 2021 20:17:25 +0300
Subject: [PATCH 0751/1238] Document the opentelemetry_start_trace_probability
 setting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Задокументировал настройку.
---
 docs/en/operations/settings/settings.md | 11 +++++++++++
 docs/ru/operations/settings/settings.md | 11 +++++++++++
 2 files changed, 22 insertions(+)

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index edfd391c71e..869c76fb975 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -2592,4 +2592,15 @@ Possible values:
 
 Default value: `16`.
 
+## opentelemetry_start_trace_probability {#opentelemetry-start-trace-probability}
+
+Enables a trace for executed queries.
+
+Possible values:
+
+-   0 — The trace for a executed query is disabled.
+-   1 — The trace for a executed query is enabled.
+
+Default value: `0`.
+
 [Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index bacc97dfd14..2aa81daa0b0 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -2473,4 +2473,15 @@ SELECT SUM(-1), MAX(0) FROM system.one WHERE 0;
 
 Значение по умолчанию: `16`.
 
+## opentelemetry_start_trace_probability {#opentelemetry-start-trace-probability}
+
+Включает трассировку для выполненных запросов.
+
+Возможные значения:
+
+-   0 — трассировка для выполненного запроса отключена.
+-   1 — трассировка для выполненного запроса включена.
+
+Значение по умолчанию: `0`.
+
 [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/settings/) <!--hide-->

From ab55556c5e3600528ba8f5e3d54638990b5b3a5b Mon Sep 17 00:00:00 2001
From: Ivan Lezhankin <ilezhankin@yandex-team.ru>
Date: Sat, 6 Feb 2021 20:24:52 +0300
Subject: [PATCH 0752/1238] Fix build of utils

---
 utils/check-mysql-binlog/main.cpp | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/utils/check-mysql-binlog/main.cpp b/utils/check-mysql-binlog/main.cpp
index ccdc4cd168c..04dfb56ff08 100644
--- a/utils/check-mysql-binlog/main.cpp
+++ b/utils/check-mysql-binlog/main.cpp
@@ -69,21 +69,27 @@ static DB::MySQLReplication::BinlogEventPtr parseSingleEventBody(
         case DB::MySQLReplication::WRITE_ROWS_EVENT_V1:
         case DB::MySQLReplication::WRITE_ROWS_EVENT_V2:
         {
-            event = std::make_shared<DB::MySQLReplication::WriteRowsEvent>(last_table_map_event, std::move(header));
+            DB::MySQLReplication::RowsEventHeader rows_header(header.type);
+            rows_header.parse(*event_payload);
+            event = std::make_shared<DB::MySQLReplication::WriteRowsEvent>(last_table_map_event, std::move(header), rows_header);
             event->parseEvent(*event_payload);
             break;
         }
         case DB::MySQLReplication::DELETE_ROWS_EVENT_V1:
         case DB::MySQLReplication::DELETE_ROWS_EVENT_V2:
         {
-            event = std::make_shared<DB::MySQLReplication::DeleteRowsEvent>(last_table_map_event, std::move(header));
+            DB::MySQLReplication::RowsEventHeader rows_header(header.type);
+            rows_header.parse(*event_payload);
+            event = std::make_shared<DB::MySQLReplication::DeleteRowsEvent>(last_table_map_event, std::move(header), rows_header);
             event->parseEvent(*event_payload);
             break;
         }
         case DB::MySQLReplication::UPDATE_ROWS_EVENT_V1:
         case DB::MySQLReplication::UPDATE_ROWS_EVENT_V2:
         {
-            event = std::make_shared<DB::MySQLReplication::UpdateRowsEvent>(last_table_map_event, std::move(header));
+            DB::MySQLReplication::RowsEventHeader rows_header(header.type);
+            rows_header.parse(*event_payload);
+            event = std::make_shared<DB::MySQLReplication::UpdateRowsEvent>(last_table_map_event, std::move(header), rows_header);
             event->parseEvent(*event_payload);
             break;
         }

From fd899daa0096c10dc8f0c18c0bff1f97615aabbf Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 6 Feb 2021 22:17:19 +0300
Subject: [PATCH 0753/1238] Fix UBSan report in arrayCumSum

---
 src/Functions/array/arrayCumSum.cpp | 62 +++++++++++++++++------------
 1 file changed, 37 insertions(+), 25 deletions(-)

diff --git a/src/Functions/array/arrayCumSum.cpp b/src/Functions/array/arrayCumSum.cpp
index 40c0cd4ade2..97c623d0be9 100644
--- a/src/Functions/array/arrayCumSum.cpp
+++ b/src/Functions/array/arrayCumSum.cpp
@@ -45,6 +45,41 @@ struct ArrayCumSumImpl
     }
 
 
+    template <typename Src, typename Dst>
+    static void NO_SANITIZE_UNDEFINED implConst(
+        size_t size, const IColumn::Offset * __restrict offsets, Dst * __restrict res_values, Src src_value)
+    {
+        size_t pos = 0;
+        for (const auto * end = offsets + size; offsets < end; ++offsets)
+        {
+            auto offset = *offsets;
+            Src accumulated{};
+            for (; pos < offset; ++pos)
+            {
+                accumulated += src_value;
+                res_values[pos] = accumulated;
+            }
+        }
+    }
+
+    template <typename Src, typename Dst>
+    static void NO_SANITIZE_UNDEFINED implVector(
+        size_t size, const IColumn::Offset * __restrict offsets, Dst * __restrict res_values, const Src * __restrict src_values)
+    {
+        size_t pos = 0;
+        for (const auto * end = offsets + size; offsets < end; ++offsets)
+        {
+            auto offset = *offsets;
+            Src accumulated{};
+            for (; pos < offset; ++pos)
+            {
+                accumulated += src_values[pos];
+                res_values[pos] = accumulated;
+            }
+        }
+    }
+
+
     template <typename Element, typename Result>
     static bool executeType(const ColumnPtr & mapped, const ColumnArray & array, ColumnPtr & res_ptr)
     {
@@ -75,19 +110,7 @@ struct ArrayCumSumImpl
 
             typename ColVecResult::Container & res_values = res_nested->getData();
             res_values.resize(column_const->size());
-
-            size_t pos = 0;
-            for (auto offset : offsets)
-            {
-                // skip empty arrays
-                if (pos < offset)
-                {
-                    res_values[pos++] = x; // NOLINT
-                    for (; pos < offset; ++pos)
-                        res_values[pos] = res_values[pos - 1] + x;
-                }
-            }
-
+            implConst(offsets.size(), offsets.data(), res_values.data(), x);
             res_ptr = ColumnArray::create(std::move(res_nested), array.getOffsetsPtr());
             return true;
         }
@@ -103,18 +126,7 @@ struct ArrayCumSumImpl
 
         typename ColVecResult::Container & res_values = res_nested->getData();
         res_values.resize(data.size());
-
-        size_t pos = 0;
-        for (auto offset : offsets)
-        {
-            // skip empty arrays
-            if (pos < offset)
-            {
-                res_values[pos] = data[pos]; // NOLINT
-                for (++pos; pos < offset; ++pos)
-                    res_values[pos] = res_values[pos - 1] + data[pos];
-            }
-        }
+        implVector(offsets.size(), offsets.data(), res_values.data(), data.data());
         res_ptr = ColumnArray::create(std::move(res_nested), array.getOffsetsPtr());
         return true;
 

From 65902f4c6e21fdb6b48764d4ef4c96e588eb946b Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 6 Feb 2021 22:21:22 +0300
Subject: [PATCH 0754/1238] Fix UBSan report in arrayCumSum

---
 src/Functions/array/arrayCumSum.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Functions/array/arrayCumSum.cpp b/src/Functions/array/arrayCumSum.cpp
index 97c623d0be9..96001901a6e 100644
--- a/src/Functions/array/arrayCumSum.cpp
+++ b/src/Functions/array/arrayCumSum.cpp
@@ -53,7 +53,7 @@ struct ArrayCumSumImpl
         for (const auto * end = offsets + size; offsets < end; ++offsets)
         {
             auto offset = *offsets;
-            Src accumulated{};
+            Dst accumulated{};
             for (; pos < offset; ++pos)
             {
                 accumulated += src_value;
@@ -70,7 +70,7 @@ struct ArrayCumSumImpl
         for (const auto * end = offsets + size; offsets < end; ++offsets)
         {
             auto offset = *offsets;
-            Src accumulated{};
+            Dst accumulated{};
             for (; pos < offset; ++pos)
             {
                 accumulated += src_values[pos];

From d9d49a4d30c5fcbe970f69d4c4157cd1b5f85a51 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 6 Feb 2021 22:22:21 +0300
Subject: [PATCH 0755/1238] Fix UBSan report in arrayCumSum

---
 .../array/arrayCumSumNonNegative.cpp          | 41 ++++++++++---------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/src/Functions/array/arrayCumSumNonNegative.cpp b/src/Functions/array/arrayCumSumNonNegative.cpp
index ff0f081d70b..148d4283701 100644
--- a/src/Functions/array/arrayCumSumNonNegative.cpp
+++ b/src/Functions/array/arrayCumSumNonNegative.cpp
@@ -48,6 +48,26 @@ struct ArrayCumSumNonNegativeImpl
     }
 
 
+    template <typename Src, typename Dst>
+    static void NO_SANITIZE_UNDEFINED implVector(
+        size_t size, const IColumn::Offset * __restrict offsets, Dst * __restrict res_values, const Src * __restrict src_values)
+    {
+        size_t pos = 0;
+        for (const auto * end = offsets + size; offsets < end; ++offsets)
+        {
+            auto offset = *offsets;
+            Dst accumulated{};
+            for (; pos < offset; ++pos)
+            {
+                accumulated += src_values[pos];
+                if (accumulated < 0)
+                    accumulated = 0;
+                res_values[pos] = accumulated;
+            }
+        }
+    }
+
+
     template <typename Element, typename Result>
     static bool executeType(const ColumnPtr & mapped, const ColumnArray & array, ColumnPtr & res_ptr)
     {
@@ -70,26 +90,7 @@ struct ArrayCumSumNonNegativeImpl
 
         typename ColVecResult::Container & res_values = res_nested->getData();
         res_values.resize(data.size());
-
-        size_t pos = 0;
-        Result accum_sum = 0;
-        for (auto offset : offsets)
-        {
-            // skip empty arrays
-            if (pos < offset)
-            {
-                accum_sum = data[pos] > 0 ? data[pos] : Element(0); // NOLINT
-                res_values[pos] = accum_sum;
-                for (++pos; pos < offset; ++pos)
-                {
-                    accum_sum = accum_sum + data[pos];
-                    if (accum_sum < 0)
-                        accum_sum = 0;
-
-                    res_values[pos] = accum_sum;
-                }
-            }
-        }
+        implVector(offsets.size(), offsets.data(), res_values.data(), data.data());
         res_ptr = ColumnArray::create(std::move(res_nested), array.getOffsetsPtr());
         return true;
 

From 1209c02869b9c742afdee459edbbd6c1c25cf29f Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 6 Feb 2021 23:18:42 +0300
Subject: [PATCH 0756/1238] Fix overflow in mapPopulateSeries

---
 src/Functions/array/mapPopulateSeries.cpp                | 9 ++++++++-
 .../0_stateless/01698_map_populate_overflow.reference    | 1 +
 .../queries/0_stateless/01698_map_populate_overflow.sql  | 2 ++
 3 files changed, 11 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/01698_map_populate_overflow.reference
 create mode 100644 tests/queries/0_stateless/01698_map_populate_overflow.sql

diff --git a/src/Functions/array/mapPopulateSeries.cpp b/src/Functions/array/mapPopulateSeries.cpp
index 46c99dba483..2050e0c28ab 100644
--- a/src/Functions/array/mapPopulateSeries.cpp
+++ b/src/Functions/array/mapPopulateSeries.cpp
@@ -16,6 +16,7 @@ namespace ErrorCodes
     extern const int ILLEGAL_COLUMN;
     extern const int ILLEGAL_TYPE_OF_ARGUMENT;
     extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+    extern const int TOO_LARGE_ARRAY_SIZE;
 }
 
 class FunctionMapPopulateSeries : public IFunction
@@ -188,9 +189,13 @@ private:
                 }
             }
 
+            static constexpr size_t MAX_ARRAY_SIZE = 1ULL << 30;
+            if (static_cast<size_t>(max_key - min_key) > MAX_ARRAY_SIZE)
+                throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size in the result of function {}", getName());
+
             /* fill the result arrays */
             KeyType key;
-            for (key = min_key; key <= max_key; ++key)
+            for (key = min_key;; ++key)
             {
                 to_keys_data.insert(key);
 
@@ -205,6 +210,8 @@ private:
                 }
 
                 ++offset;
+                if (key == max_key)
+                    break;
             }
 
             to_keys_offsets.push_back(offset);
diff --git a/tests/queries/0_stateless/01698_map_populate_overflow.reference b/tests/queries/0_stateless/01698_map_populate_overflow.reference
new file mode 100644
index 00000000000..24e0038125a
--- /dev/null
+++ b/tests/queries/0_stateless/01698_map_populate_overflow.reference
@@ -0,0 +1 @@
+([18446744073709551615],[0])
diff --git a/tests/queries/0_stateless/01698_map_populate_overflow.sql b/tests/queries/0_stateless/01698_map_populate_overflow.sql
new file mode 100644
index 00000000000..90c47ff3949
--- /dev/null
+++ b/tests/queries/0_stateless/01698_map_populate_overflow.sql
@@ -0,0 +1,2 @@
+SELECT mapPopulateSeries([0xFFFFFFFFFFFFFFFF], [0], 0xFFFFFFFFFFFFFFFF);
+SELECT mapPopulateSeries([toUInt64(1)], [1], 0xFFFFFFFFFFFFFFFF); -- { serverError 128 }

From 4514c06c2fe194d4b49344bdf59fa8cc7b5071f0 Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Sat, 6 Feb 2021 12:59:31 -0800
Subject: [PATCH 0757/1238] Docs - date_sub

---
 .../functions/date-time-functions.md          | 40 +++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index 2cec116f986..664450b385a 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -518,6 +518,46 @@ Result:
 └────────────────────────────────────────────────────────────────────────────────────────┘
 ```
 
+## date\_sub {#date_sub}
+
+This subtracts a time/date interval from a date and then returns the date.
+
+**Syntax**
+
+``` sql
+date_sub(unit, value, date)
+```
+
+Aliases: `dateSub`, `DATE_SUB`. 
+
+**Parameters**
+
+-   `unit` — The type of interval to subtract. [String](../../sql-reference/data-types/string.md).
+
+        Supported values: second, minute, hour, day, week, month, quarter, year.
+-   `value` - Value in specified unit - [Int](../../sql-reference/data-types/int-uint.md)    
+-   `date` — [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
+
+**Returned value**
+
+Returns Date or DateTime with `value` expressed in `unit` subtracted from `date`. 
+
+**Example**
+
+Query:
+
+``` sql
+SELECT date_sub(YEAR, 3, toDate('2018-01-01'));
+```
+
+Result:
+
+``` text
+┌─minus(toDate('2018-01-01'), toIntervalYear(3))─┐
+│                                     2015-01-01 │
+└────────────────────────────────────────────────┘
+```
+
 ## timestamp\_add {#timestamp_add}
 
 Adds the specified time value with the provided date or date time value.

From 5ddcbe8b90f8e5964afed5a265a1284d44310797 Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Sat, 6 Feb 2021 13:05:06 -0800
Subject: [PATCH 0758/1238] Docs - minor unrelated fix to date_trunc

---
 docs/en/sql-reference/functions/date-time-functions.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index 664450b385a..86a1110caf9 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -380,7 +380,7 @@ Alias: `dateTrunc`.
 
 **Parameters**
 
--   `unit` — The type of interval to truncate the result. [String](../../sql-reference/data-types/string.md).
+-   `unit` — The type of interval to truncate the result. [String Literal](../syntax.md#syntax-string-literal).
     Possible values:
 
     - `second`

From eadd23aa38c1dd9e505a4f7abbbfdf93439babc8 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Sun, 7 Feb 2021 00:06:27 +0300
Subject: [PATCH 0759/1238] Added LRUHashMap

---
 src/Common/HashTable/HashMap.h          |   5 +
 src/Common/HashTable/HashTable.h        | 103 ++++++++--
 src/Common/HashTable/LRUHashMap.h       | 244 ++++++++++++++++++++++++
 src/Common/tests/CMakeLists.txt         |   3 +
 src/Common/tests/gtest_lru_hash_map.cpp | 161 ++++++++++++++++
 src/Common/tests/lru_hash_map_perf.cpp  | 244 ++++++++++++++++++++++++
 6 files changed, 747 insertions(+), 13 deletions(-)
 create mode 100644 src/Common/HashTable/LRUHashMap.h
 create mode 100644 src/Common/tests/gtest_lru_hash_map.cpp
 create mode 100644 src/Common/tests/lru_hash_map_perf.cpp

diff --git a/src/Common/HashTable/HashMap.h b/src/Common/HashTable/HashMap.h
index e09f60c4294..99dc5414107 100644
--- a/src/Common/HashTable/HashMap.h
+++ b/src/Common/HashTable/HashMap.h
@@ -109,6 +109,11 @@ struct HashMapCell
         DB::assertChar(',', rb);
         DB::readDoubleQuoted(value.second, rb);
     }
+
+    static bool constexpr need_to_notify_cell_during_move = false;
+
+    static void move(HashMapCell * /* old_location */, HashMapCell * /* new_location */) {}
+
 };
 
 template <typename Key, typename TMapped, typename Hash, typename TState = HashTableNoState>
diff --git a/src/Common/HashTable/HashTable.h b/src/Common/HashTable/HashTable.h
index 15fa09490e6..5d4410ce4e7 100644
--- a/src/Common/HashTable/HashTable.h
+++ b/src/Common/HashTable/HashTable.h
@@ -204,6 +204,13 @@ struct HashTableCell
     /// Deserialization, in binary and text form.
     void read(DB::ReadBuffer & rb)        { DB::readBinary(key, rb); }
     void readText(DB::ReadBuffer & rb)    { DB::readDoubleQuoted(key, rb); }
+
+    /// When cell pointer is moved during erase, reinsert or resize operations
+
+    static constexpr bool need_to_notify_cell_during_move = false;
+
+    static void move(HashTableCell * /* old_location */, HashTableCell * /* new_location */) {}
+
 };
 
 /**
@@ -427,6 +434,34 @@ protected:
         }
     }
 
+    template<bool enable>
+    struct AllocatorBufferDeleter;
+
+    template<>
+    struct AllocatorBufferDeleter<false>
+    {
+        AllocatorBufferDeleter(Allocator &, size_t) {}
+
+        void operator()(Cell *) const {}
+
+    };
+
+    template<>
+    struct AllocatorBufferDeleter<true>
+    {
+        AllocatorBufferDeleter(Allocator & allocator_, size_t size_)
+            : allocator(allocator_)
+            , size(size_)
+        {}
+
+        void operator()(Cell * buffer) const
+        {
+            allocator.free(buffer, size);
+        }
+
+        Allocator & allocator;
+        size_t size;
+    };
 
     /// Increase the size of the buffer.
     void resize(size_t for_num_elems = 0, size_t for_buf_size = 0)
@@ -460,7 +495,23 @@ protected:
             new_grower.increaseSize();
 
         /// Expand the space.
-        buf = reinterpret_cast<Cell *>(Allocator::realloc(buf, getBufferSizeInBytes(), new_grower.bufSize() * sizeof(Cell)));
+
+        size_t old_buffer_size = getBufferSizeInBytes();
+
+        /** If cell required to be notified during move we need to temporary keep old buffer
+         * because realloc does not quarantee for reallocated buffer to have same base address
+         */
+        AllocatorBufferDeleter<Cell::need_to_notify_cell_during_move> buffer_deleter(*this, old_buffer_size);
+        std::unique_ptr<Cell, decltype(buffer_deleter)> old_buffer(buf, buffer_deleter);
+
+        if constexpr (Cell::need_to_notify_cell_during_move)
+        {
+            buf = reinterpret_cast<Cell *>(Allocator::alloc(new_grower.bufSize() * sizeof(Cell)));
+            memcpy(buf, old_buffer.get(), old_buffer_size);
+        }
+        else
+            buf = reinterpret_cast<Cell *>(Allocator::realloc(buf, old_buffer_size, new_grower.bufSize() * sizeof(Cell)));
+
         grower = new_grower;
 
         /** Now some items may need to be moved to a new location.
@@ -470,7 +521,12 @@ protected:
         size_t i = 0;
         for (; i < old_size; ++i)
             if (!buf[i].isZero(*this))
-                reinsert(buf[i], buf[i].getHash(*this));
+            {
+                size_t updated_place_value = reinsert(buf[i], buf[i].getHash(*this));
+
+                if constexpr (Cell::need_to_notify_cell_during_move)
+                    Cell::move(&(old_buffer.get())[i], &buf[updated_place_value]);
+            }
 
         /** There is also a special case:
           *    if the element was to be at the end of the old buffer,                  [        x]
@@ -481,7 +537,13 @@ protected:
           *    process tail from the collision resolution chain immediately after it   [        o    x    ]
           */
         for (; !buf[i].isZero(*this); ++i)
-            reinsert(buf[i], buf[i].getHash(*this));
+        {
+            size_t updated_place_value = reinsert(buf[i], buf[i].getHash(*this));
+
+            if constexpr (Cell::need_to_notify_cell_during_move)
+                if (&buf[i] != &buf[updated_place_value])
+                    Cell::move(&buf[i], &buf[updated_place_value]);
+        }
 
 #ifdef DBMS_HASH_MAP_DEBUG_RESIZES
         watch.stop();
@@ -495,20 +557,20 @@ protected:
     /** Paste into the new buffer the value that was in the old buffer.
       * Used when increasing the buffer size.
       */
-    void reinsert(Cell & x, size_t hash_value)
+    size_t reinsert(Cell & x, size_t hash_value)
     {
         size_t place_value = grower.place(hash_value);
 
         /// If the element is in its place.
         if (&x == &buf[place_value])
-            return;
+            return place_value;
 
         /// Compute a new location, taking into account the collision resolution chain.
         place_value = findCell(Cell::getKey(x.getValue()), hash_value, place_value);
 
         /// If the item remains in its place in the old collision resolution chain.
         if (!buf[place_value].isZero(*this))
-            return;
+            return place_value;
 
         /// Copy to a new location and zero the old one.
         x.setHash(hash_value);
@@ -516,6 +578,7 @@ protected:
         x.setZero();
 
         /// Then the elements that previously were in collision with this can move to the old place.
+        return place_value;
     }
 
 
@@ -881,7 +944,11 @@ public:
     /// Reinsert node pointed to by iterator
     void ALWAYS_INLINE reinsert(iterator & it, size_t hash_value)
     {
-        reinsert(*it.getPtr(), hash_value);
+        size_t place_value = reinsert(*it.getPtr(), hash_value);
+
+        if constexpr (Cell::need_to_notify_cell_during_move)
+            if (it.getPtr() != &buf[place_value])
+                Cell::move(it.getPtr(), &buf[place_value]);
     }
 
 
@@ -958,8 +1025,14 @@ public:
         return const_cast<std::decay_t<decltype(*this)> *>(this)->find(x, hash_value);
     }
 
-    std::enable_if_t<Grower::performs_linear_probing_with_single_step, void>
+    std::enable_if_t<Grower::performs_linear_probing_with_single_step, bool>
     ALWAYS_INLINE erase(const Key & x)
+    {
+        return erase(x, hash(x));
+    }
+
+    std::enable_if_t<Grower::performs_linear_probing_with_single_step, bool>
+    ALWAYS_INLINE erase(const Key & x, size_t hash_value)
     {
         /** Deletion from open addressing hash table without tombstones
           *
@@ -977,21 +1050,19 @@ public:
             {
                 --m_size;
                 this->clearHasZero();
+                return true;
             }
             else
             {
-                return;
+                return false;
             }
         }
 
-        size_t hash_value = hash(x);
         size_t erased_key_position = findCell(x, hash_value, grower.place(hash_value));
 
         /// Key is not found
         if (buf[erased_key_position].isZero(*this))
-        {
-            return;
-        }
+            return false;
 
         /// We need to guarantee loop termination because there will be empty position
         assert(m_size < grower.bufSize());
@@ -1056,12 +1127,18 @@ public:
 
             /// Move the element to the freed place
             memcpy(static_cast<void *>(&buf[erased_key_position]), static_cast<void *>(&buf[next_position]), sizeof(Cell));
+
+            if constexpr (Cell::need_to_notify_cell_during_move)
+                Cell::move(&buf[next_position], &buf[erased_key_position]);
+
             /// Now we have another freed place
             erased_key_position = next_position;
         }
 
         buf[erased_key_position].setZero();
         --m_size;
+
+        return true;
     }
 
     bool ALWAYS_INLINE has(const Key & x) const
diff --git a/src/Common/HashTable/LRUHashMap.h b/src/Common/HashTable/LRUHashMap.h
new file mode 100644
index 00000000000..292006f2438
--- /dev/null
+++ b/src/Common/HashTable/LRUHashMap.h
@@ -0,0 +1,244 @@
+#pragma once
+
+#include <common/types.h>
+
+#include <boost/intrusive/trivial_value_traits.hpp>
+#include <boost/intrusive/list.hpp>
+#include <boost/noncopyable.hpp>
+
+#include <Core/Defines.h>
+#include <Common/Exception.h>
+#include <Common/HashTable/HashMap.h>
+#include <Common/PODArray.h>
+
+
+template <typename TKey, typename TMapped, typename Hash, bool save_hash_in_cell>
+struct LRUHashMapCell :
+    public std::conditional_t<save_hash_in_cell,
+        HashMapCellWithSavedHash<TKey, TMapped, Hash, HashTableNoState>,
+        HashMapCell<TKey, TMapped, Hash, HashTableNoState>>
+{
+public:
+    using Key = TKey;
+
+    using Base = std::conditional_t<save_hash_in_cell,
+        HashMapCellWithSavedHash<TKey, TMapped, Hash, HashTableNoState>,
+        HashMapCell<TKey, TMapped, Hash, HashTableNoState>>;
+
+    using Mapped = typename Base::Mapped;
+    using State = typename Base::State;
+
+    using mapped_type = Mapped;
+    using key_type = Key;
+
+    using Base::Base;
+
+    static bool constexpr need_to_notify_cell_during_move = true;
+
+    static void move(LRUHashMapCell * __restrict old_location, LRUHashMapCell * __restrict new_location)
+    {
+        /** We update new location prev and next pointers because during hash table resize
+         *  they can be updated during move of another cell.
+         */
+
+        new_location->prev = old_location->prev;
+        new_location->next = old_location->next;
+
+        LRUHashMapCell * prev = new_location->prev;
+        LRUHashMapCell * next = new_location->next;
+
+        /// Updated previous next and next previous nodes of list to point to new location
+
+        if (prev)
+            prev->next = new_location;
+
+        if (next)
+            next->prev = new_location;
+    }
+
+private:
+    template<typename, typename, typename, bool>
+    friend class LRUHashMapCellNodeTraits;
+
+    LRUHashMapCell * next = nullptr;
+    LRUHashMapCell * prev = nullptr;
+};
+
+template<typename Key, typename Value, typename Hash, bool save_hash_in_cell>
+struct LRUHashMapCellNodeTraits
+{
+    using node = LRUHashMapCell<Key, Value, Hash, save_hash_in_cell>;
+    using node_ptr = LRUHashMapCell<Key, Value, Hash, save_hash_in_cell> *;
+    using const_node_ptr = const LRUHashMapCell<Key, Value, Hash, save_hash_in_cell> *;
+
+    static node * get_next(const node * ptr) { return ptr->next; }
+    static void set_next(node * __restrict ptr, node * __restrict next) { ptr->next = next; }
+    static node * get_previous(const node * ptr) { return ptr->prev; }
+    static void set_previous(node * __restrict ptr, node * __restrict prev) { ptr->prev = prev; }
+};
+
+template <typename TKey, typename TValue, typename Hash, bool save_hash_in_cells>
+class LRUHashMapImpl :
+    private HashMapTable<
+        TKey,
+        LRUHashMapCell<TKey, TValue, Hash, save_hash_in_cells>,
+        Hash,
+        HashTableGrower<>,
+        HashTableAllocator>
+{
+    using Base = HashMapTable<
+        TKey,
+        LRUHashMapCell<TKey, TValue, Hash, save_hash_in_cells>,
+        Hash,
+        HashTableGrower<>,
+        HashTableAllocator>;
+public:
+    using Key = TKey;
+    using Value = TValue;
+
+    using Cell = LRUHashMapCell<Key, Value, Hash, save_hash_in_cells>;
+
+    using LRUHashMapCellIntrusiveValueTraits =
+        boost::intrusive::trivial_value_traits<
+            LRUHashMapCellNodeTraits<Key, Value, Hash, save_hash_in_cells>,
+            boost::intrusive::link_mode_type::normal_link>;
+
+    using LRUList = boost::intrusive::list<
+        Cell,
+        boost::intrusive::value_traits<LRUHashMapCellIntrusiveValueTraits>,
+        boost::intrusive::constant_time_size<false>>;
+
+    using iterator = typename LRUList::iterator;
+    using const_iterator = typename LRUList::const_iterator;
+    using reverse_iterator = typename LRUList::reverse_iterator;
+    using const_reverse_iterator = typename LRUList::const_reverse_iterator;
+
+    LRUHashMapImpl(size_t max_size_, bool preallocate_max_size_in_hash_map = false)
+        : Base(preallocate_max_size_in_hash_map ? max_size_ : 32)
+        , max_size(max_size_)
+    {
+        assert(max_size > 0);
+    }
+
+    std::pair<Cell *, bool> insert(const Key & key, const Value & value)
+    {
+        return emplace(key, value);
+    }
+
+    std::pair<Cell *, bool> insert(const Key & key, Value && value)
+    {
+        return emplace(key, std::move(value));
+    }
+
+    template<typename ...Args>
+    std::pair<Cell *, bool> emplace(const Key & key, Args&&... args)
+    {
+        size_t hash_value = Base::hash(key);
+
+        Cell * it = Base::find(key, hash_value);
+
+        if (it)
+        {
+            /// Cell contains element return it and put to the end of lru list
+            lru_list.splice(lru_list.end(), lru_list, lru_list.iterator_to(*it));
+            return std::make_pair(it, false);
+        }
+
+        if (size() == max_size)
+        {
+            /// Erase least recently used element from front of the list
+            Cell & node = lru_list.front();
+
+            const Key & element_to_remove_key = node.getKey();
+            size_t key_hash = node.getHash(*this);
+
+            lru_list.pop_front();
+
+            [[maybe_unused]] bool erased = Base::erase(element_to_remove_key, key_hash);
+            assert(erased);
+        }
+
+        [[maybe_unused]] bool inserted;
+
+        /// Insert value first try to insert in zero storage if not then insert in buffer
+        if (!Base::emplaceIfZero(key, it, inserted, hash_value))
+            Base::emplaceNonZero(key, it, inserted, hash_value);
+
+        assert(inserted);
+
+        new (&it->getMapped()) Value(std::forward<Args>(args)...);
+
+        /// Put cell to the end of lru list
+        lru_list.insert(lru_list.end(), *it);
+
+        return std::make_pair(it, true);
+    }
+
+    using Base::find;
+
+    Value & get(const Key & key)
+    {
+        auto it = Base::find(key);
+        assert(it);
+
+        Value & value = it->getMapped();
+
+        /// Put cell to the end of lru list
+        lru_list.splice(lru_list.end(), lru_list, lru_list.iterator_to(*it));
+
+        return value;
+    }
+
+    const Value & get(const Key & key) const
+    {
+        return const_cast<std::decay_t<decltype(*this)> *>(this)->get(key);
+    }
+
+    bool contains(const Key & key) const
+    {
+        return Base::has(key);
+    }
+
+    bool erase(const Key & key)
+    {
+        auto hash = Base::hash(key);
+        auto it = Base::find(key, hash);
+
+        if (!it)
+            return false;
+
+        lru_list.erase(lru_list.iterator_to(*it));
+
+        return Base::erase(key, hash);
+    }
+
+    void clear()
+    {
+        lru_list.clear();
+        Base::clear();
+    }
+
+    using Base::size;
+
+    size_t getMaxSize() const { return max_size; }
+
+    iterator begin() { return lru_list.begin(); }
+    const_iterator begin() const { return lru_list.cbegin(); }
+    iterator end() { return lru_list.end(); }
+    const_iterator end() const { return lru_list.cend(); }
+
+    reverse_iterator rbegin() { return lru_list.rbegin(); }
+    const_reverse_iterator rbegin() const { return lru_list.crbegin(); }
+    reverse_iterator rend() { return lru_list.rend(); }
+    const_reverse_iterator rend() const { return lru_list.crend(); }
+
+private:
+    size_t max_size;
+    LRUList lru_list;
+};
+
+template <typename Key, typename Value, typename Hash = DefaultHash<Key>>
+using LRUHashMap = LRUHashMapImpl<Key, Value, Hash, false>;
+
+template <typename Key, typename Value, typename Hash = DefaultHash<Key>>
+using LRUHashMapWithSavedHash = LRUHashMapImpl<Key, Value, Hash, true>;
diff --git a/src/Common/tests/CMakeLists.txt b/src/Common/tests/CMakeLists.txt
index cb36e2b97d2..2dd56e862f0 100644
--- a/src/Common/tests/CMakeLists.txt
+++ b/src/Common/tests/CMakeLists.txt
@@ -38,6 +38,9 @@ target_link_libraries (arena_with_free_lists PRIVATE dbms)
 add_executable (pod_array pod_array.cpp)
 target_link_libraries (pod_array PRIVATE clickhouse_common_io)
 
+add_executable (lru_hash_map_perf lru_hash_map_perf.cpp)
+target_link_libraries (lru_hash_map_perf PRIVATE clickhouse_common_io)
+
 add_executable (thread_creation_latency thread_creation_latency.cpp)
 target_link_libraries (thread_creation_latency PRIVATE clickhouse_common_io)
 
diff --git a/src/Common/tests/gtest_lru_hash_map.cpp b/src/Common/tests/gtest_lru_hash_map.cpp
new file mode 100644
index 00000000000..237c751303e
--- /dev/null
+++ b/src/Common/tests/gtest_lru_hash_map.cpp
@@ -0,0 +1,161 @@
+#include <iomanip>
+#include <iostream>
+
+#include <Common/HashTable/LRUHashMap.h>
+
+#include <gtest/gtest.h>
+
+template<typename LRUHashMap>
+std::vector<typename LRUHashMap::Key> convertToVector(const LRUHashMap & map)
+{
+    std::vector<typename LRUHashMap::Key> result;
+    result.reserve(map.size());
+
+    for (auto & node: map)
+        result.emplace_back(node.getKey());
+
+    return result;
+}
+
+void testInsert(size_t elements_to_insert_size, size_t map_size)
+{
+    using LRUHashMap = LRUHashMap<int, std::string>;
+
+    LRUHashMap map(map_size);
+
+    std::vector<int> expected;
+
+    for (size_t i = 0; i < elements_to_insert_size; ++i)
+        map.insert(i, std::to_string(i));
+
+    for (size_t i = elements_to_insert_size - map_size; i < elements_to_insert_size; ++i)
+        expected.emplace_back(i);
+
+    std::vector<int> actual = convertToVector(map);
+    ASSERT_EQ(map.size(), actual.size());
+    ASSERT_EQ(actual, expected);
+}
+
+TEST(LRUHashMap, Insert)
+{
+    {
+        using LRUHashMap = LRUHashMap<int, std::string>;
+
+        LRUHashMap map(3);
+
+        map.emplace(1, "1");
+        map.insert(2, "2");
+        std::string v = "3";
+        map.insert(3, v);
+        map.emplace(4, "4");
+
+        std::vector<int> expected = { 2, 3, 4 };
+        std::vector<int> actual = convertToVector(map);
+
+        ASSERT_EQ(actual, expected);
+    }
+
+    testInsert(1200000, 1200000);
+    testInsert(10, 5);
+    testInsert(1200000, 2);
+    testInsert(1200000, 1);
+}
+
+TEST(LRUHashMap, GetModify)
+{
+    using LRUHashMap = LRUHashMap<int, std::string>;
+
+    LRUHashMap map(3);
+
+    map.emplace(1, "1");
+    map.emplace(2, "2");
+    map.emplace(3, "3");
+
+    map.get(3) = "4";
+
+    std::vector<std::string> expected = { "1", "2", "4" };
+    std::vector<std::string> actual;
+    actual.reserve(map.size());
+
+    for (auto & node : map)
+        actual.emplace_back(node.getMapped());
+
+    ASSERT_EQ(actual, expected);
+}
+
+TEST(LRUHashMap, SetRecentKeyToTop)
+{
+    using LRUHashMap = LRUHashMap<int, std::string>;
+
+    LRUHashMap map(3);
+
+    map.emplace(1, "1");
+    map.emplace(2, "2");
+    map.emplace(3, "3");
+    map.emplace(1, "4");
+
+    std::vector<int> expected = { 2, 3, 1 };
+    std::vector<int> actual = convertToVector(map);
+
+    ASSERT_EQ(actual, expected);
+}
+
+TEST(LRUHashMap, GetRecentKeyToTop)
+{
+    using LRUHashMap = LRUHashMap<int, std::string>;
+
+    LRUHashMap map(3);
+
+    map.emplace(1, "1");
+    map.emplace(2, "2");
+    map.emplace(3, "3");
+    map.get(1);
+
+    std::vector<int> expected = { 2, 3, 1 };
+    std::vector<int> actual = convertToVector(map);
+
+    ASSERT_EQ(actual, expected);
+}
+
+TEST(LRUHashMap, Contains)
+{
+    using LRUHashMap = LRUHashMap<int, std::string>;
+
+    LRUHashMap map(3);
+
+    map.emplace(1, "1");
+    map.emplace(2, "2");
+    map.emplace(3, "3");
+
+    ASSERT_TRUE(map.contains(1));
+    ASSERT_TRUE(map.contains(2));
+    ASSERT_TRUE(map.contains(3));
+    ASSERT_EQ(map.size(), 3);
+
+    map.erase(1);
+    map.erase(2);
+    map.erase(3);
+
+    ASSERT_EQ(map.size(), 0);
+    ASSERT_FALSE(map.contains(1));
+    ASSERT_FALSE(map.contains(2));
+    ASSERT_FALSE(map.contains(3));
+}
+
+TEST(LRUHashMap, Clear)
+{
+    using LRUHashMap = LRUHashMap<int, std::string>;
+
+    LRUHashMap map(3);
+
+    map.emplace(1, "1");
+    map.emplace(2, "2");
+    map.emplace(3, "3");
+    map.clear();
+
+    std::vector<int> expected = {};
+    std::vector<int> actual = convertToVector(map);
+
+    ASSERT_EQ(actual, expected);
+    ASSERT_EQ(map.size(), 0);
+}
diff --git a/src/Common/tests/lru_hash_map_perf.cpp b/src/Common/tests/lru_hash_map_perf.cpp
new file mode 100644
index 00000000000..f7ef1fdd759
--- /dev/null
+++ b/src/Common/tests/lru_hash_map_perf.cpp
@@ -0,0 +1,244 @@
+#include <vector>
+#include <list>
+#include <map>
+#include <random>
+#include <pcg_random.hpp>
+
+#include <Common/Stopwatch.h>
+#include <Common/HashTable/LRUHashMap.h>
+
+template<class Key, class Value>
+class LRUHashMapBasic
+{
+public:
+    using key_type = Key;
+    using value_type = Value;
+    using list_type = std::list<key_type>;
+    using node = std::pair<value_type, typename list_type::iterator>;
+    using map_type = std::unordered_map<key_type, node, DefaultHash<Key>>;
+
+    LRUHashMapBasic(size_t max_size_, bool preallocated)
+        : hash_map(preallocated ? max_size_ : 32)
+        , max_size(max_size_)
+    {
+    }
+
+    void insert(const Key &key, const Value &value)
+    {
+        auto it = hash_map.find(key);
+
+        if (it == hash_map.end())
+        {
+            if (size() >= max_size)
+            {
+                auto iterator_to_remove = list.begin();
+
+                hash_map.erase(*iterator_to_remove);
+                list.erase(iterator_to_remove);
+            }
+
+            list.push_back(key);
+            hash_map[key] = std::make_pair(value, --list.end());
+        }
+        else
+        {
+            auto & [value_to_update, iterator_in_list_to_update] = it->second;
+
+            list.splice(list.end(), list, iterator_in_list_to_update);
+
+            iterator_in_list_to_update = list.end();
+            value_to_update = value;
+        }
+    }
+
+    value_type & get(const key_type &key)
+    {
+        auto iterator_in_map = hash_map.find(key);
+        assert(iterator_in_map != hash_map.end());
+
+        auto & [value_to_return, iterator_in_list_to_update] = iterator_in_map->second;
+
+        list.splice(list.end(), list, iterator_in_list_to_update);
+        iterator_in_list_to_update = list.end();
+
+        return value_to_return;
+    }
+
+    const value_type & get(const key_type & key) const
+    {
+        return const_cast<std::decay_t<decltype(*this)> *>(this)->get(key);
+    }
+
+    size_t getMaxSize() const
+    {
+        return max_size;
+    }
+
+    size_t size() const
+    {
+        return hash_map.size();
+    }
+
+    bool empty() const
+    {
+        return hash_map.empty();
+    }
+
+    bool contains(const Key & key)
+    {
+        return hash_map.find(key) != hash_map.end();
+    }
+
+    void clear()
+    {
+        hash_map.clear();
+        list.clear();
+    }
+
+private:
+    map_type hash_map;
+    list_type list;
+    size_t max_size;
+};
+
+std::vector<UInt64> generateNumbersToInsert(size_t numbers_to_insert_size)
+{
+    std::vector<UInt64> numbers;
+    numbers.reserve(numbers_to_insert_size);
+
+    std::random_device rd;
+    pcg64 gen(rd());
+
+    UInt64 min = std::numeric_limits<UInt64>::min();
+    UInt64 max = std::numeric_limits<UInt64>::max();
+
+    auto distribution = std::uniform_int_distribution<>(min, max);
+
+    for (size_t i = 0; i < numbers_to_insert_size; ++i)
+    {
+        UInt64 number = distribution(gen);
+        numbers.emplace_back(number);
+    }
+
+    return numbers;
+}
+
+void testInsertElementsIntoHashMap(size_t map_size, const std::vector<UInt64> & numbers_to_insert, bool preallocated)
+{
+    size_t numbers_to_insert_size = numbers_to_insert.size();
+    std::cout << "TestInsertElementsIntoHashMap preallocated map size: " << map_size << " numbers to insert size: " << numbers_to_insert_size;
+    std::cout << std::endl;
+
+    HashMap<int, int> hash_map(preallocated ? map_size : 32);
+
+    Stopwatch watch;
+
+    for (size_t i = 0; i < numbers_to_insert_size; ++i)
+        hash_map.insert({ numbers_to_insert[i], numbers_to_insert[i] });
+
+    std::cout << "Inserted in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
+
+    UInt64 summ = 0;
+
+    for (size_t i = 0; i < numbers_to_insert_size; ++i)
+    {
+        auto it = hash_map.find(numbers_to_insert[i]);
+
+        if (it)
+            summ += it->getMapped();
+    }
+
+    std::cout << "Calculated summ: " << summ << " in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
+}
+
+void testInsertElementsIntoStandardMap(size_t map_size, const std::vector<UInt64> & numbers_to_insert, bool preallocated)
+{
+    size_t numbers_to_insert_size = numbers_to_insert.size();
+    std::cout << "TestInsertElementsIntoStandardMap map size: " << map_size << " numbers to insert size: " << numbers_to_insert_size;
+    std::cout << std::endl;
+
+    std::unordered_map<int, int> hash_map(preallocated ? map_size : 32);
+
+    Stopwatch watch;
+
+    for (size_t i = 0; i < numbers_to_insert_size; ++i)
+        hash_map.insert({ numbers_to_insert[i], numbers_to_insert[i] });
+
+    std::cout << "Inserted in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
+
+    UInt64 summ = 0;
+
+    for (size_t i = 0; i < numbers_to_insert_size; ++i)
+    {
+        auto it = hash_map.find(numbers_to_insert[i]);
+
+        if (it != hash_map.end())
+            summ += it->second;
+    }
+
+    std::cout << "Calculated summ: " << summ << " in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
+}
+
+template<typename LRUCache>
+UInt64 testInsertIntoEmptyCache(size_t map_size, const std::vector<UInt64> & numbers_to_insert, bool preallocated)
+{
+    size_t numbers_to_insert_size = numbers_to_insert.size();
+    std::cout << "Test testInsertPreallocated preallocated map size: " << map_size << " numbers to insert size: " << numbers_to_insert_size;
+    std::cout << std::endl;
+
+    LRUCache cache(map_size, preallocated);
+    Stopwatch watch;
+
+    for (size_t i = 0; i < numbers_to_insert_size; ++i)
+    {
+        cache.insert(numbers_to_insert[i], numbers_to_insert[i]);
+    }
+
+    std::cout << "Inserted in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
+
+    UInt64 summ = 0;
+
+    for (size_t i = 0; i < numbers_to_insert_size; ++i)
+        if (cache.contains(numbers_to_insert[i]))
+            summ += cache.get(numbers_to_insert[i]);
+
+    std::cout << "Calculated summ: " << summ << " in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
+
+    return summ;
+}
+
+int main(int argc, char ** argv)
+{
+    (void)(argc);
+    (void)(argv);
+
+    size_t hash_map_size = 1200000;
+    size_t numbers_to_insert_size = 12000000;
+    std::vector<UInt64> numbers = generateNumbersToInsert(numbers_to_insert_size);
+
+    std::cout << "Test insert into HashMap preallocated=0" << std::endl;
+    testInsertElementsIntoHashMap(hash_map_size, numbers, true);
+    std::cout << std::endl;
+
+    std::cout << "Test insert into HashMap preallocated=1" << std::endl;
+    testInsertElementsIntoHashMap(hash_map_size, numbers, true);
+    std::cout << std::endl;
+
+    std::cout << "Test LRUHashMap preallocated=0" << std::endl;
+    testInsertIntoEmptyCache<LRUHashMap<UInt64, UInt64>>(hash_map_size, numbers, false);
+    std::cout << std::endl;
+
+    std::cout << "Test LRUHashMap preallocated=1" << std::endl;
+    testInsertIntoEmptyCache<LRUHashMap<UInt64, UInt64>>(hash_map_size, numbers, true);
+    std::cout << std::endl;
+
+    std::cout << "Test LRUHashMapBasic preallocated=0" << std::endl;
+    testInsertIntoEmptyCache<LRUHashMapBasic<UInt64, UInt64>>(hash_map_size, numbers, false);
+    std::cout << std::endl;
+
+    std::cout << "Test LRUHashMapBasic preallocated=1" << std::endl;
+    testInsertIntoEmptyCache<LRUHashMapBasic<UInt64, UInt64>>(hash_map_size, numbers, true);
+    std::cout << std::endl;
+
+    return 0;
+}

From 44b44c1fe7ca591087296cbdd783b9154ec7a6b0 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Sun, 7 Feb 2021 01:09:55 +0300
Subject: [PATCH 0760/1238] Update InterpreterSelectQuery.cpp

---
 src/Interpreters/InterpreterSelectQuery.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp
index 6122719d94e..4b89273cd86 100644
--- a/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/src/Interpreters/InterpreterSelectQuery.cpp
@@ -69,7 +69,6 @@
 #include <Processors/Transforms/FilterTransform.h>
 #include <Processors/Transforms/JoiningTransform.h>
 
-#include <Storages/MergeTree/MergeTreeData.h>
 #include <Storages/MergeTree/MergeTreeWhereOptimizer.h>
 #include <Storages/IStorage.h>
 #include <Storages/StorageView.h>

From 417cfcd6989bbe760ae2e4061e6fd07697e3bb6e Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Sun, 7 Feb 2021 01:31:20 +0300
Subject: [PATCH 0761/1238] Update ReadBuffer.h

---
 src/IO/ReadBuffer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/IO/ReadBuffer.h b/src/IO/ReadBuffer.h
index ae8898efcef..e871205aef3 100644
--- a/src/IO/ReadBuffer.h
+++ b/src/IO/ReadBuffer.h
@@ -198,7 +198,7 @@ private:
       */
     virtual bool nextImpl() { return false; }
 
-    [[noreturn]] static inline void throwReadAfterEOF()
+    [[noreturn]] static void throwReadAfterEOF()
     {
         throw Exception("Attempt to read after eof", ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF);
     }

From 46ff7d2ab04d0e1dc914f4fc038dd15572eb96b0 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Fri, 5 Feb 2021 22:06:23 +0300
Subject: [PATCH 0762/1238] Fix DateTime64 overflows

---
 src/Functions/FunctionsConversion.h                        | 6 ++++--
 tests/queries/0_stateless/01691_DateTime64_clamp.reference | 2 ++
 tests/queries/0_stateless/01691_DateTime64_clamp.sql       | 1 +
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h
index df0cba4c844..b95d4ea9790 100644
--- a/src/Functions/FunctionsConversion.h
+++ b/src/Functions/FunctionsConversion.h
@@ -491,8 +491,9 @@ struct ToDateTime64TransformUnsigned
         : scale_multiplier(DecimalUtils::scaleMultiplier<DateTime64::NativeType>(scale))
     {}
 
-    inline NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(const FromType & from, const DateLUTImpl &) const
+    inline NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const
     {
+        from = std::min(time_t(from), time_t(0xFFFFFFFF));
         return DecimalUtils::decimalFromComponentsWithMultiplier<DateTime64>(from, 0, scale_multiplier);
     }
 };
@@ -507,10 +508,11 @@ struct ToDateTime64TransformSigned
         : scale_multiplier(DecimalUtils::scaleMultiplier<DateTime64::NativeType>(scale))
     {}
 
-    inline NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(const FromType & from, const DateLUTImpl &) const
+    inline NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const
     {
         if (from < 0)
             return 0;
+        from = std::min(time_t(from), time_t(0xFFFFFFFF));
         return DecimalUtils::decimalFromComponentsWithMultiplier<DateTime64>(from, 0, scale_multiplier);
     }
 };
diff --git a/tests/queries/0_stateless/01691_DateTime64_clamp.reference b/tests/queries/0_stateless/01691_DateTime64_clamp.reference
index 1d222e2cb21..de72027334c 100644
--- a/tests/queries/0_stateless/01691_DateTime64_clamp.reference
+++ b/tests/queries/0_stateless/01691_DateTime64_clamp.reference
@@ -7,3 +7,5 @@ SELECT CAST(-1 AS DateTime64);
 1970-01-01 03:00:00.000
 SELECT CAST('2020-01-01 00:00:00.3' AS DateTime64);
 2020-01-01 00:00:00.300
+SELECT toDateTime64(bitShiftLeft(toUInt64(1),33), 2);
+2106-02-07 09:28:15.00
diff --git a/tests/queries/0_stateless/01691_DateTime64_clamp.sql b/tests/queries/0_stateless/01691_DateTime64_clamp.sql
index 7ccce597adf..6b5a4815f37 100644
--- a/tests/queries/0_stateless/01691_DateTime64_clamp.sql
+++ b/tests/queries/0_stateless/01691_DateTime64_clamp.sql
@@ -3,3 +3,4 @@ SELECT toDateTime(-2, 2);
 SELECT toDateTime64(-2, 2);
 SELECT CAST(-1 AS DateTime64);
 SELECT CAST('2020-01-01 00:00:00.3' AS DateTime64);
+SELECT toDateTime64(bitShiftLeft(toUInt64(1),33), 2);

From 905793a7e476caf5b95ed2fa3581d5d1b5524085 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 7 Feb 2021 04:49:52 +0300
Subject: [PATCH 0763/1238] Disable excessive squashing of blocks for
 StorageMemory #13052

---
 src/Formats/FormatFactory.cpp               |  1 -
 src/Interpreters/InterpreterInsertQuery.cpp | 10 ++++++----
 src/Storages/IStorage.h                     |  4 ++++
 src/Storages/StorageMemory.h                |  4 +++-
 4 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp
index 86cf12fbf68..f7f32cf9b6f 100644
--- a/src/Formats/FormatFactory.cpp
+++ b/src/Formats/FormatFactory.cpp
@@ -5,7 +5,6 @@
 #include <Interpreters/Context.h>
 #include <Core/Settings.h>
 #include <DataStreams/MaterializingBlockOutputStream.h>
-#include <DataStreams/SquashingBlockOutputStream.h>
 #include <DataStreams/NativeBlockInputStream.h>
 #include <Formats/FormatSettings.h>
 #include <Processors/Formats/IRowInputFormat.h>
diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp
index 55c4d19206f..fd5cb20531c 100644
--- a/src/Interpreters/InterpreterInsertQuery.cpp
+++ b/src/Interpreters/InterpreterInsertQuery.cpp
@@ -166,7 +166,7 @@ BlockIO InterpreterInsertQuery::execute()
     BlockIO res;
 
     StoragePtr table = getTable(query);
-    auto table_lock = table->lockForShare(context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout);
+    auto table_lock = table->lockForShare(context.getInitialQueryId(), settings.lock_acquire_timeout);
     auto metadata_snapshot = table->getInMemoryMetadataPtr();
 
     auto query_sample_block = getSampleBlock(query, table, metadata_snapshot);
@@ -348,13 +348,15 @@ BlockIO InterpreterInsertQuery::execute()
 
             /// Do not squash blocks if it is a sync INSERT into Distributed, since it lead to double bufferization on client and server side.
             /// Client-side bufferization might cause excessive timeouts (especially in case of big blocks).
-            if (!(context.getSettingsRef().insert_distributed_sync && table->isRemote()) && !no_squash && !query.watch)
+            if (!(settings.insert_distributed_sync && table->isRemote()) && !no_squash && !query.watch)
             {
+                bool table_prefers_large_blocks = table->prefersLargeBlocks();
+
                 out = std::make_shared<SquashingBlockOutputStream>(
                     out,
                     out->getHeader(),
-                    context.getSettingsRef().min_insert_block_size_rows,
-                    context.getSettingsRef().min_insert_block_size_bytes);
+                    table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size,
+                    table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0);
             }
 
             auto out_wrapper = std::make_shared<CountingBlockOutputStream>(out);
diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h
index 1c0149ac261..651688f41bb 100644
--- a/src/Storages/IStorage.h
+++ b/src/Storages/IStorage.h
@@ -131,6 +131,10 @@ public:
     /// Returns true if the storage supports reading of subcolumns of complex types.
     virtual bool supportsSubcolumns() const { return false; }
 
+    /// Requires squashing small blocks to large for optimal storage.
+    /// This is true for most storages that store data on disk.
+    virtual bool prefersLargeBlocks() const { return true; }
+
 
     /// Optional size information of each physical column.
     /// Currently it's only used by the MergeTree family for query optimizations.
diff --git a/src/Storages/StorageMemory.h b/src/Storages/StorageMemory.h
index 702cb265ea9..dc695427156 100644
--- a/src/Storages/StorageMemory.h
+++ b/src/Storages/StorageMemory.h
@@ -40,9 +40,11 @@ public:
         unsigned num_streams) override;
 
     bool supportsParallelInsert() const override { return true; }
-
     bool supportsSubcolumns() const override { return true; }
 
+    /// Smaller blocks (e.g. 64K rows) are better for CPU cache.
+    bool prefersLargeBlocks() const override { return false; }
+
     BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, const Context & context) override;
 
     void drop() override;

From c75e34c2914222b266086b99f9935027062d8b34 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 7 Feb 2021 05:39:53 +0300
Subject: [PATCH 0764/1238] Corrections

---
 src/Interpreters/InterpreterInsertQuery.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp
index fd5cb20531c..67444d49f86 100644
--- a/src/Interpreters/InterpreterInsertQuery.cpp
+++ b/src/Interpreters/InterpreterInsertQuery.cpp
@@ -289,7 +289,7 @@ BlockIO InterpreterInsertQuery::execute()
 
                 new_settings.max_threads = std::max<UInt64>(1, settings.max_insert_threads);
 
-                if (settings.min_insert_block_size_rows)
+                if (settings.min_insert_block_size_rows && table->prefersLargeBlocks())
                     new_settings.max_block_size = settings.min_insert_block_size_rows;
 
                 Context new_context = context;

From 38ea4af33956b067a4340e18c52e0d7d8e5f5630 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 7 Feb 2021 05:40:06 +0300
Subject: [PATCH 0765/1238] Fix quadratic INSERT

---
 src/Storages/StorageMemory.cpp | 32 ++++++++++++++++++++++----------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp
index 1474fbcee02..9d0c67ac34c 100644
--- a/src/Storages/StorageMemory.cpp
+++ b/src/Storages/StorageMemory.cpp
@@ -115,22 +115,34 @@ public:
 
     void write(const Block & block) override
     {
-        const auto size_bytes_diff = block.allocatedBytes();
-        const auto size_rows_diff = block.rows();
-
         metadata_snapshot->check(block, true);
-        {
-            std::lock_guard lock(storage.mutex);
-            auto new_data = std::make_unique<Blocks>(*(storage.data.get()));
-            new_data->push_back(block);
-            storage.data.set(std::move(new_data));
+        new_blocks.emplace_back(block);
+    }
 
-            storage.total_size_bytes.fetch_add(size_bytes_diff, std::memory_order_relaxed);
-            storage.total_size_rows.fetch_add(size_rows_diff, std::memory_order_relaxed);
+    void writeSuffix() override
+    {
+        size_t inserted_bytes = 0;
+        size_t inserted_rows = 0;
+
+        for (const auto & block : new_blocks)
+        {
+            inserted_bytes += block.allocatedBytes();
+            inserted_rows += block.rows();
         }
 
+        std::lock_guard lock(storage.mutex);
+
+        auto new_data = std::make_unique<Blocks>(*(storage.data.get()));
+        new_data->insert(new_data->end(), new_blocks.begin(), new_blocks.end());
+
+        storage.data.set(std::move(new_data));
+        storage.total_size_bytes.fetch_add(inserted_bytes, std::memory_order_relaxed);
+        storage.total_size_rows.fetch_add(inserted_rows, std::memory_order_relaxed);
     }
+
 private:
+    Blocks new_blocks;
+
     StorageMemory & storage;
     StorageMetadataPtr metadata_snapshot;
 };

From e4b3ae34f1d2d597696cad8ded6e89ce023b6e54 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 7 Feb 2021 05:42:43 +0300
Subject: [PATCH 0766/1238] Add perf test

---
 tests/performance/memory_cache_friendliness.xml | 8 ++++++++
 1 file changed, 8 insertions(+)
 create mode 100644 tests/performance/memory_cache_friendliness.xml

diff --git a/tests/performance/memory_cache_friendliness.xml b/tests/performance/memory_cache_friendliness.xml
new file mode 100644
index 00000000000..92b79661540
--- /dev/null
+++ b/tests/performance/memory_cache_friendliness.xml
@@ -0,0 +1,8 @@
+<test>
+    <create_query>CREATE TABLE test_memory (x UInt64) ENGINE Memory</create_query>
+    <fill_query>INSERT INTO test_memory SELECT 1 FROM numbers(1000000000)</fill_query>
+
+    <query>SELECT sum(x * x + x) FROM test_memory</query>
+
+    <drop_query>DROP TABLE IF EXISTS test_memory</drop_query>
+</test>

From 5b62b89752bed33c67981c51ace98b105646558f Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 7 Feb 2021 05:49:07 +0300
Subject: [PATCH 0767/1238] Fix test

---
 tests/queries/0_stateless/00341_squashing_insert_select2.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/00341_squashing_insert_select2.sql b/tests/queries/0_stateless/00341_squashing_insert_select2.sql
index 469fdaaa64a..3eb5a2682e0 100644
--- a/tests/queries/0_stateless/00341_squashing_insert_select2.sql
+++ b/tests/queries/0_stateless/00341_squashing_insert_select2.sql
@@ -1,5 +1,5 @@
 DROP TABLE IF EXISTS numbers_squashed;
-CREATE TABLE numbers_squashed (number UInt8) ENGINE = Memory;
+CREATE TABLE numbers_squashed (number UInt8) ENGINE = StripeLog;
 
 SET min_insert_block_size_rows = 100;
 SET min_insert_block_size_bytes = 0;

From 77c0f0a0e7941f1406677787e525f0d9e2df04bd Mon Sep 17 00:00:00 2001
From: JackyWoo <wuchienchao@qq.com>
Date: Tue, 26 Jan 2021 16:11:46 +0800
Subject: [PATCH 0768/1238] add quota type QUERY_SELECTS and QUERY_INSERTS

---
 docs/en/operations/quotas.md                  |  10 ++
 .../operations/system-tables/quota_limits.md  |   2 +
 .../operations/system-tables/quota_usage.md   |   2 +
 .../operations/system-tables/quotas_usage.md  |   4 +
 .../sql-reference/statements/alter/quota.md   |   6 +-
 .../sql-reference/statements/create/quota.md  |   6 +-
 src/Access/Quota.h                            |  12 ++
 src/Interpreters/executeQuery.cpp             |   8 +
 .../integration/test_quota/normal_limits.xml  |   2 +
 tests/integration/test_quota/test.py          | 141 ++++++++++--------
 tests/integration/test_quota/tiny_limits.xml  |   2 +
 tests/integration/test_quota/tracking.xml     |   2 +
 .../0_stateless/01297_create_quota.reference  |  11 +-
 .../0_stateless/01297_create_quota.sql        |   8 +
 14 files changed, 142 insertions(+), 74 deletions(-)

diff --git a/docs/en/operations/quotas.md b/docs/en/operations/quotas.md
index c637ef03f71..56c3eaf6455 100644
--- a/docs/en/operations/quotas.md
+++ b/docs/en/operations/quotas.md
@@ -29,6 +29,8 @@ Let’s look at the section of the ‘users.xml’ file that defines quotas.
 
             <!-- Unlimited. Just collect data for the specified time interval. -->
             <queries>0</queries>
+            <query_selects>0</query_selects>
+            <query_inserts>0</query_inserts>
             <errors>0</errors>
             <result_rows>0</result_rows>
             <read_rows>0</read_rows>
@@ -48,6 +50,8 @@ The resource consumption calculated for each interval is output to the server lo
         <duration>3600</duration>
 
         <queries>1000</queries>
+        <query_selects>100</query_selects>
+        <query_inserts>100</query_inserts>
         <errors>100</errors>
         <result_rows>1000000000</result_rows>
         <read_rows>100000000000</read_rows>
@@ -58,6 +62,8 @@ The resource consumption calculated for each interval is output to the server lo
         <duration>86400</duration>
 
         <queries>10000</queries>
+        <query_selects>10000</query_selects>
+        <query_inserts>10000</query_inserts>
         <errors>1000</errors>
         <result_rows>5000000000</result_rows>
         <read_rows>500000000000</read_rows>
@@ -74,6 +80,10 @@ Here are the amounts that can be restricted:
 
 `queries` – The total number of requests.
 
+`query_selects` – The total number of select requests.
+
+`query_inserts` – The total number of insert requests.
+
 `errors` – The number of queries that threw an exception.
 
 `result_rows` – The total number of rows given as a result.
diff --git a/docs/en/operations/system-tables/quota_limits.md b/docs/en/operations/system-tables/quota_limits.md
index 065296f5df3..c2dcb4db34d 100644
--- a/docs/en/operations/system-tables/quota_limits.md
+++ b/docs/en/operations/system-tables/quota_limits.md
@@ -9,6 +9,8 @@ Columns:
 - `0` — Interval is not randomized.
 - `1` — Interval is randomized.
 - `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of queries.
+- `max_query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of select queries.
+- `max_query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of insert queries.
 - `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of errors.
 - `max_result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of result rows.
 - `max_result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of RAM volume in bytes used to store a queries result.
diff --git a/docs/en/operations/system-tables/quota_usage.md b/docs/en/operations/system-tables/quota_usage.md
index 0eb59fd6453..17af9ad9a30 100644
--- a/docs/en/operations/system-tables/quota_usage.md
+++ b/docs/en/operations/system-tables/quota_usage.md
@@ -9,6 +9,8 @@ Columns:
 - `end_time`([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md))) — End time for calculating resource consumption.
 - `duration` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Length of the time interval for calculating resource consumption, in seconds.
 - `queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of requests on this interval.
+- `query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of select requests on this interval.
+- `query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of insert requests on this interval.
 - `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of requests.
 - `errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The number of queries that threw an exception.
 - `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of errors.
diff --git a/docs/en/operations/system-tables/quotas_usage.md b/docs/en/operations/system-tables/quotas_usage.md
index ed6be820b26..31aafd3e697 100644
--- a/docs/en/operations/system-tables/quotas_usage.md
+++ b/docs/en/operations/system-tables/quotas_usage.md
@@ -11,6 +11,10 @@ Columns:
 - `duration` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt32](../../sql-reference/data-types/int-uint.md))) — Length of the time interval for calculating resource consumption, in seconds.
 - `queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of requests in this interval.
 - `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of requests.
+- `query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of select requests in this interval.
+- `max_query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of select requests.
+- `query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of insert requests in this interval.
+- `max_query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of insert requests.
 - `errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The number of queries that threw an exception.
 - `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of errors.
 - `result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of rows given as a result.
diff --git a/docs/en/sql-reference/statements/alter/quota.md b/docs/en/sql-reference/statements/alter/quota.md
index 905c57503fc..a43b5255598 100644
--- a/docs/en/sql-reference/statements/alter/quota.md
+++ b/docs/en/sql-reference/statements/alter/quota.md
@@ -5,7 +5,7 @@ toc_title: QUOTA
 
 # ALTER QUOTA {#alter-quota-statement}
 
-Changes [quotas](../../../operations/access-rights.md#quotas-management).
+Changes quotas.
 
 Syntax:
 
@@ -14,13 +14,13 @@ ALTER QUOTA [IF EXISTS] name [ON CLUSTER cluster_name]
     [RENAME TO new_name]
     [KEYED BY {user_name | ip_address | client_key | client_key,user_name | client_key,ip_address} | NOT KEYED]
     [FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year}
-        {MAX { {queries | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
+        {MAX { {queries | query_selects | query_inserts | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
         NO LIMITS | TRACKING ONLY} [,...]]
     [TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
 ```
 Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table.
 
-Parameters `queries`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table.
+Parameters `queries`, `query_selects`, 'query_inserts', errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table.
 
 `ON CLUSTER` clause allows creating quotas on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md).
 
diff --git a/docs/en/sql-reference/statements/create/quota.md b/docs/en/sql-reference/statements/create/quota.md
index ec980af921f..71416abf588 100644
--- a/docs/en/sql-reference/statements/create/quota.md
+++ b/docs/en/sql-reference/statements/create/quota.md
@@ -13,14 +13,14 @@ Syntax:
 CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
     [KEYED BY {user_name | ip_address | client_key | client_key,user_name | client_key,ip_address} | NOT KEYED]
     [FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year}
-        {MAX { {queries | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
+        {MAX { {queries | query_selects | query_inserts | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
          NO LIMITS | TRACKING ONLY} [,...]]
     [TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
 ```
 
-Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table.
+Keys `user_name`,  `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table.
 
-Parameters `queries`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table.
+Parameters `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table.
 
 `ON CLUSTER` clause allows creating quotas on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md).
 
diff --git a/src/Access/Quota.h b/src/Access/Quota.h
index b636e83ec40..430bdca29b0 100644
--- a/src/Access/Quota.h
+++ b/src/Access/Quota.h
@@ -31,6 +31,8 @@ struct Quota : public IAccessEntity
     enum ResourceType
     {
         QUERIES,        /// Number of queries.
+        QUERY_SELECTS,  /// Number of select queries.
+        QUERY_INSERTS,  /// Number of inserts queries.
         ERRORS,         /// Number of queries with exceptions.
         RESULT_ROWS,    /// Number of rows returned as result.
         RESULT_BYTES,   /// Number of bytes returned as result.
@@ -152,6 +154,16 @@ inline const Quota::ResourceTypeInfo & Quota::ResourceTypeInfo::get(ResourceType
             static const auto info = make_info("QUERIES", 1);
             return info;
         }
+        case Quota::QUERY_SELECTS:
+        {
+            static const auto info = make_info("QUERY_SELECTS", 1);
+            return info;
+        }
+        case Quota::QUERY_INSERTS:
+        {
+            static const auto info = make_info("QUERY_INSERTS", 1);
+            return info;
+        }
         case Quota::ERRORS:
         {
             static const auto info = make_info("ERRORS", 1);
diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp
index 50e891a3524..56365565d95 100644
--- a/src/Interpreters/executeQuery.cpp
+++ b/src/Interpreters/executeQuery.cpp
@@ -524,6 +524,14 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
             quota = context.getQuota();
             if (quota)
             {
+                if (ast->as<ASTSelectQuery>() || ast->as<ASTSelectWithUnionQuery>())
+                {
+                    quota->used(Quota::QUERY_SELECTS, 1);
+                }
+                else if (ast->as<ASTInsertQuery>())
+                {
+                    quota->used(Quota::QUERY_INSERTS, 1);
+                }
                 quota->used(Quota::QUERIES, 1);
                 quota->checkExceeded(Quota::ERRORS);
             }
diff --git a/tests/integration/test_quota/normal_limits.xml b/tests/integration/test_quota/normal_limits.xml
index b7c3a67b5cc..e32043ef5ec 100644
--- a/tests/integration/test_quota/normal_limits.xml
+++ b/tests/integration/test_quota/normal_limits.xml
@@ -8,6 +8,8 @@
 
                 <!-- Normal limits. -->
                 <queries>1000</queries>
+                <query_selects>500</query_selects>
+                <query_inserts>500</query_inserts>
                 <errors>0</errors>
                 <read_rows>1000</read_rows>
                 <result_rows>0</result_rows>
diff --git a/tests/integration/test_quota/test.py b/tests/integration/test_quota/test.py
index 0614150ee07..84454159a58 100644
--- a/tests/integration/test_quota/test.py
+++ b/tests/integration/test_quota/test.py
@@ -28,7 +28,7 @@ def system_quota_limits(canonical):
 
 def system_quota_usage(canonical):
     canonical_tsv = TSV(canonical)
-    query = "SELECT quota_name, quota_key, duration, queries, max_queries, errors, max_errors, result_rows, max_result_rows," \
+    query = "SELECT quota_name, quota_key, duration, queries, max_queries, query_selects, max_query_selects, query_inserts, max_query_inserts, errors, max_errors, result_rows, max_result_rows," \
             "result_bytes, max_result_bytes, read_rows, max_read_rows, read_bytes, max_read_bytes, max_execution_time " \
             "FROM system.quota_usage ORDER BY duration"
     r = TSV(instance.query(query))
@@ -38,7 +38,7 @@ def system_quota_usage(canonical):
 
 def system_quotas_usage(canonical):
     canonical_tsv = TSV(canonical)
-    query = "SELECT quota_name, quota_key, is_current, duration, queries, max_queries, errors, max_errors, result_rows, max_result_rows, " \
+    query = "SELECT quota_name, quota_key, is_current, duration, queries, max_queries, query_selects, max_query_selects, query_inserts, max_query_inserts, errors, max_errors, result_rows, max_result_rows, " \
             "result_bytes, max_result_bytes, read_rows, max_read_rows, read_bytes, max_read_bytes, max_execution_time " \
             "FROM system.quotas_usage ORDER BY quota_name, quota_key, duration"
     r = TSV(instance.query(query))
@@ -73,6 +73,7 @@ def reset_quotas_and_usage_info():
     try:
         yield
     finally:
+        copy_quota_xml('simpliest.xml')  # To reset usage info.
         instance.query("DROP QUOTA IF EXISTS qA, qB")
         copy_quota_xml('simpliest.xml')  # To reset usage info.
         copy_quota_xml('normal_limits.xml')
@@ -81,18 +82,18 @@ def reset_quotas_and_usage_info():
 def test_quota_from_users_xml():
     check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", [31556952],
                           0, "['default']", "[]"]])
-    system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
-    system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
+    system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
+    system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
     system_quotas_usage(
-        [["myQuota", "default", 1, 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
+        [["myQuota", "default", 1, 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
 
     instance.query("SELECT * from test_table")
     system_quota_usage(
-        [["myQuota", "default", 31556952, 1, 1000, 0, "\\N", 50, "\\N", 200, "\\N", 50, 1000, 200, "\\N", "\\N"]])
+        [["myQuota", "default", 31556952, 1, 1000, 1, 500, 0, 500, 0, "\\N", 50, "\\N", 200, "\\N", 50, 1000, 200, "\\N", "\\N"]])
 
     instance.query("SELECT COUNT() from test_table")
     system_quota_usage(
-        [["myQuota", "default", 31556952, 2, 1000, 0, "\\N", 51, "\\N", 208, "\\N", 50, 1000, 200, "\\N", "\\N"]])
+        [["myQuota", "default", 31556952, 2, 1000, 2, 500, 0, 500, 0, "\\N", 51, "\\N", 208, "\\N", 50, 1000, 200, "\\N", "\\N"]])
 
 
 def test_simpliest_quota():
@@ -102,11 +103,11 @@ def test_simpliest_quota():
                           "['default']", "[]"]])
     system_quota_limits("")
     system_quota_usage(
-        [["myQuota", "default", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]])
+        [["myQuota", "default", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]])
 
     instance.query("SELECT * from test_table")
     system_quota_usage(
-        [["myQuota", "default", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]])
+        [["myQuota", "default", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]])
 
 
 def test_tracking_quota():
@@ -114,16 +115,16 @@ def test_tracking_quota():
     copy_quota_xml('tracking.xml')
     check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952]",
                           0, "['default']", "[]"]])
-    system_quota_limits([["myQuota", 31556952, 0, "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]])
-    system_quota_usage([["myQuota", "default", 31556952, 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", "\\N"]])
+    system_quota_limits([["myQuota", 31556952, 0, "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]])
+    system_quota_usage([["myQuota", "default", 31556952, 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", "\\N"]])
 
     instance.query("SELECT * from test_table")
     system_quota_usage(
-        [["myQuota", "default", 31556952, 1, "\\N", 0, "\\N", 50, "\\N", 200, "\\N", 50, "\\N", 200, "\\N", "\\N"]])
+        [["myQuota", "default", 31556952, 1, "\\N", 1, "\\N", 0, "\\N", 0, "\\N", 50, "\\N", 200, "\\N", 50, "\\N", 200, "\\N", "\\N"]])
 
     instance.query("SELECT COUNT() from test_table")
     system_quota_usage(
-        [["myQuota", "default", 31556952, 2, "\\N", 0, "\\N", 51, "\\N", 208, "\\N", 50, "\\N", 200, "\\N", "\\N"]])
+        [["myQuota", "default", 31556952, 2, "\\N", 2, "\\N", 0, "\\N", 0, "\\N", 51, "\\N", 208, "\\N", 50, "\\N", 200, "\\N", "\\N"]])
 
 
 def test_exceed_quota():
@@ -131,55 +132,55 @@ def test_exceed_quota():
     copy_quota_xml('tiny_limits.xml')
     check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952]",
                           0, "['default']", "[]"]])
-    system_quota_limits([["myQuota", 31556952, 0, 1, 1, 1, "\\N", 1, "\\N", "\\N"]])
-    system_quota_usage([["myQuota", "default", 31556952, 0, 1, 0, 1, 0, 1, 0, "\\N", 0, 1, 0, "\\N", "\\N"]])
+    system_quota_limits([["myQuota", 31556952, 0, 1, 1, 1, 1, 1, "\\N", 1, "\\N", "\\N"]])
+    system_quota_usage([["myQuota", "default", 31556952, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, "\\N", 0, 1, 0, "\\N", "\\N"]])
 
     assert re.search("Quota.*has\ been\ exceeded", instance.query_and_get_error("SELECT * from test_table"))
-    system_quota_usage([["myQuota", "default", 31556952, 1, 1, 1, 1, 0, 1, 0, "\\N", 50, 1, 0, "\\N", "\\N"]])
+    system_quota_usage([["myQuota", "default", 31556952, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, "\\N", 50, 1, 0, "\\N", "\\N"]])
 
     # Change quota, now the limits are enough to execute queries.
     copy_quota_xml('normal_limits.xml')
     check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952]",
                           0, "['default']", "[]"]])
-    system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
-    system_quota_usage([["myQuota", "default", 31556952, 1, 1000, 1, "\\N", 0, "\\N", 0, "\\N", 50, 1000, 0, "\\N", "\\N"]])
+    system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
+    system_quota_usage([["myQuota", "default", 31556952, 1, 1000, 1, 500, 0, 500, 1, "\\N", 0, "\\N", 0, "\\N", 50, 1000, 0, "\\N", "\\N"]])
 
     instance.query("SELECT * from test_table")
     system_quota_usage(
-        [["myQuota", "default", 31556952, 2, 1000, 1, "\\N", 50, "\\N", 200, "\\N", 100, 1000, 200, "\\N", "\\N"]])
+        [["myQuota", "default", 31556952, 2, 1000, 2, 500, 0, 500, 1, "\\N", 50, "\\N", 200, "\\N", 100, 1000, 200, "\\N", "\\N"]])
 
 
 def test_add_remove_interval():
     check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", [31556952],
                           0, "['default']", "[]"]])
-    system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
-    system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
+    system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
+    system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
 
     # Add interval.
     copy_quota_xml('two_intervals.xml')
     check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']",
                           "[31556952,63113904]", 0, "['default']", "[]"]])
-    system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"],
-                         ["myQuota", 63113904, 1, "\\N", "\\N", "\\N", 30000, "\\N", 20000, 120]])
-    system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"],
-                        ["myQuota", "default", 63113904, 0, "\\N", 0, "\\N", 0, "\\N", 0, 30000, 0, "\\N", 0, 20000, 120]])
+    system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", "\\N", "\\N", 1000, "\\N", "\\N"],
+                         ["myQuota", 63113904, 1, "\\N", "\\N", "\\N", "\\N", "\\N", 30000, "\\N", 20000, 120]])
+    system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"],
+                        ["myQuota", "default", 63113904, 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, 30000, 0, "\\N", 0, 20000, 120]])
 
     instance.query("SELECT * from test_table")
     system_quota_usage(
-        [["myQuota", "default", 31556952, 1, 1000, 0, "\\N", 50, "\\N", 200, "\\N", 50, 1000, 200, "\\N", "\\N"],
-         ["myQuota", "default", 63113904, 1, "\\N", 0, "\\N", 50, "\\N", 200, 30000, 50, "\\N", 200, 20000, 120]])
+        [["myQuota", "default", 31556952, 1, 1000, 1, "\\N", 0, "\\N", 0, "\\N", 50, "\\N", 200, "\\N", 50, 1000, 200, "\\N", "\\N"],
+         ["myQuota", "default", 63113904, 1, "\\N", 1, "\\N", 0, "\\N", 0, "\\N", 50, "\\N", 200, 30000, 50, "\\N", 200, 20000, 120]])
 
     # Remove interval.
     copy_quota_xml('normal_limits.xml')
     check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", [31556952],
                           0, "['default']", "[]"]])
-    system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
+    system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
     system_quota_usage(
-        [["myQuota", "default", 31556952, 1, 1000, 0, "\\N", 50, "\\N", 200, "\\N", 50, 1000, 200, "\\N", "\\N"]])
+        [["myQuota", "default", 31556952, 1, 1000, 1, 500, 0, 500, 0, "\\N", 50, "\\N", 200, "\\N", 50, 1000, 200, "\\N", "\\N"]])
 
     instance.query("SELECT * from test_table")
     system_quota_usage(
-        [["myQuota", "default", 31556952, 2, 1000, 0, "\\N", 100, "\\N", 400, "\\N", 100, 1000, 400, "\\N", "\\N"]])
+        [["myQuota", "default", 31556952, 2, 1000, 2, 500, 0, 500, 0, "\\N", 100, "\\N", 400, "\\N", 100, 1000, 400, "\\N", "\\N"]])
 
     # Remove all intervals.
     copy_quota_xml('simpliest.xml')
@@ -187,26 +188,26 @@ def test_add_remove_interval():
                           "['default']", "[]"]])
     system_quota_limits("")
     system_quota_usage(
-        [["myQuota", "default", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]])
+        [["myQuota", "default", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]])
 
     instance.query("SELECT * from test_table")
     system_quota_usage(
-        [["myQuota", "default", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]])
+        [["myQuota", "default", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]])
 
     # Add one interval back.
     copy_quota_xml('normal_limits.xml')
     check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", [31556952],
                           0, "['default']", "[]"]])
-    system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
-    system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
+    system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
+    system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
 
 
 def test_add_remove_quota():
     check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", [31556952],
                           0, "['default']", "[]"]])
-    system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
+    system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
     system_quotas_usage(
-        [["myQuota", "default", 1, 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
+        [["myQuota", "default", 1, 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
 
     # Add quota.
     copy_quota_xml('two_quotas.xml')
@@ -214,19 +215,19 @@ def test_add_remove_quota():
                           0, "['default']", "[]"],
                          ["myQuota2", "4590510c-4d13-bf21-ec8a-c2187b092e73", "users.xml", "['client_key','user_name']",
                           "[3600,2629746]", 0, "[]", "[]"]])
-    system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"],
-                         ["myQuota2", 3600, 1, "\\N", "\\N", 4000, 400000, 4000, 400000, 60],
-                         ["myQuota2", 2629746, 0, "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", 1800]])
+    system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", "\\N", "\\N", 1000, "\\N", "\\N"],
+                         ["myQuota2", 3600, 1, "\\N", "\\N", "\\N", "\\N", 4000, 400000, 4000, 400000, 60],
+                         ["myQuota2", 2629746, 0, "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", 1800]])
     system_quotas_usage(
-        [["myQuota", "default", 1, 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
+        [["myQuota", "default", 1, 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
 
     # Drop quota.
     copy_quota_xml('normal_limits.xml')
     check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952]",
                           0, "['default']", "[]"]])
-    system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
+    system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
     system_quotas_usage(
-        [["myQuota", "default", 1, 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
+        [["myQuota", "default", 1, 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
 
     # Drop all quotas.
     copy_quota_xml('no_quotas.xml')
@@ -238,15 +239,15 @@ def test_add_remove_quota():
     copy_quota_xml('normal_limits.xml')
     check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952]",
                           0, "['default']", "[]"]])
-    system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
+    system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
     system_quotas_usage(
-        [["myQuota", "default", 1, 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
+        [["myQuota", "default", 1, 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
 
 
 def test_reload_users_xml_by_timer():
     check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952]",
                           0, "['default']", "[]"]])
-    system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
+    system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
 
     time.sleep(1)  # The modification time of the 'quota.xml' file should be different,
     # because config files are reload by timer only when the modification time is changed.
@@ -255,25 +256,25 @@ def test_reload_users_xml_by_timer():
         ["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", ['user_name'], "[31556952]", 0, "['default']",
          "[]"]])
     assert_eq_with_retry(instance, "SELECT * FROM system.quota_limits",
-                         [["myQuota", 31556952, 0, 1, 1, 1, "\\N", 1, "\\N", "\\N"]])
+                         [["myQuota", 31556952, 0, 1, 1, 1, 1, 1, "\\N", 1, "\\N", "\\N"]])
 
 
 def test_dcl_introspection():
     assert instance.query("SHOW QUOTAS") == "myQuota\n"
     assert instance.query(
-        "SHOW CREATE QUOTA") == "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, read_rows = 1000 TO default\n"
+        "SHOW CREATE QUOTA") == "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, query_selects = 500, query_inserts = 500, read_rows = 1000 TO default\n"
     assert instance.query(
-        "SHOW CREATE QUOTAS") == "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, read_rows = 1000 TO default\n"
+        "SHOW CREATE QUOTAS") == "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, query_selects = 500, query_inserts = 500, read_rows = 1000 TO default\n"
     assert re.match(
-        "myQuota\\tdefault\\t.*\\t31556952\\t0\\t1000\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t1000\\t0\\t\\\\N\\t.*\\t\\\\N\n",
+        "myQuota\\tdefault\\t.*\\t31556952\\t0\\t1000\\t0\\t500\\t0\\t500\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t1000\\t0\\t\\\\N\\t.*\\t\\\\N\n",
         instance.query("SHOW QUOTA"))
 
     instance.query("SELECT * from test_table")
     assert re.match(
-        "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t1000\\t200\\t\\\\N\\t.*\\t\\\\N\n",
+        "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t500\\t0\\t500\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t1000\\t200\\t\\\\N\\t.*\\t\\\\N\n",
         instance.query("SHOW QUOTA"))
 
-    expected_access = "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, read_rows = 1000 TO default\n"
+    expected_access = "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, query_selects = 500, query_inserts = 500, read_rows = 1000 TO default\n"
     assert expected_access in instance.query("SHOW ACCESS")
 
     # Add interval.
@@ -282,8 +283,8 @@ def test_dcl_introspection():
     assert instance.query(
         "SHOW CREATE QUOTA") == "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, read_rows = 1000, FOR RANDOMIZED INTERVAL 2 year MAX result_bytes = 30000, read_bytes = 20000, execution_time = 120 TO default\n"
     assert re.match(
-        "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t1000\\t200\\t\\\\N\\t.*\\t\\\\N\n"
-        "myQuota\\tdefault\\t.*\\t63113904\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t30000\\t0\\t\\\\N\\t0\\t20000\\t.*\\t120",
+        "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t1000\\t200\\t\\\\N\\t.*\\t\\\\N\n"
+        "myQuota\\tdefault\\t.*\\t63113904\\t0\\t\\\\N\t0\\t\\\\N\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t30000\\t0\\t\\\\N\\t0\\t20000\\t.*\\t120",
         instance.query("SHOW QUOTA"))
 
     # Drop interval, add quota.
@@ -297,7 +298,7 @@ def test_dcl_introspection():
         "SHOW CREATE QUOTAS") == "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, read_rows = 1000 TO default\n" \
                                  "CREATE QUOTA myQuota2 KEYED BY client_key, user_name FOR RANDOMIZED INTERVAL 1 hour MAX result_rows = 4000, result_bytes = 400000, read_rows = 4000, read_bytes = 400000, execution_time = 60, FOR INTERVAL 1 month MAX execution_time = 1800\n"
     assert re.match(
-        "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t1000\\t200\\t\\\\N\\t.*\\t\\\\N\n",
+        "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t1000\\t200\\t\\\\N\\t.*\\t\\\\N\n",
         instance.query("SHOW QUOTA"))
 
     # Drop all quotas.
@@ -315,12 +316,12 @@ def test_dcl_management():
     assert instance.query(
         "SHOW CREATE QUOTA qA") == "CREATE QUOTA qA FOR INTERVAL 5 quarter MAX queries = 123 TO default\n"
     assert re.match(
-        "qA\\t\\t.*\\t39446190\\t0\\t123\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t\\\\N\n",
+        "qA\\t\\t.*\\t39446190\\t0\\t123\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t\\\\N\n",
         instance.query("SHOW QUOTA"))
 
     instance.query("SELECT * from test_table")
     assert re.match(
-        "qA\\t\\t.*\\t39446190\\t1\\t123\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n",
+        "qA\\t\\t.*\\t39446190\\t1\\t123\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n",
         instance.query("SHOW QUOTA"))
 
     instance.query(
@@ -328,37 +329,37 @@ def test_dcl_management():
     assert instance.query(
         "SHOW CREATE QUOTA qA") == "CREATE QUOTA qA FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default\n"
     assert re.match(
-        "qA\\t\\t.*\\t1800\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t0.5\n"
-        "qA\\t\\t.*\\t39446190\\t1\\t321\\t0\\t10\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n",
+        "qA\\t\\t.*\\t1800\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t0.5\n"
+        "qA\\t\\t.*\\t39446190\\t1\\t321\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t10\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n",
         instance.query("SHOW QUOTA"))
 
     instance.query("SELECT * from test_table")
     assert re.match(
-        "qA\\t\\t.*\\t1800\\t1\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t0.5\n"
-        "qA\\t\\t.*\\t39446190\\t2\\t321\\t0\\t10\\t100\\t\\\\N\\t400\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t.*\\t\\\\N\n",
+        "qA\\t\\t.*\\t1800\\t1\\t\\\\N\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t0.5\n"
+        "qA\\t\\t.*\\t39446190\\t2\\t321\\t2\\t\\\\N\\t0\\t\\\\N\\t0\\t10\\t100\\t\\\\N\\t400\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t.*\\t\\\\N\n",
         instance.query("SHOW QUOTA"))
 
     instance.query(
         "ALTER QUOTA qA FOR INTERVAL 15 MONTH NO LIMITS, FOR RANDOMIZED INTERVAL 16 MONTH TRACKING ONLY, FOR INTERVAL 1800 SECOND NO LIMITS")
     assert re.match(
-        "qA\\t\\t.*\\t42075936\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t\\\\N\n",
+        "qA\\t\\t.*\\t42075936\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t\\\\N\n",
         instance.query("SHOW QUOTA"))
 
     instance.query("SELECT * from test_table")
     assert re.match(
-        "qA\\t\\t.*\\t42075936\\t1\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n",
+        "qA\\t\\t.*\\t42075936\\t1\\t\\\\N\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n",
         instance.query("SHOW QUOTA"))
 
     instance.query("ALTER QUOTA qA RENAME TO qB")
     assert instance.query(
         "SHOW CREATE QUOTA qB") == "CREATE QUOTA qB FOR RANDOMIZED INTERVAL 16 month TRACKING ONLY TO default\n"
     assert re.match(
-        "qB\\t\\t.*\\t42075936\\t1\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n",
+        "qB\\t\\t.*\\t42075936\\t1\\t\\\\N\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n",
         instance.query("SHOW QUOTA"))
 
     instance.query("SELECT * from test_table")
     assert re.match(
-        "qB\\t\\t.*\\t42075936\\t2\\t\\\\N\\t0\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t.*\\t\\\\N\n",
+        "qB\\t\\t.*\\t42075936\\t2\\t\\\\N\\t2\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t.*\\t\\\\N\n",
         instance.query("SHOW QUOTA"))
 
     instance.query("DROP QUOTA qB")
@@ -367,3 +368,15 @@ def test_dcl_management():
 
 def test_users_xml_is_readonly():
     assert re.search("storage is readonly", instance.query_and_get_error("DROP QUOTA myQuota"))
+
+def test_query_inserts():
+    check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", [31556952],
+                          0, "['default']", "[]"]])
+    system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
+    system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
+    system_quotas_usage(
+        [["myQuota", "default", 1, 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
+
+    instance.query("INSERT INTO test_table values(1)")
+    system_quota_usage(
+        [["myQuota", "default", 31556952, 1, 1000, 0, 500, 1, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
\ No newline at end of file
diff --git a/tests/integration/test_quota/tiny_limits.xml b/tests/integration/test_quota/tiny_limits.xml
index 3ab8858738a..4797c360ddd 100644
--- a/tests/integration/test_quota/tiny_limits.xml
+++ b/tests/integration/test_quota/tiny_limits.xml
@@ -8,6 +8,8 @@
 
                 <!-- Tiny limits. -->
                 <queries>1</queries>
+                <query_selects>1</query_selects>
+                <query_inserts>1</query_inserts>
                 <errors>1</errors>
                 <read_rows>1</read_rows>
                 <result_rows>1</result_rows>
diff --git a/tests/integration/test_quota/tracking.xml b/tests/integration/test_quota/tracking.xml
index 47e12bf8005..c5e7c993edc 100644
--- a/tests/integration/test_quota/tracking.xml
+++ b/tests/integration/test_quota/tracking.xml
@@ -8,6 +8,8 @@
 
                 <!-- No limits. Just calculate resource usage for time interval. -->
                 <queries>0</queries>
+                <query_selects>0</query_selects>
+                <query_inserts>0</query_inserts>
                 <errors>0</errors>
                 <read_rows>0</read_rows>
                 <result_rows>0</result_rows>
diff --git a/tests/queries/0_stateless/01297_create_quota.reference b/tests/queries/0_stateless/01297_create_quota.reference
index b637f4f3296..375d67346be 100644
--- a/tests/queries/0_stateless/01297_create_quota.reference
+++ b/tests/queries/0_stateless/01297_create_quota.reference
@@ -57,7 +57,10 @@ q2_01297	local directory	[]	[5259492]	0	['r1_01297','u1_01297']	[]
 q3_01297	local directory	['client_key','user_name']	[5259492,15778476]	0	[]	[]
 q4_01297	local directory	[]	[604800]	1	[]	['u1_01297']
 -- system.quota_limits
-q2_01297	5259492	0	100	11	1000	10000	1001	10001	2.5
-q3_01297	5259492	0	\N	\N	1002	\N	\N	\N	\N
-q3_01297	15778476	0	100	11	\N	\N	\N	\N	\N
-q4_01297	604800	0	\N	\N	\N	\N	\N	\N	\N
+q2_01297	5259492	0	100	\N	\N	11	1000	10000	1001	10001	2.5
+q3_01297	5259492	0	\N	\N	\N	\N	1002	\N	\N	\N	\N
+q3_01297	15778476	0	100	\N	\N	11	\N	\N	\N	\N	\N
+q4_01297	604800	0	\N	\N	\N	\N	\N	\N	\N	\N	\N
+-- query_selects query_inserts
+CREATE QUOTA q1_01297 KEYED BY user_name FOR INTERVAL 1 minute MAX query_selects = 1 TO r1_01297
+CREATE QUOTA q2_01297 KEYED BY user_name FOR INTERVAL 1 minute MAX query_inserts = 1 TO r1_01297
diff --git a/tests/queries/0_stateless/01297_create_quota.sql b/tests/queries/0_stateless/01297_create_quota.sql
index a3fb8331e16..7d55b95601f 100644
--- a/tests/queries/0_stateless/01297_create_quota.sql
+++ b/tests/queries/0_stateless/01297_create_quota.sql
@@ -125,5 +125,13 @@ SELECT '-- system.quota_limits';
 SELECT * FROM system.quota_limits WHERE quota_name LIKE 'q%\_01297' ORDER BY quota_name, duration;
 DROP QUOTA q1_01297, q2_01297, q3_01297, q4_01297;
 
+SELECT '-- query_selects query_inserts';
+CREATE QUOTA q1_01297 KEYED BY user_name FOR INTERVAL 1 minute MAX query_selects = 1 TO r1_01297;
+CREATE QUOTA q2_01297 KEYED BY user_name FOR INTERVAL 1 minute MAX query_inserts = 1 TO r1_01297;
+SHOW CREATE QUOTA q1_01297;
+SHOW CREATE QUOTA q2_01297;
+DROP QUOTA q1_01297, q2_01297;
+
 DROP ROLE r1_01297;
 DROP USER u1_01297;
+

From 1b32292dd0d34b7ac323c19c869efe39e53789f2 Mon Sep 17 00:00:00 2001
From: Amos Bird <amosbird@gmail.com>
Date: Sun, 7 Feb 2021 11:32:54 +0800
Subject: [PATCH 0769/1238] bitmap function for all native integers

---
 .../AggregateFunctionGroupBitmap.cpp          |  20 ++-
 .../AggregateFunctionGroupBitmapData.h        |  15 +-
 src/Functions/FunctionsBitmap.h               | 141 +++++++++++++++---
 .../01702_bitmap_native_integers.reference    |   1 +
 .../01702_bitmap_native_integers.sql          |   5 +
 5 files changed, 154 insertions(+), 28 deletions(-)
 create mode 100644 tests/queries/0_stateless/01702_bitmap_native_integers.reference
 create mode 100644 tests/queries/0_stateless/01702_bitmap_native_integers.sql

diff --git a/src/AggregateFunctions/AggregateFunctionGroupBitmap.cpp b/src/AggregateFunctions/AggregateFunctionGroupBitmap.cpp
index bf1d0af73ff..415ba557ef5 100644
--- a/src/AggregateFunctions/AggregateFunctionGroupBitmap.cpp
+++ b/src/AggregateFunctions/AggregateFunctionGroupBitmap.cpp
@@ -16,6 +16,22 @@ namespace ErrorCodes
 
 namespace
 {
+
+    template <template <typename, typename> class AggregateFunctionTemplate, template <typename> class Data, typename... TArgs>
+    static IAggregateFunction * createWithIntegerType(const IDataType & argument_type, TArgs && ... args)
+    {
+        WhichDataType which(argument_type);
+        if (which.idx == TypeIndex::UInt8) return new AggregateFunctionTemplate<UInt8, Data<UInt8>>(std::forward<TArgs>(args)...);
+        if (which.idx == TypeIndex::UInt16) return new AggregateFunctionTemplate<UInt16, Data<UInt16>>(std::forward<TArgs>(args)...);
+        if (which.idx == TypeIndex::UInt32) return new AggregateFunctionTemplate<UInt32, Data<UInt32>>(std::forward<TArgs>(args)...);
+        if (which.idx == TypeIndex::UInt64) return new AggregateFunctionTemplate<UInt64, Data<UInt64>>(std::forward<TArgs>(args)...);
+        if (which.idx == TypeIndex::Int8) return new AggregateFunctionTemplate<Int8, Data<Int8>>(std::forward<TArgs>(args)...);
+        if (which.idx == TypeIndex::Int16) return new AggregateFunctionTemplate<Int16, Data<Int16>>(std::forward<TArgs>(args)...);
+        if (which.idx == TypeIndex::Int32) return new AggregateFunctionTemplate<Int32, Data<Int32>>(std::forward<TArgs>(args)...);
+        if (which.idx == TypeIndex::Int64) return new AggregateFunctionTemplate<Int64, Data<Int64>>(std::forward<TArgs>(args)...);
+        return nullptr;
+    }
+
     template <template <typename> class Data>
     AggregateFunctionPtr createAggregateFunctionBitmap(const std::string & name, const DataTypes & argument_types, const Array & parameters)
     {
@@ -28,7 +44,7 @@ namespace
                     + " is illegal, because it cannot be used in Bitmap operations",
                 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
 
-        AggregateFunctionPtr res(createWithUnsignedIntegerType<AggregateFunctionBitmap, Data>(*argument_types[0], argument_types[0]));
+        AggregateFunctionPtr res(createWithIntegerType<AggregateFunctionBitmap, Data>(*argument_types[0], argument_types[0]));
 
         if (!res)
             throw Exception(
@@ -55,7 +71,7 @@ namespace
         const DataTypeAggregateFunction & datatype_aggfunc = dynamic_cast<const DataTypeAggregateFunction &>(*argument_type_ptr);
         AggregateFunctionPtr aggfunc = datatype_aggfunc.getFunction();
         argument_type_ptr = aggfunc->getArgumentTypes()[0];
-        AggregateFunctionPtr res(createWithUnsignedIntegerType<AggregateFunctionTemplate, AggregateFunctionGroupBitmapData>(
+        AggregateFunctionPtr res(createWithIntegerType<AggregateFunctionTemplate, AggregateFunctionGroupBitmapData>(
             *argument_type_ptr, argument_type_ptr));
         if (!res)
             throw Exception(
diff --git a/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h b/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h
index ec945d418f2..40885ba74a0 100644
--- a/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h
+++ b/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h
@@ -32,6 +32,7 @@ template <typename T, UInt8 small_set_size>
 class RoaringBitmapWithSmallSet : private boost::noncopyable
 {
 private:
+    using UnsignedT = std::make_unsigned_t<T>;
     SmallSet<T, small_set_size> small;
     using ValueBuffer = std::vector<T>;
     using RoaringBitmap = std::conditional_t<sizeof(T) >= 8, roaring::Roaring64Map, roaring::Roaring>;
@@ -363,6 +364,7 @@ public:
     /**
      * Check whether the argument is the subset of this set.
      * Empty set is a subset of any other set (consistent with hasAll).
+     * It's used in subset and currently only support comparing same type
      */
     UInt8 rb_is_subset(const RoaringBitmapWithSmallSet & r1) const
     {
@@ -486,6 +488,7 @@ public:
 
     /**
      * Return new set with specified range (not include the range_end)
+     * It's used in subset and currently only support UInt32
      */
     UInt64 rb_range(UInt64 range_start, UInt64 range_end, RoaringBitmapWithSmallSet & r1) const
     {
@@ -525,6 +528,7 @@ public:
 
     /**
      * Return new set of the smallest `limit` values in set which is no less than `range_start`.
+     * It's used in subset and currently only support UInt32
      */
     UInt64 rb_limit(UInt64 range_start, UInt64 limit, RoaringBitmapWithSmallSet & r1) const
     {
@@ -578,10 +582,10 @@ public:
         {
             if (small.empty())
                 return 0;
-            auto min_val = std::numeric_limits<std::make_unsigned_t<T>>::max();
+            auto min_val = std::numeric_limits<UnsignedT>::max();
             for (const auto & x : small)
             {
-                auto val = x.getValue();
+                UnsignedT val = x.getValue();
                 if (val < min_val)
                     min_val = val;
             }
@@ -597,10 +601,10 @@ public:
         {
             if (small.empty())
                 return 0;
-            auto max_val = std::numeric_limits<std::make_unsigned_t<T>>::min();
+            UnsignedT max_val = 0;
             for (const auto & x : small)
             {
-                auto val = x.getValue();
+                UnsignedT val = x.getValue();
                 if (val > max_val)
                     max_val = val;
             }
@@ -611,7 +615,8 @@ public:
     }
 
     /**
-     * Replace value
+     * Replace value.
+     * It's used in transform and currently can only support UInt32
      */
     void rb_replace(const UInt64 * from_vals, const UInt64 * to_vals, size_t num)
     {
diff --git a/src/Functions/FunctionsBitmap.h b/src/Functions/FunctionsBitmap.h
index 4d9621338f8..054f8800630 100644
--- a/src/Functions/FunctionsBitmap.h
+++ b/src/Functions/FunctionsBitmap.h
@@ -116,8 +116,35 @@ public:
         DataTypes argument_types = {nested_type};
         Array params_row;
         AggregateFunctionProperties properties;
-        AggregateFunctionPtr bitmap_function = AggregateFunctionFactory::instance().get(
-            AggregateFunctionGroupBitmapData<UInt32>::name(), argument_types, params_row, properties);
+        AggregateFunctionPtr bitmap_function;
+        WhichDataType which(nested_type);
+        if (which.isUInt8())
+            bitmap_function = AggregateFunctionFactory::instance().get(
+                AggregateFunctionGroupBitmapData<UInt8>::name(), argument_types, params_row, properties);
+        else if (which.isUInt16())
+            bitmap_function = AggregateFunctionFactory::instance().get(
+                AggregateFunctionGroupBitmapData<UInt16>::name(), argument_types, params_row, properties);
+        else if (which.isUInt32())
+            bitmap_function = AggregateFunctionFactory::instance().get(
+                AggregateFunctionGroupBitmapData<UInt32>::name(), argument_types, params_row, properties);
+        else if (which.isUInt64())
+            bitmap_function = AggregateFunctionFactory::instance().get(
+                AggregateFunctionGroupBitmapData<UInt64>::name(), argument_types, params_row, properties);
+        else if (which.isInt8())
+            bitmap_function = AggregateFunctionFactory::instance().get(
+                AggregateFunctionGroupBitmapData<Int8>::name(), argument_types, params_row, properties);
+        else if (which.isInt16())
+            bitmap_function = AggregateFunctionFactory::instance().get(
+                AggregateFunctionGroupBitmapData<Int16>::name(), argument_types, params_row, properties);
+        else if (which.isInt32())
+            bitmap_function = AggregateFunctionFactory::instance().get(
+                AggregateFunctionGroupBitmapData<Int32>::name(), argument_types, params_row, properties);
+        else if (which.isInt64())
+            bitmap_function = AggregateFunctionFactory::instance().get(
+                AggregateFunctionGroupBitmapData<Int64>::name(), argument_types, params_row, properties);
+        else
+            throw Exception(
+                "Unexpected type " + array_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
 
         return std::make_shared<DataTypeAggregateFunction>(bitmap_function, argument_types, params_row);
     }
@@ -141,6 +168,14 @@ public:
             return executeBitmapData<UInt32>(argument_types, arguments);
         else if (which.isUInt64())
             return executeBitmapData<UInt64>(argument_types, arguments);
+        else if (which.isInt8())
+            return executeBitmapData<Int8>(argument_types, arguments);
+        else if (which.isInt16())
+            return executeBitmapData<Int16>(argument_types, arguments);
+        else if (which.isInt32())
+            return executeBitmapData<Int32>(argument_types, arguments);
+        else if (which.isInt64())
+            return executeBitmapData<Int64>(argument_types, arguments);
         else
             throw Exception(
                 "Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@@ -161,7 +196,7 @@ private:
         Array params_row;
         AggregateFunctionProperties properties;
         AggregateFunctionPtr bitmap_function = AggregateFunctionFactory::instance().get(
-            AggregateFunctionGroupBitmapData<UInt32>::name(), argument_types, params_row, properties);
+            AggregateFunctionGroupBitmapData<T>::name(), argument_types, params_row, properties);
         auto col_to = ColumnAggregateFunction::create(bitmap_function);
         col_to->reserve(offsets.size());
 
@@ -197,7 +232,7 @@ public:
     DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
     {
         const DataTypeAggregateFunction * bitmap_type = typeid_cast<const DataTypeAggregateFunction *>(arguments[0].get());
-        if (!(bitmap_type && bitmap_type->getFunctionName() == AggregateFunctionGroupBitmapData<UInt32>::name()))
+        if (!(bitmap_type && bitmap_type->getFunctionName() =="groupBitmap"))
             throw Exception(
                 "First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName() + ".",
                 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@@ -230,6 +265,14 @@ public:
             executeIntType<UInt32>(arguments, input_rows_count, res_data, res_offsets);
         else if (which.isUInt64())
             executeIntType<UInt64>(arguments, input_rows_count, res_data, res_offsets);
+        else if (which.isInt8())
+            executeIntType<Int8>(arguments, input_rows_count, res_data, res_offsets);
+        else if (which.isInt16())
+            executeIntType<Int16>(arguments, input_rows_count, res_data, res_offsets);
+        else if (which.isInt32())
+            executeIntType<Int32>(arguments, input_rows_count, res_data, res_offsets);
+        else if (which.isInt64())
+            executeIntType<Int64>(arguments, input_rows_count, res_data, res_offsets);
         else
             throw Exception(
                 "Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@@ -279,7 +322,7 @@ public:
     DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
     {
         const DataTypeAggregateFunction * bitmap_type = typeid_cast<const DataTypeAggregateFunction *>(arguments[0].get());
-        if (!(bitmap_type && bitmap_type->getFunctionName() == AggregateFunctionGroupBitmapData<UInt32>::name()))
+        if (!(bitmap_type && bitmap_type->getFunctionName() == "groupBitmap"))
             throw Exception(
                 "First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName() + ".",
                 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@@ -312,6 +355,14 @@ public:
             return executeIntType<UInt32>(arguments, input_rows_count);
         else if (which.isUInt64())
             return executeIntType<UInt64>(arguments, input_rows_count);
+        else if (which.isInt8())
+            return executeIntType<Int8>(arguments, input_rows_count);
+        else if (which.isInt16())
+            return executeIntType<Int16>(arguments, input_rows_count);
+        else if (which.isInt32())
+            return executeIntType<Int32>(arguments, input_rows_count);
+        else if (which.isInt64())
+            return executeIntType<Int64>(arguments, input_rows_count);
         else
             throw Exception(
                 "Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@@ -384,7 +435,11 @@ struct BitmapSubsetInRangeImpl
 public:
     static constexpr auto name = "bitmapSubsetInRange";
     template <typename T>
-    static void apply(const AggregateFunctionGroupBitmapData<T> & bitmap_data_0, UInt64 range_start, UInt64 range_end, AggregateFunctionGroupBitmapData<T> & bitmap_data_2)
+    static void apply(
+        const AggregateFunctionGroupBitmapData<T> & bitmap_data_0,
+        UInt64 range_start,
+        UInt64 range_end,
+        AggregateFunctionGroupBitmapData<T> & bitmap_data_2)
     {
         bitmap_data_0.rbs.rb_range(range_start, range_end, bitmap_data_2.rbs);
     }
@@ -395,7 +450,11 @@ struct BitmapSubsetLimitImpl
 public:
     static constexpr auto name = "bitmapSubsetLimit";
     template <typename T>
-    static void apply(const AggregateFunctionGroupBitmapData<T> & bitmap_data_0, UInt64 range_start, UInt64 range_end, AggregateFunctionGroupBitmapData<T> & bitmap_data_2)
+    static void apply(
+        const AggregateFunctionGroupBitmapData<T> & bitmap_data_0,
+        UInt64 range_start,
+        UInt64 range_end,
+        AggregateFunctionGroupBitmapData<T> & bitmap_data_2)
     {
         bitmap_data_0.rbs.rb_limit(range_start, range_end, bitmap_data_2.rbs);
     }
@@ -421,7 +480,7 @@ public:
     DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
     {
         const DataTypeAggregateFunction * bitmap_type = typeid_cast<const DataTypeAggregateFunction *>(arguments[0].get());
-        if (!(bitmap_type && bitmap_type->getFunctionName() == AggregateFunctionGroupBitmapData<UInt32>::name()))
+        if (!(bitmap_type && bitmap_type->getFunctionName() == "groupBitmap"))
             throw Exception(
                 "First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName() + ".",
                 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@@ -456,6 +515,14 @@ public:
             return executeIntType<UInt32>(arguments, input_rows_count);
         else if (which.isUInt64())
             return executeIntType<UInt64>(arguments, input_rows_count);
+        else if (which.isInt8())
+            return executeIntType<Int8>(arguments, input_rows_count);
+        else if (which.isInt16())
+            return executeIntType<Int16>(arguments, input_rows_count);
+        else if (which.isInt32())
+            return executeIntType<Int32>(arguments, input_rows_count);
+        else if (which.isInt64())
+            return executeIntType<Int64>(arguments, input_rows_count);
         else
             throw Exception(
                 "Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@@ -579,7 +646,7 @@ public:
     DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
     {
         const auto * bitmap_type = typeid_cast<const DataTypeAggregateFunction *>(arguments[0].get());
-        if (!(bitmap_type && bitmap_type->getFunctionName() == AggregateFunctionGroupBitmapData<UInt32>::name()))
+        if (!(bitmap_type && bitmap_type->getFunctionName() == "groupBitmap"))
             throw Exception(
                 "First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName() + ".",
                 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@@ -604,6 +671,14 @@ public:
             executeIntType<UInt32>(arguments, input_rows_count, vec_to);
         else if (which.isUInt64())
             executeIntType<UInt64>(arguments, input_rows_count, vec_to);
+        else if (which.isInt8())
+            executeIntType<Int8>(arguments, input_rows_count, vec_to);
+        else if (which.isInt16())
+            executeIntType<Int16>(arguments, input_rows_count, vec_to);
+        else if (which.isInt32())
+            executeIntType<Int32>(arguments, input_rows_count, vec_to);
+        else if (which.isInt64())
+            executeIntType<Int64>(arguments, input_rows_count, vec_to);
         else
             throw Exception(
                 "Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@@ -743,15 +818,15 @@ public:
     DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
     {
         const auto * bitmap_type0 = typeid_cast<const DataTypeAggregateFunction *>(arguments[0].get());
-        if (!(bitmap_type0 && bitmap_type0->getFunctionName() == AggregateFunctionGroupBitmapData<UInt32>::name()))
+        if (!(bitmap_type0 && bitmap_type0->getFunctionName() == "groupBitmap"))
             throw Exception(
-                "First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName() + ".",
+                "First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName(),
                 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
 
         WhichDataType which(arguments[1].get());
-        if (!(which.isUInt8() || which.isUInt16() || which.isUInt32() || which.isUInt64()))
+        if (!which.isNativeInt() && !which.isNativeUInt())
             throw Exception(
-                "Second argument for function " + getName() + " must be one of [UInt8, UInt16, UInt32, UInt64] but it has type " + arguments[1]->getName() + ".",
+                "Second argument for function " + getName() + " must be an native integer type but it has type " + arguments[1]->getName(),
                 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
 
         return std::make_shared<DataTypeNumber<UInt8>>();
@@ -775,6 +850,14 @@ public:
             executeIntType<UInt32>(arguments, input_rows_count, vec_to);
         else if (which.isUInt64())
             executeIntType<UInt64>(arguments, input_rows_count, vec_to);
+        else if (which.isInt8())
+            executeIntType<Int8>(arguments, input_rows_count, vec_to);
+        else if (which.isInt16())
+            executeIntType<Int16>(arguments, input_rows_count, vec_to);
+        else if (which.isInt32())
+            executeIntType<Int32>(arguments, input_rows_count, vec_to);
+        else if (which.isInt64())
+            executeIntType<Int64>(arguments, input_rows_count, vec_to);
         else
             throw Exception(
                 "Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@@ -839,15 +922,15 @@ public:
     DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
     {
         const auto * bitmap_type0 = typeid_cast<const DataTypeAggregateFunction *>(arguments[0].get());
-        if (!(bitmap_type0 && bitmap_type0->getFunctionName() == AggregateFunctionGroupBitmapData<UInt32>::name()))
+        if (!(bitmap_type0 && bitmap_type0->getFunctionName() == "groupBitmap"))
             throw Exception(
-                "First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName() + ".",
+                "First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName(),
                 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
 
         const auto * bitmap_type1 = typeid_cast<const DataTypeAggregateFunction *>(arguments[1].get());
-        if (!(bitmap_type1 && bitmap_type1->getFunctionName() == AggregateFunctionGroupBitmapData<UInt32>::name()))
+        if (!(bitmap_type1 && bitmap_type1->getFunctionName() == "groupBitmap"))
             throw Exception(
-                "Second argument for function " + getName() + " must be a bitmap but it has type " + arguments[1]->getName() + ".",
+                "Second argument for function " + getName() + " must be a bitmap but it has type " + arguments[1]->getName(),
                 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
 
         if (bitmap_type0->getArgumentsDataTypes()[0]->getTypeId() != bitmap_type1->getArgumentsDataTypes()[0]->getTypeId())
@@ -877,6 +960,14 @@ public:
             executeIntType<UInt32>(arguments, input_rows_count, vec_to);
         else if (which.isUInt64())
             executeIntType<UInt64>(arguments, input_rows_count, vec_to);
+        else if (which.isInt8())
+            executeIntType<Int8>(arguments, input_rows_count, vec_to);
+        else if (which.isInt16())
+            executeIntType<Int16>(arguments, input_rows_count, vec_to);
+        else if (which.isInt32())
+            executeIntType<Int32>(arguments, input_rows_count, vec_to);
+        else if (which.isInt64())
+            executeIntType<Int64>(arguments, input_rows_count, vec_to);
         else
             throw Exception(
                 "Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@@ -974,15 +1065,15 @@ public:
     DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
     {
         const auto * bitmap_type0 = typeid_cast<const DataTypeAggregateFunction *>(arguments[0].get());
-        if (!(bitmap_type0 && bitmap_type0->getFunctionName() == AggregateFunctionGroupBitmapData<UInt32>::name()))
+        if (!(bitmap_type0 && bitmap_type0->getFunctionName() == "groupBitmap"))
             throw Exception(
-                "First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName() + ".",
+                "First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName(),
                 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
 
         const auto * bitmap_type1 = typeid_cast<const DataTypeAggregateFunction *>(arguments[1].get());
-        if (!(bitmap_type1 && bitmap_type1->getFunctionName() == AggregateFunctionGroupBitmapData<UInt32>::name()))
+        if (!(bitmap_type1 && bitmap_type1->getFunctionName() == "groupBitmap"))
             throw Exception(
-                "Second argument for function " + getName() + " must be a bitmap but it has type " + arguments[1]->getName() + ".",
+                "Second argument for function " + getName() + " must be a bitmap but it has type " + arguments[1]->getName(),
                 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
 
         if (bitmap_type0->getArgumentsDataTypes()[0]->getTypeId() != bitmap_type1->getArgumentsDataTypes()[0]->getTypeId())
@@ -1009,6 +1100,14 @@ public:
             return executeBitmapData<UInt32>(arguments, input_rows_count);
         else if (which.isUInt64())
             return executeBitmapData<UInt64>(arguments, input_rows_count);
+        else if (which.isUInt8())
+            return executeBitmapData<UInt8>(arguments, input_rows_count);
+        else if (which.isUInt16())
+            return executeBitmapData<UInt16>(arguments, input_rows_count);
+        else if (which.isUInt32())
+            return executeBitmapData<UInt32>(arguments, input_rows_count);
+        else if (which.isUInt64())
+            return executeBitmapData<UInt64>(arguments, input_rows_count);
         else
             throw Exception(
                 "Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
diff --git a/tests/queries/0_stateless/01702_bitmap_native_integers.reference b/tests/queries/0_stateless/01702_bitmap_native_integers.reference
new file mode 100644
index 00000000000..5be3912b8d5
--- /dev/null
+++ b/tests/queries/0_stateless/01702_bitmap_native_integers.reference
@@ -0,0 +1 @@
+251	65531	4294967291	18446744073709551611	255	65535	4294967295	18446744073709551615
diff --git a/tests/queries/0_stateless/01702_bitmap_native_integers.sql b/tests/queries/0_stateless/01702_bitmap_native_integers.sql
new file mode 100644
index 00000000000..a31de25dc30
--- /dev/null
+++ b/tests/queries/0_stateless/01702_bitmap_native_integers.sql
@@ -0,0 +1,5 @@
+drop table if exists t;
+create table t(i8 Int8, i16 Int16, i32 Int32, i64 Int64) engine Memory;
+insert into t values (-1, -1, -1, -1), (-2, -2, -2, -2), (-3, -3, -3, -3), (-4, -4, -4, -4), (-5, -5, -5, -5);
+select * apply bitmapMin, * apply bitmapMax from (select * apply groupBitmapState from t);
+drop table t;

From c2f893c2e2a70b9fa5a292cfe77736c93c0b1a7c Mon Sep 17 00:00:00 2001
From: JackyWoo <wuchienchao@qq.com>
Date: Wed, 3 Feb 2021 20:30:47 +0800
Subject: [PATCH 0770/1238] add 01297_create_quota to parallel skip list

---
 tests/queries/skip_list.json | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json
index 3311eb3882d..d76603bf633 100644
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@@ -508,6 +508,7 @@
         "01294_lazy_database_concurrent_recreate_reattach_and_show_tables",
         "01294_system_distributed_on_cluster",
         "01296_create_row_policy_in_current_database",
+        "01297_create_quota",
         "01305_replica_create_drop_zookeeper",
         "01307_multiple_leaders_zookeeper",
         "01318_long_unsuccessful_mutation_zookeeper",

From a0c62db9c37bc66ca9a4bbd1dc9207edc059c7cb Mon Sep 17 00:00:00 2001
From: zhangxiao871 <zhangxiao871@ZBMAC-C02DN6312.local>
Date: Sun, 7 Feb 2021 14:06:25 +0800
Subject: [PATCH 0771/1238] Update documentation.

---
 docs/en/operations/system-tables/zookeeper.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/operations/system-tables/zookeeper.md b/docs/en/operations/system-tables/zookeeper.md
index 713cb9269d4..82ace5e81dc 100644
--- a/docs/en/operations/system-tables/zookeeper.md
+++ b/docs/en/operations/system-tables/zookeeper.md
@@ -1,7 +1,7 @@
 # system.zookeeper {#system-zookeeper}
 
 The table does not exist if ZooKeeper is not configured. Allows reading data from the ZooKeeper cluster defined in the config.
-The query must have a ‘path’ equality condition in the WHERE clause, or ‘path’ condition in the set in the WHERE clause. This is the path in ZooKeeper for the children that you want to get data for.
+The query must either have a ‘path =’   condition or a `path IN`  condition set with the `WHERE` clause as shown below. This corresponds to the path of the children in ZooKeeper that you want to get data for.
 
 The query `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` outputs data for all children on the `/clickhouse` node.
 To output data for all root nodes, write path = ‘/’.

From 4181f8d9b7686d277d9fecf575526739306d6373 Mon Sep 17 00:00:00 2001
From: Ivan <5627721+abyss7@users.noreply.github.com>
Date: Sun, 7 Feb 2021 11:06:39 +0300
Subject: [PATCH 0772/1238] Fix segfault using ANTLR parser (#20156)

* Fix build of utils

* Fix visitor
---
 src/Interpreters/MarkTableIdentifiersVisitor.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Interpreters/MarkTableIdentifiersVisitor.cpp b/src/Interpreters/MarkTableIdentifiersVisitor.cpp
index 78563059ed1..6557e1b5292 100644
--- a/src/Interpreters/MarkTableIdentifiersVisitor.cpp
+++ b/src/Interpreters/MarkTableIdentifiersVisitor.cpp
@@ -47,7 +47,7 @@ void MarkTableIdentifiersMatcher::visit(const ASTFunction & func, ASTPtr &, Data
     // First argument of dictGet can be a dictionary name, perhaps with a database.
     if (functionIsJoinGet(func.name) || functionIsDictGet(func.name))
     {
-        if (func.arguments->children.empty())
+        if (!func.arguments || func.arguments->children.empty())
         {
             return;
         }

From db04af3dcef946dd33f37ef8224e7687ac224433 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Sun, 7 Feb 2021 13:06:34 +0300
Subject: [PATCH 0773/1238] Fix macOS build docs

---
 docs/en/development/build-osx.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/development/build-osx.md b/docs/en/development/build-osx.md
index c3a0a540b6d..d78dd12b2dc 100644
--- a/docs/en/development/build-osx.md
+++ b/docs/en/development/build-osx.md
@@ -40,7 +40,7 @@ $ cd ClickHouse
 ``` bash
 $ mkdir build
 $ cd build
-$ cmake ..-DCMAKE_C_COMPILER=`brew --prefix llvm`/bin/clang -DCMAKE_CXX_COMPILER=`brew --prefix llvm`/bin/clang++ -DCMAKE_PREFIX_PATH=`brew --prefix llvm`
+$ cmake .. -DCMAKE_C_COMPILER=`brew --prefix llvm`/bin/clang -DCMAKE_CXX_COMPILER=`brew --prefix llvm`/bin/clang++ -DCMAKE_PREFIX_PATH=`brew --prefix llvm`
 $ ninja
 $ cd ..
 ```

From 38a7248d3aceaed0ebaf73440df9a5e0cac79239 Mon Sep 17 00:00:00 2001
From: vdimir <vdimir@yandex-team.ru>
Date: Sun, 7 Feb 2021 14:01:00 +0300
Subject: [PATCH 0774/1238] Minor changes in date_sub doc

---
 docs/en/sql-reference/functions/date-time-functions.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index 86a1110caf9..4a73bdb2546 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -520,7 +520,7 @@ Result:
 
 ## date\_sub {#date_sub}
 
-This subtracts a time/date interval from a date and then returns the date.
+Subtracts a time/date interval from the provided date.
 
 **Syntax**
 
@@ -536,7 +536,7 @@ Aliases: `dateSub`, `DATE_SUB`.
 
         Supported values: second, minute, hour, day, week, month, quarter, year.
 -   `value` - Value in specified unit - [Int](../../sql-reference/data-types/int-uint.md)    
--   `date` — [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
+-   `date` — [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md) to subtract value from.
 
 **Returned value**
 

From ab5c7b75a41a34a98fa515e1ef9dfe689766aafa Mon Sep 17 00:00:00 2001
From: Amos Bird <amosbird@gmail.com>
Date: Sun, 7 Feb 2021 19:03:55 +0800
Subject: [PATCH 0775/1238] Delay or throw insertion when too many inactive
 parts

---
 src/Storages/MergeTree/MergeTreeData.cpp      | 48 +++++++++++++++++--
 src/Storages/MergeTree/MergeTreeData.h        |  2 +-
 src/Storages/MergeTree/MergeTreeSettings.h    |  2 +
 ...09_inactive_parts_to_delay_throw.reference |  0
 .../01709_inactive_parts_to_delay_throw.sql   | 12 +++++
 5 files changed, 59 insertions(+), 5 deletions(-)
 create mode 100644 tests/queries/0_stateless/01709_inactive_parts_to_delay_throw.reference
 create mode 100644 tests/queries/0_stateless/01709_inactive_parts_to_delay_throw.sql

diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 9ed751cbc8e..c4e00a9a7f3 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -2346,7 +2346,7 @@ size_t MergeTreeData::getPartsCount() const
 }
 
 
-size_t MergeTreeData::getMaxPartsCountForPartition() const
+size_t MergeTreeData::getMaxPartsCountForPartition(size_t * inactive) const
 {
     auto lock = lockParts();
 
@@ -2369,6 +2369,26 @@ size_t MergeTreeData::getMaxPartsCountForPartition() const
         res = std::max(res, cur_count);
     }
 
+    if (inactive)
+    {
+        *inactive = 0;
+        cur_count = 0;
+        for (const auto & part : getDataPartsStateRange(DataPartState::Outdated))
+        {
+            if (cur_partition_id && part->info.partition_id == *cur_partition_id)
+            {
+                ++cur_count;
+            }
+            else
+            {
+                cur_partition_id = &part->info.partition_id;
+                cur_count = 1;
+            }
+
+            *inactive = std::max(*inactive, cur_count);
+        }
+    }
+
     return res;
 }
 
@@ -2398,15 +2418,35 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until) const
         throw Exception("Too many parts (" + toString(parts_count_in_total) + ") in all partitions in total. This indicates wrong choice of partition key. The threshold can be modified with 'max_parts_in_total' setting in <merge_tree> element in config.xml or with per-table setting.", ErrorCodes::TOO_MANY_PARTS);
     }
 
-    const size_t parts_count_in_partition = getMaxPartsCountForPartition();
+    size_t parts_count_in_partition;
+    bool should_delay = false;
+    if (settings->inactive_parts_to_throw_insert > 0 || settings->inactive_parts_to_delay_insert > 0)
+    {
+        size_t inactive_parts;
+        parts_count_in_partition = getMaxPartsCountForPartition(&inactive_parts);
+        if (inactive_parts >= settings->inactive_parts_to_throw_insert)
+        {
+            ProfileEvents::increment(ProfileEvents::RejectedInserts);
+            throw Exception(
+                "Too many inactive parts (" + toString(parts_count_in_partition)
+                    + "). Parts cleaning are processing significantly slower than inserts.",
+                ErrorCodes::TOO_MANY_PARTS);
+        }
+        if (inactive_parts >= settings->inactive_parts_to_delay_insert)
+            should_delay = true;
+    }
+    else
+        parts_count_in_partition = getMaxPartsCountForPartition();
 
     if (parts_count_in_partition >= settings->parts_to_throw_insert)
     {
         ProfileEvents::increment(ProfileEvents::RejectedInserts);
-        throw Exception("Too many parts (" + toString(parts_count_in_partition) + "). Merges are processing significantly slower than inserts.", ErrorCodes::TOO_MANY_PARTS);
+        throw Exception(
+            "Too many parts (" + toString(parts_count_in_partition) + "). Merges are processing significantly slower than inserts.",
+            ErrorCodes::TOO_MANY_PARTS);
     }
 
-    if (parts_count_in_partition < settings->parts_to_delay_insert)
+    if (!should_delay && parts_count_in_partition < settings->parts_to_delay_insert)
         return;
 
     const size_t max_k = settings->parts_to_throw_insert - settings->parts_to_delay_insert; /// always > 0
diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h
index 425dcbfb316..d4b6c1fba27 100644
--- a/src/Storages/MergeTree/MergeTreeData.h
+++ b/src/Storages/MergeTree/MergeTreeData.h
@@ -415,7 +415,7 @@ public:
     size_t getTotalActiveSizeInRows() const;
 
     size_t getPartsCount() const;
-    size_t getMaxPartsCountForPartition() const;
+    size_t getMaxPartsCountForPartition(size_t * inactive = nullptr) const;
 
     /// Get min value of part->info.getDataVersion() for all active parts.
     /// Makes sense only for ordinary MergeTree engines because for them block numbering doesn't depend on partition.
diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h
index 53388617a07..16657b4083d 100644
--- a/src/Storages/MergeTree/MergeTreeSettings.h
+++ b/src/Storages/MergeTree/MergeTreeSettings.h
@@ -57,7 +57,9 @@ struct Settings;
     \
     /** Inserts settings. */ \
     M(UInt64, parts_to_delay_insert, 150, "If table contains at least that many active parts in single partition, artificially slow down insert into table.", 0) \
+    M(UInt64, inactive_parts_to_delay_insert, 0, "If table contains at least that many inactive parts in single partition, artificially slow down insert into table.", 0) \
     M(UInt64, parts_to_throw_insert, 300, "If more than this number active parts in single partition, throw 'Too many parts ...' exception.", 0) \
+    M(UInt64, inactive_parts_to_throw_insert, 0, "If more than this number inactive parts in single partition, throw 'Too many inactive parts ...' exception.", 0) \
     M(UInt64, max_delay_to_insert, 1, "Max delay of inserting data into MergeTree table in seconds, if there are a lot of unmerged parts in single partition.", 0) \
     M(UInt64, max_parts_in_total, 100000, "If more than this number active parts in all partitions in total, throw 'Too many parts ...' exception.", 0) \
     \
diff --git a/tests/queries/0_stateless/01709_inactive_parts_to_delay_throw.reference b/tests/queries/0_stateless/01709_inactive_parts_to_delay_throw.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/01709_inactive_parts_to_delay_throw.sql b/tests/queries/0_stateless/01709_inactive_parts_to_delay_throw.sql
new file mode 100644
index 00000000000..fad890c4807
--- /dev/null
+++ b/tests/queries/0_stateless/01709_inactive_parts_to_delay_throw.sql
@@ -0,0 +1,12 @@
+drop table if exists x;
+
+create table x (i int) engine MergeTree order by i settings old_parts_lifetime = 10000000000, min_bytes_for_wide_part = 0, inactive_parts_to_throw_insert = 1;
+
+insert into x values (1);
+insert into x values (2);
+
+optimize table x final;
+
+insert into x values (3); -- { serverError 252; }
+
+drop table if exists x;

From b26ebd6df8e84c27285919cf257079ccd9405154 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Sun, 7 Feb 2021 16:13:23 +0300
Subject: [PATCH 0776/1238] Slack link was expired

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 8e114d5abe9..53778c79bef 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@ ClickHouse® is an open-source column-oriented database management system that a
 * [Tutorial](https://clickhouse.tech/docs/en/getting_started/tutorial/) shows how to set up and query small ClickHouse cluster.
 * [Documentation](https://clickhouse.tech/docs/en/) provides more in-depth information.
 * [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format.
-* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-d2zxkf9e-XyxDa_ucfPxzuH4SJIm~Ng) and [Telegram](https://telegram.me/clickhouse_en) allow to chat with ClickHouse users in real-time.
+* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-ly9m4w1x-6j7x5Ts_pQZqrctAbRZ3cg) and [Telegram](https://telegram.me/clickhouse_en) allow to chat with ClickHouse users in real-time.
 * [Blog](https://clickhouse.yandex/blog/en/) contains various ClickHouse-related articles, as well as announcements and reports about events.
 * [Code Browser](https://clickhouse.tech/codebrowser/html_report/ClickHouse/index.html) with syntax highlight and navigation.
 * [Yandex.Messenger channel](https://yandex.ru/chat/#/join/20e380d9-c7be-4123-ab06-e95fb946975e) shares announcements and useful links in Russian.

From c941d3bf2092bbe16248bcd01127df4303336974 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 7 Feb 2021 16:14:31 +0300
Subject: [PATCH 0777/1238] Renew Slack link

---
 website/templates/index/community.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/website/templates/index/community.html b/website/templates/index/community.html
index e65f9ff0f86..20b09e7318b 100644
--- a/website/templates/index/community.html
+++ b/website/templates/index/community.html
@@ -66,7 +66,7 @@
                 </div>
                 <div class="row mb-3">
                     <div class="col w-100">
-                        <a href="https://join.slack.com/t/clickhousedb/shared_invite/zt-d2zxkf9e-XyxDa_ucfPxzuH4SJIm~Ng"
+                        <a href="https://join.slack.com/t/clickhousedb/shared_invite/zt-ly9m4w1x-6j7x5Ts_pQZqrctAbRZ3cg"
                             rel="external nofollow noreferrer" target="_blank" class="text-decoration-none">
                             <div class="bg-dark p-4">
                                 <img data-src="/images/index/slack.svg"

From 375a9b440795e13aac298d0229de02d390ab31f9 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Wed, 3 Feb 2021 11:00:20 +0300
Subject: [PATCH 0778/1238] Fix build

---
 contrib/base64-cmake/CMakeLists.txt    | 2 +-
 contrib/hyperscan-cmake/CMakeLists.txt | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/contrib/base64-cmake/CMakeLists.txt b/contrib/base64-cmake/CMakeLists.txt
index 63b4e324d29..a295ee45b84 100644
--- a/contrib/base64-cmake/CMakeLists.txt
+++ b/contrib/base64-cmake/CMakeLists.txt
@@ -11,7 +11,7 @@ endif ()
 target_compile_options(base64_scalar PRIVATE -falign-loops)
 
 if (ARCH_AMD64)
-    target_compile_options(base64_ssse3 PRIVATE -mssse3 -falign-loops)
+    target_compile_options(base64_ssse3 PRIVATE -mno-avx -mno-avx2 -mssse3 -falign-loops)
     target_compile_options(base64_avx PRIVATE -falign-loops -mavx)
     target_compile_options(base64_avx2 PRIVATE -falign-loops -mavx2)
 else ()
diff --git a/contrib/hyperscan-cmake/CMakeLists.txt b/contrib/hyperscan-cmake/CMakeLists.txt
index c44214cded8..75c45ff7bf5 100644
--- a/contrib/hyperscan-cmake/CMakeLists.txt
+++ b/contrib/hyperscan-cmake/CMakeLists.txt
@@ -252,6 +252,7 @@ if (NOT EXTERNAL_HYPERSCAN_LIBRARY_FOUND)
     target_compile_definitions (hyperscan PUBLIC USE_HYPERSCAN=1)
     target_compile_options (hyperscan
         PRIVATE -g0 # Library has too much debug information
+        -mno-avx -mno-avx2 # The library is using dynamic dispatch and is confused if AVX is enabled globally
         -march=corei7 -O2 -fno-strict-aliasing -fno-omit-frame-pointer -fvisibility=hidden # The options from original build system
         -fno-sanitize=undefined # Assume the library takes care of itself
     )

From 1201c4c55f2b644792eecbfd023c8f4f414abe1d Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Sun, 7 Feb 2021 16:45:29 +0300
Subject: [PATCH 0779/1238] Fixed build issues

---
 src/Common/HashTable/HashTable.h       | 60 +++++++++++++-------------
 src/Common/tests/lru_hash_map_perf.cpp |  2 +-
 2 files changed, 30 insertions(+), 32 deletions(-)

diff --git a/src/Common/HashTable/HashTable.h b/src/Common/HashTable/HashTable.h
index 5d4410ce4e7..06b4d74cfcb 100644
--- a/src/Common/HashTable/HashTable.h
+++ b/src/Common/HashTable/HashTable.h
@@ -341,6 +341,32 @@ struct ZeroValueStorage<false, Cell>
 };
 
 
+template <bool enable, typename Allocator, typename Cell>
+struct AllocatorBufferDeleter;
+
+template <typename Allocator, typename Cell>
+struct AllocatorBufferDeleter<false, Allocator, Cell>
+{
+    AllocatorBufferDeleter(Allocator &, size_t) {}
+
+    void operator()(Cell *) const {}
+
+};
+
+template <typename Allocator, typename Cell>
+struct AllocatorBufferDeleter<true, Allocator, Cell>
+{
+    AllocatorBufferDeleter(Allocator & allocator_, size_t size_)
+        : allocator(allocator_)
+        , size(size_) {}
+
+    void operator()(Cell * buffer) const { allocator.free(buffer, size); }
+
+    Allocator & allocator;
+    size_t size;
+};
+
+
 // The HashTable
 template
 <
@@ -434,35 +460,6 @@ protected:
         }
     }
 
-    template<bool enable>
-    struct AllocatorBufferDeleter;
-
-    template<>
-    struct AllocatorBufferDeleter<false>
-    {
-        AllocatorBufferDeleter(Allocator &, size_t) {}
-
-        void operator()(Cell *) const {}
-
-    };
-
-    template<>
-    struct AllocatorBufferDeleter<true>
-    {
-        AllocatorBufferDeleter(Allocator & allocator_, size_t size_)
-            : allocator(allocator_)
-            , size(size_)
-        {}
-
-        void operator()(Cell * buffer) const
-        {
-            allocator.free(buffer, size);
-        }
-
-        Allocator & allocator;
-        size_t size;
-    };
-
     /// Increase the size of the buffer.
     void resize(size_t for_num_elems = 0, size_t for_buf_size = 0)
     {
@@ -501,8 +498,9 @@ protected:
         /** If cell required to be notified during move we need to temporary keep old buffer
          * because realloc does not quarantee for reallocated buffer to have same base address
          */
-        AllocatorBufferDeleter<Cell::need_to_notify_cell_during_move> buffer_deleter(*this, old_buffer_size);
-        std::unique_ptr<Cell, decltype(buffer_deleter)> old_buffer(buf, buffer_deleter);
+        using Deleter = AllocatorBufferDeleter<Cell::need_to_notify_cell_during_move, Allocator, Cell>;
+        Deleter buffer_deleter(*this, old_buffer_size);
+        std::unique_ptr<Cell, Deleter> old_buffer(buf, buffer_deleter);
 
         if constexpr (Cell::need_to_notify_cell_during_move)
         {
diff --git a/src/Common/tests/lru_hash_map_perf.cpp b/src/Common/tests/lru_hash_map_perf.cpp
index f7ef1fdd759..14beff3f7da 100644
--- a/src/Common/tests/lru_hash_map_perf.cpp
+++ b/src/Common/tests/lru_hash_map_perf.cpp
@@ -142,7 +142,7 @@ void testInsertElementsIntoHashMap(size_t map_size, const std::vector<UInt64> &
 
     for (size_t i = 0; i < numbers_to_insert_size; ++i)
     {
-        auto it = hash_map.find(numbers_to_insert[i]);
+        auto * it = hash_map.find(numbers_to_insert[i]);
 
         if (it)
             summ += it->getMapped();

From 0145be85df3f11f7f809b96e5f9f9266b6ec3034 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 7 Feb 2021 16:50:57 +0300
Subject: [PATCH 0780/1238] Do not spill warnings suppressions from ANTLR

---
 src/Parsers/New/CMakeLists.txt | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/Parsers/New/CMakeLists.txt b/src/Parsers/New/CMakeLists.txt
index 360dd4d7488..468394b7bd8 100644
--- a/src/Parsers/New/CMakeLists.txt
+++ b/src/Parsers/New/CMakeLists.txt
@@ -65,8 +65,6 @@ target_compile_options (clickhouse_parsers_new
         -Wno-documentation-deprecated-sync
         -Wno-shadow-field
         -Wno-unused-parameter
-
-    PUBLIC
         -Wno-extra-semi
         -Wno-inconsistent-missing-destructor-override
 )

From e814db68360db68831ca631408d551cf00bfe2c5 Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@yandex-team.ru>
Date: Sun, 7 Feb 2021 17:18:18 +0300
Subject: [PATCH 0781/1238] Update version_date.tsv after release 21.2.2.8

---
 utils/list-versions/version_date.tsv | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index c4b27f3199d..8d05f5fff46 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -1,3 +1,4 @@
+v21.2.2.8-stable	2021-02-07
 v21.1.3.32-stable	2021-02-03
 v21.1.2.15-stable	2021-01-18
 v20.12.5.18-stable	2021-02-03

From f910f0028518e41053f53306da46552d942c72f1 Mon Sep 17 00:00:00 2001
From: Olga Revyakina <revolg@yandex-team.ru>
Date: Sun, 7 Feb 2021 17:29:54 +0300
Subject: [PATCH 0782/1238] Description and link (en, ru)

---
 .../table-engines/integrations/index.md       |  3 +
 .../table-engines/integrations/mongodb.md     | 57 +++++++++++++++++++
 .../table-engines/integrations/index.md       |  3 +
 .../table-engines/integrations/mongodb.md     | 57 +++++++++++++++++++
 4 files changed, 120 insertions(+)
 create mode 100644 docs/en/engines/table-engines/integrations/mongodb.md
 create mode 100644 docs/ru/engines/table-engines/integrations/mongodb.md

diff --git a/docs/en/engines/table-engines/integrations/index.md b/docs/en/engines/table-engines/integrations/index.md
index cf3e36c2f48..288c9c3cd56 100644
--- a/docs/en/engines/table-engines/integrations/index.md
+++ b/docs/en/engines/table-engines/integrations/index.md
@@ -12,6 +12,9 @@ List of supported integrations:
 -   [ODBC](../../../engines/table-engines/integrations/odbc.md)
 -   [JDBC](../../../engines/table-engines/integrations/jdbc.md)
 -   [MySQL](../../../engines/table-engines/integrations/mysql.md)
+-   [MongoDB](../../../engines/table-engines/integrations/mongodb.md)
 -   [HDFS](../../../engines/table-engines/integrations/hdfs.md)
 -   [S3](../../../engines/table-engines/integrations/s3.md)
 -   [Kafka](../../../engines/table-engines/integrations/kafka.md)
+-   [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md)
+-   [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md)
diff --git a/docs/en/engines/table-engines/integrations/mongodb.md b/docs/en/engines/table-engines/integrations/mongodb.md
new file mode 100644
index 00000000000..4c07961227a
--- /dev/null
+++ b/docs/en/engines/table-engines/integrations/mongodb.md
@@ -0,0 +1,57 @@
+---
+toc_priority: 7
+toc_title: MongoDB
+---
+
+# MongoDB {#mongodb}
+
+MongoDB engine is read-only, it allows to perform `SELECT` queries on data, stored on a remote MongoBD server. MongoDB engine supports onle flat fields (primitive, not nested types).
+
+## Creating a Table {#creating-a-table}
+
+``` sql
+CREATE TABLE [IF NOT EXISTS] [db.]table_name
+(
+    name1 [type1],
+    name2 [type2],
+    ...
+) ENGINE = MongoDB(host:port, database, collection, user, password);
+```
+
+**Engine Parameters**
+
+-   `host:port` — MongoDB server address.
+
+-   `database` — Remote database name.
+
+-   `table` — Remote table name.
+
+-   `user` — MongoDB user.
+
+-   `password` — User password.
+
+## Usage Example {#usage-example}
+
+Table in ClickHouse, retrieving data from the MongoDB table:
+
+``` text
+CREATE TABLE mongo_table
+(
+    key UInt64, 
+    data String
+) ENGINE = MongoDB('mongo1:27017', 'test', 'simple_table', 'test', 'clickhouse');
+```
+
+Query:
+
+``` sql
+SELECT COUNT() FROM mongo_table;
+```
+
+``` text
+┌─count()─┐
+│       4 │
+└─────────┘
+```
+
+[Original article](https://clickhouse.tech/docs/en/operations/table_engines/integrations/mongodb/) <!--hide-->
diff --git a/docs/ru/engines/table-engines/integrations/index.md b/docs/ru/engines/table-engines/integrations/index.md
index 02189cf9e55..db7e527442e 100644
--- a/docs/ru/engines/table-engines/integrations/index.md
+++ b/docs/ru/engines/table-engines/integrations/index.md
@@ -12,7 +12,10 @@ toc_priority: 30
 -   [ODBC](../../../engines/table-engines/integrations/odbc.md)
 -   [JDBC](../../../engines/table-engines/integrations/jdbc.md)
 -   [MySQL](../../../engines/table-engines/integrations/mysql.md)
+-   [MongoDB](../../../engines/table-engines/integrations/mongodb.md)
 -   [HDFS](../../../engines/table-engines/integrations/hdfs.md)
 -   [Kafka](../../../engines/table-engines/integrations/kafka.md)
+-   [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md)
+-   [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md)
 
 [Оригинальная статья](https://clickhouse.tech/docs/ru/engines/table-engines/integrations/) <!--hide-->
diff --git a/docs/ru/engines/table-engines/integrations/mongodb.md b/docs/ru/engines/table-engines/integrations/mongodb.md
new file mode 100644
index 00000000000..d2858c01abc
--- /dev/null
+++ b/docs/ru/engines/table-engines/integrations/mongodb.md
@@ -0,0 +1,57 @@
+---
+toc_priority: 7
+toc_title: MongoDB
+---
+
+# MongoDB {#mongodb}
+
+Движок MongoDB работает только на чтение данных, он поддерживает запросы `SELECT` над данными, хранящимися на серверах MongoBD. Движок MongoDB поддерживает только плоские типы данных (простые, не вложенные).
+
+## Создание таблицы {#creating-a-table}
+
+``` sql
+CREATE TABLE [IF NOT EXISTS] [db.]table_name
+(
+    name1 [type1],
+    name2 [type2],
+    ...
+) ENGINE = MongoDB(host:port, database, collection, user, password);
+```
+
+**Параметры движка**
+
+-   `host:port` — адрес сервера MongoDB.
+
+-   `database` — имя базы данных на удалённом сервере.
+
+-   `table` — имя таблицы на удалённом сервере.
+
+-   `user` — пользователь MongoDB.
+
+-   `password` — пароль пользователя.
+
+## Примеры использования {#usage-example}
+
+Таблица в ClickHouse, которая получает данные из таблицы MongoDB:
+
+``` text
+CREATE TABLE mongo_table
+(
+    key UInt64, 
+    data String
+) ENGINE = MongoDB('mongo1:27017', 'test', 'simple_table', 'test', 'clickhouse');
+```
+
+Запрос к таблице:
+
+``` sql
+SELECT COUNT() FROM mongo_table;
+```
+
+``` text
+┌─count()─┐
+│       4 │
+└─────────┘
+```
+
+[Original article](https://clickhouse.tech/docs/ru/operations/table_engines/integrations/mongodb/) <!--hide-->

From 6a5a539e98533539a679dd30ed94e3e8021c74b3 Mon Sep 17 00:00:00 2001
From: feng lv <fenglv15@mails.ucas.ac.cn>
Date: Sun, 7 Feb 2021 14:41:41 +0000
Subject: [PATCH 0783/1238] rewrite avg for algebraic optimization

---
 .../ArithmeticOperationsInAgrFuncOptimize.cpp | 10 ++++----
 .../ArithmeticOperationsInAgrFuncOptimize.h   |  2 +-
 ...e_avg_for_algebraic_optimization.reference | 23 +++++++++++++++++++
 ...rewrite_avg_for_algebraic_optimization.sql | 22 ++++++++++++++++++
 4 files changed, 51 insertions(+), 6 deletions(-)
 create mode 100644 tests/queries/0_stateless/01702_rewrite_avg_for_algebraic_optimization.reference
 create mode 100644 tests/queries/0_stateless/01702_rewrite_avg_for_algebraic_optimization.sql

diff --git a/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.cpp b/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.cpp
index d544ceb81a2..c7d32ba2721 100644
--- a/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.cpp
+++ b/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.cpp
@@ -89,11 +89,11 @@ const String & changeNameIfNeeded(const String & func_name, const String & child
 
 ASTPtr tryExchangeFunctions(const ASTFunction & func)
 {
-    static const std::unordered_map<String, std::unordered_set<String>> supported = {
-        { "sum", { "multiply", "divide" } },
-        { "min", { "multiply", "divide", "plus", "minus" } },
-        { "max", { "multiply", "divide", "plus", "minus" } }
-    };
+    static const std::unordered_map<String, std::unordered_set<String>> supported
+        = {{"sum", {"multiply", "divide"}},
+           {"min", {"multiply", "divide", "plus", "minus"}},
+           {"max", {"multiply", "divide", "plus", "minus"}},
+           {"avg", {"multiply", "divide", "plus", "minus"}}};
 
     const ASTFunction * child_func = getInternalFunction(func);
     if (!child_func || !child_func->arguments || child_func->arguments->children.size() != 2 ||
diff --git a/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.h b/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.h
index 5d445335045..81d936aeba5 100644
--- a/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.h
+++ b/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.h
@@ -11,7 +11,7 @@ class ASTFunction;
 /// Extract constant arguments out of aggregate functions from child functions
 /// 'sum(a * 2)' -> 'sum(a) * 2'
 /// Rewrites:   sum([multiply|divide]) -> [multiply|divide](sum)
-///             [min|max]([multiply|divide|plus|minus]) -> [multiply|divide|plus|minus]([min|max])
+///             [min|max|avg]([multiply|divide|plus|minus]) -> [multiply|divide|plus|minus]([min|max|avg])
 /// TODO: groupBitAnd, groupBitOr, groupBitXor
 /// TODO: better constant detection: f(const) is not detected as const.
 /// TODO: 'f((2 * n) * n)' -> '2 * f(n * n)'
diff --git a/tests/queries/0_stateless/01702_rewrite_avg_for_algebraic_optimization.reference b/tests/queries/0_stateless/01702_rewrite_avg_for_algebraic_optimization.reference
new file mode 100644
index 00000000000..2bdcedba90e
--- /dev/null
+++ b/tests/queries/0_stateless/01702_rewrite_avg_for_algebraic_optimization.reference
@@ -0,0 +1,23 @@
+SELECT avg(number + 2) FROM numbers(10)
+value: 	6.5
+EXPLAIN syntax:
+SELECT avg(number) + 2
+FROM numbers(10)
+
+SELECT avg(number - 2) FROM numbers(10)
+value: 	2.5
+EXPLAIN syntax:
+SELECT avg(number) - 2
+FROM numbers(10)
+
+SELECT avg(number * 2) FROM numbers(10)
+value: 	9
+EXPLAIN syntax:
+SELECT avg(number) * 2
+FROM numbers(10)
+
+SELECT avg(number / 2) FROM numbers(10)
+value: 	2.25
+EXPLAIN syntax:
+SELECT avg(number) / 2
+FROM numbers(10)
diff --git a/tests/queries/0_stateless/01702_rewrite_avg_for_algebraic_optimization.sql b/tests/queries/0_stateless/01702_rewrite_avg_for_algebraic_optimization.sql
new file mode 100644
index 00000000000..8fa4dd0ae47
--- /dev/null
+++ b/tests/queries/0_stateless/01702_rewrite_avg_for_algebraic_optimization.sql
@@ -0,0 +1,22 @@
+SELECT 'SELECT avg(number + 2) FROM numbers(10)';
+SELECT 'value: ', avg(number + 2) FROM numbers(10);
+SELECT 'EXPLAIN syntax:';
+EXPLAIN SYNTAX SELECT avg(number + 2) FROM numbers(10);
+
+SELECT '';
+SELECT 'SELECT avg(number - 2) FROM numbers(10)';
+SELECT 'value: ', avg(number - 2) FROM numbers(10);
+SELECT 'EXPLAIN syntax:';
+EXPLAIN SYNTAX SELECT avg(number - 2) FROM numbers(10);
+
+SELECT '';
+SELECT 'SELECT avg(number * 2) FROM numbers(10)';
+SELECT 'value: ', avg(number * 2) FROM numbers(10);
+SELECT 'EXPLAIN syntax:';
+EXPLAIN SYNTAX SELECT avg(number * 2) FROM numbers(10);
+
+SELECT '';
+SELECT 'SELECT avg(number / 2) FROM numbers(10)';
+SELECT 'value: ', avg(number / 2) FROM numbers(10);
+SELECT 'EXPLAIN syntax:';
+EXPLAIN SYNTAX SELECT avg(number / 2) FROM numbers(10);

From 06a92e497345495f65db17c8ccdb045bfa0726c9 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 7 Feb 2021 18:17:13 +0300
Subject: [PATCH 0784/1238] Add changelog for 21.2

---
 CHANGELOG.md | 145 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 145 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d2cc3e51997..fffd732f7d7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,148 @@
+## ClickHouse release 21.2
+
+### ClickHouse release v21.2.2.8-stable, 2021-02-07
+
+#### Backward Incompatible Change
+
+* Bitwise functions (`bitAnd`, `bitOr`, etc) are forbidden for floating point arguments. Now you have to do explicit cast to integer. [#19853](https://github.com/ClickHouse/ClickHouse/pull/19853) ([Azat Khuzhin](https://github.com/azat)).
+* Forbid `lcm`/`gcd` for floats. [#19532](https://github.com/ClickHouse/ClickHouse/pull/19532) ([Azat Khuzhin](https://github.com/azat)).
+* Fix memory tracking for `OPTIMIZE TABLE`/merges; account query memory limits and sampling for `OPTIMIZE TABLE`/merges. [#18772](https://github.com/ClickHouse/ClickHouse/pull/18772) ([Azat Khuzhin](https://github.com/azat)).
+* Disallow floating point column as partition key, see [#18421](https://github.com/ClickHouse/ClickHouse/issues/18421#event-4147046255). [#18464](https://github.com/ClickHouse/ClickHouse/pull/18464) ([hexiaoting](https://github.com/hexiaoting)).
+* Excessive parenthesis in type definitions no longer supported, example: `Array((UInt8))`.
+
+#### New Feature
+
+* Added `PostgreSQL` table engine (both select/insert, with support for multidimensional arrays), also as table function. Added `PostgreSQL` dictionary source. Added `PostgreSQL` database engine. [#18554](https://github.com/ClickHouse/ClickHouse/pull/18554) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Data type `Nested` now supports arbitrary levels of nesting. Introduced subcolumns of complex types, such as `size0` in `Array`, `null` in `Nullable`, names of `Tuple` elements, which can be read without reading of whole column. [#17310](https://github.com/ClickHouse/ClickHouse/pull/17310) ([Anton Popov](https://github.com/CurtizJ)).
+* Added `Nullable` support for `FlatDictionary`, `HashedDictionary`, `ComplexKeyHashedDictionary`, `DirectDictionary`, `ComplexKeyDirectDictionary`, `RangeHashedDictionary`. [#18236](https://github.com/ClickHouse/ClickHouse/pull/18236) ([Maksim Kita](https://github.com/kitaisreal)).
+* Adds a new table called `system.distributed_ddl_queue` that displays the queries in the DDL worker queue. [#17656](https://github.com/ClickHouse/ClickHouse/pull/17656) ([Bharat Nallan](https://github.com/bharatnc)).
+* Added support of mapping LDAP group names, and attribute values in general, to local roles for users from ldap user directories. [#17211](https://github.com/ClickHouse/ClickHouse/pull/17211) ([Denis Glazachev](https://github.com/traceon)).
+* Support insert into table function `cluster`, and for both table functions `remote` and `cluster`, support distributing data across nodes by specify sharding key. Close [#16752](https://github.com/ClickHouse/ClickHouse/issues/16752). [#18264](https://github.com/ClickHouse/ClickHouse/pull/18264) ([flynn](https://github.com/ucasFL)).
+* Add function `decodeXMLComponent` to decode characters for XML. Example: `SELECT decodeXMLComponent('Hello,&quot;world&quot;!')` [#17659](https://github.com/ClickHouse/ClickHouse/issues/17659). [#18542](https://github.com/ClickHouse/ClickHouse/pull/18542) ([nauta](https://github.com/nautaa)).
+* Added functions `parseDateTimeBestEffortUSOrZero`, `parseDateTimeBestEffortUSOrNull`. [#19712](https://github.com/ClickHouse/ClickHouse/pull/19712) ([Maksim Kita](https://github.com/kitaisreal)).
+* Add `sign` math function. [#19527](https://github.com/ClickHouse/ClickHouse/pull/19527) ([flynn](https://github.com/ucasFL)).
+* Add information about used features (functions, table engines, etc) into system.query_log. [#18495](https://github.com/ClickHouse/ClickHouse/issues/18495). [#19371](https://github.com/ClickHouse/ClickHouse/pull/19371) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Function `formatDateTime` support the `%Q` modification to format date to quarter. [#19224](https://github.com/ClickHouse/ClickHouse/pull/19224) ([Jianmei Zhang](https://github.com/zhangjmruc)).
+* Support MetaKey+Enter hotkey binding in play UI. [#19012](https://github.com/ClickHouse/ClickHouse/pull/19012) ([sundyli](https://github.com/sundy-li)).
+* Add three functions for map data type: 1. `mapContains(map, key)` to check weather map.keys include the second parameter key. 2. `mapKeys(map)` return all the keys in Array format 3. `mapValues(map)` return all the values in Array format. [#18788](https://github.com/ClickHouse/ClickHouse/pull/18788) ([hexiaoting](https://github.com/hexiaoting)).
+* Add `log_comment` setting related to [#18494](https://github.com/ClickHouse/ClickHouse/issues/18494). [#18549](https://github.com/ClickHouse/ClickHouse/pull/18549) ([Zijie Lu](https://github.com/TszKitLo40)).
+* Add support of tuple argument to `argMin` and `argMax` functions. [#17359](https://github.com/ClickHouse/ClickHouse/pull/17359) ([Ildus Kurbangaliev](https://github.com/ildus)).
+* Support `EXISTS VIEW` syntax. [#18552](https://github.com/ClickHouse/ClickHouse/pull/18552) ([Du Chuan](https://github.com/spongedu)).
+* Add `SELECT ALL` syntax. closes [#18706](https://github.com/ClickHouse/ClickHouse/issues/18706). [#18723](https://github.com/ClickHouse/ClickHouse/pull/18723) ([flynn](https://github.com/ucasFL)).
+
+#### Performance Improvement
+
+* Faster parts removal by lowering the number of `stat` syscalls. This returns the optimization that existed while ago. More safe interface of `IDisk`. This closes [#19065](https://github.com/ClickHouse/ClickHouse/issues/19065). [#19086](https://github.com/ClickHouse/ClickHouse/pull/19086) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Aliases declared in `WITH` statement are properly used in index analysis. Queries like `WITH column AS alias SELECT ... WHERE alias = ...` may use index now. [#18896](https://github.com/ClickHouse/ClickHouse/pull/18896) ([Amos Bird](https://github.com/amosbird)).
+* Add `optimize_alias_column_prediction` (on by default), that will: - Respect aliased columns in WHERE during partition pruning and skipping data using secondary indexes; - Respect aliased columns in WHERE for trivial count queries for optimize_trivial_count; - Respect aliased columns in GROUP BY/ORDER BY for optimize_aggregation_in_order/optimize_read_in_order. [#16995](https://github.com/ClickHouse/ClickHouse/pull/16995) ([sundyli](https://github.com/sundy-li)).
+* Speed up aggregate function `sum`. Improvement only visible on synthetic benchmarks and not very practical. [#19216](https://github.com/ClickHouse/ClickHouse/pull/19216) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Update libc++ and use another ABI to provide better performance. [#18914](https://github.com/ClickHouse/ClickHouse/pull/18914) ([Danila Kutenin](https://github.com/danlark1)).
+* Rewrite `sumIf()` and `sum(if())` function to `countIf()` function when logically equivalent. [#17041](https://github.com/ClickHouse/ClickHouse/pull/17041) ([flynn](https://github.com/ucasFL)).
+* Use a connection pool for S3 connections, controlled by the `s3_max_connections` settings. [#13405](https://github.com/ClickHouse/ClickHouse/pull/13405) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* Add support for zstd long option for better compression of string columns to save space. [#17184](https://github.com/ClickHouse/ClickHouse/pull/17184) ([ygrek](https://github.com/ygrek)).
+* Slightly improve server latency by removing access to configuration on every connection. [#19863](https://github.com/ClickHouse/ClickHouse/pull/19863) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Reduce lock contention for multiple layers of the `Buffer` engine. [#19379](https://github.com/ClickHouse/ClickHouse/pull/19379) ([Azat Khuzhin](https://github.com/azat)).
+* Support splitting `Filter` step of query plan into `Expression + Filter` pair. Together with `Expression + Expression` merging optimization ([#17458](https://github.com/ClickHouse/ClickHouse/issues/17458)) it may delay execution for some expressions after `Filter` step. [#19253](https://github.com/ClickHouse/ClickHouse/pull/19253) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+
+#### Improvement
+
+* `SELECT count() FROM table` now can be executed if only one any column can be selected from the `table`. This PR fixes [#10639](https://github.com/ClickHouse/ClickHouse/issues/10639). [#18233](https://github.com/ClickHouse/ClickHouse/pull/18233) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Set charset to `utf8mb4` when interacting with remote MySQL servers. Fixes [#19795](https://github.com/ClickHouse/ClickHouse/issues/19795). [#19800](https://github.com/ClickHouse/ClickHouse/pull/19800) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* `S3` table function now supports `auto` compression mode (autodetect). This closes [#18754](https://github.com/ClickHouse/ClickHouse/issues/18754). [#19793](https://github.com/ClickHouse/ClickHouse/pull/19793) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* Correctly output infinite arguments for `formatReadableTimeDelta` function. In previous versions, there was implicit conversion to implementation specific integer value. [#19791](https://github.com/ClickHouse/ClickHouse/pull/19791) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Table function `S3` will use global region if the region can't be determined exactly. This closes [#10998](https://github.com/ClickHouse/ClickHouse/issues/10998). [#19750](https://github.com/ClickHouse/ClickHouse/pull/19750) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* In distributed queries if the setting `async_socket_for_remote` is enabled, it was possible to get stack overflow at least in debug build configuration if very deeply nested data type is used in table (e.g. `Array(Array(Array(...more...)))`). This fixes [#19108](https://github.com/ClickHouse/ClickHouse/issues/19108). This change introduces minor backward incompatibility: excessive parenthesis in type definitions no longer supported, example: `Array((UInt8))`. [#19736](https://github.com/ClickHouse/ClickHouse/pull/19736) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Add separate pool for message brokers (RabbitMQ and Kafka). [#19722](https://github.com/ClickHouse/ClickHouse/pull/19722) ([Azat Khuzhin](https://github.com/azat)).
+* Fix rare `max_number_of_merges_with_ttl_in_pool` limit overrun (more merges with TTL can be assigned) for non-replicated MergeTree. [#19708](https://github.com/ClickHouse/ClickHouse/pull/19708) ([alesapin](https://github.com/alesapin)).
+* Dictionary: better error message during attribute parsing. [#19678](https://github.com/ClickHouse/ClickHouse/pull/19678) ([Maksim Kita](https://github.com/kitaisreal)).
+* Add an option to disable validation of checksums on reading. Should never be used in production. Please do not expect any benefits in disabling it. It may only be used for experiments and benchmarks. The setting only applicable for tables of MergeTree family. Checksums are always validated for other table engines and when receiving data over network. In my observations there is no performance difference or it is less than 0.5%. [#19588](https://github.com/ClickHouse/ClickHouse/pull/19588) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Support constant result in function `multiIf`. [#19533](https://github.com/ClickHouse/ClickHouse/pull/19533) ([Maksim Kita](https://github.com/kitaisreal)).
+* Enable function length/empty/notEmpty for datatype Map, which returns keys number in Map. [#19530](https://github.com/ClickHouse/ClickHouse/pull/19530) ([taiyang-li](https://github.com/taiyang-li)).
+* Add `--reconnect` option to `clickhouse-benchmark`. When this option is specified, it will reconnect before every request. This is needed for testing. [#19872](https://github.com/ClickHouse/ClickHouse/pull/19872) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Support using the new location of `.debug` file. This fixes [#19348](https://github.com/ClickHouse/ClickHouse/issues/19348). [#19520](https://github.com/ClickHouse/ClickHouse/pull/19520) ([Amos Bird](https://github.com/amosbird)).
+* `toIPv6` function parses `IPv4` addresses. [#19518](https://github.com/ClickHouse/ClickHouse/pull/19518) ([Bharat Nallan](https://github.com/bharatnc)).
+* Add `http_referer` field to `system.query_log`, `system.processes`, etc. This closes [#19389](https://github.com/ClickHouse/ClickHouse/issues/19389). [#19390](https://github.com/ClickHouse/ClickHouse/pull/19390) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Improve MySQL compatibility by making more functions case insensitive and adding aliases. [#19387](https://github.com/ClickHouse/ClickHouse/pull/19387) ([Daniil Kondratyev](https://github.com/dankondr)).
+* Add metrics for MergeTree parts (Wide/Compact/InMemory) types. [#19381](https://github.com/ClickHouse/ClickHouse/pull/19381) ([Azat Khuzhin](https://github.com/azat)).
+* Allow docker to be executed with arbitrary uid. [#19374](https://github.com/ClickHouse/ClickHouse/pull/19374) ([filimonov](https://github.com/filimonov)).
+* Fix wrong alignment of values of `IPv4` data type in Pretty formats. They were aligned to the right, not to the left. This closes [#19184](https://github.com/ClickHouse/ClickHouse/issues/19184). [#19339](https://github.com/ClickHouse/ClickHouse/pull/19339) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Allow change `max_server_memory_usage` without restart. This closes [#18154](https://github.com/ClickHouse/ClickHouse/issues/18154). [#19186](https://github.com/ClickHouse/ClickHouse/pull/19186) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* The exception when function `bar` is called with certain NaN argument may be slightly misleading in previous versions. This fixes [#19088](https://github.com/ClickHouse/ClickHouse/issues/19088). [#19107](https://github.com/ClickHouse/ClickHouse/pull/19107) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Explicitly set uid / gid of clickhouse user & group to the fixed values (101) in clickhouse-server images. [#19096](https://github.com/ClickHouse/ClickHouse/pull/19096) ([filimonov](https://github.com/filimonov)).
+* Fixed `PeekableReadBuffer: Memory limit exceed` error when inserting data with huge strings. Fixes [#18690](https://github.com/ClickHouse/ClickHouse/issues/18690). [#18979](https://github.com/ClickHouse/ClickHouse/pull/18979) ([tavplubix](https://github.com/tavplubix)).
+* Docker image: several improvements for clickhouse-server entrypoint. [#18954](https://github.com/ClickHouse/ClickHouse/pull/18954) ([filimonov](https://github.com/filimonov)).
+* Add `normalizeQueryKeepNames` and `normalizedQueryHashKeepNames` to normalize queries without masking long names with `?`. This helps better analyze complex query logs. [#18910](https://github.com/ClickHouse/ClickHouse/pull/18910) ([Amos Bird](https://github.com/amosbird)).
+* - Check per-block checksum of the distributed batch on the sender before sending (without reading the file twice, the checksums will be verified while reading), this will avoid stuck of the INSERT on the receiver (on truncated .bin file on the sender) - Avoid reading .bin files twice for batched INSERT (it was required to calculate rows/bytes to take squashing into account, now this information included into the header, backward compatible is preserved). [#18853](https://github.com/ClickHouse/ClickHouse/pull/18853) ([Azat Khuzhin](https://github.com/azat)).
+* Fix issues with RIGHT and FULL JOIN of tables with aggregate function states. In previous versions exception about `cloneResized` method was thrown. [#18818](https://github.com/ClickHouse/ClickHouse/pull/18818) ([templarzq](https://github.com/templarzq)).
+* Added prefix-based S3 endpoint settings. [#18812](https://github.com/ClickHouse/ClickHouse/pull/18812) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* Add [UInt8, UInt16, UInt32, UInt64] arguments types support for bitmapTransform, bitmapSubsetInRange, bitmapSubsetLimit, bitmapContains functions. This closes [#18713](https://github.com/ClickHouse/ClickHouse/issues/18713). [#18791](https://github.com/ClickHouse/ClickHouse/pull/18791) ([sundyli](https://github.com/sundy-li)).
+* Allow CTE (Common Table Expressions) to be further aliased. Propagate CSE (Common Subexpressions Elimination) to subqueries in the same level when `enable_global_with_statement = 1`. This fixes [#17378](https://github.com/ClickHouse/ClickHouse/issues/17378) . This fixes https://github.com/ClickHouse/ClickHouse/pull/16575#issuecomment-753416235 . [#18684](https://github.com/ClickHouse/ClickHouse/pull/18684) ([Amos Bird](https://github.com/amosbird)).
+* Update librdkafka to v1.6.0-RC2. Fixes [#18668](https://github.com/ClickHouse/ClickHouse/issues/18668). [#18671](https://github.com/ClickHouse/ClickHouse/pull/18671) ([filimonov](https://github.com/filimonov)).
+* In case of unexpected exceptions automatically restart background thread which is responsible for execution of distributed DDL queries. Fixes [#17991](https://github.com/ClickHouse/ClickHouse/issues/17991). [#18285](https://github.com/ClickHouse/ClickHouse/pull/18285) ([徐炘](https://github.com/weeds085490)).
+* Updated AWS C++ SDK in order to utilize global regions in S3. [#17870](https://github.com/ClickHouse/ClickHouse/pull/17870) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* Added support for `WITH ... [AND] [PERIODIC] REFRESH [interval_in_sec]` clause when creating `LIVE VIEW` tables. [#14822](https://github.com/ClickHouse/ClickHouse/pull/14822) ([vzakaznikov](https://github.com/vzakaznikov)).
+* Restrict `MODIFY TTL` queries for `MergeTree` tables created in old syntax. Previously the query succeeded, but actually it had no effect. [#19064](https://github.com/ClickHouse/ClickHouse/pull/19064) ([Anton Popov](https://github.com/CurtizJ)).
+
+#### Bug Fix
+
+* Fix index analysis of binary functions with constant argument which leads to wrong query results. This fixes [#18364](https://github.com/ClickHouse/ClickHouse/issues/18364). [#18373](https://github.com/ClickHouse/ClickHouse/pull/18373) ([Amos Bird](https://github.com/amosbird)).
+* Fix starting the server with tables having default expressions containing dictGet(). Allow getting return type of dictGet() without loading dictionary. [#19805](https://github.com/ClickHouse/ClickHouse/pull/19805) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix server crash after query with `if` function with `Tuple` type of then/else branches result. `Tuple` type must contain `Array` or another complex type. Fixes [#18356](https://github.com/ClickHouse/ClickHouse/issues/18356). [#20133](https://github.com/ClickHouse/ClickHouse/pull/20133) ([alesapin](https://github.com/alesapin)).
+* `MaterializeMySQL` (experimental feature): Fix replication for statements that update several tables. [#20066](https://github.com/ClickHouse/ClickHouse/pull/20066) ([Håvard Kvålen](https://github.com/havardk)).
+* Prevent "Connection refused" in docker during initialization script execution. [#20012](https://github.com/ClickHouse/ClickHouse/pull/20012) ([filimonov](https://github.com/filimonov)).
+* `EmbeddedRocksDB` is an experimental storage. Fix the issue with lack of proper type checking. Simplified code. This closes [#19967](https://github.com/ClickHouse/ClickHouse/issues/19967). [#19972](https://github.com/ClickHouse/ClickHouse/pull/19972) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix a segfault in function `fromModifiedJulianDay` when the argument type is `Nullable(T)` for any integral types other than Int32. [#19959](https://github.com/ClickHouse/ClickHouse/pull/19959) ([PHO](https://github.com/depressed-pho)).
+* The function `greatCircleAngle` returned inaccurate results in previous versions. This closes [#19769](https://github.com/ClickHouse/ClickHouse/issues/19769). [#19789](https://github.com/ClickHouse/ClickHouse/pull/19789) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix rare bug when some replicated operations (like mutation) cannot process some parts after data corruption. Fixes [#19593](https://github.com/ClickHouse/ClickHouse/issues/19593). [#19702](https://github.com/ClickHouse/ClickHouse/pull/19702) ([alesapin](https://github.com/alesapin)).
+* Background thread which executes `ON CLUSTER` queries might hang waiting for dropped replicated table to do something. It's fixed. [#19684](https://github.com/ClickHouse/ClickHouse/pull/19684) ([yiguolei](https://github.com/yiguolei)).
+* Fix wrong deserialization of columns description. It makes INSERT into a table with a column named `\` impossible. [#19479](https://github.com/ClickHouse/ClickHouse/pull/19479) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Mark distributed batch as broken in case of empty data block in one of files. [#19449](https://github.com/ClickHouse/ClickHouse/pull/19449) ([Azat Khuzhin](https://github.com/azat)).
+* Fixed very rare bug that might cause mutation to hang after `DROP/DETACH/REPLACE/MOVE PARTITION`. It was partially fixed by [#15537](https://github.com/ClickHouse/ClickHouse/issues/15537) for the most cases. [#19443](https://github.com/ClickHouse/ClickHouse/pull/19443) ([tavplubix](https://github.com/tavplubix)).
+* Fix possible error `Extremes transform was already added to pipeline`. Fixes [#14100](https://github.com/ClickHouse/ClickHouse/issues/14100). [#19430](https://github.com/ClickHouse/ClickHouse/pull/19430) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix default value in join types with non-zero default (e.g. some Enums). Closes [#18197](https://github.com/ClickHouse/ClickHouse/issues/18197). [#19360](https://github.com/ClickHouse/ClickHouse/pull/19360) ([vdimir](https://github.com/vdimir)).
+* Do not mark file for distributed send as broken on EOF. [#19290](https://github.com/ClickHouse/ClickHouse/pull/19290) ([Azat Khuzhin](https://github.com/azat)).
+* Fix leaking of pipe fd for `async_socket_for_remote`. [#19153](https://github.com/ClickHouse/ClickHouse/pull/19153) ([Azat Khuzhin](https://github.com/azat)).
+* Fix infinite reading from file in `ORC` format (was introduced in [#10580](https://github.com/ClickHouse/ClickHouse/issues/10580)). Fixes [#19095](https://github.com/ClickHouse/ClickHouse/issues/19095). [#19134](https://github.com/ClickHouse/ClickHouse/pull/19134) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix issue in merge tree data writer which can lead to marks with bigger size than fixed granularity size. Fixes [#18913](https://github.com/ClickHouse/ClickHouse/issues/18913). [#19123](https://github.com/ClickHouse/ClickHouse/pull/19123) ([alesapin](https://github.com/alesapin)).
+* Fix startup bug when clickhouse was not able to read compression codec from `LowCardinality(Nullable(...))` and throws exception `Attempt to read after EOF`. Fixes [#18340](https://github.com/ClickHouse/ClickHouse/issues/18340). [#19101](https://github.com/ClickHouse/ClickHouse/pull/19101) ([alesapin](https://github.com/alesapin)).
+* Simplify the implementation of `tupleHammingDistance`. Support for tuples of any equal length. Fixes [#19029](https://github.com/ClickHouse/ClickHouse/issues/19029). [#19084](https://github.com/ClickHouse/ClickHouse/pull/19084) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Make sure `groupUniqArray` returns correct type for argument of Enum type. This closes [#17875](https://github.com/ClickHouse/ClickHouse/issues/17875). [#19019](https://github.com/ClickHouse/ClickHouse/pull/19019) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix possible error `Expected single dictionary argument for function` if use function `ignore` with `LowCardinality` argument. Fixes [#14275](https://github.com/ClickHouse/ClickHouse/issues/14275). [#19016](https://github.com/ClickHouse/ClickHouse/pull/19016) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix inserting of `LowCardinality` column to table with `TinyLog` engine. Fixes [#18629](https://github.com/ClickHouse/ClickHouse/issues/18629). [#19010](https://github.com/ClickHouse/ClickHouse/pull/19010) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix minor issue in JOIN: Join tries to materialize const columns, but our code waits for them in other places. [#18982](https://github.com/ClickHouse/ClickHouse/pull/18982) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Disable `optimize_move_functions_out_of_any` because optimization is not always correct. This closes [#18051](https://github.com/ClickHouse/ClickHouse/issues/18051). This closes [#18973](https://github.com/ClickHouse/ClickHouse/issues/18973). [#18981](https://github.com/ClickHouse/ClickHouse/pull/18981) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix possible exception `QueryPipeline stream: different number of columns` caused by merging of query plan's `Expression` steps. Fixes [#18190](https://github.com/ClickHouse/ClickHouse/issues/18190). [#18980](https://github.com/ClickHouse/ClickHouse/pull/18980) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fixed very rare deadlock at shutdown. [#18977](https://github.com/ClickHouse/ClickHouse/pull/18977) ([tavplubix](https://github.com/tavplubix)).
+* Fixed rare crashes when server run out of memory. [#18976](https://github.com/ClickHouse/ClickHouse/pull/18976) ([tavplubix](https://github.com/tavplubix)).
+* Fix incorrect behavior when `ALTER TABLE ... DROP PART 'part_name'` query removes all deduplication blocks for the whole partition. Fixes [#18874](https://github.com/ClickHouse/ClickHouse/issues/18874). [#18969](https://github.com/ClickHouse/ClickHouse/pull/18969) ([alesapin](https://github.com/alesapin)).
+* Fixed issue [#18894](https://github.com/ClickHouse/ClickHouse/issues/18894) Add a check to avoid exception when long column alias('table.column' style, usually auto-generated by BI tools like Looker) equals to long table name. [#18968](https://github.com/ClickHouse/ClickHouse/pull/18968) ([Daniel Qin](https://github.com/mathfool)).
+* Fix error `Task was not found in task queue` (possible only for remote queries, with `async_socket_for_remote = 1`). [#18964](https://github.com/ClickHouse/ClickHouse/pull/18964) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix bug when mutation with some escaped text (like `ALTER ... UPDATE e = CAST('foo', 'Enum8(\'foo\' = 1')` serialized incorrectly. Fixes [#18878](https://github.com/ClickHouse/ClickHouse/issues/18878). [#18944](https://github.com/ClickHouse/ClickHouse/pull/18944) ([alesapin](https://github.com/alesapin)).
+* ATTACH PARTITION will reset mutations. [#18804](https://github.com/ClickHouse/ClickHouse/issues/18804). [#18935](https://github.com/ClickHouse/ClickHouse/pull/18935) ([fastio](https://github.com/fastio)).
+* Fix issue with `bitmapOrCardinality` that may lead to nullptr dereference. This closes [#18911](https://github.com/ClickHouse/ClickHouse/issues/18911). [#18912](https://github.com/ClickHouse/ClickHouse/pull/18912) ([sundyli](https://github.com/sundy-li)).
+* Fixed `Attempt to read after eof` error when trying to `CAST` `NULL` from `Nullable(String)` to `Nullable(Decimal(P, S))`. Now function `CAST` returns `NULL` when it cannot parse decimal from nullable string. Fixes [#7690](https://github.com/ClickHouse/ClickHouse/issues/7690). [#18718](https://github.com/ClickHouse/ClickHouse/pull/18718) ([Winter Zhang](https://github.com/zhang2014)).
+* Fix data type convert issue for MySQL engine. [#18124](https://github.com/ClickHouse/ClickHouse/pull/18124) ([bo zeng](https://github.com/mis98zb)).
+* Fix clickhouse-client abort exception while executing only `select`. [#19790](https://github.com/ClickHouse/ClickHouse/pull/19790) ([taiyang-li](https://github.com/taiyang-li)).
+
+
+#### Build/Testing/Packaging Improvement
+
+* Run [SQLancer](https://twitter.com/RiggerManuel/status/1352345625480884228) (logical SQL fuzzer) in CI. [#19006](https://github.com/ClickHouse/ClickHouse/pull/19006) ([Ilya Yatsishin](https://github.com/qoega)).
+* Query Fuzzer will fuzz newly added tests more extensively. This closes [#18916](https://github.com/ClickHouse/ClickHouse/issues/18916). [#19185](https://github.com/ClickHouse/ClickHouse/pull/19185) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Integrate with [Big List of Naughty Strings](https://github.com/minimaxir/big-list-of-naughty-strings/) for better fuzzing. [#19480](https://github.com/ClickHouse/ClickHouse/pull/19480) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Add integration tests run with MSan. [#18974](https://github.com/ClickHouse/ClickHouse/pull/18974) ([alesapin](https://github.com/alesapin)).
+* Fixed MemorySanitizer errors in cyrus-sasl and musl. [#19821](https://github.com/ClickHouse/ClickHouse/pull/19821) ([Ilya Yatsishin](https://github.com/qoega)).
+* Insuffiient arguments check in `positionCaseInsensitiveUTF8` function triggered address sanitizer. [#19720](https://github.com/ClickHouse/ClickHouse/pull/19720) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Remove --project-directory for docker-compose in integration test. Fix logs formatting from docker container. [#19706](https://github.com/ClickHouse/ClickHouse/pull/19706) ([Ilya Yatsishin](https://github.com/qoega)).
+* Made generation of macros.xml easier for integration tests. No more excessive logging from dicttoxml. dicttoxml project is not active for 5+ years. [#19697](https://github.com/ClickHouse/ClickHouse/pull/19697) ([Ilya Yatsishin](https://github.com/qoega)).
+* Allow to explicitly enable or disable watchdog via environment variable `CLICKHOUSE_WATCHDOG_ENABLE`. By default it is enabled if server is not attached to terminal. [#19522](https://github.com/ClickHouse/ClickHouse/pull/19522) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Allow building ClickHouse with Kafka support on arm64. [#19369](https://github.com/ClickHouse/ClickHouse/pull/19369) ([filimonov](https://github.com/filimonov)).
+* Allow building librdkafka without ssl. [#19337](https://github.com/ClickHouse/ClickHouse/pull/19337) ([filimonov](https://github.com/filimonov)).
+* Restore Kafka input in FreeBSD builds. [#18924](https://github.com/ClickHouse/ClickHouse/pull/18924) ([Alexandre Snarskii](https://github.com/snar)).
+* Fix potential nullptr dereference in table function `VALUES`. [#19357](https://github.com/ClickHouse/ClickHouse/pull/19357) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Avoid UBSan reports in `arrayElement` function, `substring` and `arraySum`. Fixes [#19305](https://github.com/ClickHouse/ClickHouse/issues/19305). Fixes [#19287](https://github.com/ClickHouse/ClickHouse/issues/19287). This closes [#19336](https://github.com/ClickHouse/ClickHouse/issues/19336). [#19347](https://github.com/ClickHouse/ClickHouse/pull/19347) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+
+
 ## ClickHouse release 21.1
 
 ### ClickHouse release v21.1.3.32-stable, 2021-02-03

From 6325b15a63335e2efd7de1ae92d2907493a07a9c Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Sun, 7 Feb 2021 18:19:10 +0300
Subject: [PATCH 0785/1238] Update CHANGELOG.md

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fffd732f7d7..e2c777b3bcf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -72,7 +72,7 @@
 * Fixed `PeekableReadBuffer: Memory limit exceed` error when inserting data with huge strings. Fixes [#18690](https://github.com/ClickHouse/ClickHouse/issues/18690). [#18979](https://github.com/ClickHouse/ClickHouse/pull/18979) ([tavplubix](https://github.com/tavplubix)).
 * Docker image: several improvements for clickhouse-server entrypoint. [#18954](https://github.com/ClickHouse/ClickHouse/pull/18954) ([filimonov](https://github.com/filimonov)).
 * Add `normalizeQueryKeepNames` and `normalizedQueryHashKeepNames` to normalize queries without masking long names with `?`. This helps better analyze complex query logs. [#18910](https://github.com/ClickHouse/ClickHouse/pull/18910) ([Amos Bird](https://github.com/amosbird)).
-* - Check per-block checksum of the distributed batch on the sender before sending (without reading the file twice, the checksums will be verified while reading), this will avoid stuck of the INSERT on the receiver (on truncated .bin file on the sender) - Avoid reading .bin files twice for batched INSERT (it was required to calculate rows/bytes to take squashing into account, now this information included into the header, backward compatible is preserved). [#18853](https://github.com/ClickHouse/ClickHouse/pull/18853) ([Azat Khuzhin](https://github.com/azat)).
+* Check per-block checksum of the distributed batch on the sender before sending (without reading the file twice, the checksums will be verified while reading), this will avoid stuck of the INSERT on the receiver (on truncated .bin file on the sender). Avoid reading .bin files twice for batched INSERT (it was required to calculate rows/bytes to take squashing into account, now this information included into the header, backward compatible is preserved). [#18853](https://github.com/ClickHouse/ClickHouse/pull/18853) ([Azat Khuzhin](https://github.com/azat)).
 * Fix issues with RIGHT and FULL JOIN of tables with aggregate function states. In previous versions exception about `cloneResized` method was thrown. [#18818](https://github.com/ClickHouse/ClickHouse/pull/18818) ([templarzq](https://github.com/templarzq)).
 * Added prefix-based S3 endpoint settings. [#18812](https://github.com/ClickHouse/ClickHouse/pull/18812) ([Vladimir Chebotarev](https://github.com/excitoon)).
 * Add [UInt8, UInt16, UInt32, UInt64] arguments types support for bitmapTransform, bitmapSubsetInRange, bitmapSubsetLimit, bitmapContains functions. This closes [#18713](https://github.com/ClickHouse/ClickHouse/issues/18713). [#18791](https://github.com/ClickHouse/ClickHouse/pull/18791) ([sundyli](https://github.com/sundy-li)).

From 869bca74a7cc771312299ef35c83e0c775cbde93 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 7 Feb 2021 23:37:55 +0300
Subject: [PATCH 0786/1238] Fix some tests

---
 .../CompressedReadBufferFromFile.cpp          |  1 +
 src/Core/ExternalTable.cpp                    | 33 ++++++++-----------
 src/Core/ExternalTable.h                      |  5 +--
 src/Interpreters/GlobalSubqueriesVisitor.h    |  1 -
 src/Server/TCPHandler.cpp                     |  4 ++-
 .../01455_optimize_trivial_insert_select.sql  |  4 ++-
 6 files changed, 21 insertions(+), 27 deletions(-)

diff --git a/src/Compression/CompressedReadBufferFromFile.cpp b/src/Compression/CompressedReadBufferFromFile.cpp
index 54f360f417b..eba8ba68438 100644
--- a/src/Compression/CompressedReadBufferFromFile.cpp
+++ b/src/Compression/CompressedReadBufferFromFile.cpp
@@ -108,6 +108,7 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n)
         /// If the decompressed block fits entirely where it needs to be copied.
         if (size_decompressed + additional_size_at_the_end_of_buffer <= n - bytes_read)
         {
+            //std::cerr << "readBig " << file_in.getFileName() << "\n";
             decompress(to + bytes_read, size_decompressed, size_compressed_without_checksum);
             bytes_read += size_decompressed;
             bytes += size_decompressed;
diff --git a/src/Core/ExternalTable.cpp b/src/Core/ExternalTable.cpp
index 722bc5705c3..529d01b79e4 100644
--- a/src/Core/ExternalTable.cpp
+++ b/src/Core/ExternalTable.cpp
@@ -1,18 +1,23 @@
 #include <boost/program_options.hpp>
-#include <DataStreams/AsynchronousBlockInputStream.h>
+#include <DataStreams/IBlockOutputStream.h>
 #include <DataTypes/DataTypeFactory.h>
+#include <Storages/IStorage.h>
+#include <Storages/ColumnsDescription.h>
+#include <Storages/ConstraintsDescription.h>
 #include <Interpreters/Context.h>
-#include <IO/copyData.h>
+#include <Interpreters/DatabaseCatalog.h>
 #include <IO/ReadBufferFromIStream.h>
 #include <IO/ReadBufferFromFile.h>
 #include <IO/LimitReadBuffer.h>
-#include <Storages/StorageMemory.h>
-#include <Processors/Sources/SourceFromInputStream.h>
+
 #include <Processors/Pipe.h>
 #include <Processors/Sources/SinkToOutputStream.h>
 #include <Processors/Executors/PipelineExecutor.h>
+#include <Processors/Formats/IInputFormat.h>
+
 #include <Core/ExternalTable.h>
 #include <Poco/Net/MessageHeader.h>
+#include <Formats/FormatFactory.h>
 #include <common/find_symbols.h>
 
 
@@ -29,17 +34,16 @@ ExternalTableDataPtr BaseExternalTable::getData(const Context & context)
 {
     initReadBuffer();
     initSampleBlock();
-    auto input = context.getInputFormat(format, *read_buffer, sample_block, DEFAULT_BLOCK_SIZE);
-    auto stream = std::make_shared<AsynchronousBlockInputStream>(input);
+    auto input = FormatFactory::instance().getInputFormat(format, *read_buffer, sample_block, context, DEFAULT_BLOCK_SIZE);
 
     auto data = std::make_unique<ExternalTableData>();
     data->table_name = name;
-    data->pipe = std::make_unique<Pipe>(std::make_shared<SourceFromInputStream>(std::move(stream)));
+    data->pipe = std::make_unique<Pipe>(std::move(input));
 
     return data;
 }
 
-void BaseExternalTable::clean()
+void BaseExternalTable::clear()
 {
     name.clear();
     file.clear();
@@ -49,17 +53,6 @@ void BaseExternalTable::clean()
     read_buffer.reset();
 }
 
-/// Function for debugging information output
-void BaseExternalTable::write()
-{
-    std::cerr << "file " << file << std::endl;
-    std::cerr << "name " << name << std::endl;
-    std::cerr << "format " << format << std::endl;
-    std::cerr << "structure: \n";
-    for (const auto & elem : structure)
-        std::cerr << '\t' << elem.first << ' ' << elem.second << std::endl;
-}
-
 void BaseExternalTable::parseStructureFromStructureField(const std::string & argument)
 {
     std::vector<std::string> vals;
@@ -182,7 +175,7 @@ void ExternalTablesHandler::handlePart(const Poco::Net::MessageHeader & header,
     executor->execute(/*num_threads = */ 1);
 
     /// We are ready to receive the next file, for this we clear all the information received
-    clean();
+    clear();
 }
 
 }
diff --git a/src/Core/ExternalTable.h b/src/Core/ExternalTable.h
index f26af1cc6ca..0d8e0aaf8ac 100644
--- a/src/Core/ExternalTable.h
+++ b/src/Core/ExternalTable.h
@@ -61,10 +61,7 @@ public:
 
 protected:
     /// Clear all accumulated information
-    void clean();
-
-    /// Function for debugging information output
-    void write();
+    void clear();
 
     /// Construct the `structure` vector from the text field `structure`
     virtual void parseStructureFromStructureField(const std::string & argument);
diff --git a/src/Interpreters/GlobalSubqueriesVisitor.h b/src/Interpreters/GlobalSubqueriesVisitor.h
index cde59d1e6c9..80d133ebea6 100644
--- a/src/Interpreters/GlobalSubqueriesVisitor.h
+++ b/src/Interpreters/GlobalSubqueriesVisitor.h
@@ -12,7 +12,6 @@
 #include <Core/Block.h>
 #include <Core/NamesAndTypes.h>
 #include <Databases/IDatabase.h>
-#include <Storages/StorageMemory.h>
 #include <IO/WriteHelpers.h>
 #include <Interpreters/InDepthNodeVisitor.h>
 #include <Interpreters/IdentifierSemantic.h>
diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp
index fa213dcdc55..fb42df19746 100644
--- a/src/Server/TCPHandler.cpp
+++ b/src/Server/TCPHandler.cpp
@@ -22,7 +22,6 @@
 #include <Interpreters/TablesStatus.h>
 #include <Interpreters/InternalTextLogsQueue.h>
 #include <Interpreters/OpenTelemetrySpanLog.h>
-#include <Storages/StorageMemory.h>
 #include <Storages/StorageReplicatedMergeTree.h>
 #include <Storages/MergeTree/MergeTreeDataPartUUID.h>
 #include <Core/ExternalTable.h>
@@ -214,6 +213,9 @@ void TCPHandler::runImpl()
                 /// Get blocks of temporary tables
                 readData(connection_settings);
 
+                if (state.io.out)
+                    state.io.out->writeSuffix();
+
                 /// Reset the input stream, as we received an empty block while receiving external table data.
                 /// So, the stream has been marked as cancelled and we can't read from it anymore.
                 state.block_in.reset();
diff --git a/tests/queries/0_stateless/01455_optimize_trivial_insert_select.sql b/tests/queries/0_stateless/01455_optimize_trivial_insert_select.sql
index de470fe6a57..5b59bc065dd 100644
--- a/tests/queries/0_stateless/01455_optimize_trivial_insert_select.sql
+++ b/tests/queries/0_stateless/01455_optimize_trivial_insert_select.sql
@@ -1,7 +1,9 @@
 SET max_insert_threads = 1, max_threads = 100, min_insert_block_size_rows = 1048576, max_block_size = 65536;
-CREATE TEMPORARY TABLE t (x UInt64);
+DROP TABLE IF EXISTS t;
+CREATE TABLE t (x UInt64) ENGINE = StripeLog;
 -- For trivial INSERT SELECT, max_threads is lowered to max_insert_threads and max_block_size is changed to min_insert_block_size_rows.
 INSERT INTO t SELECT * FROM numbers_mt(1000000);
 SET max_threads = 1;
 -- If data was inserted by more threads, we will probably see data out of order.
 SELECT DISTINCT blockSize(), runningDifference(x) FROM t;
+DROP TABLE t;

From 8118e20eecd3381cdcb71dd0530795aa4fcbc27a Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 7 Feb 2021 23:40:30 +0300
Subject: [PATCH 0787/1238] Fix some tests

---
 src/Core/ExternalTable.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/Core/ExternalTable.cpp b/src/Core/ExternalTable.cpp
index 529d01b79e4..767ed959950 100644
--- a/src/Core/ExternalTable.cpp
+++ b/src/Core/ExternalTable.cpp
@@ -1,5 +1,6 @@
 #include <boost/program_options.hpp>
 #include <DataStreams/IBlockOutputStream.h>
+#include <DataStreams/AsynchronousBlockInputStream.h>
 #include <DataTypes/DataTypeFactory.h>
 #include <Storages/IStorage.h>
 #include <Storages/ColumnsDescription.h>
@@ -13,7 +14,7 @@
 #include <Processors/Pipe.h>
 #include <Processors/Sources/SinkToOutputStream.h>
 #include <Processors/Executors/PipelineExecutor.h>
-#include <Processors/Formats/IInputFormat.h>
+#include <Processors/Sources/SourceFromInputStream.h>
 
 #include <Core/ExternalTable.h>
 #include <Poco/Net/MessageHeader.h>
@@ -34,11 +35,12 @@ ExternalTableDataPtr BaseExternalTable::getData(const Context & context)
 {
     initReadBuffer();
     initSampleBlock();
-    auto input = FormatFactory::instance().getInputFormat(format, *read_buffer, sample_block, context, DEFAULT_BLOCK_SIZE);
+    auto input = context.getInputFormat(format, *read_buffer, sample_block, DEFAULT_BLOCK_SIZE);
+    auto stream = std::make_shared<AsynchronousBlockInputStream>(input);
 
     auto data = std::make_unique<ExternalTableData>();
     data->table_name = name;
-    data->pipe = std::make_unique<Pipe>(std::move(input));
+    data->pipe = std::make_unique<Pipe>(std::make_shared<SourceFromInputStream>(std::move(stream)));
 
     return data;
 }

From 285d46bc56c560de9218bc34a01bb97f11ef59b8 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Sun, 7 Feb 2021 23:54:14 +0300
Subject: [PATCH 0788/1238] Fix gcc build issues

---
 src/Common/HashTable/HashTable.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Common/HashTable/HashTable.h b/src/Common/HashTable/HashTable.h
index 06b4d74cfcb..d63adc936a4 100644
--- a/src/Common/HashTable/HashTable.h
+++ b/src/Common/HashTable/HashTable.h
@@ -505,7 +505,7 @@ protected:
         if constexpr (Cell::need_to_notify_cell_during_move)
         {
             buf = reinterpret_cast<Cell *>(Allocator::alloc(new_grower.bufSize() * sizeof(Cell)));
-            memcpy(buf, old_buffer.get(), old_buffer_size);
+            memcpy(reinterpret_cast<void *>(buf), reinterpret_cast<const void *>(old_buffer.get()), old_buffer_size);
         }
         else
             buf = reinterpret_cast<Cell *>(Allocator::realloc(buf, old_buffer_size, new_grower.bufSize() * sizeof(Cell)));

From 198a8d3b27248034bdb2f15ac5f21dc60aef8ef7 Mon Sep 17 00:00:00 2001
From: madianjun <madianjun@qq.com>
Date: Thu, 28 Jan 2021 10:06:31 +0800
Subject: [PATCH 0789/1238] Drop helping tables when the whole table is done in
 clickhouse-copier

---
 programs/copier/ClusterCopier.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/programs/copier/ClusterCopier.cpp b/programs/copier/ClusterCopier.cpp
index d44e24dca49..7eea23160b2 100644
--- a/programs/copier/ClusterCopier.cpp
+++ b/programs/copier/ClusterCopier.cpp
@@ -316,9 +316,6 @@ void ClusterCopier::process(const ConnectionTimeouts & timeouts)
             }
         }
 
-        /// Delete helping tables in both cases (whole table is done or not)
-        dropHelpingTables(task_table);
-
         if (!table_is_done)
         {
             throw Exception("Too many tries to process table " + task_table.table_id + ". Abort remaining execution",
@@ -1044,6 +1041,11 @@ bool ClusterCopier::tryProcessTable(const ConnectionTimeouts & timeouts, TaskTab
     {
         LOG_INFO(log, "Table {} is not processed yet.Copied {} of {}, will retry", task_table.table_id, finished_partitions, required_partitions);
     }
+    else
+    {
+        /// Delete helping tables in case that whole table is done
+        dropHelpingTables(task_table);
+    }
 
     return table_is_done;
 }

From 2c278f1e0272ceec1372ae30800be27ce423d51a Mon Sep 17 00:00:00 2001
From: hexiaoting <hewenting_ict@163.com>
Date: Mon, 8 Feb 2021 13:44:50 +0800
Subject: [PATCH 0790/1238] Restrict move JOINON to WHERE optimizer only to
 inner join

---
 src/Interpreters/CollectJoinOnKeysVisitor.cpp | 51 ++++++++-------
 src/Interpreters/CollectJoinOnKeysVisitor.h   |  2 +
 src/Interpreters/TreeRewriter.cpp             |  2 +-
 ...conditions_from_join_on_to_where.reference | 62 +++++++++++++++++++
 ..._move_conditions_from_join_on_to_where.sql | 10 +++
 5 files changed, 105 insertions(+), 22 deletions(-)

diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.cpp b/src/Interpreters/CollectJoinOnKeysVisitor.cpp
index ba151b7f903..8b5fbeef7eb 100644
--- a/src/Interpreters/CollectJoinOnKeysVisitor.cpp
+++ b/src/Interpreters/CollectJoinOnKeysVisitor.cpp
@@ -79,23 +79,26 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as
         ASTPtr left = func.arguments->children.at(0);
         ASTPtr right = func.arguments->children.at(1);
         auto table_numbers = getTableNumbers(left, right, data);
-        if (table_numbers.first != table_numbers.second)
-        {
-            // related to two different tables
-            data.addJoinKeys(left, right, table_numbers);
-            if (!data.new_on_expression)
-                data.new_on_expression = ast->clone();
-            else
-                data.new_on_expression = makeASTFunction("and", data.new_on_expression, ast->clone());
-        }
-        else
+
+        /**
+          * if this is an inner join and the expression related to less than 2 tables, then move it to WHERE
+          */
+        if (data.kind == ASTTableJoin::Kind::Inner
+            && (table_numbers.first == table_numbers.second || table_numbers.first == 0 || table_numbers.second == 0))
         {
             if (!data.new_where_conditions)
                 data.new_where_conditions = ast->clone();
             else
                 data.new_where_conditions = makeASTFunction("and", data.new_where_conditions, ast->clone());
         }
-
+        else
+        {
+            data.addJoinKeys(left, right, table_numbers);
+            if (!data.new_on_expression)
+                data.new_on_expression = ast->clone();
+            else
+                data.new_on_expression = makeASTFunction("and", data.new_on_expression, ast->clone());
+        }
     }
     else if (inequality != ASOF::Inequality::None)
     {
@@ -104,17 +107,21 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as
             ASTPtr left = func.arguments->children.at(0);
             ASTPtr right = func.arguments->children.at(1);
             auto table_numbers = getTableNumbers(left, right, data);
-            if (table_numbers.first != table_numbers.second)
-            {
-                throw Exception("JOIN ON inequalities are not supported. Unexpected '" + queryToString(ast) + "'",
-                    ErrorCodes::NOT_IMPLEMENTED);
-            }
-            else
+
+            if (data.kind == ASTTableJoin::Kind::Inner
+                && (table_numbers.first == table_numbers.second || table_numbers.first == 0 || table_numbers.second == 0))
             {
                 if (!data.new_where_conditions)
                     data.new_where_conditions = ast->clone();
                 else
                     data.new_where_conditions = makeASTFunction("and", data.new_where_conditions, ast->clone());
+
+		return;
+            }
+            else
+            {
+                throw Exception("JOIN ON inequalities are not supported. Unexpected '" + queryToString(ast) + "'",
+                    ErrorCodes::NOT_IMPLEMENTED);
             }
         }
 
@@ -159,11 +166,13 @@ std::pair<size_t, size_t> CollectJoinOnKeysMatcher::getTableNumbers(const ASTPtr
     getIdentifiers(left_ast, left_identifiers);
     getIdentifiers(right_ast, right_identifiers);
 
-    if (left_identifiers.empty() || right_identifiers.empty())
-        return {0, 0};
+    size_t left_idents_table = 0;
+    size_t right_idents_table = 0;
 
-    size_t left_idents_table = getTableForIdentifiers(left_identifiers, data);
-    size_t right_idents_table = getTableForIdentifiers(right_identifiers, data);
+    if (!left_identifiers.empty())
+        left_idents_table = getTableForIdentifiers(left_identifiers, data);
+    if (!right_identifiers.empty())
+        right_idents_table = getTableForIdentifiers(right_identifiers, data);
 
     return std::make_pair(left_idents_table, right_idents_table);
 }
diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.h b/src/Interpreters/CollectJoinOnKeysVisitor.h
index 42133cf0b6e..aa2fd80d07c 100644
--- a/src/Interpreters/CollectJoinOnKeysVisitor.h
+++ b/src/Interpreters/CollectJoinOnKeysVisitor.h
@@ -5,6 +5,7 @@
 #include <Interpreters/InDepthNodeVisitor.h>
 #include <Interpreters/DatabaseAndTableWithAlias.h>
 #include <Interpreters/Aliases.h>
+#include <Parsers/ASTTablesInSelectQuery.h>
 
 
 namespace DB
@@ -30,6 +31,7 @@ public:
         const TableWithColumnNamesAndTypes & right_table;
         const Aliases & aliases;
         const bool is_asof{false};
+        ASTTableJoin::Kind kind;
         ASTPtr asof_left_key{};
         ASTPtr asof_right_key{};
         ASTPtr new_on_expression{};
diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp
index 7a194df8f30..332734e4ca6 100644
--- a/src/Interpreters/TreeRewriter.cpp
+++ b/src/Interpreters/TreeRewriter.cpp
@@ -418,7 +418,7 @@ void collectJoinedColumns(TableJoin & analyzed_join, const ASTSelectQuery & sele
     {
         bool is_asof = (table_join.strictness == ASTTableJoin::Strictness::Asof);
 
-        CollectJoinOnKeysVisitor::Data data{analyzed_join, tables[0], tables[1], aliases, is_asof};
+        CollectJoinOnKeysVisitor::Data data{analyzed_join, tables[0], tables[1], aliases, is_asof, table_join.kind};
         CollectJoinOnKeysVisitor(data).visit(table_join.on_expression);
         if (!data.has_some)
             throw Exception("Cannot get JOIN keys from JOIN ON section: " + queryToString(table_join.on_expression),
diff --git a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference
index 4f4909a0cb5..19487c9f942 100644
--- a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference
+++ b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference
@@ -76,3 +76,65 @@ ALL INNER JOIN
     FROM table2
 ) AS table2 ON a = table2.a
 WHERE (table2.b < toUInt32(40)) AND (b < 1)
+---------Q8----------
+---------Q9---will not be optimized----------
+SELECT
+    a,
+    b,
+    table2.a,
+    table2.b
+FROM table1
+ALL LEFT JOIN 
+(
+    SELECT
+        a,
+        b
+    FROM table2
+) AS table2 ON (a = table2.a) AND (b = toUInt32(10))
+SELECT
+    a,
+    b,
+    table2.a,
+    table2.b
+FROM table1
+ALL RIGHT JOIN 
+(
+    SELECT
+        a,
+        b
+    FROM table2
+) AS table2 ON (a = table2.a) AND (b = toUInt32(10))
+SELECT
+    a,
+    b,
+    table2.a,
+    table2.b
+FROM table1
+ALL FULL OUTER JOIN 
+(
+    SELECT
+        a,
+        b
+    FROM table2
+) AS table2 ON (a = table2.a) AND (b = toUInt32(10))
+SELECT
+    a,
+    b,
+    table2.a,
+    table2.b
+FROM table1
+ALL FULL OUTER JOIN 
+(
+    SELECT
+        a,
+        b
+    FROM table2
+) AS table2 ON (a = table2.a) AND (table2.b = toUInt32(10))
+WHERE a < toUInt32(20)
+SELECT
+    a,
+    b,
+    table2.a,
+    table2.b
+FROM table1
+CROSS JOIN table2
diff --git a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql
index 259ff822f3f..23871a9c47c 100644
--- a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql
+++ b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql
@@ -34,5 +34,15 @@ SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.b < toUInt
 EXPLAIN SYNTAX SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.b < toUInt32(40)) where table1.b < 1;
 SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.b < toUInt32(40)) where table1.b > 10;
 
+SELECT '---------Q8----------';
+SELECT * FROM table1 INNER JOIN table2 ON (table1.a = table2.a) AND (table2.b < toUInt32(table1, 10)); -- { serverError 47 }
+
+SELECT '---------Q9---will not be optimized----------';
+EXPLAIN SYNTAX SELECT * FROM table1 LEFT JOIN table2 ON (table1.a = table2.a) AND (table1.b = toUInt32(10));
+EXPLAIN SYNTAX SELECT * FROM table1 RIGHT JOIN table2 ON (table1.a = table2.a) AND (table1.b = toUInt32(10));
+EXPLAIN SYNTAX SELECT * FROM table1 FULL JOIN table2 ON (table1.a = table2.a) AND (table1.b = toUInt32(10));
+EXPLAIN SYNTAX SELECT * FROM table1 FULL JOIN table2 ON (table1.a = table2.a) AND (table2.b = toUInt32(10)) WHERE table1.a < toUInt32(20);
+EXPLAIN SYNTAX SELECT * FROM table1 , table2;
+
 DROP TABLE table1;
 DROP TABLE table2;

From 8aba8e6b1a0e063278c31728d6219eea0d00c79b Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Mon, 8 Feb 2021 10:09:44 +0300
Subject: [PATCH 0791/1238] Update mongodb.md

---
 docs/ru/engines/table-engines/integrations/mongodb.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/engines/table-engines/integrations/mongodb.md b/docs/ru/engines/table-engines/integrations/mongodb.md
index d2858c01abc..dd5fa03390d 100644
--- a/docs/ru/engines/table-engines/integrations/mongodb.md
+++ b/docs/ru/engines/table-engines/integrations/mongodb.md
@@ -5,7 +5,7 @@ toc_title: MongoDB
 
 # MongoDB {#mongodb}
 
-Движок MongoDB работает только на чтение данных, он поддерживает запросы `SELECT` над данными, хранящимися на серверах MongoBD. Движок MongoDB поддерживает только плоские типы данных (простые, не вложенные).
+Движок MongoDB работает только на чтение данных, он поддерживает запросы `SELECT` над данными, хранящимися на серверах MongoDB. Движок MongoDB поддерживает только плоские типы данных (простые, не вложенные).
 
 ## Создание таблицы {#creating-a-table}
 

From 313d17bf9650136cfa15f767881742f02df85a3c Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Mon, 8 Feb 2021 10:10:07 +0300
Subject: [PATCH 0792/1238] Update mongodb.md

---
 docs/en/engines/table-engines/integrations/mongodb.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/engines/table-engines/integrations/mongodb.md b/docs/en/engines/table-engines/integrations/mongodb.md
index 4c07961227a..2f1ae394521 100644
--- a/docs/en/engines/table-engines/integrations/mongodb.md
+++ b/docs/en/engines/table-engines/integrations/mongodb.md
@@ -5,7 +5,7 @@ toc_title: MongoDB
 
 # MongoDB {#mongodb}
 
-MongoDB engine is read-only, it allows to perform `SELECT` queries on data, stored on a remote MongoBD server. MongoDB engine supports onle flat fields (primitive, not nested types).
+MongoDB engine is read-only, it allows to perform `SELECT` queries on data, stored on a remote MongoDB server. MongoDB engine supports onle flat fields (primitive, not nested types).
 
 ## Creating a Table {#creating-a-table}
 

From 041c50c6eb30608357c900d6ad366ff2305c4c68 Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Mon, 8 Feb 2021 11:36:08 +0300
Subject: [PATCH 0793/1238] Update arcadia_skip_list.txt

---
 tests/queries/0_stateless/arcadia_skip_list.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/queries/0_stateless/arcadia_skip_list.txt b/tests/queries/0_stateless/arcadia_skip_list.txt
index 7f34b5a9a84..7ae8c4ce89f 100644
--- a/tests/queries/0_stateless/arcadia_skip_list.txt
+++ b/tests/queries/0_stateless/arcadia_skip_list.txt
@@ -201,3 +201,4 @@
 01671_aggregate_function_group_bitmap_data
 01674_executable_dictionary_implicit_key
 01683_dist_INSERT_block_structure_mismatch
+01702_bitmap_native_integers

From 7ee1ad683334b042f2d093df84f984c700ce9dc5 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Mon, 8 Feb 2021 11:44:32 +0300
Subject: [PATCH 0794/1238] Fixed unit tests

---
 src/Common/tests/gtest_lru_hash_map.cpp | 62 ++++++++++++-------------
 1 file changed, 31 insertions(+), 31 deletions(-)

diff --git a/src/Common/tests/gtest_lru_hash_map.cpp b/src/Common/tests/gtest_lru_hash_map.cpp
index 237c751303e..562ee667b7b 100644
--- a/src/Common/tests/gtest_lru_hash_map.cpp
+++ b/src/Common/tests/gtest_lru_hash_map.cpp
@@ -19,14 +19,14 @@ std::vector<typename LRUHashMap::Key> convertToVector(const LRUHashMap & map)
 
 void testInsert(size_t elements_to_insert_size, size_t map_size)
 {
-    using LRUHashMap = LRUHashMap<int, std::string>;
+    using LRUHashMap = LRUHashMap<int, int>;
 
     LRUHashMap map(map_size);
 
     std::vector<int> expected;
 
     for (size_t i = 0; i < elements_to_insert_size; ++i)
-        map.insert(i, std::to_string(i));
+        map.insert(i, i);
 
     for (size_t i = elements_to_insert_size - map_size; i < elements_to_insert_size; ++i)
         expected.emplace_back(i);
@@ -39,15 +39,15 @@ void testInsert(size_t elements_to_insert_size, size_t map_size)
 TEST(LRUHashMap, Insert)
 {
     {
-        using LRUHashMap = LRUHashMap<int, std::string>;
+        using LRUHashMap = LRUHashMap<int, int>;
 
         LRUHashMap map(3);
 
-        map.emplace(1, "1");
-        map.insert(2, "2");
-        std::string v = "3";
+        map.emplace(1, 1);
+        map.insert(2, 2);
+        int v = 3;
         map.insert(3, v);
-        map.emplace(4, "4");
+        map.emplace(4, 4);
 
         std::vector<int> expected = { 2, 3, 4 };
         std::vector<int> actual = convertToVector(map);
@@ -63,18 +63,18 @@ TEST(LRUHashMap, Insert)
 
 TEST(LRUHashMap, GetModify)
 {
-    using LRUHashMap = LRUHashMap<int, std::string>;
+    using LRUHashMap = LRUHashMap<int, int>;
 
     LRUHashMap map(3);
 
-    map.emplace(1, "1");
-    map.emplace(2, "2");
-    map.emplace(3, "3");
+    map.emplace(1, 1);
+    map.emplace(2, 2);
+    map.emplace(3, 3);
 
-    map.get(3) = "4";
+    map.get(3) = 4;
 
-    std::vector<std::string> expected = { "1", "2", "4" };
-    std::vector<std::string> actual;
+    std::vector<int> expected = { 1, 2, 4 };
+    std::vector<int> actual;
     actual.reserve(map.size());
 
     for (auto & node : map)
@@ -85,14 +85,14 @@ TEST(LRUHashMap, GetModify)
 
 TEST(LRUHashMap, SetRecentKeyToTop)
 {
-    using LRUHashMap = LRUHashMap<int, std::string>;
+    using LRUHashMap = LRUHashMap<int, int>;
 
     LRUHashMap map(3);
 
-    map.emplace(1, "1");
-    map.emplace(2, "2");
-    map.emplace(3, "3");
-    map.emplace(1, "4");
+    map.emplace(1, 1);
+    map.emplace(2, 2);
+    map.emplace(3, 3);
+    map.emplace(1, 4);
 
     std::vector<int> expected = { 2, 3, 1 };
     std::vector<int> actual = convertToVector(map);
@@ -102,13 +102,13 @@ TEST(LRUHashMap, SetRecentKeyToTop)
 
 TEST(LRUHashMap, GetRecentKeyToTop)
 {
-    using LRUHashMap = LRUHashMap<int, std::string>;
+    using LRUHashMap = LRUHashMap<int, int>;
 
     LRUHashMap map(3);
 
-    map.emplace(1, "1");
-    map.emplace(2, "2");
-    map.emplace(3, "3");
+    map.emplace(1, 1);
+    map.emplace(2, 2);
+    map.emplace(3, 3);
     map.get(1);
 
     std::vector<int> expected = { 2, 3, 1 };
@@ -119,13 +119,13 @@ TEST(LRUHashMap, GetRecentKeyToTop)
 
 TEST(LRUHashMap, Contains)
 {
-    using LRUHashMap = LRUHashMap<int, std::string>;
+    using LRUHashMap = LRUHashMap<int, int>;
 
     LRUHashMap map(3);
 
-    map.emplace(1, "1");
-    map.emplace(2, "2");
-    map.emplace(3, "3");
+    map.emplace(1, 1);
+    map.emplace(2, 2);
+    map.emplace(3, 3);
 
     ASSERT_TRUE(map.contains(1));
     ASSERT_TRUE(map.contains(2));
@@ -144,13 +144,13 @@ TEST(LRUHashMap, Contains)
 
 TEST(LRUHashMap, Clear)
 {
-    using LRUHashMap = LRUHashMap<int, std::string>;
+    using LRUHashMap = LRUHashMap<int, int>;
 
     LRUHashMap map(3);
 
-    map.emplace(1, "1");
-    map.emplace(2, "2");
-    map.emplace(3, "3");
+    map.emplace(1, 1);
+    map.emplace(2, 2);
+    map.emplace(3, 3);
     map.clear();
 
     std::vector<int> expected = {};

From 77cb68c3e76515b64ae3f3009ee44f9c6376224e Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Mon, 8 Feb 2021 12:03:04 +0300
Subject: [PATCH 0795/1238] Fixing build.

---
 src/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index dba9385fe27..86db7742c97 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -100,8 +100,8 @@ endif()
 list (APPEND clickhouse_common_io_sources ${CONFIG_BUILD})
 list (APPEND clickhouse_common_io_headers ${CONFIG_VERSION} ${CONFIG_COMMON})
 
-list (APPEND dbms_sources Functions/IFunction.cpp Functions/FunctionFactory.cpp Functions/FunctionHelpers.cpp Functions/extractTimeZoneFromFunctionArguments.cpp)
-list (APPEND dbms_headers Functions/IFunctionImpl.h Functions/FunctionFactory.h Functions/FunctionHelpers.h Functions/extractTimeZoneFromFunctionArguments.h)
+list (APPEND dbms_sources Functions/IFunction.cpp Functions/FunctionFactory.cpp Functions/FunctionHelpers.cpp Functions/extractTimeZoneFromFunctionArguments.cpp Functions/replicate.cpp)
+list (APPEND dbms_headers Functions/IFunctionImpl.h Functions/FunctionFactory.h Functions/FunctionHelpers.h Functions/extractTimeZoneFromFunctionArguments.h Functions/replicate.h)
 
 list (APPEND dbms_sources
     AggregateFunctions/AggregateFunctionFactory.cpp

From bd8f88f464ac7e87ea77942a95f4a282bc5cd53e Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Mon, 8 Feb 2021 12:04:19 +0300
Subject: [PATCH 0796/1238] Fix style

---
 src/Functions/replicate.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Functions/replicate.cpp b/src/Functions/replicate.cpp
index bc41d817326..ca391bec6ce 100644
--- a/src/Functions/replicate.cpp
+++ b/src/Functions/replicate.cpp
@@ -19,7 +19,7 @@ DataTypePtr FunctionReplicate::getReturnTypeImpl(const DataTypes & arguments) co
 {
     if (arguments.size() < 2)
         throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION,
-                        "Function {} expect at leas two arguments, got {}", getName(), arguments.size());
+                        "Function {} expect at least two arguments, got {}", getName(), arguments.size());
 
     for (size_t i = 1; i < arguments.size(); ++i)
     {

From 13b2c2de20b5f5d112e90586f8fbcbda291182b7 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Mon, 8 Feb 2021 12:06:14 +0300
Subject: [PATCH 0797/1238] Fixing ya.make

---
 src/Processors/ya.make | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/Processors/ya.make b/src/Processors/ya.make
index d42746791fb..d6574f0180e 100644
--- a/src/Processors/ya.make
+++ b/src/Processors/ya.make
@@ -94,7 +94,6 @@ SRCS(
     QueryPipeline.cpp
     QueryPlan/AddingConstColumnStep.cpp
     QueryPlan/AddingDelayedSourceStep.cpp
-    QueryPlan/AddingMissedStep.cpp
     QueryPlan/AggregatingStep.cpp
     QueryPlan/ArrayJoinStep.cpp
     QueryPlan/CreatingSetsStep.cpp
@@ -138,7 +137,6 @@ SRCS(
     Sources/SinkToOutputStream.cpp
     Sources/SourceFromInputStream.cpp
     Sources/SourceWithProgress.cpp
-    Transforms/AddingMissedTransform.cpp
     Transforms/AddingSelectorTransform.cpp
     Transforms/AggregatingInOrderTransform.cpp
     Transforms/AggregatingTransform.cpp

From 7ce0ef2561deda64192a2a0531dcc054b6ea1c60 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Mon, 8 Feb 2021 12:14:17 +0300
Subject: [PATCH 0798/1238] show clusters for replicated db

---
 src/Databases/DatabaseReplicated.cpp          | 108 +++++++++++++++++-
 src/Databases/DatabaseReplicated.h            |   8 +-
 src/Databases/DatabaseReplicatedWorker.cpp    |   2 +-
 src/Interpreters/DDLWorker.cpp                |   2 +-
 src/Interpreters/InterpreterCreateQuery.cpp   |  29 +++--
 src/Storages/System/StorageSystemClusters.cpp |  66 ++++++-----
 src/Storages/System/StorageSystemClusters.h   |   3 +
 tests/queries/skip_list.json                  |  12 ++
 8 files changed, 186 insertions(+), 44 deletions(-)

diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index 5a11787331c..43568379632 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -36,8 +36,11 @@ namespace ErrorCodes
     extern const int UNKNOWN_DATABASE;
     extern const int NOT_IMPLEMENTED;
     extern const int INCORRECT_QUERY;
+    extern const int ALL_CONNECTION_TRIES_FAILED;
 }
 
+static constexpr const char * DROPPED_MARK = "DROPPED";
+
 zkutil::ZooKeeperPtr DatabaseReplicated::getZooKeeper() const
 {
     return global_context.getZooKeeper();
@@ -68,6 +71,8 @@ DatabaseReplicated::DatabaseReplicated(
         throw Exception("ZooKeeper path, shard and replica names must be non-empty", ErrorCodes::BAD_ARGUMENTS);
     if (shard_name.find('/') != std::string::npos || replica_name.find('/') != std::string::npos)
         throw Exception("Shard and replica names should not contain '/'", ErrorCodes::BAD_ARGUMENTS);
+    if (shard_name.find('|') != std::string::npos || replica_name.find('|') != std::string::npos)
+        throw Exception("Shard and replica names should not contain '|'", ErrorCodes::BAD_ARGUMENTS);
 
     if (zookeeper_path.back() == '/')
         zookeeper_path.resize(zookeeper_path.size() - 1);
@@ -90,7 +95,7 @@ DatabaseReplicated::DatabaseReplicated(
         createDatabaseNodesInZooKeeper(current_zookeeper);
     }
 
-    replica_path = zookeeper_path + "/replicas/" + shard_name + "/" + replica_name;
+    replica_path = zookeeper_path + "/replicas/" + getFullReplicaName();
 
     String replica_host_id;
     if (current_zookeeper->tryGet(replica_path, replica_host_id))
@@ -110,6 +115,93 @@ DatabaseReplicated::DatabaseReplicated(
     }
 }
 
+String DatabaseReplicated::getFullReplicaName() const
+{
+    return shard_name + '|' + replica_name;
+}
+
+std::pair<String, String> DatabaseReplicated::parseFullReplicaName(const String & name)
+{
+    String shard;
+    String replica;
+    auto pos = name.find('|');
+    if (pos == std::string::npos || name.find('|', pos + 1) != std::string::npos)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Incorrect replica identifier: {}", name);
+    shard = name.substr(0, pos);
+    replica = name.substr(pos + 1);
+    return {shard, replica};
+}
+
+ClusterPtr DatabaseReplicated::getCluster() const
+{
+    Strings hosts;
+    Strings host_ids;
+
+    auto zookeeper = global_context.getZooKeeper();
+    constexpr int max_retries = 10;
+    int iteration = 0;
+    bool success = false;
+    while (++iteration <= max_retries)
+    {
+        host_ids.resize(0);
+        Coordination::Stat stat;
+        hosts = zookeeper->getChildren(zookeeper_path + "/replicas", &stat);
+        if (hosts.empty())
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "No hosts found");
+        Int32 cver = stat.cversion;
+
+        std::vector<zkutil::ZooKeeper::FutureGet> futures;
+        futures.reserve(hosts.size());
+        host_ids.reserve(hosts.size());
+        for (const auto & host : hosts)
+            futures.emplace_back(zookeeper->asyncTryGet(zookeeper_path + "/replicas/" + host));
+
+        success = true;
+        for (auto & future : futures)
+        {
+            auto res = future.get();
+            if (res.error != Coordination::Error::ZOK)
+                success = false;
+            host_ids.emplace_back(res.data);
+        }
+
+        zookeeper->get(zookeeper_path + "/replicas", &stat);
+        if (success && cver == stat.version)
+            break;
+    }
+    if (!success)
+        throw Exception(ErrorCodes::ALL_CONNECTION_TRIES_FAILED, "Cannot get consistent cluster snapshot");
+
+    assert(!hosts.empty());
+    assert(hosts.size() == host_ids.size());
+    std::sort(hosts.begin(), hosts.end());
+    String current_shard = parseFullReplicaName(hosts.front()).first;
+    std::vector<Strings> shards;
+    shards.emplace_back();
+    for (size_t i = 0; i < hosts.size(); ++i)
+    {
+        const auto & id = host_ids[i];
+        if (id == DROPPED_MARK)
+            continue;
+        auto [shard, replica] = parseFullReplicaName(hosts[i]);
+        auto pos = id.find(':');
+        String host = id.substr(0, pos);
+        if (shard != current_shard)
+        {
+            current_shard = shard;
+            if (!shards.back().empty())
+                shards.emplace_back();
+        }
+        shards.back().emplace_back(unescapeForFileName(host));
+    }
+
+    /// TODO make it configurable
+    String username = "default";
+    String password;
+
+    return std::make_shared<Cluster>(global_context.getSettingsRef(), shards, username, password, global_context.getTCPPort(), false);
+}
+
 bool DatabaseReplicated::createDatabaseNodesInZooKeeper(const zkutil::ZooKeeperPtr & current_zookeeper)
 {
     current_zookeeper->createAncestors(zookeeper_path);
@@ -139,8 +231,6 @@ bool DatabaseReplicated::createDatabaseNodesInZooKeeper(const zkutil::ZooKeeperP
 
 void DatabaseReplicated::createReplicaNodesInZooKeeper(const zkutil::ZooKeeperPtr & current_zookeeper)
 {
-    current_zookeeper->createAncestors(replica_path);
-
     /// When creating new replica, use latest snapshot version as initial value of log_pointer
     //log_entry_to_execute = 0;   //FIXME
 
@@ -296,9 +386,15 @@ ASTPtr DatabaseReplicated::parseQueryFromMetadataInZooKeeper(const String & node
 void DatabaseReplicated::drop(const Context & context_)
 {
     auto current_zookeeper = getZooKeeper();
-    current_zookeeper->set(replica_path, "DROPPED");
+    current_zookeeper->set(replica_path, DROPPED_MARK);
     DatabaseAtomic::drop(context_);
     current_zookeeper->tryRemoveRecursive(replica_path);
+    /// TODO it may leave garbage in ZooKeeper if the last node lost connection here
+    if (current_zookeeper->tryRemove(zookeeper_path + "/replicas") == Coordination::Error::ZOK)
+    {
+        /// It was the last replica, remove all metadata
+        current_zookeeper->tryRemoveRecursive(zookeeper_path);
+    }
 }
 
 void DatabaseReplicated::stopReplication()
@@ -318,7 +414,7 @@ void DatabaseReplicated::shutdown()
 void DatabaseReplicated::dropTable(const Context & context, const String & table_name, bool no_delay)
 {
     auto txn = context.getMetadataTransaction();
-    //assert(!ddl_worker->isCurrentlyActive() || txn /*|| called from DROP DATABASE */);
+    assert(!ddl_worker->isCurrentlyActive() || txn);
     if (txn && txn->is_initial_query)
     {
         String metadata_zk_path = zookeeper_path + "/metadata/" + escapeForFileName(table_name);
@@ -335,6 +431,8 @@ void DatabaseReplicated::renameTable(const Context & context, const String & tab
 
     if (txn->is_initial_query)
     {
+        if (this != &to_database)
+            throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Moving tables between databases is not supported for Replicated engine");
         if (!isTableExist(table_name, context))
             throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} does not exist", table_name);
         if (exchange && !to_database.isTableExist(to_table_name, context))
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index a866a61558c..0f500b16470 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -15,6 +15,9 @@ namespace DB
 class DatabaseReplicatedDDLWorker;
 using ZooKeeperPtr = std::shared_ptr<zkutil::ZooKeeper>;
 
+class Cluster;
+using ClusterPtr = std::shared_ptr<Cluster>;
+
 /** DatabaseReplicated engine
   * supports replication of metadata
   * via DDL log being written to ZooKeeper
@@ -67,7 +70,10 @@ public:
 
     void loadStoredObjects(Context & context, bool has_force_restore_data_flag, bool force_attach) override;
 
-    String getFullReplicaName() const { return shard_name + '|' + replica_name; }
+    String getFullReplicaName() const;
+    static std::pair<String, String> parseFullReplicaName(const String & name);
+
+    ClusterPtr getCluster() const;
 
     //FIXME
     friend struct DatabaseReplicatedTask;
diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp
index 1c000a8f0a7..748305922b7 100644
--- a/src/Databases/DatabaseReplicatedWorker.cpp
+++ b/src/Databases/DatabaseReplicatedWorker.cpp
@@ -208,7 +208,7 @@ DDLTaskPtr DatabaseReplicatedDDLWorker::initAndCheckTask(const String & entry_na
     if (task->is_initial_query)
     {
         assert(!zookeeper->exists(entry_path + "/try"));
-        assert(zookeeper->exists(entry_path + "/committed") == (zookeeper->get(task->getFinishedNodePath()) == "0"));
+        assert(zookeeper->exists(entry_path + "/committed") == (zookeeper->get(task->getFinishedNodePath()) == ExecutionStatus(0).serializeText()));
         out_reason = fmt::format("Entry {} has been executed as initial query", entry_name);
         return {};
     }
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index da2e878541d..f0cc3370211 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -488,7 +488,7 @@ void DDLWorker::processTask(DDLTaskBase & task)
         /// updating metadata in Replicated database), so we make create request for finished_node_path with status "0",
         /// which means that query executed successfully.
         task.ops.emplace_back(zkutil::makeRemoveRequest(active_node_path, -1));
-        task.ops.emplace_back(zkutil::makeCreateRequest(finished_node_path, "0", zkutil::CreateMode::Persistent));
+        task.ops.emplace_back(zkutil::makeCreateRequest(finished_node_path, ExecutionStatus(0).serializeText(), zkutil::CreateMode::Persistent));
 
         try
         {
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index 6af212172b2..be241339ef7 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -827,17 +827,28 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
 
     if (create.attach_from_path)
     {
-        fs::path data_path = fs::path(*create.attach_from_path).lexically_normal();
         fs::path user_files = fs::path(context.getUserFilesPath()).lexically_normal();
-        if (data_path.is_relative())
-            data_path = (user_files / data_path).lexically_normal();
-        if (!startsWith(data_path, user_files))
-            throw Exception(ErrorCodes::PATH_ACCESS_DENIED,
-                            "Data directory {} must be inside {} to attach it", String(data_path), String(user_files));
-
         fs::path root_path = fs::path(context.getPath()).lexically_normal();
-        /// Data path must be relative to root_path
-        create.attach_from_path = fs::relative(data_path, root_path) / "";
+
+        if (context.getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY)
+        {
+            fs::path data_path = fs::path(*create.attach_from_path).lexically_normal();
+            if (data_path.is_relative())
+                data_path = (user_files / data_path).lexically_normal();
+            if (!startsWith(data_path, user_files))
+                throw Exception(ErrorCodes::PATH_ACCESS_DENIED,
+                                "Data directory {} must be inside {} to attach it", String(data_path), String(user_files));
+
+            /// Data path must be relative to root_path
+            create.attach_from_path = fs::relative(data_path, root_path) / "";
+        }
+        else
+        {
+            fs::path data_path = (root_path / *create.attach_from_path).lexically_normal();
+            if (!startsWith(data_path, user_files))
+                throw Exception(ErrorCodes::PATH_ACCESS_DENIED,
+                                "Data directory {} must be inside {} to attach it", String(data_path), String(user_files));
+        }
     }
     else if (create.attach && !create.attach_short_syntax && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
     {
diff --git a/src/Storages/System/StorageSystemClusters.cpp b/src/Storages/System/StorageSystemClusters.cpp
index ae8bcca2804..62ad1c5150f 100644
--- a/src/Storages/System/StorageSystemClusters.cpp
+++ b/src/Storages/System/StorageSystemClusters.cpp
@@ -3,6 +3,7 @@
 #include <Interpreters/Cluster.h>
 #include <Interpreters/Context.h>
 #include <Storages/System/StorageSystemClusters.h>
+#include <Databases/DatabaseReplicated.h>
 
 namespace DB
 {
@@ -26,40 +27,51 @@ NamesAndTypesList StorageSystemClusters::getNamesAndTypes()
     };
 }
 
+
 void StorageSystemClusters::fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo &) const
 {
     for (const auto & name_and_cluster : context.getClusters().getContainer())
+        writeCluster(res_columns, name_and_cluster);
+
+    const auto databases = DatabaseCatalog::instance().getDatabases();
+    for (const auto & name_and_database : databases)
     {
-        const String & cluster_name = name_and_cluster.first;
-        const ClusterPtr & cluster = name_and_cluster.second;
-        const auto & shards_info = cluster->getShardsInfo();
-        const auto & addresses_with_failover = cluster->getShardsAddresses();
+        if (const auto * replicated = typeid_cast<const DatabaseReplicated *>(name_and_database.second.get()))
+            writeCluster(res_columns, {name_and_database.first, replicated->getCluster()});
+    }
+}
 
-        for (size_t shard_index = 0; shard_index < shards_info.size(); ++shard_index)
+void StorageSystemClusters::writeCluster(MutableColumns & res_columns, const NameAndCluster & name_and_cluster) const
+{
+    const String & cluster_name = name_and_cluster.first;
+    const ClusterPtr & cluster = name_and_cluster.second;
+    const auto & shards_info = cluster->getShardsInfo();
+    const auto & addresses_with_failover = cluster->getShardsAddresses();
+
+    for (size_t shard_index = 0; shard_index < shards_info.size(); ++shard_index)
+    {
+        const auto & shard_info = shards_info[shard_index];
+        const auto & shard_addresses = addresses_with_failover[shard_index];
+        const auto pool_status = shard_info.pool->getStatus();
+
+        for (size_t replica_index = 0; replica_index < shard_addresses.size(); ++replica_index)
         {
-            const auto & shard_info = shards_info[shard_index];
-            const auto & shard_addresses = addresses_with_failover[shard_index];
-            const auto pool_status = shard_info.pool->getStatus();
+            size_t i = 0;
+            const auto & address = shard_addresses[replica_index];
 
-            for (size_t replica_index = 0; replica_index < shard_addresses.size(); ++replica_index)
-            {
-                size_t i = 0;
-                const auto & address = shard_addresses[replica_index];
-
-                res_columns[i++]->insert(cluster_name);
-                res_columns[i++]->insert(shard_info.shard_num);
-                res_columns[i++]->insert(shard_info.weight);
-                res_columns[i++]->insert(replica_index + 1);
-                res_columns[i++]->insert(address.host_name);
-                auto resolved = address.getResolvedAddress();
-                res_columns[i++]->insert(resolved ? resolved->host().toString() : String());
-                res_columns[i++]->insert(address.port);
-                res_columns[i++]->insert(address.is_local);
-                res_columns[i++]->insert(address.user);
-                res_columns[i++]->insert(address.default_database);
-                res_columns[i++]->insert(pool_status[replica_index].error_count);
-                res_columns[i++]->insert(pool_status[replica_index].estimated_recovery_time.count());
-            }
+            res_columns[i++]->insert(cluster_name);
+            res_columns[i++]->insert(shard_info.shard_num);
+            res_columns[i++]->insert(shard_info.weight);
+            res_columns[i++]->insert(replica_index + 1);
+            res_columns[i++]->insert(address.host_name);
+            auto resolved = address.getResolvedAddress();
+            res_columns[i++]->insert(resolved ? resolved->host().toString() : String());
+            res_columns[i++]->insert(address.port);
+            res_columns[i++]->insert(address.is_local);
+            res_columns[i++]->insert(address.user);
+            res_columns[i++]->insert(address.default_database);
+            res_columns[i++]->insert(pool_status[replica_index].error_count);
+            res_columns[i++]->insert(pool_status[replica_index].estimated_recovery_time.count());
         }
     }
 }
diff --git a/src/Storages/System/StorageSystemClusters.h b/src/Storages/System/StorageSystemClusters.h
index 4cda7c372b2..68282f1b1fe 100644
--- a/src/Storages/System/StorageSystemClusters.h
+++ b/src/Storages/System/StorageSystemClusters.h
@@ -10,6 +10,7 @@ namespace DB
 {
 
 class Context;
+class Cluster;
 
 /** Implements system table 'clusters'
   *  that allows to obtain information about available clusters
@@ -25,8 +26,10 @@ public:
 
 protected:
     using IStorageSystemOneBlock::IStorageSystemOneBlock;
+    using NameAndCluster = std::pair<String, std::shared_ptr<Cluster>>;
 
     void fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo & query_info) const override;
+    void writeCluster(MutableColumns & res_columns, const NameAndCluster & name_and_cluster) const;
 };
 
 }
diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json
index adee777f900..4c6927f575a 100644
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@@ -103,7 +103,19 @@
         "memory_tracking",     /// FIXME remove it before merge
         "memory_tracking",
         "memory_usage",
+        "01686_rocksdb",
+        "01550_mutation_subquery",
+        "01070_mutations_with_dependencies",
+        "01070_materialize_ttl",
+        "01055_compact_parts",
+        "01017_mutations_with_nondeterministic_functions_zookeeper",
+        "00926_adaptive_index_granularity_pk",
+        "00910_zookeeper_test_alter_compression_codecs",
+        "00908_bloom_filter_index",
+        "00616_final_single_part",
+        "00446_clear_column_in_partition_zookeeper",
         "01533_multiple_nested",
+        "01213_alter_rename_column_zookeeper",
         "01575_disable_detach_table_of_dictionary",
         "01457_create_as_table_function_structure",
         "01415_inconsistent_merge_tree_settings",

From 91d0924665401514396ed30ef6c01c8212b0b4bb Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Mon, 8 Feb 2021 12:46:30 +0300
Subject: [PATCH 0799/1238] write dictionaries metadata to zk

---
 src/Databases/DatabaseReplicated.cpp        | 30 +++++++++++++++++++++
 src/Databases/DatabaseReplicated.h          |  4 +++
 src/Databases/DatabaseWithDictionaries.cpp  | 12 ++++++++-
 src/Interpreters/InterpreterCreateQuery.cpp |  7 +++++
 src/Interpreters/InterpreterDropQuery.cpp   | 13 +++++++++
 5 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index 43568379632..a134ba5dec7 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -303,6 +303,9 @@ BlockIO DatabaseReplicated::propose(const ASTPtr & query, const Context & query_
     if (query_context.getClientInfo().query_kind != ClientInfo::QueryKind::INITIAL_QUERY)
         throw Exception(ErrorCodes::INCORRECT_QUERY, "It's not initial query. ON CLUSTER is not allowed for Replicated database.");
 
+    if (auto * ddl_query = query->as<ASTQueryWithTableAndOutput>())
+        ddl_query->database.clear();
+
     if (const auto * query_alter = query->as<ASTAlterQuery>())
     {
         for (const auto & command : query_alter->command_list->children)
@@ -493,4 +496,31 @@ void DatabaseReplicated::commitAlterTable(const StorageID & table_id,
     DatabaseAtomic::commitAlterTable(table_id, table_metadata_tmp_path, table_metadata_path, statement, query_context);
 }
 
+void DatabaseReplicated::createDictionary(const Context & context,
+                                          const String & dictionary_name,
+                                          const ASTPtr & query)
+{
+    auto txn = context.getMetadataTransaction();
+    assert(!ddl_worker->isCurrentlyActive() || txn);
+    if (txn && txn->is_initial_query)
+    {
+        String metadata_zk_path = txn->zookeeper_path + "/metadata/" + escapeForFileName(dictionary_name);
+        String statement = getObjectDefinitionFromCreateQuery(query->clone());
+        txn->ops.emplace_back(zkutil::makeCreateRequest(metadata_zk_path, statement, zkutil::CreateMode::Persistent));
+    }
+    DatabaseAtomic::createDictionary(context, dictionary_name, query);
+}
+
+void DatabaseReplicated::removeDictionary(const Context & context, const String & dictionary_name)
+{
+    auto txn = context.getMetadataTransaction();
+    assert(!ddl_worker->isCurrentlyActive() || txn);
+    if (txn && txn->is_initial_query)
+    {
+        String metadata_zk_path = zookeeper_path + "/metadata/" + escapeForFileName(dictionary_name);
+        txn->ops.emplace_back(zkutil::makeRemoveRequest(metadata_zk_path, -1));
+    }
+    DatabaseAtomic::removeDictionary(context, dictionary_name);
+}
+
 }
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index 0f500b16470..c39321f0caa 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -58,6 +58,10 @@ public:
     void commitAlterTable(const StorageID & table_id,
                           const String & table_metadata_tmp_path, const String & table_metadata_path,
                           const String & statement, const Context & query_context) override;
+    void createDictionary(const Context & context,
+                          const String & dictionary_name,
+                          const ASTPtr & query) override;
+    void removeDictionary(const Context & context, const String & dictionary_name) override;
 
     void drop(const Context & /*context*/) override;
 
diff --git a/src/Databases/DatabaseWithDictionaries.cpp b/src/Databases/DatabaseWithDictionaries.cpp
index ee16f4ae15e..7ce5de56b64 100644
--- a/src/Databases/DatabaseWithDictionaries.cpp
+++ b/src/Databases/DatabaseWithDictionaries.cpp
@@ -4,6 +4,7 @@
 #include <Interpreters/ExternalDictionariesLoader.h>
 #include <Interpreters/ExternalLoaderTempConfigRepository.h>
 #include <Interpreters/ExternalLoaderDatabaseConfigRepository.h>
+#include <Interpreters/DDLTask.h>
 #include <Dictionaries/getDictionaryConfigurationFromAST.h>
 #include <Dictionaries/DictionaryStructure.h>
 #include <Parsers/ASTCreateQuery.h>
@@ -193,6 +194,10 @@ void DatabaseWithDictionaries::createDictionary(const Context & context, const S
             detachDictionary(dictionary_name);
     });
 
+    auto txn = context.getMetadataTransaction();
+    if (txn && !context.isInternalSubquery())
+        txn->commit();      /// Commit point (a sort of) for Replicated database
+
     /// If it was ATTACH query and file with dictionary metadata already exist
     /// (so, ATTACH is done after DETACH), then rename atomically replaces old file with new one.
     Poco::File(dictionary_metadata_tmp_path).renameTo(dictionary_metadata_path);
@@ -205,7 +210,7 @@ void DatabaseWithDictionaries::createDictionary(const Context & context, const S
     succeeded = true;
 }
 
-void DatabaseWithDictionaries::removeDictionary(const Context &, const String & dictionary_name)
+void DatabaseWithDictionaries::removeDictionary(const Context & context, const String & dictionary_name)
 {
     DictionaryAttachInfo attach_info;
     detachDictionaryImpl(dictionary_name, attach_info);
@@ -213,6 +218,11 @@ void DatabaseWithDictionaries::removeDictionary(const Context &, const String &
     try
     {
         String dictionary_metadata_path = getObjectMetadataPath(dictionary_name);
+
+        auto txn = context.getMetadataTransaction();
+        if (txn && !context.isInternalSubquery())
+            txn->commit();      /// Commit point (a sort of) for Replicated database
+
         Poco::File(dictionary_metadata_path).remove();
         CurrentStatusInfo::unset(CurrentStatusInfo::DictionaryStatus,
                                  StorageID(attach_info.create_query).getInternalDictionaryName());
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index be241339ef7..376bf8417ff 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -1107,6 +1107,13 @@ BlockIO InterpreterCreateQuery::createDictionary(ASTCreateQuery & create)
     auto guard = DatabaseCatalog::instance().getDDLGuard(database_name, dictionary_name);
     DatabasePtr database = DatabaseCatalog::instance().getDatabase(database_name);
 
+    if (typeid_cast<DatabaseReplicated *>(database.get()) && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
+    {
+        assertOrSetUUID(create, database);
+        guard->releaseTableLock();
+        return typeid_cast<DatabaseReplicated *>(database.get())->propose(query_ptr, context);
+    }
+
     if (database->isDictionaryExist(dictionary_name))
     {
         /// TODO Check structure of dictionary
diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp
index b22d46358f9..e6943f06e06 100644
--- a/src/Interpreters/InterpreterDropQuery.cpp
+++ b/src/Interpreters/InterpreterDropQuery.cpp
@@ -212,6 +212,19 @@ BlockIO InterpreterDropQuery::executeToDictionary(
 
     DatabasePtr database = tryGetDatabase(database_name, if_exists);
 
+    bool is_drop_or_detach_database = query_ptr->as<ASTDropQuery>()->table.empty();
+    bool is_replicated_ddl_query = typeid_cast<DatabaseReplicated *>(database.get()) &&
+                                   context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY &&
+                                   !is_drop_or_detach_database;
+    if (is_replicated_ddl_query)
+    {
+        if (kind == ASTDropQuery::Kind::Detach)
+            throw Exception(ErrorCodes::INCORRECT_QUERY, "DETACH DICTIONARY is not allowed for Replicated databases.");
+
+        ddl_guard->releaseTableLock();
+        return typeid_cast<DatabaseReplicated *>(database.get())->propose(query_ptr, context);
+    }
+
     if (!database || !database->isDictionaryExist(dictionary_name))
     {
         if (!if_exists)

From 786e687b2fa2d77784b4569ecd95e8170c743e58 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Mon, 8 Feb 2021 14:01:50 +0300
Subject: [PATCH 0800/1238] Trying to avoid unlimited wait

---
 contrib/NuRaft                      | 2 +-
 src/Coordination/NuKeeperServer.cpp | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/contrib/NuRaft b/contrib/NuRaft
index c6f8528ead6..7adf7ae33e7 160000
--- a/contrib/NuRaft
+++ b/contrib/NuRaft
@@ -1 +1 @@
-Subproject commit c6f8528ead61f7e4565164c6f15afef221235aa8
+Subproject commit 7adf7ae33e7d5c307342431b577c8ab1025ee793
diff --git a/src/Coordination/NuKeeperServer.cpp b/src/Coordination/NuKeeperServer.cpp
index aa1747ca3e6..6111bdb2dd9 100644
--- a/src/Coordination/NuKeeperServer.cpp
+++ b/src/Coordination/NuKeeperServer.cpp
@@ -53,8 +53,6 @@ void NuKeeperServer::startup(int64_t operation_timeout_ms)
     params.snapshot_distance_ = 5000;
     params.client_req_timeout_ = operation_timeout_ms;
     params.auto_forwarding_ = true;
-    /// For some reason may lead to a very long timeouts
-    params.use_bg_thread_for_urgent_commit_ = false;
     params.return_method_ = nuraft::raft_params::blocking;
 
     nuraft::asio_service::options asio_opts{};

From 776b682f28cfd2074bcaef8e1f07416f8aa3b325 Mon Sep 17 00:00:00 2001
From: vdimir <vdimir@yandex-team.ru>
Date: Mon, 8 Feb 2021 14:38:31 +0300
Subject: [PATCH 0801/1238] Add comment for used_flags in hash join, fix build

---
 src/Common/ColumnsHashingImpl.h       | 5 +++++
 src/Common/HashTable/FixedHashTable.h | 4 ++++
 src/Common/HashTable/HashTable.h      | 4 ++++
 src/Interpreters/HashJoin.cpp         | 2 ++
 src/Interpreters/HashJoin.h           | 3 +++
 5 files changed, 18 insertions(+)

diff --git a/src/Common/ColumnsHashingImpl.h b/src/Common/ColumnsHashingImpl.h
index c5068175ffa..9af746a69ad 100644
--- a/src/Common/ColumnsHashingImpl.h
+++ b/src/Common/ColumnsHashingImpl.h
@@ -87,6 +87,9 @@ public:
     bool isInserted() const { return inserted; }
 };
 
+/// FindResult optionally may contain pointer to value and offset in hashtable buffer.
+/// Only bool found is required.
+/// So we will have 4 different specializations for FindResultImpl
 class FindResultImplBase
 {
     bool found;
@@ -241,6 +244,8 @@ protected:
     {
         if constexpr (Cache::consecutive_keys_optimization)
         {
+            /// It's possible to support such combination, but code will became more complex.
+            /// Now there's not place where we need this options enabled together
             static_assert(!FindResult::has_offset, "`consecutive_keys_optimization` and `has_offset` are conflicting options");
             if (cache.check(key))
             {
diff --git a/src/Common/HashTable/FixedHashTable.h b/src/Common/HashTable/FixedHashTable.h
index 0f88856f83a..e4715a6c1da 100644
--- a/src/Common/HashTable/FixedHashTable.h
+++ b/src/Common/HashTable/FixedHashTable.h
@@ -476,6 +476,10 @@ public:
 
     size_t getBufferSizeInCells() const { return NUM_CELLS; }
 
+    /// Return offset for result in internal buffer.
+    /// Result can have value up to `getBufferSizeInCells() + 1`
+    /// because offset for zero value considered to be 0
+    /// and for other values it will be `offset in buffer + 1`
     size_t offsetInternal(ConstLookupResult ptr) const
     {
         if (ptr->isZero(*this))
diff --git a/src/Common/HashTable/HashTable.h b/src/Common/HashTable/HashTable.h
index 8b69527e090..7ea0ed449ca 100644
--- a/src/Common/HashTable/HashTable.h
+++ b/src/Common/HashTable/HashTable.h
@@ -1214,6 +1214,10 @@ public:
         return grower.bufSize();
     }
 
+    /// Return offset for result in internal buffer.
+    /// Result can have value up to `getBufferSizeInCells() + 1`
+    /// because offset for zero value considered to be 0
+    /// and for other values it will be `offset in buffer + 1`
     size_t offsetInternal(ConstLookupResult ptr) const
     {
         if (ptr->isZero(*this))
diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp
index 8293f959cd9..5c50b53e2ca 100644
--- a/src/Interpreters/HashJoin.cpp
+++ b/src/Interpreters/HashJoin.cpp
@@ -57,6 +57,7 @@ struct NotProcessedCrossJoin : public ExtraBlock
 
 namespace JoinStuff
 {
+    /// Version of `getUsed` with dynamic dispatch
     bool JoinUsedFlags::getUsedSafe(size_t i) const
     {
         if (flags.empty())
@@ -576,6 +577,7 @@ namespace
             APPLY_FOR_JOIN_VARIANTS(M)
         #undef M
         }
+        __builtin_unreachable();
     }
 }
 
diff --git a/src/Interpreters/HashJoin.h b/src/Interpreters/HashJoin.h
index d9b9d4dd9fa..06ce7559f31 100644
--- a/src/Interpreters/HashJoin.h
+++ b/src/Interpreters/HashJoin.h
@@ -350,6 +350,9 @@ private:
 
     /// Right table data. StorageJoin shares it between many Join objects.
     std::shared_ptr<RightTableData> data;
+    /// Flags that indicate that particular row already used in join.
+    /// Flag is stored for every record in hash map.
+    /// Number of this flags equals to hashtable buffer size (plus one for zero value).
     mutable JoinStuff::JoinUsedFlags used_flags;
     Sizes key_sizes;
 

From 109a392e0c2edca26836ecad4a617187c57b5cb1 Mon Sep 17 00:00:00 2001
From: Amos Bird <amosbird@gmail.com>
Date: Mon, 8 Feb 2021 19:41:16 +0800
Subject: [PATCH 0802/1238] Fix ubsan

---
 src/Storages/MergeTree/MergeTreeData.cpp | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index c4e00a9a7f3..4bed3868f9d 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -2419,7 +2419,7 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until) const
     }
 
     size_t parts_count_in_partition;
-    bool should_delay = false;
+    ssize_t k_inactive = -1;
     if (settings->inactive_parts_to_throw_insert > 0 || settings->inactive_parts_to_delay_insert > 0)
     {
         size_t inactive_parts;
@@ -2432,8 +2432,7 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until) const
                     + "). Parts cleaning are processing significantly slower than inserts.",
                 ErrorCodes::TOO_MANY_PARTS);
         }
-        if (inactive_parts >= settings->inactive_parts_to_delay_insert)
-            should_delay = true;
+        k_inactive = ssize_t(inactive_parts) - ssize_t(settings->inactive_parts_to_delay_insert);
     }
     else
         parts_count_in_partition = getMaxPartsCountForPartition();
@@ -2446,11 +2445,22 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until) const
             ErrorCodes::TOO_MANY_PARTS);
     }
 
-    if (!should_delay && parts_count_in_partition < settings->parts_to_delay_insert)
+    if (k_inactive < 0 && parts_count_in_partition < settings->parts_to_delay_insert)
         return;
 
-    const size_t max_k = settings->parts_to_throw_insert - settings->parts_to_delay_insert; /// always > 0
-    const size_t k = 1 + parts_count_in_partition - settings->parts_to_delay_insert; /// from 1 to max_k
+    const ssize_t k_active = ssize_t(parts_count_in_partition) - ssize_t(settings->parts_to_delay_insert);
+    size_t max_k;
+    size_t k;
+    if (k_active > k_inactive)
+    {
+        max_k = settings->parts_to_throw_insert - settings->parts_to_delay_insert;
+        k = k_active + 1;
+    }
+    else
+    {
+        max_k = settings->inactive_parts_to_throw_insert - settings->inactive_parts_to_delay_insert;
+        k = k_inactive + 1;
+    }
     const double delay_milliseconds = ::pow(settings->max_delay_to_insert * 1000, static_cast<double>(k) / max_k);
 
     ProfileEvents::increment(ProfileEvents::DelayedInserts);

From e9001a7dde6979500d70751b0e868d780faa4d45 Mon Sep 17 00:00:00 2001
From: Amos Bird <amosbird@gmail.com>
Date: Mon, 8 Feb 2021 19:41:58 +0800
Subject: [PATCH 0803/1238] Fix ubsan error

---
 src/Interpreters/DNSCacheUpdater.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Interpreters/DNSCacheUpdater.cpp b/src/Interpreters/DNSCacheUpdater.cpp
index fb0298f480f..723945165e3 100644
--- a/src/Interpreters/DNSCacheUpdater.cpp
+++ b/src/Interpreters/DNSCacheUpdater.cpp
@@ -37,7 +37,7 @@ void DNSCacheUpdater::run()
       * - automatically throttle when DNS requests take longer time;
       * - add natural randomization on huge clusters - avoid sending all requests at the same moment of time from different servers.
       */
-    task_handle->scheduleAfter(update_period_seconds * 1000);
+    task_handle->scheduleAfter(size_t(update_period_seconds) * 1000);
 }
 
 void DNSCacheUpdater::start()

From 2daa4032017ef02a618b4c20c6a0224ac8659dc8 Mon Sep 17 00:00:00 2001
From: tavplubix <avtokmakov@yandex-team.ru>
Date: Mon, 8 Feb 2021 14:59:51 +0300
Subject: [PATCH 0804/1238] Update AggregateFunctionGroupArrayMoving.h

---
 src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h
index eecf97e1e8c..2a713f3aed2 100644
--- a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h
+++ b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h
@@ -40,7 +40,7 @@ struct MovingData
     Array value;    /// Prefix sums.
     T sum = 0;
 
-    void add(T val, Arena * arena)
+    void NO_SANITIZE_UNDEFINED add(T val, Arena * arena)
     {
         sum += val;
         value.push_back(sum, arena);
@@ -120,7 +120,7 @@ public:
         this->data(place).add(static_cast<ResultT>(value), arena);
     }
 
-    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
+    void NO_SANITIZE_UNDEFINED merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
     {
         auto & cur_elems = this->data(place);
         auto & rhs_elems = this->data(rhs);

From 060be5b2db5d4ea250163be7f2cd160f11b63988 Mon Sep 17 00:00:00 2001
From: tavplubix <avtokmakov@yandex-team.ru>
Date: Mon, 8 Feb 2021 15:04:40 +0300
Subject: [PATCH 0805/1238] rerun CI checks

---
 src/Server/MySQLHandler.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp
index f660d97cdc6..3cbe285615e 100644
--- a/src/Server/MySQLHandler.cpp
+++ b/src/Server/MySQLHandler.cpp
@@ -342,6 +342,7 @@ void MySQLHandler::comQuery(ReadBuffer & payload)
 
             affected_rows += progress.written_rows;
         });
+
         CurrentThread::QueryScope query_scope{query_context};
 
         executeQuery(should_replace ? replacement : payload, *out, false, query_context,

From cf0a479dbebc851f63a841364cde5b71c1743386 Mon Sep 17 00:00:00 2001
From: FgoDt <fgodtdev@hotmail.com>
Date: Mon, 8 Feb 2021 20:27:29 +0800
Subject: [PATCH 0806/1238] Update versionedcollapsingmergetree.md
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

typo '划‘ -> '行’
---
 .../mergetree-family/versionedcollapsingmergetree.md            | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md b/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md
index 7a0a42fa47c..3b89da9f595 100644
--- a/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md
+++ b/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md
@@ -37,7 +37,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
 VersionedCollapsingMergeTree(sign, version)
 ```
 
--   `sign` — 指定行类型的列名: `1` 是一个 “state” 行, `-1` 是一个 “cancel” 划
+-   `sign` — 指定行类型的列名: `1` 是一个 “state” 行, `-1` 是一个 “cancel” 行
 
     列数据类型应为 `Int8`.
 

From 00d5d28ddf41f7f54ab78d44af8cdf035c61360b Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Mon, 8 Feb 2021 15:52:30 +0300
Subject: [PATCH 0807/1238] Fix build.

---
 src/Interpreters/addMissingDefaults.h         |  1 +
 .../Optimizations/filterPushDown.cpp          | 22 +++++++++++++++++++
 2 files changed, 23 insertions(+)
 create mode 100644 src/Processors/QueryPlan/Optimizations/filterPushDown.cpp

diff --git a/src/Interpreters/addMissingDefaults.h b/src/Interpreters/addMissingDefaults.h
index d8bed07e857..e746c7cc9e6 100644
--- a/src/Interpreters/addMissingDefaults.h
+++ b/src/Interpreters/addMissingDefaults.h
@@ -2,6 +2,7 @@
 
 #include <unordered_map>
 #include <string>
+#include <memory>
 
 
 namespace DB
diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp
new file mode 100644
index 00000000000..0d651897bf8
--- /dev/null
+++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp
@@ -0,0 +1,22 @@
+#include <Processors/QueryPlan/Optimizations/Optimizations.h>
+#include <Processors/QueryPlan/FilterStep.h>
+
+namespace DB::QueryPlanOptimizations
+{
+
+size_t tryPushDownLimit(QueryPlan::Node * node, QueryPlan::Nodes &)
+{
+    auto * filter_step = typeid_cast<FilterStep *>(node->step.get());
+    if (!filter_step)
+        return 0;
+
+    QueryPlan::Node * child_node = node->children.front();
+    auto & child = child_node->step;
+
+    if (const auto * adding_const_column = typeid_cast<const AddingConstColumnStep *>(child.get()))
+    {
+
+    }
+}
+
+}

From 363aefb42e02cee1f56e0cb1a0d8fbc5b80c0336 Mon Sep 17 00:00:00 2001
From: lehasm <lehasm@gmail.com>
Date: Mon, 8 Feb 2021 15:58:27 +0300
Subject: [PATCH 0808/1238] DOCSUP-3908: mannWitneyUTest, welchTTest and
 studentTTest documentation (#19784)

* studentTTest function description

* welchttest description, links in studentttest

* mannWhitneyUTest description

* Text changed slightly

* Text changed slightly

* Text changed slightly

* Text changed slightly

* - First header is first level.
- sample_index type set to UInt64.
- Return values renamed.

- mannWhitneyUTest:
  - added continuity_correction parameter
  - optional parameters wrapped in braces [ ]
  - description refactored
  - example SQL placed on single line

* Improved formulations (hopefully)
General data type names used (Integer, Float, Decimal)
Removed names from return values

* Fixes

* Optional parameters stated explicitly

* New line at the end of files

* Russian function description

* Files moved in the right folder

* Translated 'Original article' to russian

* Typos are fixed

* fixed alowed parameter value: "two sided" -> "two-sided"
* duplicate word "require" is removed from exception messages

* mannWhitneyUTest description changed
Added Tuple link in return values
"samples from two populations" added in short descriptions

* Russian translation updated

* fix

Co-authored-by: Alexey Smirnov <alexey-sm@yandex-team.ru>
---
 .../reference/mannwhitneyutest.md             | 71 +++++++++++++++++++
 .../reference/studentttest.md                 | 65 +++++++++++++++++
 .../reference/welchttest.md                   | 65 +++++++++++++++++
 .../reference/mannwhitneyutest.md             | 71 +++++++++++++++++++
 .../reference/studentttest.md                 | 65 +++++++++++++++++
 .../reference/welchttest.md                   | 65 +++++++++++++++++
 .../AggregateFunctionMannWhitney.h            |  6 +-
 7 files changed, 405 insertions(+), 3 deletions(-)
 create mode 100644 docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md
 create mode 100644 docs/en/sql-reference/aggregate-functions/reference/studentttest.md
 create mode 100644 docs/en/sql-reference/aggregate-functions/reference/welchttest.md
 create mode 100644 docs/ru/sql-reference/aggregate-functions/reference/mannwhitneyutest.md
 create mode 100644 docs/ru/sql-reference/aggregate-functions/reference/studentttest.md
 create mode 100644 docs/ru/sql-reference/aggregate-functions/reference/welchttest.md

diff --git a/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md b/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md
new file mode 100644
index 00000000000..012df7052aa
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md
@@ -0,0 +1,71 @@
+---
+toc_priority: 310
+toc_title: mannWhitneyUTest
+---
+
+# mannWhitneyUTest {#mannwhitneyutest}
+
+Applies the Mann-Whitney rank test to samples from two populations.
+
+**Syntax**
+
+``` sql
+mannWhitneyUTest[(alternative[, continuity_correction])](sample_data, sample_index)
+```
+
+Values of both samples are in the `sample_data` column. If `sample_index` equals to 0 then the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population. 
+The null hypothesis is that two populations are stochastically equal. Also one-sided hypothesises can be tested. This test does not assume that data have normal distribution.
+
+**Parameters**
+
+-   `alternative` — alternative hypothesis. (Optional, default: `'two-sided'`.) [String](../../../sql-reference/data-types/string.md).
+    -   `'two-sided'`;
+    -   `'greater'`;
+    -   `'less'`.
+-   `continuity_correction` - if not 0 then continuity correction in the normal approximation for the p-value is applied. (Optional, default: 1.) [UInt64](../../../sql-reference/data-types/int-uint.md).
+-   `sample_data` — sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
+-   `sample_index` — sample index. [Integer](../../../sql-reference/data-types/int-uint.md).
+
+
+**Returned values**
+
+[Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
+-   calculated U-statistic. [Float64](../../../sql-reference/data-types/float.md).
+-   calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
+
+
+**Example**
+
+Input table:
+
+``` text
+┌─sample_data─┬─sample_index─┐
+│          10 │            0 │
+│          11 │            0 │
+│          12 │            0 │
+│           1 │            1 │
+│           2 │            1 │
+│           3 │            1 │
+└─────────────┴──────────────┘
+```
+
+Query:
+
+``` sql
+SELECT mannWhitneyUTest('greater')(sample_data, sample_index) FROM mww_ttest;
+```
+
+Result:
+
+``` text
+┌─mannWhitneyUTest('greater')(sample_data, sample_index)─┐
+│ (9,0.04042779918503192)                                │
+└────────────────────────────────────────────────────────┘
+```
+
+**See Also**
+
+-   [Mann–Whitney U test](https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test)
+-   [Stochastic ordering](https://en.wikipedia.org/wiki/Stochastic_ordering)
+
+[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest/) <!--hide-->
diff --git a/docs/en/sql-reference/aggregate-functions/reference/studentttest.md b/docs/en/sql-reference/aggregate-functions/reference/studentttest.md
new file mode 100644
index 00000000000..f868e976039
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/studentttest.md
@@ -0,0 +1,65 @@
+---
+toc_priority: 300
+toc_title: studentTTest
+---
+
+# studentTTest {#studentttest}
+
+Applies Student's t-test to samples from two populations. 
+
+**Syntax**
+
+``` sql
+studentTTest(sample_data, sample_index)
+```
+
+Values of both samples are in the `sample_data` column. If `sample_index` equals to 0 then the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population.
+The null hypothesis is that means of populations are equal. Normal distribution with equal variances is assumed.
+
+**Parameters**
+
+-   `sample_data` — sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
+-   `sample_index` — sample index. [Integer](../../../sql-reference/data-types/int-uint.md).
+
+**Returned values**
+
+[Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
+-   calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md).
+-   calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
+
+
+**Example**
+
+Input table:
+
+``` text
+┌─sample_data─┬─sample_index─┐
+│        20.3 │            0 │
+│        21.1 │            0 │
+│        21.9 │            1 │
+│        21.7 │            0 │
+│        19.9 │            1 │
+│        21.8 │            1 │
+└─────────────┴──────────────┘
+```
+
+Query:
+
+``` sql
+SELECT studentTTest(sample_data, sample_index) FROM student_ttest;
+```
+
+Result:
+
+``` text
+┌─studentTTest(sample_data, sample_index)───┐
+│ (-0.21739130434783777,0.8385421208415731) │
+└───────────────────────────────────────────┘
+```
+
+**See Also**
+
+-   [Student's t-test](https://en.wikipedia.org/wiki/Student%27s_t-test)
+-   [welchTTest function](welchttest.md#welchttest)
+
+[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/studentttest/) <!--hide-->
diff --git a/docs/en/sql-reference/aggregate-functions/reference/welchttest.md b/docs/en/sql-reference/aggregate-functions/reference/welchttest.md
new file mode 100644
index 00000000000..3fe1c9d58b9
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/welchttest.md
@@ -0,0 +1,65 @@
+---
+toc_priority: 301
+toc_title: welchTTest
+---
+
+# welchTTest {#welchttest}
+
+Applies Welch's t-test to samples from two populations. 
+
+**Syntax**
+
+``` sql
+welchTTest(sample_data, sample_index)
+```
+
+Values of both samples are in the `sample_data` column. If `sample_index` equals to 0 then the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population.
+The null hypothesis is that means of populations are equal. Normal distribution is assumed. Populations may have unequal variance.
+
+**Parameters**
+
+-   `sample_data` — sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
+-   `sample_index` — sample index. [Integer](../../../sql-reference/data-types/int-uint.md).
+
+**Returned values**
+
+[Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
+-   calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md).
+-   calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
+
+
+**Example**
+
+Input table:
+
+``` text
+┌─sample_data─┬─sample_index─┐
+│        20.3 │            0 │
+│        22.1 │            0 │
+│        21.9 │            0 │
+│        18.9 │            1 │
+│        20.3 │            1 │
+│          19 │            1 │
+└─────────────┴──────────────┘
+```
+
+Query:
+
+``` sql
+SELECT welchTTest(sample_data, sample_index) FROM welch_ttest;
+```
+
+Result:
+
+``` text
+┌─welchTTest(sample_data, sample_index)─────┐
+│ (2.7988719532211235,0.051807360348581945) │
+└───────────────────────────────────────────┘
+```
+
+**See Also**
+
+-   [Welch's t-test](https://en.wikipedia.org/wiki/Welch%27s_t-test)
+-   [studentTTest function](studentttest.md#studentttest)
+
+[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/welchTTest/) <!--hide-->
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/mannwhitneyutest.md b/docs/ru/sql-reference/aggregate-functions/reference/mannwhitneyutest.md
new file mode 100644
index 00000000000..fb73fff5f00
--- /dev/null
+++ b/docs/ru/sql-reference/aggregate-functions/reference/mannwhitneyutest.md
@@ -0,0 +1,71 @@
+---
+toc_priority: 310
+toc_title: mannWhitneyUTest
+---
+
+# mannWhitneyUTest {#mannwhitneyutest}
+
+Вычисляет U-критерий Манна — Уитни для выборок из двух генеральных совокупностей.
+
+**Синтаксис**
+
+``` sql
+mannWhitneyUTest[(alternative[, continuity_correction])](sample_data, sample_index)
+```
+
+Значения выборок берутся из столбца `sample_data`. Если  `sample_index` равно 0, то значение из этой строки принадлежит первой выборке. Во всех остальных случаях значение принадлежит второй выборке.
+Проверяется нулевая гипотеза, что генеральные совокупности стохастически равны. Наряду с двусторонней гипотезой могут быть проверены и односторонние.
+Для применения U-критерия Манна — Уитни закон распределения генеральных совокупностей не обязан быть нормальным.
+
+**Параметры**
+
+-   `alternative` — альтернативная гипотеза. (Необязательный параметр, по умолчанию: `'two-sided'`.) [String](../../../sql-reference/data-types/string.md).
+    -   `'two-sided'`;
+    -   `'greater'`;
+    -   `'less'`.
+-   `continuity_correction` - если не 0, то при вычислении p-значения применяется коррекция непрерывности. (Необязательный параметр, по умолчанию: 1.) [UInt64](../../../sql-reference/data-types/int-uint.md).
+-   `sample_data` — данные выборок. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
+-   `sample_index` — индексы выборок. [Integer](../../../sql-reference/data-types/int-uint.md).
+
+
+**Возвращаемые значения**
+
+[Кортеж](../../../sql-reference/data-types/tuple.md) с двумя элементами:
+-   вычисленное значение критерия Манна — Уитни. [Float64](../../../sql-reference/data-types/float.md).
+-   вычисленное p-значение. [Float64](../../../sql-reference/data-types/float.md).
+
+
+**Пример**
+
+Таблица:
+
+``` text
+┌─sample_data─┬─sample_index─┐
+│          10 │            0 │
+│          11 │            0 │
+│          12 │            0 │
+│           1 │            1 │
+│           2 │            1 │
+│           3 │            1 │
+└─────────────┴──────────────┘
+```
+
+Запрос:
+
+``` sql
+SELECT mannWhitneyUTest('greater')(sample_data, sample_index) FROM mww_ttest;
+```
+
+Результат:
+
+``` text
+┌─mannWhitneyUTest('greater')(sample_data, sample_index)─┐
+│ (9,0.04042779918503192)                                │
+└────────────────────────────────────────────────────────┘
+```
+
+**Смотрите также**
+
+-   [U-критерий Манна — Уитни](https://ru.wikipedia.org/wiki/U-%D0%BA%D1%80%D0%B8%D1%82%D0%B5%D1%80%D0%B8%D0%B9_%D0%9C%D0%B0%D0%BD%D0%BD%D0%B0_%E2%80%94_%D0%A3%D0%B8%D1%82%D0%BD%D0%B8)
+
+[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/mannwhitneyutest/) <!--hide-->
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/studentttest.md b/docs/ru/sql-reference/aggregate-functions/reference/studentttest.md
new file mode 100644
index 00000000000..5361e06c5e2
--- /dev/null
+++ b/docs/ru/sql-reference/aggregate-functions/reference/studentttest.md
@@ -0,0 +1,65 @@
+---
+toc_priority: 300
+toc_title: studentTTest
+---
+
+# studentTTest {#studentttest}
+
+Вычисляет t-критерий Стьюдента для выборок из двух генеральных совокупностей. 
+
+**Синтаксис**
+
+``` sql
+studentTTest(sample_data, sample_index)
+```
+
+Значения выборок берутся из столбца `sample_data`. Если  `sample_index` равно 0, то значение из этой строки принадлежит первой выборке. Во всех остальных случаях значение принадлежит второй выборке.
+Проверяется нулевая гипотеза, что средние значения генеральных совокупностей совпадают. Для применения t-критерия Стьюдента распределение в генеральных совокупностях должно быть нормальным и дисперсии должны совпадать.
+
+**Параметры**
+
+-   `sample_data` — данные выборок. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
+-   `sample_index` — индексы выборок. [Integer](../../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемые значения**
+
+[Кортеж](../../../sql-reference/data-types/tuple.md) с двумя элементами:
+-   вычисленное значение критерия Стьюдента. [Float64](../../../sql-reference/data-types/float.md).
+-   вычисленное p-значение. [Float64](../../../sql-reference/data-types/float.md).
+
+
+**Пример**
+
+Таблица:
+
+``` text
+┌─sample_data─┬─sample_index─┐
+│        20.3 │            0 │
+│        21.1 │            0 │
+│        21.9 │            1 │
+│        21.7 │            0 │
+│        19.9 │            1 │
+│        21.8 │            1 │
+└─────────────┴──────────────┘
+```
+
+Запрос:
+
+``` sql
+SELECT studentTTest(sample_data, sample_index) FROM student_ttest;
+```
+
+Результат:
+
+``` text
+┌─studentTTest(sample_data, sample_index)───┐
+│ (-0.21739130434783777,0.8385421208415731) │
+└───────────────────────────────────────────┘
+```
+
+**Смотрите также**
+
+-   [t-критерий Стьюдента](https://ru.wikipedia.org/wiki/T-%D0%BA%D1%80%D0%B8%D1%82%D0%B5%D1%80%D0%B8%D0%B9_%D0%A1%D1%82%D1%8C%D1%8E%D0%B4%D0%B5%D0%BD%D1%82%D0%B0)
+-   [welchTTest](welchttest.md#welchttest)
+
+[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/studentttest/) <!--hide-->
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/welchttest.md b/docs/ru/sql-reference/aggregate-functions/reference/welchttest.md
new file mode 100644
index 00000000000..1f36b2d04ee
--- /dev/null
+++ b/docs/ru/sql-reference/aggregate-functions/reference/welchttest.md
@@ -0,0 +1,65 @@
+---
+toc_priority: 301
+toc_title: welchTTest
+---
+
+# welchTTest {#welchttest}
+
+Вычисляет t-критерий Уэлча для выборок из двух генеральных совокупностей. 
+
+**Синтаксис**
+
+``` sql
+welchTTest(sample_data, sample_index)
+```
+
+Значения выборок берутся из столбца `sample_data`. Если  `sample_index` равно 0, то значение из этой строки принадлежит первой выборке. Во всех остальных случаях значение принадлежит второй выборке.
+Проверяется нулевая гипотеза, что средние значения генеральных совокупностей совпадают. Для применения t-критерия Уэлча распределение в генеральных совокупностях должно быть нормальным. Дисперсии могут не совпадать.
+
+**Параметры**
+
+-   `sample_data` — данные выборок. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
+-   `sample_index` — индексы выборок. [Integer](../../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемые значения**
+
+[Кортеж](../../../sql-reference/data-types/tuple.md) с двумя элементами:
+-   вычисленное значение критерия Уэлча. [Float64](../../../sql-reference/data-types/float.md).
+-   вычисленное p-значение. [Float64](../../../sql-reference/data-types/float.md).
+
+
+**Пример**
+
+Таблица:
+
+``` text
+┌─sample_data─┬─sample_index─┐
+│        20.3 │            0 │
+│        22.1 │            0 │
+│        21.9 │            0 │
+│        18.9 │            1 │
+│        20.3 │            1 │
+│          19 │            1 │
+└─────────────┴──────────────┘
+```
+
+Запрос:
+
+``` sql
+SELECT welchTTest(sample_data, sample_index) FROM welch_ttest;
+```
+
+Результат:
+
+``` text
+┌─welchTTest(sample_data, sample_index)─────┐
+│ (2.7988719532211235,0.051807360348581945) │
+└───────────────────────────────────────────┘
+```
+
+**Смотрите также**
+
+-   [t-критерий Уэлча](https://ru.wikipedia.org/wiki/T-%D0%BA%D1%80%D0%B8%D1%82%D0%B5%D1%80%D0%B8%D0%B9_%D0%A3%D1%8D%D0%BB%D1%87%D0%B0)
+-   [studentTTest](studentttest.md#studentttest)
+
+[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/welchTTest/) <!--hide-->
diff --git a/src/AggregateFunctions/AggregateFunctionMannWhitney.h b/src/AggregateFunctions/AggregateFunctionMannWhitney.h
index 403f628a9ff..1451536d519 100644
--- a/src/AggregateFunctions/AggregateFunctionMannWhitney.h
+++ b/src/AggregateFunctions/AggregateFunctionMannWhitney.h
@@ -147,7 +147,7 @@ public:
         }
 
         if (params[0].getType() != Field::Types::String)
-            throw Exception("Aggregate function " + getName() + " require require first parameter to be a String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+            throw Exception("Aggregate function " + getName() + " require first parameter to be a String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
 
         auto param = params[0].get<String>();
         if (param == "two-sided")
@@ -158,13 +158,13 @@ public:
             alternative = Alternative::Greater;
         else
             throw Exception("Unknown parameter in aggregate function " + getName() +
-                    ". It must be one of: 'two sided', 'less', 'greater'", ErrorCodes::BAD_ARGUMENTS);
+                    ". It must be one of: 'two-sided', 'less', 'greater'", ErrorCodes::BAD_ARGUMENTS);
 
         if (params.size() != 2)
             return;
 
         if (params[1].getType() != Field::Types::UInt64)
-                throw Exception("Aggregate function " + getName() + " require require second parameter to be a UInt64", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+                throw Exception("Aggregate function " + getName() + " require second parameter to be a UInt64", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
 
         continuity_correction = static_cast<bool>(params[1].get<UInt64>());
     }

From add89c17f2f0ecbf83bda559101301cef9f15b99 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Mon, 8 Feb 2021 16:06:55 +0300
Subject: [PATCH 0809/1238] Less serde in responses, watches on followers

---
 src/Coordination/NuKeeperServer.cpp           |  69 ++-------
 src/Coordination/NuKeeperServer.h             |   6 +-
 src/Coordination/NuKeeperStateMachine.cpp     |  18 ++-
 src/Coordination/NuKeeperStateMachine.h       |   9 +-
 .../NuKeeperStorageDispatcher.cpp             |  53 +++++--
 src/Coordination/NuKeeperStorageDispatcher.h  |   7 +-
 src/Coordination/ThreadSafeQueue.h            |  45 ++++++
 src/Coordination/tests/gtest_for_build.cpp    | 131 ------------------
 src/Server/NuKeeperTCPHandler.cpp             |  30 ----
 src/Server/NuKeeperTCPHandler.h               |   5 +-
 10 files changed, 125 insertions(+), 248 deletions(-)
 create mode 100644 src/Coordination/ThreadSafeQueue.h

diff --git a/src/Coordination/NuKeeperServer.cpp b/src/Coordination/NuKeeperServer.cpp
index 6111bdb2dd9..cbd52b98377 100644
--- a/src/Coordination/NuKeeperServer.cpp
+++ b/src/Coordination/NuKeeperServer.cpp
@@ -17,16 +17,16 @@ namespace ErrorCodes
 {
     extern const int TIMEOUT_EXCEEDED;
     extern const int RAFT_ERROR;
-    extern const int LOGICAL_ERROR;
 }
 
-NuKeeperServer::NuKeeperServer(int server_id_, const std::string & hostname_, int port_)
+NuKeeperServer::NuKeeperServer(int server_id_, const std::string & hostname_, int port_, ResponsesQueue & responses_queue_)
     : server_id(server_id_)
     , hostname(hostname_)
     , port(port_)
     , endpoint(hostname + ":" + std::to_string(port))
-    , state_machine(nuraft::cs_new<NuKeeperStateMachine>(500 /* FIXME */))
+    , state_machine(nuraft::cs_new<NuKeeperStateMachine>(responses_queue_))
     , state_manager(nuraft::cs_new<InMemoryStateManager>(server_id, endpoint))
+    , responses_queue(responses_queue_)
 {
 }
 
@@ -53,6 +53,7 @@ void NuKeeperServer::startup(int64_t operation_timeout_ms)
     params.snapshot_distance_ = 5000;
     params.client_req_timeout_ = operation_timeout_ms;
     params.auto_forwarding_ = true;
+    params.auto_forwarding_req_timeout_ = operation_timeout_ms * 2;
     params.return_method_ = nuraft::raft_params::blocking;
 
     nuraft::asio_service::options asio_opts{};
@@ -94,58 +95,14 @@ nuraft::ptr<nuraft::buffer> getZooKeeperLogEntry(int64_t session_id, const Coord
     return buf.getBuffer();
 }
 
-NuKeeperStorage::ResponsesForSessions readZooKeeperResponses(nuraft::ptr<nuraft::buffer> & buffer, const Coordination::ZooKeeperRequestPtr & request)
-{
-    DB::NuKeeperStorage::ResponsesForSessions results;
-    DB::ReadBufferFromNuraftBuffer buf(buffer);
-    bool response_found = false;
-
-    while (!buf.eof())
-    {
-        int64_t session_id;
-        DB::readIntBinary(session_id, buf);
-        int32_t length;
-        Coordination::XID xid;
-        int64_t zxid;
-        Coordination::Error err;
-
-        Coordination::read(length, buf);
-        Coordination::read(xid, buf);
-        Coordination::read(zxid, buf);
-        Coordination::read(err, buf);
-        Coordination::ZooKeeperResponsePtr response;
-
-        if (xid == Coordination::WATCH_XID)
-            response = std::make_shared<Coordination::ZooKeeperWatchResponse>();
-        else
-        {
-            if (response_found)
-                throw Exception(ErrorCodes::LOGICAL_ERROR, "More than one non-watch response for single request with xid {}, response xid {}", request->xid, xid);
-
-            response_found = true;
-            response = request->makeResponse();
-        }
-
-        if (err == Coordination::Error::ZOK && (xid == Coordination::WATCH_XID || response->getOpNum() != Coordination::OpNum::Close))
-            response->readImpl(buf);
-
-        response->xid = xid;
-        response->zxid = zxid;
-        response->error = err;
-
-        results.push_back(DB::NuKeeperStorage::ResponseForSession{session_id, response});
-    }
-    return results;
 }
 
-}
-
-NuKeeperStorage::ResponsesForSessions NuKeeperServer::putRequest(const NuKeeperStorage::RequestForSession & request_for_session)
+void NuKeeperServer::putRequest(const NuKeeperStorage::RequestForSession & request_for_session)
 {
     auto [session_id, request] = request_for_session;
-    if (isLeaderAlive() && request_for_session.request->isReadRequest())
+    if (isLeaderAlive() && request->isReadRequest())
     {
-        return state_machine->processReadRequest(request_for_session);
+        state_machine->processReadRequest(request_for_session);
     }
     else
     {
@@ -162,8 +119,7 @@ NuKeeperStorage::ResponsesForSessions NuKeeperServer::putRequest(const NuKeeperS
             response->xid = request->xid;
             response->zxid = 0;
             response->error = Coordination::Error::ZOPERATIONTIMEOUT;
-            responses.push_back(DB::NuKeeperStorage::ResponseForSession{session_id, response});
-            return responses;
+            responses_queue.push(DB::NuKeeperStorage::ResponseForSession{session_id, response});
         }
 
         if (result->get_result_code() == nuraft::cmd_result_code::TIMEOUT)
@@ -173,17 +129,10 @@ NuKeeperStorage::ResponsesForSessions NuKeeperServer::putRequest(const NuKeeperS
             response->xid = request->xid;
             response->zxid = 0;
             response->error = Coordination::Error::ZOPERATIONTIMEOUT;
-            responses.push_back(DB::NuKeeperStorage::ResponseForSession{session_id, response});
-            return responses;
+            responses_queue.push(DB::NuKeeperStorage::ResponseForSession{session_id, response});
         }
         else if (result->get_result_code() != nuraft::cmd_result_code::OK)
             throw Exception(ErrorCodes::RAFT_ERROR, "Requests result failed with code {} and message: '{}'", result->get_result_code(), result->get_result_str());
-
-        auto result_buf = result->get();
-        if (result_buf == nullptr)
-            throw Exception(ErrorCodes::RAFT_ERROR, "Received nullptr from RAFT leader");
-
-        return readZooKeeperResponses(result_buf, request);
     }
 }
 
diff --git a/src/Coordination/NuKeeperServer.h b/src/Coordination/NuKeeperServer.h
index 6151cd095e0..5646bbbd002 100644
--- a/src/Coordination/NuKeeperServer.h
+++ b/src/Coordination/NuKeeperServer.h
@@ -31,12 +31,14 @@ private:
 
     std::mutex append_entries_mutex;
 
+    ResponsesQueue & responses_queue;
+
 public:
-    NuKeeperServer(int server_id_, const std::string & hostname_, int port_);
+    NuKeeperServer(int server_id_, const std::string & hostname_, int port_, ResponsesQueue & responses_queue_);
 
     void startup(int64_t operation_timeout_ms);
 
-    NuKeeperStorage::ResponsesForSessions putRequest(const NuKeeperStorage::RequestForSession & request);
+    void putRequest(const NuKeeperStorage::RequestForSession & request);
 
     int64_t getSessionID(int64_t session_timeout_ms);
 
diff --git a/src/Coordination/NuKeeperStateMachine.cpp b/src/Coordination/NuKeeperStateMachine.cpp
index 092b2b0580f..7896caad568 100644
--- a/src/Coordination/NuKeeperStateMachine.cpp
+++ b/src/Coordination/NuKeeperStateMachine.cpp
@@ -43,8 +43,9 @@ nuraft::ptr<nuraft::buffer> writeResponses(NuKeeperStorage::ResponsesForSessions
 }
 
 
-NuKeeperStateMachine::NuKeeperStateMachine(int64_t tick_time)
+NuKeeperStateMachine::NuKeeperStateMachine(ResponsesQueue & responses_queue_, long tick_time)
     : storage(tick_time)
+    , responses_queue(responses_queue_)
     , last_committed_idx(0)
     , log(&Poco::Logger::get("NuRaftStateMachine"))
 {
@@ -76,10 +77,12 @@ nuraft::ptr<nuraft::buffer> NuKeeperStateMachine::commit(const size_t log_idx, n
         {
             std::lock_guard lock(storage_lock);
             responses_for_sessions = storage.processRequest(request_for_session.request, request_for_session.session_id);
+            for (auto & response_for_session : responses_for_sessions)
+                responses_queue.push(response_for_session);
         }
 
         last_committed_idx = log_idx;
-        return writeResponses(responses_for_sessions);
+        return nullptr;
     }
 }
 
@@ -228,10 +231,15 @@ int NuKeeperStateMachine::read_logical_snp_obj(
     return 0;
 }
 
-NuKeeperStorage::ResponsesForSessions NuKeeperStateMachine::processReadRequest(const NuKeeperStorage::RequestForSession & request_for_session)
+void NuKeeperStateMachine::processReadRequest(const NuKeeperStorage::RequestForSession & request_for_session)
 {
-    std::lock_guard lock(storage_lock);
-    return storage.processRequest(request_for_session.request, request_for_session.session_id);
+    NuKeeperStorage::ResponsesForSessions responses;
+    {
+        std::lock_guard lock(storage_lock);
+        responses = storage.processRequest(request_for_session.request, request_for_session.session_id);
+    }
+    for (const auto & response : responses)
+        responses_queue.push(response);
 }
 
 std::unordered_set<int64_t> NuKeeperStateMachine::getDeadSessions()
diff --git a/src/Coordination/NuKeeperStateMachine.h b/src/Coordination/NuKeeperStateMachine.h
index e45c197db8c..6dfb9ff4c3a 100644
--- a/src/Coordination/NuKeeperStateMachine.h
+++ b/src/Coordination/NuKeeperStateMachine.h
@@ -3,14 +3,17 @@
 #include <Coordination/NuKeeperStorage.h>
 #include <libnuraft/nuraft.hxx>
 #include <common/logger_useful.h>
+#include <Coordination/ThreadSafeQueue.h>
 
 namespace DB
 {
 
+using ResponsesQueue = ThreadSafeQueue<NuKeeperStorage::ResponseForSession>;
+
 class NuKeeperStateMachine : public nuraft::state_machine
 {
 public:
-    NuKeeperStateMachine(long tick_time = 500);
+    NuKeeperStateMachine(ResponsesQueue & responses_queue_, long tick_time = 500);
 
     nuraft::ptr<nuraft::buffer> pre_commit(const size_t /*log_idx*/, nuraft::buffer & /*data*/) override { return nullptr; }
 
@@ -47,7 +50,7 @@ public:
         return storage;
     }
 
-    NuKeeperStorage::ResponsesForSessions processReadRequest(const NuKeeperStorage::RequestForSession & request_for_session);
+    void processReadRequest(const NuKeeperStorage::RequestForSession & request_for_session);
 
     std::unordered_set<int64_t> getDeadSessions();
 
@@ -74,6 +77,8 @@ private:
     static void writeSnapshot(const StorageSnapshotPtr & snapshot, nuraft::ptr<nuraft::buffer> & out);
 
     NuKeeperStorage storage;
+
+    ResponsesQueue & responses_queue;
     /// Mutex for snapshots
     std::mutex snapshots_lock;
 
diff --git a/src/Coordination/NuKeeperStorageDispatcher.cpp b/src/Coordination/NuKeeperStorageDispatcher.cpp
index e327272cab1..86bdae9cc37 100644
--- a/src/Coordination/NuKeeperStorageDispatcher.cpp
+++ b/src/Coordination/NuKeeperStorageDispatcher.cpp
@@ -16,9 +16,9 @@ NuKeeperStorageDispatcher::NuKeeperStorageDispatcher()
 {
 }
 
-void NuKeeperStorageDispatcher::processingThread()
+void NuKeeperStorageDispatcher::requestThread()
 {
-    setThreadName("NuKeeperSProc");
+    setThreadName("NuKeeperReqT");
     while (!shutdown_called)
     {
         NuKeeperStorage::RequestForSession request;
@@ -32,9 +32,33 @@ void NuKeeperStorageDispatcher::processingThread()
 
             try
             {
-                auto responses = server->putRequest(request);
-                for (const auto & response_for_session : responses)
-                    setResponse(response_for_session.session_id, response_for_session.response);
+                server->putRequest(request);
+            }
+            catch (...)
+            {
+                tryLogCurrentException(__PRETTY_FUNCTION__);
+            }
+        }
+    }
+}
+
+void NuKeeperStorageDispatcher::responseThread()
+{
+    setThreadName("NuKeeperRspT");
+    while (!shutdown_called)
+    {
+        NuKeeperStorage::ResponseForSession response_for_session;
+
+        UInt64 max_wait = UInt64(operation_timeout.totalMilliseconds());
+
+        if (responses_queue.tryPop(response_for_session, max_wait))
+        {
+            if (shutdown_called)
+                break;
+
+            try
+            {
+                 setResponse(response_for_session.session_id, response_for_session.response);
             }
             catch (...)
             {
@@ -139,7 +163,7 @@ void NuKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfigurati
         ids.push_back(server_id);
     }
 
-    server = std::make_unique<NuKeeperServer>(myid, myhostname, myport);
+    server = std::make_unique<NuKeeperServer>(myid, myhostname, myport, responses_queue);
     try
     {
         server->startup(operation_timeout.totalMilliseconds());
@@ -170,7 +194,8 @@ void NuKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfigurati
         throw;
     }
 
-    processing_thread = ThreadFromGlobalPool([this] { processingThread(); });
+    request_thread = ThreadFromGlobalPool([this] { requestThread(); });
+    responses_thread = ThreadFromGlobalPool([this] { responseThread(); });
     session_cleaner_thread = ThreadFromGlobalPool([this] { sessionCleanerTask(); });
 
     LOG_DEBUG(log, "Dispatcher initialized");
@@ -192,8 +217,11 @@ void NuKeeperStorageDispatcher::shutdown()
             if (session_cleaner_thread.joinable())
                 session_cleaner_thread.join();
 
-            if (processing_thread.joinable())
-                processing_thread.join();
+            if (request_thread.joinable())
+                request_thread.join();
+
+            if (responses_thread.joinable())
+                responses_thread.join();
         }
 
         if (server)
@@ -246,12 +274,7 @@ void NuKeeperStorageDispatcher::sessionCleanerTask()
                     Coordination::ZooKeeperRequestPtr request = Coordination::ZooKeeperRequestFactory::instance().get(Coordination::OpNum::Close);
                     request->xid = Coordination::CLOSE_XID;
                     putRequest(request, dead_session);
-                    {
-                        std::lock_guard lock(session_to_response_callback_mutex);
-                        auto session_it = session_to_response_callback.find(dead_session);
-                        if (session_it != session_to_response_callback.end())
-                            session_to_response_callback.erase(session_it);
-                    }
+                    finishSession(dead_session);
                 }
             }
         }
diff --git a/src/Coordination/NuKeeperStorageDispatcher.h b/src/Coordination/NuKeeperStorageDispatcher.h
index dfd36b39537..6820247a5af 100644
--- a/src/Coordination/NuKeeperStorageDispatcher.h
+++ b/src/Coordination/NuKeeperStorageDispatcher.h
@@ -31,13 +31,15 @@ private:
 
     using RequestsQueue = ConcurrentBoundedQueue<NuKeeperStorage::RequestForSession>;
     RequestsQueue requests_queue{1};
+    ResponsesQueue responses_queue;
     std::atomic<bool> shutdown_called{false};
     using SessionToResponseCallback = std::unordered_map<int64_t, ZooKeeperResponseCallback>;
 
     std::mutex session_to_response_callback_mutex;
     SessionToResponseCallback session_to_response_callback;
 
-    ThreadFromGlobalPool processing_thread;
+    ThreadFromGlobalPool request_thread;
+    ThreadFromGlobalPool responses_thread;
 
     ThreadFromGlobalPool session_cleaner_thread;
 
@@ -46,7 +48,8 @@ private:
     Poco::Logger * log;
 
 private:
-    void processingThread();
+    void requestThread();
+    void responseThread();
     void sessionCleanerTask();
     void setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response);
 
diff --git a/src/Coordination/ThreadSafeQueue.h b/src/Coordination/ThreadSafeQueue.h
new file mode 100644
index 00000000000..d36e25244bb
--- /dev/null
+++ b/src/Coordination/ThreadSafeQueue.h
@@ -0,0 +1,45 @@
+#pragma once
+
+#include <queue>
+#include <mutex>
+
+namespace DB
+{
+
+/// Queue with mutex and condvar. As simple as possible.
+template <typename T>
+class ThreadSafeQueue
+{
+private:
+    mutable std::mutex queue_mutex;
+    std::condition_variable cv;
+    std::queue<T> queue;
+public:
+
+    void push(const T & response)
+    {
+        std::lock_guard lock(queue_mutex);
+        queue.push(response);
+        cv.notify_one();
+    }
+
+    bool tryPop(T & response, int64_t timeout_ms = 0)
+    {
+        std::unique_lock lock(queue_mutex);
+        if (!cv.wait_for(lock,
+                std::chrono::milliseconds(timeout_ms), [this] { return !queue.empty(); }))
+            return false;
+
+        response = queue.front();
+        queue.pop();
+        return true;
+    }
+
+    size_t size() const
+    {
+        std::lock_guard lock(queue_mutex);
+        return queue.size();
+    }
+};
+
+}
diff --git a/src/Coordination/tests/gtest_for_build.cpp b/src/Coordination/tests/gtest_for_build.cpp
index 956b12d6e08..baba7fc115e 100644
--- a/src/Coordination/tests/gtest_for_build.cpp
+++ b/src/Coordination/tests/gtest_for_build.cpp
@@ -272,9 +272,6 @@ TEST(CoordinationTest, TestSummingRaft3)
     s3.launcher.shutdown(5);
 }
 
-using NuKeeperRaftServer = SimpliestRaftServer<DB::NuKeeperStateMachine>;
-
-
 nuraft::ptr<nuraft::buffer> getZooKeeperLogEntry(int64_t session_id, const Coordination::ZooKeeperRequestPtr & request)
 {
     DB::WriteBufferFromNuraftBuffer buf;
@@ -337,132 +334,4 @@ TEST(CoordinationTest, TestStorageSerialization)
     EXPECT_EQ(new_storage.ephemerals[1].size(), 1);
 }
 
-/// Code with obvious races, but I don't want to make it
-/// more complex to avoid races.
-#if defined(__has_feature)
-#  if ! __has_feature(thread_sanitizer)
-
-TEST(CoordinationTest, TestNuKeeperRaft)
-{
-    NuKeeperRaftServer s1(1, "localhost", 44447);
-    NuKeeperRaftServer s2(2, "localhost", 44448);
-    NuKeeperRaftServer s3(3, "localhost", 44449);
-
-    nuraft::srv_config first_config(1, "localhost:44447");
-    auto ret1 = s2.raft_instance->add_srv(first_config);
-
-    EXPECT_TRUE(ret1->get_accepted()) << "failed to add server: " << ret1->get_result_str() << std::endl;
-
-    while (s1.raft_instance->get_leader() != 2)
-    {
-        std::cout << "Waiting s1 to join to s2 quorum\n";
-        std::this_thread::sleep_for(std::chrono::milliseconds(100));
-    }
-
-    nuraft::srv_config third_config(3, "localhost:44449");
-    auto ret3 = s2.raft_instance->add_srv(third_config);
-
-    EXPECT_TRUE(ret3->get_accepted()) << "failed to add server: " << ret3->get_result_str() << std::endl;
-
-    while (s3.raft_instance->get_leader() != 2)
-    {
-        std::cout << "Waiting s3 to join to s2 quorum\n";
-        std::this_thread::sleep_for(std::chrono::milliseconds(100));
-    }
-
-    /// S2 is leader
-    EXPECT_EQ(s1.raft_instance->get_leader(), 2);
-    EXPECT_EQ(s2.raft_instance->get_leader(), 2);
-    EXPECT_EQ(s3.raft_instance->get_leader(), 2);
-
-    int64_t session_id = 34;
-    std::shared_ptr<Coordination::ZooKeeperCreateRequest> create_request = std::make_shared<Coordination::ZooKeeperCreateRequest>();
-    create_request->path = "/hello";
-    create_request->data = "world";
-
-    auto entry1 = getZooKeeperLogEntry(session_id, create_request);
-    auto ret_leader = s2.raft_instance->append_entries({entry1});
-
-    EXPECT_TRUE(ret_leader->get_accepted()) << "failed to replicate create entry:" << ret_leader->get_result_code();
-    EXPECT_EQ(ret_leader->get_result_code(), nuraft::cmd_result_code::OK) << "failed to replicate create entry:" << ret_leader->get_result_code();
-
-    auto * result = ret_leader.get();
-
-    auto responses = getZooKeeperResponses(result->get(), create_request);
-
-    EXPECT_EQ(responses.size(), 1);
-    EXPECT_EQ(responses[0].session_id, 34);
-    EXPECT_EQ(responses[0].response->getOpNum(), Coordination::OpNum::Create);
-    EXPECT_EQ(dynamic_cast<Coordination::ZooKeeperCreateResponse *>(responses[0].response.get())->path_created, "/hello");
-
-    while (s1.state_machine->getStorage().container.count("/hello") == 0)
-    {
-        std::cout << "Waiting s1 to apply entry\n";
-        std::this_thread::sleep_for(std::chrono::milliseconds(100));
-    }
-
-    while (s2.state_machine->getStorage().container.count("/hello") == 0)
-    {
-        std::cout << "Waiting s2 to apply entry\n";
-        std::this_thread::sleep_for(std::chrono::milliseconds(100));
-    }
-
-    while (s3.state_machine->getStorage().container.count("/hello") == 0)
-    {
-        std::cout << "Waiting s3 to apply entry\n";
-        std::this_thread::sleep_for(std::chrono::milliseconds(100));
-    }
-
-    EXPECT_EQ(s1.state_machine->getStorage().container["/hello"].data, "world");
-    EXPECT_EQ(s2.state_machine->getStorage().container["/hello"].data, "world");
-    EXPECT_EQ(s3.state_machine->getStorage().container["/hello"].data, "world");
-
-    std::shared_ptr<Coordination::ZooKeeperGetRequest> get_request = std::make_shared<Coordination::ZooKeeperGetRequest>();
-    get_request->path = "/hello";
-    auto entry2 = getZooKeeperLogEntry(session_id, get_request);
-    auto ret_leader_get = s2.raft_instance->append_entries({entry2});
-
-    EXPECT_TRUE(ret_leader_get->get_accepted()) << "failed to replicate create entry: " << ret_leader_get->get_result_code();
-    EXPECT_EQ(ret_leader_get->get_result_code(), nuraft::cmd_result_code::OK) << "failed to replicate create entry: " << ret_leader_get->get_result_code();
-
-    auto * result_get = ret_leader_get.get();
-
-    auto get_responses = getZooKeeperResponses(result_get->get(), get_request);
-
-    EXPECT_EQ(get_responses.size(), 1);
-    EXPECT_EQ(get_responses[0].session_id, 34);
-    EXPECT_EQ(get_responses[0].response->getOpNum(), Coordination::OpNum::Get);
-    EXPECT_EQ(dynamic_cast<Coordination::ZooKeeperGetResponse *>(get_responses[0].response.get())->data, "world");
-
-
-    NuKeeperRaftServer s4(4, "localhost", 44450);
-    nuraft::srv_config fourth_config(4, "localhost:44450");
-    auto ret4 = s2.raft_instance->add_srv(fourth_config);
-    while (s4.raft_instance->get_leader() != 2)
-    {
-        std::cout << "Waiting s1 to join to s2 quorum\n";
-        std::this_thread::sleep_for(std::chrono::milliseconds(100));
-    }
-
-    /// Applied snapshot
-    EXPECT_EQ(s4.raft_instance->get_leader(), 2);
-
-    while (s4.state_machine->getStorage().container.count("/hello") == 0)
-    {
-        std::cout << "Waiting s4 to apply entry\n";
-        std::this_thread::sleep_for(std::chrono::milliseconds(100));
-    }
-
-    EXPECT_EQ(s4.state_machine->getStorage().container["/hello"].data, "world");
-
-    s1.launcher.shutdown(5);
-    s2.launcher.shutdown(5);
-    s3.launcher.shutdown(5);
-    s4.launcher.shutdown(5);
-}
-
-# endif
-
-#endif
-
 #endif
diff --git a/src/Server/NuKeeperTCPHandler.cpp b/src/Server/NuKeeperTCPHandler.cpp
index 31ffc744aaa..e855e2c68f7 100644
--- a/src/Server/NuKeeperTCPHandler.cpp
+++ b/src/Server/NuKeeperTCPHandler.cpp
@@ -45,36 +45,6 @@ struct PollResult
     bool error{false};
 };
 
-/// Queue with mutex. As simple as possible.
-class ThreadSafeResponseQueue
-{
-private:
-    mutable std::mutex queue_mutex;
-    std::queue<Coordination::ZooKeeperResponsePtr> queue;
-public:
-    void push(const Coordination::ZooKeeperResponsePtr & response)
-    {
-        std::lock_guard lock(queue_mutex);
-        queue.push(response);
-    }
-    bool tryPop(Coordination::ZooKeeperResponsePtr & response)
-    {
-        std::lock_guard lock(queue_mutex);
-        if (!queue.empty())
-        {
-            response = queue.front();
-            queue.pop();
-            return true;
-        }
-        return false;
-    }
-    size_t size() const
-    {
-        std::lock_guard lock(queue_mutex);
-        return queue.size();
-    }
-};
-
 struct SocketInterruptablePollWrapper
 {
     int sockfd;
diff --git a/src/Server/NuKeeperTCPHandler.h b/src/Server/NuKeeperTCPHandler.h
index 641d2f78e1f..241867a1d99 100644
--- a/src/Server/NuKeeperTCPHandler.h
+++ b/src/Server/NuKeeperTCPHandler.h
@@ -16,6 +16,7 @@
 #include <Coordination/NuKeeperStorageDispatcher.h>
 #include <IO/WriteBufferFromPocoSocket.h>
 #include <IO/ReadBufferFromPocoSocket.h>
+#include <Coordination/ThreadSafeQueue.h>
 #include <unordered_map>
 
 namespace DB
@@ -23,7 +24,9 @@ namespace DB
 
 struct SocketInterruptablePollWrapper;
 using SocketInterruptablePollWrapperPtr = std::unique_ptr<SocketInterruptablePollWrapper>;
-class ThreadSafeResponseQueue;
+
+using ThreadSafeResponseQueue = ThreadSafeQueue<Coordination::ZooKeeperResponsePtr>;
+
 using ThreadSafeResponseQueuePtr = std::unique_ptr<ThreadSafeResponseQueue>;
 
 class NuKeeperTCPHandler : public Poco::Net::TCPServerConnection

From 7e3186c826e5e673368a953a76b14c8369414956 Mon Sep 17 00:00:00 2001
From: templarzq <templarzq@163.com>
Date: Mon, 8 Feb 2021 21:18:48 +0800
Subject: [PATCH 0810/1238] optimize aggfunc column data copy (#19407)

* optimize agg column data copy

* add aggfunc col data copy test

* fix null warning

* fix style

* fix style

* fix style

* fix style

* fix copy same dat to same pos bug

* fix style

* fix style

* Update ColumnAggregateFunction.cpp

fix type check

* control record buf size

* fix performance

* fix performance

* fix destroy data speed

* decrease run time.

* fix warning

* decrease test run time

* Update aggfunc_col_data_copy.xml

* replace unordered_map with HashMap

* fix compile err

* fix compile err

* fix compile err

* fix compile err

* fix compile err

* fix compile err

* fix compile err

* fix compile err

* fix compile err

* fix hash map compile err

* fix compile err

* fix compile err

* fix format

* faster test

Co-authored-by: centos7 <templarzq@localhost.localdomain>
---
 src/Columns/ColumnAggregateFunction.cpp     | 52 +++++++++++++++++++--
 src/Columns/ColumnAggregateFunction.h       | 15 ++++++
 src/Common/HashTable/HashTable.h            |  9 +++-
 tests/performance/aggfunc_col_data_copy.xml | 24 ++++++++++
 tests/performance/reinterpret_as.xml        | 24 +++++-----
 5 files changed, 105 insertions(+), 19 deletions(-)
 create mode 100644 tests/performance/aggfunc_col_data_copy.xml

diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp
index d0a5e120a07..9562dc647c9 100644
--- a/src/Columns/ColumnAggregateFunction.cpp
+++ b/src/Columns/ColumnAggregateFunction.cpp
@@ -75,8 +75,28 @@ void ColumnAggregateFunction::set(const AggregateFunctionPtr & func_)
 ColumnAggregateFunction::~ColumnAggregateFunction()
 {
     if (!func->hasTrivialDestructor() && !src)
-        for (auto * val : data)
-            func->destroy(val);
+    {
+        if (copiedDataInfo.empty())
+        {
+            for (auto * val : data)
+            {
+                func->destroy(val);
+            }
+        }
+        else
+        {
+            size_t pos;
+            for (Map::iterator it = copiedDataInfo.begin(), it_end = copiedDataInfo.end(); it != it_end; ++it)
+            {
+                pos = it->getValue().second;
+                if (data[pos] != nullptr)
+                {
+                    func->destroy(data[pos]);
+                    data[pos] = nullptr;
+                }
+            }
+        }
+    }
 }
 
 void ColumnAggregateFunction::addArena(ConstArenaPtr arena_)
@@ -455,14 +475,37 @@ void ColumnAggregateFunction::insertFrom(const IColumn & from, size_t n)
     ///  (only as a whole, see comment above).
     ensureOwnership();
     insertDefault();
-    insertMergeFrom(from, n);
+    insertCopyFrom(assert_cast<const ColumnAggregateFunction &>(from).data[n]);
 }
 
 void ColumnAggregateFunction::insertFrom(ConstAggregateDataPtr place)
 {
     ensureOwnership();
     insertDefault();
-    insertMergeFrom(place);
+    insertCopyFrom(place);
+}
+
+void ColumnAggregateFunction::insertCopyFrom(ConstAggregateDataPtr place)
+{
+    Map::LookupResult result;
+    result = copiedDataInfo.find(place);
+    if (result == nullptr)
+    {
+        copiedDataInfo[place] = data.size()-1;
+        func->merge(data.back(), place, &createOrGetArena());
+    }
+    else
+    {
+        size_t pos = result->getValue().second;
+        if (pos != data.size() - 1)
+        {
+            data[data.size() - 1] = data[pos];
+        }
+        else /// insert same data to same pos, merge them.
+        {
+            func->merge(data.back(), place, &createOrGetArena());
+        }
+    }
 }
 
 void ColumnAggregateFunction::insertMergeFrom(ConstAggregateDataPtr place)
@@ -697,5 +740,4 @@ MutableColumnPtr ColumnAggregateFunction::cloneResized(size_t size) const
         return cloned_col;
     }
 }
-
 }
diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h
index cd45cf583a0..a1aa9e29a39 100644
--- a/src/Columns/ColumnAggregateFunction.h
+++ b/src/Columns/ColumnAggregateFunction.h
@@ -13,6 +13,8 @@
 
 #include <Functions/FunctionHelpers.h>
 
+#include <Common/HashTable/HashMap.h>
+
 namespace DB
 {
 
@@ -82,6 +84,17 @@ private:
     /// Name of the type to distinguish different aggregation states.
     String type_string;
 
+    /// MergedData records, used to avoid duplicated data copy.
+    ///key: src pointer, val:  pos in current column.
+    using Map = HashMap<
+        ConstAggregateDataPtr,
+        size_t,
+        DefaultHash<ConstAggregateDataPtr>,
+        HashTableGrower<3>,
+        HashTableAllocatorWithStackMemory<sizeof(std::pair<ConstAggregateDataPtr, size_t>) * (1 << 3)>>;
+
+    Map copiedDataInfo;
+
     ColumnAggregateFunction() {}
 
     /// Create a new column that has another column as a source.
@@ -140,6 +153,8 @@ public:
 
     void insertFrom(ConstAggregateDataPtr place);
 
+    void insertCopyFrom(ConstAggregateDataPtr place);
+
     /// Merge state at last row with specified state in another column.
     void insertMergeFrom(ConstAggregateDataPtr place);
 
diff --git a/src/Common/HashTable/HashTable.h b/src/Common/HashTable/HashTable.h
index 15fa09490e6..bd4643261d3 100644
--- a/src/Common/HashTable/HashTable.h
+++ b/src/Common/HashTable/HashTable.h
@@ -69,11 +69,16 @@ namespace ZeroTraits
 {
 
 template <typename T>
-bool check(const T x) { return x == 0; }
+inline bool check(const T x) { return x == 0; }
 
 template <typename T>
-void set(T & x) { x = 0; }
+inline void set(T & x) { x = 0; }
 
+template <>
+inline bool check(const char * x) { return x == nullptr; }
+
+template <>
+inline void set(const char *& x){ x = nullptr; }
 }
 
 
diff --git a/tests/performance/aggfunc_col_data_copy.xml b/tests/performance/aggfunc_col_data_copy.xml
new file mode 100644
index 00000000000..111f7959d58
--- /dev/null
+++ b/tests/performance/aggfunc_col_data_copy.xml
@@ -0,0 +1,24 @@
+<test max_ignored_relative_change="0.2">
+  <create_query>drop table if EXISTS test_bm2;</create_query>  
+  <create_query>drop table if EXISTS test_bm_join2;</create_query>  
+  <create_query>create table test_bm2(
+        dim UInt64,
+        id UInt64)
+        ENGINE = MergeTree()
+        ORDER BY( dim )
+        SETTINGS index_granularity = 8192;
+  </create_query>  
+  <create_query>  
+   create table test_bm_join2(
+     dim UInt64,
+     ids AggregateFunction(groupBitmap, UInt64) ) 
+   ENGINE = MergeTree()
+   ORDER BY(dim)
+   SETTINGS index_granularity = 8192;
+  </create_query>  
+  <fill_query>insert into test_bm2 SELECT 1,number FROM numbers(0, 1000)</fill_query>  
+  <fill_query>insert into test_bm_join2 SELECT 1, bitmapBuild(range(toUInt64(0),toUInt64(11000000)))</fill_query>  
+  <query>select a.dim,bitmapCardinality(b.ids) from test_bm2 a left join test_bm_join2 b using(dim)</query>  
+  <drop_query>drop table if exists test_bm2</drop_query> 
+  <drop_query>drop table if exists test_bm_join2</drop_query> 
+</test>
diff --git a/tests/performance/reinterpret_as.xml b/tests/performance/reinterpret_as.xml
index 6e1a0385319..6ef152bc552 100644
--- a/tests/performance/reinterpret_as.xml
+++ b/tests/performance/reinterpret_as.xml
@@ -19,7 +19,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(200000000)
+        FROM numbers_mt(20000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -38,7 +38,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(200000000)
+        FROM numbers_mt(20000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -76,7 +76,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(200000000)
+        FROM numbers_mt(20000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -95,7 +95,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(200000000)
+        FROM numbers_mt(20000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -115,7 +115,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(200000000)
+        FROM numbers_mt(20000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -134,7 +134,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(200000000)
+        FROM numbers_mt(20000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -153,7 +153,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(200000000)
+        FROM numbers_mt(20000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -172,7 +172,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(200000000)
+        FROM numbers_mt(20000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -191,7 +191,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(200000000)
+        FROM numbers_mt(20000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -210,7 +210,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(200000000)
+        FROM numbers_mt(20000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -230,7 +230,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(20000000)
+        FROM numbers_mt(2000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -249,7 +249,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(100000000)
+        FROM numbers_mt(20000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>

From 2413d6bd381b79f680399feca023f4a6b7873f9c Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Mon, 8 Feb 2021 16:26:06 +0300
Subject: [PATCH 0811/1238] Test multinode watches

---
 .../test_testkeeper_multinode/test.py         | 64 +++++++++++++++++++
 1 file changed, 64 insertions(+)

diff --git a/tests/integration/test_testkeeper_multinode/test.py b/tests/integration/test_testkeeper_multinode/test.py
index caba7ecddd9..ff001fb75ee 100644
--- a/tests/integration/test_testkeeper_multinode/test.py
+++ b/tests/integration/test_testkeeper_multinode/test.py
@@ -42,6 +42,70 @@ def test_simple_replicated_table(started_cluster):
     assert node3.query("SELECT COUNT() FROM t") == "10\n"
 
 
+def get_fake_zk(nodename):
+    _fake_zk_instance = KazooClient(hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=30.0)
+    def reset_last_zxid_listener(state):
+        print("Fake zk callback called for state", state)
+        _fake_zk_instance.last_zxid = 0
+
+        _fake_zk_instance.add_listener(reset_last_zxid_listener)
+    _fake_zk_instance.start()
+    return _fake_zk_instance
+
+def test_watch_on_follower(started_cluster):
+    try:
+        node1_zk = get_fake_zk("node1")
+        node2_zk = get_fake_zk("node2")
+        node3_zk = get_fake_zk("node3")
+
+        node1_zk.create("/test_data_watches")
+        node2_zk.set("/test_data_watches", b"hello")
+        node3_zk.set("/test_data_watches", b"world")
+
+        node1_data = None
+        def node1_callback(event):
+            print("node1 data watch called")
+            nonlocal node1_data
+            node1_data = event
+
+        node1_zk.get("/test_data_watches", watch=node1_callback)
+
+        node2_data = None
+        def node2_callback(event):
+            print("node2 data watch called")
+            nonlocal node2_data
+            node2_data = event
+
+        node2_zk.get("/test_data_watches", watch=node2_callback)
+
+        node3_data = None
+        def node3_callback(event):
+            print("node3 data watch called")
+            nonlocal node3_data
+            node3_data = event
+
+        node3_zk.get("/test_data_watches", watch=node3_callback)
+
+        node1_zk.set("/test_data_watches", b"somevalue")
+        time.sleep(3)
+
+        print(node1_data)
+        print(node2_data)
+        print(node3_data)
+
+        assert node1_data == node2_data
+        assert node3_data == node2_data
+
+    finally:
+        try:
+            for zk_conn in [node1_zk, node2_zk, node3_zk]:
+                zk_conn.stop()
+                zk_conn.close()
+        except:
+            pass
+
+
+
 # in extremely rare case it can take more than 5 minutes in debug build with sanitizer
 @pytest.mark.timeout(600)
 def test_blocade_leader(started_cluster):

From bfc703692ad5d90bb1f43836752e4f4668ba1c4b Mon Sep 17 00:00:00 2001
From: Vitaliy Zakaznikov <vzakaznikov@protonmail.com>
Date: Mon, 8 Feb 2021 08:48:43 -0500
Subject: [PATCH 0812/1238] Starting to add LDAP docs.

---
 .../external-authenticators/index.md          |   9 ++
 .../external-authenticators/ldap.md           | 145 ++++++++++++++++++
 .../sql-reference/statements/create/user.md   |   3 +-
 3 files changed, 156 insertions(+), 1 deletion(-)
 create mode 100644 docs/en/operations/external-authenticators/index.md
 create mode 100644 docs/en/operations/external-authenticators/ldap.md

diff --git a/docs/en/operations/external-authenticators/index.md b/docs/en/operations/external-authenticators/index.md
new file mode 100644
index 00000000000..10c2ea91eb9
--- /dev/null
+++ b/docs/en/operations/external-authenticators/index.md
@@ -0,0 +1,9 @@
+---
+toc_folder_title: External User Authenticators and Directories
+toc_priority: 48
+toc_title: Introduction
+---
+
+# External User Authenticators and Directories {#external-authenticators}
+
+ClickHouse supports authenticating and managing users using external services such as [LDAP](#external-authenticators-ldap).
diff --git a/docs/en/operations/external-authenticators/ldap.md b/docs/en/operations/external-authenticators/ldap.md
new file mode 100644
index 00000000000..fd5f2e578ce
--- /dev/null
+++ b/docs/en/operations/external-authenticators/ldap.md
@@ -0,0 +1,145 @@
+# LDAP {#external-authenticators-ldap} 
+
+LDAP server can be used to authenticate ClickHouse users. There are two different approaches for doing this:
+
+- use LDAP as an external authenticator for existing users, which are defined in `users.xml` or in local access control paths
+- use LDAP as an external user directory and allow locally undefined users to be authenticated if they exist on the LDAP server
+
+For both of these approaches, an internally named LDAP server must be defined in the ClickHouse config 
+so that other parts of config are able to refer to it.
+
+## Server Definition {#ldap-server-definition}
+
+To define LDAP server you must add `ldap_servers` section to the `config.xml`. For example,
+
+```xml
+<yandex>
+    <!- ... -->
+    <ldap_servers>
+        <my_ldap_server>
+            <host>localhost</host>
+            <port>636</port>
+            <bind_dn>uid={user_name},ou=users,dc=example,dc=com</bind_dn>
+            <verification_cooldown>300</verification_cooldown>
+            <enable_tls>yes</enable_tls>
+            <tls_minimum_protocol_version>tls1.2</tls_minimum_protocol_version>
+            <tls_require_cert>demand</tls_require_cert>
+            <tls_cert_file>/path/to/tls_cert_file</tls_cert_file>
+            <tls_key_file>/path/to/tls_key_file</tls_key_file>
+            <tls_ca_cert_file>/path/to/tls_ca_cert_file</tls_ca_cert_file>
+            <tls_ca_cert_dir>/path/to/tls_ca_cert_dir</tls_ca_cert_dir>
+            <tls_cipher_suite>ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:AES256-GCM-SHA384</tls_cipher_suite>
+        </my_ldap_server>
+    </ldap_servers>
+</yandex>
+```
+
+Note, that you can define multiple LDAP servers inside `ldap_servers` section using distinct names.
+
+Parameters:
+
+- `host` - LDAP server hostname or IP, this parameter is mandatory and cannot be empty.
+- `port` - LDAP server port, default is `636` if `enable_tls` is set to `true`, `389` otherwise.
+- `bind_dn` - template used to construct the DN to bind to.
+  - The resulting DN will be constructed by replacing all `{user_name}` substrings of the template with the actual user name during each authentication attempt.
+- `verification_cooldown` - a period of time, in seconds, after a successful bind attempt, during which the user will be assumed to be successfully authenticated for all consecutive requests without contacting the LDAP server.
+  - Specify `0` (the default) to disable caching and force contacting the LDAP server for each authentication request.
+- `enable_tls` - flag to trigger use of secure connection to the LDAP server.
+  - Specify `no` for plain text `ldap://` protocol (not recommended).
+  - Specify `yes` for LDAP over SSL/TLS `ldaps://` protocol (recommended, the default).
+  - Specify `starttls` for legacy StartTLS protocol (plain text `ldap://` protocol, upgraded to TLS).
+- `tls_minimum_protocol_version` - the minimum protocol version of SSL/TLS.
+  - Accepted values are: `ssl2`, `ssl3`, `tls1.0`, `tls1.1`, `tls1.2` (the default).
+- `tls_require_cert` - SSL/TLS peer certificate verification behavior.
+  - Accepted values are: `never`, `allow`, `try`, `demand` (the default).
+- `tls_cert_file` - path to certificate file.
+- `tls_key_file` - path to certificate key file.
+- `tls_ca_cert_file` - path to CA certificate file.
+- `tls_ca_cert_dir` - path to the directory containing CA certificates.
+- `tls_cipher_suite` - allowed cipher suite (in OpenSSL notation).
+
+## External Authenticator {#ldap-external-authenticator}
+
+A remote LDAP server can be used as a method for verifying the passwords for locally defined users (users defined in `users.xml` or in local access control paths). In order to achieve this, specify previously defined LDAP server name instead of `password` or similar sections in the user definition.
+
+At each login attempt, ClickHouse will try to "bind" to the specified DN (see `bind_dn` sections in LDAP server config in `config.xml`) at the LDAP server using the provided credentials, and, if successful, the user will be considered authenticated. This is often called "simple bind" method.
+
+Example (goes into `users.xml`):
+
+```xml
+<yandex>
+    <!- ... -->
+    <users>
+        <!- ... -->
+        <my_user>
+            <!- ... -->
+            <ldap>
+                <server>my_ldap_server</server>
+            </ldap>
+        </my_user>
+    </users>
+</yandex>
+```
+
+Note, that now, once user `my_user` refers to `my_ldap_server`, this LDAP server must be configured in the main `config.xml` file as described previously.
+
+When SQL-driven Access Control and Account Management is enabled in ClickHouse, users that are identified by LDAP servers can also be created using queries.
+
+Example (execute in ClickHouse client):
+
+```sql
+CREATE USER my_user IDENTIFIED WITH ldap_server BY 'my_ldap_server'
+```
+
+## Exernal User Directory {#ldap-external-user-directory}
+
+A remote LDAP server can be used as a source of user definitions, in addition to the locally defined users. In order to achieve this, specify previously defined LDAP server name in `ldap` section inside `users_directories` section in main `config.xml` file.
+
+At each login attempt, ClickHouse will try to find the user definition locally and authenticate it as usual, but if the user is not defined, ClickHouse will assume it exists in the external LDAP directory, and will try to "bind" to the specified DN (see `bind_dn` sections in LDAP server config in `config.xml`) at the LDAP server using the provided credentials, and, if successful, the user will be considered existing and authenticated. The user will be assigned roles from the list specified in `roles`. Additionally, LDAP "search" can be performed and results can be transformed and treated as role names and then assigned to the user if `role_mapping` section is also configured. All this implies that the SQL-driven Access Control and Account Management is enabled in ClickHouse and roles are created using `CREATE ROLE ...` queries.
+
+Example (goes into `config.xml`):
+
+```xml
+<yandex>
+    <!- ... -->
+    <user_directories>
+        <!- ... -->
+        <ldap>
+            <server>my_ldap_server</server>
+            <roles>
+                <my_local_role1 />
+                <my_local_role2 />
+            </roles>
+            <role_mapping>
+                <base_dn>ou=groups,dc=example,dc=com</base_dn>
+                <scope>subtree</scope>
+                <search_filter>(&amp;(objectClass=groupOfNames)(member={bind_dn}))</search_filter>
+                <attribute>cn</attribute>
+                <prefix>clickhouse_</prefix>
+            </role_mapping>
+        </ldap>
+    </user_directories>
+</yandex>
+```
+
+Note, that now, once `my_ldap_server` is referred from `ldap` inside `user_directories` section, this LDAP server must be configured in the main `config.xml` file as described previously.
+
+Parameters:
+
+- `server` - one of LDAP server names defined in `ldap_servers` config section above. This parameter is mandatory and cannot be empty.
+- `roles` - section with a list of locally defined roles that will be assigned to each user retrieved from the LDAP server.
+  - If no roles are specified here or assigned during role mapping (below), user will not be able to perform any actions after authentication.
+- `role_mapping` - section with LDAP search parameters and mapping rules.
+  - When a user authenticates, while still bound to LDAP, an LDAP search is performed using `search_filter` and the name of the logged in user. For each entry found during that search, the value of the specified attribute is extracted. For each attribute value that has the specified prefix, the prefix is removed, and the rest of the value becomes the name of a local role defined in ClickHouse, which is expected to be created beforehand by `CREATE ROLE ...` command.
+
+  - There can be multiple `role_mapping` sections defined inside the same `ldap` section. All of them will be applied.
+    - `base_dn` - template used to construct the base DN for the LDAP search.
+      - The resulting DN will be constructed by replacing all `{user_name}` and `{bind_dn}` substrings of the template with the actual user name and bind DN during each LDAP search.
+    - `scope` - scope of the LDAP search.
+      - Accepted values are: `base`, `one_level`, `children`, `subtree` (the default).
+    - `search_filter` - template used to construct the search filter for the LDAP search.
+      - The resulting filter will be constructed by replacing all `{user_name}`, `{bind_dn}`, and `{base_dn}`  substrings of the template with the actual user name, bind DN, and base DN during each LDAP search.
+      - Note, that the special characters must be escaped properly in XML.
+    - `attribute` - attribute name whose values will be returned by the LDAP search.
+    - `prefix` - prefix, that will be expected to be in front of each string in the original list of strings returned by the LDAP search. Prefix will be removed from the original strings and resulting strings will be treated as local role names. Empty, by default.
+
diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md
index d5343cce7be..c1a52e3b864 100644
--- a/docs/en/sql-reference/statements/create/user.md
+++ b/docs/en/sql-reference/statements/create/user.md
@@ -12,7 +12,7 @@ Syntax:
 ``` sql
 CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] 
         [, name2 [ON CLUSTER cluster_name2] ...]
-    [IDENTIFIED [WITH {NO_PASSWORD|PLAINTEXT_PASSWORD|SHA256_PASSWORD|SHA256_HASH|DOUBLE_SHA1_PASSWORD|DOUBLE_SHA1_HASH}] BY {'password'|'hash'}]
+    [IDENTIFIED [WITH {NO_PASSWORD|PLAINTEXT_PASSWORD|SHA256_PASSWORD|SHA256_HASH|DOUBLE_SHA1_PASSWORD|DOUBLE_SHA1_HASH|LDAP_SERVER}] BY {'password'|'hash'}]
     [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE]
     [DEFAULT ROLE role [,...]]
     [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...]
@@ -30,6 +30,7 @@ There are multiple ways of user identification:
 -   `IDENTIFIED WITH sha256_hash BY 'hash'`
 -   `IDENTIFIED WITH double_sha1_password BY 'qwerty'`
 -   `IDENTIFIED WITH double_sha1_hash BY 'hash'`
+-   `IDENTIFIED WITH ldap_server BY 'server'`
 
 ## User Host {#user-host}
 

From d57613aa188e38f70d386cc53cdf1eb51bd90f55 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Mon, 8 Feb 2021 16:50:03 +0300
Subject: [PATCH 0813/1238] Fix 0_o build in arcadia

---
 src/Coordination/InMemoryStateManager.h        | 2 +-
 src/Coordination/LoggerWrapper.h               | 2 +-
 src/Coordination/NuKeeperServer.h              | 2 +-
 src/Coordination/NuKeeperStateMachine.h        | 2 +-
 src/Coordination/ReadBufferFromNuraftBuffer.h  | 2 +-
 src/Coordination/SummingStateMachine.h         | 2 +-
 src/Coordination/WriteBufferFromNuraftBuffer.h | 2 +-
 src/Coordination/tests/gtest_for_build.cpp     | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/Coordination/InMemoryStateManager.h b/src/Coordination/InMemoryStateManager.h
index 32eea343465..7446073c9c9 100644
--- a/src/Coordination/InMemoryStateManager.h
+++ b/src/Coordination/InMemoryStateManager.h
@@ -3,7 +3,7 @@
 #include <Core/Types.h>
 #include <string>
 #include <Coordination/InMemoryLogStore.h>
-#include <libnuraft/nuraft.hxx>
+#include <libnuraft/nuraft.hxx> // Y_IGNORE
 
 namespace DB
 {
diff --git a/src/Coordination/LoggerWrapper.h b/src/Coordination/LoggerWrapper.h
index fcc24edea14..c8da2372a91 100644
--- a/src/Coordination/LoggerWrapper.h
+++ b/src/Coordination/LoggerWrapper.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include <libnuraft/nuraft.hxx>
+#include <libnuraft/nuraft.hxx> // Y_IGNORE
 #include <common/logger_useful.h>
 
 namespace DB
diff --git a/src/Coordination/NuKeeperServer.h b/src/Coordination/NuKeeperServer.h
index 5646bbbd002..6fa2ae44ce2 100644
--- a/src/Coordination/NuKeeperServer.h
+++ b/src/Coordination/NuKeeperServer.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include <libnuraft/nuraft.hxx>
+#include <libnuraft/nuraft.hxx> // Y_IGNORE
 #include <Coordination/InMemoryLogStore.h>
 #include <Coordination/InMemoryStateManager.h>
 #include <Coordination/NuKeeperStateMachine.h>
diff --git a/src/Coordination/NuKeeperStateMachine.h b/src/Coordination/NuKeeperStateMachine.h
index 6dfb9ff4c3a..b12903b6929 100644
--- a/src/Coordination/NuKeeperStateMachine.h
+++ b/src/Coordination/NuKeeperStateMachine.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include <Coordination/NuKeeperStorage.h>
-#include <libnuraft/nuraft.hxx>
+#include <libnuraft/nuraft.hxx> // Y_IGNORE
 #include <common/logger_useful.h>
 #include <Coordination/ThreadSafeQueue.h>
 
diff --git a/src/Coordination/ReadBufferFromNuraftBuffer.h b/src/Coordination/ReadBufferFromNuraftBuffer.h
index cc01d3c8f39..3817e217881 100644
--- a/src/Coordination/ReadBufferFromNuraftBuffer.h
+++ b/src/Coordination/ReadBufferFromNuraftBuffer.h
@@ -1,7 +1,7 @@
 #pragma once
 #include <IO/ReadBufferFromMemory.h>
 
-#include <libnuraft/nuraft.hxx>
+#include <libnuraft/nuraft.hxx> // Y_IGNORE
 
 namespace DB
 {
diff --git a/src/Coordination/SummingStateMachine.h b/src/Coordination/SummingStateMachine.h
index 9aca02c6bdc..c8594ba7e8d 100644
--- a/src/Coordination/SummingStateMachine.h
+++ b/src/Coordination/SummingStateMachine.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include <libnuraft/nuraft.hxx>
+#include <libnuraft/nuraft.hxx> // Y_IGNORE
 #include <Core/Types.h>
 #include <atomic>
 #include <map>
diff --git a/src/Coordination/WriteBufferFromNuraftBuffer.h b/src/Coordination/WriteBufferFromNuraftBuffer.h
index 47a01fbc2a4..d037a0e6a27 100644
--- a/src/Coordination/WriteBufferFromNuraftBuffer.h
+++ b/src/Coordination/WriteBufferFromNuraftBuffer.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include <IO/WriteBuffer.h>
-#include <libnuraft/nuraft.hxx>
+#include <libnuraft/nuraft.hxx> // Y_IGNORE
 
 namespace DB
 {
diff --git a/src/Coordination/tests/gtest_for_build.cpp b/src/Coordination/tests/gtest_for_build.cpp
index baba7fc115e..82affd38062 100644
--- a/src/Coordination/tests/gtest_for_build.cpp
+++ b/src/Coordination/tests/gtest_for_build.cpp
@@ -20,7 +20,7 @@
 #include <Common/ZooKeeper/ZooKeeperCommon.h>
 #include <Common/ZooKeeper/ZooKeeperIO.h>
 #include <Common/Exception.h>
-#include <libnuraft/nuraft.hxx>
+#include <libnuraft/nuraft.hxx> // Y_IGNORE
 #include <thread>
 
 
From 5ea35a9ff64121a22f853d49c70b0ef97c3eac90 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com>
Date: Mon, 8 Feb 2021 16:56:52 +0300
Subject: [PATCH 0814/1238] Update part_log.md

---
 docs/en/operations/system-tables/part_log.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/operations/system-tables/part_log.md b/docs/en/operations/system-tables/part_log.md
index a85be5b210e..08269a2dc48 100644
--- a/docs/en/operations/system-tables/part_log.md
+++ b/docs/en/operations/system-tables/part_log.md
@@ -6,7 +6,7 @@ This table contains information about events that occurred with [data parts](../
 
 The `system.part_log` table contains the following columns:
 
--   `query_id` ([String](../../sql-reference/data-types/string.md)) — Id of the query to insert data parts.
+-   `query_id` ([String](../../sql-reference/data-types/string.md)) — Identifier of the `INSERT` query that created this data part.
 -   `event_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of the event that occurred with the data part. Can have one of the following values:
     -   `NEW_PART` — Inserting of a new data part.
     -   `MERGE_PARTS` — Merging of data parts.

From b46e3e77bd89f651b73909a9d6c23b5248585016 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com>
Date: Mon, 8 Feb 2021 16:57:42 +0300
Subject: [PATCH 0815/1238] Update part_log.md

---
 docs/ru/operations/system-tables/part_log.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/operations/system-tables/part_log.md b/docs/ru/operations/system-tables/part_log.md
index d214cc8f899..bba4fda6135 100644
--- a/docs/ru/operations/system-tables/part_log.md
+++ b/docs/ru/operations/system-tables/part_log.md
@@ -6,7 +6,7 @@
 
 Столбцы:
 
--   `query_id` ([String](../../sql-reference/data-types/string.md)) — идентификатор запроса на вставку кусков данных.
+-   `query_id` ([String](../../sql-reference/data-types/string.md)) — идентификатор запроса `INSERT`, создавшего этот кусок.
 -   `event_type` ([Enum8](../../sql-reference/data-types/enum.md)) — тип события. Столбец может содержать одно из следующих значений:
     -   `NEW_PART` — вставка нового куска.
     -   `MERGE_PARTS` — слияние кусков.

From 54b8950817771a11297ad692052ba9744f7f3f2c Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Mon, 8 Feb 2021 17:27:59 +0300
Subject: [PATCH 0816/1238] Update mongodb.md

---
 docs/ru/engines/table-engines/integrations/mongodb.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/ru/engines/table-engines/integrations/mongodb.md b/docs/ru/engines/table-engines/integrations/mongodb.md
index dd5fa03390d..65dc6fb6952 100644
--- a/docs/ru/engines/table-engines/integrations/mongodb.md
+++ b/docs/ru/engines/table-engines/integrations/mongodb.md
@@ -5,7 +5,7 @@ toc_title: MongoDB
 
 # MongoDB {#mongodb}
 
-Движок MongoDB работает только на чтение данных, он поддерживает запросы `SELECT` над данными, хранящимися на серверах MongoDB. Движок MongoDB поддерживает только плоские типы данных (простые, не вложенные).
+Движок таблиц MongoDB позволяет читать данные из коллекций СУБД MongoDB. В таблицах допустимы только плоские (не вложенные) типы данных. Запись (`INSERT`-запросы) не поддерживаются.
 
 ## Создание таблицы {#creating-a-table}
 
@@ -24,7 +24,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name
 
 -   `database` — имя базы данных на удалённом сервере.
 
--   `table` — имя таблицы на удалённом сервере.
+-   `collection` — имя коллекции на удалённом сервере.
 
 -   `user` — пользователь MongoDB.
 
@@ -32,14 +32,14 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name
 
 ## Примеры использования {#usage-example}
 
-Таблица в ClickHouse, которая получает данные из таблицы MongoDB:
+Таблица в ClickHouse для чтения данных из колекции MongoDB:
 
 ``` text
 CREATE TABLE mongo_table
 (
     key UInt64, 
     data String
-) ENGINE = MongoDB('mongo1:27017', 'test', 'simple_table', 'test', 'clickhouse');
+) ENGINE = MongoDB('mongo1:27017', 'test', 'simple_table', 'testuser', 'clickhouse');
 ```
 
 Запрос к таблице:

From f34bec4be4229e06698af31af7119a8b7a8ad276 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Mon, 8 Feb 2021 17:30:15 +0300
Subject: [PATCH 0817/1238] Update mongodb.md

---
 docs/en/engines/table-engines/integrations/mongodb.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/en/engines/table-engines/integrations/mongodb.md b/docs/en/engines/table-engines/integrations/mongodb.md
index 2f1ae394521..e648a13b5e0 100644
--- a/docs/en/engines/table-engines/integrations/mongodb.md
+++ b/docs/en/engines/table-engines/integrations/mongodb.md
@@ -5,7 +5,7 @@ toc_title: MongoDB
 
 # MongoDB {#mongodb}
 
-MongoDB engine is read-only, it allows to perform `SELECT` queries on data, stored on a remote MongoDB server. MongoDB engine supports onle flat fields (primitive, not nested types).
+MongoDB engine is read-only table engine which allows to read data (`SELECT` queries) from remote MongoDB collection. Engine supports only non-nested data types. `INSERT` queries are not supported.
 
 ## Creating a Table {#creating-a-table}
 
@@ -24,7 +24,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name
 
 -   `database` — Remote database name.
 
--   `table` — Remote table name.
+-   `collection` — Remote collection name.
 
 -   `user` — MongoDB user.
 
@@ -32,14 +32,14 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name
 
 ## Usage Example {#usage-example}
 
-Table in ClickHouse, retrieving data from the MongoDB table:
+Table in ClickHouse which allows to read data from MongoDB collection:
 
 ``` text
 CREATE TABLE mongo_table
 (
     key UInt64, 
     data String
-) ENGINE = MongoDB('mongo1:27017', 'test', 'simple_table', 'test', 'clickhouse');
+) ENGINE = MongoDB('mongo1:27017', 'test', 'simple_table', 'testuser', 'clickhouse');
 ```
 
 Query:

From a4cbe59b6562a263935c67cb7202326846274aea Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Mon, 8 Feb 2021 17:30:59 +0300
Subject: [PATCH 0818/1238] Update mongodb.md

---
 docs/ru/engines/table-engines/integrations/mongodb.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/engines/table-engines/integrations/mongodb.md b/docs/ru/engines/table-engines/integrations/mongodb.md
index 65dc6fb6952..0765b3909de 100644
--- a/docs/ru/engines/table-engines/integrations/mongodb.md
+++ b/docs/ru/engines/table-engines/integrations/mongodb.md
@@ -5,7 +5,7 @@ toc_title: MongoDB
 
 # MongoDB {#mongodb}
 
-Движок таблиц MongoDB позволяет читать данные из коллекций СУБД MongoDB. В таблицах допустимы только плоские (не вложенные) типы данных. Запись (`INSERT`-запросы) не поддерживаются.
+Движок таблиц MongoDB позволяет читать данные из коллекций СУБД MongoDB. В таблицах допустимы только плоские (не вложенные) типы данных. Запись (`INSERT`-запросы) не поддерживается.
 
 ## Создание таблицы {#creating-a-table}
 

From f2feeb9b192d6d9444d09822a37c9fab103fbc91 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Mon, 8 Feb 2021 18:20:12 +0300
Subject: [PATCH 0819/1238] Missing fix

---
 src/Coordination/InMemoryLogStore.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Coordination/InMemoryLogStore.h b/src/Coordination/InMemoryLogStore.h
index 37f76f056ba..425b056a81d 100644
--- a/src/Coordination/InMemoryLogStore.h
+++ b/src/Coordination/InMemoryLogStore.h
@@ -4,7 +4,7 @@
 #include <map>
 #include <mutex>
 #include <Core/Types.h>
-#include <libnuraft/log_store.hxx>
+#include <libnuraft/log_store.hxx> // Y_IGNORE
 
 namespace DB
 {

From 3eb58ba267ffe4532cd4811d39b8c4a813bf4bda Mon Sep 17 00:00:00 2001
From: gyuton <40863448+gyuton@users.noreply.github.com>
Date: Mon, 8 Feb 2021 18:20:34 +0300
Subject: [PATCH 0820/1238] Update argmin.md

---
 docs/en/sql-reference/aggregate-functions/reference/argmin.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmin.md b/docs/en/sql-reference/aggregate-functions/reference/argmin.md
index 0c5b39aebe3..2fe9a313260 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/argmin.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/argmin.md
@@ -33,7 +33,7 @@ Type: matches `arg` type.
 
 For tuple in the input:
 
--   Tuple with minimum `val` value.
+-   Tuple `(arg, val)`, where `val` is the minimum value and `arg` is a corresponding value.
 
 Type: [Tuple](../../../sql-reference/data-types/tuple.md).
 

From f30e8a164c19bed39dff53b262af2765c0a74610 Mon Sep 17 00:00:00 2001
From: gyuton <40863448+gyuton@users.noreply.github.com>
Date: Mon, 8 Feb 2021 18:20:51 +0300
Subject: [PATCH 0821/1238] Update
 docs/en/sql-reference/aggregate-functions/reference/argmax.md

Co-authored-by: tavplubix <tavplubix@gmail.com>
---
 docs/en/sql-reference/aggregate-functions/reference/argmax.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmax.md b/docs/en/sql-reference/aggregate-functions/reference/argmax.md
index 8412e00458f..9899c731ce9 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/argmax.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/argmax.md
@@ -33,7 +33,7 @@ Type: matches `arg` type.
 
 For tuple in the input:
 
--   Tuple with maximum `val` value.
+-   Tuple `(arg, val)`, where `val` is the maximum value and `arg` is a corresponding value.
 
 Type: [Tuple](../../../sql-reference/data-types/tuple.md).
 

From 22f9c821178435e48d3253eb568134b39e568827 Mon Sep 17 00:00:00 2001
From: gyuton <40863448+gyuton@users.noreply.github.com>
Date: Mon, 8 Feb 2021 18:23:58 +0300
Subject: [PATCH 0822/1238] Update argmax.md

---
 docs/ru/sql-reference/aggregate-functions/reference/argmax.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/sql-reference/aggregate-functions/reference/argmax.md b/docs/ru/sql-reference/aggregate-functions/reference/argmax.md
index 35167512d36..f44e65831a9 100644
--- a/docs/ru/sql-reference/aggregate-functions/reference/argmax.md
+++ b/docs/ru/sql-reference/aggregate-functions/reference/argmax.md
@@ -33,7 +33,7 @@ argMax(tuple(arg, val))
 
 Если передан кортеж:
 
--   Кортеж с максимальным значением `val`.
+-   Кортеж `(arg, val)` c максимальным значением `val` и соответствующим ему `arg`.
 
 Тип: [Tuple](../../../sql-reference/data-types/tuple.md).
 

From 79a83c3438291e0f8aacf7103f4ef07942422e36 Mon Sep 17 00:00:00 2001
From: gyuton <40863448+gyuton@users.noreply.github.com>
Date: Mon, 8 Feb 2021 18:25:36 +0300
Subject: [PATCH 0823/1238] Update argmin.md

---
 docs/ru/sql-reference/aggregate-functions/reference/argmin.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/sql-reference/aggregate-functions/reference/argmin.md b/docs/ru/sql-reference/aggregate-functions/reference/argmin.md
index 63b7d5c8200..8c25b79f92a 100644
--- a/docs/ru/sql-reference/aggregate-functions/reference/argmin.md
+++ b/docs/ru/sql-reference/aggregate-functions/reference/argmin.md
@@ -33,7 +33,7 @@ argMin(tuple(arg, val))
 
 Если передан кортеж:
 
--   Кортеж с минимальным значением `val`.
+-   Кортеж `(arg, val)` c минимальным значением `val` и соответствующим ему `arg`.
 
 Тип: [Tuple](../../../sql-reference/data-types/tuple.md).
 

From c98baf3496ffa06918c5080b39e4cf33512b0eeb Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Wed, 13 Jan 2021 11:03:07 -0800
Subject: [PATCH 0824/1238] PartLog - add field event_time_microseconds column

---
 src/Interpreters/PartLog.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/Interpreters/PartLog.cpp b/src/Interpreters/PartLog.cpp
index bf63b6f49c7..cde57dba7c6 100644
--- a/src/Interpreters/PartLog.cpp
+++ b/src/Interpreters/PartLog.cpp
@@ -4,6 +4,7 @@
 #include <DataTypes/DataTypeArray.h>
 #include <DataTypes/DataTypesNumber.h>
 #include <DataTypes/DataTypeDateTime.h>
+#include <DataTypes/DataTypeDateTime64.h>
 #include <DataTypes/DataTypeDate.h>
 #include <DataTypes/DataTypeString.h>
 #include <DataTypes/DataTypeEnum.h>
@@ -37,6 +38,7 @@ Block PartLogElement::createBlock()
         {ColumnInt8::create(),   std::move(event_type_datatype),       "event_type"},
         {ColumnUInt16::create(), std::make_shared<DataTypeDate>(),     "event_date"},
         {ColumnUInt32::create(), std::make_shared<DataTypeDateTime>(), "event_time"},
+        {ColumnUInt64::create(), std::make_shared<DataTypeDateTime64>(6), "event_time_microseconds"},
         {ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(),   "duration_ms"},
 
         {ColumnString::create(), std::make_shared<DataTypeString>(),   "database"},

From f1d2804bb0ffd4b0cd037ae3ddafd31805ecc461 Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Wed, 13 Jan 2021 11:26:06 -0800
Subject: [PATCH 0825/1238] PartLog - populate event_time s & us from same
 timespec

---
 src/Interpreters/PartLog.cpp | 25 +++++++++++++++++++++----
 src/Interpreters/PartLog.h   |  1 +
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/src/Interpreters/PartLog.cpp b/src/Interpreters/PartLog.cpp
index cde57dba7c6..1be1aa07a29 100644
--- a/src/Interpreters/PartLog.cpp
+++ b/src/Interpreters/PartLog.cpp
@@ -99,13 +99,25 @@ void PartLogElement::appendToBlock(MutableColumns & columns) const
 }
 
 
-bool PartLog::addNewPart(Context & current_context, const MutableDataPartPtr & part, UInt64 elapsed_ns, const ExecutionStatus & execution_status)
+bool PartLog::addNewPart(
+    Context & current_context, const MutableDataPartPtr & part, UInt64 elapsed_ns, const ExecutionStatus & execution_status)
 {
     return addNewParts(current_context, {part}, elapsed_ns, execution_status);
 }
 
-bool PartLog::addNewParts(Context & current_context, const PartLog::MutableDataPartsVector & parts, UInt64 elapsed_ns,
-                          const ExecutionStatus & execution_status)
+inline UInt64 time_in_microseconds(std::chrono::time_point<std::chrono::system_clock> timepoint)
+{
+    return std::chrono::duration_cast<std::chrono::microseconds>(timepoint.time_since_epoch()).count();
+}
+
+
+inline UInt64 time_in_seconds(std::chrono::time_point<std::chrono::system_clock> timepoint)
+{
+    return std::chrono::duration_cast<std::chrono::seconds>(timepoint.time_since_epoch()).count();
+}
+
+bool PartLog::addNewParts(
+    Context & current_context, const PartLog::MutableDataPartsVector & parts, UInt64 elapsed_ns, const ExecutionStatus & execution_status)
 {
     if (parts.empty())
         return true;
@@ -129,7 +141,12 @@ bool PartLog::addNewParts(Context & current_context, const PartLog::MutableDataP
                 elem.query_id.insert(0, query_id.data, query_id.size);
 
             elem.event_type = PartLogElement::NEW_PART;
-            elem.event_time = time(nullptr);
+
+            // construct event_time and event_time_microseconds using the same time point
+            // so that the two times will always be equal up to a precision of a second.
+            const auto time_now = std::chrono::system_clock::now();
+            elem.event_time = time_in_seconds(time_now);
+            elem.event_time_microseconds = time_in_microseconds(time_now);
             elem.duration_ms = elapsed_ns / 1000000;
 
             elem.database_name = table_id.database_name;
diff --git a/src/Interpreters/PartLog.h b/src/Interpreters/PartLog.h
index a06d28f1f12..9744deb71d0 100644
--- a/src/Interpreters/PartLog.h
+++ b/src/Interpreters/PartLog.h
@@ -23,6 +23,7 @@ struct PartLogElement
     Type event_type = NEW_PART;
 
     time_t event_time = 0;
+    UInt64 event_time_microseconds = 0;
     UInt64 duration_ms = 0;
 
     String database_name;

From 19cf5ca6c0dd95b46a26c80dfbd5402004a9cda4 Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Wed, 3 Feb 2021 00:14:46 -0800
Subject: [PATCH 0826/1238] PartLog - try fix event_time_microseconds column

---
 src/Interpreters/PartLog.cpp | 7 ++++++-
 src/Interpreters/PartLog.h   | 2 +-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/Interpreters/PartLog.cpp b/src/Interpreters/PartLog.cpp
index 1be1aa07a29..1ef368b252d 100644
--- a/src/Interpreters/PartLog.cpp
+++ b/src/Interpreters/PartLog.cpp
@@ -31,14 +31,18 @@ Block PartLogElement::createBlock()
             {"MovePart",      static_cast<Int8>(MOVE_PART)},
         }
     );
+    ColumnsWithTypeAndName columns_with_type_and_name;
 
     return
     {
+
         {ColumnString::create(), std::make_shared<DataTypeString>(),   "query_id"},
         {ColumnInt8::create(),   std::move(event_type_datatype),       "event_type"},
         {ColumnUInt16::create(), std::make_shared<DataTypeDate>(),     "event_date"},
+
         {ColumnUInt32::create(), std::make_shared<DataTypeDateTime>(), "event_time"},
-        {ColumnUInt64::create(), std::make_shared<DataTypeDateTime64>(6), "event_time_microseconds"},
+        columns_with_type_and_name.emplace_back(std::make_shared<DataTypeDateTime64>(6),"event_time_microseconds"),
+
         {ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(),   "duration_ms"},
 
         {ColumnString::create(), std::make_shared<DataTypeString>(),   "database"},
@@ -71,6 +75,7 @@ void PartLogElement::appendToBlock(MutableColumns & columns) const
     columns[i++]->insert(event_type);
     columns[i++]->insert(DateLUT::instance().toDayNum(event_time));
     columns[i++]->insert(event_time);
+    columns[i++]->insert(event_time_microseconds);
     columns[i++]->insert(duration_ms);
 
     columns[i++]->insert(database_name);
diff --git a/src/Interpreters/PartLog.h b/src/Interpreters/PartLog.h
index 9744deb71d0..c946d6ce85f 100644
--- a/src/Interpreters/PartLog.h
+++ b/src/Interpreters/PartLog.h
@@ -23,7 +23,7 @@ struct PartLogElement
     Type event_type = NEW_PART;
 
     time_t event_time = 0;
-    UInt64 event_time_microseconds = 0;
+    Decimal64 event_time_microseconds = 0;
     UInt64 duration_ms = 0;
 
     String database_name;

From aa7f5cfe79e70d00f039b60f20f11bd7ffca7be7 Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Wed, 3 Feb 2021 00:28:44 -0800
Subject: [PATCH 0827/1238] PartLog - use ColumnsWithTypeAndName for all
 columns

---
 src/Interpreters/PartLog.cpp | 40 +++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 21 deletions(-)

diff --git a/src/Interpreters/PartLog.cpp b/src/Interpreters/PartLog.cpp
index 1ef368b252d..0f2922b37f0 100644
--- a/src/Interpreters/PartLog.cpp
+++ b/src/Interpreters/PartLog.cpp
@@ -1,6 +1,4 @@
 #include <Columns/ColumnsNumber.h>
-#include <Columns/ColumnArray.h>
-#include <Columns/ColumnString.h>
 #include <DataTypes/DataTypeArray.h>
 #include <DataTypes/DataTypesNumber.h>
 #include <DataTypes/DataTypeDateTime.h>
@@ -36,34 +34,34 @@ Block PartLogElement::createBlock()
     return
     {
 
-        {ColumnString::create(), std::make_shared<DataTypeString>(),   "query_id"},
-        {ColumnInt8::create(),   std::move(event_type_datatype),       "event_type"},
-        {ColumnUInt16::create(), std::make_shared<DataTypeDate>(),     "event_date"},
+        columns_with_type_and_name.emplace_back(std::make_shared<DataTypeString>(),   "query_id"),
+        columns_with_type_and_name.emplace_back(  std::move(event_type_datatype),       "event_type"),
+        columns_with_type_and_name.emplace_back( std::make_shared<DataTypeDate>(),     "event_date"),
 
-        {ColumnUInt32::create(), std::make_shared<DataTypeDateTime>(), "event_time"},
+        columns_with_type_and_name.emplace_back(std::make_shared<DataTypeDateTime>(), "event_time"),
         columns_with_type_and_name.emplace_back(std::make_shared<DataTypeDateTime64>(6),"event_time_microseconds"),
 
-        {ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(),   "duration_ms"},
+        columns_with_type_and_name.emplace_back( std::make_shared<DataTypeUInt64>(),   "duration_ms"),
 
-        {ColumnString::create(), std::make_shared<DataTypeString>(),   "database"},
-        {ColumnString::create(), std::make_shared<DataTypeString>(),   "table"},
-        {ColumnString::create(), std::make_shared<DataTypeString>(),   "part_name"},
-        {ColumnString::create(), std::make_shared<DataTypeString>(),   "partition_id"},
-        {ColumnString::create(), std::make_shared<DataTypeString>(),   "path_on_disk"},
+        columns_with_type_and_name.emplace_back( std::make_shared<DataTypeString>(),   "database"),
+        columns_with_type_and_name.emplace_back( std::make_shared<DataTypeString>(),   "table"),
+        columns_with_type_and_name.emplace_back( std::make_shared<DataTypeString>(),   "part_name"),
+        columns_with_type_and_name.emplace_back(std::make_shared<DataTypeString>(),   "partition_id"),
+        columns_with_type_and_name.emplace_back( std::make_shared<DataTypeString>(),   "path_on_disk"),
 
-        {ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(),   "rows"},
-        {ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(),   "size_in_bytes"}, // On disk
+        columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(),   "rows"),
+        columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(),   "size_in_bytes"), // On disk
 
         /// Merge-specific info
-        {ColumnArray::create(ColumnString::create()), std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "merged_from"},
-        {ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(),   "bytes_uncompressed"}, // Result bytes
-        {ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(),   "read_rows"},
-        {ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(),   "read_bytes"},
-        {ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(),   "peak_memory_usage"},
+        columns_with_type_and_name.emplace_back(std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "merged_from"),
+        columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(),   "bytes_uncompressed"), // Result bytes
+        columns_with_type_and_name.emplace_back( std::make_shared<DataTypeUInt64>(),   "read_rows"),
+        columns_with_type_and_name.emplace_back( std::make_shared<DataTypeUInt64>(),   "read_bytes"),
+        columns_with_type_and_name.emplace_back( std::make_shared<DataTypeUInt64>(),   "peak_memory_usage"),
 
         /// Is there an error during the execution or commit
-        {ColumnUInt16::create(), std::make_shared<DataTypeUInt16>(),   "error"},
-        {ColumnString::create(), std::make_shared<DataTypeString>(),   "exception"},
+        columns_with_type_and_name.emplace_back( std::make_shared<DataTypeUInt16>(),   "error"),
+        columns_with_type_and_name.emplace_back( std::make_shared<DataTypeString>(),   "exception"),
     };
 }
 

From 46c843b0045064fea0ebd9c1a09d89b5cf9563d4 Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Wed, 3 Feb 2021 00:29:34 -0800
Subject: [PATCH 0828/1238] PartLog - fix formatting

---
 src/Interpreters/PartLog.cpp | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/Interpreters/PartLog.cpp b/src/Interpreters/PartLog.cpp
index 0f2922b37f0..4c18ae449ab 100644
--- a/src/Interpreters/PartLog.cpp
+++ b/src/Interpreters/PartLog.cpp
@@ -29,25 +29,25 @@ Block PartLogElement::createBlock()
             {"MovePart",      static_cast<Int8>(MOVE_PART)},
         }
     );
+
     ColumnsWithTypeAndName columns_with_type_and_name;
 
-    return
-    {
+    return {
 
-        columns_with_type_and_name.emplace_back(std::make_shared<DataTypeString>(),   "query_id"),
-        columns_with_type_and_name.emplace_back(  std::move(event_type_datatype),       "event_type"),
-        columns_with_type_and_name.emplace_back( std::make_shared<DataTypeDate>(),     "event_date"),
+        columns_with_type_and_name.emplace_back(std::make_shared<DataTypeString>(), "query_id"),
+        columns_with_type_and_name.emplace_back(std::move(event_type_datatype), "event_type"),
+        columns_with_type_and_name.emplace_back(std::make_shared<DataTypeDate>(), "event_date"),
 
         columns_with_type_and_name.emplace_back(std::make_shared<DataTypeDateTime>(), "event_time"),
-        columns_with_type_and_name.emplace_back(std::make_shared<DataTypeDateTime64>(6),"event_time_microseconds"),
+        columns_with_type_and_name.emplace_back(std::make_shared<DataTypeDateTime64>(6), "event_time_microseconds"),
 
-        columns_with_type_and_name.emplace_back( std::make_shared<DataTypeUInt64>(),   "duration_ms"),
+        columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(), "duration_ms"),
 
-        columns_with_type_and_name.emplace_back( std::make_shared<DataTypeString>(),   "database"),
-        columns_with_type_and_name.emplace_back( std::make_shared<DataTypeString>(),   "table"),
-        columns_with_type_and_name.emplace_back( std::make_shared<DataTypeString>(),   "part_name"),
-        columns_with_type_and_name.emplace_back(std::make_shared<DataTypeString>(),   "partition_id"),
-        columns_with_type_and_name.emplace_back( std::make_shared<DataTypeString>(),   "path_on_disk"),
+        columns_with_type_and_name.emplace_back(std::make_shared<DataTypeString>(), "database"),
+        columns_with_type_and_name.emplace_back(std::make_shared<DataTypeString>(), "table"),
+        columns_with_type_and_name.emplace_back(std::make_shared<DataTypeString>(), "part_name"),
+        columns_with_type_and_name.emplace_back(std::make_shared<DataTypeString>(), "partition_id"),
+        columns_with_type_and_name.emplace_back(std::make_shared<DataTypeString>(), "path_on_disk"),
 
         columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(),   "rows"),
         columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(),   "size_in_bytes"), // On disk

From cb65515dc77d8cd21546ccbb5c7ef5eac799bc44 Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Wed, 3 Feb 2021 06:55:52 -0800
Subject: [PATCH 0829/1238] PartLog - fix style checks

---
 src/Interpreters/PartLog.cpp | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/Interpreters/PartLog.cpp b/src/Interpreters/PartLog.cpp
index 4c18ae449ab..860666a0035 100644
--- a/src/Interpreters/PartLog.cpp
+++ b/src/Interpreters/PartLog.cpp
@@ -49,19 +49,19 @@ Block PartLogElement::createBlock()
         columns_with_type_and_name.emplace_back(std::make_shared<DataTypeString>(), "partition_id"),
         columns_with_type_and_name.emplace_back(std::make_shared<DataTypeString>(), "path_on_disk"),
 
-        columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(),   "rows"),
-        columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(),   "size_in_bytes"), // On disk
+        columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(), "rows"),
+        columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(), "size_in_bytes"), // On disk
 
         /// Merge-specific info
         columns_with_type_and_name.emplace_back(std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "merged_from"),
-        columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(),   "bytes_uncompressed"), // Result bytes
-        columns_with_type_and_name.emplace_back( std::make_shared<DataTypeUInt64>(),   "read_rows"),
-        columns_with_type_and_name.emplace_back( std::make_shared<DataTypeUInt64>(),   "read_bytes"),
-        columns_with_type_and_name.emplace_back( std::make_shared<DataTypeUInt64>(),   "peak_memory_usage"),
+        columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(), "bytes_uncompressed"), // Result bytes
+        columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(), "read_rows"),
+        columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(), "read_bytes"),
+        columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(), "peak_memory_usage"),
 
         /// Is there an error during the execution or commit
-        columns_with_type_and_name.emplace_back( std::make_shared<DataTypeUInt16>(),   "error"),
-        columns_with_type_and_name.emplace_back( std::make_shared<DataTypeString>(),   "exception"),
+        columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt16>(), "error"),
+        columns_with_type_and_name.emplace_back(std::make_shared<DataTypeString>(), "exception"),
     };
 }
 

From 60c8dfa22c061e95df3a84f4dbc37053171d3053 Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Wed, 3 Feb 2021 07:03:40 -0800
Subject: [PATCH 0830/1238] PartLog - populate event_time_microseconds for
 MergeParts

---
 src/Storages/MergeTree/MergeTreeData.cpp | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 7cdf4f7b9cd..40bd11428c7 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -3675,6 +3675,17 @@ bool MergeTreeData::canReplacePartition(const DataPartPtr & src_part) const
     return true;
 }
 
+inline UInt64 time_in_microseconds(std::chrono::time_point<std::chrono::system_clock> timepoint)
+{
+    return std::chrono::duration_cast<std::chrono::microseconds>(timepoint.time_since_epoch()).count();
+}
+
+
+inline UInt64 time_in_seconds(std::chrono::time_point<std::chrono::system_clock> timepoint)
+{
+    return std::chrono::duration_cast<std::chrono::seconds>(timepoint.time_since_epoch()).count();
+}
+
 void MergeTreeData::writePartLog(
     PartLogElement::Type type,
     const ExecutionStatus & execution_status,
@@ -3697,7 +3708,12 @@ try
     part_log_elem.error = static_cast<UInt16>(execution_status.code);
     part_log_elem.exception = execution_status.message;
 
-    part_log_elem.event_time = time(nullptr);
+    // construct event_time and event_time_microseconds using the same time point
+    // so that the two times will always be equal up to a precision of a second.
+    const auto time_now = std::chrono::system_clock::now();
+    part_log_elem.event_time = time_in_seconds(time_now);
+    part_log_elem.event_time_microseconds = time_in_microseconds(time_now);
+
     /// TODO: Stop stopwatch in outer code to exclude ZK timings and so on
     part_log_elem.duration_ms = elapsed_ns / 1000000;
 

From 072e0802b58d64a9b79a57546c5e556144b85c80 Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Wed, 3 Feb 2021 07:20:51 -0800
Subject: [PATCH 0831/1238] PartLog - Docs

---
 docs/en/operations/system-tables/part_log.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/docs/en/operations/system-tables/part_log.md b/docs/en/operations/system-tables/part_log.md
index 08269a2dc48..579fdaefb0a 100644
--- a/docs/en/operations/system-tables/part_log.md
+++ b/docs/en/operations/system-tables/part_log.md
@@ -16,6 +16,8 @@ The `system.part_log` table contains the following columns:
     -   `MOVE_PART` — Moving the data part from the one disk to another one.
 -   `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date.
 -   `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time.
+-   `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds precision.
+
 -   `duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Duration.
 -   `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database the data part is in.
 -   `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table the data part is in.
@@ -47,6 +49,7 @@ query_id:                      983ad9c7-28d5-4ae1-844e-603116b7de31
 event_type:                    NewPart
 event_date:                    2021-02-02
 event_time:                    2021-02-02 11:14:28
+event_time_microseconds:                    2021-02-02 11:14:28.861919
 duration_ms:                   35
 database:                      default
 table:                         log_mt_2

From 6aa86fb533fb7516c8f0936b6333b0c898238bfb Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Wed, 3 Feb 2021 08:12:10 -0800
Subject: [PATCH 0832/1238] PartLog - add tests for event_time_microseconds
 field

---
 ...event_time_microseconds_part_log.reference |  1 +
 ...01686_event_time_microseconds_part_log.sql | 31 +++++++++++++++++++
 2 files changed, 32 insertions(+)
 create mode 100644 tests/queries/0_stateless/01686_event_time_microseconds_part_log.reference
 create mode 100644 tests/queries/0_stateless/01686_event_time_microseconds_part_log.sql

diff --git a/tests/queries/0_stateless/01686_event_time_microseconds_part_log.reference b/tests/queries/0_stateless/01686_event_time_microseconds_part_log.reference
new file mode 100644
index 00000000000..9766475a418
--- /dev/null
+++ b/tests/queries/0_stateless/01686_event_time_microseconds_part_log.reference
@@ -0,0 +1 @@
+ok
diff --git a/tests/queries/0_stateless/01686_event_time_microseconds_part_log.sql b/tests/queries/0_stateless/01686_event_time_microseconds_part_log.sql
new file mode 100644
index 00000000000..0eb57c949f3
--- /dev/null
+++ b/tests/queries/0_stateless/01686_event_time_microseconds_part_log.sql
@@ -0,0 +1,31 @@
+DROP TABLE IF EXISTS table_with_single_pk;
+
+CREATE TABLE table_with_single_pk
+(
+  key UInt8,
+  value String
+)
+ENGINE = MergeTree
+ORDER BY key;
+
+INSERT INTO table_with_single_pk SELECT number, toString(number % 10) FROM numbers(10000000);
+
+SYSTEM FLUSH LOGS;
+
+WITH (
+        (
+            SELECT event_time_microseconds
+            FROM system.part_log
+            ORDER BY event_time DESC
+            LIMIT 1
+        ) AS time_with_microseconds,
+        (
+            SELECT event_time
+            FROM system.part_log
+            ORDER BY event_time DESC
+            LIMIT 1
+        ) AS time
+    )
+SELECT if(dateDiff('second', toDateTime(time_with_microseconds), toDateTime(time)) = 0, 'ok', 'fail');
+
+DROP TABLE IF EXISTS table_with_single_pk;

From 14fe627d325f0aa44841f0acd2d853050579cefe Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Mon, 8 Feb 2021 07:37:47 -0800
Subject: [PATCH 0833/1238] Tests - accept review suggestion

---
 ...01686_event_time_microseconds_part_log.sql | 20 ++++++-------------
 1 file changed, 6 insertions(+), 14 deletions(-)

diff --git a/tests/queries/0_stateless/01686_event_time_microseconds_part_log.sql b/tests/queries/0_stateless/01686_event_time_microseconds_part_log.sql
index 0eb57c949f3..a1b419527d4 100644
--- a/tests/queries/0_stateless/01686_event_time_microseconds_part_log.sql
+++ b/tests/queries/0_stateless/01686_event_time_microseconds_part_log.sql
@@ -13,19 +13,11 @@ INSERT INTO table_with_single_pk SELECT number, toString(number % 10) FROM numbe
 SYSTEM FLUSH LOGS;
 
 WITH (
-        (
-            SELECT event_time_microseconds
-            FROM system.part_log
-            ORDER BY event_time DESC
-            LIMIT 1
-        ) AS time_with_microseconds,
-        (
-            SELECT event_time
-            FROM system.part_log
-            ORDER BY event_time DESC
-            LIMIT 1
-        ) AS time
-    )
-SELECT if(dateDiff('second', toDateTime(time_with_microseconds), toDateTime(time)) = 0, 'ok', 'fail');
+         SELECT (event_time, event_time_microseconds)
+         FROM system.part_log
+         ORDER BY event_time DESC
+         LIMIT 1
+    ) AS time
+SELECT if(dateDiff('second', toDateTime(time.2), toDateTime(time.1)) = 0, 'ok', 'fail');
 
 DROP TABLE IF EXISTS table_with_single_pk;

From 00bb0e6b35906fa994e2e60cf2323ab7b1cd93de Mon Sep 17 00:00:00 2001
From: Ivan Lezhankin <ilezhankin@yandex-team.ru>
Date: Mon, 8 Feb 2021 18:46:48 +0300
Subject: [PATCH 0834/1238] Skip send_crash_reports test with TSAN

---
 tests/integration/test_send_crash_reports/test.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tests/integration/test_send_crash_reports/test.py b/tests/integration/test_send_crash_reports/test.py
index a9b141ebfd3..e22cc9681a6 100644
--- a/tests/integration/test_send_crash_reports/test.py
+++ b/tests/integration/test_send_crash_reports/test.py
@@ -24,14 +24,17 @@ def started_node():
 
 
 def test_send_segfault(started_node, ):
+    if started_node.is_built_with_thread_sanitizer():
+        pytest.skip("doesn't fit in timeouts for stacktrace generation")
+
     started_node.copy_file_to_container(os.path.join(SCRIPT_DIR, "fake_sentry_server.py"), "/fake_sentry_server.py")
     started_node.exec_in_container(["bash", "-c", "python3 /fake_sentry_server.py > /fake_sentry_server.log 2>&1"], detach=True, user="root")
-    time.sleep(1)
+    time.sleep(0.5)
     started_node.exec_in_container(["bash", "-c", "pkill -11 clickhouse"], user="root")
 
     result = None
     for attempt in range(1, 6):
-        time.sleep(attempt)
+        time.sleep(0.25 * attempt)
         result = started_node.exec_in_container(['cat', fake_sentry_server.RESULT_PATH], user='root')
         if result == 'OK':
             break

From ddbcfa8b4d3bc6fbedf76c52761ff5cc2cbe4182 Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Mon, 8 Feb 2021 07:48:50 -0800
Subject: [PATCH 0835/1238] Tests - add to arcadia skip list

---
 tests/queries/0_stateless/arcadia_skip_list.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/queries/0_stateless/arcadia_skip_list.txt b/tests/queries/0_stateless/arcadia_skip_list.txt
index 160f10eb413..13fe2a31c28 100644
--- a/tests/queries/0_stateless/arcadia_skip_list.txt
+++ b/tests/queries/0_stateless/arcadia_skip_list.txt
@@ -202,3 +202,4 @@
 01674_executable_dictionary_implicit_key
 01686_rocksdb
 01683_dist_INSERT_block_structure_mismatch
+01686_event_time_microseconds_part_log

From 46f2b4063f7033573977d57a554783fa9c9b5c9c Mon Sep 17 00:00:00 2001
From: Amos Bird <amosbird@gmail.com>
Date: Tue, 9 Feb 2021 00:25:24 +0800
Subject: [PATCH 0836/1238] Fix CTE in INSERT-SELECT

---
 src/Interpreters/InterpreterSelectQuery.cpp                | 2 +-
 src/Interpreters/SelectQueryOptions.h                      | 2 ++
 tests/queries/0_stateless/01711_cte_subquery_fix.reference | 0
 tests/queries/0_stateless/01711_cte_subquery_fix.sql       | 2 ++
 4 files changed, 5 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/01711_cte_subquery_fix.reference
 create mode 100644 tests/queries/0_stateless/01711_cte_subquery_fix.sql

diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp
index 4b89273cd86..1158007d957 100644
--- a/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/src/Interpreters/InterpreterSelectQuery.cpp
@@ -294,7 +294,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
     }
 
     // Only propagate WITH elements to subqueries if we're not a subquery
-    if (options.subquery_depth == 0)
+    if (!options.is_subquery)
     {
         if (context->getSettingsRef().enable_global_with_statement)
             ApplyWithAliasVisitor().visit(query_ptr);
diff --git a/src/Interpreters/SelectQueryOptions.h b/src/Interpreters/SelectQueryOptions.h
index 611c2b1601e..124b5a6daa0 100644
--- a/src/Interpreters/SelectQueryOptions.h
+++ b/src/Interpreters/SelectQueryOptions.h
@@ -33,6 +33,7 @@ struct SelectQueryOptions
     bool ignore_quota = false;
     bool ignore_limits = false;
     bool is_internal = false;
+    bool is_subquery = false; // non-subquery can also have subquery_depth > 0, e.g. insert select
 
     SelectQueryOptions(QueryProcessingStage::Enum stage = QueryProcessingStage::Complete, size_t depth = 0)
         : to_stage(stage), subquery_depth(depth)
@@ -46,6 +47,7 @@ struct SelectQueryOptions
         SelectQueryOptions out = *this;
         out.to_stage = QueryProcessingStage::Complete;
         ++out.subquery_depth;
+        out.is_subquery = true;
         return out;
     }
 
diff --git a/tests/queries/0_stateless/01711_cte_subquery_fix.reference b/tests/queries/0_stateless/01711_cte_subquery_fix.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/01711_cte_subquery_fix.sql b/tests/queries/0_stateless/01711_cte_subquery_fix.sql
new file mode 100644
index 00000000000..19eed6cc612
--- /dev/null
+++ b/tests/queries/0_stateless/01711_cte_subquery_fix.sql
@@ -0,0 +1,2 @@
+create or replace table t engine = Memory as with cte as (select * from numbers(10)) select * from cte;
+drop table t;

From a8f37e7704d96058af56b5160a235cac74dad3e8 Mon Sep 17 00:00:00 2001
From: Nicolae Vartolomei <nv@nvartolomei.com>
Date: Mon, 8 Feb 2021 17:19:35 +0000
Subject: [PATCH 0837/1238] Remove dead code

---
 src/Storages/MergeTree/MergeTreeData.cpp | 12 ------------
 src/Storages/MergeTree/MergeTreeData.h   |  3 ---
 2 files changed, 15 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 7cdf4f7b9cd..0c6bcd3f6fd 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -3754,18 +3754,6 @@ MergeTreeData::CurrentlyMovingPartsTagger::~CurrentlyMovingPartsTagger()
     }
 }
 
-bool MergeTreeData::selectPartsAndMove()
-{
-    if (parts_mover.moves_blocker.isCancelled())
-        return false;
-
-    auto moving_tagger = selectPartsForMove();
-    if (moving_tagger->parts_to_move.empty())
-        return false;
-
-    return moveParts(std::move(moving_tagger));
-}
-
 std::optional<JobAndPool> MergeTreeData::getDataMovingJob()
 {
     if (parts_mover.moves_blocker.isCancelled())
diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h
index 425dcbfb316..a432ec46d71 100644
--- a/src/Storages/MergeTree/MergeTreeData.h
+++ b/src/Storages/MergeTree/MergeTreeData.h
@@ -916,9 +916,6 @@ protected:
     /// Moves part to specified space, used in ALTER ... MOVE ... queries
     bool movePartsToSpace(const DataPartsVector & parts, SpacePtr space);
 
-    /// Selects parts for move and moves them, used in background process
-    bool selectPartsAndMove();
-
 
 private:
     /// RAII Wrapper for atomic work with currently moving parts

From e312ef72281dc5b034343d0ff33035fbf1a7a7ef Mon Sep 17 00:00:00 2001
From: Vitaliy Zakaznikov <vzakaznikov@protonmail.com>
Date: Mon, 8 Feb 2021 12:29:45 -0500
Subject: [PATCH 0838/1238] Updating LDAP docs.

---
 .../external-authenticators/index.md          | 12 ++++-
 .../external-authenticators/ldap.md           | 53 ++++++++++++-------
 2 files changed, 44 insertions(+), 21 deletions(-)

diff --git a/docs/en/operations/external-authenticators/index.md b/docs/en/operations/external-authenticators/index.md
index 10c2ea91eb9..f06c1de8ec7 100644
--- a/docs/en/operations/external-authenticators/index.md
+++ b/docs/en/operations/external-authenticators/index.md
@@ -6,4 +6,14 @@ toc_title: Introduction
 
 # External User Authenticators and Directories {#external-authenticators}
 
-ClickHouse supports authenticating and managing users using external services such as [LDAP](#external-authenticators-ldap).
+ClickHouse supports authenticating and managing users using external services.
+
+The following external authenticators and directories are supported.
+
+## External Authenticators
+
+- [LDAP](#ldap-external-authenticator)
+
+## External User Directories
+
+- [LDAP](#ldap-external-user-directory)
diff --git a/docs/en/operations/external-authenticators/ldap.md b/docs/en/operations/external-authenticators/ldap.md
index fd5f2e578ce..7ad1fd68b74 100644
--- a/docs/en/operations/external-authenticators/ldap.md
+++ b/docs/en/operations/external-authenticators/ldap.md
@@ -41,8 +41,11 @@ Parameters:
 - `host` - LDAP server hostname or IP, this parameter is mandatory and cannot be empty.
 - `port` - LDAP server port, default is `636` if `enable_tls` is set to `true`, `389` otherwise.
 - `bind_dn` - template used to construct the DN to bind to.
-  - The resulting DN will be constructed by replacing all `{user_name}` substrings of the template with the actual user name during each authentication attempt.
-- `verification_cooldown` - a period of time, in seconds, after a successful bind attempt, during which the user will be assumed to be successfully authenticated for all consecutive requests without contacting the LDAP server.
+  - The resulting DN will be constructed by replacing all `{user_name}` substrings of the
+    template with the actual user name during each authentication attempt.
+- `verification_cooldown` - a period of time, in seconds, after a successful bind attempt,
+  during which the user will be assumed to be successfully authenticated for all consecutive
+  requests without contacting the LDAP server.
   - Specify `0` (the default) to disable caching and force contacting the LDAP server for each authentication request.
 - `enable_tls` - flag to trigger use of secure connection to the LDAP server.
   - Specify `no` for plain text `ldap://` protocol (not recommended).
@@ -58,13 +61,14 @@ Parameters:
 - `tls_ca_cert_dir` - path to the directory containing CA certificates.
 - `tls_cipher_suite` - allowed cipher suite (in OpenSSL notation).
 
-## External Authenticator {#ldap-external-authenticator}
+## Using LDAP As External Authenticator {#ldap-external-authenticator}
 
-A remote LDAP server can be used as a method for verifying the passwords for locally defined users (users defined in `users.xml` or in local access control paths). In order to achieve this, specify previously defined LDAP server name instead of `password` or similar sections in the user definition.
+A remote LDAP server can be used as a method for verifying passwords for locally defined users (users defined in `users.xml` or in local access control paths). In order to achieve this, specify previously defined LDAP server name instead of `password` or similar sections in the user definition.
 
-At each login attempt, ClickHouse will try to "bind" to the specified DN (see `bind_dn` sections in LDAP server config in `config.xml`) at the LDAP server using the provided credentials, and, if successful, the user will be considered authenticated. This is often called "simple bind" method.
+At each login attempt, ClickHouse will try to "bind" to the specified DN defined by the `bind_dn` parameter
+in the [LDAP server definition](#ldap-server-definition) using the provided credentials, and if successful, the user will be considered authenticated. This is often called a "simple bind" method.
 
-Example (goes into `users.xml`):
+For example,
 
 ```xml
 <yandex>
@@ -81,21 +85,20 @@ Example (goes into `users.xml`):
 </yandex>
 ```
 
-Note, that now, once user `my_user` refers to `my_ldap_server`, this LDAP server must be configured in the main `config.xml` file as described previously.
+Note, that user `my_user` refers to `my_ldap_server`. This LDAP server must be configured in the main `config.xml` file as described previously.
 
-When SQL-driven Access Control and Account Management is enabled in ClickHouse, users that are identified by LDAP servers can also be created using queries.
+When SQL-driven [Access Control and Account Management](#access-control) is enabled in ClickHouse, users that are identified by LDAP servers can also be created using the [CRATE USER](#create-user-statement) statement.
 
-Example (execute in ClickHouse client):
 
 ```sql
 CREATE USER my_user IDENTIFIED WITH ldap_server BY 'my_ldap_server'
 ```
 
-## Exernal User Directory {#ldap-external-user-directory}
+## Using LDAP As Exernal User Directory {#ldap-external-user-directory}
 
-A remote LDAP server can be used as a source of user definitions, in addition to the locally defined users. In order to achieve this, specify previously defined LDAP server name in `ldap` section inside `users_directories` section in main `config.xml` file.
+In addition to the locally defined users, a remote LDAP server can be used as a source of user definitions. In order to achieve this, specify previously defined LDAP server name (see [LDAP Server Definition](#ldap-server-definition)) in the `ldap` section inside the `users_directories` section in of the `config.xml` file.
 
-At each login attempt, ClickHouse will try to find the user definition locally and authenticate it as usual, but if the user is not defined, ClickHouse will assume it exists in the external LDAP directory, and will try to "bind" to the specified DN (see `bind_dn` sections in LDAP server config in `config.xml`) at the LDAP server using the provided credentials, and, if successful, the user will be considered existing and authenticated. The user will be assigned roles from the list specified in `roles`. Additionally, LDAP "search" can be performed and results can be transformed and treated as role names and then assigned to the user if `role_mapping` section is also configured. All this implies that the SQL-driven Access Control and Account Management is enabled in ClickHouse and roles are created using `CREATE ROLE ...` queries.
+At each login attempt, ClickHouse will try to find the user definition locally and authenticate it as usual, but if the user is not defined, ClickHouse will assume it exists in the external LDAP directory, and will try to "bind" to the specified DN at the LDAP server using the provided credentials, and if successful, the user will be considered existing and authenticated. The user will be assigned roles from the list specified in the `roles` section. Additionally, LDAP "search" can be performed and results can be transformed and treated as role names and then assigned to the user if the `role_mapping` section is also configured. All this implies that the SQL-driven [Access Control and Account Management](#access-control) is enabled in ClickHouse and roles are created using the [CREATE ROLE](#create-role-statement) statement.
 
 Example (goes into `config.xml`):
 
@@ -122,24 +125,34 @@ Example (goes into `config.xml`):
 </yandex>
 ```
 
-Note, that now, once `my_ldap_server` is referred from `ldap` inside `user_directories` section, this LDAP server must be configured in the main `config.xml` file as described previously.
+Note that `my_ldap_server` referred in the `ldap` section inside the `user_directories` section must be a previously
+defined LDAP server that is configured in the `config.xml` (see [LDAP Server Definition](#ldap-server-definition)).
 
 Parameters:
 
-- `server` - one of LDAP server names defined in `ldap_servers` config section above. This parameter is mandatory and cannot be empty.
+- `server` - one of LDAP server names defined in `ldap_servers` config section above.
+  This parameter is mandatory and cannot be empty.
 - `roles` - section with a list of locally defined roles that will be assigned to each user retrieved from the LDAP server.
-  - If no roles are specified here or assigned during role mapping (below), user will not be able to perform any actions after authentication.
+  - If no roles are specified here or assigned during role mapping (below), user will not be able
+    to perform any actions after authentication.
 - `role_mapping` - section with LDAP search parameters and mapping rules.
-  - When a user authenticates, while still bound to LDAP, an LDAP search is performed using `search_filter` and the name of the logged in user. For each entry found during that search, the value of the specified attribute is extracted. For each attribute value that has the specified prefix, the prefix is removed, and the rest of the value becomes the name of a local role defined in ClickHouse, which is expected to be created beforehand by `CREATE ROLE ...` command.
-
+  - When a user authenticates, while still bound to LDAP, an LDAP search is performed using `search_filter`
+    and the name of the logged in user. For each entry found during that search, the value of the specified
+    attribute is extracted. For each attribute value that has the specified prefix, the prefix is removed,
+    and the rest of the value becomes the name of a local role defined in ClickHouse,
+    which is expected to be created beforehand by the [CREATE ROLE](#create-role-statement) statement.
   - There can be multiple `role_mapping` sections defined inside the same `ldap` section. All of them will be applied.
     - `base_dn` - template used to construct the base DN for the LDAP search.
-      - The resulting DN will be constructed by replacing all `{user_name}` and `{bind_dn}` substrings of the template with the actual user name and bind DN during each LDAP search.
+      - The resulting DN will be constructed by replacing all `{user_name}` and `{bind_dn}`
+        substrings of the template with the actual user name and bind DN during each LDAP search.
     - `scope` - scope of the LDAP search.
       - Accepted values are: `base`, `one_level`, `children`, `subtree` (the default).
     - `search_filter` - template used to construct the search filter for the LDAP search.
-      - The resulting filter will be constructed by replacing all `{user_name}`, `{bind_dn}`, and `{base_dn}`  substrings of the template with the actual user name, bind DN, and base DN during each LDAP search.
+      - The resulting filter will be constructed by replacing all `{user_name}`, `{bind_dn}`, and `{base_dn}`
+        substrings of the template with the actual user name, bind DN, and base DN during each LDAP search.
       - Note, that the special characters must be escaped properly in XML.
     - `attribute` - attribute name whose values will be returned by the LDAP search.
-    - `prefix` - prefix, that will be expected to be in front of each string in the original list of strings returned by the LDAP search. Prefix will be removed from the original strings and resulting strings will be treated as local role names. Empty, by default.
+    - `prefix` - prefix, that will be expected to be in front of each string in the original
+      list of strings returned by the LDAP search. Prefix will be removed from the original
+      strings and resulting strings will be treated as local role names. Empty, by default.
 

From 9d9055681c8c5536d3dec4974cf42c90490f1efb Mon Sep 17 00:00:00 2001
From: Vitaliy Zakaznikov <vzakaznikov@protonmail.com>
Date: Mon, 8 Feb 2021 12:35:18 -0500
Subject: [PATCH 0839/1238] Small changes to LDAP docs.

---
 docs/en/operations/external-authenticators/index.md | 4 ++--
 docs/en/operations/external-authenticators/ldap.md  | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/docs/en/operations/external-authenticators/index.md b/docs/en/operations/external-authenticators/index.md
index f06c1de8ec7..3387bbbdc05 100644
--- a/docs/en/operations/external-authenticators/index.md
+++ b/docs/en/operations/external-authenticators/index.md
@@ -10,10 +10,10 @@ ClickHouse supports authenticating and managing users using external services.
 
 The following external authenticators and directories are supported.
 
-## External Authenticators
+External Authenticators:
 
 - [LDAP](#ldap-external-authenticator)
 
-## External User Directories
+External User Directories:
 
 - [LDAP](#ldap-external-user-directory)
diff --git a/docs/en/operations/external-authenticators/ldap.md b/docs/en/operations/external-authenticators/ldap.md
index 7ad1fd68b74..03be357a12a 100644
--- a/docs/en/operations/external-authenticators/ldap.md
+++ b/docs/en/operations/external-authenticators/ldap.md
@@ -8,7 +8,7 @@ LDAP server can be used to authenticate ClickHouse users. There are two differen
 For both of these approaches, an internally named LDAP server must be defined in the ClickHouse config 
 so that other parts of config are able to refer to it.
 
-## Server Definition {#ldap-server-definition}
+## LDAP Server Definition {#ldap-server-definition}
 
 To define LDAP server you must add `ldap_servers` section to the `config.xml`. For example,
 
@@ -61,7 +61,7 @@ Parameters:
 - `tls_ca_cert_dir` - path to the directory containing CA certificates.
 - `tls_cipher_suite` - allowed cipher suite (in OpenSSL notation).
 
-## Using LDAP As External Authenticator {#ldap-external-authenticator}
+## LDAP External Authenticator {#ldap-external-authenticator}
 
 A remote LDAP server can be used as a method for verifying passwords for locally defined users (users defined in `users.xml` or in local access control paths). In order to achieve this, specify previously defined LDAP server name instead of `password` or similar sections in the user definition.
 
@@ -94,7 +94,7 @@ When SQL-driven [Access Control and Account Management](#access-control) is enab
 CREATE USER my_user IDENTIFIED WITH ldap_server BY 'my_ldap_server'
 ```
 
-## Using LDAP As Exernal User Directory {#ldap-external-user-directory}
+## LDAP Exernal User Directory {#ldap-external-user-directory}
 
 In addition to the locally defined users, a remote LDAP server can be used as a source of user definitions. In order to achieve this, specify previously defined LDAP server name (see [LDAP Server Definition](#ldap-server-definition)) in the `ldap` section inside the `users_directories` section in of the `config.xml` file.
 

From c336cac30657062ea72bb6b91d6ca18bbe434f3a Mon Sep 17 00:00:00 2001
From: Nicolae Vartolomei <nv@nvartolomei.com>
Date: Mon, 8 Feb 2021 17:36:58 +0000
Subject: [PATCH 0840/1238] Remove dead code

---
 src/Storages/MergeTree/MergeTreeData.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h
index a432ec46d71..efa6919dc9b 100644
--- a/src/Storages/MergeTree/MergeTreeData.h
+++ b/src/Storages/MergeTree/MergeTreeData.h
@@ -465,9 +465,6 @@ public:
     DataPartsVector removePartsInRangeFromWorkingSet(const MergeTreePartInfo & drop_range, bool clear_without_timeout,
                                                      bool skip_intersecting_parts, DataPartsLock & lock);
 
-    /// Renames the part to detached/<prefix>_<part> and removes it from working set.
-    void removePartsFromWorkingSetAndCloneToDetached(const DataPartsVector & parts, bool clear_without_timeout, const String & prefix = "");
-
     /// Renames the part to detached/<prefix>_<part> and removes it from data_parts,
     //// so it will not be deleted in clearOldParts.
     /// If restore_covered is true, adds to the working set inactive parts, which were merged into the deleted part.

From 3c94e4d6f4b5e7c8ee048d6325d6275775d35426 Mon Sep 17 00:00:00 2001
From: Vitaliy Zakaznikov <vzakaznikov@protonmail.com>
Date: Mon, 8 Feb 2021 14:01:33 -0500
Subject: [PATCH 0841/1238] Changing index.md

---
 docs/en/operations/external-authenticators/index.md | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/docs/en/operations/external-authenticators/index.md b/docs/en/operations/external-authenticators/index.md
index 3387bbbdc05..fb8483fa341 100644
--- a/docs/en/operations/external-authenticators/index.md
+++ b/docs/en/operations/external-authenticators/index.md
@@ -8,12 +8,6 @@ toc_title: Introduction
 
 ClickHouse supports authenticating and managing users using external services.
 
-The following external authenticators and directories are supported.
+The following external authenticators and directories are supported:
 
-External Authenticators:
-
-- [LDAP](#ldap-external-authenticator)
-
-External User Directories:
-
-- [LDAP](#ldap-external-user-directory)
+- [LDAP](#external-authenticators-ldap) [Authenticator](#ldap-external-authenticator) and [Directory](#ldap-external-user-directory)

From ce91c257b29f0a6d39f807cd745c56c75af3c87c Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Mon, 8 Feb 2021 22:07:30 +0300
Subject: [PATCH 0842/1238] Lockless SYSTEM FLUSH DISTRIBUTED

Right now SYSTEM FLUSH DISTRIBUTED will block:
- INSERT into this Distributed table (requireDirectoryMonitor())
- SELECT * FROM system.distribution_queue
---
 src/Storages/StorageDistributed.cpp | 26 ++++++++++++--------------
 src/Storages/StorageDistributed.h   |  5 +----
 2 files changed, 13 insertions(+), 18 deletions(-)

diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp
index 02ee70dc8f4..8605013c65d 100644
--- a/src/Storages/StorageDistributed.cpp
+++ b/src/Storages/StorageDistributed.cpp
@@ -681,7 +681,7 @@ void StorageDistributed::truncate(const ASTPtr &, const StorageMetadataPtr &, co
 
     for (auto it = cluster_nodes_data.begin(); it != cluster_nodes_data.end();)
     {
-        it->second.shutdownAndDropAllData();
+        it->second.directory_monitor->shutdownAndDropAllData();
         it = cluster_nodes_data.erase(it);
     }
 
@@ -799,16 +799,6 @@ ClusterPtr StorageDistributed::getOptimizedCluster(const Context & context, cons
     return cluster;
 }
 
-void StorageDistributed::ClusterNodeData::flushAllData() const
-{
-    directory_monitor->flushAllData();
-}
-
-void StorageDistributed::ClusterNodeData::shutdownAndDropAllData() const
-{
-    directory_monitor->shutdownAndDropAllData();
-}
-
 IColumn::Selector StorageDistributed::createSelector(const ClusterPtr cluster, const ColumnWithTypeAndName & result)
 {
     const auto & slot_to_shard = cluster->getSlotToShard();
@@ -894,11 +884,19 @@ ActionLock StorageDistributed::getActionLock(StorageActionBlockType type)
 
 void StorageDistributed::flushClusterNodesAllData()
 {
-    std::lock_guard lock(cluster_nodes_mutex);
+    std::vector<std::shared_ptr<StorageDistributedDirectoryMonitor>> directory_monitors;
+
+    {
+        std::lock_guard lock(cluster_nodes_mutex);
+
+        directory_monitors.reserve(cluster_nodes_data.size());
+        for (auto & node : cluster_nodes_data)
+            directory_monitors.push_back(node.second.directory_monitor);
+    }
 
     /// TODO: Maybe it should be executed in parallel
-    for (auto & node : cluster_nodes_data)
-        node.second.flushAllData();
+    for (auto & node : directory_monitors)
+        node->flushAllData();
 }
 
 void StorageDistributed::rename(const String & new_path_to_table_data, const StorageID & new_table_id)
diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h
index 585efafddfb..928b6297297 100644
--- a/src/Storages/StorageDistributed.h
+++ b/src/Storages/StorageDistributed.h
@@ -200,11 +200,8 @@ protected:
 
     struct ClusterNodeData
     {
-        std::unique_ptr<StorageDistributedDirectoryMonitor> directory_monitor;
+        std::shared_ptr<StorageDistributedDirectoryMonitor> directory_monitor;
         ConnectionPoolPtr connection_pool;
-
-        void flushAllData() const;
-        void shutdownAndDropAllData() const;
     };
     std::unordered_map<std::string, ClusterNodeData> cluster_nodes_data;
     mutable std::mutex cluster_nodes_mutex;

From e0a806c18023d45e7199c45040af233a2df2a1c4 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <KochetovNicolai@users.noreply.github.com>
Date: Mon, 8 Feb 2021 22:10:27 +0300
Subject: [PATCH 0843/1238] Delete filterPushDown.cpp

---
 .../Optimizations/filterPushDown.cpp          | 22 -------------------
 1 file changed, 22 deletions(-)
 delete mode 100644 src/Processors/QueryPlan/Optimizations/filterPushDown.cpp

diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp
deleted file mode 100644
index 0d651897bf8..00000000000
--- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-#include <Processors/QueryPlan/Optimizations/Optimizations.h>
-#include <Processors/QueryPlan/FilterStep.h>
-
-namespace DB::QueryPlanOptimizations
-{
-
-size_t tryPushDownLimit(QueryPlan::Node * node, QueryPlan::Nodes &)
-{
-    auto * filter_step = typeid_cast<FilterStep *>(node->step.get());
-    if (!filter_step)
-        return 0;
-
-    QueryPlan::Node * child_node = node->children.front();
-    auto & child = child_node->step;
-
-    if (const auto * adding_const_column = typeid_cast<const AddingConstColumnStep *>(child.get()))
-    {
-
-    }
-}
-
-}

From 78c1d69b8c55a651f77f630e34e582dabb006f1f Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Mon, 8 Feb 2021 22:36:17 +0300
Subject: [PATCH 0844/1238] better code

---
 src/Common/CurrentMetrics.cpp                 |  1 -
 src/Databases/DatabaseOnDisk.cpp              | 54 +++++++++++++++++++
 src/Databases/DatabaseOnDisk.h                |  2 +
 src/Databases/DatabaseOrdinary.cpp            | 50 +----------------
 src/Databases/DatabaseReplicated.cpp          | 13 ++---
 src/Databases/DatabaseReplicatedWorker.cpp    |  2 +-
 src/Interpreters/ClientInfo.h                 |  1 -
 src/Interpreters/Context.cpp                  |  1 -
 src/Interpreters/Context.h                    |  1 -
 src/Interpreters/DDLTask.cpp                  |  4 +-
 src/Interpreters/DDLTask.h                    |  5 +-
 src/Interpreters/DDLWorker.cpp                |  2 -
 src/Interpreters/InterpreterAlterQuery.cpp    | 13 +++--
 src/Interpreters/InterpreterCreateQuery.cpp   | 41 +++++---------
 src/Interpreters/InterpreterCreateQuery.h     |  3 ++
 src/Interpreters/InterpreterDropQuery.cpp     | 13 ++++-
 src/Interpreters/InterpreterRenameQuery.cpp   |  2 +-
 .../MergeTree/registerStorageMergeTree.cpp    |  2 +-
 src/Storages/StorageReplicatedMergeTree.cpp   | 32 +++++------
 src/Storages/StorageReplicatedMergeTree.h     |  4 +-
 src/Storages/System/StorageSystemClusters.cpp |  2 +-
 src/Storages/System/StorageSystemClusters.h   |  2 +-
 .../test_replicated_database/test.py          | 11 +++-
 ...8_ddl_dictionaries_concurrent_requrests.sh |  4 +-
 tests/queries/skip_list.json                  |  6 +++
 25 files changed, 146 insertions(+), 125 deletions(-)

diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp
index c524467d8ca..4fb2709c8e4 100644
--- a/src/Common/CurrentMetrics.cpp
+++ b/src/Common/CurrentMetrics.cpp
@@ -15,7 +15,6 @@
     M(BackgroundSchedulePoolTask, "Number of active tasks in BackgroundSchedulePool. This pool is used for periodic ReplicatedMergeTree tasks, like cleaning old data parts, altering data parts, replica re-initialization, etc.") \
     M(BackgroundBufferFlushSchedulePoolTask, "Number of active tasks in BackgroundBufferFlushSchedulePool. This pool is used for periodic Buffer flushes") \
     M(BackgroundDistributedSchedulePoolTask, "Number of active tasks in BackgroundDistributedSchedulePool. This pool is used for distributed sends that is done in background.") \
-    M(BackgroundReplicatedSchedulePoolTask, "Number of active tasks in BackgroundReplicatedSchedulePoolTask. The pool is used by replicated database for executing DDL log coming from other replicas. One task corresponds to one replicated database") \
     M(BackgroundMessageBrokerSchedulePoolTask, "Number of active tasks in BackgroundProcessingPool for message streaming") \
     M(CacheDictionaryUpdateQueueBatches, "Number of 'batches' (a set of keys) in update queue in CacheDictionaries.") \
     M(CacheDictionaryUpdateQueueKeys, "Exact number of keys in update queue in CacheDictionaries.") \
diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp
index 275f5bd3976..a03cb33591c 100644
--- a/src/Databases/DatabaseOnDisk.cpp
+++ b/src/Databases/DatabaseOnDisk.cpp
@@ -129,6 +129,60 @@ String getObjectDefinitionFromCreateQuery(const ASTPtr & query)
     return statement_buf.str();
 }
 
+void applyMetadataChangesToCreateQuery(const ASTPtr & query, const StorageInMemoryMetadata & metadata)
+{
+    auto & ast_create_query = query->as<ASTCreateQuery &>();
+
+    bool has_structure = ast_create_query.columns_list && ast_create_query.columns_list->columns;
+    if (ast_create_query.as_table_function && !has_structure)
+        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot alter table {} because it was created AS table function"
+                                                     " and doesn't have structure in metadata", backQuote(ast_create_query.table));
+
+    assert(has_structure);
+    ASTPtr new_columns = InterpreterCreateQuery::formatColumns(metadata.columns);
+    ASTPtr new_indices = InterpreterCreateQuery::formatIndices(metadata.secondary_indices);
+    ASTPtr new_constraints = InterpreterCreateQuery::formatConstraints(metadata.constraints);
+
+    ast_create_query.columns_list->replace(ast_create_query.columns_list->columns, new_columns);
+    ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->indices, new_indices);
+    ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->constraints, new_constraints);
+
+    if (metadata.select.select_query)
+    {
+        query->replace(ast_create_query.select, metadata.select.select_query);
+    }
+
+    /// MaterializedView is one type of CREATE query without storage.
+    if (ast_create_query.storage)
+    {
+        ASTStorage & storage_ast = *ast_create_query.storage;
+
+        bool is_extended_storage_def
+            = storage_ast.partition_by || storage_ast.primary_key || storage_ast.order_by || storage_ast.sample_by || storage_ast.settings;
+
+        if (is_extended_storage_def)
+        {
+            if (metadata.sorting_key.definition_ast)
+                storage_ast.set(storage_ast.order_by, metadata.sorting_key.definition_ast);
+
+            if (metadata.primary_key.definition_ast)
+                storage_ast.set(storage_ast.primary_key, metadata.primary_key.definition_ast);
+
+            if (metadata.sampling_key.definition_ast)
+                storage_ast.set(storage_ast.sample_by, metadata.sampling_key.definition_ast);
+
+            if (metadata.table_ttl.definition_ast)
+                storage_ast.set(storage_ast.ttl_table, metadata.table_ttl.definition_ast);
+            else if (storage_ast.ttl_table != nullptr) /// TTL was removed
+                storage_ast.ttl_table = nullptr;
+
+            if (metadata.settings_changes)
+                storage_ast.set(storage_ast.settings, metadata.settings_changes);
+        }
+    }
+}
+
+
 DatabaseOnDisk::DatabaseOnDisk(
     const String & name,
     const String & metadata_path_,
diff --git a/src/Databases/DatabaseOnDisk.h b/src/Databases/DatabaseOnDisk.h
index b8cc1f60e66..60a50ac4539 100644
--- a/src/Databases/DatabaseOnDisk.h
+++ b/src/Databases/DatabaseOnDisk.h
@@ -25,6 +25,8 @@ std::pair<String, StoragePtr> createTableFromAST(
   */
 String getObjectDefinitionFromCreateQuery(const ASTPtr & query);
 
+void applyMetadataChangesToCreateQuery(const ASTPtr & query, const StorageInMemoryMetadata & metadata);
+
 
 /* Class to provide basic operations with tables when metadata is stored on disk in .sql files.
  */
diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp
index 49bec28e4a1..d859578eb46 100644
--- a/src/Databases/DatabaseOrdinary.cpp
+++ b/src/Databases/DatabaseOrdinary.cpp
@@ -272,55 +272,7 @@ void DatabaseOrdinary::alterTable(const Context & context, const StorageID & tab
         0,
         context.getSettingsRef().max_parser_depth);
 
-    auto & ast_create_query = ast->as<ASTCreateQuery &>();
-
-    bool has_structure = ast_create_query.columns_list && ast_create_query.columns_list->columns;
-    if (ast_create_query.as_table_function && !has_structure)
-        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot alter table {} because it was created AS table function"
-                                                     " and doesn't have structure in metadata", backQuote(table_name));
-
-    assert(has_structure);
-    ASTPtr new_columns = InterpreterCreateQuery::formatColumns(metadata.columns);
-    ASTPtr new_indices = InterpreterCreateQuery::formatIndices(metadata.secondary_indices);
-    ASTPtr new_constraints = InterpreterCreateQuery::formatConstraints(metadata.constraints);
-
-    ast_create_query.columns_list->replace(ast_create_query.columns_list->columns, new_columns);
-    ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->indices, new_indices);
-    ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->constraints, new_constraints);
-
-    if (metadata.select.select_query)
-    {
-        ast->replace(ast_create_query.select, metadata.select.select_query);
-    }
-
-    /// MaterializedView is one type of CREATE query without storage.
-    if (ast_create_query.storage)
-    {
-        ASTStorage & storage_ast = *ast_create_query.storage;
-
-        bool is_extended_storage_def
-            = storage_ast.partition_by || storage_ast.primary_key || storage_ast.order_by || storage_ast.sample_by || storage_ast.settings;
-
-        if (is_extended_storage_def)
-        {
-            if (metadata.sorting_key.definition_ast)
-                storage_ast.set(storage_ast.order_by, metadata.sorting_key.definition_ast);
-
-            if (metadata.primary_key.definition_ast)
-                storage_ast.set(storage_ast.primary_key, metadata.primary_key.definition_ast);
-
-            if (metadata.sampling_key.definition_ast)
-                storage_ast.set(storage_ast.sample_by, metadata.sampling_key.definition_ast);
-
-            if (metadata.table_ttl.definition_ast)
-                storage_ast.set(storage_ast.ttl_table, metadata.table_ttl.definition_ast);
-            else if (storage_ast.ttl_table != nullptr) /// TTL was removed
-                storage_ast.ttl_table = nullptr;
-
-            if (metadata.settings_changes)
-                storage_ast.set(storage_ast.settings, metadata.settings_changes);
-        }
-    }
+    applyMetadataChangesToCreateQuery(ast, metadata);
 
     statement = getObjectDefinitionFromCreateQuery(ast);
     {
diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index a134ba5dec7..4a6058afcd0 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -134,6 +134,7 @@ std::pair<String, String> DatabaseReplicated::parseFullReplicaName(const String
 
 ClusterPtr DatabaseReplicated::getCluster() const
 {
+    /// TODO Maintain up-to-date Cluster and allow to use it in Distributed tables
     Strings hosts;
     Strings host_ids;
 
@@ -149,6 +150,7 @@ ClusterPtr DatabaseReplicated::getCluster() const
         if (hosts.empty())
             throw Exception(ErrorCodes::LOGICAL_ERROR, "No hosts found");
         Int32 cver = stat.cversion;
+        std::sort(hosts.begin(), hosts.end());
 
         std::vector<zkutil::ZooKeeper::FutureGet> futures;
         futures.reserve(hosts.size());
@@ -174,7 +176,6 @@ ClusterPtr DatabaseReplicated::getCluster() const
 
     assert(!hosts.empty());
     assert(hosts.size() == host_ids.size());
-    std::sort(hosts.begin(), hosts.end());
     String current_shard = parseFullReplicaName(hosts.front()).first;
     std::vector<Strings> shards;
     shards.emplace_back();
@@ -327,9 +328,7 @@ BlockIO DatabaseReplicated::propose(const ASTPtr & query, const Context & query_
     if (query_context.getSettingsRef().distributed_ddl_task_timeout == 0)
         return io;
 
-    //FIXME need list of all replicas, we can obtain it from zk
-    Strings hosts_to_wait;
-    hosts_to_wait.emplace_back(getFullReplicaName());
+    Strings hosts_to_wait = getZooKeeper()->getChildren(zookeeper_path + "/replicas");
     auto stream = std::make_shared<DDLQueryStatusInputStream>(node_path, entry, query_context, hosts_to_wait);
     io.in = std::move(stream);
     return io;
@@ -338,7 +337,7 @@ BlockIO DatabaseReplicated::propose(const ASTPtr & query, const Context & query_
 
 void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 from_snapshot)
 {
-    LOG_WARNING(log, "Will recover replica");
+    //LOG_WARNING(log, "Will recover replica");
 
     //FIXME drop old tables
 
@@ -355,7 +354,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
 
         Context query_context = global_context;
         query_context.makeQueryContext();
-        query_context.getClientInfo().query_kind = ClientInfo::QueryKind::REPLICATED_LOG_QUERY;
+        query_context.getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY;
         query_context.setCurrentDatabase(database_name);
         query_context.setCurrentQueryId(""); // generate random query_id
 
@@ -436,6 +435,8 @@ void DatabaseReplicated::renameTable(const Context & context, const String & tab
     {
         if (this != &to_database)
             throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Moving tables between databases is not supported for Replicated engine");
+        if (table_name == to_table_name)
+            throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot rename table to itself");
         if (!isTableExist(table_name, context))
             throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} does not exist", table_name);
         if (exchange && !to_database.isTableExist(to_table_name, context))
diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp
index 748305922b7..dd9dc322f9d 100644
--- a/src/Databases/DatabaseReplicatedWorker.cpp
+++ b/src/Databases/DatabaseReplicatedWorker.cpp
@@ -48,7 +48,7 @@ void DatabaseReplicatedDDLWorker::initializeReplication()
     UInt32 our_log_ptr = parse<UInt32>(current_zookeeper->get(database->replica_path + "/log_ptr"));
     UInt32 max_log_ptr = parse<UInt32>(current_zookeeper->get(database->zookeeper_path + "/max_log_ptr"));
     UInt32 logs_to_keep = parse<UInt32>(current_zookeeper->get(database->zookeeper_path + "/logs_to_keep"));
-    if (our_log_ptr + logs_to_keep < max_log_ptr)
+    if (our_log_ptr == 0 || our_log_ptr + logs_to_keep < max_log_ptr)
         database->recoverLostReplica(current_zookeeper, 0);
 }
 
diff --git a/src/Interpreters/ClientInfo.h b/src/Interpreters/ClientInfo.h
index cacbed44c42..d2b7beb7d8c 100644
--- a/src/Interpreters/ClientInfo.h
+++ b/src/Interpreters/ClientInfo.h
@@ -42,7 +42,6 @@ public:
         NO_QUERY = 0,            /// Uninitialized object.
         INITIAL_QUERY = 1,
         SECONDARY_QUERY = 2,    /// Query that was initiated by another query for distributed or ON CLUSTER query execution.
-        REPLICATED_LOG_QUERY = 3, /// Query from replicated DDL log.
     };
 
 
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 83804125cd4..10619e3ad9a 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -79,7 +79,6 @@ namespace CurrentMetrics
     extern const Metric BackgroundSchedulePoolTask;
     extern const Metric BackgroundBufferFlushSchedulePoolTask;
     extern const Metric BackgroundDistributedSchedulePoolTask;
-    extern const Metric BackgroundReplicatedSchedulePoolTask;
     extern const Metric BackgroundMessageBrokerSchedulePoolTask;
 }
 
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index 906efcc6dba..636255d6190 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -622,7 +622,6 @@ public:
     BackgroundSchedulePool & getSchedulePool() const;
     BackgroundSchedulePool & getMessageBrokerSchedulePool() const;
     BackgroundSchedulePool & getDistributedSchedulePool() const;
-    BackgroundSchedulePool & getReplicatedSchedulePool() const;
 
     /// Has distributed_ddl configuration or not.
     bool hasDistributedDDL() const;
diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp
index 9737167fa4c..9e379443364 100644
--- a/src/Interpreters/DDLTask.cpp
+++ b/src/Interpreters/DDLTask.cpp
@@ -296,7 +296,7 @@ String DatabaseReplicatedTask::getShardID() const
 std::unique_ptr<Context> DatabaseReplicatedTask::makeQueryContext(Context & from_context)
 {
     auto query_context = DDLTaskBase::makeQueryContext(from_context);
-    query_context->getClientInfo().query_kind = ClientInfo::QueryKind::REPLICATED_LOG_QUERY; //FIXME why do we need separate query kind?
+    query_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY;
     query_context->setCurrentDatabase(database->getDatabaseName());
 
     auto txn = std::make_shared<MetadataTransaction>();
@@ -340,7 +340,7 @@ void MetadataTransaction::commit()
     assert(state == CREATED);
     state = FAILED;
     current_zookeeper->multi(ops);
-    state = COMMITED;
+    state = COMMITTED;
 }
 
 }
diff --git a/src/Interpreters/DDLTask.h b/src/Interpreters/DDLTask.h
index 552f4919765..43d9fa1c0ae 100644
--- a/src/Interpreters/DDLTask.h
+++ b/src/Interpreters/DDLTask.h
@@ -144,7 +144,7 @@ struct MetadataTransaction
     enum State
     {
         CREATED,
-        COMMITED,
+        COMMITTED,
         FAILED
     };
 
@@ -154,10 +154,11 @@ struct MetadataTransaction
     bool is_initial_query;
     Coordination::Requests ops;
 
-    void addOps(Coordination::Requests & other_ops)
+    void moveOpsTo(Coordination::Requests & other_ops)
     {
         std::move(ops.begin(), ops.end(), std::back_inserter(other_ops));
         ops.clear();
+        state = COMMITTED;
     }
 
     void commit();
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index f0cc3370211..665bacf9d6d 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -42,7 +42,6 @@ namespace DB
 
 namespace ErrorCodes
 {
-    extern const int NOT_IMPLEMENTED;
     extern const int LOGICAL_ERROR;
     extern const int TIMEOUT_EXCEEDED;
     extern const int UNFINISHED;
@@ -51,7 +50,6 @@ namespace ErrorCodes
     extern const int CANNOT_ASSIGN_ALTER;
     extern const int CANNOT_ALLOCATE_MEMORY;
     extern const int MEMORY_LIMIT_EXCEEDED;
-    extern const int INCORRECT_QUERY;
 }
 
 
diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp
index cee9b9083ea..402f05895bc 100644
--- a/src/Interpreters/InterpreterAlterQuery.cpp
+++ b/src/Interpreters/InterpreterAlterQuery.cpp
@@ -28,6 +28,7 @@ namespace ErrorCodes
 {
     extern const int LOGICAL_ERROR;
     extern const int INCORRECT_QUERY;
+    extern const int NOT_IMPLEMENTED;
 }
 
 
@@ -49,7 +50,7 @@ BlockIO InterpreterAlterQuery::execute()
     auto table_id = context.resolveStorageID(alter, Context::ResolveOrdinary);
 
     DatabasePtr database = DatabaseCatalog::instance().getDatabase(table_id.database_name);
-    if (typeid_cast<DatabaseReplicated *>(database.get()) && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
+    if (typeid_cast<DatabaseReplicated *>(database.get()) && context.getClientInfo().query_kind != ClientInfo::QueryKind::SECONDARY_QUERY)
     {
         auto guard = DatabaseCatalog::instance().getDDLGuard(table_id.database_name, table_id.table_name);
         guard->releaseTableLock();
@@ -60,8 +61,6 @@ BlockIO InterpreterAlterQuery::execute()
     auto alter_lock = table->lockForAlter(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
     auto metadata_snapshot = table->getInMemoryMetadataPtr();
 
-    //FIXME commit MetadataTransaction for all ALTER kinds. Now its' implemented only for metadata alter.
-
     /// Add default database to table identifiers that we can encounter in e.g. default expressions,
     /// mutation expression, etc.
     AddDefaultDatabaseVisitor visitor(table_id.getDatabaseName());
@@ -95,6 +94,14 @@ BlockIO InterpreterAlterQuery::execute()
             throw Exception("Wrong parameter type in ALTER query", ErrorCodes::LOGICAL_ERROR);
     }
 
+    if (typeid_cast<DatabaseReplicated *>(database.get()))
+    {
+        int command_types_count = !mutation_commands.empty() + !partition_commands.empty() + !live_view_commands.empty() + !alter_commands.empty();
+        if (1 < command_types_count)
+            throw Exception(ErrorCodes::NOT_IMPLEMENTED, "For Replicated databases it's not allowed "
+                                                         "to execute ALTERs of different types in single query");
+    }
+
     if (!mutation_commands.empty())
     {
         MutationsInterpreter(table, metadata_snapshot, mutation_commands, context, false).validate();
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index 376bf8417ff..bbe8526ae5b 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -149,7 +149,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create)
                 engine = makeASTFunction("Replicated",
                                      std::make_shared<ASTLiteral>(fmt::format("/clickhouse/db/{}/", create.database)),
                                      std::make_shared<ASTLiteral>("s1"),
-                                     std::make_shared<ASTLiteral>("r1"));
+                                     std::make_shared<ASTLiteral>("r" + toString(getpid())));
         }
 
         engine->no_empty_args = true;
@@ -573,8 +573,9 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::setProperties(AS
     /// Set the table engine if it was not specified explicitly.
     setEngine(create);
 
-    create.as_database.clear();
-    create.as_table.clear();
+    assert(as_database_saved.empty() && as_table_saved.empty());
+    std::swap(create.as_database, as_database_saved);
+    std::swap(create.as_table, as_table_saved);
 
     return properties;
 }
@@ -722,7 +723,7 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data
     const auto * kind = create.is_dictionary ? "Dictionary" : "Table";
     const auto * kind_upper = create.is_dictionary ? "DICTIONARY" : "TABLE";
 
-    if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind == ClientInfo::QueryKind::REPLICATED_LOG_QUERY && !internal)
+    if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY && !internal)
     {
         if (create.uuid == UUIDHelpers::Nil)
             throw Exception("Table UUID is not specified in DDL log", ErrorCodes::LOGICAL_ERROR);
@@ -753,7 +754,6 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data
     }
     else
     {
-        assert(context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY);
         bool is_on_cluster = context.getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY;
         if (create.uuid != UUIDHelpers::Nil && !is_on_cluster)
             throw Exception(ErrorCodes::INCORRECT_QUERY,
@@ -850,7 +850,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
                                 "Data directory {} must be inside {} to attach it", String(data_path), String(user_files));
         }
     }
-    else if (create.attach && !create.attach_short_syntax && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
+    else if (create.attach && !create.attach_short_syntax && context.getClientInfo().query_kind != ClientInfo::QueryKind::SECONDARY_QUERY)
     {
         auto * log = &Poco::Logger::get("InterpreterCreateQuery");
         LOG_WARNING(log, "ATTACH TABLE query with full table definition is not recommended: "
@@ -874,16 +874,6 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
     /// Set and retrieve list of columns, indices and constraints. Set table engine if needed. Rewrite query in canonical way.
     TableProperties properties = setProperties(create);
 
-    /// DDL log for replicated databases can not
-    /// contain the right database name for every replica
-    /// therefore for such queries the AST database
-    /// field is modified right before an actual execution
-    if (context.getClientInfo().query_kind == ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
-    {
-        create.database = current_database;
-    }
-
-    //TODO make code better if possible
     DatabasePtr database;
     bool need_add_to_database = !create.temporary;
     if (need_add_to_database)
@@ -893,7 +883,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
     {
         auto guard = DatabaseCatalog::instance().getDDLGuard(create.database, create.table);
         database = DatabaseCatalog::instance().getDatabase(create.database);
-        if (typeid_cast<DatabaseReplicated *>(database.get()) && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
+        if (typeid_cast<DatabaseReplicated *>(database.get()) && context.getClientInfo().query_kind != ClientInfo::QueryKind::SECONDARY_QUERY)
         {
             assertOrSetUUID(create, database);
             guard->releaseTableLock();
@@ -930,9 +920,6 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
         guard = DatabaseCatalog::instance().getDDLGuard(create.database, create.table);
 
         database = DatabaseCatalog::instance().getDatabase(create.database);
-        //TODO do we need it?
-        if (database->getEngineName() == "Replicated" && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
-            throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database was renamed");
         assertOrSetUUID(create, database);
 
         /// Table can be created before or it can be created concurrently in another thread, while we were waiting in DDLGuard.
@@ -1107,9 +1094,10 @@ BlockIO InterpreterCreateQuery::createDictionary(ASTCreateQuery & create)
     auto guard = DatabaseCatalog::instance().getDDLGuard(database_name, dictionary_name);
     DatabasePtr database = DatabaseCatalog::instance().getDatabase(database_name);
 
-    if (typeid_cast<DatabaseReplicated *>(database.get()) && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
+    if (typeid_cast<DatabaseReplicated *>(database.get()) && context.getClientInfo().query_kind != ClientInfo::QueryKind::SECONDARY_QUERY)
     {
-        assertOrSetUUID(create, database);
+        if (!create.attach)
+            assertOrSetUUID(create, database);
         guard->releaseTableLock();
         return typeid_cast<DatabaseReplicated *>(database.get())->propose(query_ptr, context);
     }
@@ -1266,15 +1254,14 @@ AccessRightsElements InterpreterCreateQuery::getRequiredAccess() const
     return required_access;
 }
 
-void InterpreterCreateQuery::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr & ast, const Context &) const
+void InterpreterCreateQuery::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr &, const Context &) const
 {
-    const auto & create = ast->as<const ASTCreateQuery &>();
     elem.query_kind = "Create";
-    if (!create.as_table.empty())
+    if (!as_table_saved.empty())
     {
-        String database = backQuoteIfNeed(create.as_database.empty() ? context.getCurrentDatabase() : create.as_database);
+        String database = backQuoteIfNeed(as_database_saved.empty() ? context.getCurrentDatabase() : as_database_saved);
         elem.query_databases.insert(database);
-        elem.query_tables.insert(database + "." + backQuoteIfNeed(create.as_table));
+        elem.query_tables.insert(database + "." + backQuoteIfNeed(as_table_saved));
     }
 }
 
diff --git a/src/Interpreters/InterpreterCreateQuery.h b/src/Interpreters/InterpreterCreateQuery.h
index c109b0b7760..d88357fe412 100644
--- a/src/Interpreters/InterpreterCreateQuery.h
+++ b/src/Interpreters/InterpreterCreateQuery.h
@@ -95,5 +95,8 @@ private:
     /// Is this an internal query - not from the user.
     bool internal = false;
     bool force_attach = false;
+
+    mutable String as_database_saved;
+    mutable String as_table_saved;
 };
 }
diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp
index e6943f06e06..ae76e8efd46 100644
--- a/src/Interpreters/InterpreterDropQuery.cpp
+++ b/src/Interpreters/InterpreterDropQuery.cpp
@@ -129,7 +129,7 @@ BlockIO InterpreterDropQuery::executeToTableImpl(const ASTDropQuery & query, Dat
         /// Prevents recursive drop from drop database query. The original query must specify a table.
         bool is_drop_or_detach_database = query_ptr->as<ASTDropQuery>()->table.empty();
         bool is_replicated_ddl_query = typeid_cast<DatabaseReplicated *>(database.get()) &&
-                                       context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY &&
+                                       context.getClientInfo().query_kind != ClientInfo::QueryKind::SECONDARY_QUERY &&
                                        !is_drop_or_detach_database;
         if (is_replicated_ddl_query)
         {
@@ -137,6 +137,13 @@ BlockIO InterpreterDropQuery::executeToTableImpl(const ASTDropQuery & query, Dat
                 throw Exception(ErrorCodes::INCORRECT_QUERY, "DETACH TABLE is not allowed for Replicated databases. "
                                                              "Use DETACH TABLE PERMANENTLY or SYSTEM RESTART REPLICA");
 
+            if (query.kind == ASTDropQuery::Kind::Detach)
+                context.checkAccess(table->isView() ? AccessType::DROP_VIEW : AccessType::DROP_TABLE, table_id);
+            else if (query.kind == ASTDropQuery::Kind::Truncate)
+                context.checkAccess(AccessType::TRUNCATE, table_id);
+            else if (query.kind == ASTDropQuery::Kind::Drop)
+                context.checkAccess(table->isView() ? AccessType::DROP_VIEW : AccessType::DROP_TABLE, table_id);
+
             ddl_guard->releaseTableLock();
             table.reset();
             return typeid_cast<DatabaseReplicated *>(database.get())->propose(query.clone(), context);
@@ -214,13 +221,15 @@ BlockIO InterpreterDropQuery::executeToDictionary(
 
     bool is_drop_or_detach_database = query_ptr->as<ASTDropQuery>()->table.empty();
     bool is_replicated_ddl_query = typeid_cast<DatabaseReplicated *>(database.get()) &&
-                                   context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY &&
+                                   context.getClientInfo().query_kind != ClientInfo::QueryKind::SECONDARY_QUERY &&
                                    !is_drop_or_detach_database;
     if (is_replicated_ddl_query)
     {
         if (kind == ASTDropQuery::Kind::Detach)
             throw Exception(ErrorCodes::INCORRECT_QUERY, "DETACH DICTIONARY is not allowed for Replicated databases.");
 
+        context.checkAccess(AccessType::DROP_DICTIONARY, database_name, dictionary_name);
+
         ddl_guard->releaseTableLock();
         return typeid_cast<DatabaseReplicated *>(database.get())->propose(query_ptr, context);
     }
diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp
index 5bfc144e014..b9d7faac73c 100644
--- a/src/Interpreters/InterpreterRenameQuery.cpp
+++ b/src/Interpreters/InterpreterRenameQuery.cpp
@@ -80,7 +80,7 @@ BlockIO InterpreterRenameQuery::executeToTables(const ASTRenameQuery & rename, c
             database_catalog.assertTableDoesntExist(StorageID(elem.to_database_name, elem.to_table_name), context);
 
         DatabasePtr database = database_catalog.getDatabase(elem.from_database_name);
-        if (typeid_cast<DatabaseReplicated *>(database.get()) && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
+        if (typeid_cast<DatabaseReplicated *>(database.get()) && context.getClientInfo().query_kind != ClientInfo::QueryKind::SECONDARY_QUERY)
         {
             if (1 < descriptions.size())
                 throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Database {} is Replicated, "
diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp
index 1d68f788a42..8377e37b07a 100644
--- a/src/Storages/MergeTree/registerStorageMergeTree.cpp
+++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp
@@ -454,7 +454,7 @@ static StoragePtr create(const StorageFactory::Arguments & args)
 
         /// Allow implicit {uuid} macros only for zookeeper_path in ON CLUSTER queries
         bool is_on_cluster = args.local_context.getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY;
-        bool is_replicated_database = args.local_context.getClientInfo().query_kind == ClientInfo::QueryKind::REPLICATED_LOG_QUERY &&
+        bool is_replicated_database = args.local_context.getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY &&
                                       DatabaseCatalog::instance().getDatabase(args.table_id.database_name)->getEngineName() == "Replicated";
         bool allow_uuid_macro = is_on_cluster || is_replicated_database || args.query.attach;
 
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index a4b83e365d1..3295be311d1 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -4284,24 +4284,12 @@ void StorageReplicatedMergeTree::alter(
 
         if (auto txn = query_context.getMetadataTransaction())
         {
-            txn->addOps(ops);
+            txn->moveOpsTo(ops);
             /// NOTE: IDatabase::alterTable(...) is called when executing ALTER_METADATA queue entry without query context,
             /// so we have to update metadata of DatabaseReplicated here.
-            /// It also may cause "Table columns structure in ZooKeeper is different" error on server startup
-            /// even for Ordinary and Atomic databases.
             String metadata_zk_path = txn->zookeeper_path + "/metadata/" + escapeForFileName(table_id.table_name);
             auto ast = DatabaseCatalog::instance().getDatabase(table_id.database_name)->getCreateTableQuery(table_id.table_name, query_context);
-            auto & ast_create_query = ast->as<ASTCreateQuery &>();
-
-            //FIXME copy-paste
-            ASTPtr new_columns = InterpreterCreateQuery::formatColumns(future_metadata.columns);
-            ASTPtr new_indices = InterpreterCreateQuery::formatIndices(future_metadata.secondary_indices);
-            ASTPtr new_constraints = InterpreterCreateQuery::formatConstraints(future_metadata.constraints);
-
-            ast_create_query.columns_list->replace(ast_create_query.columns_list->columns, new_columns);
-            ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->indices, new_indices);
-            ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->constraints, new_constraints);
-
+            applyMetadataChangesToCreateQuery(ast, future_metadata);
             ops.emplace_back(zkutil::makeSetRequest(metadata_zk_path, getObjectDefinitionFromCreateQuery(ast), -1));
         }
 
@@ -4450,7 +4438,7 @@ void StorageReplicatedMergeTree::dropPartition(const ASTPtr & partition, bool de
     else
     {
         String partition_id = getPartitionIDFromQuery(partition, query_context);
-        did_drop = dropAllPartsInPartition(*zookeeper, partition_id, entry, detach);
+        did_drop = dropAllPartsInPartition(*zookeeper, partition_id, entry, query_context, detach);
     }
 
     if (did_drop)
@@ -4474,7 +4462,7 @@ void StorageReplicatedMergeTree::dropPartition(const ASTPtr & partition, bool de
 
 
 void StorageReplicatedMergeTree::truncate(
-    const ASTPtr &, const StorageMetadataPtr &, const Context &, TableExclusiveLockHolder & table_lock)
+    const ASTPtr &, const StorageMetadataPtr &, const Context & query_context, TableExclusiveLockHolder & table_lock)
 {
     table_lock.release();   /// Truncate is done asynchronously.
 
@@ -4490,7 +4478,7 @@ void StorageReplicatedMergeTree::truncate(
     {
         LogEntry entry;
 
-        if (dropAllPartsInPartition(*zookeeper, partition_id, entry, false))
+        if (dropAllPartsInPartition(*zookeeper, partition_id, entry, query_context, false))
             waitForAllReplicasToProcessLogEntry(entry);
     }
 }
@@ -5274,6 +5262,9 @@ void StorageReplicatedMergeTree::mutate(const MutationCommands & commands, const
         requests.emplace_back(zkutil::makeCreateRequest(
             mutations_path + "/", mutation_entry.toString(), zkutil::CreateMode::PersistentSequential));
 
+        if (auto txn = query_context.getMetadataTransaction())
+            txn->moveOpsTo(requests);
+
         Coordination::Responses responses;
         Coordination::Error rc = zookeeper->tryMulti(requests, responses);
 
@@ -5775,6 +5766,9 @@ void StorageReplicatedMergeTree::replacePartitionFrom(
             }
         }
 
+        if (auto txn = context.getMetadataTransaction())
+            txn->moveOpsTo(ops);
+
         ops.emplace_back(zkutil::makeSetRequest(zookeeper_path + "/log", "", -1));  /// Just update version
         ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/log/log-", entry.toString(), zkutil::CreateMode::PersistentSequential));
 
@@ -6243,7 +6237,7 @@ bool StorageReplicatedMergeTree::dropPart(
 }
 
 bool StorageReplicatedMergeTree::dropAllPartsInPartition(
-    zkutil::ZooKeeper & zookeeper, String & partition_id, LogEntry & entry, bool detach)
+    zkutil::ZooKeeper & zookeeper, String & partition_id, LogEntry & entry, const Context & query_context, bool detach)
 {
     MergeTreePartInfo drop_range_info;
     if (!getFakePartCoveringAllPartsInPartition(partition_id, drop_range_info))
@@ -6275,6 +6269,8 @@ bool StorageReplicatedMergeTree::dropAllPartsInPartition(
     Coordination::Requests ops;
     ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/log/log-", entry.toString(), zkutil::CreateMode::PersistentSequential));
     ops.emplace_back(zkutil::makeSetRequest(zookeeper_path + "/log", "", -1));  /// Just update version.
+    if (auto txn = query_context.getMetadataTransaction())
+        txn->moveOpsTo(ops);
     Coordination::Responses responses = zookeeper.multi(ops);
 
     String log_znode_path = dynamic_cast<const Coordination::CreateResponse &>(*responses.front()).path_created;
diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h
index 6db05294b63..a1a70ada9b2 100644
--- a/src/Storages/StorageReplicatedMergeTree.h
+++ b/src/Storages/StorageReplicatedMergeTree.h
@@ -134,7 +134,7 @@ public:
       */
     void drop() override;
 
-    void truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableExclusiveLockHolder &) override;
+    void truncate(const ASTPtr &, const StorageMetadataPtr &, const Context & query_context, TableExclusiveLockHolder &) override;
 
     void checkTableCanBeRenamed() const override;
 
@@ -577,7 +577,7 @@ private:
 
     bool dropPart(zkutil::ZooKeeperPtr & zookeeper, String part_name, LogEntry & entry, bool detach, bool throw_if_noop);
     bool dropAllPartsInPartition(
-        zkutil::ZooKeeper & zookeeper, String & partition_id, LogEntry & entry, bool detach);
+        zkutil::ZooKeeper & zookeeper, String & partition_id, LogEntry & entry, const Context & query_context, bool detach);
 
     // Partition helpers
     void dropPartition(const ASTPtr & partition, bool detach, bool drop_part, const Context & query_context, bool throw_if_noop) override;
diff --git a/src/Storages/System/StorageSystemClusters.cpp b/src/Storages/System/StorageSystemClusters.cpp
index 62ad1c5150f..7e16deb6d22 100644
--- a/src/Storages/System/StorageSystemClusters.cpp
+++ b/src/Storages/System/StorageSystemClusters.cpp
@@ -41,7 +41,7 @@ void StorageSystemClusters::fillData(MutableColumns & res_columns, const Context
     }
 }
 
-void StorageSystemClusters::writeCluster(MutableColumns & res_columns, const NameAndCluster & name_and_cluster) const
+void StorageSystemClusters::writeCluster(MutableColumns & res_columns, const NameAndCluster & name_and_cluster)
 {
     const String & cluster_name = name_and_cluster.first;
     const ClusterPtr & cluster = name_and_cluster.second;
diff --git a/src/Storages/System/StorageSystemClusters.h b/src/Storages/System/StorageSystemClusters.h
index 68282f1b1fe..4f2a843999f 100644
--- a/src/Storages/System/StorageSystemClusters.h
+++ b/src/Storages/System/StorageSystemClusters.h
@@ -29,7 +29,7 @@ protected:
     using NameAndCluster = std::pair<String, std::shared_ptr<Cluster>>;
 
     void fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo & query_info) const override;
-    void writeCluster(MutableColumns & res_columns, const NameAndCluster & name_and_cluster) const;
+    static void writeCluster(MutableColumns & res_columns, const NameAndCluster & name_and_cluster);
 };
 
 }
diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py
index 2471228b55e..2a5a7f4716e 100644
--- a/tests/integration/test_replicated_database/test.py
+++ b/tests/integration/test_replicated_database/test.py
@@ -147,7 +147,16 @@ def test_alters_from_different_replicas(started_cluster):
 
     main_node.query("SYSTEM FLUSH DISTRIBUTED testdb.dist")
     main_node.query("ALTER TABLE testdb.concurrent_test UPDATE StartDate = addYears(StartDate, 1) WHERE 1")
-    main_node.query("ALTER TABLE testdb.concurrent_test DELETE WHERE UserID % 2")
+    res = main_node.query("ALTER TABLE testdb.concurrent_test DELETE WHERE UserID % 2")
+    assert "shard1|replica1" in res and "shard1|replica2" in res and "shard1|replica3" in res
+    assert "shard2|replica1" in res and "shard2|replica2" in res
+
+    expected = "1\t1\tmain_node\n" \
+               "1\t2\tdummy_node\n" \
+               "1\t3\tcompeting_node\n" \
+               "2\t1\tsnapshotting_node\n" \
+               "2\t2\tsnapshot_recovering_node\n"
+    assert main_node.query("SELECT shard_num, replica_num, host_name FROM system.clusters WHERE cluster='testdb'") == expected
 
     # test_drop_and_create_replica
     main_node.query("DROP DATABASE testdb")
diff --git a/tests/queries/0_stateless/01018_ddl_dictionaries_concurrent_requrests.sh b/tests/queries/0_stateless/01018_ddl_dictionaries_concurrent_requrests.sh
index bc13e44934a..025fe51e2a9 100755
--- a/tests/queries/0_stateless/01018_ddl_dictionaries_concurrent_requrests.sh
+++ b/tests/queries/0_stateless/01018_ddl_dictionaries_concurrent_requrests.sh
@@ -113,8 +113,8 @@ timeout $TIMEOUT bash -c thread7 2> /dev/null &
 wait
 $CLICKHOUSE_CLIENT -q "SELECT 'Still alive'"
 
-$CLICKHOUSE_CLIENT -q "ATTACH DICTIONARY database_for_dict.dict1"
-$CLICKHOUSE_CLIENT -q "ATTACH DICTIONARY database_for_dict.dict2"
+$CLICKHOUSE_CLIENT -q "ATTACH DICTIONARY IF NOT EXISTS database_for_dict.dict1"
+$CLICKHOUSE_CLIENT -q "ATTACH DICTIONARY IF NOT EXISTS database_for_dict.dict2"
 
 $CLICKHOUSE_CLIENT -n -q "
     DROP TABLE table_for_dict1;
diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json
index 4c6927f575a..1c5136b6bde 100644
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@@ -103,6 +103,12 @@
         "memory_tracking",     /// FIXME remove it before merge
         "memory_tracking",
         "memory_usage",
+        "01188_attach_table_from_pat",
+        "01110_dictionary_layout_without_arguments",
+        "01018_ddl_dictionaries_create",
+        "01018_ddl_dictionaries_select",
+        "01414_freeze_does_not_prevent_alters",
+        "01018_ddl_dictionaries_bad_queries",
         "01686_rocksdb",
         "01550_mutation_subquery",
         "01070_mutations_with_dependencies",

From b8baf3a4432166fa66c243236962b9a42a3855bb Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Mon, 8 Feb 2021 22:40:44 +0300
Subject: [PATCH 0845/1238] Fix some warnings

---
 src/Coordination/NuKeeperStateMachine.cpp | 2 +-
 src/Coordination/NuKeeperStateMachine.h   | 2 +-
 src/Coordination/NuKeeperStorage.cpp      | 1 -
 src/Server/NuKeeperTCPHandler.h           | 2 +-
 4 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/Coordination/NuKeeperStateMachine.cpp b/src/Coordination/NuKeeperStateMachine.cpp
index 7896caad568..9be8e889fa3 100644
--- a/src/Coordination/NuKeeperStateMachine.cpp
+++ b/src/Coordination/NuKeeperStateMachine.cpp
@@ -43,7 +43,7 @@ nuraft::ptr<nuraft::buffer> writeResponses(NuKeeperStorage::ResponsesForSessions
 }
 
 
-NuKeeperStateMachine::NuKeeperStateMachine(ResponsesQueue & responses_queue_, long tick_time)
+NuKeeperStateMachine::NuKeeperStateMachine(ResponsesQueue & responses_queue_, int64_t tick_time)
     : storage(tick_time)
     , responses_queue(responses_queue_)
     , last_committed_idx(0)
diff --git a/src/Coordination/NuKeeperStateMachine.h b/src/Coordination/NuKeeperStateMachine.h
index b12903b6929..5f3065ee144 100644
--- a/src/Coordination/NuKeeperStateMachine.h
+++ b/src/Coordination/NuKeeperStateMachine.h
@@ -13,7 +13,7 @@ using ResponsesQueue = ThreadSafeQueue<NuKeeperStorage::ResponseForSession>;
 class NuKeeperStateMachine : public nuraft::state_machine
 {
 public:
-    NuKeeperStateMachine(ResponsesQueue & responses_queue_, long tick_time = 500);
+    NuKeeperStateMachine(ResponsesQueue & responses_queue_, int64_t tick_time = 500);
 
     nuraft::ptr<nuraft::buffer> pre_commit(const size_t /*log_idx*/, nuraft::buffer & /*data*/) override { return nullptr; }
 
diff --git a/src/Coordination/NuKeeperStorage.cpp b/src/Coordination/NuKeeperStorage.cpp
index ef59e717b4c..a86b7432cbf 100644
--- a/src/Coordination/NuKeeperStorage.cpp
+++ b/src/Coordination/NuKeeperStorage.cpp
@@ -132,7 +132,6 @@ struct NuKeeperStorageCreateRequest final : public NuKeeperStorageRequest
             else
             {
                 NuKeeperStorage::Node created_node;
-                created_node.seq_num = 0;
                 created_node.stat.czxid = zxid;
                 created_node.stat.mzxid = zxid;
                 created_node.stat.ctime = std::chrono::system_clock::now().time_since_epoch() / std::chrono::milliseconds(1);
diff --git a/src/Server/NuKeeperTCPHandler.h b/src/Server/NuKeeperTCPHandler.h
index 241867a1d99..03a857ad1d7 100644
--- a/src/Server/NuKeeperTCPHandler.h
+++ b/src/Server/NuKeeperTCPHandler.h
@@ -41,7 +41,7 @@ private:
     std::shared_ptr<NuKeeperStorageDispatcher> nu_keeper_storage_dispatcher;
     Poco::Timespan operation_timeout;
     Poco::Timespan session_timeout;
-    int64_t session_id;
+    int64_t session_id{-1};
     Stopwatch session_stopwatch;
     SocketInterruptablePollWrapperPtr poll_wrapper;
 

From fd396d1d36600acb6efedb8bdb957e3359454ef7 Mon Sep 17 00:00:00 2001
From: Vitaliy Zakaznikov <vzakaznikov@protonmail.com>
Date: Mon, 8 Feb 2021 16:08:32 -0500
Subject: [PATCH 0846/1238] Starting to add documentation for live views.

---
 .../sql-reference/statements/create/view.md   | 79 +++++++++++++++++++
 docs/en/sql-reference/statements/watch.md     | 68 ++++++++++++++++
 2 files changed, 147 insertions(+)
 create mode 100644 docs/en/sql-reference/statements/watch.md

diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md
index 4370735b8d9..a9fe48ed6ac 100644
--- a/docs/en/sql-reference/statements/create/view.md
+++ b/docs/en/sql-reference/statements/create/view.md
@@ -62,3 +62,82 @@ The execution of [ALTER](../../../sql-reference/statements/alter/index.md) queri
 Views look the same as normal tables. For example, they are listed in the result of the `SHOW TABLES` query.
 
 There isn’t a separate query for deleting views. To delete a view, use [DROP TABLE](../../../sql-reference/statements/drop.md).
+
+## Live View (Experimental) {#live-view)
+
+!!! important "Important"
+    This is an experimental feature that may change in backwards-incompatible ways in the future releases.
+    Enable usage of live views and `WATCH` query using `set allow_experimental_live_view = 1`.
+
+
+```sql
+CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ...
+```
+
+Live views store result of the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query
+and are updated any time the result of the query changes. Query result as well as partial result
+needed to combine with new data are stored in memory providing increased performance
+for repeated queries. Live views can provide push notifications
+when query result changes using the [WATCH](../../../sql-reference/statements/watch.md) query.
+
+Live views are triggered by insert into the innermost table specified in the query. 
+
+!!! info "Note"
+    [Table function](../../../sql-reference/table-functions/index.md) is not supported as the innermost table.
+
+!!! info "Note"
+    Tables that do not have inserts such as a [dictionary](../../../sql-reference/dictionaries/index.md)
+    or a [system table](../../../operations/system-tables/index.md)
+    will not trigger a live view. See [WITH REFRESH](#live-view-with-refresh) to enable periodic
+    updates of a live view.
+
+Live views work similarly to how a query in a distributed table works. But instead of combining partial results
+from different servers they combine partial result from current data with partial result from the new data.
+When a live view query includes a subquery then the cached partial result is only stored for the innermost subquery.
+
+!!! info "Note"
+   Only queries where one can combine partial result from the old data plus partial result from the new data will work.
+   Live view will not work for queries that require the complete data set to compute the final result.
+
+You can execute [SELECT](../../../sql-reference/statements/select/index.md) query on a live view
+in the same way as for any regular view or a table. If the query result is cached 
+it will return the result immediately without running the stored query on the underlying tables.
+
+### Force Refresh {#live-view-alter-refresh}
+
+You can force live view refresh using the `ALTER LIVE VIEW [db.]table_name REFRESH` statement.
+
+### With Timeout {#live-view-with-timeout}
+
+When a live view is create with a `WITH TIMEOUT` clause then the live view will be dropped automatically after the specified
+number of seconds elapse since the end of the last [WATCH](../../../sql-reference/statements/watch.md) query. 
+
+```sql
+CREATE LIVE VIEW [db.]table_name WITH TIMEOUT value_in_sec AS SELECT ...
+```
+
+### With Refresh {#live-view-with-refresh}
+
+When a live view is created with a `WITH REFRESH` clause then it will be automatically refreshed
+after the specified number of seconds elapse since the last refresh or trigger.
+
+```sql
+CREATE LIVE VIEW [db.]table_name WITH REFRESH value_in_sec AS SELECT ...
+```
+
+You can combine `WITH TIMEOUT` and `WITH REFRESH` clauses using an `AND`. 
+
+```sql
+CREATE LIVE VIEW [db.]table_name WITH TIMEOUT value_in_sec AND REFRESH value_in_sec AS SELECT ...
+```
+
+### Settings {#live-view-settings}
+
+You can use the following settings to control the behaviour of live views.
+
+- `allow_experimental_live_view` - enable live views. Default `0`.
+- `live_view_heartbeat_interval` - the heartbeat interval in seconds to indicate live query is alive
+- `max_live_view_insert_blocks_before_refresh` - maximum number of inserted blocks after which
+   mergeable blocks are dropped and query is re-executed. Default `64`.
+-  `temporary_live_view_timeout` - interval after which live view with timeout is deleted. Default `0`.
+-  `periodic_live_view_refresh` - interval after which periodically refreshed live view is forced to refresh. Default `0`.
diff --git a/docs/en/sql-reference/statements/watch.md b/docs/en/sql-reference/statements/watch.md
new file mode 100644
index 00000000000..b09147f15eb
--- /dev/null
+++ b/docs/en/sql-reference/statements/watch.md
@@ -0,0 +1,68 @@
+---
+toc_priority: 53
+toc_title: WATCH
+---
+
+# WATCH Statement {#watch}
+
+!!! important "Important"
+    This is an experimental feature that may change in backwards-incompatible ways in the future releases.
+    Enable live views and `WATCH` query using `set allow_experimental_live_view = 1`.
+
+
+``` sql
+WATCH [db.]live_view
+[EVENTS]
+[LIMIT n]
+[FORMAT format]
+```
+
+The `WATCH` query performs continuous data retrieval from a [live view](./create/view.md#live-view) table.
+Unless the `LIMIT` clause is specified it provides an infinite stream of query results
+from a live view.
+
+```sql
+WATCH [db.]live_view
+```
+
+The virtual `_version` column in the query result indicates the current result version.
+
+By default, the requested data is returned to the client, while in conjunction with [INSERT INTO](../../../sql-reference/statements/insert-into.md)
+it can be forwarded to a different table.
+
+```sql
+INSERT INTO [db.]table WATCH [db.]live_view ...
+```
+
+## EVENTS Clause
+
+The `EVENTS` clause can be used to obtain a short form of the `WATCH` query
+where instead of the query result, you will just get the latest query
+result version.
+
+```sql
+WATCH [db.]live_view EVENTS LIMIT 1
+```
+
+## LIMIT Clause {#limit-clause}
+
+The `LIMIT n` clause species the number of updates the `WATCH` query should wait
+for before terminating. The value of `0`
+indicates that the `WATCH` query should not wait for any new query results
+and therefore will return immediately once query is evaluated.
+
+```sql
+WATCH [db.]live_view LIMIT 1
+```
+
+## FORMAT Clause {#format-clause}
+
+The `FORMAT` clause works the same way as for the [SELECT](./select/index.md#format-clause).
+
+### JSONEachRowWithProgress
+
+The `JSONEachRowWithProgress` format should be used when watching [live view](./create/view.md#live-view) 
+tables over the HTTP interface. The progress messages will be added to the output
+to keep the long-lived HTTP connection alive until the query result changes. 
+The interval between progress messages is controlled using the [live_view_heartbeat_interval](./create/view.md#live-view-settings) setting.
+

From 2e113a0faf9f264853289d9e2ba61ea7913a4d4a Mon Sep 17 00:00:00 2001
From: Vitaliy Zakaznikov <vzakaznikov@protonmail.com>
Date: Mon, 8 Feb 2021 16:24:05 -0500
Subject: [PATCH 0847/1238] Update to live view docs.

---
 .../en/sql-reference/statements/create/view.md |  8 ++++----
 docs/en/sql-reference/statements/watch.md      | 18 +++++++++---------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md
index a9fe48ed6ac..381dbbfe08a 100644
--- a/docs/en/sql-reference/statements/create/view.md
+++ b/docs/en/sql-reference/statements/create/view.md
@@ -136,8 +136,8 @@ CREATE LIVE VIEW [db.]table_name WITH TIMEOUT value_in_sec AND REFRESH value_in_
 You can use the following settings to control the behaviour of live views.
 
 - `allow_experimental_live_view` - enable live views. Default `0`.
-- `live_view_heartbeat_interval` - the heartbeat interval in seconds to indicate live query is alive
+- `live_view_heartbeat_interval` - the heartbeat interval in seconds to indicate live query is alive. Default `15` seconds.
 - `max_live_view_insert_blocks_before_refresh` - maximum number of inserted blocks after which
-   mergeable blocks are dropped and query is re-executed. Default `64`.
--  `temporary_live_view_timeout` - interval after which live view with timeout is deleted. Default `0`.
--  `periodic_live_view_refresh` - interval after which periodically refreshed live view is forced to refresh. Default `0`.
+   mergeable blocks are dropped and query is re-executed. Default `64` inserts.
+-  `temporary_live_view_timeout` - interval after which live view with timeout is deleted. Default `5` seconds.
+-  `periodic_live_view_refresh` - interval after which periodically refreshed live view is forced to refresh. Default `60` seconds.
diff --git a/docs/en/sql-reference/statements/watch.md b/docs/en/sql-reference/statements/watch.md
index b09147f15eb..5cf10cdd5a0 100644
--- a/docs/en/sql-reference/statements/watch.md
+++ b/docs/en/sql-reference/statements/watch.md
@@ -27,7 +27,7 @@ WATCH [db.]live_view
 
 The virtual `_version` column in the query result indicates the current result version.
 
-By default, the requested data is returned to the client, while in conjunction with [INSERT INTO](../../../sql-reference/statements/insert-into.md)
+By default, the requested data is returned to the client, while in conjunction with [INSERT INTO](../../sql-reference/statements/insert-into.md)
 it can be forwarded to a different table.
 
 ```sql
@@ -37,7 +37,7 @@ INSERT INTO [db.]table WATCH [db.]live_view ...
 ## EVENTS Clause
 
 The `EVENTS` clause can be used to obtain a short form of the `WATCH` query
-where instead of the query result, you will just get the latest query
+where instead of the query result you will just get the latest query
 result version.
 
 ```sql
@@ -47,7 +47,8 @@ WATCH [db.]live_view EVENTS LIMIT 1
 ## LIMIT Clause {#limit-clause}
 
 The `LIMIT n` clause species the number of updates the `WATCH` query should wait
-for before terminating. The value of `0`
+for before terminating. By default there is no limit on the number of updates and therefore
+the query will not terminate. The value of `0`
 indicates that the `WATCH` query should not wait for any new query results
 and therefore will return immediately once query is evaluated.
 
@@ -59,10 +60,9 @@ WATCH [db.]live_view LIMIT 1
 
 The `FORMAT` clause works the same way as for the [SELECT](./select/index.md#format-clause).
 
-### JSONEachRowWithProgress
-
-The `JSONEachRowWithProgress` format should be used when watching [live view](./create/view.md#live-view) 
-tables over the HTTP interface. The progress messages will be added to the output
-to keep the long-lived HTTP connection alive until the query result changes. 
-The interval between progress messages is controlled using the [live_view_heartbeat_interval](./create/view.md#live-view-settings) setting.
+!!! info "Note"
+    The [JSONEachRowWithProgress](../../interfaces/formats/#jsoneachrowwithprogress) format should be used when watching [live view](./create/view.md#live-view) 
+    tables over the HTTP interface. The progress messages will be added to the output
+    to keep the long-lived HTTP connection alive until the query result changes. 
+    The interval between progress messages is controlled using the [live_view_heartbeat_interval](./create/view.md#live-view-settings) setting.
 

From d7f5ea784096ae0fe0049c9e2dcefff1ca059cfc Mon Sep 17 00:00:00 2001
From: Vitaliy Zakaznikov <vzakaznikov@protonmail.com>
Date: Mon, 8 Feb 2021 16:25:07 -0500
Subject: [PATCH 0848/1238] Adding experimental note to the watch query.

---
 docs/en/sql-reference/statements/watch.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/statements/watch.md b/docs/en/sql-reference/statements/watch.md
index 5cf10cdd5a0..b89cc63375c 100644
--- a/docs/en/sql-reference/statements/watch.md
+++ b/docs/en/sql-reference/statements/watch.md
@@ -3,7 +3,7 @@ toc_priority: 53
 toc_title: WATCH
 ---
 
-# WATCH Statement {#watch}
+# WATCH Statement (Experimental) {#watch}
 
 !!! important "Important"
     This is an experimental feature that may change in backwards-incompatible ways in the future releases.

From cd097e250b1544cceb487f4e950243a1c039269d Mon Sep 17 00:00:00 2001
From: Vitaliy Zakaznikov <vzakaznikov@protonmail.com>
Date: Mon, 8 Feb 2021 16:29:47 -0500
Subject: [PATCH 0849/1238] Fix type in live view reference.

---
 docs/en/sql-reference/statements/create/view.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md
index 381dbbfe08a..0fdb36249ac 100644
--- a/docs/en/sql-reference/statements/create/view.md
+++ b/docs/en/sql-reference/statements/create/view.md
@@ -63,7 +63,7 @@ Views look the same as normal tables. For example, they are listed in the result
 
 There isn’t a separate query for deleting views. To delete a view, use [DROP TABLE](../../../sql-reference/statements/drop.md).
 
-## Live View (Experimental) {#live-view)
+## Live View (Experimental) {#live-view}
 
 !!! important "Important"
     This is an experimental feature that may change in backwards-incompatible ways in the future releases.

From e703390fcedd679d76c315a7562365968cd681a4 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com>
Date: Tue, 9 Feb 2021 00:31:30 +0300
Subject: [PATCH 0850/1238] Update reinterpret_as.xml

---
 tests/performance/reinterpret_as.xml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/performance/reinterpret_as.xml b/tests/performance/reinterpret_as.xml
index 6ef152bc552..17c5fd9da11 100644
--- a/tests/performance/reinterpret_as.xml
+++ b/tests/performance/reinterpret_as.xml
@@ -95,7 +95,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(20000000)
+        FROM numbers_mt(10000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -210,7 +210,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(20000000)
+        FROM numbers_mt(10000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>

From 4d4f20142e572e90182318a17714610fcabbfe53 Mon Sep 17 00:00:00 2001
From: gyuton <40863448+gyuton@users.noreply.github.com>
Date: Tue, 9 Feb 2021 00:54:00 +0300
Subject: [PATCH 0851/1238] Update
 docs/ru/operations/settings/merge-tree-settings.md

Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com>
---
 docs/ru/operations/settings/merge-tree-settings.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/operations/settings/merge-tree-settings.md b/docs/ru/operations/settings/merge-tree-settings.md
index b8adc73c7ce..bfc0b0a2644 100644
--- a/docs/ru/operations/settings/merge-tree-settings.md
+++ b/docs/ru/operations/settings/merge-tree-settings.md
@@ -191,6 +191,6 @@ Eсли суммарное число активных кусков во все
 
 -   Любое положительное целое число.
 
-Значение по умолчанию: -1 (неограничено).
+Значение по умолчанию: -1 (неограниченно).
 
 [Original article](https://clickhouse.tech/docs/ru/operations/settings/merge_tree_settings/) <!--hide-->

From 52e9b9d73974d3f4b277fb0f37d14b1a0c29e1e9 Mon Sep 17 00:00:00 2001
From: Vitaliy Zakaznikov <vzakaznikov@protonmail.com>
Date: Mon, 8 Feb 2021 16:56:25 -0500
Subject: [PATCH 0852/1238] Minor updates to the live view docs.

---
 .../sql-reference/statements/create/view.md   | 29 ++++++++++++-------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md
index 0fdb36249ac..5a5c77534fb 100644
--- a/docs/en/sql-reference/statements/create/view.md
+++ b/docs/en/sql-reference/statements/create/view.md
@@ -103,6 +103,10 @@ You can execute [SELECT](../../../sql-reference/statements/select/index.md) quer
 in the same way as for any regular view or a table. If the query result is cached 
 it will return the result immediately without running the stored query on the underlying tables.
 
+```sql
+SELECT * FROM [db.]live_view WHERE ...
+```
+
 ### Force Refresh {#live-view-alter-refresh}
 
 You can force live view refresh using the `ALTER LIVE VIEW [db.]table_name REFRESH` statement.
@@ -110,34 +114,39 @@ You can force live view refresh using the `ALTER LIVE VIEW [db.]table_name REFRE
 ### With Timeout {#live-view-with-timeout}
 
 When a live view is create with a `WITH TIMEOUT` clause then the live view will be dropped automatically after the specified
-number of seconds elapse since the end of the last [WATCH](../../../sql-reference/statements/watch.md) query. 
+number of seconds elapse since the end of the last [WATCH](../../../sql-reference/statements/watch.md) query
+that was watching the live view. 
 
 ```sql
-CREATE LIVE VIEW [db.]table_name WITH TIMEOUT value_in_sec AS SELECT ...
+CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AS SELECT ...
 ```
 
+If the timeout value is not specified then the value specified by the `temporary_live_view_timeout` setting is used.
+
 ### With Refresh {#live-view-with-refresh}
 
 When a live view is created with a `WITH REFRESH` clause then it will be automatically refreshed
 after the specified number of seconds elapse since the last refresh or trigger.
 
 ```sql
-CREATE LIVE VIEW [db.]table_name WITH REFRESH value_in_sec AS SELECT ...
+CREATE LIVE VIEW [db.]table_name WITH REFRESH [value_in_sec] AS SELECT ...
 ```
 
-You can combine `WITH TIMEOUT` and `WITH REFRESH` clauses using an `AND`. 
+If the refresh value is not specified then the value specified by the `periodic_live_view_refresh` setting is used.
+
+You can combine `WITH TIMEOUT` and `WITH REFRESH` clauses using an `AND` clause. 
 
 ```sql
-CREATE LIVE VIEW [db.]table_name WITH TIMEOUT value_in_sec AND REFRESH value_in_sec AS SELECT ...
+CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AND REFRESH [value_in_sec] AS SELECT ...
 ```
 
 ### Settings {#live-view-settings}
 
 You can use the following settings to control the behaviour of live views.
 
-- `allow_experimental_live_view` - enable live views. Default `0`.
-- `live_view_heartbeat_interval` - the heartbeat interval in seconds to indicate live query is alive. Default `15` seconds.
+- `allow_experimental_live_view` - enable live views. Default is `0`.
+- `live_view_heartbeat_interval` - the heartbeat interval in seconds to indicate live query is alive. Default is `15` seconds.
 - `max_live_view_insert_blocks_before_refresh` - maximum number of inserted blocks after which
-   mergeable blocks are dropped and query is re-executed. Default `64` inserts.
--  `temporary_live_view_timeout` - interval after which live view with timeout is deleted. Default `5` seconds.
--  `periodic_live_view_refresh` - interval after which periodically refreshed live view is forced to refresh. Default `60` seconds.
+   mergeable blocks are dropped and query is re-executed. Default is `64` inserts.
+- `temporary_live_view_timeout` - interval after which live view with timeout is deleted. Default is `5` seconds.
+- `periodic_live_view_refresh` - interval after which periodically refreshed live view is forced to refresh. Default is `60` seconds.

From d737ffbe0c448d77be6f40fd812fea1bb6c6c55c Mon Sep 17 00:00:00 2001
From: Vitaliy Zakaznikov <vzakaznikov@protonmail.com>
Date: Mon, 8 Feb 2021 16:59:39 -0500
Subject: [PATCH 0853/1238] Adding event clause reference.

---
 docs/en/sql-reference/statements/watch.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/statements/watch.md b/docs/en/sql-reference/statements/watch.md
index b89cc63375c..480841cf1b9 100644
--- a/docs/en/sql-reference/statements/watch.md
+++ b/docs/en/sql-reference/statements/watch.md
@@ -34,7 +34,7 @@ it can be forwarded to a different table.
 INSERT INTO [db.]table WATCH [db.]live_view ...
 ```
 
-## EVENTS Clause
+## EVENTS Clause {#events-clause}
 
 The `EVENTS` clause can be used to obtain a short form of the `WATCH` query
 where instead of the query result you will just get the latest query

From 0270b96ffb48d305ea2125aca995c5046fff842f Mon Sep 17 00:00:00 2001
From: Vitaliy Zakaznikov <vzakaznikov@protonmail.com>
Date: Mon, 8 Feb 2021 17:18:37 -0500
Subject: [PATCH 0854/1238] Adding example of using WATCH and WATCH ... EVENTS
 to live view description.

---
 docs/en/sql-reference/statements/create/view.md | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md
index 5a5c77534fb..3544ad93aa5 100644
--- a/docs/en/sql-reference/statements/create/view.md
+++ b/docs/en/sql-reference/statements/create/view.md
@@ -99,6 +99,18 @@ When a live view query includes a subquery then the cached partial result is onl
    Only queries where one can combine partial result from the old data plus partial result from the new data will work.
    Live view will not work for queries that require the complete data set to compute the final result.
 
+You can watch for changes in the live view query result using the [WATCH](../../../sql-reference/statements/watch.md) query
+
+```sql
+WATCH [db.]live_view
+```
+
+or add [EVENTS](../../../sql-reference/statements/watch.md#events-clause) clause to just get change events.
+
+```sql
+WATCH [db.]live_view EVENTS
+```
+
 You can execute [SELECT](../../../sql-reference/statements/select/index.md) query on a live view
 in the same way as for any regular view or a table. If the query result is cached 
 it will return the result immediately without running the stored query on the underlying tables.

From 5769822c53aeca7ba772b8966322235a5e5192fe Mon Sep 17 00:00:00 2001
From: Vitaliy Zakaznikov <vzakaznikov@protonmail.com>
Date: Mon, 8 Feb 2021 17:28:31 -0500
Subject: [PATCH 0855/1238] Fixing rendering.

---
 .../sql-reference/statements/create/view.md   | 36 +++++--------------
 docs/en/sql-reference/statements/watch.md     | 24 ++++---------
 2 files changed, 15 insertions(+), 45 deletions(-)

diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md
index 3544ad93aa5..1d6621ff67d 100644
--- a/docs/en/sql-reference/statements/create/view.md
+++ b/docs/en/sql-reference/statements/create/view.md
@@ -74,30 +74,17 @@ There isn’t a separate query for deleting views. To delete a view, use [DROP T
 CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ...
 ```
 
-Live views store result of the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query
-and are updated any time the result of the query changes. Query result as well as partial result
-needed to combine with new data are stored in memory providing increased performance
-for repeated queries. Live views can provide push notifications
-when query result changes using the [WATCH](../../../sql-reference/statements/watch.md) query.
+Live views store result of the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query and are updated any time the result of the query changes. Query result as well as partial result needed to combine with new data are stored in memory providing increased performance
+for repeated queries. Live views can provide push notifications when query result changes using the [WATCH](../../../sql-reference/statements/watch.md) query.
 
 Live views are triggered by insert into the innermost table specified in the query. 
 
-!!! info "Note"
-    [Table function](../../../sql-reference/table-functions/index.md) is not supported as the innermost table.
+Live views work similarly to how a query in a distributed table works. But instead of combining partial results from different servers they combine partial result from current data with partial result from the new data. When a live view query includes a subquery then the cached partial result is only stored for the innermost subquery.
 
 !!! info "Note"
-    Tables that do not have inserts such as a [dictionary](../../../sql-reference/dictionaries/index.md)
-    or a [system table](../../../operations/system-tables/index.md)
-    will not trigger a live view. See [WITH REFRESH](#live-view-with-refresh) to enable periodic
-    updates of a live view.
-
-Live views work similarly to how a query in a distributed table works. But instead of combining partial results
-from different servers they combine partial result from current data with partial result from the new data.
-When a live view query includes a subquery then the cached partial result is only stored for the innermost subquery.
-
-!!! info "Note"
-   Only queries where one can combine partial result from the old data plus partial result from the new data will work.
-   Live view will not work for queries that require the complete data set to compute the final result.
+    - [Table function](../../../sql-reference/table-functions/index.md) is not supported as the innermost table.
+    - Tables that do not have inserts such as a [dictionary](../../../sql-reference/dictionaries/index.md) or a [system table](../../../operations/system-tables/index.md) will not trigger a live view. See [WITH REFRESH](#live-view-with-refresh) to enable periodic updates of a live view.
+    - Only queries where one can combine partial result from the old data plus partial result from the new data will work. Live view will not work for queries that require the complete data set to compute the final result.
 
 You can watch for changes in the live view query result using the [WATCH](../../../sql-reference/statements/watch.md) query
 
@@ -111,9 +98,7 @@ or add [EVENTS](../../../sql-reference/statements/watch.md#events-clause) clause
 WATCH [db.]live_view EVENTS
 ```
 
-You can execute [SELECT](../../../sql-reference/statements/select/index.md) query on a live view
-in the same way as for any regular view or a table. If the query result is cached 
-it will return the result immediately without running the stored query on the underlying tables.
+You can execute [SELECT](../../../sql-reference/statements/select/index.md) query on a live view in the same way as for any regular view or a table. If the query result is cached it will return the result immediately without running the stored query on the underlying tables.
 
 ```sql
 SELECT * FROM [db.]live_view WHERE ...
@@ -125,9 +110,7 @@ You can force live view refresh using the `ALTER LIVE VIEW [db.]table_name REFRE
 
 ### With Timeout {#live-view-with-timeout}
 
-When a live view is create with a `WITH TIMEOUT` clause then the live view will be dropped automatically after the specified
-number of seconds elapse since the end of the last [WATCH](../../../sql-reference/statements/watch.md) query
-that was watching the live view. 
+When a live view is create with a `WITH TIMEOUT` clause then the live view will be dropped automatically after the specified number of seconds elapse since the end of the last [WATCH](../../../sql-reference/statements/watch.md) query that was watching the live view. 
 
 ```sql
 CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AS SELECT ...
@@ -137,8 +120,7 @@ If the timeout value is not specified then the value specified by the `temporary
 
 ### With Refresh {#live-view-with-refresh}
 
-When a live view is created with a `WITH REFRESH` clause then it will be automatically refreshed
-after the specified number of seconds elapse since the last refresh or trigger.
+When a live view is created with a `WITH REFRESH` clause then it will be automatically refreshed after the specified number of seconds elapse since the last refresh or trigger.
 
 ```sql
 CREATE LIVE VIEW [db.]table_name WITH REFRESH [value_in_sec] AS SELECT ...
diff --git a/docs/en/sql-reference/statements/watch.md b/docs/en/sql-reference/statements/watch.md
index 480841cf1b9..10d2a2715fb 100644
--- a/docs/en/sql-reference/statements/watch.md
+++ b/docs/en/sql-reference/statements/watch.md
@@ -17,9 +17,7 @@ WATCH [db.]live_view
 [FORMAT format]
 ```
 
-The `WATCH` query performs continuous data retrieval from a [live view](./create/view.md#live-view) table.
-Unless the `LIMIT` clause is specified it provides an infinite stream of query results
-from a live view.
+The `WATCH` query performs continuous data retrieval from a [live view](./create/view.md#live-view) table. Unless the `LIMIT` clause is specified it provides an infinite stream of query results from a live view.
 
 ```sql
 WATCH [db.]live_view
@@ -27,8 +25,7 @@ WATCH [db.]live_view
 
 The virtual `_version` column in the query result indicates the current result version.
 
-By default, the requested data is returned to the client, while in conjunction with [INSERT INTO](../../sql-reference/statements/insert-into.md)
-it can be forwarded to a different table.
+By default, the requested data is returned to the client, while in conjunction with [INSERT INTO](../../sql-reference/statements/insert-into.md) it can be forwarded to a different table.
 
 ```sql
 INSERT INTO [db.]table WATCH [db.]live_view ...
@@ -36,9 +33,7 @@ INSERT INTO [db.]table WATCH [db.]live_view ...
 
 ## EVENTS Clause {#events-clause}
 
-The `EVENTS` clause can be used to obtain a short form of the `WATCH` query
-where instead of the query result you will just get the latest query
-result version.
+The `EVENTS` clause can be used to obtain a short form of the `WATCH` query where instead of the query result you will just get the latest query result version.
 
 ```sql
 WATCH [db.]live_view EVENTS LIMIT 1
@@ -46,14 +41,10 @@ WATCH [db.]live_view EVENTS LIMIT 1
 
 ## LIMIT Clause {#limit-clause}
 
-The `LIMIT n` clause species the number of updates the `WATCH` query should wait
-for before terminating. By default there is no limit on the number of updates and therefore
-the query will not terminate. The value of `0`
-indicates that the `WATCH` query should not wait for any new query results
-and therefore will return immediately once query is evaluated.
+The `LIMIT n` clause species the number of updates the `WATCH` query should wait for before terminating. By default there is no limit on the number of updates and therefore the query will not terminate. The value of `0` indicates that the `WATCH` query should not wait for any new query results and therefore will return immediately once query is evaluated.
 
 ```sql
-WATCH [db.]live_view LIMIT 1
+WATCH [db.]live_view LIMIT 2
 ```
 
 ## FORMAT Clause {#format-clause}
@@ -61,8 +52,5 @@ WATCH [db.]live_view LIMIT 1
 The `FORMAT` clause works the same way as for the [SELECT](./select/index.md#format-clause).
 
 !!! info "Note"
-    The [JSONEachRowWithProgress](../../interfaces/formats/#jsoneachrowwithprogress) format should be used when watching [live view](./create/view.md#live-view) 
-    tables over the HTTP interface. The progress messages will be added to the output
-    to keep the long-lived HTTP connection alive until the query result changes. 
-    The interval between progress messages is controlled using the [live_view_heartbeat_interval](./create/view.md#live-view-settings) setting.
+    The [JSONEachRowWithProgress](../../interfaces/formats/#jsoneachrowwithprogress) format should be used when watching [live view](./create/view.md#live-view) tables over the HTTP interface. The progress messages will be added to the output to keep the long-lived HTTP connection alive until the query result changes. The interval between progress messages is controlled using the [live_view_heartbeat_interval](./create/view.md#live-view-settings) setting.
 

From a56ffcee1830e3452eaf064696cc8b8508b28ac5 Mon Sep 17 00:00:00 2001
From: Vitaliy Zakaznikov <vzakaznikov@protonmail.com>
Date: Mon, 8 Feb 2021 17:53:15 -0500
Subject: [PATCH 0856/1238] Fixing links in WATCH query docs.

---
 docs/en/sql-reference/statements/watch.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/en/sql-reference/statements/watch.md b/docs/en/sql-reference/statements/watch.md
index 10d2a2715fb..71f26d71e85 100644
--- a/docs/en/sql-reference/statements/watch.md
+++ b/docs/en/sql-reference/statements/watch.md
@@ -17,7 +17,7 @@ WATCH [db.]live_view
 [FORMAT format]
 ```
 
-The `WATCH` query performs continuous data retrieval from a [live view](./create/view.md#live-view) table. Unless the `LIMIT` clause is specified it provides an infinite stream of query results from a live view.
+The `WATCH` query performs continuous data retrieval from a [live view](./create/view.md#live-view) table. Unless the `LIMIT` clause is specified it provides an infinite stream of query results from a [live view](./create/view.md#live-view).
 
 ```sql
 WATCH [db.]live_view
@@ -49,8 +49,8 @@ WATCH [db.]live_view LIMIT 2
 
 ## FORMAT Clause {#format-clause}
 
-The `FORMAT` clause works the same way as for the [SELECT](./select/index.md#format-clause).
+The `FORMAT` clause works the same way as for the [SELECT](./select/format.md).
 
 !!! info "Note"
-    The [JSONEachRowWithProgress](../../interfaces/formats/#jsoneachrowwithprogress) format should be used when watching [live view](./create/view.md#live-view) tables over the HTTP interface. The progress messages will be added to the output to keep the long-lived HTTP connection alive until the query result changes. The interval between progress messages is controlled using the [live_view_heartbeat_interval](./create/view.md#live-view-settings) setting.
+    The [JSONEachRowWithProgress](../../../interfaces/formats/#jsoneachrowwithprogress) format should be used when watching [live view](./create/view.md#live-view) tables over the HTTP interface. The progress messages will be added to the output to keep the long-lived HTTP connection alive until the query result changes. The interval between progress messages is controlled using the [live_view_heartbeat_interval](./create/view.md#live-view-settings) setting.
 

From 3d2788e1b5b622f96fd15dd4636eba30984d39fb Mon Sep 17 00:00:00 2001
From: Vitaliy Zakaznikov <vzakaznikov@protonmail.com>
Date: Mon, 8 Feb 2021 19:23:32 -0500
Subject: [PATCH 0857/1238] Fixes and updates to live view docs.

---
 .../sql-reference/statements/create/view.md   | 21 ++++++++++++++-----
 docs/en/sql-reference/statements/watch.md     |  2 +-
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md
index 1d6621ff67d..662a4b54754 100644
--- a/docs/en/sql-reference/statements/create/view.md
+++ b/docs/en/sql-reference/statements/create/view.md
@@ -74,17 +74,20 @@ There isn’t a separate query for deleting views. To delete a view, use [DROP T
 CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ...
 ```
 
-Live views store result of the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query and are updated any time the result of the query changes. Query result as well as partial result needed to combine with new data are stored in memory providing increased performance
-for repeated queries. Live views can provide push notifications when query result changes using the [WATCH](../../../sql-reference/statements/watch.md) query.
+Live views store result of the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query and are updated any time the result of the query changes. Query result as well as partial result needed to combine with new data are stored in memory providing increased performance for repeated queries. Live views can provide push notifications when query result changes using the [WATCH](../../../sql-reference/statements/watch.md) query.
 
 Live views are triggered by insert into the innermost table specified in the query. 
 
 Live views work similarly to how a query in a distributed table works. But instead of combining partial results from different servers they combine partial result from current data with partial result from the new data. When a live view query includes a subquery then the cached partial result is only stored for the innermost subquery.
 
-!!! info "Note"
+!!! info "Limitations"
     - [Table function](../../../sql-reference/table-functions/index.md) is not supported as the innermost table.
-    - Tables that do not have inserts such as a [dictionary](../../../sql-reference/dictionaries/index.md) or a [system table](../../../operations/system-tables/index.md) will not trigger a live view. See [WITH REFRESH](#live-view-with-refresh) to enable periodic updates of a live view.
-    - Only queries where one can combine partial result from the old data plus partial result from the new data will work. Live view will not work for queries that require the complete data set to compute the final result.
+    - Tables that do not have inserts such as a [dictionary](../../../sql-reference/dictionaries/index.md), [system table](../../../operations/system-tables/index.md), a [normal view](#normal), or a [materialized view](#materialized) will not trigger a live view.
+    - Only queries where one can combine partial result from the old data plus partial result from the new data will work. Live view will not work for queries that require the complete data set to compute the final result or aggregations where the state of the aggregation must be preserved.
+    - Does not work with replicated or distributed tables where inserts are performed on different nodes.
+    - Can't be triggered by multiple tables.
+
+    See [WITH REFRESH](#live-view-with-refresh) to force periodic updates of a live view that in some cases can be used as a workaround.
 
 You can watch for changes in the live view query result using the [WATCH](../../../sql-reference/statements/watch.md) query
 
@@ -133,6 +136,14 @@ You can combine `WITH TIMEOUT` and `WITH REFRESH` clauses using an `AND` clause.
 ```sql
 CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AND REFRESH [value_in_sec] AS SELECT ...
 ```
+### Usage
+
+Most common uses of live view tables include:
+
+- Providing push notifications for query result changes to avoid polling.
+- Caching results of most frequent queries to provide immediate query results.
+- Watching for table changes and triggering a follow-up select queries.
+- Watching metrics from system tables using periodic refresh.
 
 ### Settings {#live-view-settings}
 
diff --git a/docs/en/sql-reference/statements/watch.md b/docs/en/sql-reference/statements/watch.md
index 71f26d71e85..07b050d4c4e 100644
--- a/docs/en/sql-reference/statements/watch.md
+++ b/docs/en/sql-reference/statements/watch.md
@@ -49,7 +49,7 @@ WATCH [db.]live_view LIMIT 2
 
 ## FORMAT Clause {#format-clause}
 
-The `FORMAT` clause works the same way as for the [SELECT](./select/format.md).
+The `FORMAT` clause works the same way as for the [SELECT](../../sql-reference/statements/select/format.md#format-clause).
 
 !!! info "Note"
     The [JSONEachRowWithProgress](../../../interfaces/formats/#jsoneachrowwithprogress) format should be used when watching [live view](./create/view.md#live-view) tables over the HTTP interface. The progress messages will be added to the output to keep the long-lived HTTP connection alive until the query result changes. The interval between progress messages is controlled using the [live_view_heartbeat_interval](./create/view.md#live-view-settings) setting.

From 8911a038a06b4dd77cec73243a58d980aeaf3724 Mon Sep 17 00:00:00 2001
From: Amos Bird <amosbird@gmail.com>
Date: Tue, 9 Feb 2021 11:46:06 +0800
Subject: [PATCH 0858/1238] Another fix

---
 src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp | 2 +-
 src/Interpreters/SelectQueryOptions.h               | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp
index affd9d0678f..e6061aabe94 100644
--- a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp
+++ b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp
@@ -96,7 +96,7 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr
 
         ASTPtr subquery_select = subquery.children.at(0);
 
-        auto options = SelectQueryOptions(QueryProcessingStage::Complete, data.subquery_depth + 1);
+        auto options = SelectQueryOptions(QueryProcessingStage::Complete, data.subquery_depth + 1, true);
         options.analyze(data.only_analyze);
 
         auto interpreter = InterpreterSelectWithUnionQuery(subquery_select, subquery_context, options);
diff --git a/src/Interpreters/SelectQueryOptions.h b/src/Interpreters/SelectQueryOptions.h
index 124b5a6daa0..b21d27ef5c8 100644
--- a/src/Interpreters/SelectQueryOptions.h
+++ b/src/Interpreters/SelectQueryOptions.h
@@ -35,8 +35,8 @@ struct SelectQueryOptions
     bool is_internal = false;
     bool is_subquery = false; // non-subquery can also have subquery_depth > 0, e.g. insert select
 
-    SelectQueryOptions(QueryProcessingStage::Enum stage = QueryProcessingStage::Complete, size_t depth = 0)
-        : to_stage(stage), subquery_depth(depth)
+    SelectQueryOptions(QueryProcessingStage::Enum stage = QueryProcessingStage::Complete, size_t depth = 0, bool is_subquery_ = false)
+        : to_stage(stage), subquery_depth(depth), is_subquery(is_subquery_)
     {
     }
 

From b297e0ef361d737f3b9e24485097c9a234720401 Mon Sep 17 00:00:00 2001
From: Russ Frank <rf@oden.io>
Date: Mon, 8 Feb 2021 22:48:56 -0500
Subject: [PATCH 0859/1238] feedback: use references, dont support decimal,
 rearrange struct members

---
 .../AggregateFunctionDeltaSum.cpp             | 10 ++--
 .../AggregateFunctionDeltaSum.h               | 59 +++++++++++++------
 .../0_stateless/01700_deltasum.reference      |  4 +-
 tests/queries/0_stateless/01700_deltasum.sql  |  2 +
 4 files changed, 51 insertions(+), 24 deletions(-)

diff --git a/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp b/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp
index aeb2549e826..231b730d1aa 100644
--- a/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp
+++ b/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp
@@ -30,13 +30,13 @@ AggregateFunctionPtr createAggregateFunctionDeltaSum(
 
     DataTypePtr data_type = arguments[0];
 
-    if (!isNumber(data_type))
+    if (isInteger(data_type) || isFloat(data_type))
+        return AggregateFunctionPtr(createWithNumericType<AggregationFunctionDeltaSum>(
+            *data_type, arguments, params));
+    else
         throw Exception("Illegal type " + arguments[0]->getName() + " of argument for aggregate function " + name,
-                        ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
-
-    return AggregateFunctionPtr(createWithNumericType<AggregationFunctionDeltaSum>(*arguments[0], arguments, params));
+            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
 }
-
 }
 
 void registerAggregateFunctionDeltaSum(AggregateFunctionFactory & factory)
diff --git a/src/AggregateFunctions/AggregateFunctionDeltaSum.h b/src/AggregateFunctions/AggregateFunctionDeltaSum.h
index 7d384438912..af745165379 100644
--- a/src/AggregateFunctions/AggregateFunctionDeltaSum.h
+++ b/src/AggregateFunctions/AggregateFunctionDeltaSum.h
@@ -15,14 +15,17 @@
 
 namespace DB
 {
+template <typename T>
+using DecimalOrVectorCol = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>>;
+
 template <typename T>
 struct AggregationFunctionDeltaSumData
 {
     T sum = 0;
-    bool seen_last = false;
     T last = 0;
-    bool seen_first = false;
     T first = 0;
+    bool seen_last = false;
+    bool seen_first = false;
 };
 
 template <typename T>
@@ -32,9 +35,11 @@ class AggregationFunctionDeltaSum final
 public:
     AggregationFunctionDeltaSum(const DataTypes & arguments, const Array & params)
         : IAggregateFunctionDataHelper<AggregationFunctionDeltaSumData<T>, AggregationFunctionDeltaSum<T>>{arguments, params}
-    {
-        // empty constructor
-    }
+    {}
+
+    AggregationFunctionDeltaSum()
+        : IAggregateFunctionDataHelper<AggregationFunctionDeltaSumData<T>, AggregationFunctionDeltaSum<T>>{}
+    {}
 
     String getName() const override { return "deltaSum"; }
 
@@ -42,7 +47,7 @@ public:
 
     void NO_SANITIZE_UNDEFINED ALWAYS_INLINE add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
     {
-        auto value = static_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num];
+        auto value = assert_cast<const DecimalOrVectorCol<T> &>(*columns[0]).getData()[row_num];
 
         if ((this->data(place).last < value) && this->data(place).seen_last)
         {
@@ -61,24 +66,42 @@ public:
 
     void NO_SANITIZE_UNDEFINED ALWAYS_INLINE merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
     {
-        if ((this->data(place).last < this->data(rhs).first) && this->data(place).seen_last && this->data(rhs).seen_first)
+        auto place_data = &this->data(place);
+        auto rhs_data = &this->data(rhs);
+
+        if ((place_data->last < rhs_data->first) && place_data->seen_last && rhs_data->seen_first)
         {
-            this->data(place).sum += this->data(rhs).sum + (this->data(rhs).first - this->data(place).last);
-            this->data(place).last = this->data(rhs).last;
+            // If the lhs last number seen is less than the first number the rhs saw, the lhs is before
+            // the rhs, for example [0, 2] [4, 7]. So we want to add the deltasums, but also add the
+            // difference between lhs last number and rhs first number (the 2 and 4). Then we want to
+            // take last value from the rhs, so first and last become 0 and 7.
+
+            place_data->sum += rhs_data->sum + (rhs_data->first - place_data->last);
+            place_data->last = rhs_data->last;
         }
-        else if ((this->data(rhs).last < this->data(place).first && this->data(rhs).seen_last && this->data(place).seen_first))
+        else if ((rhs_data->last < place_data->first && rhs_data->seen_last && place_data->seen_first))
         {
-            this->data(place).sum += this->data(rhs).sum + (this->data(place).first - this->data(rhs).last);
-            this->data(place).first = this->data(rhs).first;
+            // In the opposite scenario, the lhs comes after the rhs, e.g. [4, 6] [1, 2]. Since we
+            // assume the input interval states are sorted by time, we assume this is a counter
+            // reset, and therefore do *not* add the difference between our first value and the 
+            // rhs last value.
+
+            place_data->sum += rhs_data->sum;
+            place_data->first = rhs_data->first;
         }
-        else
+        else if (rhs_data->seen_first)
         {
-            this->data(place).sum += this->data(rhs).sum;
-            this->data(place).first = this->data(rhs).first;
-            this->data(place).seen_first = this->data(rhs).seen_first;
-            this->data(place).last = this->data(rhs).last;
-            this->data(place).seen_last = this->data(rhs).seen_last;
+            // If we're here then the lhs is an empty state and the rhs does have some state, so
+            // we'll just take that state.
+
+            place_data->first = rhs_data->first;
+            place_data->seen_first = rhs_data->seen_first;
+            place_data->last = rhs_data->last;
+            place_data->seen_last = rhs_data->seen_last;
+            place_data->sum = rhs_data->sum;
         }
+
+        // Otherwise lhs either has data or is unitialized, so we don't need to modify its values.
     }
 
     void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
diff --git a/tests/queries/0_stateless/01700_deltasum.reference b/tests/queries/0_stateless/01700_deltasum.reference
index d442bc1de2e..be5b176c627 100644
--- a/tests/queries/0_stateless/01700_deltasum.reference
+++ b/tests/queries/0_stateless/01700_deltasum.reference
@@ -4,4 +4,6 @@
 7
 7
 5
-5
+2
+2.25
+6.5
diff --git a/tests/queries/0_stateless/01700_deltasum.sql b/tests/queries/0_stateless/01700_deltasum.sql
index a1447cd3c7c..93edb2e477d 100644
--- a/tests/queries/0_stateless/01700_deltasum.sql
+++ b/tests/queries/0_stateless/01700_deltasum.sql
@@ -5,3 +5,5 @@ select deltaSum(arrayJoin([1, 2, 3, 0, 3, 3, 3, 3, 3, 4, 2, 3]));
 select deltaSum(arrayJoin([1, 2, 3, 0, 0, 0, 0, 3, 3, 3, 3, 3, 4, 2, 3]));
 select deltaSumMerge(rows) from (select deltaSumState(arrayJoin([0, 1])) as rows union all select deltaSumState(arrayJoin([4, 5])) as rows);
 select deltaSumMerge(rows) from (select deltaSumState(arrayJoin([4, 5])) as rows union all select deltaSumState(arrayJoin([0, 1])) as rows);
+select deltaSum(arrayJoin([2.25, 3, 4.5]));
+select deltaSumMerge(rows) from (select deltaSumState(arrayJoin([0.1, 0.3, 0.5])) as rows union all select deltaSumState(arrayJoin([4.1, 5.1, 6.6])) as rows);

From 2cde36a155d681c1c6c6cce3e08b86732edfd984 Mon Sep 17 00:00:00 2001
From: Russ Frank <rf@oden.io>
Date: Mon, 8 Feb 2021 22:53:45 -0500
Subject: [PATCH 0860/1238] dont allow col type to be decimal

---
 src/AggregateFunctions/AggregateFunctionDeltaSum.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/AggregateFunctions/AggregateFunctionDeltaSum.h b/src/AggregateFunctions/AggregateFunctionDeltaSum.h
index af745165379..9bca24a63ca 100644
--- a/src/AggregateFunctions/AggregateFunctionDeltaSum.h
+++ b/src/AggregateFunctions/AggregateFunctionDeltaSum.h
@@ -15,8 +15,6 @@
 
 namespace DB
 {
-template <typename T>
-using DecimalOrVectorCol = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>>;
 
 template <typename T>
 struct AggregationFunctionDeltaSumData
@@ -47,7 +45,7 @@ public:
 
     void NO_SANITIZE_UNDEFINED ALWAYS_INLINE add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
     {
-        auto value = assert_cast<const DecimalOrVectorCol<T> &>(*columns[0]).getData()[row_num];
+        auto value = assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num];
 
         if ((this->data(place).last < value) && this->data(place).seen_last)
         {

From 79214897de378b4c4324778c0d01fb92842f6b41 Mon Sep 17 00:00:00 2001
From: vdimir <vdimir@yandex-team.ru>
Date: Tue, 9 Feb 2021 10:33:17 +0300
Subject: [PATCH 0861/1238] Run queries in storage join twice (issue #16991)

---
 .../01051_new_any_join_engine.reference       | 31 +++++++++++++++++++
 .../0_stateless/01051_new_any_join_engine.sql | 23 ++++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/tests/queries/0_stateless/01051_new_any_join_engine.reference b/tests/queries/0_stateless/01051_new_any_join_engine.reference
index 635ae641a63..a20fea88656 100644
--- a/tests/queries/0_stateless/01051_new_any_join_engine.reference
+++ b/tests/queries/0_stateless/01051_new_any_join_engine.reference
@@ -29,3 +29,34 @@ anti left
 3	a4	
 anti right
 5		b6
+any left
+0	a1	
+1	a2	
+2	a3	b1
+3	a4	
+4	a5	b3
+any inner
+2	a3	b1
+4	a5	b3
+any right
+2	a3	b1
+2	a3	b2
+4	a5	b3
+4	a5	b4
+4	a5	b5
+5		b6
+semi left
+2	a3	b1
+4	a5	b3
+semi right
+2	a3	b1
+2	a3	b2
+4	a5	b3
+4	a5	b4
+4	a5	b5
+anti left
+0	a1	
+1	a2	
+3	a4	
+anti right
+5		b6
diff --git a/tests/queries/0_stateless/01051_new_any_join_engine.sql b/tests/queries/0_stateless/01051_new_any_join_engine.sql
index 8662d8532d4..a687a6494b5 100644
--- a/tests/queries/0_stateless/01051_new_any_join_engine.sql
+++ b/tests/queries/0_stateless/01051_new_any_join_engine.sql
@@ -57,6 +57,29 @@ SELECT * FROM t1 ANTI LEFT JOIN anti_left_join j USING(x) ORDER BY x, str, s;
 SELECT 'anti right';
 SELECT * FROM t1 ANTI RIGHT JOIN anti_right_join j USING(x) ORDER BY x, str, s;
 
+-- run queries once more time (issue #16991)
+
+SELECT 'any left';
+SELECT * FROM t1 ANY LEFT JOIN any_left_join j USING(x) ORDER BY x, str, s;
+
+SELECT 'any inner';
+SELECT * FROM t1 ANY INNER JOIN any_inner_join j USING(x) ORDER BY x, str, s;
+
+SELECT 'any right';
+SELECT * FROM t1 ANY RIGHT JOIN any_right_join j USING(x) ORDER BY x, str, s;
+
+SELECT 'semi left';
+SELECT * FROM t1 SEMI LEFT JOIN semi_left_join j USING(x) ORDER BY x, str, s;
+
+SELECT 'semi right';
+SELECT * FROM t1 SEMI RIGHT JOIN semi_right_join j USING(x) ORDER BY x, str, s;
+
+SELECT 'anti left';
+SELECT * FROM t1 ANTI LEFT JOIN anti_left_join j USING(x) ORDER BY x, str, s;
+
+SELECT 'anti right';
+SELECT * FROM t1 ANTI RIGHT JOIN anti_right_join j USING(x) ORDER BY x, str, s;
+
 DROP TABLE t1;
 
 DROP TABLE any_left_join;

From 241769ee7ca5c04f5fcb7f11c8b6b974434db98c Mon Sep 17 00:00:00 2001
From: vdimir <vdimir@yandex-team.ru>
Date: Tue, 9 Feb 2021 11:21:26 +0300
Subject: [PATCH 0862/1238] Handle syntax error for ARRAY JOIN with no args

---
 src/Interpreters/TreeRewriter.cpp                  | 14 +++++++++-----
 .../queries/0_stateless/00011_array_join_alias.sql |  3 ++-
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp
index ce4103e97ec..c4fc40785f6 100644
--- a/src/Interpreters/TreeRewriter.cpp
+++ b/src/Interpreters/TreeRewriter.cpp
@@ -40,13 +40,14 @@ namespace DB
 
 namespace ErrorCodes
 {
-    extern const int EMPTY_NESTED_TABLE;
-    extern const int LOGICAL_ERROR;
-    extern const int INVALID_JOIN_ON_EXPRESSION;
     extern const int EMPTY_LIST_OF_COLUMNS_QUERIED;
-    extern const int NOT_IMPLEMENTED;
-    extern const int UNKNOWN_IDENTIFIER;
+    extern const int EMPTY_NESTED_TABLE;
     extern const int EXPECTED_ALL_OR_ANY;
+    extern const int INVALID_JOIN_ON_EXPRESSION;
+    extern const int LOGICAL_ERROR;
+    extern const int NOT_IMPLEMENTED;
+    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+    extern const int UNKNOWN_IDENTIFIER;
 }
 
 namespace
@@ -327,6 +328,9 @@ void getArrayJoinedColumns(ASTPtr & query, TreeRewriterResult & result, const AS
         /// to get the correct number of rows.
         if (result.array_join_result_to_source.empty())
         {
+            if (select_query->arrayJoinExpressionList()->children.empty())
+                throw DB::Exception("ARRAY JOIN requires an argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+
             ASTPtr expr = select_query->arrayJoinExpressionList()->children.at(0);
             String source_name = expr->getColumnName();
             String result_name = expr->getAliasOrColumnName();
diff --git a/tests/queries/0_stateless/00011_array_join_alias.sql b/tests/queries/0_stateless/00011_array_join_alias.sql
index 228038c1509..5eafeddb8fe 100644
--- a/tests/queries/0_stateless/00011_array_join_alias.sql
+++ b/tests/queries/0_stateless/00011_array_join_alias.sql
@@ -1 +1,2 @@
-SELECT x, a FROM (SELECT arrayJoin(['Hello', 'Goodbye']) AS x, [1, 2, 3] AS arr) ARRAY JOIN arr AS a
+SELECT x, a FROM (SELECT arrayJoin(['Hello', 'Goodbye']) AS x, [1, 2, 3] AS arr) ARRAY JOIN; -- { serverError 42 }
+SELECT x, a FROM (SELECT arrayJoin(['Hello', 'Goodbye']) AS x, [1, 2, 3] AS arr) ARRAY JOIN arr AS a;

From 15ce3dc1124445c1edd7dde26ca998ed67816150 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Tue, 9 Feb 2021 11:24:39 +0300
Subject: [PATCH 0863/1238] Fix access control manager destruction order

---
 src/Interpreters/Context.cpp | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 5c99d39dc2e..ca4a313da62 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -331,7 +331,7 @@ struct ContextShared
     mutable std::optional<ExternalModelsLoader> external_models_loader;
     String default_profile_name;                            /// Default profile name used for default values.
     String system_profile_name;                             /// Profile used by system processes
-    AccessControlManager access_control_manager;
+    std::unique_ptr<AccessControlManager> access_control_manager;
     mutable UncompressedCachePtr uncompressed_cache;        /// The cache of decompressed blocks.
     mutable MarkCachePtr mark_cache;                        /// Cache of marks in compressed files.
     ProcessList process_list;                               /// Executing queries at the moment.
@@ -388,7 +388,8 @@ struct ContextShared
     Context::ConfigReloadCallback config_reload_callback;
 
     ContextShared()
-        : macros(std::make_unique<Macros>())
+        : access_control_manager(std::make_unique<AccessControlManager>())
+        , macros(std::make_unique<Macros>())
     {
         /// TODO: make it singleton (?)
         static std::atomic<size_t> num_calls{0};
@@ -434,6 +435,7 @@ struct ContextShared
         /// Preemptive destruction is important, because these objects may have a refcount to ContextShared (cyclic reference).
         /// TODO: Get rid of this.
 
+        access_control_manager.reset();
         system_logs.reset();
         embedded_dictionaries.reset();
         external_dictionaries_loader.reset();
@@ -640,7 +642,7 @@ void Context::setConfig(const ConfigurationPtr & config)
 {
     auto lock = getLock();
     shared->config = config;
-    shared->access_control_manager.setExternalAuthenticatorsConfig(*shared->config);
+    shared->access_control_manager->setExternalAuthenticatorsConfig(*shared->config);
 }
 
 const Poco::Util::AbstractConfiguration & Context::getConfigRef() const
@@ -652,25 +654,25 @@ const Poco::Util::AbstractConfiguration & Context::getConfigRef() const
 
 AccessControlManager & Context::getAccessControlManager()
 {
-    return shared->access_control_manager;
+    return *shared->access_control_manager;
 }
 
 const AccessControlManager & Context::getAccessControlManager() const
 {
-    return shared->access_control_manager;
+    return *shared->access_control_manager;
 }
 
 void Context::setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config)
 {
     auto lock = getLock();
-    shared->access_control_manager.setExternalAuthenticatorsConfig(config);
+    shared->access_control_manager->setExternalAuthenticatorsConfig(config);
 }
 
 void Context::setUsersConfig(const ConfigurationPtr & config)
 {
     auto lock = getLock();
     shared->users_config = config;
-    shared->access_control_manager.setUsersConfig(*shared->users_config);
+    shared->access_control_manager->setUsersConfig(*shared->users_config);
 }
 
 ConfigurationPtr Context::getUsersConfig()

From 798ffdc71948f617a3fa5c43409ccaa792db2432 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Tue, 9 Feb 2021 11:37:55 +0300
Subject: [PATCH 0864/1238] Fix build.

---
 src/Interpreters/inplaceBlockConversions.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/Interpreters/inplaceBlockConversions.h b/src/Interpreters/inplaceBlockConversions.h
index 3c224968315..8f72e5fe862 100644
--- a/src/Interpreters/inplaceBlockConversions.h
+++ b/src/Interpreters/inplaceBlockConversions.h
@@ -2,6 +2,7 @@
 
 #include <unordered_map>
 #include <string>
+#include <memory>
 
 
 namespace DB

From 8da9f150de807e5d8181c32ddd50e7250ffcb1d2 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Tue, 9 Feb 2021 11:51:17 +0300
Subject: [PATCH 0865/1238] Fix race in system.parts

---
 src/Storages/System/StorageSystemParts.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp
index bc5a96c6159..45e1663cb93 100644
--- a/src/Storages/System/StorageSystemParts.cpp
+++ b/src/Storages/System/StorageSystemParts.cpp
@@ -193,8 +193,9 @@ void StorageSystemParts::processNextStorage(MutableColumns & columns_, const Sto
         add_ttl_info_map(part->ttl_infos.rows_where_ttl);
 
         /// _state column should be the latest.
+        /// Do not use part->getState*, it can be changed from different thread
         if (has_state_column)
-            columns_[i++]->insert(part->stateString());
+            columns_[i++]->insert(IMergeTreeDataPart::stateToString(part_state));
     }
 }
 

From cb1194d37a9cfd32ad44aae041a382f77a37edaf Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Tue, 9 Feb 2021 12:05:54 +0300
Subject: [PATCH 0866/1238] Add final to some classes

---
 src/IO/WriteBufferFromArena.h                | 2 +-
 src/Interpreters/DDLWorker.cpp               | 2 +-
 src/Interpreters/InterpreterKillQueryQuery.h | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/IO/WriteBufferFromArena.h b/src/IO/WriteBufferFromArena.h
index b5fd9fac5a3..0e8a11fb5d6 100644
--- a/src/IO/WriteBufferFromArena.h
+++ b/src/IO/WriteBufferFromArena.h
@@ -13,7 +13,7 @@ namespace DB
   *
   * While using this object, no other allocations in arena are possible.
   */
-class WriteBufferFromArena : public WriteBuffer
+class WriteBufferFromArena final : public WriteBuffer
 {
 private:
     Arena & arena;
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index e18b92b4bd5..05370a6a3b7 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -1203,7 +1203,7 @@ void DDLWorker::runCleanupThread()
 }
 
 
-class DDLQueryStatusInputStream : public IBlockInputStream
+class DDLQueryStatusInputStream final : public IBlockInputStream
 {
 public:
 
diff --git a/src/Interpreters/InterpreterKillQueryQuery.h b/src/Interpreters/InterpreterKillQueryQuery.h
index e1ea23d3dec..788703f8e6d 100644
--- a/src/Interpreters/InterpreterKillQueryQuery.h
+++ b/src/Interpreters/InterpreterKillQueryQuery.h
@@ -12,7 +12,7 @@ class Context;
 class AccessRightsElements;
 
 
-class InterpreterKillQueryQuery : public IInterpreter
+class InterpreterKillQueryQuery final : public IInterpreter
 {
 public:
     InterpreterKillQueryQuery(const ASTPtr & query_ptr_, Context & context_)

From 28b981a76b5b1033993b9f3ec8badee4a5526203 Mon Sep 17 00:00:00 2001
From: hexiaoting <hewenting_ict@163.com>
Date: Tue, 9 Feb 2021 18:08:55 +0800
Subject: [PATCH 0867/1238] Fix style error and test cases error

---
 src/Interpreters/CollectJoinOnKeysVisitor.cpp          | 10 ++++++++--
 src/Interpreters/CollectJoinOnKeysVisitor.h            |  1 +
 src/Interpreters/TreeRewriter.cpp                      |  3 +++
 .../00878_join_unexpected_results.reference            |  2 ++
 4 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.cpp b/src/Interpreters/CollectJoinOnKeysVisitor.cpp
index 8b5fbeef7eb..ec413fe08fc 100644
--- a/src/Interpreters/CollectJoinOnKeysVisitor.cpp
+++ b/src/Interpreters/CollectJoinOnKeysVisitor.cpp
@@ -80,6 +80,9 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as
         ASTPtr right = func.arguments->children.at(1);
         auto table_numbers = getTableNumbers(left, right, data);
 
+        if (table_numbers.first != table_numbers.second && table_numbers.first > 0 && table_numbers.second > 0)
+            data.new_on_expression_valid = true;
+
         /**
           * if this is an inner join and the expression related to less than 2 tables, then move it to WHERE
           */
@@ -108,6 +111,9 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as
             ASTPtr right = func.arguments->children.at(1);
             auto table_numbers = getTableNumbers(left, right, data);
 
+            if (table_numbers.first != table_numbers.second && table_numbers.first > 0 && table_numbers.second > 0)
+                data.new_on_expression_valid = true;
+
             if (data.kind == ASTTableJoin::Kind::Inner
                 && (table_numbers.first == table_numbers.second || table_numbers.first == 0 || table_numbers.second == 0))
             {
@@ -116,7 +122,7 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as
                 else
                     data.new_where_conditions = makeASTFunction("and", data.new_where_conditions, ast->clone());
 
-		return;
+                return;
             }
             else
             {
@@ -127,7 +133,7 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as
 
         if (data.asof_left_key || data.asof_right_key)
             throw Exception("ASOF JOIN expects exactly one inequality in ON section. Unexpected '" + queryToString(ast) + "'",
-                            ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
+                ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
 
         ASTPtr left = func.arguments->children.at(0);
         ASTPtr right = func.arguments->children.at(1);
diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.h b/src/Interpreters/CollectJoinOnKeysVisitor.h
index aa2fd80d07c..64547baf7d7 100644
--- a/src/Interpreters/CollectJoinOnKeysVisitor.h
+++ b/src/Interpreters/CollectJoinOnKeysVisitor.h
@@ -37,6 +37,7 @@ public:
         ASTPtr new_on_expression{};
         ASTPtr new_where_conditions{};
         bool has_some{false};
+        bool new_on_expression_valid{false};
 
         void addJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, const std::pair<size_t, size_t> & table_no);
         void addAsofJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, const std::pair<size_t, size_t> & table_no,
diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp
index 332734e4ca6..9f788703704 100644
--- a/src/Interpreters/TreeRewriter.cpp
+++ b/src/Interpreters/TreeRewriter.cpp
@@ -425,6 +425,9 @@ void collectJoinedColumns(TableJoin & analyzed_join, const ASTSelectQuery & sele
                             ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
         if (is_asof)
             data.asofToJoinKeys();
+        else if (!data.new_on_expression_valid)
+            throw Exception("JOIN expects left and right joined keys from two joined table in ON section. Unexpected '" + queryToString(data.new_on_expression) + "'",
+                ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
         else if (data.new_where_conditions != nullptr)
         {
             table_join.on_expression = data.new_on_expression;
diff --git a/tests/queries/0_stateless/00878_join_unexpected_results.reference b/tests/queries/0_stateless/00878_join_unexpected_results.reference
index aaf586c2767..65fcbc257ca 100644
--- a/tests/queries/0_stateless/00878_join_unexpected_results.reference
+++ b/tests/queries/0_stateless/00878_join_unexpected_results.reference
@@ -23,6 +23,7 @@ join_use_nulls = 1
 -
 \N	\N
 -
+1	1	\N	\N
 2	2	\N	\N
 -
 1	1	1	1
@@ -50,6 +51,7 @@ join_use_nulls = 0
 -
 -
 -
+1	1	0	0
 2	2	0	0
 -
 1	1	1	1

From 4859657c423c02770da8d6c513e0e42b05f42ccd Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Tue, 9 Feb 2021 13:21:54 +0300
Subject: [PATCH 0868/1238] fix int field to decimal conversion

---
 src/DataTypes/DataTypeDecimalBase.h                 | 13 ++++++++-----
 .../01178_int_field_to_decimal.reference            |  2 ++
 .../0_stateless/01178_int_field_to_decimal.sql      | 10 ++++++++++
 3 files changed, 20 insertions(+), 5 deletions(-)
 create mode 100644 tests/queries/0_stateless/01178_int_field_to_decimal.reference
 create mode 100644 tests/queries/0_stateless/01178_int_field_to_decimal.sql

diff --git a/src/DataTypes/DataTypeDecimalBase.h b/src/DataTypes/DataTypeDecimalBase.h
index d9128151403..c861b3bcac0 100644
--- a/src/DataTypes/DataTypeDecimalBase.h
+++ b/src/DataTypes/DataTypeDecimalBase.h
@@ -120,14 +120,17 @@ public:
         return DecimalUtils::getFractionalPart(x, scale);
     }
 
-    T maxWholeValue() const { return getScaleMultiplier(maxPrecision() - scale) - T(1); }
+    T maxWholeValue() const { return getScaleMultiplier(precision - scale) - T(1); }
 
-    bool canStoreWhole(T x) const
+    template<typename U>
+    bool canStoreWhole(U x) const
     {
+        static_assert(std::is_signed_v<typename T::NativeType>);
         T max = maxWholeValue();
-        if (x > max || x < -max)
-            return false;
-        return true;
+        if constexpr (std::is_signed_v<U>)
+            return -max <= x && x <= max;
+        else
+            return x <= static_cast<std::make_unsigned_t<typename T::NativeType>>(max.value);
     }
 
     /// @returns multiplier for U to become T with correct scale
diff --git a/tests/queries/0_stateless/01178_int_field_to_decimal.reference b/tests/queries/0_stateless/01178_int_field_to_decimal.reference
new file mode 100644
index 00000000000..6c256ba2032
--- /dev/null
+++ b/tests/queries/0_stateless/01178_int_field_to_decimal.reference
@@ -0,0 +1,2 @@
+9.00000000
+10.00000000
diff --git a/tests/queries/0_stateless/01178_int_field_to_decimal.sql b/tests/queries/0_stateless/01178_int_field_to_decimal.sql
new file mode 100644
index 00000000000..bbd72e57d70
--- /dev/null
+++ b/tests/queries/0_stateless/01178_int_field_to_decimal.sql
@@ -0,0 +1,10 @@
+select d from values('d Decimal(8, 8)', 0, 1) where d not in (-1, 0); -- { serverError 69 }
+select d from values('d Decimal(8, 8)', 0, 2) where d not in (1, 0); -- { serverError 69 }
+select d from values('d Decimal(9, 8)', 0, 3) where d not in (-9223372036854775808, 0); -- { serverError 69 }
+select d from values('d Decimal(9, 8)', 0, 4) where d not in (18446744073709551615, 0); -- { serverError 69 }
+select d from values('d Decimal(18, 8)', 0, 5) where d not in (-9223372036854775808, 0); -- { serverError 69 }
+select d from values('d Decimal(18, 8)', 0, 6) where d not in (18446744073709551615, 0); -- { serverError 69 }
+select d from values('d Decimal(26, 8)', 0, 7) where d not in (-9223372036854775808, 0); -- { serverError 69 }
+select d from values('d Decimal(27, 8)', 0, 8) where d not in (18446744073709551615, 0); -- { serverError 69 }
+select d from values('d Decimal(27, 8)', 0, 9) where d not in (-9223372036854775808, 0);
+select d from values('d Decimal(28, 8)', 0, 10) where d not in (18446744073709551615, 0);

From 34cc9d54308fc24d93c33c876d2db7833589bdea Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Tue, 9 Feb 2021 14:54:13 +0300
Subject: [PATCH 0869/1238] Update 01711_cte_subquery_fix.sql

---
 tests/queries/0_stateless/01711_cte_subquery_fix.sql | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/01711_cte_subquery_fix.sql b/tests/queries/0_stateless/01711_cte_subquery_fix.sql
index 19eed6cc612..ddea548eada 100644
--- a/tests/queries/0_stateless/01711_cte_subquery_fix.sql
+++ b/tests/queries/0_stateless/01711_cte_subquery_fix.sql
@@ -1,2 +1,3 @@
-create or replace table t engine = Memory as with cte as (select * from numbers(10)) select * from cte;
+drop table if exists t;
+create table t engine = Memory as with cte as (select * from numbers(10)) select * from cte;
 drop table t;

From fe8b414a693e591825c05ec93f8043d5efffb117 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Mon, 8 Feb 2021 17:15:35 +0300
Subject: [PATCH 0870/1238] cleanup

---
 src/Processors/Transforms/WindowTransform.cpp | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index 568a1c78b0f..bb52b8779ba 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -65,12 +65,15 @@ WindowTransform::WindowTransform(const Block & input_header_,
     }
 
 // FIXME this is just all wrong. Disabled desc order for now.
-    if (window_description.frame.type == WindowFrame::FrameType::Range
+    const auto & frame = window_description.frame;
+    if (frame.type == WindowFrame::FrameType::Range
         && window_description.order_by.size() == 1
-        && window_description.order_by[0].direction < 0)
+        && window_description.order_by[0].direction < 0
+        && (frame.begin_type == WindowFrame::BoundaryType::Offset
+            || frame.end_type == WindowFrame::BoundaryType::Offset))
     {
         throw Exception(ErrorCodes::NOT_IMPLEMENTED,
-            "ORDER BY DESC for RANGE frames is not implemented");
+            "ORDER BY DESC for RANGE OFFSET frames is not implemented");
     }
 //     // If we have at least one RANGE OFFSET frame boundary, no UNBOUNDED frame
 //     // boundaries, and the ORDER BY is DESC, we have to swap the frame end
@@ -417,7 +420,7 @@ void WindowTransform::advanceFrameStartRangeOffsetDispatch()
     {
         advanceFrameStartRangeOffset<ColumnVector<UInt8>>();
     }
-    else if(typeid_cast<const ColumnVector<Int8> *>(column))
+    else if (typeid_cast<const ColumnVector<Int8> *>(column))
     {
         advanceFrameStartRangeOffset<ColumnVector<Int8>>();
     }

From 6e40b9fb6c0eb0bfa2188af0fa146a55b79b4ad3 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Tue, 9 Feb 2021 14:56:11 +0300
Subject: [PATCH 0871/1238] fix for the DESC frame

---
 src/Interpreters/WindowDescription.cpp        |  16 ++-
 src/Interpreters/WindowDescription.h          |   8 +-
 src/Processors/Transforms/WindowTransform.cpp | 111 +++++++-----------
 .../01591_window_functions.reference          |  49 ++++++++
 .../0_stateless/01591_window_functions.sql    |  19 +++
 5 files changed, 127 insertions(+), 76 deletions(-)

diff --git a/src/Interpreters/WindowDescription.cpp b/src/Interpreters/WindowDescription.cpp
index b5fe91188fd..a410ba0de1b 100644
--- a/src/Interpreters/WindowDescription.cpp
+++ b/src/Interpreters/WindowDescription.cpp
@@ -54,7 +54,9 @@ void WindowFrame::toString(WriteBuffer & buf) const
     }
     else if (begin_type == BoundaryType::Unbounded)
     {
-        buf << "UNBOUNDED PRECEDING";
+        buf << "UNBOUNDED";
+        buf << " "
+            << (begin_preceding ? "PRECEDING" : "FOLLOWING");
     }
     else
     {
@@ -69,7 +71,9 @@ void WindowFrame::toString(WriteBuffer & buf) const
     }
     else if (end_type == BoundaryType::Unbounded)
     {
-        buf << "UNBOUNDED PRECEDING";
+        buf << "UNBOUNDED";
+        buf << " "
+            << (end_preceding ? "PRECEDING" : "FOLLOWING");
     }
     else
     {
@@ -119,10 +123,10 @@ void WindowFrame::checkValid() const
     {
         if (!(end_preceding && !begin_preceding))
         {
-            if (begin_offset <= end_offset)
-            {
-                return;
-            }
+            // Should probably check here that starting offset is less than
+            // ending offset, but with regard to ORDER BY direction for RANGE
+            // frames.
+            return;
         }
     }
 
diff --git a/src/Interpreters/WindowDescription.h b/src/Interpreters/WindowDescription.h
index 9388a4c7cf8..ee181680b94 100644
--- a/src/Interpreters/WindowDescription.h
+++ b/src/Interpreters/WindowDescription.h
@@ -38,8 +38,9 @@ struct WindowFrame
 
     FrameType type = FrameType::Range;
 
-    // UNBOUNDED FOLLOWING for the frame end doesn't make much sense, so
-    // Unbounded here means UNBOUNDED PRECEDING.
+    // UNBOUNDED FOLLOWING for the frame end is forbidden by the standard, but for
+    // uniformity the begin_preceding still has to be set to true for UNBOUNDED
+    // frame start.
     // Offset might be both preceding and following, controlled by begin_preceding,
     // but the offset value must be positive.
     BoundaryType begin_type = BoundaryType::Unbounded;
@@ -47,7 +48,8 @@ struct WindowFrame
     int64_t begin_offset = 0;
     bool begin_preceding = true;
 
-    // Here as well, Unbounded is UNBOUNDED FOLLOWING.
+    // Here as well, Unbounded can only be UNBOUNDED FOLLOWING, and end_preceding
+    // must be false.
     BoundaryType end_type = BoundaryType::Current;
     int64_t end_offset = 0;
     bool end_preceding = false;
diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index bb52b8779ba..944c8047be6 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -63,38 +63,6 @@ WindowTransform::WindowTransform(const Block & input_header_,
         order_by_indices.push_back(
             input_header.getPositionByName(column.column_name));
     }
-
-// FIXME this is just all wrong. Disabled desc order for now.
-    const auto & frame = window_description.frame;
-    if (frame.type == WindowFrame::FrameType::Range
-        && window_description.order_by.size() == 1
-        && window_description.order_by[0].direction < 0
-        && (frame.begin_type == WindowFrame::BoundaryType::Offset
-            || frame.end_type == WindowFrame::BoundaryType::Offset))
-    {
-        throw Exception(ErrorCodes::NOT_IMPLEMENTED,
-            "ORDER BY DESC for RANGE OFFSET frames is not implemented");
-    }
-//     // If we have at least one RANGE OFFSET frame boundary, no UNBOUNDED frame
-//     // boundaries, and the ORDER BY is DESC, we have to swap the frame end
-//     // and frame start. This is tricky and I'm not sure how to explain the
-//     // reason, so I can only suggest to draw various RANGE OFFSET frames with
-//     // ASC and DESC orders to understand...
-//     // swap is a no-op if both frames are CURRENT ROW, so use a simpler condition.
-//     auto & frame = window_description.frame;
-//     if (frame.type == WindowFrame::FrameType::Range
-//         && (frame.end_type != WindowFrame::BoundaryType::Unbounded
-//             && frame.begin_type != WindowFrame::BoundaryType::Unbounded)
-//         && window_description.order_by.size() == 1
-//         && window_description.order_by[0].direction < 0)
-//     {
-//         std::swap(frame.begin_type, frame.end_type);
-//         std::swap(frame.begin_preceding, frame.end_preceding);
-//         std::swap(frame.begin_offset, frame.end_offset);
-//
-//         fmt::print(stderr, "swapped frame boundaries\n");
-//     }
-
 }
 
 WindowTransform::~WindowTransform()
@@ -357,11 +325,12 @@ static int compareValuesWithOffset(const ColumnType * compared_column,
         overflow_to_negative = offset < 0;
     }
 
-    fmt::print(stderr,
-        "compared [{}] = {}, ref [{}] = {}, offset {} overflow {} to negative {}\n",
-        compared_row, toString(compared_value),
-        reference_row, toString(reference_value),
-        toString(offset), is_overflow, overflow_to_negative);
+//    fmt::print(stderr,
+//        "compared [{}] = {}, ref [{}] = {}, offset {} preceding {} overflow {} to negative {}\n",
+//        compared_row, toString(compared_value),
+//        reference_row, toString(reference_value),
+//        toString(offset), offset_is_preceding,
+//        is_overflow, overflow_to_negative);
 
     if (is_overflow)
     {
@@ -387,7 +356,10 @@ static int compareValuesWithOffset(const ColumnType * compared_column,
 template <typename ColumnType>
 void WindowTransform::advanceFrameStartRangeOffset()
 {
+    // See the comment for advanceFrameEndRangeOffset().
     const int direction = window_description.order_by[0].direction;
+    const bool preceding = window_description.frame.begin_preceding
+        == (direction > 0);
     const auto * reference_column = assert_cast<const ColumnType *>(
         inputAt(current_row)[order_by_indices[0]].get());
     for (; frame_start < partition_end; advanceRowNumber(frame_start))
@@ -399,7 +371,7 @@ void WindowTransform::advanceFrameStartRangeOffset()
         if (compareValuesWithOffset(compared_column, frame_start.row,
             reference_column, current_row.row,
             window_description.frame.begin_offset,
-            window_description.frame.begin_preceding)
+            preceding)
                 * direction >= 0)
         {
             frame_started = true;
@@ -410,26 +382,40 @@ void WindowTransform::advanceFrameStartRangeOffset()
     frame_started = partition_ended;
 }
 
+// Helper macros to dispatch on type of the ORDER BY column
+#define APPLY_FOR_ONE_TYPE(FUNCTION, TYPE) \
+else if (typeid_cast<const TYPE *>(column)) \
+{ \
+    FUNCTION<TYPE>(); \
+}
+
+#define APPLY_FOR_TYPES(FUNCTION) \
+if (false) /* NOLINT */ \
+{ \
+    /* Do nothing, a starter condition. */ \
+} \
+APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int8>) \
+APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<UInt8>) \
+APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int16>) \
+APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<UInt16>) \
+APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int32>) \
+APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<UInt32>) \
+APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int64>) \
+APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<UInt64>) \
+else \
+{ \
+    throw Exception(ErrorCodes::NOT_IMPLEMENTED, \
+        "The RANGE OFFSET frame for '{}' ORDER BY column is not implemented", \
+        demangle(typeid(*column).name())); \
+}
+
 void WindowTransform::advanceFrameStartRangeOffsetDispatch()
 {
     // Dispatch on the type of the ORDER BY column.
     assert(order_by_indices.size() == 1);
     const IColumn * column = inputAt(current_row)[order_by_indices[0]].get();
 
-    if (typeid_cast<const ColumnVector<UInt8> *>(column))
-    {
-        advanceFrameStartRangeOffset<ColumnVector<UInt8>>();
-    }
-    else if (typeid_cast<const ColumnVector<Int8> *>(column))
-    {
-        advanceFrameStartRangeOffset<ColumnVector<Int8>>();
-    }
-    else
-    {
-        throw Exception(ErrorCodes::NOT_IMPLEMENTED,
-            "The RANGE OFFSET frame start for '{}' ORDER BY column is not implemented",
-            demangle(typeid(*column).name()));
-    }
+    APPLY_FOR_TYPES(advanceFrameStartRangeOffset)
 }
 
 void WindowTransform::advanceFrameStart()
@@ -647,7 +633,11 @@ void WindowTransform::advanceFrameEndRowsOffset()
 template <typename ColumnType>
 void WindowTransform::advanceFrameEndRangeOffset()
 {
+    // PRECEDING/FOLLOWING change direction for DESC order.
+    // See CD 9075-2:201?(E) 7.14 <window clause> p. 429.
     const int direction = window_description.order_by[0].direction;
+    const bool preceding = window_description.frame.end_preceding
+        == (direction > 0);
     const auto * reference_column = assert_cast<const ColumnType *>(
         inputAt(current_row)[order_by_indices[0]].get());
     for (; frame_end < partition_end; advanceRowNumber(frame_end))
@@ -660,7 +650,7 @@ void WindowTransform::advanceFrameEndRangeOffset()
         if (compareValuesWithOffset(compared_column, frame_end.row,
             reference_column, current_row.row,
             window_description.frame.end_offset,
-            window_description.frame.end_preceding)
+            preceding)
                 * direction > 0)
         {
             frame_ended = true;
@@ -677,20 +667,7 @@ void WindowTransform::advanceFrameEndRangeOffsetDispatch()
     assert(order_by_indices.size() == 1);
     const IColumn * column = inputAt(current_row)[order_by_indices[0]].get();
 
-    if (typeid_cast<const ColumnVector<UInt8> *>(column))
-    {
-        advanceFrameEndRangeOffset<ColumnVector<UInt8>>();
-    }
-    else if (typeid_cast<const ColumnVector<Int8> *>(column))
-    {
-        advanceFrameEndRangeOffset<ColumnVector<Int8>>();
-    }
-    else
-    {
-        throw Exception(ErrorCodes::NOT_IMPLEMENTED,
-            "The RANGE OFFSET frame end for '{}' ORDER BY column is not implemented",
-            demangle(typeid(*column).name()));
-    }
+    APPLY_FOR_TYPES(advanceFrameEndRangeOffset)
 }
 
 void WindowTransform::advanceFrameEnd()
diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference
index b8b3ae17830..4ce4503c3ca 100644
--- a/tests/queries/0_stateless/01591_window_functions.reference
+++ b/tests/queries/0_stateless/01591_window_functions.reference
@@ -771,3 +771,52 @@ order by x;
 125	124	127	4
 126	125	127	3
 127	126	127	2
+-- RANGE OFFSET ORDER BY DESC
+select x, min(x) over w, max(x) over w, count(x) over w from (
+    select toUInt8(number) x from numbers(11)) t
+window w as (order by x desc range between 1 preceding and 2 following)
+order by x
+settings max_block_size = 1;
+0	0	1	2
+1	0	2	3
+2	0	3	4
+3	1	4	4
+4	2	5	4
+5	3	6	4
+6	4	7	4
+7	5	8	4
+8	6	9	4
+9	7	10	4
+10	8	10	3
+select x, min(x) over w, max(x) over w, count(x) over w from (
+    select toUInt8(number) x from numbers(11)) t
+window w as (order by x desc range between 1 preceding and unbounded following)
+order by x
+settings max_block_size = 2;
+0	0	1	2
+1	0	2	3
+2	0	3	4
+3	0	4	5
+4	0	5	6
+5	0	6	7
+6	0	7	8
+7	0	8	9
+8	0	9	10
+9	0	10	11
+10	0	10	11
+select x, min(x) over w, max(x) over w, count(x) over w from (
+    select toUInt8(number) x from numbers(11)) t
+window w as (order by x desc range between unbounded preceding and 2 following)
+order by x
+settings max_block_size = 3;
+0	0	10	11
+1	0	10	11
+2	0	10	11
+3	1	10	10
+4	2	10	9
+5	3	10	8
+6	4	10	7
+7	5	10	6
+8	6	10	5
+9	7	10	4
+10	8	10	3
diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql
index cf2dd28a54c..8ac07e2df2a 100644
--- a/tests/queries/0_stateless/01591_window_functions.sql
+++ b/tests/queries/0_stateless/01591_window_functions.sql
@@ -241,3 +241,22 @@ from (
 )
 window w as (order by x range between 1 preceding and 2 following)
 order by x;
+
+-- RANGE OFFSET ORDER BY DESC
+select x, min(x) over w, max(x) over w, count(x) over w from (
+    select toUInt8(number) x from numbers(11)) t
+window w as (order by x desc range between 1 preceding and 2 following)
+order by x
+settings max_block_size = 1;
+
+select x, min(x) over w, max(x) over w, count(x) over w from (
+    select toUInt8(number) x from numbers(11)) t
+window w as (order by x desc range between 1 preceding and unbounded following)
+order by x
+settings max_block_size = 2;
+
+select x, min(x) over w, max(x) over w, count(x) over w from (
+    select toUInt8(number) x from numbers(11)) t
+window w as (order by x desc range between unbounded preceding and 2 following)
+order by x
+settings max_block_size = 3;

From 7b02aa0c591f4edadbb9fb4bedba8a8bb2172582 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Tue, 9 Feb 2021 15:02:28 +0300
Subject: [PATCH 0872/1238] Update README.md

---
 README.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/README.md b/README.md
index 53778c79bef..1c6a021c00c 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,6 @@ ClickHouse® is an open-source column-oriented database management system that a
 * [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-ly9m4w1x-6j7x5Ts_pQZqrctAbRZ3cg) and [Telegram](https://telegram.me/clickhouse_en) allow to chat with ClickHouse users in real-time.
 * [Blog](https://clickhouse.yandex/blog/en/) contains various ClickHouse-related articles, as well as announcements and reports about events.
 * [Code Browser](https://clickhouse.tech/codebrowser/html_report/ClickHouse/index.html) with syntax highlight and navigation.
-* [Yandex.Messenger channel](https://yandex.ru/chat/#/join/20e380d9-c7be-4123-ab06-e95fb946975e) shares announcements and useful links in Russian.
 * [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any.
 * You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person.
 

From 709e74d7ff8cfcb40b840f2540ee3d93f2fd4f2f Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Tue, 9 Feb 2021 15:14:30 +0300
Subject: [PATCH 0873/1238] Update insert-into.md

---
 docs/ru/sql-reference/statements/insert-into.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/docs/ru/sql-reference/statements/insert-into.md b/docs/ru/sql-reference/statements/insert-into.md
index 61dc1170883..0ad85ed0166 100644
--- a/docs/ru/sql-reference/statements/insert-into.md
+++ b/docs/ru/sql-reference/statements/insert-into.md
@@ -63,8 +63,6 @@ SELECT * FROM insert_select_testtable
 -   Значения, вычисляемые из `DEFAULT` выражений, указанных в определении таблицы.
 -   Нули и пустые строки, если `DEFAULT` не определены.
 
-Если [strict_insert_defaults=1](../../operations/settings/settings.md), то столбцы, для которых не определены `DEFAULT`, необходимо перечислить в запросе.
-
 В INSERT можно передавать данные любого [формата](../../interfaces/formats.md#formats), который поддерживает ClickHouse. Для этого формат необходимо указать в запросе в явном виде:
 
 ``` sql

From 5b2e0522597b045ac9f6ac772d18c04a5742bf67 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Tue, 9 Feb 2021 15:14:53 +0300
Subject: [PATCH 0874/1238] Update insert-into.md

---
 docs/en/sql-reference/statements/insert-into.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/docs/en/sql-reference/statements/insert-into.md b/docs/en/sql-reference/statements/insert-into.md
index facc1b19dad..c517a515ab7 100644
--- a/docs/en/sql-reference/statements/insert-into.md
+++ b/docs/en/sql-reference/statements/insert-into.md
@@ -62,8 +62,6 @@ If a list of columns doesn't include all existing columns, the rest of the colum
 -   The values calculated from the `DEFAULT` expressions specified in the table definition.
 -   Zeros and empty strings, if `DEFAULT` expressions are not defined.
 
-If [strict\_insert\_defaults=1](../../operations/settings/settings.md), columns that do not have `DEFAULT` defined must be listed in the query.
-
 Data can be passed to the INSERT in any [format](../../interfaces/formats.md#formats) supported by ClickHouse. The format must be specified explicitly in the query:
 
 ``` sql

From d543632763aeac6db4a1b08c444aa08838bb84fd Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Tue, 9 Feb 2021 17:14:09 +0300
Subject: [PATCH 0875/1238] Update adopters.md

---
 docs/en/introduction/adopters.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md
index c7230f2f080..454d856f779 100644
--- a/docs/en/introduction/adopters.md
+++ b/docs/en/introduction/adopters.md
@@ -81,6 +81,7 @@ toc_title: Adopters
 | <a href="https://posthog.com/" class="favicon">PostHog</a> | Product Analytics | Main Product | — | — | [Release Notes, Oct 2020](https://posthog.com/blog/the-posthog-array-1-15-0) |
 | <a href="https://postmates.com/" class="favicon">Postmates</a> | Delivery | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=188) |
 | <a href="http://www.pragma-innovation.fr/" class="favicon">Pragma Innovation</a> | Telemetry and Big Data Analysis | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) |
+| <a href="https://prana-system.com/en/" class="favicon">PRANA</a> | Industrial predictive analytics | Main product | — | — | [News (russian), Feb 2021](https://habr.com/en/news/t/541392/) |
 | <a href="https://www.qingcloud.com/" class="favicon">QINGCLOUD</a> | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) |
 | <a href="https://qrator.net" class="favicon">Qrator</a> | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) |
 | <a href="https://www.rbinternational.com/" class="favicon">Raiffeisenbank</a> | Banking | Analytics | — | — | [Lecture in Russian, December 2020](https://cs.hse.ru/announcements/421965599.html) |

From 5e0095c70b6f8e68c512e7a8c9d918364a9b24da Mon Sep 17 00:00:00 2001
From: feng lv <fenglv15@mails.ucas.ac.cn>
Date: Tue, 9 Feb 2021 14:17:48 +0000
Subject: [PATCH 0876/1238] fix a problem in
 ArithmeticOperationsInAgrFuncOptimize

---
 .../ArithmeticOperationsInAgrFuncOptimize.cpp       | 13 +++++++++----
 ...te_aggregate_function_case_insensitive.reference |  6 ++++++
 ..._rewrite_aggregate_function_case_insensitive.sql |  6 ++++++
 3 files changed, 21 insertions(+), 4 deletions(-)
 create mode 100644 tests/queries/0_stateless/01703_rewrite_aggregate_function_case_insensitive.reference
 create mode 100644 tests/queries/0_stateless/01703_rewrite_aggregate_function_case_insensitive.sql

diff --git a/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.cpp b/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.cpp
index c7d32ba2721..5c071ed9134 100644
--- a/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.cpp
+++ b/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.cpp
@@ -7,6 +7,8 @@
 #include <Parsers/ASTTablesInSelectQuery.h>
 #include <Interpreters/ArithmeticOperationsInAgrFuncOptimize.h>
 
+#include <Poco/String.h>
+
 namespace DB
 {
 
@@ -95,9 +97,12 @@ ASTPtr tryExchangeFunctions(const ASTFunction & func)
            {"max", {"multiply", "divide", "plus", "minus"}},
            {"avg", {"multiply", "divide", "plus", "minus"}}};
 
+    /// Aggregate functions[sum|min|max|avg] is case-insensitive, so we use lower cases name
+    auto lower_name = Poco::toLower(func.name);
+
     const ASTFunction * child_func = getInternalFunction(func);
-    if (!child_func || !child_func->arguments || child_func->arguments->children.size() != 2 ||
-        !supported.count(func.name) || !supported.find(func.name)->second.count(child_func->name))
+    if (!child_func || !child_func->arguments || child_func->arguments->children.size() != 2 || !supported.count(lower_name)
+        || !supported.find(lower_name)->second.count(child_func->name))
         return {};
 
     /// Cannot rewrite function with alias cause alias could become undefined
@@ -116,12 +121,12 @@ ASTPtr tryExchangeFunctions(const ASTFunction & func)
         if (child_func->name == "divide")
             return {};
 
-        const String & new_name = changeNameIfNeeded(func.name, child_func->name, *first_literal);
+        const String & new_name = changeNameIfNeeded(lower_name, child_func->name, *first_literal);
         optimized_ast = exchangeExtractFirstArgument(new_name, *child_func);
     }
     else if (second_literal) /// second or both are consts
     {
-        const String & new_name = changeNameIfNeeded(func.name, child_func->name, *second_literal);
+        const String & new_name = changeNameIfNeeded(lower_name, child_func->name, *second_literal);
         optimized_ast = exchangeExtractSecondArgument(new_name, *child_func);
     }
 
diff --git a/tests/queries/0_stateless/01703_rewrite_aggregate_function_case_insensitive.reference b/tests/queries/0_stateless/01703_rewrite_aggregate_function_case_insensitive.reference
new file mode 100644
index 00000000000..e92a057f8c3
--- /dev/null
+++ b/tests/queries/0_stateless/01703_rewrite_aggregate_function_case_insensitive.reference
@@ -0,0 +1,6 @@
+22.5
+SELECT sum(number) / 2
+FROM numbers(10)
+22.5
+SELECT sum(number) / 2
+FROM numbers(10)
diff --git a/tests/queries/0_stateless/01703_rewrite_aggregate_function_case_insensitive.sql b/tests/queries/0_stateless/01703_rewrite_aggregate_function_case_insensitive.sql
new file mode 100644
index 00000000000..1c5271b4717
--- /dev/null
+++ b/tests/queries/0_stateless/01703_rewrite_aggregate_function_case_insensitive.sql
@@ -0,0 +1,6 @@
+SELECT sum(number / 2) FROM numbers(10);
+EXPLAIN SYNTAX SELECT sum(number / 2) FROM numbers(10);
+
+
+SELECT Sum(number / 2) FROM numbers(10);
+EXPLAIN SYNTAX SELECT Sum(number / 2) FROM numbers(10);

From c18749a704cf2982d9e3eba29930c1c6ccf9f8c3 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Tue, 9 Feb 2021 17:44:04 +0300
Subject: [PATCH 0877/1238] fix a bug and add some tests

---
 docker/test/performance-comparison/perf.py    |  3 ++-
 src/Parsers/ExpressionElementParsers.cpp      |  2 --
 src/Processors/Transforms/WindowTransform.cpp |  1 +
 tests/performance/window_functions.xml        | 27 +++++++++++++++++++
 .../01591_window_functions.reference          | 16 +++++++++++
 .../0_stateless/01591_window_functions.sql    |  6 +++++
 6 files changed, 52 insertions(+), 3 deletions(-)

diff --git a/docker/test/performance-comparison/perf.py b/docker/test/performance-comparison/perf.py
index 48479161ef9..f1c5df146aa 100755
--- a/docker/test/performance-comparison/perf.py
+++ b/docker/test/performance-comparison/perf.py
@@ -44,6 +44,7 @@ parser.add_argument('--port', nargs='*', default=[9000], help="Space-separated l
 parser.add_argument('--runs', type=int, default=1, help='Number of query runs per server.')
 parser.add_argument('--max-queries', type=int, default=None, help='Test no more than this number of queries, chosen at random.')
 parser.add_argument('--queries-to-run', nargs='*', type=int, default=None, help='Space-separated list of indexes of queries to test.')
+parser.add_argument('--max-query-seconds', type=int, default=10, help='For how many seconds at most a query is allowed to run. The script finishes with error if this time is exceeded.')
 parser.add_argument('--profile-seconds', type=int, default=0, help='For how many seconds to profile a query for which the performance has changed.')
 parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.')
 parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.')
@@ -323,7 +324,7 @@ for query_index in queries_to_run:
             server_seconds += elapsed
             print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{elapsed}')
 
-            if elapsed > 10:
+            if elapsed > args.max_query_seconds:
                 # Stop processing pathologically slow queries, to avoid timing out
                 # the entire test task. This shouldn't really happen, so we don't
                 # need much handling for this case and can just exit.
diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp
index 9622d6a273b..e7cd85798b9 100644
--- a/src/Parsers/ExpressionElementParsers.cpp
+++ b/src/Parsers/ExpressionElementParsers.cpp
@@ -667,8 +667,6 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p
                     throw Exception(ErrorCodes::NOT_IMPLEMENTED,
                         "Frame end UNBOUNDED PRECEDING is not implemented");
                 }
-
-                node->frame.end_offset = -node->frame.end_offset;
             }
             else if (keyword_following.ignore(pos, expected))
             {
diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index 944c8047be6..8be65c6451c 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -386,6 +386,7 @@ void WindowTransform::advanceFrameStartRangeOffset()
 #define APPLY_FOR_ONE_TYPE(FUNCTION, TYPE) \
 else if (typeid_cast<const TYPE *>(column)) \
 { \
+    /* NOLINT clang-tidy you're dumb, I can't put FUNCTION in braces here. */ \
     FUNCTION<TYPE>(); \
 }
 
diff --git a/tests/performance/window_functions.xml b/tests/performance/window_functions.xml
index 93983e9b1bf..74df2b64a3b 100644
--- a/tests/performance/window_functions.xml
+++ b/tests/performance/window_functions.xml
@@ -59,4 +59,31 @@
         format Null
     ]]></query>
 
+    <!--
+        Rows from the hottest 21-second intervals, to test the RANGE OFFSET frame.
+    -->
+    <query>
+        SELECT * FROM
+            (SELECT EventTime,
+                count(*) OVER (ORDER BY EventTime ASC
+                    RANGE BETWEEN 10 PRECEDING AND 10 FOLLOWING) AS c
+            FROM hits_10m_single)
+        FORMAT Null
+    </query>
+
+    <!--
+        This is kind of the same, except the following frame boundary is not
+        inclusive. Should be much faster, because we don't have to reset the
+        aggregation state. After we support subtraction of aggregate state, the
+        above query should become closer in performance to this one.
+    -->
+    <query>
+        select * from
+            (select EventTime,
+                count(*) over (partition by
+                    floor((toUInt32(EventTime) + 10 + 1) / 20)) as c
+            from hits_10m_single)
+        format Null
+    </query>
+
 </test>
diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference
index 4ce4503c3ca..9506d3ce75e 100644
--- a/tests/queries/0_stateless/01591_window_functions.reference
+++ b/tests/queries/0_stateless/01591_window_functions.reference
@@ -820,3 +820,19 @@ settings max_block_size = 3;
 8	6	10	5
 9	7	10	4
 10	8	10	3
+select x, min(x) over w, max(x) over w, count(x) over w from (
+    select toUInt8(number) x from numbers(11)) t
+window w as (order by x desc range between unbounded preceding and 2 preceding)
+order by x
+settings max_block_size = 4;
+0	2	10	9
+1	3	10	8
+2	4	10	7
+3	5	10	6
+4	6	10	5
+5	7	10	4
+6	8	10	3
+7	9	10	2
+8	10	10	1
+9	0	0	0
+10	0	0	0
diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql
index 8ac07e2df2a..49f84867182 100644
--- a/tests/queries/0_stateless/01591_window_functions.sql
+++ b/tests/queries/0_stateless/01591_window_functions.sql
@@ -260,3 +260,9 @@ select x, min(x) over w, max(x) over w, count(x) over w from (
 window w as (order by x desc range between unbounded preceding and 2 following)
 order by x
 settings max_block_size = 3;
+
+select x, min(x) over w, max(x) over w, count(x) over w from (
+    select toUInt8(number) x from numbers(11)) t
+window w as (order by x desc range between unbounded preceding and 2 preceding)
+order by x
+settings max_block_size = 4;

From b130fbfd788fc013113e158225c29ff65594d410 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Tue, 9 Feb 2021 17:47:18 +0300
Subject: [PATCH 0878/1238] Add coordination settings

---
 src/Coordination/NuKeeperServer.cpp           | 40 +++++++++++-------
 src/Coordination/NuKeeperServer.h             | 12 ++++--
 src/Coordination/NuKeeperStateMachine.cpp     | 27 ++++++------
 src/Coordination/NuKeeperStateMachine.h       |  7 +++-
 .../NuKeeperStorageDispatcher.cpp             | 22 +++++-----
 src/Coordination/NuKeeperStorageDispatcher.h  |  4 +-
 tests/config/config.d/test_keeper_port.xml    |  8 +++-
 .../configs/enable_test_keeper.xml            |  8 +++-
 .../configs/enable_test_keeper1.xml           |  8 +++-
 .../configs/enable_test_keeper2.xml           |  8 +++-
 .../configs/enable_test_keeper3.xml           |  8 +++-
 .../test_testkeeper_multinode/test.py         | 42 ++++++++++++++++++-
 12 files changed, 139 insertions(+), 55 deletions(-)

diff --git a/src/Coordination/NuKeeperServer.cpp b/src/Coordination/NuKeeperServer.cpp
index cbd52b98377..40508b08761 100644
--- a/src/Coordination/NuKeeperServer.cpp
+++ b/src/Coordination/NuKeeperServer.cpp
@@ -19,12 +19,16 @@ namespace ErrorCodes
     extern const int RAFT_ERROR;
 }
 
-NuKeeperServer::NuKeeperServer(int server_id_, const std::string & hostname_, int port_, ResponsesQueue & responses_queue_)
+NuKeeperServer::NuKeeperServer(
+    int server_id_, const std::string & hostname_, int port_,
+    const CoordinationSettingsPtr & coordination_settings_,
+    ResponsesQueue & responses_queue_)
     : server_id(server_id_)
     , hostname(hostname_)
     , port(port_)
     , endpoint(hostname + ":" + std::to_string(port))
-    , state_machine(nuraft::cs_new<NuKeeperStateMachine>(responses_queue_))
+    , coordination_settings(coordination_settings_)
+    , state_machine(nuraft::cs_new<NuKeeperStateMachine>(responses_queue_, coordination_settings))
     , state_manager(nuraft::cs_new<InMemoryStateManager>(server_id, endpoint))
     , responses_queue(responses_queue_)
 {
@@ -43,17 +47,18 @@ void NuKeeperServer::addServer(int server_id_, const std::string & server_uri_,
 }
 
 
-void NuKeeperServer::startup(int64_t operation_timeout_ms)
+void NuKeeperServer::startup()
 {
     nuraft::raft_params params;
-    params.heart_beat_interval_ = 500;
-    params.election_timeout_lower_bound_ = 1000;
-    params.election_timeout_upper_bound_ = 2000;
-    params.reserved_log_items_ = 5000;
-    params.snapshot_distance_ = 5000;
-    params.client_req_timeout_ = operation_timeout_ms;
-    params.auto_forwarding_ = true;
-    params.auto_forwarding_req_timeout_ = operation_timeout_ms * 2;
+    params.heart_beat_interval_ = coordination_settings->heart_beat_interval_ms.totalMilliseconds();
+    params.election_timeout_lower_bound_ = coordination_settings->election_timeout_lower_bound_ms.totalMilliseconds();
+    params.election_timeout_upper_bound_ = coordination_settings->election_timeout_upper_bound_ms.totalMilliseconds();
+    params.reserved_log_items_ = coordination_settings->reserved_log_items;
+    params.snapshot_distance_ = coordination_settings->snapshot_distance;
+    params.client_req_timeout_ = coordination_settings->operation_timeout_ms.totalMilliseconds();
+    params.auto_forwarding_ = coordination_settings->auto_forwarding;
+    params.auto_forwarding_req_timeout_ = coordination_settings->operation_timeout_ms.totalMilliseconds() * 2;
+
     params.return_method_ = nuraft::raft_params::blocking;
 
     nuraft::asio_service::options asio_opts{};
@@ -65,6 +70,7 @@ void NuKeeperServer::startup(int64_t operation_timeout_ms)
     if (!raft_instance)
         throw Exception(ErrorCodes::RAFT_ERROR, "Cannot allocate RAFT instance");
 
+    /// FIXME
     static constexpr auto MAX_RETRY = 100;
     for (size_t i = 0; i < MAX_RETRY; ++i)
     {
@@ -80,7 +86,7 @@ void NuKeeperServer::startup(int64_t operation_timeout_ms)
 void NuKeeperServer::shutdown()
 {
     state_machine->shutdownStorage();
-    if (!launcher.shutdown(5))
+    if (!launcher.shutdown(coordination_settings->shutdown_timeout.totalSeconds()))
         LOG_WARNING(&Poco::Logger::get("NuKeeperServer"), "Failed to shutdown RAFT server in {} seconds", 5);
 }
 
@@ -173,6 +179,7 @@ bool NuKeeperServer::isLeaderAlive() const
 
 bool NuKeeperServer::waitForServer(int32_t id) const
 {
+    /// FIXME
     for (size_t i = 0; i < 50; ++i)
     {
         if (raft_instance->get_srv_config(id) != nullptr)
@@ -180,17 +187,22 @@ bool NuKeeperServer::waitForServer(int32_t id) const
         LOG_DEBUG(&Poco::Logger::get("NuRaftInit"), "Waiting for server {} to join the cluster", id);
         std::this_thread::sleep_for(std::chrono::milliseconds(100));
     }
+
+    LOG_DEBUG(&Poco::Logger::get("NuRaftInit"), "Cannot wait for server {}", id);
     return false;
 }
 
-void NuKeeperServer::waitForServers(const std::vector<int32_t> & ids) const
+bool NuKeeperServer::waitForServers(const std::vector<int32_t> & ids) const
 {
     for (int32_t id : ids)
-        waitForServer(id);
+        if (!waitForServer(id))
+            return false;
+    return true;
 }
 
 void NuKeeperServer::waitForCatchUp() const
 {
+    /// FIXME
     while (raft_instance->is_catching_up() || raft_instance->is_receiving_snapshot() || raft_instance->is_leader())
     {
         LOG_DEBUG(&Poco::Logger::get("NuRaftInit"), "Waiting current RAFT instance to catch up");
diff --git a/src/Coordination/NuKeeperServer.h b/src/Coordination/NuKeeperServer.h
index 6fa2ae44ce2..bb5870fe89a 100644
--- a/src/Coordination/NuKeeperServer.h
+++ b/src/Coordination/NuKeeperServer.h
@@ -5,6 +5,7 @@
 #include <Coordination/InMemoryStateManager.h>
 #include <Coordination/NuKeeperStateMachine.h>
 #include <Coordination/NuKeeperStorage.h>
+#include <Coordination/CoordinationSettings.h>
 #include <unordered_map>
 
 namespace DB
@@ -21,6 +22,8 @@ private:
 
     std::string endpoint;
 
+    CoordinationSettingsPtr coordination_settings;
+
     nuraft::ptr<NuKeeperStateMachine> state_machine;
 
     nuraft::ptr<nuraft::state_mgr> state_manager;
@@ -34,9 +37,12 @@ private:
     ResponsesQueue & responses_queue;
 
 public:
-    NuKeeperServer(int server_id_, const std::string & hostname_, int port_, ResponsesQueue & responses_queue_);
+    NuKeeperServer(
+        int server_id_, const std::string & hostname_, int port_,
+        const CoordinationSettingsPtr & coordination_settings_,
+        ResponsesQueue & responses_queue_);
 
-    void startup(int64_t operation_timeout_ms);
+    void startup();
 
     void putRequest(const NuKeeperStorage::RequestForSession & request);
 
@@ -51,7 +57,7 @@ public:
     bool isLeaderAlive() const;
 
     bool waitForServer(int32_t server_id) const;
-    void waitForServers(const std::vector<int32_t> & ids) const;
+    bool waitForServers(const std::vector<int32_t> & ids) const;
     void waitForCatchUp() const;
 
     void shutdown();
diff --git a/src/Coordination/NuKeeperStateMachine.cpp b/src/Coordination/NuKeeperStateMachine.cpp
index 9be8e889fa3..d282f57ce73 100644
--- a/src/Coordination/NuKeeperStateMachine.cpp
+++ b/src/Coordination/NuKeeperStateMachine.cpp
@@ -8,8 +8,6 @@
 namespace DB
 {
 
-static constexpr int MAX_SNAPSHOTS = 3;
-
 NuKeeperStorage::RequestForSession parseRequest(nuraft::buffer & data)
 {
     ReadBufferFromNuraftBuffer buffer(data);
@@ -43,8 +41,9 @@ nuraft::ptr<nuraft::buffer> writeResponses(NuKeeperStorage::ResponsesForSessions
 }
 
 
-NuKeeperStateMachine::NuKeeperStateMachine(ResponsesQueue & responses_queue_, int64_t tick_time)
-    : storage(tick_time)
+NuKeeperStateMachine::NuKeeperStateMachine(ResponsesQueue & responses_queue_, const CoordinationSettingsPtr & coordination_settings_)
+    : coordination_settings(coordination_settings_)
+    , storage(coordination_settings->dead_session_check_period_ms.totalMilliseconds())
     , responses_queue(responses_queue_)
     , last_committed_idx(0)
     , log(&Poco::Logger::get("NuRaftStateMachine"))
@@ -129,7 +128,7 @@ NuKeeperStateMachine::StorageSnapshotPtr NuKeeperStateMachine::readSnapshot(nura
     NuKeeperStorageSerializer serializer;
 
     ReadBufferFromNuraftBuffer reader(in);
-    NuKeeperStorage new_storage(500 /*FIXME*/);
+    NuKeeperStorage new_storage(coordination_settings->dead_session_check_period_ms.totalMilliseconds());
     serializer.deserialize(new_storage, reader);
     return std::make_shared<StorageSnapshot>(ss, new_storage);
 }
@@ -153,15 +152,19 @@ void NuKeeperStateMachine::create_snapshot(
     {
         std::lock_guard<std::mutex> lock(snapshots_lock);
         snapshots[s.get_last_log_idx()] = snapshot;
-        int num = snapshots.size();
-        auto entry = snapshots.begin();
-
-        for (int i = 0; i < num - MAX_SNAPSHOTS; ++i)
+        size_t num = snapshots.size();
+        if (num > coordination_settings->max_stored_snapshots)
         {
-            if (entry == snapshots.end())
-                break;
-            entry = snapshots.erase(entry);
+            auto entry = snapshots.begin();
+
+            for (size_t i = 0; i < num - coordination_settings->max_stored_snapshots; ++i)
+            {
+                if (entry == snapshots.end())
+                    break;
+                entry = snapshots.erase(entry);
+            }
         }
+
     }
     nuraft::ptr<std::exception> except(nullptr);
     bool ret = true;
diff --git a/src/Coordination/NuKeeperStateMachine.h b/src/Coordination/NuKeeperStateMachine.h
index 5f3065ee144..87748db20a5 100644
--- a/src/Coordination/NuKeeperStateMachine.h
+++ b/src/Coordination/NuKeeperStateMachine.h
@@ -4,6 +4,7 @@
 #include <libnuraft/nuraft.hxx> // Y_IGNORE
 #include <common/logger_useful.h>
 #include <Coordination/ThreadSafeQueue.h>
+#include <Coordination/CoordinationSettings.h>
 
 namespace DB
 {
@@ -13,7 +14,7 @@ using ResponsesQueue = ThreadSafeQueue<NuKeeperStorage::ResponseForSession>;
 class NuKeeperStateMachine : public nuraft::state_machine
 {
 public:
-    NuKeeperStateMachine(ResponsesQueue & responses_queue_, int64_t tick_time = 500);
+    NuKeeperStateMachine(ResponsesQueue & responses_queue_, const CoordinationSettingsPtr & coordination_settings_);
 
     nuraft::ptr<nuraft::buffer> pre_commit(const size_t /*log_idx*/, nuraft::buffer & /*data*/) override { return nullptr; }
 
@@ -72,10 +73,12 @@ private:
 
     StorageSnapshotPtr createSnapshotInternal(nuraft::snapshot & s);
 
-    static StorageSnapshotPtr readSnapshot(nuraft::snapshot & s, nuraft::buffer & in);
+    StorageSnapshotPtr readSnapshot(nuraft::snapshot & s, nuraft::buffer & in);
 
     static void writeSnapshot(const StorageSnapshotPtr & snapshot, nuraft::ptr<nuraft::buffer> & out);
 
+    CoordinationSettingsPtr coordination_settings;
+
     NuKeeperStorage storage;
 
     ResponsesQueue & responses_queue;
diff --git a/src/Coordination/NuKeeperStorageDispatcher.cpp b/src/Coordination/NuKeeperStorageDispatcher.cpp
index 86bdae9cc37..914985ee534 100644
--- a/src/Coordination/NuKeeperStorageDispatcher.cpp
+++ b/src/Coordination/NuKeeperStorageDispatcher.cpp
@@ -12,7 +12,8 @@ namespace ErrorCodes
 }
 
 NuKeeperStorageDispatcher::NuKeeperStorageDispatcher()
-    : log(&Poco::Logger::get("NuKeeperDispatcher"))
+    : coordination_settings(std::make_shared<CoordinationSettings>())
+    , log(&Poco::Logger::get("NuKeeperDispatcher"))
 {
 }
 
@@ -23,7 +24,7 @@ void NuKeeperStorageDispatcher::requestThread()
     {
         NuKeeperStorage::RequestForSession request;
 
-        UInt64 max_wait = UInt64(operation_timeout.totalMilliseconds());
+        UInt64 max_wait = UInt64(coordination_settings->operation_timeout_ms.totalMilliseconds());
 
         if (requests_queue.tryPop(request, max_wait))
         {
@@ -49,7 +50,7 @@ void NuKeeperStorageDispatcher::responseThread()
     {
         NuKeeperStorage::ResponseForSession response_for_session;
 
-        UInt64 max_wait = UInt64(operation_timeout.totalMilliseconds());
+        UInt64 max_wait = UInt64(coordination_settings->operation_timeout_ms.totalMilliseconds());
 
         if (responses_queue.tryPop(response_for_session, max_wait))
         {
@@ -97,7 +98,7 @@ bool NuKeeperStorageDispatcher::putRequest(const Coordination::ZooKeeperRequestP
     /// Put close requests without timeouts
     if (request->getOpNum() == Coordination::OpNum::Close)
         requests_queue.push(std::move(request_info));
-    else if (!requests_queue.tryPush(std::move(request_info), operation_timeout.totalMilliseconds()))
+    else if (!requests_queue.tryPush(std::move(request_info), coordination_settings->operation_timeout_ms.totalMilliseconds()))
         throw Exception("Cannot push request to queue within operation timeout", ErrorCodes::TIMEOUT_EXCEEDED);
     return true;
 }
@@ -134,8 +135,8 @@ void NuKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfigurati
     std::string myhostname;
     int myport;
     int32_t my_priority = 1;
+    coordination_settings->loadFromConfig("test_keeper_server.coordination_settings", config);
 
-    operation_timeout = Poco::Timespan(0, config.getUInt("test_keeper_server.operation_timeout_ms", Coordination::DEFAULT_OPERATION_TIMEOUT_MS) * 1000);
     Poco::Util::AbstractConfiguration::Keys keys;
     config.keys("test_keeper_server.raft_configuration", keys);
     bool my_can_become_leader = true;
@@ -163,10 +164,10 @@ void NuKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfigurati
         ids.push_back(server_id);
     }
 
-    server = std::make_unique<NuKeeperServer>(myid, myhostname, myport, responses_queue);
+    server = std::make_unique<NuKeeperServer>(myid, myhostname, myport, coordination_settings, responses_queue);
     try
     {
-        server->startup(operation_timeout.totalMilliseconds());
+        server->startup();
         if (shouldBuildQuorum(myid, my_priority, my_can_become_leader, server_configs))
         {
             for (const auto & [id, hostname, port, can_become_leader, priority] : server_configs)
@@ -183,8 +184,8 @@ void NuKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfigurati
         }
         else
         {
-            LOG_DEBUG(log, "Waiting for {} servers to build cluster", ids.size());
-            server->waitForServers(ids);
+            while (!server->waitForServers(ids))
+                LOG_DEBUG(log, "Waiting for {} servers to build cluster", ids.size());
             server->waitForCatchUp();
         }
     }
@@ -283,8 +284,7 @@ void NuKeeperStorageDispatcher::sessionCleanerTask()
             tryLogCurrentException(__PRETTY_FUNCTION__);
         }
 
-        /*FIXME*/
-        std::this_thread::sleep_for(std::chrono::milliseconds(500));
+        std::this_thread::sleep_for(std::chrono::milliseconds(coordination_settings->dead_session_check_period_ms.totalMilliseconds()));
     }
 }
 
diff --git a/src/Coordination/NuKeeperStorageDispatcher.h b/src/Coordination/NuKeeperStorageDispatcher.h
index 6820247a5af..62144b92a7a 100644
--- a/src/Coordination/NuKeeperStorageDispatcher.h
+++ b/src/Coordination/NuKeeperStorageDispatcher.h
@@ -14,6 +14,7 @@
 #include <common/logger_useful.h>
 #include <functional>
 #include <Coordination/NuKeeperServer.h>
+#include <Coordination/CoordinationSettings.h>
 
 
 namespace DB
@@ -25,10 +26,9 @@ class NuKeeperStorageDispatcher
 {
 
 private:
-    Poco::Timespan operation_timeout{0, Coordination::DEFAULT_OPERATION_TIMEOUT_MS * 1000};
-
     std::mutex push_request_mutex;
 
+    CoordinationSettingsPtr coordination_settings;
     using RequestsQueue = ConcurrentBoundedQueue<NuKeeperStorage::RequestForSession>;
     RequestsQueue requests_queue{1};
     ResponsesQueue responses_queue;
diff --git a/tests/config/config.d/test_keeper_port.xml b/tests/config/config.d/test_keeper_port.xml
index fff60d749f6..6ca00a972d4 100644
--- a/tests/config/config.d/test_keeper_port.xml
+++ b/tests/config/config.d/test_keeper_port.xml
@@ -1,9 +1,13 @@
 <yandex>
     <test_keeper_server>
         <tcp_port>9181</tcp_port>
-        <operation_timeout_ms>10000</operation_timeout_ms>
-        <session_timeout_ms>30000</session_timeout_ms>
         <server_id>1</server_id>
+
+        <coordination_settings>
+            <operation_timeout_ms>10000</operation_timeout_ms>
+            <session_timeout_ms>30000</session_timeout_ms>
+        </coordination_settings>
+
         <raft_configuration>
             <server>
                 <id>1</id>
diff --git a/tests/integration/test_testkeeper_back_to_back/configs/enable_test_keeper.xml b/tests/integration/test_testkeeper_back_to_back/configs/enable_test_keeper.xml
index fff60d749f6..00a593051f9 100644
--- a/tests/integration/test_testkeeper_back_to_back/configs/enable_test_keeper.xml
+++ b/tests/integration/test_testkeeper_back_to_back/configs/enable_test_keeper.xml
@@ -1,9 +1,13 @@
 <yandex>
     <test_keeper_server>
         <tcp_port>9181</tcp_port>
-        <operation_timeout_ms>10000</operation_timeout_ms>
-        <session_timeout_ms>30000</session_timeout_ms>
         <server_id>1</server_id>
+
+        <coordination_settings>
+            <operation_timeout_ms>5000</operation_timeout_ms>
+            <session_timeout_ms>10000</session_timeout_ms>
+        </coordination_settings>
+
         <raft_configuration>
             <server>
                 <id>1</id>
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
index e1b6da40338..75065bb2a7a 100644
--- a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
@@ -1,9 +1,13 @@
 <yandex>
     <test_keeper_server>
         <tcp_port>9181</tcp_port>
-        <operation_timeout_ms>5000</operation_timeout_ms>
-        <session_timeout_ms>10000</session_timeout_ms>
         <server_id>1</server_id>
+
+        <coordination_settings>
+            <operation_timeout_ms>5000</operation_timeout_ms>
+            <session_timeout_ms>10000</session_timeout_ms>
+        </coordination_settings>
+
         <raft_configuration>
             <server>
                 <id>1</id>
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
index 7622aa164da..18937dd4910 100644
--- a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
@@ -1,9 +1,13 @@
 <yandex>
     <test_keeper_server>
         <tcp_port>9181</tcp_port>
-        <operation_timeout_ms>5000</operation_timeout_ms>
-        <session_timeout_ms>10000</session_timeout_ms>
         <server_id>2</server_id>
+
+        <coordination_settings>
+            <operation_timeout_ms>5000</operation_timeout_ms>
+            <session_timeout_ms>10000</session_timeout_ms>
+        </coordination_settings>
+
         <raft_configuration>
             <server>
                 <id>1</id>
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
index 1edbfa7271e..5330367cd89 100644
--- a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
@@ -1,9 +1,13 @@
 <yandex>
     <test_keeper_server>
         <tcp_port>9181</tcp_port>
-        <operation_timeout_ms>5000</operation_timeout_ms>
-        <session_timeout_ms>10000</session_timeout_ms>
         <server_id>3</server_id>
+
+        <coordination_settings>
+            <operation_timeout_ms>5000</operation_timeout_ms>
+            <session_timeout_ms>10000</session_timeout_ms>
+        </coordination_settings>
+
         <raft_configuration>
             <server>
                 <id>1</id>
diff --git a/tests/integration/test_testkeeper_multinode/test.py b/tests/integration/test_testkeeper_multinode/test.py
index ff001fb75ee..05879613ba6 100644
--- a/tests/integration/test_testkeeper_multinode/test.py
+++ b/tests/integration/test_testkeeper_multinode/test.py
@@ -52,6 +52,47 @@ def get_fake_zk(nodename):
     _fake_zk_instance.start()
     return _fake_zk_instance
 
+def test_read_write_multinode(started_cluster):
+    try:
+        node1_zk = get_fake_zk("node1")
+        node2_zk = get_fake_zk("node2")
+        node3_zk = get_fake_zk("node3")
+
+        node1_zk.create("/test_read_write_multinode_node1", b"somedata1")
+        node2_zk.create("/test_read_write_multinode_node2", b"somedata2")
+        node3_zk.create("/test_read_write_multinode_node3", b"somedata3")
+
+        # stale reads are allowed
+        while node1_zk.exists("/test_read_write_multinode_node2") is None:
+            time.sleep(0.1)
+
+        while node1_zk.exists("/test_read_write_multinode_node3") is None:
+            time.sleep(0.1)
+
+        while node2_zk.exists("/test_read_write_multinode_node3") is None:
+            time.sleep(0.1)
+
+        assert node3_zk.get("/test_read_write_multinode_node1")[0] == b"somedata1"
+        assert node2_zk.get("/test_read_write_multinode_node1")[0] == b"somedata1"
+        assert node1_zk.get("/test_read_write_multinode_node1")[0] == b"somedata1"
+
+        assert node3_zk.get("/test_read_write_multinode_node2")[0] == b"somedata2"
+        assert node2_zk.get("/test_read_write_multinode_node2")[0] == b"somedata2"
+        assert node1_zk.get("/test_read_write_multinode_node2")[0] == b"somedata2"
+
+        assert node3_zk.get("/test_read_write_multinode_node3")[0] == b"somedata3"
+        assert node2_zk.get("/test_read_write_multinode_node3")[0] == b"somedata3"
+        assert node1_zk.get("/test_read_write_multinode_node3")[0] == b"somedata3"
+
+    finally:
+        try:
+            for zk_conn in [node1_zk, node2_zk, node3_zk]:
+                zk_conn.stop()
+                zk_conn.close()
+        except:
+            pass
+
+
 def test_watch_on_follower(started_cluster):
     try:
         node1_zk = get_fake_zk("node1")
@@ -105,7 +146,6 @@ def test_watch_on_follower(started_cluster):
             pass
 
 
-
 # in extremely rare case it can take more than 5 minutes in debug build with sanitizer
 @pytest.mark.timeout(600)
 def test_blocade_leader(started_cluster):

From 3874effea16b4140227efa6e11fe6dc34024924f Mon Sep 17 00:00:00 2001
From: Vitaliy Zakaznikov <vzakaznikov@protonmail.com>
Date: Tue, 9 Feb 2021 10:09:38 -0500
Subject: [PATCH 0879/1238] Fixing rendering issues and links.

---
 .../external-authenticators/index.md          |  2 +-
 .../external-authenticators/ldap.md           | 74 +++++++++----------
 2 files changed, 37 insertions(+), 39 deletions(-)

diff --git a/docs/en/operations/external-authenticators/index.md b/docs/en/operations/external-authenticators/index.md
index fb8483fa341..95f80f192f5 100644
--- a/docs/en/operations/external-authenticators/index.md
+++ b/docs/en/operations/external-authenticators/index.md
@@ -10,4 +10,4 @@ ClickHouse supports authenticating and managing users using external services.
 
 The following external authenticators and directories are supported:
 
-- [LDAP](#external-authenticators-ldap) [Authenticator](#ldap-external-authenticator) and [Directory](#ldap-external-user-directory)
+- [LDAP](./ldap.md#external-authenticators-ldap) [Authenticator](./ldap.md#ldap-external-authenticator) and [Directory](./ldap.md#ldap-external-user-directory)
diff --git a/docs/en/operations/external-authenticators/ldap.md b/docs/en/operations/external-authenticators/ldap.md
index 03be357a12a..36a13227852 100644
--- a/docs/en/operations/external-authenticators/ldap.md
+++ b/docs/en/operations/external-authenticators/ldap.md
@@ -5,8 +5,7 @@ LDAP server can be used to authenticate ClickHouse users. There are two differen
 - use LDAP as an external authenticator for existing users, which are defined in `users.xml` or in local access control paths
 - use LDAP as an external user directory and allow locally undefined users to be authenticated if they exist on the LDAP server
 
-For both of these approaches, an internally named LDAP server must be defined in the ClickHouse config 
-so that other parts of config are able to refer to it.
+For both of these approaches, an internally named LDAP server must be defined in the ClickHouse config so that other parts of config are able to refer to it.
 
 ## LDAP Server Definition {#ldap-server-definition}
 
@@ -34,27 +33,27 @@ To define LDAP server you must add `ldap_servers` section to the `config.xml`. F
 </yandex>
 ```
 
-Note, that you can define multiple LDAP servers inside `ldap_servers` section using distinct names.
+Note, that you can define multiple LDAP servers inside the `ldap_servers` section using distinct names.
 
 Parameters:
 
 - `host` - LDAP server hostname or IP, this parameter is mandatory and cannot be empty.
 - `port` - LDAP server port, default is `636` if `enable_tls` is set to `true`, `389` otherwise.
 - `bind_dn` - template used to construct the DN to bind to.
-  - The resulting DN will be constructed by replacing all `{user_name}` substrings of the
-    template with the actual user name during each authentication attempt.
+    - The resulting DN will be constructed by replacing all `{user_name}` substrings of the
+      template with the actual user name during each authentication attempt.
 - `verification_cooldown` - a period of time, in seconds, after a successful bind attempt,
   during which the user will be assumed to be successfully authenticated for all consecutive
   requests without contacting the LDAP server.
-  - Specify `0` (the default) to disable caching and force contacting the LDAP server for each authentication request.
+    - Specify `0` (the default) to disable caching and force contacting the LDAP server for each authentication request.
 - `enable_tls` - flag to trigger use of secure connection to the LDAP server.
-  - Specify `no` for plain text `ldap://` protocol (not recommended).
-  - Specify `yes` for LDAP over SSL/TLS `ldaps://` protocol (recommended, the default).
-  - Specify `starttls` for legacy StartTLS protocol (plain text `ldap://` protocol, upgraded to TLS).
+    - Specify `no` for plain text `ldap://` protocol (not recommended).
+    - Specify `yes` for LDAP over SSL/TLS `ldaps://` protocol (recommended, the default).
+    - Specify `starttls` for legacy StartTLS protocol (plain text `ldap://` protocol, upgraded to TLS).
 - `tls_minimum_protocol_version` - the minimum protocol version of SSL/TLS.
-  - Accepted values are: `ssl2`, `ssl3`, `tls1.0`, `tls1.1`, `tls1.2` (the default).
+    - Accepted values are: `ssl2`, `ssl3`, `tls1.0`, `tls1.1`, `tls1.2` (the default).
 - `tls_require_cert` - SSL/TLS peer certificate verification behavior.
-  - Accepted values are: `never`, `allow`, `try`, `demand` (the default).
+    - Accepted values are: `never`, `allow`, `try`, `demand` (the default).
 - `tls_cert_file` - path to certificate file.
 - `tls_key_file` - path to certificate key file.
 - `tls_ca_cert_file` - path to CA certificate file.
@@ -65,8 +64,7 @@ Parameters:
 
 A remote LDAP server can be used as a method for verifying passwords for locally defined users (users defined in `users.xml` or in local access control paths). In order to achieve this, specify previously defined LDAP server name instead of `password` or similar sections in the user definition.
 
-At each login attempt, ClickHouse will try to "bind" to the specified DN defined by the `bind_dn` parameter
-in the [LDAP server definition](#ldap-server-definition) using the provided credentials, and if successful, the user will be considered authenticated. This is often called a "simple bind" method.
+At each login attempt, ClickHouse will try to "bind" to the specified DN defined by the `bind_dn` parameter in the [LDAP server definition](#ldap-server-definition) using the provided credentials, and if successful, the user will be considered authenticated. This is often called a "simple bind" method.
 
 For example,
 
@@ -87,7 +85,7 @@ For example,
 
 Note, that user `my_user` refers to `my_ldap_server`. This LDAP server must be configured in the main `config.xml` file as described previously.
 
-When SQL-driven [Access Control and Account Management](#access-control) is enabled in ClickHouse, users that are identified by LDAP servers can also be created using the [CRATE USER](#create-user-statement) statement.
+When SQL-driven [Access Control and Account Management](../access-rights.md#access-control) is enabled in ClickHouse, users that are authenticated by LDAP servers can also be created using the [CRATE USER](../../sql-reference/statements/create/user.md#create-user-statement) statement.
 
 
 ```sql
@@ -96,9 +94,9 @@ CREATE USER my_user IDENTIFIED WITH ldap_server BY 'my_ldap_server'
 
 ## LDAP Exernal User Directory {#ldap-external-user-directory}
 
-In addition to the locally defined users, a remote LDAP server can be used as a source of user definitions. In order to achieve this, specify previously defined LDAP server name (see [LDAP Server Definition](#ldap-server-definition)) in the `ldap` section inside the `users_directories` section in of the `config.xml` file.
+In addition to the locally defined users, a remote LDAP server can be used as a source of user definitions. In order to achieve this, specify previously defined LDAP server name (see [LDAP Server Definition](#ldap-server-definition)) in the `ldap` section inside the `users_directories` section of the `config.xml` file.
 
-At each login attempt, ClickHouse will try to find the user definition locally and authenticate it as usual, but if the user is not defined, ClickHouse will assume it exists in the external LDAP directory, and will try to "bind" to the specified DN at the LDAP server using the provided credentials, and if successful, the user will be considered existing and authenticated. The user will be assigned roles from the list specified in the `roles` section. Additionally, LDAP "search" can be performed and results can be transformed and treated as role names and then assigned to the user if the `role_mapping` section is also configured. All this implies that the SQL-driven [Access Control and Account Management](#access-control) is enabled in ClickHouse and roles are created using the [CREATE ROLE](#create-role-statement) statement.
+At each login attempt, ClickHouse will try to find the user definition locally and authenticate it as usual, but if the user is not defined, ClickHouse will assume it exists in the external LDAP directory, and will try to "bind" to the specified DN at the LDAP server using the provided credentials. If successful, the user will be considered existing and authenticated. The user will be assigned roles from the list specified in the `roles` section. Additionally, LDAP "search" can be performed and results can be transformed and treated as role names and then be assigned to the user if the `role_mapping` section is also configured. All this implies that the SQL-driven [Access Control and Account Management](../access-rights.md#access-control) is enabled and roles are created using the [CREATE ROLE](../../sql-reference/statements/create/role.md#create-role-statement) statement.
 
 Example (goes into `config.xml`):
 
@@ -130,29 +128,29 @@ defined LDAP server that is configured in the `config.xml` (see [LDAP Server Def
 
 Parameters:
 
-- `server` - one of LDAP server names defined in `ldap_servers` config section above.
+- `server` - one of LDAP server names defined in the `ldap_servers` config section above.
   This parameter is mandatory and cannot be empty.
 - `roles` - section with a list of locally defined roles that will be assigned to each user retrieved from the LDAP server.
-  - If no roles are specified here or assigned during role mapping (below), user will not be able
-    to perform any actions after authentication.
+    - If no roles are specified here or assigned during role mapping (below), user will not be able
+      to perform any actions after authentication.
 - `role_mapping` - section with LDAP search parameters and mapping rules.
-  - When a user authenticates, while still bound to LDAP, an LDAP search is performed using `search_filter`
-    and the name of the logged in user. For each entry found during that search, the value of the specified
-    attribute is extracted. For each attribute value that has the specified prefix, the prefix is removed,
-    and the rest of the value becomes the name of a local role defined in ClickHouse,
-    which is expected to be created beforehand by the [CREATE ROLE](#create-role-statement) statement.
-  - There can be multiple `role_mapping` sections defined inside the same `ldap` section. All of them will be applied.
-    - `base_dn` - template used to construct the base DN for the LDAP search.
-      - The resulting DN will be constructed by replacing all `{user_name}` and `{bind_dn}`
-        substrings of the template with the actual user name and bind DN during each LDAP search.
-    - `scope` - scope of the LDAP search.
-      - Accepted values are: `base`, `one_level`, `children`, `subtree` (the default).
-    - `search_filter` - template used to construct the search filter for the LDAP search.
-      - The resulting filter will be constructed by replacing all `{user_name}`, `{bind_dn}`, and `{base_dn}`
-        substrings of the template with the actual user name, bind DN, and base DN during each LDAP search.
-      - Note, that the special characters must be escaped properly in XML.
-    - `attribute` - attribute name whose values will be returned by the LDAP search.
-    - `prefix` - prefix, that will be expected to be in front of each string in the original
-      list of strings returned by the LDAP search. Prefix will be removed from the original
-      strings and resulting strings will be treated as local role names. Empty, by default.
+    - When a user authenticates, while still bound to LDAP, an LDAP search is performed using `search_filter`
+      and the name of the logged in user. For each entry found during that search, the value of the specified
+      attribute is extracted. For each attribute value that has the specified prefix, the prefix is removed,
+      and the rest of the value becomes the name of a local role defined in ClickHouse,
+      which is expected to be created beforehand by the [CREATE ROLE](../../sql-reference/statements/create/role.md#create-role-statement) statement.
+    - There can be multiple `role_mapping` sections defined inside the same `ldap` section. All of them will be applied.
+        - `base_dn` - template used to construct the base DN for the LDAP search.
+           - The resulting DN will be constructed by replacing all `{user_name}` and `{bind_dn}`
+             substrings of the template with the actual user name and bind DN during each LDAP search.
+        - `scope` - scope of the LDAP search.
+            - Accepted values are: `base`, `one_level`, `children`, `subtree` (the default).
+        - `search_filter` - template used to construct the search filter for the LDAP search.
+            - The resulting filter will be constructed by replacing all `{user_name}`, `{bind_dn}`, and `{base_dn}`
+              substrings of the template with the actual user name, bind DN, and base DN during each LDAP search.
+            - Note, that the special characters must be escaped properly in XML.
+        - `attribute` - attribute name whose values will be returned by the LDAP search.
+        - `prefix` - prefix, that will be expected to be in front of each string in the original
+          list of strings returned by the LDAP search. Prefix will be removed from the original
+          strings and resulting strings will be treated as local role names. Empty, by default.
 

From 2c6a0e74fb90d2cd5c8b988c4e9f3eebf60366c8 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Tue, 9 Feb 2021 18:14:20 +0300
Subject: [PATCH 0880/1238] better replica creation

---
 src/Databases/DatabaseReplicated.cpp          | 119 ++++++++++--------
 src/Databases/DatabaseReplicated.h            |   6 +-
 src/Databases/DatabaseReplicatedWorker.cpp    |  16 ++-
 src/Databases/DatabaseReplicatedWorker.h      |   2 +
 src/Interpreters/DDLTask.cpp                  |   4 +-
 src/Interpreters/DDLTask.h                    |   5 +-
 src/Interpreters/DDLWorker.cpp                |  39 +++---
 src/Interpreters/DDLWorker.h                  |   6 +
 src/Interpreters/executeDDLQueryOnCluster.cpp |   1 -
 .../test_replicated_database/test.py          |   9 +-
 10 files changed, 115 insertions(+), 92 deletions(-)

diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index 4a6058afcd0..a3da271a597 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -3,7 +3,6 @@
 #include <IO/ReadBufferFromFile.h>
 #include <IO/ReadBufferFromString.h>
 #include <IO/ReadHelpers.h>
-#include <IO/WriteBufferFromFile.h>
 #include <IO/WriteHelpers.h>
 #include <Interpreters/Context.h>
 #include <Interpreters/executeQuery.h>
@@ -105,8 +104,6 @@ DatabaseReplicated::DatabaseReplicated(
             throw Exception(ErrorCodes::REPLICA_IS_ALREADY_EXIST,
                             "Replica {} of shard {} of replicated database at {} already exists. Replica host ID: '{}', current host ID: '{}'",
                             replica_name, shard_name, zookeeper_path, replica_host_id, host_id);
-
-        log_entry_to_execute = parse<UInt32>(current_zookeeper->get(replica_path + "/log_ptr"));
     }
     else
     {
@@ -232,9 +229,6 @@ bool DatabaseReplicated::createDatabaseNodesInZooKeeper(const zkutil::ZooKeeperP
 
 void DatabaseReplicated::createReplicaNodesInZooKeeper(const zkutil::ZooKeeperPtr & current_zookeeper)
 {
-    /// When creating new replica, use latest snapshot version as initial value of log_pointer
-    //log_entry_to_execute = 0;   //FIXME
-
     /// Write host name to replica_path, it will protect from multiple replicas with the same name
     auto host_id = getHostID(global_context, db_uuid);
 
@@ -265,40 +259,6 @@ void DatabaseReplicated::loadStoredObjects(Context & context, bool has_force_res
     ddl_worker->startup();
 }
 
-void DatabaseReplicated::onUnexpectedLogEntry(const String & entry_name, const ZooKeeperPtr & zookeeper)
-{
-    /// We cannot execute next entry of replication log. Possible reasons:
-    /// 1. Replica is staled, some entries were removed by log cleanup process.
-    ///    In this case we should recover replica from the last snapshot.
-    /// 2. Replication log is broken due to manual operations with ZooKeeper or logical error.
-    ///    In this case we just stop replication without any attempts to recover it automatically,
-    ///    because such attempts may lead to unexpected data removal.
-
-    constexpr const char * name = "query-";
-    if (!startsWith(entry_name, name))
-        throw Exception(ErrorCodes::DATABASE_REPLICATION_FAILED, "Unexpected entry in replication log: {}", entry_name);
-
-    UInt32 entry_number;
-    if (!tryParse(entry_number, entry_name.substr(strlen(name))))
-        throw Exception(ErrorCodes::DATABASE_REPLICATION_FAILED, "Cannot parse number of replication log entry {}", entry_name);
-
-    if (entry_number < log_entry_to_execute)
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Entry {} already executed, current pointer is {}", entry_number, log_entry_to_execute);
-
-    /// Entry name is valid. Let's get min log pointer to check if replica is staled.
-    UInt32 min_snapshot = parse<UInt32>(zookeeper->get(zookeeper_path + "/min_log_ptr"));   // FIXME
-
-    if (log_entry_to_execute < min_snapshot)
-    {
-        recoverLostReplica(zookeeper, 0);   //FIXME log_pointer
-        return;
-    }
-
-    throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot recover replica, probably it's a bug. "
-                                               "Got log entry '{}' when expected entry number {}");
-}
-
-
 BlockIO DatabaseReplicated::propose(const ASTPtr & query, const Context & query_context)
 {
     if (query_context.getClientInfo().query_kind != ClientInfo::QueryKind::INITIAL_QUERY)
@@ -335,22 +295,25 @@ BlockIO DatabaseReplicated::propose(const ASTPtr & query, const Context & query_
 }
 
 
-void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 from_snapshot)
+void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 our_log_ptr, UInt32 max_log_ptr)
 {
-    //LOG_WARNING(log, "Will recover replica");
+    bool new_replica = our_log_ptr == 0;
+    if (new_replica)
+        LOG_INFO(log, "Will create new replica from log pointer {}", max_log_ptr);
+    else
+        LOG_WARNING(log, "Will recover replica with staled log pointer {} from log pointer {}", our_log_ptr, max_log_ptr);
 
-    //FIXME drop old tables
+    if (new_replica && !empty())
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "It's new replica, but database is not empty");
 
-    String snapshot_metadata_path = zookeeper_path + "/metadata";
-    Strings tables_in_snapshot = current_zookeeper->getChildren(snapshot_metadata_path);
-    snapshot_metadata_path += '/';
-    from_snapshot = parse<UInt32>(current_zookeeper->get(zookeeper_path + "/max_log_ptr"));
+    if (!new_replica)
+        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Automatic replica recovery is not implemented");
 
-    for (const auto & table_name : tables_in_snapshot)
+    auto table_name_to_metadata = tryGetConsistentMetadataSnapshot(current_zookeeper, max_log_ptr);
+
+    for (const auto & name_and_meta : table_name_to_metadata)
     {
-        //FIXME It's not atomic. We need multiget here (available since ZooKeeper 3.6.0).
-        String query_text = current_zookeeper->get(snapshot_metadata_path + table_name);
-        auto query_ast = parseQueryFromMetadataInZooKeeper(table_name, query_text);
+        auto query_ast = parseQueryFromMetadataInZooKeeper(name_and_meta.first, name_and_meta.second);
 
         Context query_context = global_context;
         query_context.makeQueryContext();
@@ -358,14 +321,60 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
         query_context.setCurrentDatabase(database_name);
         query_context.setCurrentQueryId(""); // generate random query_id
 
-        //FIXME
-        DatabaseCatalog::instance().waitTableFinallyDropped(query_ast->as<ASTCreateQuery>()->uuid);
-
         LOG_INFO(log, "Executing {}", serializeAST(*query_ast));
         InterpreterCreateQuery(query_ast, query_context).execute();
     }
 
-    current_zookeeper->set(replica_path + "/log_ptr", toString(from_snapshot));
+    current_zookeeper->set(replica_path + "/log_ptr", toString(max_log_ptr));
+}
+
+std::map<String, String> DatabaseReplicated::tryGetConsistentMetadataSnapshot(const ZooKeeperPtr & zookeeper, UInt32 & max_log_ptr)
+{
+    std::map<String, String> table_name_to_metadata;
+    constexpr int max_retries = 10;
+    int iteration = 0;
+    while (++iteration <= max_retries)
+    {
+        table_name_to_metadata.clear();
+        LOG_DEBUG(log, "Trying to get consistent metadata snapshot for log pointer {}", max_log_ptr);
+        Strings table_names = zookeeper->getChildren(zookeeper_path + "/metadata");
+
+        std::vector<zkutil::ZooKeeper::FutureGet> futures;
+        futures.reserve(table_names.size());
+        for (const auto & table : table_names)
+            futures.emplace_back(zookeeper->asyncTryGet(zookeeper_path + "/metadata/" + table));
+
+        for (size_t i = 0; i < table_names.size(); ++i)
+        {
+            auto res = futures[i].get();
+            if (res.error != Coordination::Error::ZOK)
+                break;
+            table_name_to_metadata.emplace(table_names[i], res.data);
+        }
+
+        UInt32 new_max_log_ptr = parse<UInt32>(zookeeper->get(zookeeper_path + "/max_log_ptr"));
+        if (new_max_log_ptr == max_log_ptr && table_names.size() == table_name_to_metadata.size())
+            break;
+
+        if (max_log_ptr < new_max_log_ptr)
+        {
+            LOG_DEBUG(log, "Log pointer moved from {} to {}, will retry", max_log_ptr, new_max_log_ptr);
+            max_log_ptr = new_max_log_ptr;
+        }
+        else
+        {
+            assert(max_log_ptr == new_max_log_ptr);
+            assert(table_names.size() != table_name_to_metadata.size());
+            LOG_DEBUG(log, "Cannot get metadata of some tables due to ZooKeeper error, will retry");
+        }
+    }
+
+    if (max_retries < iteration)
+        throw Exception(ErrorCodes::DATABASE_REPLICATION_FAILED, "Cannot get consistent metadata snapshot");
+
+    LOG_DEBUG(log, "Got consistent metadata snapshot for log pointer {}", max_log_ptr);
+
+    return table_name_to_metadata;
 }
 
 ASTPtr DatabaseReplicated::parseQueryFromMetadataInZooKeeper(const String & node_name, const String & query)
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index c39321f0caa..fffc2b5c98a 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -86,8 +86,8 @@ private:
     bool createDatabaseNodesInZooKeeper(const ZooKeeperPtr & current_zookeeper);
     void createReplicaNodesInZooKeeper(const ZooKeeperPtr & current_zookeeper);
 
-    void onUnexpectedLogEntry(const String & entry_name, const ZooKeeperPtr & zookeeper);
-    void recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 from_snapshot);
+    void recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 our_log_ptr, UInt32 max_log_ptr);
+    std::map<String, String> tryGetConsistentMetadataSnapshot(const ZooKeeperPtr & zookeeper, UInt32 & max_log_ptr);
 
     ASTPtr parseQueryFromMetadataInZooKeeper(const String & node_name, const String & query);
 
@@ -96,8 +96,6 @@ private:
     String replica_name;
     String replica_path;
 
-    UInt32 log_entry_to_execute;
-
     zkutil::ZooKeeperPtr getZooKeeper() const;
 
     std::unique_ptr<DatabaseReplicatedDDLWorker> ddl_worker;
diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp
index dd9dc322f9d..3162979e787 100644
--- a/src/Databases/DatabaseReplicatedWorker.cpp
+++ b/src/Databases/DatabaseReplicatedWorker.cpp
@@ -45,11 +45,14 @@ void DatabaseReplicatedDDLWorker::initializeReplication()
     /// Check if we need to recover replica.
     /// Invariant: replica is lost if it's log_ptr value is less then max_log_ptr - logs_to_keep.
 
-    UInt32 our_log_ptr = parse<UInt32>(current_zookeeper->get(database->replica_path + "/log_ptr"));
+    String log_ptr_str = current_zookeeper->get(database->replica_path + "/log_ptr");
+    UInt32 our_log_ptr = parse<UInt32>(log_ptr_str);
     UInt32 max_log_ptr = parse<UInt32>(current_zookeeper->get(database->zookeeper_path + "/max_log_ptr"));
-    UInt32 logs_to_keep = parse<UInt32>(current_zookeeper->get(database->zookeeper_path + "/logs_to_keep"));
+    logs_to_keep = parse<UInt32>(current_zookeeper->get(database->zookeeper_path + "/logs_to_keep"));
     if (our_log_ptr == 0 || our_log_ptr + logs_to_keep < max_log_ptr)
-        database->recoverLostReplica(current_zookeeper, 0);
+        database->recoverLostReplica(current_zookeeper, our_log_ptr, max_log_ptr);
+    else
+        last_skipped_entry_name.emplace(log_ptr_str);
 }
 
 String DatabaseReplicatedDDLWorker::enqueueQuery(DDLLogEntry & entry)
@@ -239,4 +242,11 @@ DDLTaskPtr DatabaseReplicatedDDLWorker::initAndCheckTask(const String & entry_na
     return task;
 }
 
+bool DatabaseReplicatedDDLWorker::canRemoveQueueEntry(const String & entry_name, const Coordination::Stat &)
+{
+    UInt32 entry_number = DDLTaskBase::getLogEntryNumber(entry_name);
+    UInt32 max_log_ptr = parse<UInt32>(getAndSetZooKeeper()->get(database->zookeeper_path + "/max_log_ptr"));
+    return entry_number + logs_to_keep < max_log_ptr;
+}
+
 }
diff --git a/src/Databases/DatabaseReplicatedWorker.h b/src/Databases/DatabaseReplicatedWorker.h
index e3fd58c4305..33806df88ba 100644
--- a/src/Databases/DatabaseReplicatedWorker.h
+++ b/src/Databases/DatabaseReplicatedWorker.h
@@ -20,11 +20,13 @@ private:
     void initializeReplication();
 
     DDLTaskPtr initAndCheckTask(const String & entry_name, String & out_reason, const ZooKeeperPtr & zookeeper) override;
+    bool canRemoveQueueEntry(const String & entry_name, const Coordination::Stat & stat) override;
 
     DatabaseReplicated * const database;
     mutable std::mutex mutex;
     std::condition_variable wait_current_task_change;
     String current_task;
+    UInt32 logs_to_keep = std::numeric_limits<UInt32>::max();
 };
 
 }
diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp
index 9e379443364..7f47f0a6659 100644
--- a/src/Interpreters/DDLTask.cpp
+++ b/src/Interpreters/DDLTask.cpp
@@ -320,7 +320,7 @@ std::unique_ptr<Context> DatabaseReplicatedTask::makeQueryContext(Context & from
     return query_context;
 }
 
-String DatabaseReplicatedTask::getLogEntryName(UInt32 log_entry_number)
+String DDLTaskBase::getLogEntryName(UInt32 log_entry_number)
 {
     constexpr size_t seq_node_digits = 10;
     String number = toString(log_entry_number);
@@ -328,7 +328,7 @@ String DatabaseReplicatedTask::getLogEntryName(UInt32 log_entry_number)
     return name;
 }
 
-UInt32 DatabaseReplicatedTask::getLogEntryNumber(const String & log_entry_name)
+UInt32 DDLTaskBase::getLogEntryNumber(const String & log_entry_name)
 {
     constexpr const char * name = "query-";
     assert(startsWith(log_entry_name, name));
diff --git a/src/Interpreters/DDLTask.h b/src/Interpreters/DDLTask.h
index 43d9fa1c0ae..f02e17103aa 100644
--- a/src/Interpreters/DDLTask.h
+++ b/src/Interpreters/DDLTask.h
@@ -101,6 +101,8 @@ struct DDLTaskBase
     inline String getFinishedNodePath() const { return entry_path + "/finished/" + host_id_str; }
     inline String getShardNodePath() const { return entry_path + "/shards/" + getShardID(); }
 
+    static String getLogEntryName(UInt32 log_entry_number);
+    static UInt32 getLogEntryNumber(const String & log_entry_name);
 };
 
 struct DDLTask : public DDLTaskBase
@@ -132,9 +134,6 @@ struct DatabaseReplicatedTask : public DDLTaskBase
     String getShardID() const override;
     std::unique_ptr<Context> makeQueryContext(Context & from_context) override;
 
-    static String getLogEntryName(UInt32 log_entry_number);
-    static UInt32 getLogEntryNumber(const String & log_entry_name);
-
     DatabaseReplicated * database;
 };
 
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 665bacf9d6d..efaacabf4de 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -451,10 +451,7 @@ bool DDLWorker::tryExecuteQuery(const String & query, DDLTaskBase & task)
 
 void DDLWorker::updateMaxDDLEntryID(const String & entry_name)
 {
-    DB::ReadBufferFromString in(entry_name);
-    DB::assertString("query-", in);
-    UInt64 id;
-    readText(id, in);
+    UInt64 id = DDLTaskBase::getLogEntryNumber(entry_name);
     auto prev_id = max_id.load(std::memory_order_relaxed);
     while (prev_id < id)
     {
@@ -744,16 +741,13 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
 }
 
 
-void DDLWorker::cleanupQueue(Int64 current_time_seconds, const ZooKeeperPtr & zookeeper)
+void DDLWorker::cleanupQueue(Int64, const ZooKeeperPtr & zookeeper)
 {
     LOG_DEBUG(log, "Cleaning queue");
 
     Strings queue_nodes = zookeeper->getChildren(queue_dir);
     filterAndSortQueueNodes(queue_nodes);
 
-    size_t num_outdated_nodes = (queue_nodes.size() > max_tasks_in_queue) ? queue_nodes.size() - max_tasks_in_queue : 0;
-    auto first_non_outdated_node = queue_nodes.begin() + num_outdated_nodes;
-
     for (auto it = queue_nodes.cbegin(); it < queue_nodes.cend(); ++it)
     {
         if (stop_flag)
@@ -772,15 +766,7 @@ void DDLWorker::cleanupQueue(Int64 current_time_seconds, const ZooKeeperPtr & zo
             if (!zookeeper->exists(node_path, &stat))
                 continue;
 
-            /// Delete node if its lifetime is expired (according to task_max_lifetime parameter)
-            constexpr UInt64 zookeeper_time_resolution = 1000;
-            Int64 zookeeper_time_seconds = stat.ctime / zookeeper_time_resolution;
-            bool node_lifetime_is_expired = zookeeper_time_seconds + task_max_lifetime < current_time_seconds;
-
-            /// If too many nodes in task queue (> max_tasks_in_queue), delete oldest one
-            bool node_is_outside_max_window = it < first_non_outdated_node;
-
-            if (!node_lifetime_is_expired && !node_is_outside_max_window)
+            if (!canRemoveQueueEntry(node_name, stat))
                 continue;
 
             /// Skip if there are active nodes (it is weak guard)
@@ -799,10 +785,7 @@ void DDLWorker::cleanupQueue(Int64 current_time_seconds, const ZooKeeperPtr & zo
                 continue;
             }
 
-            if (node_lifetime_is_expired)
-                LOG_INFO(log, "Lifetime of task {} is expired, deleting it", node_name);
-            else if (node_is_outside_max_window)
-                LOG_INFO(log, "Task {} is outdated, deleting it", node_name);
+            LOG_INFO(log, "Task {} is outdated, deleting it", node_name);
 
             /// Deleting
             {
@@ -827,6 +810,19 @@ void DDLWorker::cleanupQueue(Int64 current_time_seconds, const ZooKeeperPtr & zo
     }
 }
 
+bool DDLWorker::canRemoveQueueEntry(const String & entry_name, const Coordination::Stat & stat)
+{
+    /// Delete node if its lifetime is expired (according to task_max_lifetime parameter)
+    constexpr UInt64 zookeeper_time_resolution = 1000;
+    Int64 zookeeper_time_seconds = stat.ctime / zookeeper_time_resolution;
+    bool node_lifetime_is_expired = zookeeper_time_seconds + task_max_lifetime < Poco::Timestamp().epochTime();
+
+    /// If too many nodes in task queue (> max_tasks_in_queue), delete oldest one
+    UInt32 entry_number = DDLTaskBase::getLogEntryNumber(entry_name);
+    bool node_is_outside_max_window = entry_number < max_id.load(std::memory_order_relaxed) - max_tasks_in_queue;
+
+    return node_lifetime_is_expired || node_is_outside_max_window;
+}
 
 /// Try to create nonexisting "status" dirs for a node
 void DDLWorker::createStatusDirs(const std::string & node_path, const ZooKeeperPtr & zookeeper)
@@ -927,6 +923,7 @@ void DDLWorker::runMainThread()
             worker_pool = std::make_unique<ThreadPool>(pool_size);
         /// Clear other in-memory state, like server just started.
         current_tasks.clear();
+        last_skipped_entry_name.reset();
         max_id = 0;
     };
 
diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h
index 706face3885..1ae4f815b44 100644
--- a/src/Interpreters/DDLWorker.h
+++ b/src/Interpreters/DDLWorker.h
@@ -24,6 +24,11 @@ namespace Poco
     namespace Util { class AbstractConfiguration; }
 }
 
+namespace Coordination
+{
+    struct Stat;
+}
+
 namespace DB
 {
 class ASTAlterQuery;
@@ -94,6 +99,7 @@ protected:
 
     /// Checks and cleanups queue's nodes
     void cleanupQueue(Int64 current_time_seconds, const ZooKeeperPtr & zookeeper);
+    virtual bool canRemoveQueueEntry(const String & entry_name, const Coordination::Stat & stat);
 
     /// Init task node
     static void createStatusDirs(const std::string & node_path, const ZooKeeperPtr & zookeeper);
diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp
index a0148316610..2774f78663e 100644
--- a/src/Interpreters/executeDDLQueryOnCluster.cpp
+++ b/src/Interpreters/executeDDLQueryOnCluster.cpp
@@ -277,7 +277,6 @@ Block DDLQueryStatusInputStream::readImpl()
                     status.tryDeserializeText(status_data);
             }
 
-            //FIXME
             String host = host_id;
             UInt16 port = 0;
             if (by_hostname)
diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py
index 2a5a7f4716e..04646507ed7 100644
--- a/tests/integration/test_replicated_database/test.py
+++ b/tests/integration/test_replicated_database/test.py
@@ -8,7 +8,7 @@ from helpers.test_tools import assert_eq_with_retry
 cluster = ClickHouseCluster(__file__)
 
 main_node = cluster.add_instance('main_node', main_configs=['configs/config.xml'], with_zookeeper=True, stay_alive=True, macros={"shard": 1, "replica": 1})
-dummy_node = cluster.add_instance('dummy_node', main_configs=['configs/config.xml'], with_zookeeper=True, macros={"shard": 1, "replica": 2})
+dummy_node = cluster.add_instance('dummy_node', main_configs=['configs/config.xml'], with_zookeeper=True, stay_alive=True, macros={"shard": 1, "replica": 2})
 competing_node = cluster.add_instance('competing_node', main_configs=['configs/config.xml'], with_zookeeper=True, macros={"shard": 1, "replica": 3})
 snapshotting_node = cluster.add_instance('snapshotting_node', main_configs=['configs/config.xml'], with_zookeeper=True, macros={"shard": 2, "replica": 1})
 snapshot_recovering_node = cluster.add_instance('snapshot_recovering_node', main_configs=['configs/config.xml'], with_zookeeper=True, macros={"shard": 2, "replica": 2})
@@ -100,9 +100,12 @@ def test_alters_from_different_replicas(started_cluster):
 
     main_node.query("CREATE TABLE testdb.dist AS testdb.concurrent_test ENGINE = Distributed(cluster, testdb, concurrent_test, CounterID)")
 
-    dummy_node.kill_clickhouse(stop_start_wait_sec=0)
+    dummy_node.stop_clickhouse(kill=True)
 
-    competing_node.query("ALTER TABLE testdb.concurrent_test ADD COLUMN Added0 UInt32;")
+    settings = {"distributed_ddl_task_timeout": 10}
+    assert "There are 1 unfinished hosts (0 of them are currently active)" in \
+        competing_node.query_and_get_error("ALTER TABLE testdb.concurrent_test ADD COLUMN Added0 UInt32;", settings=settings)
+    dummy_node.start_clickhouse()
     main_node.query("ALTER TABLE testdb.concurrent_test ADD COLUMN Added2 UInt32;")
     competing_node.query("ALTER TABLE testdb.concurrent_test ADD COLUMN Added1 UInt32 AFTER Added0;")
     main_node.query("ALTER TABLE testdb.concurrent_test ADD COLUMN AddedNested1 Nested(A UInt32, B UInt64) AFTER Added2;")

From 8212976dc0f627ed9c2d36c29436d6d7a75f3e77 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Tue, 9 Feb 2021 18:36:08 +0300
Subject: [PATCH 0881/1238] add boundary order check

---
 src/Interpreters/WindowDescription.cpp  | 32 ++++++++++++++++++++++---
 src/Interpreters/WindowDescription.h    |  3 +++
 src/Processors/QueryPlan/WindowStep.cpp | 14 ++---------
 3 files changed, 34 insertions(+), 15 deletions(-)

diff --git a/src/Interpreters/WindowDescription.cpp b/src/Interpreters/WindowDescription.cpp
index a410ba0de1b..e922f49c896 100644
--- a/src/Interpreters/WindowDescription.cpp
+++ b/src/Interpreters/WindowDescription.cpp
@@ -121,11 +121,21 @@ void WindowFrame::checkValid() const
     if (end_type == BoundaryType::Offset
         && begin_type == BoundaryType::Offset)
     {
+        // Frame starting with following rows can't have preceding rows.
         if (!(end_preceding && !begin_preceding))
         {
-            // Should probably check here that starting offset is less than
-            // ending offset, but with regard to ORDER BY direction for RANGE
-            // frames.
+            // Frame start offset must be less or equal that the frame end offset.
+            const bool begin_before_end
+                = begin_offset * (begin_preceding ? -1 : 1)
+                    <= end_offset * (end_preceding ? -1 : 1);
+
+            if (!begin_before_end)
+            {
+                throw Exception(ErrorCodes::BAD_ARGUMENTS,
+                    "Frame start offset {} {} does not precede the frame end offset {} {}",
+                    begin_offset, begin_preceding ? "PRECEDING" : "FOLLOWING",
+                    end_offset, end_preceding ? "PRECEDING" : "FOLLOWING");
+            }
             return;
         }
     }
@@ -135,4 +145,20 @@ void WindowFrame::checkValid() const
         toString());
 }
 
+void WindowDescription::checkValid() const
+{
+    frame.checkValid();
+
+    // RANGE OFFSET requires exactly one ORDER BY column.
+    if (frame.type == WindowFrame::FrameType::Range
+        && (frame.begin_type == WindowFrame::BoundaryType::Offset
+            || frame.end_type == WindowFrame::BoundaryType::Offset)
+        && order_by.size() != 1)
+    {
+        throw Exception(ErrorCodes::BAD_ARGUMENTS,
+            "The RANGE OFFSET window frame requires exactly one ORDER BY column, {} given",
+           order_by.size());
+    }
+}
+
 }
diff --git a/src/Interpreters/WindowDescription.h b/src/Interpreters/WindowDescription.h
index ee181680b94..1eb4491ad60 100644
--- a/src/Interpreters/WindowDescription.h
+++ b/src/Interpreters/WindowDescription.h
@@ -131,7 +131,10 @@ struct WindowDescription
     // The window functions that are calculated for this window.
     std::vector<WindowFunctionDescription> window_functions;
 
+
     std::string dump() const;
+
+    void checkValid() const;
 };
 
 using WindowFunctionDescriptions = std::vector<WindowFunctionDescription>;
diff --git a/src/Processors/QueryPlan/WindowStep.cpp b/src/Processors/QueryPlan/WindowStep.cpp
index 04d374963ff..42834d70b8f 100644
--- a/src/Processors/QueryPlan/WindowStep.cpp
+++ b/src/Processors/QueryPlan/WindowStep.cpp
@@ -60,21 +60,11 @@ WindowStep::WindowStep(const DataStream & input_stream_,
     , window_functions(window_functions_)
     , input_header(input_stream_.header)
 {
-    const auto & frame = window_description.frame;
     // We don't remove any columns, only add, so probably we don't have to update
     // the output DataStream::distinct_columns.
-    frame.checkValid();
 
-    // RANGE OFFSET requires exactly one ORDER BY column.
-    if (frame.type == WindowFrame::FrameType::Range
-        && (frame.begin_type == WindowFrame::BoundaryType::Offset
-            || frame.end_type == WindowFrame::BoundaryType::Offset)
-        && window_description.order_by.size() != 1)
-    {
-        throw Exception(ErrorCodes::BAD_ARGUMENTS,
-            "The RANGE OFFSET window frame requires exactly one ORDER BY column, {} given",
-            window_description.order_by.size());
-    }
+    window_description.checkValid();
+
 }
 
 void WindowStep::transformPipeline(QueryPipeline & pipeline)

From eac13d9d9d007e19896cc87d79f1c60aa55859bd Mon Sep 17 00:00:00 2001
From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com>
Date: Tue, 9 Feb 2021 18:37:33 +0300
Subject: [PATCH 0882/1238] Add retries to test_access_control_on_cluster

---
 tests/integration/test_access_control_on_cluster/test.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/integration/test_access_control_on_cluster/test.py b/tests/integration/test_access_control_on_cluster/test.py
index e804be2c94e..bc740402161 100644
--- a/tests/integration/test_access_control_on_cluster/test.py
+++ b/tests/integration/test_access_control_on_cluster/test.py
@@ -18,22 +18,22 @@ def started_cluster():
 
 
 def test_access_control_on_cluster():
-    ch1.query("CREATE USER Alex ON CLUSTER 'cluster'")
+    ch1.query_with_retry("CREATE USER Alex ON CLUSTER 'cluster'", retry_count=3)
     assert ch1.query("SHOW CREATE USER Alex") == "CREATE USER Alex\n"
     assert ch2.query("SHOW CREATE USER Alex") == "CREATE USER Alex\n"
     assert ch3.query("SHOW CREATE USER Alex") == "CREATE USER Alex\n"
 
-    ch2.query("GRANT ON CLUSTER 'cluster' SELECT ON *.* TO Alex")
+    ch2.query_with_retry("GRANT ON CLUSTER 'cluster' SELECT ON *.* TO Alex", retry_count=3)
     assert ch1.query("SHOW GRANTS FOR Alex") == "GRANT SELECT ON *.* TO Alex\n"
     assert ch2.query("SHOW GRANTS FOR Alex") == "GRANT SELECT ON *.* TO Alex\n"
     assert ch3.query("SHOW GRANTS FOR Alex") == "GRANT SELECT ON *.* TO Alex\n"
 
-    ch3.query("REVOKE ON CLUSTER 'cluster' SELECT ON *.* FROM Alex")
+    ch3.query_with_retry("REVOKE ON CLUSTER 'cluster' SELECT ON *.* FROM Alex", retry_count=3)
     assert ch1.query("SHOW GRANTS FOR Alex") == ""
     assert ch2.query("SHOW GRANTS FOR Alex") == ""
     assert ch3.query("SHOW GRANTS FOR Alex") == ""
 
-    ch2.query("DROP USER Alex ON CLUSTER 'cluster'")
+    ch2.query_with_retry("DROP USER Alex ON CLUSTER 'cluster'", retry_count=3)
     assert "There is no user `Alex`" in ch1.query_and_get_error("SHOW CREATE USER Alex")
     assert "There is no user `Alex`" in ch2.query_and_get_error("SHOW CREATE USER Alex")
     assert "There is no user `Alex`" in ch3.query_and_get_error("SHOW CREATE USER Alex")

From c78f3ba204683d2a7b22c050cd8821426b25965a Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Tue, 9 Feb 2021 18:39:15 +0300
Subject: [PATCH 0883/1238] Missed file

---
 src/Coordination/CoordinationSettings.cpp | 35 ++++++++++++++++++++
 src/Coordination/CoordinationSettings.h   | 40 +++++++++++++++++++++++
 2 files changed, 75 insertions(+)
 create mode 100644 src/Coordination/CoordinationSettings.cpp
 create mode 100644 src/Coordination/CoordinationSettings.h

diff --git a/src/Coordination/CoordinationSettings.cpp b/src/Coordination/CoordinationSettings.cpp
new file mode 100644
index 00000000000..cd46817e82f
--- /dev/null
+++ b/src/Coordination/CoordinationSettings.cpp
@@ -0,0 +1,35 @@
+#include <Coordination/CoordinationSettings.h>
+#include <Core/Settings.h>
+#include <common/logger_useful.h>
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int UNKNOWN_SETTING;
+}
+
+IMPLEMENT_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS)
+
+void CoordinationSettings::loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config)
+{
+    if (!config.has(config_elem))
+        return;
+
+    Poco::Util::AbstractConfiguration::Keys config_keys;
+    config.keys(config_elem, config_keys);
+
+    try
+    {
+        for (const String & key : config_keys)
+            set(key, config.getString(config_elem + "." + key));
+    }
+    catch (Exception & e)
+    {
+        if (e.code() == ErrorCodes::UNKNOWN_SETTING)
+            e.addMessage("in Coordination settings config");
+        throw;
+    }
+}
+
+}
diff --git a/src/Coordination/CoordinationSettings.h b/src/Coordination/CoordinationSettings.h
new file mode 100644
index 00000000000..374d432f2db
--- /dev/null
+++ b/src/Coordination/CoordinationSettings.h
@@ -0,0 +1,40 @@
+#pragma once
+
+#include <Core/Defines.h>
+#include <Core/BaseSettings.h>
+#include <Common/ZooKeeper/ZooKeeperConstants.h>
+#include <Poco/Util/AbstractConfiguration.h>
+
+namespace DB
+{
+
+struct Settings;
+
+/** These settings represent fine tunes for internal details of Coordination storages
+  * and should not be changed by the user without a reason.
+  */
+
+#define LIST_OF_COORDINATION_SETTINGS(M) \
+    M(Milliseconds, session_timeout_ms, Coordination::DEFAULT_SESSION_TIMEOUT_MS, "Default client session timeout", 0) \
+    M(Milliseconds, operation_timeout_ms, Coordination::DEFAULT_OPERATION_TIMEOUT_MS, "Default client operation timeout", 0) \
+    M(Milliseconds, dead_session_check_period_ms, 500, "How often leader will check sessions to consider them dead and remove", 0) \
+    M(Milliseconds, heart_beat_interval_ms, 500, "Heartbeat interval between quorum nodes", 0) \
+    M(Milliseconds, election_timeout_lower_bound_ms, 1000, "Lower bound of election timer (avoid too often leader elections)", 0) \
+    M(Milliseconds, election_timeout_upper_bound_ms, 2000, "Lower bound of election timer (avoid too often leader elections)", 0) \
+    M(UInt64, reserved_log_items, 5000, "How many log items to store (don't remove during compaction)", 0) \
+    M(UInt64, snapshot_distance, 5000, "How many log items we have to collect to write new snapshot", 0) \
+    M(UInt64, max_stored_snapshots, 3, "How many snapshots we want to store", 0) \
+    M(Bool, auto_forwarding, true, "Allow to forward write requests from followers to leader", 0) \
+    M(Milliseconds, shutdown_timeout, 5000, "How many time we will until RAFT shutdown", 0)
+
+DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS)
+
+
+struct CoordinationSettings : public BaseSettings<CoordinationSettingsTraits>
+{
+    void loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config);
+};
+
+using CoordinationSettingsPtr = std::shared_ptr<CoordinationSettings>;
+
+}

From 3ba6629d7ba8d1a1e2e480d3e94899054b64ae9a Mon Sep 17 00:00:00 2001
From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com>
Date: Tue, 9 Feb 2021 18:48:33 +0300
Subject: [PATCH 0884/1238] add stable sort for tests

---
 .../test_cassandra.py                              | 14 +++++++-------
 .../test_clickhouse_local.py                       |  6 +++---
 .../test_clickhouse_remote.py                      | 14 +++++++-------
 .../test_executable_hashed.py                      |  2 +-
 .../test_file.py                                   | 14 +++++++-------
 .../test_http.py                                   | 14 +++++++-------
 .../test_https.py                                  | 14 +++++++-------
 .../test_mongo.py                                  | 14 +++++++-------
 .../test_mysql.py                                  | 14 +++++++-------
 9 files changed, 53 insertions(+), 53 deletions(-)

diff --git a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_cassandra.py b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_cassandra.py
index 0c69b7f7cbb..81f9db1964b 100644
--- a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_cassandra.py
+++ b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_cassandra.py
@@ -1,4 +1,4 @@
-import os 
+import os
 import math
 import pytest
 
@@ -43,13 +43,13 @@ def setup_module(module):
     main_configs = []
     main_configs.append(os.path.join('configs', 'disable_ssl_verification.xml'))
     main_configs.append(os.path.join('configs', 'log_conf.xml'))
-   
+
     for fname in os.listdir(DICT_CONFIG_PATH):
         dictionaries.append(os.path.join(DICT_CONFIG_PATH, fname))
 
     node = cluster.add_instance('node', main_configs=main_configs, dictionaries=dictionaries, with_cassandra=True)
 
-    
+
 def teardown_module(module):
     global DICT_CONFIG_PATH
     for fname in os.listdir(DICT_CONFIG_PATH):
@@ -70,14 +70,14 @@ def started_cluster():
     finally:
         cluster.shutdown()
 
-@pytest.mark.parametrize("layout_name", LAYOUTS_SIMPLE)
+@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_SIMPLE))
 def test_simple(started_cluster, layout_name):
     simple_tester.execute(layout_name, node)
 
-@pytest.mark.parametrize("layout_name", LAYOUTS_COMPLEX)
+@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_COMPLEX))
 def test_complex(started_cluster, layout_name):
     complex_tester.execute(layout_name, node)
-    
-@pytest.mark.parametrize("layout_name", LAYOUTS_RANGED)
+
+@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_RANGED))
 def test_ranged(started_cluster, layout_name):
     ranged_tester.execute(layout_name, node)
diff --git a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_clickhouse_local.py b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_clickhouse_local.py
index 1adc02ba6aa..3d56746be6e 100644
--- a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_clickhouse_local.py
+++ b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_clickhouse_local.py
@@ -69,14 +69,14 @@ def started_cluster():
     finally:
         cluster.shutdown()
 
-@pytest.mark.parametrize("layout_name", LAYOUTS_SIMPLE)
+@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_SIMPLE))
 def test_simple(started_cluster, layout_name):
     simple_tester.execute(layout_name, node)
 
-@pytest.mark.parametrize("layout_name", LAYOUTS_COMPLEX)
+@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_COMPLEX))
 def test_complex(started_cluster, layout_name):
     complex_tester.execute(layout_name, node)
     
-@pytest.mark.parametrize("layout_name", LAYOUTS_RANGED)
+@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_RANGED))
 def test_ranged(started_cluster, layout_name):
     ranged_tester.execute(layout_name, node)
diff --git a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_clickhouse_remote.py b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_clickhouse_remote.py
index 4e7f307b959..374e620e1c3 100644
--- a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_clickhouse_remote.py
+++ b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_clickhouse_remote.py
@@ -1,4 +1,4 @@
-import os 
+import os
 import math
 import pytest
 
@@ -47,10 +47,10 @@ def setup_module(module):
         dictionaries.append(os.path.join(DICT_CONFIG_PATH, fname))
 
     cluster.add_instance('clickhouse1', main_configs=main_configs)
-    
+
     node = cluster.add_instance('node', main_configs=main_configs, dictionaries=dictionaries)
 
-    
+
 def teardown_module(module):
     global DICT_CONFIG_PATH
     for fname in os.listdir(DICT_CONFIG_PATH):
@@ -71,14 +71,14 @@ def started_cluster():
     finally:
         cluster.shutdown()
 
-@pytest.mark.parametrize("layout_name", list(set(LAYOUTS_SIMPLE).difference(set("cache"))) )
+@pytest.mark.parametrize("layout_name", sorted(list(set(LAYOUTS_SIMPLE).difference(set("cache"))) ))
 def test_simple(started_cluster, layout_name):
     simple_tester.execute(layout_name, node)
 
-@pytest.mark.parametrize("layout_name", list(set(LAYOUTS_COMPLEX).difference(set("complex_key_cache"))))
+@pytest.mark.parametrize("layout_name", sorted(list(set(LAYOUTS_COMPLEX).difference(set("complex_key_cache")))))
 def test_complex(started_cluster, layout_name):
     complex_tester.execute(layout_name, node)
-    
-@pytest.mark.parametrize("layout_name", LAYOUTS_RANGED)
+
+@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_RANGED))
 def test_ranged(started_cluster, layout_name):
     ranged_tester.execute(layout_name, node)
diff --git a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_executable_hashed.py b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_executable_hashed.py
index 03af42bb1d4..dfcc35c54f8 100644
--- a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_executable_hashed.py
+++ b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_executable_hashed.py
@@ -77,6 +77,6 @@ def test_simple(started_cluster, layout_name):
 def test_complex(started_cluster, layout_name):
     complex_tester.execute(layout_name, node)
 
-@pytest.mark.parametrize("layout_name", LAYOUTS_RANGED)
+@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_RANGED))
 def test_ranged(started_cluster, layout_name):
     ranged_tester.execute(layout_name, node)
diff --git a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_file.py b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_file.py
index f786bda847f..aa81cca466b 100644
--- a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_file.py
+++ b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_file.py
@@ -1,4 +1,4 @@
-import os 
+import os
 import math
 import pytest
 
@@ -42,13 +42,13 @@ def setup_module(module):
     dictionaries = []
     main_configs = []
     main_configs.append(os.path.join('configs', 'disable_ssl_verification.xml'))
-    
+
     for fname in os.listdir(DICT_CONFIG_PATH):
         dictionaries.append(os.path.join(DICT_CONFIG_PATH, fname))
 
     node = cluster.add_instance('node', main_configs=main_configs, dictionaries=dictionaries)
 
-    
+
 def teardown_module(module):
     global DICT_CONFIG_PATH
     for fname in os.listdir(DICT_CONFIG_PATH):
@@ -69,14 +69,14 @@ def started_cluster():
     finally:
         cluster.shutdown()
 
-@pytest.mark.parametrize("layout_name", set(LAYOUTS_SIMPLE).difference({'cache', 'direct'}) )
+@pytest.mark.parametrize("layout_name", sorted(set(LAYOUTS_SIMPLE).difference({'cache', 'direct'})) )
 def test_simple(started_cluster, layout_name):
     simple_tester.execute(layout_name, node)
 
-@pytest.mark.parametrize("layout_name", list(set(LAYOUTS_COMPLEX).difference({'complex_key_cache', 'complex_key_direct'})))
+@pytest.mark.parametrize("layout_name", sorted(list(set(LAYOUTS_COMPLEX).difference({'complex_key_cache', 'complex_key_direct'}))))
 def test_complex(started_cluster, layout_name):
     complex_tester.execute(layout_name, node)
-    
-@pytest.mark.parametrize("layout_name", LAYOUTS_RANGED)
+
+@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_RANGED))
 def test_ranged(started_cluster, layout_name):
     ranged_tester.execute(layout_name, node)
diff --git a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_http.py b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_http.py
index 80baee5ee45..7c8b5a41b01 100644
--- a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_http.py
+++ b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_http.py
@@ -1,4 +1,4 @@
-import os 
+import os
 import math
 import pytest
 
@@ -42,7 +42,7 @@ def setup_module(module):
     dictionaries = []
     main_configs = []
     main_configs.append(os.path.join('configs', 'disable_ssl_verification.xml'))
-    
+
     for fname in os.listdir(DICT_CONFIG_PATH):
         dictionaries.append(os.path.join(DICT_CONFIG_PATH, fname))
 
@@ -50,7 +50,7 @@ def setup_module(module):
 
     node = cluster.add_instance('node', main_configs=main_configs, dictionaries=dictionaries)
 
-    
+
 def teardown_module(module):
     global DICT_CONFIG_PATH
     for fname in os.listdir(DICT_CONFIG_PATH):
@@ -71,14 +71,14 @@ def started_cluster():
     finally:
         cluster.shutdown()
 
-@pytest.mark.parametrize("layout_name", LAYOUTS_SIMPLE)
+@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_SIMPLE))
 def test_simple(started_cluster, layout_name):
     simple_tester.execute(layout_name, node)
 
-@pytest.mark.parametrize("layout_name", LAYOUTS_COMPLEX)
+@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_COMPLEX))
 def test_complex(started_cluster, layout_name):
     complex_tester.execute(layout_name, node)
-    
-@pytest.mark.parametrize("layout_name", LAYOUTS_RANGED)
+
+@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_RANGED))
 def test_ranged(started_cluster, layout_name):
     ranged_tester.execute(layout_name, node)
diff --git a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_https.py b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_https.py
index ccac2cfd268..44950f013b3 100644
--- a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_https.py
+++ b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_https.py
@@ -1,4 +1,4 @@
-import os 
+import os
 import math
 import pytest
 
@@ -42,7 +42,7 @@ def setup_module(module):
     dictionaries = []
     main_configs = []
     main_configs.append(os.path.join('configs', 'disable_ssl_verification.xml'))
-    
+
     for fname in os.listdir(DICT_CONFIG_PATH):
         dictionaries.append(os.path.join(DICT_CONFIG_PATH, fname))
 
@@ -50,7 +50,7 @@ def setup_module(module):
 
     node = cluster.add_instance('node', main_configs=main_configs, dictionaries=dictionaries)
 
-    
+
 def teardown_module(module):
     global DICT_CONFIG_PATH
     for fname in os.listdir(DICT_CONFIG_PATH):
@@ -71,14 +71,14 @@ def started_cluster():
     finally:
         cluster.shutdown()
 
-@pytest.mark.parametrize("layout_name", LAYOUTS_SIMPLE)
+@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_SIMPLE))
 def test_simple(started_cluster, layout_name):
     simple_tester.execute(layout_name, node)
 
-@pytest.mark.parametrize("layout_name", LAYOUTS_COMPLEX)
+@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_COMPLEX))
 def test_complex(started_cluster, layout_name):
     complex_tester.execute(layout_name, node)
-    
-@pytest.mark.parametrize("layout_name", LAYOUTS_RANGED)
+
+@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_RANGED))
 def test_ranged(started_cluster, layout_name):
     ranged_tester.execute(layout_name, node)
diff --git a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mongo.py b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mongo.py
index ffa376dcdb3..7d808845854 100644
--- a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mongo.py
+++ b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mongo.py
@@ -1,4 +1,4 @@
-import os 
+import os
 import math
 import pytest
 
@@ -42,13 +42,13 @@ def setup_module(module):
     dictionaries = []
     main_configs = []
     main_configs.append(os.path.join('configs', 'disable_ssl_verification.xml'))
-    
+
     for fname in os.listdir(DICT_CONFIG_PATH):
         dictionaries.append(os.path.join(DICT_CONFIG_PATH, fname))
 
     node = cluster.add_instance('node', main_configs=main_configs, dictionaries=dictionaries, with_mongo=True)
 
-    
+
 def teardown_module(module):
     global DICT_CONFIG_PATH
     for fname in os.listdir(DICT_CONFIG_PATH):
@@ -69,14 +69,14 @@ def started_cluster():
     finally:
         cluster.shutdown()
 
-@pytest.mark.parametrize("layout_name", LAYOUTS_SIMPLE)
+@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_SIMPLE))
 def test_simple(started_cluster, layout_name):
     simple_tester.execute(layout_name, node)
 
-@pytest.mark.parametrize("layout_name", LAYOUTS_COMPLEX)
+@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_COMPLEX))
 def test_complex(started_cluster, layout_name):
     complex_tester.execute(layout_name, node)
-    
-@pytest.mark.parametrize("layout_name", LAYOUTS_RANGED)
+
+@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_RANGED))
 def test_ranged(started_cluster, layout_name):
     ranged_tester.execute(layout_name, node)
diff --git a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mysql.py b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mysql.py
index 77d16e901a9..7a6b0b7ce8d 100644
--- a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mysql.py
+++ b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mysql.py
@@ -1,4 +1,4 @@
-import os 
+import os
 import math
 import pytest
 
@@ -43,13 +43,13 @@ def setup_module(module):
     main_configs = []
     main_configs.append(os.path.join('configs', 'disable_ssl_verification.xml'))
     main_configs.append(os.path.join('configs', 'log_conf.xml'))
-    
+
     for fname in os.listdir(DICT_CONFIG_PATH):
         dictionaries.append(os.path.join(DICT_CONFIG_PATH, fname))
 
     node = cluster.add_instance('node', main_configs=main_configs, dictionaries=dictionaries, with_mysql=True)
 
-    
+
 def teardown_module(module):
     global DICT_CONFIG_PATH
     for fname in os.listdir(DICT_CONFIG_PATH):
@@ -70,14 +70,14 @@ def started_cluster():
     finally:
         cluster.shutdown()
 
-@pytest.mark.parametrize("layout_name", LAYOUTS_SIMPLE)
+@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_SIMPLE))
 def test_simple(started_cluster, layout_name):
     simple_tester.execute(layout_name, node)
 
-@pytest.mark.parametrize("layout_name", LAYOUTS_COMPLEX)
+@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_COMPLEX))
 def test_complex(started_cluster, layout_name):
     complex_tester.execute(layout_name, node)
-    
-@pytest.mark.parametrize("layout_name", LAYOUTS_RANGED)
+
+@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_RANGED))
 def test_ranged(started_cluster, layout_name):
     ranged_tester.execute(layout_name, node)

From 9de7a0a7792fe66882622a943cbf4dc30daa041d Mon Sep 17 00:00:00 2001
From: Vitaly Baranov <vitbar@yandex-team.ru>
Date: Tue, 9 Feb 2021 18:55:36 +0300
Subject: [PATCH 0885/1238] Add comment

---
 src/Storages/StorageMongoDB.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storages/StorageMongoDB.h b/src/Storages/StorageMongoDB.h
index 54706337e3e..589ab276539 100644
--- a/src/Storages/StorageMongoDB.h
+++ b/src/Storages/StorageMongoDB.h
@@ -52,7 +52,7 @@ private:
 
     std::shared_ptr<Poco::MongoDB::Connection> connection;
     bool authentified = false;
-    std::mutex connection_mutex;
+    std::mutex connection_mutex; /// Protects the variables `connection` and `authentified`.
 };
 
 }

From db09d5a990aa1b5085cb22dc22ff856352d86ed5 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Tue, 9 Feb 2021 19:35:46 +0300
Subject: [PATCH 0886/1238] Fix not closed ports in DelayedPortsProcessor

---
 src/Processors/DelayedPortsProcessor.cpp | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/src/Processors/DelayedPortsProcessor.cpp b/src/Processors/DelayedPortsProcessor.cpp
index d740ef08e5a..ae4ba4659aa 100644
--- a/src/Processors/DelayedPortsProcessor.cpp
+++ b/src/Processors/DelayedPortsProcessor.cpp
@@ -40,6 +40,11 @@ void DelayedPortsProcessor::finishPair(PortsPair & pair)
 {
     if (!pair.is_finished)
     {
+        if (pair.output_port)
+            pair.output_port->finish();
+
+        pair.input_port->close();
+
         pair.is_finished = true;
         ++num_finished_pairs;
 
@@ -52,15 +57,12 @@ bool DelayedPortsProcessor::processPair(PortsPair & pair)
 {
     if (pair.output_port && pair.output_port->isFinished())
     {
-        pair.input_port->close();
         finishPair(pair);
         return false;
     }
 
     if (pair.input_port->isFinished())
     {
-        if (pair.output_port)
-            pair.output_port->finish();
         finishPair(pair);
         return false;
     }
@@ -111,12 +113,7 @@ IProcessor::Status DelayedPortsProcessor::prepare(const PortNumbers & updated_in
     if (num_finished_outputs == outputs.size())
     {
         for (auto & pair : port_pairs)
-        {
-            if (pair.output_port)
-                pair.output_port->finish();
-
-            pair.input_port->close();
-        }
+            finishPair(pair);
 
         return Status::Finished;
     }

From a2a0d385c5093739d78b22a5f926f856dbc4fb3a Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Tue, 9 Feb 2021 19:50:39 +0300
Subject: [PATCH 0887/1238] style

---
 src/Interpreters/WindowDescription.h    | 2 +-
 src/Processors/QueryPlan/WindowStep.cpp | 5 -----
 2 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/src/Interpreters/WindowDescription.h b/src/Interpreters/WindowDescription.h
index 1eb4491ad60..faad4649f91 100644
--- a/src/Interpreters/WindowDescription.h
+++ b/src/Interpreters/WindowDescription.h
@@ -17,7 +17,7 @@ class ASTFunction;
 struct WindowFunctionDescription
 {
     std::string column_name;
-    const ASTFunction * function_node;
+    const ASTFunction * function_node = nullptr;
     AggregateFunctionPtr aggregate_function;
     Array function_parameters;
     DataTypes argument_types;
diff --git a/src/Processors/QueryPlan/WindowStep.cpp b/src/Processors/QueryPlan/WindowStep.cpp
index 42834d70b8f..2b824f91b45 100644
--- a/src/Processors/QueryPlan/WindowStep.cpp
+++ b/src/Processors/QueryPlan/WindowStep.cpp
@@ -9,11 +9,6 @@
 namespace DB
 {
 
-namespace ErrorCodes
-{
-    extern const int BAD_ARGUMENTS;
-}
-
 static ITransformingStep::Traits getTraits()
 {
     return ITransformingStep::Traits

From ed59b355c0dba42da612546a584b0645ef463019 Mon Sep 17 00:00:00 2001
From: Dmitriy <sevirov@yandex-team.ru>
Date: Tue, 9 Feb 2021 20:34:16 +0300
Subject: [PATCH 0888/1238] Update the description of the
 opentelemetry_start_trace_probability setting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Обновил документацию настройки.
---
 docs/en/operations/settings/settings.md | 7 ++++---
 docs/ru/operations/settings/settings.md | 7 ++++---
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 869c76fb975..0554ea79ecd 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -2594,12 +2594,13 @@ Default value: `16`.
 
 ## opentelemetry_start_trace_probability {#opentelemetry-start-trace-probability}
 
-Enables a trace for executed queries.
+Sets the probability that the ClickHouse can start a trace for executed queries (if no parent [trace context](https://www.w3.org/TR/trace-context/) is supplied).
 
 Possible values:
 
--   0 — The trace for a executed query is disabled.
--   1 — The trace for a executed query is enabled.
+-   0 — The trace for a executed queries is disabled (if no parent trace context is supplied).
+-   (0, 1) — The probability with which the ClickHouse can start a trace for executed queries (if no parent trace context is supplied). For example, if the setting value is `0,5`, ClickHouse can start a trace on average for half of the queries.
+-   1 — The trace for all executed queries is enabled.
 
 Default value: `0`.
 
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index 2aa81daa0b0..47e2666e652 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -2475,12 +2475,13 @@ SELECT SUM(-1), MAX(0) FROM system.one WHERE 0;
 
 ## opentelemetry_start_trace_probability {#opentelemetry-start-trace-probability}
 
-Включает трассировку для выполненных запросов.
+Задает вероятность того, что ClickHouse начнет трассировку для выполненных запросов (если не указан [родительский контекст](https://www.w3.org/TR/trace-context/) трассировки).
 
 Возможные значения:
 
--   0 — трассировка для выполненного запроса отключена.
--   1 — трассировка для выполненного запроса включена.
+-   0 — трассировка для выполненных запросов отключена (если не указан родительский контекст трассировки).
+-   (0, 1) — вероятность, с которой ClickHouse начнет трассировку для выполненных запросов (если не указан родительский контекст трассировки). Например, при значении настройки, равной `0,5`, ClickHouse начнет трассировку в среднем для половины запросов.
+-   1 — трассировка для всех выполненных запросов включена.
 
 Значение по умолчанию: `0`.
 

From 51c221f993ce1cd7e6500defbeb05458aee2bd1e Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Tue, 9 Feb 2021 21:29:06 +0300
Subject: [PATCH 0889/1238] Fix outdated session kill

---
 .../NuKeeperStorageDispatcher.cpp             |  8 ++-
 .../test_testkeeper_multinode/test.py         | 69 ++++++++++++++-----
 2 files changed, 59 insertions(+), 18 deletions(-)

diff --git a/src/Coordination/NuKeeperStorageDispatcher.cpp b/src/Coordination/NuKeeperStorageDispatcher.cpp
index 914985ee534..8ca5d3fff13 100644
--- a/src/Coordination/NuKeeperStorageDispatcher.cpp
+++ b/src/Coordination/NuKeeperStorageDispatcher.cpp
@@ -274,7 +274,13 @@ void NuKeeperStorageDispatcher::sessionCleanerTask()
                     LOG_INFO(log, "Found dead session {}, will try to close it", dead_session);
                     Coordination::ZooKeeperRequestPtr request = Coordination::ZooKeeperRequestFactory::instance().get(Coordination::OpNum::Close);
                     request->xid = Coordination::CLOSE_XID;
-                    putRequest(request, dead_session);
+                    NuKeeperStorage::RequestForSession request_info;
+                    request_info.request = request;
+                    request_info.session_id = dead_session;
+                    {
+                        std::lock_guard lock(push_request_mutex);
+                        requests_queue.push(std::move(request_info));
+                    }
                     finishSession(dead_session);
                 }
             }
diff --git a/tests/integration/test_testkeeper_multinode/test.py b/tests/integration/test_testkeeper_multinode/test.py
index 05879613ba6..51f60df7719 100644
--- a/tests/integration/test_testkeeper_multinode/test.py
+++ b/tests/integration/test_testkeeper_multinode/test.py
@@ -27,23 +27,8 @@ def started_cluster():
 def smaller_exception(ex):
     return '\n'.join(str(ex).split('\n')[0:2])
 
-def test_simple_replicated_table(started_cluster):
-
-    for i, node in enumerate([node1, node2, node3]):
-        node.query("CREATE TABLE t (value UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/t', '{}') ORDER BY tuple()".format(i + 1))
-
-    node2.query("INSERT INTO t SELECT number FROM numbers(10)")
-
-    node1.query("SYSTEM SYNC REPLICA t", timeout=10)
-    node3.query("SYSTEM SYNC REPLICA t", timeout=10)
-
-    assert node1.query("SELECT COUNT() FROM t") == "10\n"
-    assert node2.query("SELECT COUNT() FROM t") == "10\n"
-    assert node3.query("SELECT COUNT() FROM t") == "10\n"
-
-
-def get_fake_zk(nodename):
-    _fake_zk_instance = KazooClient(hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=30.0)
+def get_fake_zk(nodename, timeout=30.0):
+    _fake_zk_instance = KazooClient(hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout)
     def reset_last_zxid_listener(state):
         print("Fake zk callback called for state", state)
         _fake_zk_instance.last_zxid = 0
@@ -146,6 +131,56 @@ def test_watch_on_follower(started_cluster):
             pass
 
 
+def test_session_expiration(started_cluster):
+    try:
+        node1_zk = get_fake_zk("node1")
+        node2_zk = get_fake_zk("node2")
+        node3_zk = get_fake_zk("node3", timeout=3.0)
+
+        node3_zk.create("/test_ephemeral_node", b"world", ephemeral=True)
+
+        with PartitionManager() as pm:
+            pm.partition_instances(node3, node2)
+            pm.partition_instances(node3, node1)
+            node3_zk.stop()
+            node3_zk.close()
+            time.sleep(5)
+
+        assert node1_zk.exists("/test_ephemeral_node") is None
+        assert node2_zk.exists("/test_ephemeral_node") is None
+
+    finally:
+        try:
+            for zk_conn in [node1_zk, node2_zk, node3_zk]:
+                try:
+                    zk_conn.stop()
+                    zk_conn.close()
+                except:
+                    pass
+        except:
+            pass
+
+def test_simple_replicated_table(started_cluster):
+    # something may be wrong after partition in other tests
+    # so create with retry
+    for i, node in enumerate([node1, node2, node3]):
+        for i in range(100):
+            try:
+                node.query("CREATE TABLE t (value UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/t', '{}') ORDER BY tuple()".format(i + 1))
+                break
+            except:
+                time.sleep(0.1)
+
+    node2.query("INSERT INTO t SELECT number FROM numbers(10)")
+
+    node1.query("SYSTEM SYNC REPLICA t", timeout=10)
+    node3.query("SYSTEM SYNC REPLICA t", timeout=10)
+
+    assert node1.query("SELECT COUNT() FROM t") == "10\n"
+    assert node2.query("SELECT COUNT() FROM t") == "10\n"
+    assert node3.query("SELECT COUNT() FROM t") == "10\n"
+
+
 # in extremely rare case it can take more than 5 minutes in debug build with sanitizer
 @pytest.mark.timeout(600)
 def test_blocade_leader(started_cluster):

From 1e56f907a87f239ef6ef3d73443792df4f1a1a78 Mon Sep 17 00:00:00 2001
From: Denny Crane <deniszhuravlov@gmail.com>
Date: Tue, 9 Feb 2021 14:47:11 -0400
Subject: [PATCH 0890/1238] Update external-dicts-dict-layout.md

range_hashed description corrected
---
 .../external-dictionaries/external-dicts-dict-layout.md       | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md
index 05c418b1f15..efef91b4b09 100644
--- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md
+++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md
@@ -208,8 +208,8 @@ This function returns the value for the specified `id`s and the date range that
 Details of the algorithm:
 
 -   If the `id` is not found or a range is not found for the `id`, it returns the default value for the dictionary.
--   If there are overlapping ranges, you can use any.
--   If the range delimiter is `NULL` or an invalid date (such as 1900-01-01 or 2039-01-01), the range is left open. The range can be open on both sides.
+-   If there are overlapping ranges, it returns value for any (random) range.
+-   If the range delimiter is `NULL` or an invalid date (such as 1900-01-01), the range is open. The range can be open on both sides.
 
 Configuration example:
 

From ca855e36a17ff3f2340b3f7f73d299aa61c2a7b1 Mon Sep 17 00:00:00 2001
From: Denny Crane <deniszhuravlov@gmail.com>
Date: Tue, 9 Feb 2021 14:49:45 -0400
Subject: [PATCH 0891/1238] Update external-dicts-dict-layout.md

range_hashed corrected
---
 .../external-dictionaries/external-dicts-dict-layout.md       | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md
index fc4a3ac7285..f6b8b670563 100644
--- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md
+++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md
@@ -205,8 +205,8 @@ RANGE(MIN first MAX last)
 Особенности алгоритма:
 
 -   Если не найден `id` или для найденного `id` не найден диапазон, то возвращается значение по умолчанию для словаря.
--   Если есть перекрывающиеся диапазоны, то можно использовать любой подходящий.
--   Если граница диапазона `NULL` или некорректная дата (1900-01-01, 2039-01-01), то диапазон считается открытым. Диапазон может быть открытым с обеих сторон.
+-   Если есть перекрывающиеся диапазоны, то возвращается значение из любого (случайного) подходящего диапазона.
+-   Если граница диапазона `NULL` или некорректная дата (1900-01-01), то диапазон считается открытым. Диапазон может быть открытым с обеих сторон.
 
 Пример конфигурации:
 

From 27d607a955a1d500ec4291c0a7d24e48da553c35 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Tue, 9 Feb 2021 22:48:34 +0300
Subject: [PATCH 0892/1238] Respect header in addMissingDefaults

---
 src/DataStreams/AddingDefaultBlockOutputStream.cpp | 3 +--
 src/DataStreams/AddingDefaultBlockOutputStream.h   | 1 -
 src/Interpreters/InterpreterInsertQuery.cpp        | 2 +-
 src/Interpreters/addMissingDefaults.cpp            | 2 ++
 src/Interpreters/inplaceBlockConversions.cpp       | 7 +------
 5 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/src/DataStreams/AddingDefaultBlockOutputStream.cpp b/src/DataStreams/AddingDefaultBlockOutputStream.cpp
index 23cd5e2c3c8..70771aff11b 100644
--- a/src/DataStreams/AddingDefaultBlockOutputStream.cpp
+++ b/src/DataStreams/AddingDefaultBlockOutputStream.cpp
@@ -9,12 +9,11 @@ namespace DB
 AddingDefaultBlockOutputStream::AddingDefaultBlockOutputStream(
     const BlockOutputStreamPtr & output_,
     const Block & header_,
-    const Block & output_block_,
     const ColumnsDescription & columns_,
     const Context & context_)
     : output(output_), header(header_)
 {
-    auto dag = addMissingDefaults(header_, output_block_.getNamesAndTypesList(), columns_, context_);
+    auto dag = addMissingDefaults(header_, output->getHeader().getNamesAndTypesList(), columns_, context_);
     actions = std::make_shared<ExpressionActions>(std::move(dag));
 }
 
diff --git a/src/DataStreams/AddingDefaultBlockOutputStream.h b/src/DataStreams/AddingDefaultBlockOutputStream.h
index bff8ad7f39a..f7e421d6cd9 100644
--- a/src/DataStreams/AddingDefaultBlockOutputStream.h
+++ b/src/DataStreams/AddingDefaultBlockOutputStream.h
@@ -25,7 +25,6 @@ public:
     AddingDefaultBlockOutputStream(
         const BlockOutputStreamPtr & output_,
         const Block & header_,
-        const Block & output_block_,
         const ColumnsDescription & columns_,
         const Context & context_);
 
diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp
index 55c4d19206f..4352d04a252 100644
--- a/src/Interpreters/InterpreterInsertQuery.cpp
+++ b/src/Interpreters/InterpreterInsertQuery.cpp
@@ -341,7 +341,7 @@ BlockIO InterpreterInsertQuery::execute()
             /// Actually we don't know structure of input blocks from query/table,
             /// because some clients break insertion protocol (columns != header)
             out = std::make_shared<AddingDefaultBlockOutputStream>(
-                out, query_sample_block, out->getHeader(), metadata_snapshot->getColumns(), context);
+                out, query_sample_block, metadata_snapshot->getColumns(), context);
 
             /// It's important to squash blocks as early as possible (before other transforms),
             ///  because other transforms may work inefficient if block size is small.
diff --git a/src/Interpreters/addMissingDefaults.cpp b/src/Interpreters/addMissingDefaults.cpp
index 7dfe839b564..f1feda0355a 100644
--- a/src/Interpreters/addMissingDefaults.cpp
+++ b/src/Interpreters/addMissingDefaults.cpp
@@ -89,6 +89,8 @@ ActionsDAGPtr addMissingDefaults(
     /// Computes explicitly specified values by default and materialized columns.
     if (auto dag = evaluateMissingDefaults(actions->getResultColumns(), required_columns, columns, context))
         actions = ActionsDAG::merge(std::move(*actions), std::move(*dag));
+    else
+        actions->removeUnusedActions(required_columns.getNames());
 
     return actions;
 }
diff --git a/src/Interpreters/inplaceBlockConversions.cpp b/src/Interpreters/inplaceBlockConversions.cpp
index 61082e0894c..eba03d7aa61 100644
--- a/src/Interpreters/inplaceBlockConversions.cpp
+++ b/src/Interpreters/inplaceBlockConversions.cpp
@@ -105,12 +105,7 @@ ActionsDAGPtr createExpressions(
 
     if (save_unneeded_columns)
     {
-        Names required_names;
-        required_names.reserve(required_columns.size());
-        for (const auto & column : required_columns)
-            required_names.push_back(column.name);
-
-        dag->removeUnusedActions(required_names);
+        dag->removeUnusedActions(required_columns.getNames());
         dag->addMaterializingOutputActions();
     }
 

From 8ba9576ed956f80e8361945cb549b7eabe1358ca Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Tue, 9 Feb 2021 22:52:48 +0300
Subject: [PATCH 0893/1238] Added comment.

---
 src/Interpreters/addMissingDefaults.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/Interpreters/addMissingDefaults.cpp b/src/Interpreters/addMissingDefaults.cpp
index f1feda0355a..9ed18b056cd 100644
--- a/src/Interpreters/addMissingDefaults.cpp
+++ b/src/Interpreters/addMissingDefaults.cpp
@@ -90,6 +90,8 @@ ActionsDAGPtr addMissingDefaults(
     if (auto dag = evaluateMissingDefaults(actions->getResultColumns(), required_columns, columns, context))
         actions = ActionsDAG::merge(std::move(*actions), std::move(*dag));
     else
+        /// Removes unused columns and reorders result.
+        /// The same is done in evaluateMissingDefaults if not empty dag is returned.
         actions->removeUnusedActions(required_columns.getNames());
 
     return actions;

From 6b80ae6c4f03352fbab86be6d0681775173edc94 Mon Sep 17 00:00:00 2001
From: Russ Frank <rf@oden.io>
Date: Tue, 9 Feb 2021 15:01:57 -0500
Subject: [PATCH 0894/1238] style fixes

---
 src/AggregateFunctions/AggregateFunctionDeltaSum.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/AggregateFunctions/AggregateFunctionDeltaSum.h b/src/AggregateFunctions/AggregateFunctionDeltaSum.h
index 9bca24a63ca..11824c9d51f 100644
--- a/src/AggregateFunctions/AggregateFunctionDeltaSum.h
+++ b/src/AggregateFunctions/AggregateFunctionDeltaSum.h
@@ -81,7 +81,7 @@ public:
         {
             // In the opposite scenario, the lhs comes after the rhs, e.g. [4, 6] [1, 2]. Since we
             // assume the input interval states are sorted by time, we assume this is a counter
-            // reset, and therefore do *not* add the difference between our first value and the 
+            // reset, and therefore do *not* add the difference between our first value and the
             // rhs last value.
 
             place_data->sum += rhs_data->sum;
@@ -99,7 +99,7 @@ public:
             place_data->sum = rhs_data->sum;
         }
 
-        // Otherwise lhs either has data or is unitialized, so we don't need to modify its values.
+        // Otherwise lhs either has data or is uninitialized, so we don't need to modify its values.
     }
 
     void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override

From 7848f0202c6a1b076a3607c9fe2911a9b615d644 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Wed, 10 Feb 2021 00:02:34 +0300
Subject: [PATCH 0895/1238] One more test

---
 .../test_testkeeper_multinode/test.py         | 32 +++++++++++++++++--
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/tests/integration/test_testkeeper_multinode/test.py b/tests/integration/test_testkeeper_multinode/test.py
index 51f60df7719..70968842f4d 100644
--- a/tests/integration/test_testkeeper_multinode/test.py
+++ b/tests/integration/test_testkeeper_multinode/test.py
@@ -8,9 +8,9 @@ from multiprocessing.dummy import Pool
 from helpers.network import PartitionManager
 
 cluster = ClickHouseCluster(__file__)
-node1 = cluster.add_instance('node1', main_configs=['configs/enable_test_keeper1.xml', 'configs/log_conf.xml', 'configs/use_test_keeper.xml'])
-node2 = cluster.add_instance('node2', main_configs=['configs/enable_test_keeper2.xml', 'configs/log_conf.xml', 'configs/use_test_keeper.xml'])
-node3 = cluster.add_instance('node3', main_configs=['configs/enable_test_keeper3.xml', 'configs/log_conf.xml', 'configs/use_test_keeper.xml'])
+node1 = cluster.add_instance('node1', main_configs=['configs/enable_test_keeper1.xml', 'configs/log_conf.xml', 'configs/use_test_keeper.xml'], stay_alive=True)
+node2 = cluster.add_instance('node2', main_configs=['configs/enable_test_keeper2.xml', 'configs/log_conf.xml', 'configs/use_test_keeper.xml'], stay_alive=True)
+node3 = cluster.add_instance('node3', main_configs=['configs/enable_test_keeper3.xml', 'configs/log_conf.xml', 'configs/use_test_keeper.xml'], stay_alive=True)
 
 from kazoo.client import KazooClient
 
@@ -160,6 +160,32 @@ def test_session_expiration(started_cluster):
         except:
             pass
 
+
+def test_follower_restart(started_cluster):
+    try:
+        node1_zk = get_fake_zk("node1")
+
+        node1_zk.create("/test_restart_node", b"hello")
+
+        node3.restart_clickhouse(kill=True)
+
+        node3_zk = get_fake_zk("node3")
+
+        # got data from log
+        assert node3_zk.get("/test_restart_node")[0] == b"hello"
+
+    finally:
+        try:
+            for zk_conn in [node1_zk, node3_zk]:
+                try:
+                    zk_conn.stop()
+                    zk_conn.close()
+                except:
+                    pass
+        except:
+            pass
+
+
 def test_simple_replicated_table(started_cluster):
     # something may be wrong after partition in other tests
     # so create with retry

From 4ac8860cea4cddec7999a460f38021a18da1587b Mon Sep 17 00:00:00 2001
From: HuFuwang <hufuwang@HuFuwangs-MacBook-Pro.local>
Date: Wed, 10 Feb 2021 10:13:43 +0800
Subject: [PATCH 0896/1238] refine code in MergeTreeData::loadDataParts to
 avoid parsing WAL file as data part.

---
 src/Storages/MergeTree/MergeTreeData.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 0c6bcd3f6fd..b4f28c11501 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -729,16 +729,16 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
             if (startsWith(it->name(), "tmp"))
                 continue;
 
-            part_names_with_disks.emplace_back(it->name(), disk_ptr);
-
-            /// Create and correctly initialize global WAL object, if it's needed
-            if (it->name() == MergeTreeWriteAheadLog::DEFAULT_WAL_FILE_NAME && settings->in_memory_parts_enable_wal)
+            if (!startsWith(it->name(), MergeTreeWriteAheadLog::WAL_FILE_NAME))
+                part_names_with_disks.emplace_back(it->name(), disk_ptr);
+            else if (it->name() == MergeTreeWriteAheadLog::DEFAULT_WAL_FILE_NAME && settings->in_memory_parts_enable_wal)
             {
+                /// Create and correctly initialize global WAL object
                 write_ahead_log = std::make_shared<MergeTreeWriteAheadLog>(*this, disk_ptr, it->name());
                 for (auto && part : write_ahead_log->restore(metadata_snapshot))
                     parts_from_wal.push_back(std::move(part));
             }
-            else if (startsWith(it->name(), MergeTreeWriteAheadLog::WAL_FILE_NAME) && settings->in_memory_parts_enable_wal)
+            else if (settings->in_memory_parts_enable_wal)
             {
                 MergeTreeWriteAheadLog wal(*this, disk_ptr, it->name());
                 for (auto && part : wal.restore(metadata_snapshot))

From afb5846a244defe3ea7d2da0e129018b1ed7619f Mon Sep 17 00:00:00 2001
From: Amos Bird <amosbird@gmail.com>
Date: Wed, 10 Feb 2021 11:22:24 +0800
Subject: [PATCH 0897/1238] refactor

---
 src/Storages/MergeTree/MergeTreeData.cpp | 58 ++++++++++--------------
 src/Storages/MergeTree/MergeTreeData.h   |  4 +-
 2 files changed, 27 insertions(+), 35 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 4bed3868f9d..f8ce7002d12 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -2346,7 +2346,7 @@ size_t MergeTreeData::getPartsCount() const
 }
 
 
-size_t MergeTreeData::getMaxPartsCountForPartition(size_t * inactive) const
+size_t MergeTreeData::getMaxPartsCountForPartitionWithState(DataPartState state) const
 {
     auto lock = lockParts();
 
@@ -2354,7 +2354,7 @@ size_t MergeTreeData::getMaxPartsCountForPartition(size_t * inactive) const
     size_t cur_count = 0;
     const String * cur_partition_id = nullptr;
 
-    for (const auto & part : getDataPartsStateRange(DataPartState::Committed))
+    for (const auto & part : getDataPartsStateRange(state))
     {
         if (cur_partition_id && part->info.partition_id == *cur_partition_id)
         {
@@ -2369,30 +2369,22 @@ size_t MergeTreeData::getMaxPartsCountForPartition(size_t * inactive) const
         res = std::max(res, cur_count);
     }
 
-    if (inactive)
-    {
-        *inactive = 0;
-        cur_count = 0;
-        for (const auto & part : getDataPartsStateRange(DataPartState::Outdated))
-        {
-            if (cur_partition_id && part->info.partition_id == *cur_partition_id)
-            {
-                ++cur_count;
-            }
-            else
-            {
-                cur_partition_id = &part->info.partition_id;
-                cur_count = 1;
-            }
-
-            *inactive = std::max(*inactive, cur_count);
-        }
-    }
-
     return res;
 }
 
 
+size_t MergeTreeData::getMaxPartsCountForPartition() const
+{
+    return getMaxPartsCountForPartitionWithState(DataPartState::Committed);
+}
+
+
+size_t MergeTreeData::getMaxInactivePartsCountForPartition() const
+{
+    return getMaxPartsCountForPartitionWithState(DataPartState::Outdated);
+}
+
+
 std::optional<Int64> MergeTreeData::getMinPartDataVersion() const
 {
     auto lock = lockParts();
@@ -2418,31 +2410,29 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until) const
         throw Exception("Too many parts (" + toString(parts_count_in_total) + ") in all partitions in total. This indicates wrong choice of partition key. The threshold can be modified with 'max_parts_in_total' setting in <merge_tree> element in config.xml or with per-table setting.", ErrorCodes::TOO_MANY_PARTS);
     }
 
-    size_t parts_count_in_partition;
+    size_t parts_count_in_partition = getMaxPartsCountForPartition();
     ssize_t k_inactive = -1;
     if (settings->inactive_parts_to_throw_insert > 0 || settings->inactive_parts_to_delay_insert > 0)
     {
-        size_t inactive_parts;
-        parts_count_in_partition = getMaxPartsCountForPartition(&inactive_parts);
-        if (inactive_parts >= settings->inactive_parts_to_throw_insert)
+        size_t inactive_parts_count_in_partition = getMaxInactivePartsCountForPartition();
+        if (inactive_parts_count_in_partition >= settings->inactive_parts_to_throw_insert)
         {
             ProfileEvents::increment(ProfileEvents::RejectedInserts);
             throw Exception(
-                "Too many inactive parts (" + toString(parts_count_in_partition)
-                    + "). Parts cleaning are processing significantly slower than inserts.",
-                ErrorCodes::TOO_MANY_PARTS);
+                ErrorCodes::TOO_MANY_PARTS,
+                "Too many inactive parts ({}). Parts cleaning are processing significantly slower than inserts",
+                inactive_parts_count_in_partition);
         }
-        k_inactive = ssize_t(inactive_parts) - ssize_t(settings->inactive_parts_to_delay_insert);
+        k_inactive = ssize_t(inactive_parts_count_in_partition) - ssize_t(settings->inactive_parts_to_delay_insert);
     }
-    else
-        parts_count_in_partition = getMaxPartsCountForPartition();
 
     if (parts_count_in_partition >= settings->parts_to_throw_insert)
     {
         ProfileEvents::increment(ProfileEvents::RejectedInserts);
         throw Exception(
-            "Too many parts (" + toString(parts_count_in_partition) + "). Merges are processing significantly slower than inserts.",
-            ErrorCodes::TOO_MANY_PARTS);
+            ErrorCodes::TOO_MANY_PARTS,
+            "Too many parts ({}). Parts cleaning are processing significantly slower than inserts",
+            parts_count_in_partition);
     }
 
     if (k_inactive < 0 && parts_count_in_partition < settings->parts_to_delay_insert)
diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h
index d4b6c1fba27..395156aeb64 100644
--- a/src/Storages/MergeTree/MergeTreeData.h
+++ b/src/Storages/MergeTree/MergeTreeData.h
@@ -415,7 +415,9 @@ public:
     size_t getTotalActiveSizeInRows() const;
 
     size_t getPartsCount() const;
-    size_t getMaxPartsCountForPartition(size_t * inactive = nullptr) const;
+    size_t getMaxPartsCountForPartitionWithState(DataPartState state) const;
+    size_t getMaxPartsCountForPartition() const;
+    size_t getMaxInactivePartsCountForPartition() const;
 
     /// Get min value of part->info.getDataVersion() for all active parts.
     /// Makes sense only for ordinary MergeTree engines because for them block numbering doesn't depend on partition.

From 565a148da91d545bc794c759ad75c7dd3d957637 Mon Sep 17 00:00:00 2001
From: Amos Bird <amosbird@gmail.com>
Date: Sun, 7 Feb 2021 15:22:51 +0800
Subject: [PATCH 0898/1238] Normalize count() variants

---
 src/Core/Settings.h                           |  1 +
 .../RewriteCountVariantsVisitor.cpp           | 62 +++++++++++++++++++
 .../RewriteCountVariantsVisitor.h             | 17 +++++
 src/Interpreters/TreeOptimizer.cpp            |  9 +++
 ...ptimize_normalize_count_variants.reference |  5 ++
 ...1706_optimize_normalize_count_variants.sql |  4 ++
 6 files changed, 98 insertions(+)
 create mode 100644 src/Interpreters/RewriteCountVariantsVisitor.cpp
 create mode 100644 src/Interpreters/RewriteCountVariantsVisitor.h
 create mode 100644 tests/queries/0_stateless/01706_optimize_normalize_count_variants.reference
 create mode 100644 tests/queries/0_stateless/01706_optimize_normalize_count_variants.sql

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 96571cedd3f..797d6250ae4 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -375,6 +375,7 @@ class IColumn;
     M(Bool, optimize_respect_aliases, true, "If it is set to true, it will respect aliases in WHERE/GROUP BY/ORDER BY, that will help with partition pruning/secondary indexes/optimize_aggregation_in_order/optimize_read_in_order/optimize_trivial_count", 0) \
     M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \
     M(Bool, optimize_move_functions_out_of_any, false, "Move functions out of aggregate functions 'any', 'anyLast'.", 0) \
+    M(Bool, optimize_normalize_count_variants, false, "Rewrite aggregate functions that semantically equals to count() as count().", 0) \
     M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \
     M(Bool, optimize_arithmetic_operations_in_aggregate_functions, true, "Move arithmetic operations out of aggregation functions", 0) \
     M(Bool, optimize_duplicate_order_by_and_distinct, true, "Remove duplicate ORDER BY and DISTINCT if it's possible", 0) \
diff --git a/src/Interpreters/RewriteCountVariantsVisitor.cpp b/src/Interpreters/RewriteCountVariantsVisitor.cpp
new file mode 100644
index 00000000000..f7cce82a478
--- /dev/null
+++ b/src/Interpreters/RewriteCountVariantsVisitor.cpp
@@ -0,0 +1,62 @@
+#include <Interpreters/RewriteCountVariantsVisitor.h>
+#include <Parsers/ASTFunction.h>
+#include <Parsers/ASTIdentifier.h>
+#include <Parsers/ASTLiteral.h>
+#include <Parsers/ASTSubquery.h>
+#include <Parsers/ASTTablesInSelectQuery.h>
+#include <Poco/String.h>
+#include <Common/typeid_cast.h>
+
+namespace DB
+{
+void RewriteCountVariantsVisitor::visit(ASTPtr & node)
+{
+    if (node->as<ASTSubquery>() || node->as<ASTTableExpression>() || node->as<ASTArrayJoin>())
+        return;
+
+    for (auto & child : node->children)
+        visit(child);
+
+    if (auto * func = node->as<ASTFunction>())
+        visit(*func);
+}
+
+void RewriteCountVariantsVisitor::visit(ASTFunction & func)
+{
+    if (func.arguments->children.empty() || func.arguments->children.size() > 1 || !func.arguments->children[0])
+        return;
+
+    auto name = Poco::toLower(func.name);
+
+    if (name != "sum" && name != "count")
+        return;
+
+    auto & func_arguments = func.arguments->children;
+
+    const auto * first_arg_literal = func_arguments[0]->as<ASTLiteral>();
+    if (!first_arg_literal)
+        return;
+
+    bool transform = false;
+    if (name == "count")
+    {
+        if (first_arg_literal->value.getType() != Field::Types::Null)
+            transform = true;
+    }
+    else if (name == "sum")
+    {
+        if (first_arg_literal->value.getType() == Field::Types::UInt64)
+        {
+            auto constant = first_arg_literal->value.get<UInt64>();
+            if (constant == 1)
+                transform = true;
+        }
+    }
+    if (!transform)
+        return;
+
+    func.name = "count";
+    func.arguments->children.clear();
+}
+
+}
diff --git a/src/Interpreters/RewriteCountVariantsVisitor.h b/src/Interpreters/RewriteCountVariantsVisitor.h
new file mode 100644
index 00000000000..09eef3542aa
--- /dev/null
+++ b/src/Interpreters/RewriteCountVariantsVisitor.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include <Parsers/IAST.h>
+
+namespace DB
+{
+
+class ASTFunction;
+
+class RewriteCountVariantsVisitor
+{
+public:
+    static void visit(ASTPtr & ast);
+    static void visit(ASTFunction &);
+};
+
+}
diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp
index 2347ab0d4a5..5c6f76c8c29 100644
--- a/src/Interpreters/TreeOptimizer.cpp
+++ b/src/Interpreters/TreeOptimizer.cpp
@@ -10,6 +10,7 @@
 #include <Interpreters/RewriteAnyFunctionVisitor.h>
 #include <Interpreters/RemoveInjectiveFunctionsVisitor.h>
 #include <Interpreters/RedundantFunctionsInOrderByVisitor.h>
+#include <Interpreters/RewriteCountVariantsVisitor.h>
 #include <Interpreters/MonotonicityCheckVisitor.h>
 #include <Interpreters/ConvertStringsToEnumVisitor.h>
 #include <Interpreters/PredicateExpressionsOptimizer.h>
@@ -555,6 +556,11 @@ void optimizeSumIfFunctions(ASTPtr & query)
     RewriteSumIfFunctionVisitor(data).visit(query);
 }
 
+void optimizeCountConstantAndSumOne(ASTPtr & query)
+{
+    RewriteCountVariantsVisitor::visit(query);
+}
+
 
 void optimizeInjectiveFunctionsInsideUniq(ASTPtr & query, const Context & context)
 {
@@ -616,6 +622,9 @@ void TreeOptimizer::apply(ASTPtr & query, Aliases & aliases, const NameSet & sou
     if (settings.optimize_move_functions_out_of_any)
         optimizeAnyFunctions(query);
 
+    if (settings.optimize_normalize_count_variants)
+        optimizeCountConstantAndSumOne(query);
+
     if (settings.optimize_rewrite_sum_if_to_count_if)
         optimizeSumIfFunctions(query);
 
diff --git a/tests/queries/0_stateless/01706_optimize_normalize_count_variants.reference b/tests/queries/0_stateless/01706_optimize_normalize_count_variants.reference
new file mode 100644
index 00000000000..73b404fa131
--- /dev/null
+++ b/tests/queries/0_stateless/01706_optimize_normalize_count_variants.reference
@@ -0,0 +1,5 @@
+SELECT
+    count(),
+    count(),
+    count(),
+    count()
diff --git a/tests/queries/0_stateless/01706_optimize_normalize_count_variants.sql b/tests/queries/0_stateless/01706_optimize_normalize_count_variants.sql
new file mode 100644
index 00000000000..32e5209c35f
--- /dev/null
+++ b/tests/queries/0_stateless/01706_optimize_normalize_count_variants.sql
@@ -0,0 +1,4 @@
+
+set optimize_normalize_count_variants = 1;
+
+explain syntax select count(), count(1), count(-1), sum(1);

From fc6c57f8e099967f8d120944d1b85f019ddaf60e Mon Sep 17 00:00:00 2001
From: Amos Bird <amosbird@gmail.com>
Date: Mon, 8 Feb 2021 18:43:35 +0800
Subject: [PATCH 0899/1238] Fix

---
 src/Core/Settings.h                                            | 2 +-
 src/Interpreters/RewriteCountVariantsVisitor.h                 | 2 +-
 .../01706_optimize_normalize_count_variants.reference          | 3 ++-
 .../0_stateless/01706_optimize_normalize_count_variants.sql    | 2 +-
 4 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 797d6250ae4..9bb9ad30f15 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -375,7 +375,7 @@ class IColumn;
     M(Bool, optimize_respect_aliases, true, "If it is set to true, it will respect aliases in WHERE/GROUP BY/ORDER BY, that will help with partition pruning/secondary indexes/optimize_aggregation_in_order/optimize_read_in_order/optimize_trivial_count", 0) \
     M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \
     M(Bool, optimize_move_functions_out_of_any, false, "Move functions out of aggregate functions 'any', 'anyLast'.", 0) \
-    M(Bool, optimize_normalize_count_variants, false, "Rewrite aggregate functions that semantically equals to count() as count().", 0) \
+    M(Bool, optimize_normalize_count_variants, true, "Rewrite aggregate functions that semantically equals to count() as count().", 0) \
     M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \
     M(Bool, optimize_arithmetic_operations_in_aggregate_functions, true, "Move arithmetic operations out of aggregation functions", 0) \
     M(Bool, optimize_duplicate_order_by_and_distinct, true, "Remove duplicate ORDER BY and DISTINCT if it's possible", 0) \
diff --git a/src/Interpreters/RewriteCountVariantsVisitor.h b/src/Interpreters/RewriteCountVariantsVisitor.h
index 09eef3542aa..6f731c8c463 100644
--- a/src/Interpreters/RewriteCountVariantsVisitor.h
+++ b/src/Interpreters/RewriteCountVariantsVisitor.h
@@ -10,7 +10,7 @@ class ASTFunction;
 class RewriteCountVariantsVisitor
 {
 public:
-    static void visit(ASTPtr & ast);
+    static void visit(ASTPtr &);
     static void visit(ASTFunction &);
 };
 
diff --git a/tests/queries/0_stateless/01706_optimize_normalize_count_variants.reference b/tests/queries/0_stateless/01706_optimize_normalize_count_variants.reference
index 73b404fa131..0343ad84abb 100644
--- a/tests/queries/0_stateless/01706_optimize_normalize_count_variants.reference
+++ b/tests/queries/0_stateless/01706_optimize_normalize_count_variants.reference
@@ -2,4 +2,5 @@ SELECT
     count(),
     count(),
     count(),
-    count()
+    count(),
+    count(NULL)
diff --git a/tests/queries/0_stateless/01706_optimize_normalize_count_variants.sql b/tests/queries/0_stateless/01706_optimize_normalize_count_variants.sql
index 32e5209c35f..d20f23feef8 100644
--- a/tests/queries/0_stateless/01706_optimize_normalize_count_variants.sql
+++ b/tests/queries/0_stateless/01706_optimize_normalize_count_variants.sql
@@ -1,4 +1,4 @@
 
 set optimize_normalize_count_variants = 1;
 
-explain syntax select count(), count(1), count(-1), sum(1);
+explain syntax select count(), count(1), count(-1), sum(1), count(null);

From d9afee55ba224c5b8e0587514d4047f32bdb05f9 Mon Sep 17 00:00:00 2001
From: vdimir <vdimir@yandex-team.ru>
Date: Tue, 9 Feb 2021 10:45:07 +0300
Subject: [PATCH 0900/1238] Add RewriteCountVariantsVisitor.cpp to ya.make

---
 src/Interpreters/ya.make | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/Interpreters/ya.make b/src/Interpreters/ya.make
index 6a155749ddf..cd4980927e4 100644
--- a/src/Interpreters/ya.make
+++ b/src/Interpreters/ya.make
@@ -129,6 +129,7 @@ SRCS(
     RequiredSourceColumnsData.cpp
     RequiredSourceColumnsVisitor.cpp
     RewriteAnyFunctionVisitor.cpp
+    RewriteCountVariantsVisitor.cpp
     RewriteSumIfFunctionVisitor.cpp
     RowRefs.cpp
     Set.cpp

From e53787fd1af8c0770489d4c79bbf348f757b752e Mon Sep 17 00:00:00 2001
From: Hasitha Kanchana <48449865+hasithaka@users.noreply.github.com>
Date: Wed, 10 Feb 2021 10:01:43 +0100
Subject: [PATCH 0901/1238] Update update.md

Fixed content for generic version
---
 docs/en/operations/update.md | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/docs/en/operations/update.md b/docs/en/operations/update.md
index 04fbaf761c8..59a1054f187 100644
--- a/docs/en/operations/update.md
+++ b/docs/en/operations/update.md
@@ -20,11 +20,14 @@ ClickHouse does not support a distributed update. The operation should be perfor
 The upgrade of older version of ClickHouse to specific version:
 
 As an example: 
+xx.yy.a.b is a current stable version. The latest stable version could be found [here](https://github.com/ClickHouse/ClickHouse/releases)
 
 ```bash
 $ sudo apt-get update
-$ sudo apt-get install clickhouse-server=20.12.4.5 clickhouse-client=20.12.4.5 clickhouse-common-static=20.12.4.5
+$ sudo apt-get install clickhouse-server=xx.yy.a.b clickhouse-client=xx.yy.a.b clickhouse-common-static=xx.yy.a.b
 $ sudo service clickhouse-server restart
 ```
 
-Note: It's always recommended to backup all databases before initiating the upgrade process. Please make sure the new version is compatible with new changes so on.
+
+
+

From beb5912b4f07f1a3c12e16f9cd27742c3fc5196c Mon Sep 17 00:00:00 2001
From: filimonov <1549571+filimonov@users.noreply.github.com>
Date: Wed, 10 Feb 2021 10:02:35 +0100
Subject: [PATCH 0902/1238] Add example of client configuration adjusemtents

---
 programs/client/clickhouse-client.xml | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/programs/client/clickhouse-client.xml b/programs/client/clickhouse-client.xml
index c073ab38aea..c6b7ba37009 100644
--- a/programs/client/clickhouse-client.xml
+++ b/programs/client/clickhouse-client.xml
@@ -29,4 +29,25 @@
         <test>{display_name} \x01\e[1;32m\x02:)\x01\e[0m\x02 </test> <!-- if it matched to the substring "test" in the server display name - -->
         <production>{display_name} \x01\e[1;31m\x02:)\x01\e[0m\x02 </production> <!-- if it matched to the substring "production" in the server display name -->
     </prompt_by_server_display_name>
+
+    <!-- 
+        Settings adjustable via command-line parameters (see clickhouse-client --help)
+        can take their defaults from that config file, see examples:
+
+    <host>127.0.0.1</host>
+    <port>9440</port>
+    <secure>1</secure>
+    <user>dbuser</user>
+    <password>dbpwd123</password>
+    <format>PrettyCompactMonoBlock</format>
+    <multiline>1</multiline>
+    <multiquery>1</multiquery>
+    <stacktrace>1</stacktrace>
+    <database>default2</database>
+    <pager>less -SR</pager>
+    <history_file>/home/user/clickhouse_custom_history.log</history_file>
+    <max_parser_depth>2500</max_parser_depth>
+
+        The same can be done on user-level configuration, just create & adjust: ~/.clickhouse-client/config.xml
+    -->
 </config>

From 2ce58440d1d18e31fbe34484852c18cd7a57b445 Mon Sep 17 00:00:00 2001
From: Hasitha Kanchana <48449865+hasithaka@users.noreply.github.com>
Date: Wed, 10 Feb 2021 10:02:42 +0100
Subject: [PATCH 0903/1238] Update update.md

Add a new line ;)
---
 docs/en/operations/update.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/en/operations/update.md b/docs/en/operations/update.md
index 59a1054f187..981eac0bff1 100644
--- a/docs/en/operations/update.md
+++ b/docs/en/operations/update.md
@@ -19,7 +19,8 @@ ClickHouse does not support a distributed update. The operation should be perfor
 
 The upgrade of older version of ClickHouse to specific version:
 
-As an example: 
+As an example:
+ 
 xx.yy.a.b is a current stable version. The latest stable version could be found [here](https://github.com/ClickHouse/ClickHouse/releases)
 
 ```bash

From df1889b8e860e2ab555daed1d59868099e2a68fe Mon Sep 17 00:00:00 2001
From: Hasitha Kanchana <48449865+hasithaka@users.noreply.github.com>
Date: Wed, 10 Feb 2021 10:04:25 +0100
Subject: [PATCH 0904/1238] Update update.md

Highlight the sample version
---
 docs/en/operations/update.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/operations/update.md b/docs/en/operations/update.md
index 981eac0bff1..9fa9c44e130 100644
--- a/docs/en/operations/update.md
+++ b/docs/en/operations/update.md
@@ -21,7 +21,7 @@ The upgrade of older version of ClickHouse to specific version:
 
 As an example:
  
-xx.yy.a.b is a current stable version. The latest stable version could be found [here](https://github.com/ClickHouse/ClickHouse/releases)
+`xx.yy.a.b` is a current stable version. The latest stable version could be found [here](https://github.com/ClickHouse/ClickHouse/releases)
 
 ```bash
 $ sudo apt-get update

From c95140d906401c8c133838c89369ef79d5ec8745 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Wed, 10 Feb 2021 12:28:53 +0300
Subject: [PATCH 0905/1238] Better startup and non-verbose logging by default

---
 src/Coordination/CoordinationSettings.h       |  5 +-
 src/Coordination/LoggerWrapper.h              |  5 +-
 src/Coordination/NuKeeperServer.cpp           | 58 +++++++++----------
 src/Coordination/NuKeeperServer.h             | 12 +++-
 .../NuKeeperStorageDispatcher.cpp             | 17 ++++--
 .../configs/enable_test_keeper.xml            |  1 +
 .../configs/enable_test_keeper1.xml           |  1 +
 .../configs/enable_test_keeper2.xml           |  1 +
 .../configs/enable_test_keeper3.xml           |  1 +
 .../test_testkeeper_multinode/test.py         |  5 +-
 10 files changed, 63 insertions(+), 43 deletions(-)

diff --git a/src/Coordination/CoordinationSettings.h b/src/Coordination/CoordinationSettings.h
index 374d432f2db..441e1a5936f 100644
--- a/src/Coordination/CoordinationSettings.h
+++ b/src/Coordination/CoordinationSettings.h
@@ -2,6 +2,7 @@
 
 #include <Core/Defines.h>
 #include <Core/BaseSettings.h>
+#include <Core/SettingsEnums.h>
 #include <Common/ZooKeeper/ZooKeeperConstants.h>
 #include <Poco/Util/AbstractConfiguration.h>
 
@@ -25,7 +26,9 @@ struct Settings;
     M(UInt64, snapshot_distance, 5000, "How many log items we have to collect to write new snapshot", 0) \
     M(UInt64, max_stored_snapshots, 3, "How many snapshots we want to store", 0) \
     M(Bool, auto_forwarding, true, "Allow to forward write requests from followers to leader", 0) \
-    M(Milliseconds, shutdown_timeout, 5000, "How many time we will until RAFT shutdown", 0)
+    M(Milliseconds, shutdown_timeout, 5000, "How many time we will until RAFT shutdown", 0) \
+    M(Milliseconds, startup_timeout, 30000, "How many time we will until RAFT to start", 0) \
+    M(LogsLevel, raft_logs_level, LogsLevel::information, "Log internal RAFT logs into main server log level. Valid values: 'trace', 'debug', 'information', 'warning', 'error', 'fatal', 'none'", 0)
 
 DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS)
 
diff --git a/src/Coordination/LoggerWrapper.h b/src/Coordination/LoggerWrapper.h
index c8da2372a91..755b72c06cc 100644
--- a/src/Coordination/LoggerWrapper.h
+++ b/src/Coordination/LoggerWrapper.h
@@ -2,6 +2,7 @@
 
 #include <libnuraft/nuraft.hxx> // Y_IGNORE
 #include <common/logger_useful.h>
+#include <Core/SettingsEnums.h>
 
 namespace DB
 {
@@ -9,9 +10,9 @@ namespace DB
 class LoggerWrapper : public nuraft::logger
 {
 public:
-    LoggerWrapper(const std::string & name)
+    LoggerWrapper(const std::string & name, LogsLevel level_)
         : log(&Poco::Logger::get(name))
-        , level(6)
+        , level(static_cast<int>(level_))
     {
         log->setLevel(level);
     }
diff --git a/src/Coordination/NuKeeperServer.cpp b/src/Coordination/NuKeeperServer.cpp
index 40508b08761..314a1412313 100644
--- a/src/Coordination/NuKeeperServer.cpp
+++ b/src/Coordination/NuKeeperServer.cpp
@@ -47,7 +47,7 @@ void NuKeeperServer::addServer(int server_id_, const std::string & server_uri_,
 }
 
 
-void NuKeeperServer::startup()
+void NuKeeperServer::startup(bool should_build_quorum)
 {
     nuraft::raft_params params;
     params.heart_beat_interval_ = coordination_settings->heart_beat_interval_ms.totalMilliseconds();
@@ -62,25 +62,19 @@ void NuKeeperServer::startup()
     params.return_method_ = nuraft::raft_params::blocking;
 
     nuraft::asio_service::options asio_opts{};
+    nuraft::raft_server::init_options init_options;
+    init_options.skip_initial_election_timeout_ = !should_build_quorum;
+    init_options.raft_callback_ = [this] (nuraft::cb_func::Type type, nuraft::cb_func::Param * param)
+    {
+        return callbackFunc(type, param);
+    };
 
     raft_instance = launcher.init(
-        state_machine, state_manager, nuraft::cs_new<LoggerWrapper>("RaftInstance"), port,
-        asio_opts, params);
+        state_machine, state_manager, nuraft::cs_new<LoggerWrapper>("RaftInstance", coordination_settings->raft_logs_level), port,
+        asio_opts, params, init_options);
 
     if (!raft_instance)
         throw Exception(ErrorCodes::RAFT_ERROR, "Cannot allocate RAFT instance");
-
-    /// FIXME
-    static constexpr auto MAX_RETRY = 100;
-    for (size_t i = 0; i < MAX_RETRY; ++i)
-    {
-        if (raft_instance->is_initialized())
-            return;
-
-        std::this_thread::sleep_for(std::chrono::milliseconds(100));
-    }
-
-    throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Cannot start RAFT server within startup timeout");
 }
 
 void NuKeeperServer::shutdown()
@@ -177,10 +171,22 @@ bool NuKeeperServer::isLeaderAlive() const
     return raft_instance->is_leader_alive();
 }
 
+
+nuraft::cb_func::ReturnCode NuKeeperServer::callbackFunc(nuraft::cb_func::Type type, nuraft::cb_func::Param * /* param */)
+{
+    if (type == nuraft::cb_func::Type::BecomeFresh || type == nuraft::cb_func::Type::BecomeLeader)
+    {
+        std::unique_lock lock(initialized_mutex);
+        initialized_flag = true;
+        initialized_cv.notify_all();
+    }
+    return nuraft::cb_func::ReturnCode::Ok;
+}
+
 bool NuKeeperServer::waitForServer(int32_t id) const
 {
     /// FIXME
-    for (size_t i = 0; i < 50; ++i)
+    for (size_t i = 0; i < 30; ++i)
     {
         if (raft_instance->get_srv_config(id) != nullptr)
             return true;
@@ -192,22 +198,12 @@ bool NuKeeperServer::waitForServer(int32_t id) const
     return false;
 }
 
-bool NuKeeperServer::waitForServers(const std::vector<int32_t> & ids) const
+void NuKeeperServer::waitInit()
 {
-    for (int32_t id : ids)
-        if (!waitForServer(id))
-            return false;
-    return true;
-}
-
-void NuKeeperServer::waitForCatchUp() const
-{
-    /// FIXME
-    while (raft_instance->is_catching_up() || raft_instance->is_receiving_snapshot() || raft_instance->is_leader())
-    {
-        LOG_DEBUG(&Poco::Logger::get("NuRaftInit"), "Waiting current RAFT instance to catch up");
-        std::this_thread::sleep_for(std::chrono::milliseconds(100));
-    }
+    std::unique_lock lock(initialized_mutex);
+    int64_t timeout = coordination_settings->startup_timeout.totalMilliseconds();
+    if (!initialized_cv.wait_for(lock, std::chrono::milliseconds(timeout), [&] { return initialized_flag; }))
+        throw Exception(ErrorCodes::RAFT_ERROR, "Failed to wait RAFT initialization");
 }
 
 std::unordered_set<int64_t> NuKeeperServer::getDeadSessions()
diff --git a/src/Coordination/NuKeeperServer.h b/src/Coordination/NuKeeperServer.h
index bb5870fe89a..ce6dd2f0fbb 100644
--- a/src/Coordination/NuKeeperServer.h
+++ b/src/Coordination/NuKeeperServer.h
@@ -36,13 +36,19 @@ private:
 
     ResponsesQueue & responses_queue;
 
+    std::mutex initialized_mutex;
+    bool initialized_flag = false;
+    std::condition_variable initialized_cv;
+
+    nuraft::cb_func::ReturnCode callbackFunc(nuraft::cb_func::Type type, nuraft::cb_func::Param * param);
+
 public:
     NuKeeperServer(
         int server_id_, const std::string & hostname_, int port_,
         const CoordinationSettingsPtr & coordination_settings_,
         ResponsesQueue & responses_queue_);
 
-    void startup();
+    void startup(bool should_build_quorum);
 
     void putRequest(const NuKeeperStorage::RequestForSession & request);
 
@@ -57,8 +63,8 @@ public:
     bool isLeaderAlive() const;
 
     bool waitForServer(int32_t server_id) const;
-    bool waitForServers(const std::vector<int32_t> & ids) const;
-    void waitForCatchUp() const;
+
+    void waitInit();
 
     void shutdown();
 };
diff --git a/src/Coordination/NuKeeperStorageDispatcher.cpp b/src/Coordination/NuKeeperStorageDispatcher.cpp
index 8ca5d3fff13..300604e0f6e 100644
--- a/src/Coordination/NuKeeperStorageDispatcher.cpp
+++ b/src/Coordination/NuKeeperStorageDispatcher.cpp
@@ -167,9 +167,12 @@ void NuKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfigurati
     server = std::make_unique<NuKeeperServer>(myid, myhostname, myport, coordination_settings, responses_queue);
     try
     {
-        server->startup();
-        if (shouldBuildQuorum(myid, my_priority, my_can_become_leader, server_configs))
+        bool should_build_quorum = shouldBuildQuorum(myid, my_priority, my_can_become_leader, server_configs);
+        server->startup(should_build_quorum);
+        if (should_build_quorum)
         {
+
+            server->waitInit();
             for (const auto & [id, hostname, port, can_become_leader, priority] : server_configs)
             {
                 LOG_DEBUG(log, "Adding server with id {} ({}:{})", id, hostname, port);
@@ -181,12 +184,15 @@ void NuKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfigurati
 
                 LOG_DEBUG(log, "Server with id {} ({}:{}) added to cluster", id, hostname, port);
             }
+
+            if (server_configs.size() > 1)
+                LOG_DEBUG(log, "All servers were added to quorum");
         }
         else
         {
-            while (!server->waitForServers(ids))
-                LOG_DEBUG(log, "Waiting for {} servers to build cluster", ids.size());
-            server->waitForCatchUp();
+            LOG_DEBUG(log, "Waiting as follower");
+            server->waitInit();
+            LOG_DEBUG(log, "Follower became fresh");
         }
     }
     catch (...)
@@ -282,6 +288,7 @@ void NuKeeperStorageDispatcher::sessionCleanerTask()
                         requests_queue.push(std::move(request_info));
                     }
                     finishSession(dead_session);
+                    LOG_INFO(log, "Dead session close request pushed");
                 }
             }
         }
diff --git a/tests/integration/test_testkeeper_back_to_back/configs/enable_test_keeper.xml b/tests/integration/test_testkeeper_back_to_back/configs/enable_test_keeper.xml
index 00a593051f9..1a441909998 100644
--- a/tests/integration/test_testkeeper_back_to_back/configs/enable_test_keeper.xml
+++ b/tests/integration/test_testkeeper_back_to_back/configs/enable_test_keeper.xml
@@ -6,6 +6,7 @@
         <coordination_settings>
             <operation_timeout_ms>5000</operation_timeout_ms>
             <session_timeout_ms>10000</session_timeout_ms>
+            <raft_logs_level>trace</raft_logs_level>
         </coordination_settings>
 
         <raft_configuration>
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
index 75065bb2a7a..3ae44f926d0 100644
--- a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
@@ -6,6 +6,7 @@
         <coordination_settings>
             <operation_timeout_ms>5000</operation_timeout_ms>
             <session_timeout_ms>10000</session_timeout_ms>
+            <raft_logs_level>trace</raft_logs_level>
         </coordination_settings>
 
         <raft_configuration>
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
index 18937dd4910..7674c755511 100644
--- a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
@@ -6,6 +6,7 @@
         <coordination_settings>
             <operation_timeout_ms>5000</operation_timeout_ms>
             <session_timeout_ms>10000</session_timeout_ms>
+            <raft_logs_level>trace</raft_logs_level>
         </coordination_settings>
 
         <raft_configuration>
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
index 5330367cd89..59dde3bc1b1 100644
--- a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
@@ -6,6 +6,7 @@
         <coordination_settings>
             <operation_timeout_ms>5000</operation_timeout_ms>
             <session_timeout_ms>10000</session_timeout_ms>
+            <raft_logs_level>trace</raft_logs_level>
         </coordination_settings>
 
         <raft_configuration>
diff --git a/tests/integration/test_testkeeper_multinode/test.py b/tests/integration/test_testkeeper_multinode/test.py
index 70968842f4d..e2b0537d5ec 100644
--- a/tests/integration/test_testkeeper_multinode/test.py
+++ b/tests/integration/test_testkeeper_multinode/test.py
@@ -144,7 +144,10 @@ def test_session_expiration(started_cluster):
             pm.partition_instances(node3, node1)
             node3_zk.stop()
             node3_zk.close()
-            time.sleep(5)
+            for _ in range(100):
+                if node1_zk.exists("/test_ephemeral_node") is None and node2_zk.exists("/test_ephemeral_node") is None:
+                    break
+                time.sleep(0.1)
 
         assert node1_zk.exists("/test_ephemeral_node") is None
         assert node2_zk.exists("/test_ephemeral_node") is None

From 63080a0b5e6d09b1e9336ccb8023e6e6f5d7569b Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Wed, 10 Feb 2021 12:31:01 +0300
Subject: [PATCH 0906/1238] Redundant space

---
 cmake/find/nuraft.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmake/find/nuraft.cmake b/cmake/find/nuraft.cmake
index bcc656de129..7fa5251946e 100644
--- a/cmake/find/nuraft.cmake
+++ b/cmake/find/nuraft.cmake
@@ -1,6 +1,6 @@
 option(ENABLE_NURAFT "Enable NuRaft" ${ENABLE_LIBRARIES})
 
- if (NOT ENABLE_NURAFT)
+if (NOT ENABLE_NURAFT)
     return()
 endif()
 

From 57d8d81d5946ff8f70c07174aae5a9ef99585099 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Wed, 10 Feb 2021 13:02:09 +0300
Subject: [PATCH 0907/1238] Fix style

---
 src/Coordination/NuKeeperServer.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Coordination/NuKeeperServer.cpp b/src/Coordination/NuKeeperServer.cpp
index 314a1412313..0d4bdcc60fe 100644
--- a/src/Coordination/NuKeeperServer.cpp
+++ b/src/Coordination/NuKeeperServer.cpp
@@ -15,7 +15,6 @@ namespace DB
 
 namespace ErrorCodes
 {
-    extern const int TIMEOUT_EXCEEDED;
     extern const int RAFT_ERROR;
 }
 

From 0d179e021bf4681f8d6e15d927ac2a296a89d6c1 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Wed, 10 Feb 2021 13:15:42 +0300
Subject: [PATCH 0908/1238] Add sync cmd

---
 src/Common/ZooKeeper/ZooKeeperConstants.cpp | 3 +++
 src/Common/ZooKeeper/ZooKeeperConstants.h   | 1 +
 2 files changed, 4 insertions(+)

diff --git a/src/Common/ZooKeeper/ZooKeeperConstants.cpp b/src/Common/ZooKeeper/ZooKeeperConstants.cpp
index b4cb9feb518..295094b336b 100644
--- a/src/Common/ZooKeeper/ZooKeeperConstants.cpp
+++ b/src/Common/ZooKeeper/ZooKeeperConstants.cpp
@@ -15,6 +15,7 @@ static const std::unordered_set<int32_t> VALID_OPERATIONS =
     static_cast<int32_t>(OpNum::Get),
     static_cast<int32_t>(OpNum::Set),
     static_cast<int32_t>(OpNum::SimpleList),
+    static_cast<int32_t>(OpNum::Sync),
     static_cast<int32_t>(OpNum::Heartbeat),
     static_cast<int32_t>(OpNum::List),
     static_cast<int32_t>(OpNum::Check),
@@ -48,6 +49,8 @@ std::string toString(OpNum op_num)
             return "Check";
         case OpNum::Multi:
             return "Multi";
+        case OpNum::Sync:
+            return "Sync";
         case OpNum::Heartbeat:
             return "Heartbeat";
         case OpNum::Auth:
diff --git a/src/Common/ZooKeeper/ZooKeeperConstants.h b/src/Common/ZooKeeper/ZooKeeperConstants.h
index 8a20330a2d7..81ca6c6a460 100644
--- a/src/Common/ZooKeeper/ZooKeeperConstants.h
+++ b/src/Common/ZooKeeper/ZooKeeperConstants.h
@@ -24,6 +24,7 @@ enum class OpNum : int32_t
     Get = 4,
     Set = 5,
     SimpleList = 8,
+    Sync = 9,
     Heartbeat = 11,
     List = 12,
     Check = 13,

From 14c7f38aad66cbbba715c3a9a757cd923612ec05 Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Wed, 10 Feb 2021 13:28:22 +0300
Subject: [PATCH 0909/1238] Update deltasum.md

---
 .../sql-reference/aggregate-functions/reference/deltasum.md   | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/en/sql-reference/aggregate-functions/reference/deltasum.md b/docs/en/sql-reference/aggregate-functions/reference/deltasum.md
index 59af4e44120..bb6f802ccaf 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/deltasum.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/deltasum.md
@@ -7,13 +7,13 @@ toc_priority: 141
 Syntax: `deltaSum(value)`
 
 Adds the differences between consecutive rows. If the difference is negative, it is ignored. 
-`value` must be some integer, floating point or decimal type.
-
+`value` must be some integer or floating point type.
 
 Example:
 
 ```sql
 select deltaSum(arrayJoin([1, 2, 3]));                  -- => 2
 select deltaSum(arrayJoin([1, 2, 3, 0, 3, 4, 2, 3]));   -- => 7
+select deltaSum(arrayJoin([2.25, 3, 4.5])); -- => 2.25
 ```
 

From 21faea1f6845591c71b255d6a7de6abfc5cf69be Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Wed, 10 Feb 2021 13:43:31 +0300
Subject: [PATCH 0910/1238] Relax check in data writer for non adaptive tables

---
 .../MergeTree/MergeTreeDataPartWriterWide.cpp |  4 +--
 ...ptive_granularity_vertical_merge.reference |  6 ++++
 ...no_adaptive_granularity_vertical_merge.sql | 30 +++++++++++++++++++
 3 files changed, 38 insertions(+), 2 deletions(-)
 create mode 100644 tests/queries/0_stateless/01712_no_adaptive_granularity_vertical_merge.reference
 create mode 100644 tests/queries/0_stateless/01712_no_adaptive_granularity_vertical_merge.sql

diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp
index 81a6539780c..f2bbf53bd97 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp
@@ -140,7 +140,7 @@ void MergeTreeDataPartWriterWide::shiftCurrentMark(const Granules & granules_wri
     /// If we didn't finished last granule than we will continue to write it from new block
     if (!last_granule.is_complete)
     {
-        if (settings.blocks_are_granules_size)
+        if (settings.can_use_adaptive_granularity && settings.blocks_are_granules_size)
             throw Exception(ErrorCodes::LOGICAL_ERROR, "Incomplete granules are not allowed while blocks are granules size. "
                 "Mark number {} (rows {}), rows written in last mark {}, rows to write in last mark from block {} (from row {}), total marks currently {}",
                 last_granule.mark_number, index_granularity.getMarkRows(last_granule.mark_number), rows_written_in_last_mark,
@@ -506,7 +506,7 @@ void MergeTreeDataPartWriterWide::finishDataSerialization(IMergeTreeDataPart::Ch
     WrittenOffsetColumns offset_columns;
     if (rows_written_in_last_mark > 0)
     {
-        if (settings.blocks_are_granules_size)
+        if (settings.can_use_adaptive_granularity && settings.blocks_are_granules_size)
             throw Exception(ErrorCodes::LOGICAL_ERROR, "Incomplete granule is not allowed while blocks are granules size even for last granule. "
                 "Mark number {} (rows {}), rows written for last mark {}, total marks {}",
                 getCurrentMark(), index_granularity.getMarkRows(getCurrentMark()), rows_written_in_last_mark, index_granularity.getMarksCount());
diff --git a/tests/queries/0_stateless/01712_no_adaptive_granularity_vertical_merge.reference b/tests/queries/0_stateless/01712_no_adaptive_granularity_vertical_merge.reference
new file mode 100644
index 00000000000..51acb066394
--- /dev/null
+++ b/tests/queries/0_stateless/01712_no_adaptive_granularity_vertical_merge.reference
@@ -0,0 +1,6 @@
+1	1
+2	2
+1	1
+2	2
+1	1
+2	2
diff --git a/tests/queries/0_stateless/01712_no_adaptive_granularity_vertical_merge.sql b/tests/queries/0_stateless/01712_no_adaptive_granularity_vertical_merge.sql
new file mode 100644
index 00000000000..e744de3c27d
--- /dev/null
+++ b/tests/queries/0_stateless/01712_no_adaptive_granularity_vertical_merge.sql
@@ -0,0 +1,30 @@
+DROP TABLE IF EXISTS old_school_table;
+
+CREATE TABLE old_school_table
+(
+    key UInt64,
+    value String
+)
+ENGINE = MergeTree()
+ORDER BY key
+SETTINGS index_granularity_bytes = 0, enable_mixed_granularity_parts = 0,
+vertical_merge_algorithm_min_rows_to_activate = 1, vertical_merge_algorithm_min_columns_to_activate = 1;
+
+INSERT INTO old_school_table VALUES (1, '1');
+INSERT INTO old_school_table VALUES (2, '2');
+
+OPTIMIZE TABLE old_school_table FINAL;
+
+SELECT * FROM old_school_table ORDER BY key;
+
+OPTIMIZE TABLE old_school_table FINAL; -- just to be sure
+
+SELECT * FROM old_school_table ORDER BY key;
+
+ALTER TABLE old_school_table MODIFY SETTING vertical_merge_algorithm_min_rows_to_activate = 10000, vertical_merge_algorithm_min_columns_to_activate = 10000;
+
+OPTIMIZE TABLE old_school_table FINAL; -- and horizontal merge
+
+SELECT * FROM old_school_table ORDER BY key;
+
+DROP TABLE IF EXISTS old_school_table;

From 41a3cd978e2c1b820d4fffb8dad3ffea037f140b Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Wed, 10 Feb 2021 14:18:11 +0300
Subject: [PATCH 0911/1238] print changed settings in fuzzer when the server
 dies

---
 programs/client/Client.cpp | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp
index e41f780e99a..3ad42d41aed 100644
--- a/programs/client/Client.cpp
+++ b/programs/client/Client.cpp
@@ -1374,7 +1374,30 @@ private:
             {
                 // Probably the server is dead because we found an assertion
                 // failure. Fail fast.
-                fmt::print(stderr, "Lost connection to the server\n");
+                fmt::print(stderr, "Lost connection to the server.\n");
+
+                // Print the changed settings because they might be needed to
+                // reproduce the error.
+                const auto & changes = context.getSettingsRef().changes();
+                if (!changes.empty())
+                {
+                    fmt::print(stderr, "Changed settings: ");
+                    for (size_t i = 0; i < changes.size(); ++i)
+                    {
+                        if (i)
+                        {
+                            fmt::print(stderr, ",");
+                        }
+                        fmt::print(stderr, "{} = '{}'", changes[i].name,
+                            toString(changes[i].value));
+                    }
+                    fmt::print(stderr, "\n");
+                }
+                else
+                {
+                    fmt::print(stderr, "No changed settings.\n");
+                }
+
                 return false;
             }
 

From e07bdad5c0919757e5376d16b05efaaf214a8b28 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Wed, 10 Feb 2021 14:44:21 +0300
Subject: [PATCH 0912/1238] Fix test build

---
 src/Coordination/tests/gtest_for_build.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Coordination/tests/gtest_for_build.cpp b/src/Coordination/tests/gtest_for_build.cpp
index 82affd38062..c6f29831618 100644
--- a/src/Coordination/tests/gtest_for_build.cpp
+++ b/src/Coordination/tests/gtest_for_build.cpp
@@ -86,7 +86,7 @@ struct SimpliestRaftServer
         params.return_method_ = nuraft::raft_params::blocking;
 
         raft_instance = launcher.init(
-            state_machine, state_manager, nuraft::cs_new<DB::LoggerWrapper>("ToyRaftLogger"), port,
+            state_machine, state_manager, nuraft::cs_new<DB::LoggerWrapper>("ToyRaftLogger", DB::LogsLevel::trace), port,
             nuraft::asio_service::options{}, params);
 
         if (!raft_instance)

From 1227ca3d7a1d75d83144bb6729b4b7c82e7ceade Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Wed, 10 Feb 2021 14:49:41 +0300
Subject: [PATCH 0913/1238] Fix min bytes for wide parts

---
 .../01712_no_adaptive_granularity_vertical_merge.sql            | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/01712_no_adaptive_granularity_vertical_merge.sql b/tests/queries/0_stateless/01712_no_adaptive_granularity_vertical_merge.sql
index e744de3c27d..0acf6992c1e 100644
--- a/tests/queries/0_stateless/01712_no_adaptive_granularity_vertical_merge.sql
+++ b/tests/queries/0_stateless/01712_no_adaptive_granularity_vertical_merge.sql
@@ -7,7 +7,7 @@ CREATE TABLE old_school_table
 )
 ENGINE = MergeTree()
 ORDER BY key
-SETTINGS index_granularity_bytes = 0, enable_mixed_granularity_parts = 0,
+SETTINGS index_granularity_bytes = 0, enable_mixed_granularity_parts = 0, min_bytes_for_wide_part = 0,
 vertical_merge_algorithm_min_rows_to_activate = 1, vertical_merge_algorithm_min_columns_to_activate = 1;
 
 INSERT INTO old_school_table VALUES (1, '1');

From a63d73db58dca2046e4a997e1474e53af27df756 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Wed, 10 Feb 2021 15:09:12 +0300
Subject: [PATCH 0914/1238] Don't allow TTL in old syntax

---
 .../MergeTree/registerStorageMergeTree.cpp    |  3 ++
 ...3_table_ttl_old_syntax_zookeeper.reference |  1 +
 .../01713_table_ttl_old_syntax_zookeeper.sql  | 29 +++++++++++++++++++
 3 files changed, 33 insertions(+)
 create mode 100644 tests/queries/0_stateless/01713_table_ttl_old_syntax_zookeeper.reference
 create mode 100644 tests/queries/0_stateless/01713_table_ttl_old_syntax_zookeeper.sql

diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp
index 44c2bcefcd8..ec016c912a6 100644
--- a/src/Storages/MergeTree/registerStorageMergeTree.cpp
+++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp
@@ -716,6 +716,9 @@ static StoragePtr create(const StorageFactory::Arguments & args)
                 "Index granularity must be a positive integer" + getMergeTreeVerboseHelp(is_extended_storage_def),
                 ErrorCodes::BAD_ARGUMENTS);
         ++arg_num;
+
+        if (args.storage_def->ttl_table && !args.attach)
+            throw Exception("Table TTL is not allowed for MergeTree in old syntax", ErrorCodes::BAD_ARGUMENTS);
     }
 
     DataTypes data_types = metadata.partition_key.data_types;
diff --git a/tests/queries/0_stateless/01713_table_ttl_old_syntax_zookeeper.reference b/tests/queries/0_stateless/01713_table_ttl_old_syntax_zookeeper.reference
new file mode 100644
index 00000000000..d00491fd7e5
--- /dev/null
+++ b/tests/queries/0_stateless/01713_table_ttl_old_syntax_zookeeper.reference
@@ -0,0 +1 @@
+1
diff --git a/tests/queries/0_stateless/01713_table_ttl_old_syntax_zookeeper.sql b/tests/queries/0_stateless/01713_table_ttl_old_syntax_zookeeper.sql
new file mode 100644
index 00000000000..7d4c83c9d3a
--- /dev/null
+++ b/tests/queries/0_stateless/01713_table_ttl_old_syntax_zookeeper.sql
@@ -0,0 +1,29 @@
+DROP TABLE IF EXISTS ttl_table;
+
+CREATE TABLE ttl_table
+(
+    date Date,
+    value UInt64
+)
+ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_01713_table_ttl', '1', date, date, 8192)
+TTL date + INTERVAL 2 MONTH; --{ serverError 36 }
+
+CREATE TABLE ttl_table
+(
+    date Date,
+    value UInt64
+)
+ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_01713_table_ttl', '1', date, date, 8192)
+PARTITION BY date; --{ serverError 42 }
+
+CREATE TABLE ttl_table
+(
+    date Date,
+    value UInt64
+)
+ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_01713_table_ttl', '1', date, date, 8192)
+ORDER BY value; --{ serverError 42 }
+
+SELECT 1;
+
+DROP TABLE IF EXISTS ttl_table;

From 739990d0751f94707db2f0c1ef9b40d252d6f752 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Wed, 10 Feb 2021 15:37:21 +0300
Subject: [PATCH 0915/1238] fix fast test

---
 docker/test/fasttest/run.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh
index bb29959acd2..a8aa852ca5b 100755
--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@@ -120,7 +120,7 @@ function clone_root
                 git checkout FETCH_HEAD
                 echo 'Clonned merge head'
             else
-                git fetch
+                git fetch origin "+refs/pull/$PULL_REQUEST_NUMBER/head"
                 git checkout "$COMMIT_SHA"
                 echo 'Checked out to commit'
             fi

From a67a54e2a501bfe405837e28cccbeb5c8e0c4bc0 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Wed, 10 Feb 2021 15:37:30 +0300
Subject: [PATCH 0916/1238] cleanup

---
 src/Processors/Transforms/WindowTransform.cpp | 9 ++++-----
 src/Processors/Transforms/WindowTransform.h   | 2 +-
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index 8be65c6451c..d3ce87166e1 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -386,8 +386,8 @@ void WindowTransform::advanceFrameStartRangeOffset()
 #define APPLY_FOR_ONE_TYPE(FUNCTION, TYPE) \
 else if (typeid_cast<const TYPE *>(column)) \
 { \
-    /* NOLINT clang-tidy you're dumb, I can't put FUNCTION in braces here. */ \
-    FUNCTION<TYPE>(); \
+    /* clang-tidy you're dumb, I can't put FUNCTION in braces here. */ \
+    FUNCTION<TYPE>(); /* NOLINT */ \
 }
 
 #define APPLY_FOR_TYPES(FUNCTION) \
@@ -949,9 +949,8 @@ void WindowTransform::appendChunk(Chunk & chunk)
         partition_start = partition_end;
         advanceRowNumber(partition_end);
         partition_ended = false;
-        // We have to reset the frame when the new partition starts. This is not a
-        // generally correct way to do so, but we don't really support moving frame
-        // for now.
+        // We have to reset the frame and other pointers when the new partition
+        // starts.
         frame_start = partition_start;
         frame_end = partition_start;
         prev_frame_start = partition_start;
diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h
index b70f5877007..8ac10e63518 100644
--- a/src/Processors/Transforms/WindowTransform.h
+++ b/src/Processors/Transforms/WindowTransform.h
@@ -277,7 +277,7 @@ public:
     // frame_start and the frame_end may jump forward by an unknown amount of
     // blocks, e.g. if we use a RANGE frame. This means that sometimes we don't
     // know neither frame_end nor frame_start.
-    // We update the states of the window functions as we track the frame
+    // We update the states of the window functions after we find the final frame
     // boundaries.
     // After we have found the final boundaries of the frame, we can immediately
     // output the result for the current row, w/o waiting for more data.

From b57b3244f5864cb3af7f4d3e707c6f33445932ed Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Wed, 10 Feb 2021 15:37:05 +0300
Subject: [PATCH 0917/1238] reuse sort order

---
 src/Interpreters/InterpreterSelectQuery.cpp | 135 +++++++++++++++-----
 1 file changed, 103 insertions(+), 32 deletions(-)

diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp
index 2ee1b3956e4..f1a13e7afcb 100644
--- a/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/src/Interpreters/InterpreterSelectQuery.cpp
@@ -1831,46 +1831,117 @@ void InterpreterSelectQuery::executeExpression(QueryPlan & query_plan, const Act
     query_plan.addStep(std::move(expression_step));
 }
 
+static bool windowDescriptionComparator(const WindowDescription * _left,
+    const WindowDescription * _right)
+{
+    const auto & left = _left->full_sort_description;
+    const auto & right = _right->full_sort_description;
+    if (left.size() != right.size())
+    {
+        return left.size() < right.size();
+    }
+
+    for (size_t i = 0; i < left.size(); ++i)
+    {
+        if (left[i].column_name < right[i].column_name)
+        {
+            return true;
+        }
+
+        if (left[i].column_number < right[i].column_number)
+        {
+            return true;
+        }
+
+        if (left[i].direction < right[i].direction)
+        {
+            return true;
+        }
+
+        if (left[i].nulls_direction < right[i].nulls_direction)
+        {
+            return true;
+        }
+
+        assert(left[i] == right[i]);
+    }
+
+    return false;
+}
+
+static bool sortIsPrefix(const WindowDescription & _prefix,
+    const WindowDescription & _full)
+{
+    const auto & prefix = _prefix.full_sort_description;
+    const auto & full = _full.full_sort_description;
+
+    if (prefix.size() > full.size())
+    {
+        return false;
+    }
+
+    for (size_t i = 0; i < prefix.size(); ++i)
+    {
+        if (full[i] != prefix[i])
+        {
+            return false;
+        }
+    }
+
+    return true;
+}
 
 void InterpreterSelectQuery::executeWindow(QueryPlan & query_plan)
 {
+    std::vector<const WindowDescription *> windows_sorted;
     for (const auto & [_, w] : query_analyzer->windowDescriptions())
     {
-        const Settings & settings = context->getSettingsRef();
+        windows_sorted.push_back(&w);
+    }
 
-        auto partial_sorting = std::make_unique<PartialSortingStep>(
-            query_plan.getCurrentDataStream(),
-            w.full_sort_description,
-            0 /* LIMIT */,
-            SizeLimits(settings.max_rows_to_sort, settings.max_bytes_to_sort,
-                settings.sort_overflow_mode));
-        partial_sorting->setStepDescription("Sort each block for window '"
-            + w.window_name + "'");
-        query_plan.addStep(std::move(partial_sorting));
+    std::sort(windows_sorted.begin(), windows_sorted.end(),
+        windowDescriptionComparator);
 
-        auto merge_sorting_step = std::make_unique<MergeSortingStep>(
-            query_plan.getCurrentDataStream(),
-            w.full_sort_description,
-            settings.max_block_size,
-            0 /* LIMIT */,
-            settings.max_bytes_before_remerge_sort,
-            settings.remerge_sort_lowered_memory_bytes_ratio,
-            settings.max_bytes_before_external_sort,
-            context->getTemporaryVolume(),
-            settings.min_free_disk_space_for_temporary_data);
-        merge_sorting_step->setStepDescription("Merge sorted blocks for window '"
-            + w.window_name + "'");
-        query_plan.addStep(std::move(merge_sorting_step));
+    const Settings & settings = context->getSettingsRef();
+    for (size_t i = 0; i < windows_sorted.size(); ++i)
+    {
+        const auto & w = *windows_sorted[i];
+        if (i == 0 || !sortIsPrefix(w, *windows_sorted[i - 1]))
+        {
+            auto partial_sorting = std::make_unique<PartialSortingStep>(
+                query_plan.getCurrentDataStream(),
+                w.full_sort_description,
+                0 /* LIMIT */,
+                SizeLimits(settings.max_rows_to_sort, settings.max_bytes_to_sort,
+                    settings.sort_overflow_mode));
+            partial_sorting->setStepDescription("Sort each block for window '"
+                + w.window_name + "'");
+            query_plan.addStep(std::move(partial_sorting));
 
-        // First MergeSorted, now MergingSorted.
-        auto merging_sorted = std::make_unique<MergingSortedStep>(
-            query_plan.getCurrentDataStream(),
-            w.full_sort_description,
-            settings.max_block_size,
-            0 /* LIMIT */);
-        merging_sorted->setStepDescription("Merge sorted streams for window '"
-            + w.window_name + "'");
-        query_plan.addStep(std::move(merging_sorted));
+            auto merge_sorting_step = std::make_unique<MergeSortingStep>(
+                query_plan.getCurrentDataStream(),
+                w.full_sort_description,
+                settings.max_block_size,
+                0 /* LIMIT */,
+                settings.max_bytes_before_remerge_sort,
+                settings.remerge_sort_lowered_memory_bytes_ratio,
+                settings.max_bytes_before_external_sort,
+                context->getTemporaryVolume(),
+                settings.min_free_disk_space_for_temporary_data);
+            merge_sorting_step->setStepDescription(
+                "Merge sorted blocks for window '" + w.window_name + "'");
+            query_plan.addStep(std::move(merge_sorting_step));
+
+            // First MergeSorted, now MergingSorted.
+            auto merging_sorted = std::make_unique<MergingSortedStep>(
+                query_plan.getCurrentDataStream(),
+                w.full_sort_description,
+                settings.max_block_size,
+                0 /* LIMIT */);
+            merging_sorted->setStepDescription(
+                "Merge sorted streams for window '" + w.window_name + "'");
+            query_plan.addStep(std::move(merging_sorted));
+        }
 
         auto window_step = std::make_unique<WindowStep>(
             query_plan.getCurrentDataStream(),

From d5b0f30732db069705403ce946e663a3df9ac37c Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Wed, 10 Feb 2021 15:43:11 +0300
Subject: [PATCH 0918/1238] cache the block size in the block itself for some
 speedup

---
 src/Processors/Transforms/WindowTransform.cpp |  6 ++++-
 src/Processors/Transforms/WindowTransform.h   | 22 +++++++++++++------
 2 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index d3ce87166e1..914289bca2f 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -835,6 +835,10 @@ void WindowTransform::appendChunk(Chunk & chunk)
             block.output_columns.push_back(ws.window_function.aggregate_function
                 ->getReturnType()->createColumn());
         }
+
+        // Even in case of `count() over ()` we should have a dummy input column.
+        // Not sure how reliable this is...
+        block.rows = block.input_columns[0]->size();
     }
 
     // Start the calculations. First, advance the partition end.
@@ -1037,7 +1041,7 @@ IProcessor::Status WindowTransform::prepare()
             {
                 columns.push_back(ColumnPtr(std::move(res)));
             }
-            output_data.chunk.setColumns(columns, block.numRows());
+            output_data.chunk.setColumns(columns, block.rows);
 
             output.pushData(std::move(output_data));
         }
diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h
index 8ac10e63518..541d4eb87c8 100644
--- a/src/Processors/Transforms/WindowTransform.h
+++ b/src/Processors/Transforms/WindowTransform.h
@@ -33,9 +33,7 @@ struct WindowTransformBlock
     Columns input_columns;
     MutableColumns output_columns;
 
-    // Even in case of `count() over ()` we should have a dummy input column.
-    // Not sure how reliable this is...
-    size_t numRows() const { return input_columns[0]->size(); }
+    size_t rows = 0;
 };
 
 struct RowNumber
@@ -136,9 +134,19 @@ private:
     const Columns & inputAt(const RowNumber & x) const
     { return const_cast<WindowTransform *>(this)->inputAt(x); }
 
+    auto & blockAt(const RowNumber & x)
+    {
+        assert(x.block >= first_block_number);
+        assert(x.block - first_block_number < blocks.size());
+        return blocks[x.block - first_block_number];
+    }
+
+    const auto & blockAt(const RowNumber & x) const
+    { return const_cast<WindowTransform *>(this)->blockAt(x); }
+
     size_t blockRowsNumber(const RowNumber & x) const
     {
-        return inputAt(x)[0]->size();
+        return blockAt(x).rows;
     }
 
     MutableColumns & outputAt(const RowNumber & x)
@@ -153,7 +161,7 @@ private:
         assert(x.block >= first_block_number);
         assert(x.block - first_block_number < blocks.size());
 
-        const auto block_rows = inputAt(x)[0]->size();
+        const auto block_rows = blockAt(x).rows;
         assert(x.row < block_rows);
 
         x.row++;
@@ -177,8 +185,8 @@ private:
         --x.block;
         assert(x.block >= first_block_number);
         assert(x.block < first_block_number + blocks.size());
-        assert(inputAt(x)[0]->size() > 0);
-        x.row = inputAt(x)[0]->size() - 1;
+        assert(blockAt(x).rows > 0);
+        x.row = blockAt(x).rows - 1;
 
 #ifndef NDEBUG
         auto xx = x;

From af214e794f96b28e43f8e9158ae3036f742ba6f1 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Wed, 10 Feb 2021 15:45:39 +0300
Subject: [PATCH 0919/1238] Review fixes.

---
 src/DataStreams/AddingDefaultBlockOutputStream.cpp |  4 ++--
 src/DataStreams/AddingDefaultBlockOutputStream.h   |  2 +-
 src/Functions/replicate.h                          |  2 ++
 src/Interpreters/addMissingDefaults.cpp            | 11 +----------
 src/Interpreters/inplaceBlockConversions.h         |  1 +
 5 files changed, 7 insertions(+), 13 deletions(-)

diff --git a/src/DataStreams/AddingDefaultBlockOutputStream.cpp b/src/DataStreams/AddingDefaultBlockOutputStream.cpp
index 70771aff11b..db1542801d6 100644
--- a/src/DataStreams/AddingDefaultBlockOutputStream.cpp
+++ b/src/DataStreams/AddingDefaultBlockOutputStream.cpp
@@ -14,13 +14,13 @@ AddingDefaultBlockOutputStream::AddingDefaultBlockOutputStream(
     : output(output_), header(header_)
 {
     auto dag = addMissingDefaults(header_, output->getHeader().getNamesAndTypesList(), columns_, context_);
-    actions = std::make_shared<ExpressionActions>(std::move(dag));
+    adding_defaults_actions = std::make_shared<ExpressionActions>(std::move(dag));
 }
 
 void AddingDefaultBlockOutputStream::write(const Block & block)
 {
     auto copy = block;
-    actions->execute(copy);
+    adding_defaults_actions->execute(copy);
     output->write(copy);
 }
 
diff --git a/src/DataStreams/AddingDefaultBlockOutputStream.h b/src/DataStreams/AddingDefaultBlockOutputStream.h
index f7e421d6cd9..5fbbe2aed60 100644
--- a/src/DataStreams/AddingDefaultBlockOutputStream.h
+++ b/src/DataStreams/AddingDefaultBlockOutputStream.h
@@ -39,7 +39,7 @@ public:
 private:
     BlockOutputStreamPtr output;
     const Block header;
-    ExpressionActionsPtr actions;
+    ExpressionActionsPtr adding_defaults_actions;
 };
 
 
diff --git a/src/Functions/replicate.h b/src/Functions/replicate.h
index 6dbea4bd169..9a33951b2a3 100644
--- a/src/Functions/replicate.h
+++ b/src/Functions/replicate.h
@@ -6,6 +6,8 @@ namespace DB
 
 class Context;
 
+/// Creates an array, multiplying the column (the first argument) by the number of elements in the array (the second argument).
+/// Function may accept more then two arguments. If so, the first array with non-empty offsets is chosen.
 class FunctionReplicate : public IFunction
 {
 public:
diff --git a/src/Interpreters/addMissingDefaults.cpp b/src/Interpreters/addMissingDefaults.cpp
index 9ed18b056cd..9e8ce1f75b4 100644
--- a/src/Interpreters/addMissingDefaults.cpp
+++ b/src/Interpreters/addMissingDefaults.cpp
@@ -9,7 +9,6 @@
 #include <Storages/ColumnsDescription.h>
 #include <Interpreters/ExpressionActions.h>
 #include <Functions/IFunctionAdaptors.h>
-#include <Functions/replicate.h>
 #include <Functions/materialize.h>
 
 
@@ -44,15 +43,7 @@ ActionsDAGPtr addMissingDefaults(
 
     auto actions = std::make_shared<ActionsDAG>(header.getColumnsWithTypeAndName());
 
-    FunctionOverloadResolverPtr func_builder_replicate =
-            std::make_shared<FunctionOverloadResolverAdaptor>(
-                    std::make_unique<DefaultOverloadResolver>(
-                            std::make_shared<FunctionReplicate>()));
-
-    FunctionOverloadResolverPtr func_builder_materialize =
-            std::make_shared<FunctionOverloadResolverAdaptor>(
-                    std::make_unique<DefaultOverloadResolver>(
-                            std::make_shared<FunctionMaterialize>()));
+    FunctionOverloadResolverPtr func_builder_replicate = FunctionFactory::instance().get("replicate", context);
 
     /// We take given columns from input block and missed columns without default value
     /// (default and materialized will be computed later).
diff --git a/src/Interpreters/inplaceBlockConversions.h b/src/Interpreters/inplaceBlockConversions.h
index 8f72e5fe862..837fe9153a9 100644
--- a/src/Interpreters/inplaceBlockConversions.h
+++ b/src/Interpreters/inplaceBlockConversions.h
@@ -17,6 +17,7 @@ class ActionsDAG;
 using ActionsDAGPtr = std::shared_ptr<ActionsDAG>;
 
 /// Create actions which adds missing defaults to block according to required_columns using columns description.
+/// Return nullptr if no cations required.
 ActionsDAGPtr evaluateMissingDefaults(
     const Block & header,
     const NamesAndTypesList & required_columns,

From b4d53886a399b1728517c10838f3a2f5b3b3b35b Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Wed, 10 Feb 2021 16:01:05 +0300
Subject: [PATCH 0920/1238] Add sync request/response

---
 src/Common/ZooKeeper/ZooKeeperCommon.cpp | 22 ++++++++++++++++++++++
 src/Common/ZooKeeper/ZooKeeperCommon.h   | 19 +++++++++++++++++++
 src/Coordination/NuKeeperStorage.cpp     | 12 ++++++++++++
 3 files changed, 53 insertions(+)

diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.cpp b/src/Common/ZooKeeper/ZooKeeperCommon.cpp
index 2d32cd75624..56f9de31ec8 100644
--- a/src/Common/ZooKeeper/ZooKeeperCommon.cpp
+++ b/src/Common/ZooKeeper/ZooKeeperCommon.cpp
@@ -37,6 +37,26 @@ void ZooKeeperRequest::write(WriteBuffer & out) const
     out.next();
 }
 
+void ZooKeeperSyncRequest::writeImpl(WriteBuffer & out) const
+{
+    Coordination::write(path, out);
+}
+
+void ZooKeeperSyncRequest::readImpl(ReadBuffer & in)
+{
+    Coordination::read(path, in);
+}
+
+void ZooKeeperSyncResponse::readImpl(ReadBuffer & in)
+{
+    Coordination::read(path, in);
+}
+
+void ZooKeeperSyncResponse::writeImpl(WriteBuffer & out) const
+{
+    Coordination::write(path, out);
+}
+
 void ZooKeeperWatchResponse::readImpl(ReadBuffer & in)
 {
     Coordination::read(type, in);
@@ -423,6 +443,7 @@ void ZooKeeperMultiResponse::writeImpl(WriteBuffer & out) const
 }
 
 ZooKeeperResponsePtr ZooKeeperHeartbeatRequest::makeResponse() const { return std::make_shared<ZooKeeperHeartbeatResponse>(); }
+ZooKeeperResponsePtr ZooKeeperSyncRequest::makeResponse() const { return std::make_shared<ZooKeeperSyncResponse>(); }
 ZooKeeperResponsePtr ZooKeeperAuthRequest::makeResponse() const { return std::make_shared<ZooKeeperAuthResponse>(); }
 ZooKeeperResponsePtr ZooKeeperCreateRequest::makeResponse() const { return std::make_shared<ZooKeeperCreateResponse>(); }
 ZooKeeperResponsePtr ZooKeeperRemoveRequest::makeResponse() const { return std::make_shared<ZooKeeperRemoveResponse>(); }
@@ -478,6 +499,7 @@ void registerZooKeeperRequest(ZooKeeperRequestFactory & factory)
 ZooKeeperRequestFactory::ZooKeeperRequestFactory()
 {
     registerZooKeeperRequest<OpNum::Heartbeat, ZooKeeperHeartbeatRequest>(*this);
+    registerZooKeeperRequest<OpNum::Sync, ZooKeeperSyncRequest>(*this);
     registerZooKeeperRequest<OpNum::Auth, ZooKeeperAuthRequest>(*this);
     registerZooKeeperRequest<OpNum::Close, ZooKeeperCloseRequest>(*this);
     registerZooKeeperRequest<OpNum::Create, ZooKeeperCreateRequest>(*this);
diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.h b/src/Common/ZooKeeper/ZooKeeperCommon.h
index 8bc1cde8cd7..92b1e7c9858 100644
--- a/src/Common/ZooKeeper/ZooKeeperCommon.h
+++ b/src/Common/ZooKeeper/ZooKeeperCommon.h
@@ -75,6 +75,25 @@ struct ZooKeeperHeartbeatRequest final : ZooKeeperRequest
     bool isReadRequest() const override { return false; }
 };
 
+struct ZooKeeperSyncRequest final : ZooKeeperRequest
+{
+    String path;
+    String getPath() const override { return path; }
+    OpNum getOpNum() const override { return OpNum::Sync; }
+    void writeImpl(WriteBuffer & out) const override;
+    void readImpl(ReadBuffer & in) override;
+    ZooKeeperResponsePtr makeResponse() const override;
+    bool isReadRequest() const override { return false; }
+};
+
+struct ZooKeeperSyncResponse final : ZooKeeperResponse
+{
+    String path;
+    void readImpl(ReadBuffer & in) override;
+    void writeImpl(WriteBuffer & out) const override;
+    OpNum getOpNum() const override { return OpNum::Sync; }
+};
+
 struct ZooKeeperHeartbeatResponse final : ZooKeeperResponse
 {
     void readImpl(ReadBuffer &) override {}
diff --git a/src/Coordination/NuKeeperStorage.cpp b/src/Coordination/NuKeeperStorage.cpp
index a86b7432cbf..631f975cddc 100644
--- a/src/Coordination/NuKeeperStorage.cpp
+++ b/src/Coordination/NuKeeperStorage.cpp
@@ -97,6 +97,17 @@ struct NuKeeperStorageHeartbeatRequest final : public NuKeeperStorageRequest
     }
 };
 
+struct NuKeeperStorageSyncRequest final : public NuKeeperStorageRequest
+{
+    using NuKeeperStorageRequest::NuKeeperStorageRequest;
+    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & /* container */, NuKeeperStorage::Ephemerals & /* ephemerals */, int64_t /* zxid */, int64_t /* session_id */) const override
+    {
+        auto response = zk_request->makeResponse();
+        dynamic_cast<Coordination::ZooKeeperSyncResponse *>(response.get())->path = dynamic_cast<Coordination::ZooKeeperSyncRequest *>(zk_request.get())->path;
+        return {response, {}};
+    }
+};
+
 struct NuKeeperStorageCreateRequest final : public NuKeeperStorageRequest
 {
     using NuKeeperStorageRequest::NuKeeperStorageRequest;
@@ -575,6 +586,7 @@ void registerNuKeeperRequestWrapper(NuKeeperWrapperFactory & factory)
 NuKeeperWrapperFactory::NuKeeperWrapperFactory()
 {
     registerNuKeeperRequestWrapper<Coordination::OpNum::Heartbeat, NuKeeperStorageHeartbeatRequest>(*this);
+    registerNuKeeperRequestWrapper<Coordination::OpNum::Sync, NuKeeperStorageSyncRequest>(*this);
     //registerNuKeeperRequestWrapper<Coordination::OpNum::Auth, NuKeeperStorageAuthRequest>(*this);
     registerNuKeeperRequestWrapper<Coordination::OpNum::Close, NuKeeperStorageCloseRequest>(*this);
     registerNuKeeperRequestWrapper<Coordination::OpNum::Create, NuKeeperStorageCreateRequest>(*this);

From e557d60cdb0ed7bc29180fc2d494f33540f3e797 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Wed, 10 Feb 2021 16:27:22 +0300
Subject: [PATCH 0921/1238] fixes for sort order

---
 src/Interpreters/InterpreterSelectQuery.cpp   | 17 +++--
 .../01591_window_functions.reference          | 76 +++++++++++++++++++
 .../0_stateless/01591_window_functions.sql    | 42 ++++++++++
 3 files changed, 128 insertions(+), 7 deletions(-)

diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp
index f1a13e7afcb..1da4221cda8 100644
--- a/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/src/Interpreters/InterpreterSelectQuery.cpp
@@ -1836,12 +1836,8 @@ static bool windowDescriptionComparator(const WindowDescription * _left,
 {
     const auto & left = _left->full_sort_description;
     const auto & right = _right->full_sort_description;
-    if (left.size() != right.size())
-    {
-        return left.size() < right.size();
-    }
 
-    for (size_t i = 0; i < left.size(); ++i)
+    for (size_t i = 0; i < std::min(left.size(), right.size()); ++i)
     {
         if (left[i].column_name < right[i].column_name)
         {
@@ -1863,10 +1859,15 @@ static bool windowDescriptionComparator(const WindowDescription * _left,
             return true;
         }
 
-        assert(left[i] == right[i]);
+        if (left[i] != right[i])
+        {
+            return false;
+        }
     }
 
-    return false;
+    // Note that we check the length last, because we want to put together the
+    // sort orders that have common prefix but different length.
+    return left.size() > right.size();
 }
 
 static bool sortIsPrefix(const WindowDescription & _prefix,
@@ -1893,6 +1894,8 @@ static bool sortIsPrefix(const WindowDescription & _prefix,
 
 void InterpreterSelectQuery::executeWindow(QueryPlan & query_plan)
 {
+    // Try to sort windows in such an order that the window with the longest
+    // sort description goes first, and all window that use its prefixes follow.
     std::vector<const WindowDescription *> windows_sorted;
     for (const auto & [_, w] : query_analyzer->windowDescriptions())
     {
diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference
index 9506d3ce75e..dcadcb699ec 100644
--- a/tests/queries/0_stateless/01591_window_functions.reference
+++ b/tests/queries/0_stateless/01591_window_functions.reference
@@ -836,3 +836,79 @@ settings max_block_size = 4;
 8	10	10	1
 9	0	0	0
 10	0	0	0
+-- Check that we put windows in such an order that we can reuse the sort.
+-- First, check that at least the result is correct when we have many windows
+-- with different sort order.
+select
+    number,
+    count(*) over (partition by p order by number),
+    count(*) over (partition by p order by number, o),
+    count(*) over (),
+    count(*) over (order by number),
+    count(*) over (order by o),
+    count(*) over (order by o, number),
+    count(*) over (order by number, o),
+    count(*) over (partition by p order by o, number),
+    count(*) over (partition by p),
+    count(*) over (partition by p order by o),
+    count(*) over (partition by p, o order by number)
+from
+    (select number, intDiv(number, 3) p, mod(number, 5) o
+        from numbers(16)) t
+order by number
+;
+0	1	1	16	1	4	1	1	1	3	1	1
+1	2	2	16	2	7	5	2	2	3	2	1
+2	3	3	16	3	10	8	3	3	3	3	1
+3	1	1	16	4	13	11	4	2	3	2	1
+4	2	2	16	5	16	14	5	3	3	3	1
+5	3	3	16	6	4	2	6	1	3	1	1
+6	1	1	16	7	7	6	7	1	3	1	1
+7	2	2	16	8	10	9	8	2	3	2	1
+8	3	3	16	9	13	12	9	3	3	3	1
+9	1	1	16	10	16	15	10	3	3	3	1
+10	2	2	16	11	4	3	11	1	3	1	1
+11	3	3	16	12	7	7	12	2	3	2	1
+12	1	1	16	13	10	10	13	1	3	1	1
+13	2	2	16	14	13	13	14	2	3	2	1
+14	3	3	16	15	16	16	15	3	3	3	1
+15	1	1	16	16	4	4	16	1	1	1	1
+-- The EXPLAIN for the above query would be difficult to understand, so check some
+-- simple cases instead.
+explain select
+    count(*) over (),
+    count(*) over (partition by p),
+    count(*) over (partition by p order by o)
+from
+    (select number, intDiv(number, 3) p, mod(number, 5) o
+        from numbers(16)) t
+;
+Expression ((Projection + Before ORDER BY))
+  Window (Window step for window \'\')
+    Window (Window step for window \'PARTITION BY p\')
+      Window (Window step for window \'PARTITION BY p ORDER BY o ASC\')
+        MergingSorted (Merge sorted streams for window \'PARTITION BY p ORDER BY o ASC\')
+          MergeSorting (Merge sorted blocks for window \'PARTITION BY p ORDER BY o ASC\')
+            PartialSorting (Sort each block for window \'PARTITION BY p ORDER BY o ASC\')
+              Expression ((Before window functions + (Projection + Before ORDER BY)))
+                SettingQuotaAndLimits (Set limits and quota after reading from storage)
+                  ReadFromStorage (SystemNumbers)
+explain select
+    count(*) over (order by o, number),
+    count(*) over (order by number)
+from
+    (select number, intDiv(number, 3) p, mod(number, 5) o
+        from numbers(16)) t
+;
+Expression ((Projection + Before ORDER BY))
+  Window (Window step for window \'ORDER BY number ASC\')
+    MergingSorted (Merge sorted streams for window \'ORDER BY number ASC\')
+      MergeSorting (Merge sorted blocks for window \'ORDER BY number ASC\')
+        PartialSorting (Sort each block for window \'ORDER BY number ASC\')
+          Window (Window step for window \'ORDER BY o ASC, number ASC\')
+            MergingSorted (Merge sorted streams for window \'ORDER BY o ASC, number ASC\')
+              MergeSorting (Merge sorted blocks for window \'ORDER BY o ASC, number ASC\')
+                PartialSorting (Sort each block for window \'ORDER BY o ASC, number ASC\')
+                  Expression ((Before window functions + (Projection + Before ORDER BY)))
+                    SettingQuotaAndLimits (Set limits and quota after reading from storage)
+                      ReadFromStorage (SystemNumbers)
diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql
index 49f84867182..04fd48bde9f 100644
--- a/tests/queries/0_stateless/01591_window_functions.sql
+++ b/tests/queries/0_stateless/01591_window_functions.sql
@@ -266,3 +266,45 @@ select x, min(x) over w, max(x) over w, count(x) over w from (
 window w as (order by x desc range between unbounded preceding and 2 preceding)
 order by x
 settings max_block_size = 4;
+
+
+-- Check that we put windows in such an order that we can reuse the sort.
+-- First, check that at least the result is correct when we have many windows
+-- with different sort order.
+select
+    number,
+    count(*) over (partition by p order by number),
+    count(*) over (partition by p order by number, o),
+    count(*) over (),
+    count(*) over (order by number),
+    count(*) over (order by o),
+    count(*) over (order by o, number),
+    count(*) over (order by number, o),
+    count(*) over (partition by p order by o, number),
+    count(*) over (partition by p),
+    count(*) over (partition by p order by o),
+    count(*) over (partition by p, o order by number)
+from
+    (select number, intDiv(number, 3) p, mod(number, 5) o
+        from numbers(16)) t
+order by number
+;
+
+-- The EXPLAIN for the above query would be difficult to understand, so check some
+-- simple cases instead.
+explain select
+    count(*) over (partition by p),
+    count(*) over (),
+    count(*) over (partition by p order by o)
+from
+    (select number, intDiv(number, 3) p, mod(number, 5) o
+        from numbers(16)) t
+;
+
+explain select
+    count(*) over (order by o, number),
+    count(*) over (order by number)
+from
+    (select number, intDiv(number, 3) p, mod(number, 5) o
+        from numbers(16)) t
+;

From 87e5218c91f9842a9ae5eb83599e6ab9f5dc1af9 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com>
Date: Wed, 10 Feb 2021 16:29:33 +0300
Subject: [PATCH 0922/1238] Update Client.cpp

---
 programs/client/Client.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp
index 3ad42d41aed..3c27908741c 100644
--- a/programs/client/Client.cpp
+++ b/programs/client/Client.cpp
@@ -1386,7 +1386,7 @@ private:
                     {
                         if (i)
                         {
-                            fmt::print(stderr, ",");
+                            fmt::print(stderr, ", ");
                         }
                         fmt::print(stderr, "{} = '{}'", changes[i].name,
                             toString(changes[i].value));

From 76ccec28858dc817f33e58662ef73221ca0d44f2 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Wed, 10 Feb 2021 16:39:59 +0300
Subject: [PATCH 0923/1238] Fix XML

---
 programs/client/clickhouse-client.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/programs/client/clickhouse-client.xml b/programs/client/clickhouse-client.xml
index c6b7ba37009..66e7afd8f8c 100644
--- a/programs/client/clickhouse-client.xml
+++ b/programs/client/clickhouse-client.xml
@@ -31,7 +31,7 @@
     </prompt_by_server_display_name>
 
     <!-- 
-        Settings adjustable via command-line parameters (see clickhouse-client --help)
+        Settings adjustable via command-line parameters
         can take their defaults from that config file, see examples:
 
     <host>127.0.0.1</host>

From ba0cd9677c8f0421c371867f601b9a835fcee023 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Wed, 10 Feb 2021 16:44:00 +0300
Subject: [PATCH 0924/1238] update reference

---
 tests/queries/0_stateless/01591_window_functions.reference | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference
index dcadcb699ec..4616159fe22 100644
--- a/tests/queries/0_stateless/01591_window_functions.reference
+++ b/tests/queries/0_stateless/01591_window_functions.reference
@@ -876,8 +876,8 @@ order by number
 -- The EXPLAIN for the above query would be difficult to understand, so check some
 -- simple cases instead.
 explain select
-    count(*) over (),
     count(*) over (partition by p),
+    count(*) over (),
     count(*) over (partition by p order by o)
 from
     (select number, intDiv(number, 3) p, mod(number, 5) o

From 6c9f5e4991cc460318ad53a57bd40d68ca0a26fa Mon Sep 17 00:00:00 2001
From: Nikita Mikhailov <jakalletti@jakalletti-build.sas.yp-c.yandex.net>
Date: Wed, 10 Feb 2021 17:16:27 +0300
Subject: [PATCH 0925/1238] try

---
 src/Formats/JSONEachRowUtils.cpp                    |  6 ++++++
 src/IO/ReadHelpers.cpp                              |  6 +++---
 .../01654_geometry_functions_benchmark.python       | 13 +++++++++++++
 ...parallel_parsing_infinite_segmentation.reference |  1 +
 .../01701_parallel_parsing_infinite_segmentation.sh |  9 +++++++++
 5 files changed, 32 insertions(+), 3 deletions(-)
 create mode 100644 tests/queries/0_stateless/01654_geometry_functions_benchmark.python
 create mode 100644 tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.reference
 create mode 100755 tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh

diff --git a/src/Formats/JSONEachRowUtils.cpp b/src/Formats/JSONEachRowUtils.cpp
index 6017f3983c6..980512c72d7 100644
--- a/src/Formats/JSONEachRowUtils.cpp
+++ b/src/Formats/JSONEachRowUtils.cpp
@@ -15,6 +15,12 @@ std::pair<bool, size_t> fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, D
 
     while (loadAtPosition(in, memory, pos) && (balance || memory.size() + static_cast<size_t>(pos - in.position()) < min_chunk_size))
     {
+        const auto current_object_size = memory.size() + static_cast<size_t>(pos - in.position());
+        if (current_object_size > 10 * min_chunk_size)
+            throw ParsingException("Size of JSON object is extremely large. Expected not greater than " +
+            std::to_string(min_chunk_size) + " bytes, but current is " + std::to_string(current_object_size) +
+            " bytes. Increase the value setting 'min_chunk_bytes_for_parallel_parsing' or check your data manually", ErrorCodes::INCORRECT_DATA);
+
         if (quotes)
         {
             pos = find_first_symbols<'\\', '"'>(pos, in.buffer().end());
diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp
index 5a159defe06..baa12297718 100644
--- a/src/IO/ReadHelpers.cpp
+++ b/src/IO/ReadHelpers.cpp
@@ -1104,9 +1104,9 @@ void saveUpToPosition(ReadBuffer & in, DB::Memory<> & memory, char * current)
     assert(current >= in.position());
     assert(current <= in.buffer().end());
 
-    const int old_bytes = memory.size();
-    const int additional_bytes = current - in.position();
-    const int new_bytes = old_bytes + additional_bytes;
+    const size_t old_bytes = memory.size();
+    const size_t additional_bytes = current - in.position();
+    const size_t new_bytes = old_bytes + additional_bytes;
     /// There are no new bytes to add to memory.
     /// No need to do extra stuff.
     if (new_bytes == 0)
diff --git a/tests/queries/0_stateless/01654_geometry_functions_benchmark.python b/tests/queries/0_stateless/01654_geometry_functions_benchmark.python
new file mode 100644
index 00000000000..d1fe971af28
--- /dev/null
+++ b/tests/queries/0_stateless/01654_geometry_functions_benchmark.python
@@ -0,0 +1,13 @@
+#!/usr/bin/env python3                                                                                                                                                                                              
+import os
+import sys
+import random
+import pandas as pd
+import numpy as np
+
+CURDIR = os.path.dirname(os.path.realpath(__file__))
+sys.path.insert(0, os.path.join(CURDIR, 'helpers'))
+
+from pure_http_client import ClickHouseClient
+
+
diff --git a/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.reference b/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.reference
new file mode 100644
index 00000000000..587579af915
--- /dev/null
+++ b/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.reference
@@ -0,0 +1 @@
+Ok.
diff --git a/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh b/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh
new file mode 100755
index 00000000000..2fea04c6abe
--- /dev/null
+++ b/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash                                                                                                                                                                                                                                           
+                                                                                                                                                                                                                                                              
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)                                                                                                                                                                                                          
+# shellcheck source=../shell_config.sh                                                                                                                                                                                                                        
+. "$CURDIR"/../shell_config.sh   
+
+python3 -c "print('{{\"a\":\"{}\", \"b\":\"{}\"}}'.format('clickhouse'* 10000000, 'dbms' * 100000000))" > big_json.json
+
+clickhouse-local --input_format_parallel_parsing=1 --max_memory_usage=0 -q "select count() from file('big_json.json', 'JSONEachRow', 'a String, b String')" 2>&1 | grep -q "min_chunk_bytes_for_parallel_parsing" && echo "Ok." || echo "FAIL" ||:
\ No newline at end of file

From 2e95dad834627959f1aa245ec52a557e78f1014b Mon Sep 17 00:00:00 2001
From: Nikita Mikhailov <jakalletti@jakalletti-build.sas.yp-c.yandex.net>
Date: Wed, 10 Feb 2021 17:20:28 +0300
Subject: [PATCH 0926/1238] better

---
 .../01654_geometry_functions_benchmark.python       | 13 -------------
 1 file changed, 13 deletions(-)
 delete mode 100644 tests/queries/0_stateless/01654_geometry_functions_benchmark.python

diff --git a/tests/queries/0_stateless/01654_geometry_functions_benchmark.python b/tests/queries/0_stateless/01654_geometry_functions_benchmark.python
deleted file mode 100644
index d1fe971af28..00000000000
--- a/tests/queries/0_stateless/01654_geometry_functions_benchmark.python
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/usr/bin/env python3                                                                                                                                                                                              
-import os
-import sys
-import random
-import pandas as pd
-import numpy as np
-
-CURDIR = os.path.dirname(os.path.realpath(__file__))
-sys.path.insert(0, os.path.join(CURDIR, 'helpers'))
-
-from pure_http_client import ClickHouseClient
-
-

From 30d648dc3cfa12aef2ddf01a7424226edfbd91f4 Mon Sep 17 00:00:00 2001
From: Nikita Mikhailov <jakalletti@jakalletti-build.sas.yp-c.yandex.net>
Date: Wed, 10 Feb 2021 17:22:46 +0300
Subject: [PATCH 0927/1238] better

---
 .../0_stateless/01701_parallel_parsing_infinite_segmentation.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh b/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh
index 2fea04c6abe..e9033a08632 100755
--- a/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh
+++ b/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh
@@ -6,4 +6,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 
 python3 -c "print('{{\"a\":\"{}\", \"b\":\"{}\"}}'.format('clickhouse'* 10000000, 'dbms' * 100000000))" > big_json.json
 
-clickhouse-local --input_format_parallel_parsing=1 --max_memory_usage=0 -q "select count() from file('big_json.json', 'JSONEachRow', 'a String, b String')" 2>&1 | grep -q "min_chunk_bytes_for_parallel_parsing" && echo "Ok." || echo "FAIL" ||:
\ No newline at end of file
+${CLICKHOUSE_LOCAL} --input_format_parallel_parsing=1 --max_memory_usage=0 -q "select count() from file('big_json.json', 'JSONEachRow', 'a String, b String')" 2>&1 | grep -q "min_chunk_bytes_for_parallel_parsing" && echo "Ok." || echo "FAIL" ||:
\ No newline at end of file

From 5bd9c8b122d5d37fe1a92bbcc7616bb3d82e44b1 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Wed, 10 Feb 2021 17:42:45 +0300
Subject: [PATCH 0928/1238] update reference

---
 .../0_stateless/01591_window_functions.reference | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference
index 4616159fe22..46cbaa4e998 100644
--- a/tests/queries/0_stateless/01591_window_functions.reference
+++ b/tests/queries/0_stateless/01591_window_functions.reference
@@ -901,14 +901,14 @@ from
         from numbers(16)) t
 ;
 Expression ((Projection + Before ORDER BY))
-  Window (Window step for window \'ORDER BY number ASC\')
-    MergingSorted (Merge sorted streams for window \'ORDER BY number ASC\')
-      MergeSorting (Merge sorted blocks for window \'ORDER BY number ASC\')
-        PartialSorting (Sort each block for window \'ORDER BY number ASC\')
-          Window (Window step for window \'ORDER BY o ASC, number ASC\')
-            MergingSorted (Merge sorted streams for window \'ORDER BY o ASC, number ASC\')
-              MergeSorting (Merge sorted blocks for window \'ORDER BY o ASC, number ASC\')
-                PartialSorting (Sort each block for window \'ORDER BY o ASC, number ASC\')
+  Window (Window step for window \'ORDER BY o ASC, number ASC\')
+    MergingSorted (Merge sorted streams for window \'ORDER BY o ASC, number ASC\')
+      MergeSorting (Merge sorted blocks for window \'ORDER BY o ASC, number ASC\')
+        PartialSorting (Sort each block for window \'ORDER BY o ASC, number ASC\')
+          Window (Window step for window \'ORDER BY number ASC\')
+            MergingSorted (Merge sorted streams for window \'ORDER BY number ASC\')
+              MergeSorting (Merge sorted blocks for window \'ORDER BY number ASC\')
+                PartialSorting (Sort each block for window \'ORDER BY number ASC\')
                   Expression ((Before window functions + (Projection + Before ORDER BY)))
                     SettingQuotaAndLimits (Set limits and quota after reading from storage)
                       ReadFromStorage (SystemNumbers)

From bc58f4827fbd2522dac306296e9dfb23fbd4fc5c Mon Sep 17 00:00:00 2001
From: Ivan <5627721+abyss7@users.noreply.github.com>
Date: Wed, 10 Feb 2021 17:45:45 +0300
Subject: [PATCH 0929/1238] Increase timeout in tests

---
 tests/integration/test_send_crash_reports/test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/integration/test_send_crash_reports/test.py b/tests/integration/test_send_crash_reports/test.py
index e22cc9681a6..65d49637b13 100644
--- a/tests/integration/test_send_crash_reports/test.py
+++ b/tests/integration/test_send_crash_reports/test.py
@@ -29,12 +29,12 @@ def test_send_segfault(started_node, ):
 
     started_node.copy_file_to_container(os.path.join(SCRIPT_DIR, "fake_sentry_server.py"), "/fake_sentry_server.py")
     started_node.exec_in_container(["bash", "-c", "python3 /fake_sentry_server.py > /fake_sentry_server.log 2>&1"], detach=True, user="root")
-    time.sleep(0.5)
+    time.sleep(1)
     started_node.exec_in_container(["bash", "-c", "pkill -11 clickhouse"], user="root")
 
     result = None
     for attempt in range(1, 6):
-        time.sleep(0.25 * attempt)
+        time.sleep(attempt)
         result = started_node.exec_in_container(['cat', fake_sentry_server.RESULT_PATH], user='root')
         if result == 'OK':
             break

From 47f62e899b46a1e207a43f0a40f8f834ae113ea9 Mon Sep 17 00:00:00 2001
From: Nikita Mikhailov <jakalletti@jakalletti-build.sas.yp-c.yandex.net>
Date: Wed, 10 Feb 2021 17:52:28 +0300
Subject: [PATCH 0930/1238] style

---
 src/Formats/JSONEachRowUtils.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/Formats/JSONEachRowUtils.cpp b/src/Formats/JSONEachRowUtils.cpp
index 980512c72d7..407e3f37c5c 100644
--- a/src/Formats/JSONEachRowUtils.cpp
+++ b/src/Formats/JSONEachRowUtils.cpp
@@ -3,6 +3,10 @@
 
 namespace DB
 {
+namespace ErrorCodes
+{
+    extern const int INCORRECT_DATA;
+}
 
 std::pair<bool, size_t> fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size)
 {

From fe00f7290859c3a43ee56e064c9b4d2be0270d9e Mon Sep 17 00:00:00 2001
From: gyuton <40863448+gyuton@users.noreply.github.com>
Date: Wed, 10 Feb 2021 18:54:47 +0300
Subject: [PATCH 0931/1238] Update settings.md

---
 docs/en/operations/settings/settings.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index e5316834381..c7ee48c11bf 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -2594,7 +2594,7 @@ Default value: `16`.
 
 ## optimize_on_insert {#optimize-on-insert}
 
-Enables or disables data transformation before the insertion, as if merge was done on this block (e.g. replacing, collapsing, aggregating...).
+Enables or disables data transformation before the insertion, as if merge was done on this block (according to table engine).
 
 Possible values:
 

From 023deaaec6defcc5f839a851ed96054084e16200 Mon Sep 17 00:00:00 2001
From: gyuton <40863448+gyuton@users.noreply.github.com>
Date: Wed, 10 Feb 2021 18:55:48 +0300
Subject: [PATCH 0932/1238] Update settings.md

---
 docs/ru/operations/settings/settings.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index 1d175c5fe79..1352fe850df 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -2475,7 +2475,7 @@ SELECT SUM(-1), MAX(0) FROM system.one WHERE 0;
 
 ## optimize_on_insert {#optimize-on-insert}
 
-Включает или выключает преобразование данных перед добавлением в таблицу, как будто над добавляемым блоком предварительно было произведено слияние (например, выполняются замена, сворачивание, агрегирование данных).
+Включает или выключает преобразование данных перед добавлением в таблицу, как будто над добавляемым блоком предварительно было произведено слияние (в соответствии с движком таблицы).
 
 Возможные значения:
 

From 86ff45c50e4245f8f9af46be36b071532d1e1118 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Wed, 10 Feb 2021 19:19:48 +0300
Subject: [PATCH 0933/1238] Aggregate function deltaSum use restrict keyword

---
 src/AggregateFunctions/AggregateFunctionDeltaSum.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/AggregateFunctions/AggregateFunctionDeltaSum.h b/src/AggregateFunctions/AggregateFunctionDeltaSum.h
index 11824c9d51f..d5760de84ae 100644
--- a/src/AggregateFunctions/AggregateFunctionDeltaSum.h
+++ b/src/AggregateFunctions/AggregateFunctionDeltaSum.h
@@ -43,7 +43,7 @@ public:
 
     DataTypePtr getReturnType() const override { return std::make_shared<DataTypeNumber<T>>(); }
 
-    void NO_SANITIZE_UNDEFINED ALWAYS_INLINE add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
+    void NO_SANITIZE_UNDEFINED ALWAYS_INLINE add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
     {
         auto value = assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num];
 
@@ -62,7 +62,7 @@ public:
         }
     }
 
-    void NO_SANITIZE_UNDEFINED ALWAYS_INLINE merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    void NO_SANITIZE_UNDEFINED ALWAYS_INLINE merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         auto place_data = &this->data(place);
         auto rhs_data = &this->data(rhs);
@@ -102,7 +102,7 @@ public:
         // Otherwise lhs either has data or is uninitialized, so we don't need to modify its values.
     }
 
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         writeIntBinary(this->data(place).sum, buf);
         writeIntBinary(this->data(place).first, buf);
@@ -111,7 +111,7 @@ public:
         writePODBinary<bool>(this->data(place).seen_last, buf);
     }
 
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override
     {
         readIntBinary(this->data(place).sum, buf);
         readIntBinary(this->data(place).first, buf);
@@ -120,7 +120,7 @@ public:
         readPODBinary<bool>(this->data(place).seen_last, buf);
     }
 
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         assert_cast<ColumnVector<T> &>(to).getData().push_back(this->data(place).sum);
     }

From 30b2554fa38ca27bbe3d4388fac8b5ac317967c8 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Wed, 10 Feb 2021 20:48:39 +0300
Subject: [PATCH 0934/1238] Fix error

---
 src/Server/HTTPHandler.cpp     | 1 -
 src/Server/TCPHandler.cpp      | 3 ++-
 src/Storages/StorageMemory.cpp | 5 +++--
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp
index 5e0d1f0ac66..eb4d6119c6f 100644
--- a/src/Server/HTTPHandler.cpp
+++ b/src/Server/HTTPHandler.cpp
@@ -800,7 +800,6 @@ bool DynamicQueryHandler::customizeQueryParam(Context & context, const std::stri
 
 std::string DynamicQueryHandler::getQuery(Poco::Net::HTTPServerRequest & request, HTMLForm & params, Context & context)
 {
-
     if (likely(!startsWith(request.getContentType(), "multipart/form-data")))
     {
         /// Part of the query can be passed in the 'query' parameter and the rest in the request body
diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp
index fb42df19746..49f6eda3bab 100644
--- a/src/Server/TCPHandler.cpp
+++ b/src/Server/TCPHandler.cpp
@@ -1188,7 +1188,7 @@ bool TCPHandler::receiveData(bool scalar)
         {
             /// If there is an insert request, then the data should be written directly to `state.io.out`.
             /// Otherwise, we write the blocks in the temporary `external_table_name` table.
-            if (!state.need_receive_data_for_insert && !state.need_receive_data_for_input)
+            if (!state.need_receive_data_for_insert && !state.need_receive_data_for_input && !state.io.out)
             {
                 auto resolved = query_context->tryResolveStorageID(temporary_id, Context::ResolveExternal);
                 StoragePtr storage;
@@ -1206,6 +1206,7 @@ bool TCPHandler::receiveData(bool scalar)
                 /// The data will be written directly to the table.
                 state.io.out = storage->write(ASTPtr(), metadata_snapshot, *query_context);
             }
+
             if (state.need_receive_data_for_input)
                 state.block_for_input = block;
             else
diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp
index 9d0c67ac34c..4530d93c274 100644
--- a/src/Storages/StorageMemory.cpp
+++ b/src/Storages/StorageMemory.cpp
@@ -104,12 +104,13 @@ private:
 class MemoryBlockOutputStream : public IBlockOutputStream
 {
 public:
-    explicit MemoryBlockOutputStream(
+    MemoryBlockOutputStream(
         StorageMemory & storage_,
         const StorageMetadataPtr & metadata_snapshot_)
         : storage(storage_)
         , metadata_snapshot(metadata_snapshot_)
-    {}
+    {
+    }
 
     Block getHeader() const override { return metadata_snapshot->getSampleBlock(); }
 

From be831d09f77a63bb559b8ecbb67495ef4d4135b9 Mon Sep 17 00:00:00 2001
From: Ivan <5627721+abyss7@users.noreply.github.com>
Date: Wed, 10 Feb 2021 21:09:13 +0300
Subject: [PATCH 0935/1238] Update Pytest check (#18972)

* [WIP]

* Update skip-list

* Update ci_config.json

* Do not sync inserts for test

* Fix more tests

* Fix another test

* Enable one more test

* More fixed tests

* More test fixes

* Do not absolutize server path for now

* More test fixes

* Unset CLICKHOUSE_LOG_COMMENT where necessary

* Remove debugging set -e

* Fix more tests

* Fix test reference

* Fix style check
---
 base/daemon/BaseDaemon.cpp                    |   4 +-
 docker/test/stateless_pytest/Dockerfile       |  10 +-
 .../0_stateless/00302_http_compression.sh     |   5 +
 .../0_stateless/00474_readonly_settings.sh    |   2 +-
 .../0_stateless/00506_union_distributed.sql   |   3 +-
 ...7_low_cardinaliry_distributed_group_by.sql |   2 +
 ...800_low_cardinality_distributed_insert.sql |   2 +
 ...nsert_into_distributed_different_types.sql |   3 +-
 .../01023_materialized_view_query_context.sql |   3 +
 ...alized_view_with_join_over_distributed.sql |   2 +
 ...099_parallel_distributed_insert_select.sql |   2 +
 .../01187_set_profile_as_setting.sh           |   2 +-
 .../01235_live_view_over_distributed.sql      |   3 +-
 ...buted_with_subquery_select_table_alias.sql |   1 +
 .../01274_alter_rename_column_distributed.sql |   2 +
 .../01455_opentelemetry_distributed.sh        |   2 +-
 .../0_stateless/01475_read_subcolumns.sql     |   5 +
 .../0_stateless/01475_read_subcolumns_2.sql   |   6 +-
 .../0_stateless/01475_read_subcolumns_3.sql   |   2 +
 .../01475_read_subcolumns_storages.sh         |   2 +
 .../0_stateless/01526_initial_query_id.sh     |   2 +-
 .../0_stateless/01533_multiple_nested.sql     |   6 +-
 .../01548_parallel_parsing_max_memory.sh      |   4 +-
 .../01564_test_hint_woes.reference            |   2 +
 .../0_stateless/01564_test_hint_woes.sql      |   3 +
 ...01586_storage_join_low_cardinality_key.sql |   2 +
 tests/queries/0_stateless/01606_git_import.sh |   2 +
 .../01621_clickhouse_compressor.sh            |  21 +--
 .../01622_defaults_for_url_engine.sh          |   6 +-
 .../01652_ignore_and_low_cardinality.sql      |   2 +
 .../01656_test_query_log_factories_info.sql   |   2 +
 ...665_merge_tree_min_for_concurrent_read.sql |   2 +
 tests/queries/conftest.py                     |   3 +
 tests/queries/query_test.py                   |  58 ++++---
 tests/queries/server.py                       | 144 +++++++++++++++++-
 tests/queries/shell_config.sh                 |   7 +-
 36 files changed, 259 insertions(+), 70 deletions(-)

diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp
index c0026cbe64d..18449dad855 100644
--- a/base/daemon/BaseDaemon.cpp
+++ b/base/daemon/BaseDaemon.cpp
@@ -986,7 +986,7 @@ void BaseDaemon::setupWatchdog()
         if (errno == ECHILD)
         {
             logger().information("Child process no longer exists.");
-            _exit(status);
+            _exit(WEXITSTATUS(status));
         }
 
         if (WIFEXITED(status))
@@ -1020,7 +1020,7 @@ void BaseDaemon::setupWatchdog()
 
         /// Automatic restart is not enabled but you can play with it.
 #if 1
-        _exit(status);
+        _exit(WEXITSTATUS(status));
 #else
         logger().information("Will restart.");
         if (argv0)
diff --git a/docker/test/stateless_pytest/Dockerfile b/docker/test/stateless_pytest/Dockerfile
index 4d0274143d6..58846f90fa7 100644
--- a/docker/test/stateless_pytest/Dockerfile
+++ b/docker/test/stateless_pytest/Dockerfile
@@ -5,7 +5,10 @@ RUN apt-get update -y && \
     apt-get install -y --no-install-recommends \
         python3-pip \
         python3-setuptools \
-        python3-wheel
+        python3-wheel \
+        brotli \
+        netcat-openbsd \
+        zstd
 
 RUN python3 -m pip install \
     wheel \
@@ -15,7 +18,10 @@ RUN python3 -m pip install \
     pytest-randomly \
     pytest-rerunfailures \
     pytest-timeout \
-    pytest-xdist
+    pytest-xdist \
+    pandas \
+    numpy \
+    scipy
 
 CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \
     dpkg -i package_folder/clickhouse-common-static-dbg_*.deb; \
diff --git a/tests/queries/0_stateless/00302_http_compression.sh b/tests/queries/0_stateless/00302_http_compression.sh
index 829475e8602..cfa9a930f09 100755
--- a/tests/queries/0_stateless/00302_http_compression.sh
+++ b/tests/queries/0_stateless/00302_http_compression.sh
@@ -4,6 +4,11 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
 
+if ! command -v gzip &> /dev/null; then echo "gzip not found" 1>&2; exit 1; fi
+if ! command -v brotli &> /dev/null; then echo "brotli not found" 1>&2; exit 1; fi
+if ! command -v xz &> /dev/null; then echo "xz not found" 1>&2; exit 1; fi
+if ! command -v zstd &> /dev/null; then echo "zstd not found" 1>&2; exit 1; fi
+
 ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&enable_http_compression=1"                                     -d 'SELECT number FROM system.numbers LIMIT 10';
 ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&enable_http_compression=0" -H 'Accept-Encoding: gzip'          -d 'SELECT number FROM system.numbers LIMIT 10';
 ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&enable_http_compression=1" -H 'Accept-Encoding: gzip'          -d 'SELECT number FROM system.numbers LIMIT 10' | gzip -d;
diff --git a/tests/queries/0_stateless/00474_readonly_settings.sh b/tests/queries/0_stateless/00474_readonly_settings.sh
index 013d7f23756..0887ecfa14e 100755
--- a/tests/queries/0_stateless/00474_readonly_settings.sh
+++ b/tests/queries/0_stateless/00474_readonly_settings.sh
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 
-CLICKHOUSE_LOG_COMMENT=''
+unset CLICKHOUSE_LOG_COMMENT
 
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
diff --git a/tests/queries/0_stateless/00506_union_distributed.sql b/tests/queries/0_stateless/00506_union_distributed.sql
index 0bd4dd43ac9..3f631b8da56 100644
--- a/tests/queries/0_stateless/00506_union_distributed.sql
+++ b/tests/queries/0_stateless/00506_union_distributed.sql
@@ -1,6 +1,7 @@
-
 -- https://github.com/ClickHouse/ClickHouse/issues/1059
 
+SET insert_distributed_sync = 1;
+
 DROP TABLE IF EXISTS union1;
 DROP TABLE IF EXISTS union2;
 DROP TABLE IF EXISTS union3;
diff --git a/tests/queries/0_stateless/00717_low_cardinaliry_distributed_group_by.sql b/tests/queries/0_stateless/00717_low_cardinaliry_distributed_group_by.sql
index b23d8a566c8..d4d260ee92e 100644
--- a/tests/queries/0_stateless/00717_low_cardinaliry_distributed_group_by.sql
+++ b/tests/queries/0_stateless/00717_low_cardinaliry_distributed_group_by.sql
@@ -1,4 +1,6 @@
+set insert_distributed_sync = 1;
 set allow_suspicious_low_cardinality_types = 1;
+
 DROP TABLE IF EXISTS test_low_null_float;
 DROP TABLE IF EXISTS dist_00717;
 
diff --git a/tests/queries/0_stateless/00800_low_cardinality_distributed_insert.sql b/tests/queries/0_stateless/00800_low_cardinality_distributed_insert.sql
index 15573d859bb..196dfd84c7f 100644
--- a/tests/queries/0_stateless/00800_low_cardinality_distributed_insert.sql
+++ b/tests/queries/0_stateless/00800_low_cardinality_distributed_insert.sql
@@ -1,3 +1,5 @@
+SET insert_distributed_sync = 1;
+
 DROP TABLE IF EXISTS low_cardinality;
 DROP TABLE IF EXISTS low_cardinality_all;
 
diff --git a/tests/queries/0_stateless/00967_insert_into_distributed_different_types.sql b/tests/queries/0_stateless/00967_insert_into_distributed_different_types.sql
index 6324c6a6c10..3b562801f92 100644
--- a/tests/queries/0_stateless/00967_insert_into_distributed_different_types.sql
+++ b/tests/queries/0_stateless/00967_insert_into_distributed_different_types.sql
@@ -1,3 +1,5 @@
+set insert_distributed_sync=1;
+
 DROP TABLE IF EXISTS dist_00967;
 DROP TABLE IF EXISTS underlying_00967;
 
@@ -5,7 +7,6 @@ DROP TABLE IF EXISTS underlying_00967;
 SET send_logs_level='error';
 
 CREATE TABLE dist_00967 (key UInt64) Engine=Distributed('test_shard_localhost', currentDatabase(), underlying_00967);
--- fails for TinyLog()/MergeTree()/... but not for Memory()
 CREATE TABLE underlying_00967 (key Nullable(UInt64)) Engine=TinyLog();
 INSERT INTO dist_00967 SELECT toUInt64(number) FROM system.numbers LIMIT 1;
 
diff --git a/tests/queries/0_stateless/01023_materialized_view_query_context.sql b/tests/queries/0_stateless/01023_materialized_view_query_context.sql
index 7ec8d8fd506..351379d8b14 100644
--- a/tests/queries/0_stateless/01023_materialized_view_query_context.sql
+++ b/tests/queries/0_stateless/01023_materialized_view_query_context.sql
@@ -1,5 +1,8 @@
 -- Create dictionary, since dictGet*() uses DB::Context in executeImpl()
 -- (To cover scope of the Context in DB::PushingToViewsBlockOutputStream::process)
+
+set insert_distributed_sync=1;
+
 DROP TABLE IF EXISTS mv;
 DROP DATABASE IF EXISTS dict_in_01023;
 CREATE DATABASE dict_in_01023;
diff --git a/tests/queries/0_stateless/01046_materialized_view_with_join_over_distributed.sql b/tests/queries/0_stateless/01046_materialized_view_with_join_over_distributed.sql
index 7aac720865d..318f48dc833 100644
--- a/tests/queries/0_stateless/01046_materialized_view_with_join_over_distributed.sql
+++ b/tests/queries/0_stateless/01046_materialized_view_with_join_over_distributed.sql
@@ -1,5 +1,7 @@
 -- from https://github.com/ClickHouse/ClickHouse/issues/5142
 
+set insert_distributed_sync = 1;
+
 DROP TABLE IF EXISTS t;
 DROP TABLE IF EXISTS t_d;
 DROP TABLE IF EXISTS t_v;
diff --git a/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql b/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql
index 4ae655b1ec9..222c05ae827 100644
--- a/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql
+++ b/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql
@@ -1,3 +1,5 @@
+-- set insert_distributed_sync = 1;  -- see https://github.com/ClickHouse/ClickHouse/issues/18971
+
 DROP TABLE IF EXISTS local_01099_a;
 DROP TABLE IF EXISTS local_01099_b;
 DROP TABLE IF EXISTS distributed_01099_a;
diff --git a/tests/queries/0_stateless/01187_set_profile_as_setting.sh b/tests/queries/0_stateless/01187_set_profile_as_setting.sh
index e7dfb83fe76..ec07f4d3687 100755
--- a/tests/queries/0_stateless/01187_set_profile_as_setting.sh
+++ b/tests/queries/0_stateless/01187_set_profile_as_setting.sh
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 
-CLICKHOUSE_LOG_COMMENT=''
+unset CLICKHOUSE_LOG_COMMENT
 
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
diff --git a/tests/queries/0_stateless/01235_live_view_over_distributed.sql b/tests/queries/0_stateless/01235_live_view_over_distributed.sql
index dd9ff80f30e..abc628475db 100644
--- a/tests/queries/0_stateless/01235_live_view_over_distributed.sql
+++ b/tests/queries/0_stateless/01235_live_view_over_distributed.sql
@@ -1,3 +1,4 @@
+set insert_distributed_sync = 1;
 SET allow_experimental_live_view = 1;
 
 DROP TABLE IF EXISTS lv;
@@ -7,7 +8,7 @@ DROP TABLE IF EXISTS visits_layer;
 CREATE TABLE visits(StartDate Date) ENGINE MergeTree ORDER BY(StartDate);
 CREATE TABLE visits_layer(StartDate Date) ENGINE Distributed(test_cluster_two_shards_localhost,  currentDatabase(), 'visits', rand());
 
-CREATE LIVE VIEW lv AS SELECT * FROM visits_layer ORDER BY StartDate; 
+CREATE LIVE VIEW lv AS SELECT * FROM visits_layer ORDER BY StartDate;
 
 INSERT INTO visits_layer (StartDate) VALUES ('2020-01-01');
 INSERT INTO visits_layer (StartDate) VALUES ('2020-01-02');
diff --git a/tests/queries/0_stateless/01237_live_view_over_distributed_with_subquery_select_table_alias.sql b/tests/queries/0_stateless/01237_live_view_over_distributed_with_subquery_select_table_alias.sql
index a572074de3c..de35b0c6c9d 100644
--- a/tests/queries/0_stateless/01237_live_view_over_distributed_with_subquery_select_table_alias.sql
+++ b/tests/queries/0_stateless/01237_live_view_over_distributed_with_subquery_select_table_alias.sql
@@ -1,4 +1,5 @@
 SET allow_experimental_live_view = 1;
+SET insert_distributed_sync = 1;
 
 DROP TABLE IF EXISTS lv;
 DROP TABLE IF EXISTS visits;
diff --git a/tests/queries/0_stateless/01274_alter_rename_column_distributed.sql b/tests/queries/0_stateless/01274_alter_rename_column_distributed.sql
index a35dc7cca56..8799680125f 100644
--- a/tests/queries/0_stateless/01274_alter_rename_column_distributed.sql
+++ b/tests/queries/0_stateless/01274_alter_rename_column_distributed.sql
@@ -1,3 +1,5 @@
+set insert_distributed_sync = 1;
+
 DROP TABLE IF EXISTS visits;
 DROP TABLE IF EXISTS visits_dist;
 
diff --git a/tests/queries/0_stateless/01455_opentelemetry_distributed.sh b/tests/queries/0_stateless/01455_opentelemetry_distributed.sh
index 403a4b1b6b3..bf1d5b31682 100755
--- a/tests/queries/0_stateless/01455_opentelemetry_distributed.sh
+++ b/tests/queries/0_stateless/01455_opentelemetry_distributed.sh
@@ -1,7 +1,7 @@
 #!/usr/bin/env bash
 set -ue
 
-CLICKHOUSE_LOG_COMMENT=''
+unset CLICKHOUSE_LOG_COMMENT
 
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
diff --git a/tests/queries/0_stateless/01475_read_subcolumns.sql b/tests/queries/0_stateless/01475_read_subcolumns.sql
index ce85dd72abf..16832c4fc59 100644
--- a/tests/queries/0_stateless/01475_read_subcolumns.sql
+++ b/tests/queries/0_stateless/01475_read_subcolumns.sql
@@ -61,3 +61,8 @@ SELECT ProfileEvents.Values[indexOf(ProfileEvents.Names, 'FileOpen')]
 FROM system.query_log
 WHERE (type = 'QueryFinish') AND (lower(query) LIKE lower('SELECT m.% FROM %t_map%'))
     AND event_time > now() - INTERVAL 10 SECOND AND current_database = currentDatabase();
+
+DROP TABLE t_arr;
+DROP TABLE t_nul;
+DROP TABLE t_tup;
+DROP TABLE t_map;
diff --git a/tests/queries/0_stateless/01475_read_subcolumns_2.sql b/tests/queries/0_stateless/01475_read_subcolumns_2.sql
index b8959cf27f7..e827d6c360a 100644
--- a/tests/queries/0_stateless/01475_read_subcolumns_2.sql
+++ b/tests/queries/0_stateless/01475_read_subcolumns_2.sql
@@ -12,7 +12,7 @@ CREATE TABLE subcolumns
     arr2 Array(Array(Nullable(String))),
     lc LowCardinality(String),
     nested Nested(col1 String, col2 Nullable(UInt32))
-) 
+)
 ENGINE = MergeTree order by tuple() SETTINGS min_bytes_for_wide_part = '10M';
 
 INSERT INTO subcolumns VALUES (([1, NULL], 2, 'a'), ['foo', NULL, 'bar'], [['123'], ['456', '789']], 'qqqq', ['zzz', 'xxx'], [42, 43]);
@@ -37,7 +37,7 @@ CREATE TABLE subcolumns
     arr2 Array(Array(Nullable(String))),
     lc LowCardinality(String),
     nested Nested(col1 String, col2 Nullable(UInt32))
-) 
+)
 ENGINE = MergeTree order by tuple() SETTINGS min_bytes_for_wide_part = 0;
 
 INSERT INTO subcolumns VALUES (([1, NULL], 2, 'a'), ['foo', NULL, 'bar'], [['123'], ['456', '789']], 'qqqq', ['zzz', 'xxx'], [42, 43]);
@@ -47,3 +47,5 @@ SELECT t.a.size0, t.a.null, t.u, t.s, t.s.null FROM subcolumns;
 SELECT sumArray(arr.null), sum(arr.size0) FROM subcolumns;
 SELECT arr2, arr2.size0, arr2.size1, arr2.null FROM subcolumns;
 -- SELECT nested.col1, nested.col2, nested.size0, nested.size0, nested.col2.null FROM subcolumns;
+
+DROP TABLE subcolumns;
diff --git a/tests/queries/0_stateless/01475_read_subcolumns_3.sql b/tests/queries/0_stateless/01475_read_subcolumns_3.sql
index 66bcd7dbc91..54598f19bdc 100644
--- a/tests/queries/0_stateless/01475_read_subcolumns_3.sql
+++ b/tests/queries/0_stateless/01475_read_subcolumns_3.sql
@@ -37,3 +37,5 @@ SELECT count() FROM map_subcolumns PREWHERE has(m.keys, 'b');
 
 SELECT id, m.size0 FROM map_subcolumns;
 SELECT count() FROM map_subcolumns WHERE m.size0 > 2;
+
+DROP TABLE map_subcolumns;
diff --git a/tests/queries/0_stateless/01475_read_subcolumns_storages.sh b/tests/queries/0_stateless/01475_read_subcolumns_storages.sh
index 684d65ceb25..be22b1b4185 100755
--- a/tests/queries/0_stateless/01475_read_subcolumns_storages.sh
+++ b/tests/queries/0_stateless/01475_read_subcolumns_storages.sh
@@ -23,3 +23,5 @@ for engine in "${ENGINES[@]}"; do
     $CLICKHOUSE_CLIENT --query "SELECT * FROM subcolumns"
     $CLICKHOUSE_CLIENT --query "SELECT n, n.null, a1, a1.size0, a2, a2.size0, a2.size1, a2.size2, a3, a3.size0, a3.null, t, t.s, t.v, m, m.keys, m.values FROM subcolumns"
 done
+
+$CLICKHOUSE_CLIENT -q "DROP TABLE subcolumns"
diff --git a/tests/queries/0_stateless/01526_initial_query_id.sh b/tests/queries/0_stateless/01526_initial_query_id.sh
index beb1f14bda4..f9d739b57cd 100755
--- a/tests/queries/0_stateless/01526_initial_query_id.sh
+++ b/tests/queries/0_stateless/01526_initial_query_id.sh
@@ -1,7 +1,7 @@
 #!/usr/bin/env bash
 set -ue
 
-CLICKHOUSE_LOG_COMMENT=''
+unset CLICKHOUSE_LOG_COMMENT
 
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
diff --git a/tests/queries/0_stateless/01533_multiple_nested.sql b/tests/queries/0_stateless/01533_multiple_nested.sql
index 6374d6fca21..38c80617334 100644
--- a/tests/queries/0_stateless/01533_multiple_nested.sql
+++ b/tests/queries/0_stateless/01533_multiple_nested.sql
@@ -8,7 +8,7 @@ CREATE TABLE nested
     col2 Nested(a UInt32, n Nested(s String, b UInt32)),
     col3 Nested(n1 Nested(a UInt32, b UInt32), n2 Nested(s String, t String))
 )
-ENGINE = MergeTree 
+ENGINE = MergeTree
 ORDER BY tuple()
 SETTINGS min_bytes_for_wide_part = 0;
 
@@ -55,7 +55,7 @@ CREATE TABLE nested
     id UInt32,
     col1 Nested(a UInt32, n Nested(s String, b UInt32))
 )
-ENGINE = MergeTree 
+ENGINE = MergeTree
 ORDER BY id
 SETTINGS min_bytes_for_wide_part = 0;
 
@@ -64,3 +64,5 @@ SELECT id % 10, sum(length(col1)), sumArray(arrayMap(x -> length(x), col1.n.b))
 
 SELECT arraySum(col1.a), arrayMap(x -> x * x * 2, col1.a) FROM nested ORDER BY id LIMIT 5;
 SELECT untuple(arrayJoin(arrayJoin(col1.n))) FROM nested ORDER BY id LIMIT 10 OFFSET 10;
+
+DROP TABLE nested;
diff --git a/tests/queries/0_stateless/01548_parallel_parsing_max_memory.sh b/tests/queries/0_stateless/01548_parallel_parsing_max_memory.sh
index d7ee2840763..8c4900043d0 100755
--- a/tests/queries/0_stateless/01548_parallel_parsing_max_memory.sh
+++ b/tests/queries/0_stateless/01548_parallel_parsing_max_memory.sh
@@ -4,6 +4,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
 
-yes http://foobarfoobarfoobarfoobarfoobarfoobarfoobar.com | head -c1G > 1g.csv
+yes http://foobarfoobarfoobarfoobarfoobarfoobarfoobar.com | head -c1G > ${CLICKHOUSE_TMP}/1g.csv
 
-$CLICKHOUSE_LOCAL --stacktrace --input_format_parallel_parsing=1 --max_memory_usage=100Mi -q "select count() from file('1g.csv', 'TSV', 'URL String')"
\ No newline at end of file
+$CLICKHOUSE_LOCAL --stacktrace --input_format_parallel_parsing=1 --max_memory_usage=100Mi -q "select count() from file('${CLICKHOUSE_TMP}/1g.csv', 'TSV', 'URL String')"
diff --git a/tests/queries/0_stateless/01564_test_hint_woes.reference b/tests/queries/0_stateless/01564_test_hint_woes.reference
index 892ca733d7c..9ce4572eab4 100644
--- a/tests/queries/0_stateless/01564_test_hint_woes.reference
+++ b/tests/queries/0_stateless/01564_test_hint_woes.reference
@@ -29,3 +29,5 @@ INSERT INTO t0(c0, c1) VALUES ("1",1) ; -- { clientError 47 }
 INSERT INTO t0(c0, c1) VALUES ('1', 1) ;
 -- the return code must be zero after the final query has failed with expected error
 insert into values_01564 values (11); -- { serverError 469 }
+drop table t0;
+drop table values_01564;
diff --git a/tests/queries/0_stateless/01564_test_hint_woes.sql b/tests/queries/0_stateless/01564_test_hint_woes.sql
index ec2c319e8d1..fee85130b03 100644
--- a/tests/queries/0_stateless/01564_test_hint_woes.sql
+++ b/tests/queries/0_stateless/01564_test_hint_woes.sql
@@ -49,3 +49,6 @@ INSERT INTO t0(c0, c1) VALUES ('1', 1) ;
 
 -- the return code must be zero after the final query has failed with expected error
 insert into values_01564 values (11); -- { serverError 469 }
+
+drop table t0;
+drop table values_01564;
diff --git a/tests/queries/0_stateless/01586_storage_join_low_cardinality_key.sql b/tests/queries/0_stateless/01586_storage_join_low_cardinality_key.sql
index 4b613b6d7ce..28507e25fd4 100644
--- a/tests/queries/0_stateless/01586_storage_join_low_cardinality_key.sql
+++ b/tests/queries/0_stateless/01586_storage_join_low_cardinality_key.sql
@@ -9,3 +9,5 @@ INSERT INTO low_card VALUES ( '1' );
 SELECT * FROM low_card;
 SELECT * FROM low_card WHERE lc = '1';
 SELECT CAST(lc AS String) FROM low_card;
+
+DROP TABLE low_card;
diff --git a/tests/queries/0_stateless/01606_git_import.sh b/tests/queries/0_stateless/01606_git_import.sh
index 16a0b92abe7..6d425c9bceb 100755
--- a/tests/queries/0_stateless/01606_git_import.sh
+++ b/tests/queries/0_stateless/01606_git_import.sh
@@ -6,6 +6,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 
 # Clone some not too large repository and create a database from it.
 
+cd $CLICKHOUSE_TMP || exit
+
 # Protection for network errors
 for _ in {1..10}; do
     rm -rf ./clickhouse-odbc
diff --git a/tests/queries/0_stateless/01621_clickhouse_compressor.sh b/tests/queries/0_stateless/01621_clickhouse_compressor.sh
index 5292bcef52a..3157cb0e887 100755
--- a/tests/queries/0_stateless/01621_clickhouse_compressor.sh
+++ b/tests/queries/0_stateless/01621_clickhouse_compressor.sh
@@ -7,27 +7,18 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 
 set -e
 
-TEMP_DIR="$(mktemp -d /tmp/clickhouse.test..XXXXXX)"
-cd "${TEMP_DIR:?}"
-
-function cleanup()
-{
-    rm -fr "${TEMP_DIR:?}"
-}
-trap cleanup EXIT
-
 # This is random garbage, so compression ratio will be very low.
-tr -cd 'a-z0-9' < /dev/urandom | head -c1M > input
+tr -cd 'a-z0-9' < /dev/urandom | head -c1M > ${CLICKHOUSE_TMP}/input
 
 # stdin/stdout streams
-$CLICKHOUSE_COMPRESSOR < input > output
-diff -q <($CLICKHOUSE_COMPRESSOR --decompress < output) input
+$CLICKHOUSE_COMPRESSOR < ${CLICKHOUSE_TMP}/input > ${CLICKHOUSE_TMP}/output
+diff -q <($CLICKHOUSE_COMPRESSOR --decompress < ${CLICKHOUSE_TMP}/output) ${CLICKHOUSE_TMP}/input
 
 # positional arguments, and that fact that input/output will be overwritten
-$CLICKHOUSE_COMPRESSOR input output
-diff -q <($CLICKHOUSE_COMPRESSOR --decompress output) input
+$CLICKHOUSE_COMPRESSOR ${CLICKHOUSE_TMP}/input ${CLICKHOUSE_TMP}/output
+diff -q <($CLICKHOUSE_COMPRESSOR --decompress ${CLICKHOUSE_TMP}/output) ${CLICKHOUSE_TMP}/input
 
 # --offset-in-decompressed-block
-diff -q <($CLICKHOUSE_COMPRESSOR --decompress --offset-in-decompressed-block 10 output) <(tail -c+$((10+1)) input)
+diff -q <($CLICKHOUSE_COMPRESSOR --decompress --offset-in-decompressed-block 10 ${CLICKHOUSE_TMP}/output) <(tail -c+$((10+1)) ${CLICKHOUSE_TMP}/input)
 
 # TODO: --offset-in-compressed-file using some .bin file (via clickhouse-local + check-marks)
diff --git a/tests/queries/0_stateless/01622_defaults_for_url_engine.sh b/tests/queries/0_stateless/01622_defaults_for_url_engine.sh
index e7deace8b46..7afdbbc6b66 100755
--- a/tests/queries/0_stateless/01622_defaults_for_url_engine.sh
+++ b/tests/queries/0_stateless/01622_defaults_for_url_engine.sh
@@ -7,12 +7,12 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 
 PORT="$(($RANDOM%63000+2001))"
 
-TEMP_FILE="$CURDIR/01622_defaults_for_url_engine.tmp"
+TEMP_FILE="${CLICKHOUSE_TMP}/01622_defaults_for_url_engine.tmp"
 
 function thread1
 {
-    while true; do 
-        echo -e "HTTP/1.1 200 OK\n\n{\"a\": 1}" | nc -l -p $1 -q 1; 
+    while true; do
+        echo -e "HTTP/1.1 200 OK\n\n{\"a\": 1}" | nc -l -p $1 -q 1;
     done
 }
 
diff --git a/tests/queries/0_stateless/01652_ignore_and_low_cardinality.sql b/tests/queries/0_stateless/01652_ignore_and_low_cardinality.sql
index b3d3ad81834..d664ec606b5 100644
--- a/tests/queries/0_stateless/01652_ignore_and_low_cardinality.sql
+++ b/tests/queries/0_stateless/01652_ignore_and_low_cardinality.sql
@@ -4,3 +4,5 @@ SELECT ignore(10, ignore(*), ignore(ignore(-2, 1025, *)), NULL, *), * FROM lc_nu
 
 
 SELECT ignore(toLowCardinality(1), toLowCardinality(2), 3);
+
+DROP TABLE lc_null_int8_defnull;
diff --git a/tests/queries/0_stateless/01656_test_query_log_factories_info.sql b/tests/queries/0_stateless/01656_test_query_log_factories_info.sql
index aa9bdd42a71..9f374def8b5 100644
--- a/tests/queries/0_stateless/01656_test_query_log_factories_info.sql
+++ b/tests/queries/0_stateless/01656_test_query_log_factories_info.sql
@@ -56,3 +56,5 @@ WHERE current_database = currentDatabase() AND type == 'QueryFinish' AND (query
 ORDER BY query_start_time DESC LIMIT 1 FORMAT TabSeparatedWithNames;
 SELECT '';
 
+DROP TABLE test_query_log_factories_info1.memory_table;
+DROP DATABASE test_query_log_factories_info1;
diff --git a/tests/queries/0_stateless/01665_merge_tree_min_for_concurrent_read.sql b/tests/queries/0_stateless/01665_merge_tree_min_for_concurrent_read.sql
index ca324acdce3..e233f1e6169 100644
--- a/tests/queries/0_stateless/01665_merge_tree_min_for_concurrent_read.sql
+++ b/tests/queries/0_stateless/01665_merge_tree_min_for_concurrent_read.sql
@@ -4,3 +4,5 @@ INSERT INTO data_01655 VALUES (1);
 SELECT * FROM data_01655 SETTINGS merge_tree_min_rows_for_concurrent_read=0, merge_tree_min_bytes_for_concurrent_read=0;
 -- UINT64_MAX
 SELECT * FROM data_01655 SETTINGS merge_tree_min_rows_for_concurrent_read=18446744073709551615, merge_tree_min_bytes_for_concurrent_read=18446744073709551615;
+
+DROP TABLE data_01655;
diff --git a/tests/queries/conftest.py b/tests/queries/conftest.py
index 2f19ae7c479..40a9a6b3a2e 100644
--- a/tests/queries/conftest.py
+++ b/tests/queries/conftest.py
@@ -25,6 +25,9 @@ def bin_prefix(cmdopts):
     prefix = 'clickhouse'
     if cmdopts['builddir'] is not None:
         prefix = os.path.join(cmdopts['builddir'], 'programs', prefix)
+    # FIXME: does this hangs the server start for some reason?
+    # if not os.path.isabs(prefix):
+    #     prefix = os.path.abspath(prefix)
     return prefix
 
 
diff --git a/tests/queries/query_test.py b/tests/queries/query_test.py
index c4e7e613175..3dea639187e 100644
--- a/tests/queries/query_test.py
+++ b/tests/queries/query_test.py
@@ -14,13 +14,10 @@ SKIP_LIST = [
     "00987_distributed_stack_overflow",
 
     # just fail
-    "00302_http_compression",
-    "00463_long_sessions_in_http_interface",
     "00505_secure",
     "00505_shard_secure",
-    "00506_union_distributed",  # flaky
     "00646_url_engine",
-    "00821_distributed_storage_with_join_on.sql",  # flaky
+    "00725_memory_tracking",  # BROKEN
     "00834_cancel_http_readonly_queries_on_client_close",
     "00933_test_fix_extra_seek_on_compressed_cache",
     "00965_logs_level_bugfix",
@@ -30,10 +27,6 @@ SKIP_LIST = [
     "01014_lazy_database_concurrent_recreate_reattach_and_show_tables",
     "01018_Distributed__shard_num",
     "01018_ip_dictionary",
-    "01023_materialized_view_query_context",  # flaky
-    "01035_lc_empty_part_bug",  # flaky
-    "01037_polygon_dicts_simple_functions.sh",  # flaky
-    "01046_materialized_view_with_join_over_distributed",  # flaky
     "01050_clickhouse_dict_source_with_subquery",
     "01053_ssd_dictionary",
     "01054_cache_dictionary_overflow_cell",
@@ -43,25 +36,25 @@ SKIP_LIST = [
     "01086_odbc_roundtrip",
     "01088_benchmark_query_id",
     "01098_temporary_and_external_tables",
-    "01099_parallel_distributed_insert_select",  # flaky
+    "01099_parallel_distributed_insert_select",
     "01103_check_cpu_instructions_at_startup",
     "01114_database_atomic",
     "01148_zookeeper_path_macros_unfolding",
-    "01193_metadata_loading.sh",  # flaky
-    "01274_alter_rename_column_distributed",  # flaky
+    "01181_db_atomic_drop_on_cluster",  # tcp port in reference
     "01280_ssd_complex_key_dictionary",
     "01293_client_interactive_vertical_multiline",  # expect-test
     "01293_client_interactive_vertical_singleline",  # expect-test
+    "01293_system_distribution_queue",  # FLAKY
     "01293_show_clusters",
     "01294_lazy_database_concurrent_recreate_reattach_and_show_tables",
     "01294_system_distributed_on_cluster",
     "01300_client_save_history_when_terminated",  # expect-test
     "01304_direct_io",
     "01306_benchmark_json",
+    "01035_lc_empty_part_bug",  # FLAKY
     "01320_create_sync_race_condition_zookeeper",
     "01355_CSV_input_format_allow_errors",
     "01370_client_autocomplete_word_break_characters",  # expect-test
-    "01375_storage_file_tsv_csv_with_names_write_prefix",  # flaky
     "01376_GROUP_BY_injective_elimination_dictGet",
     "01393_benchmark_secure_port",
     "01418_custom_settings",
@@ -72,6 +65,7 @@ SKIP_LIST = [
     "01507_clickhouse_server_start_with_embedded_config",
     "01514_distributed_cancel_query_on_error",
     "01520_client_print_query_id",  # expect-test
+    "01526_client_start_and_exit",  # expect-test
     "01527_dist_sharding_key_dictGet_reload",
     "01545_url_file_format_settings",
     "01553_datetime64_comparison",
@@ -79,17 +73,18 @@ SKIP_LIST = [
     "01558_ttest_scipy",
     "01561_mann_whitney_scipy",
     "01582_distinct_optimization",
-    "01586_storage_join_low_cardinality_key",
-    "01599_multiline_input_and_singleline_comments",
-    "01600_benchmark_query",
+    "01599_multiline_input_and_singleline_comments",  # expect-test
     "01601_custom_tld",
-    "01601_proxy_protocol",
+    "01610_client_spawn_editor",  # expect-test
+    "01676_clickhouse_client_autocomplete",  # expect-test (partially)
+    "01683_text_log_deadlock",  # secure tcp
 ]
 
 
 def check_result(result, error, return_code, reference, replace_map):
-    for old, new in replace_map.items():
-        result = result.replace(old.encode('utf-8'), new.encode('utf-8'))
+    if replace_map:
+        for old, new in replace_map.items():
+            result = result.replace(old.encode('utf-8'), new.encode('utf-8'))
 
     if return_code != 0:
         try:
@@ -106,9 +101,9 @@ def check_result(result, error, return_code, reference, replace_map):
             pytrace=False)
 
 
-def run_client(bin_prefix, port, query, reference, replace_map={}):
+def run_client(bin_prefix, port, database, query, reference, replace_map=None):
     # We can't use `text=True` since some tests may return binary data
-    client = subprocess.Popen([bin_prefix + '-client', '--port', str(port), '-m', '-n', '--testmode'],
+    client = subprocess.Popen([bin_prefix + '-client', '--port', str(port), '-d', database, '-m', '-n', '--testmode'],
                               stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     result, error = client.communicate(query.encode('utf-8'))
     assert client.returncode is not None, "Client should exit after processing all queries"
@@ -116,12 +111,13 @@ def run_client(bin_prefix, port, query, reference, replace_map={}):
     check_result(result, error, client.returncode, reference, replace_map)
 
 
-def run_shell(bin_prefix, server, database, path, reference, replace_map={}):
+def run_shell(bin_prefix, server, database, path, reference, replace_map=None):
     env = {
         'CLICKHOUSE_BINARY': bin_prefix,
         'CLICKHOUSE_DATABASE': database,
         'CLICKHOUSE_PORT_TCP': str(server.tcp_port),
         'CLICKHOUSE_PORT_TCP_SECURE': str(server.tcps_port),
+        'CLICKHOUSE_PORT_TCP_WITH_PROXY': str(server.proxy_port),
         'CLICKHOUSE_PORT_HTTP': str(server.http_port),
         'CLICKHOUSE_PORT_INTERSERVER': str(server.inter_port),
         'CLICKHOUSE_TMP': server.tmp_dir,
@@ -136,6 +132,7 @@ def run_shell(bin_prefix, server, database, path, reference, replace_map={}):
 
 def random_str(length=10):
     alphabet = string.ascii_lowercase + string.digits
+    random.seed(os.urandom(8))
     return ''.join(random.choice(alphabet) for _ in range(length))
 
 
@@ -159,17 +156,18 @@ def test_sql_query(bin_prefix, sql_query, standalone_server):
         reference = file.read()
 
     random_name = 'test_{random}'.format(random=random_str())
-    query = 'CREATE DATABASE {random}; USE {random}; {query}'.format(random=random_name, query=query)
-    run_client(bin_prefix, tcp_port, query, reference, {random_name: 'default'})
+    run_client(bin_prefix, tcp_port, 'default', 'CREATE DATABASE {random};'.format(random=random_name), b'')
+
+    run_client(bin_prefix, tcp_port, random_name, query, reference, {random_name: 'default'})
 
     query = "SELECT 'SHOW ORPHANED TABLES'; SELECT name FROM system.tables WHERE database != 'system' ORDER BY (database, name);"
-    run_client(bin_prefix, tcp_port, query, b'SHOW ORPHANED TABLES\n')
+    run_client(bin_prefix, tcp_port, 'default', query, b'SHOW ORPHANED TABLES\n')
 
     query = 'DROP DATABASE {random};'.format(random=random_name)
-    run_client(bin_prefix, tcp_port, query, b'')
+    run_client(bin_prefix, tcp_port, 'default', query, b'')
 
     query = "SELECT 'SHOW ORPHANED DATABASES'; SHOW DATABASES;"
-    run_client(bin_prefix, tcp_port, query, b'SHOW ORPHANED DATABASES\ndefault\nsystem\n')
+    run_client(bin_prefix, tcp_port, 'default', query, b'SHOW ORPHANED DATABASES\ndefault\nsystem\n')
 
 
 def test_shell_query(bin_prefix, shell_query, standalone_server):
@@ -191,15 +189,15 @@ def test_shell_query(bin_prefix, shell_query, standalone_server):
 
     random_name = 'test_{random}'.format(random=random_str())
     query = 'CREATE DATABASE {random};'.format(random=random_name)
-    run_client(bin_prefix, tcp_port, query, b'')
+    run_client(bin_prefix, tcp_port, 'default', query, b'')
 
     run_shell(bin_prefix, standalone_server, random_name, shell_path, reference, {random_name: 'default'})
 
     query = "SELECT 'SHOW ORPHANED TABLES'; SELECT name FROM system.tables WHERE database != 'system' ORDER BY (database, name);"
-    run_client(bin_prefix, tcp_port, query, b'SHOW ORPHANED TABLES\n')
+    run_client(bin_prefix, tcp_port, 'default', query, b'SHOW ORPHANED TABLES\n')
 
     query = 'DROP DATABASE {random};'.format(random=random_name)
-    run_client(bin_prefix, tcp_port, query, b'')
+    run_client(bin_prefix, tcp_port, 'default', query, b'')
 
     query = "SELECT 'SHOW ORPHANED DATABASES'; SHOW DATABASES;"
-    run_client(bin_prefix, tcp_port, query, b'SHOW ORPHANED DATABASES\ndefault\nsystem\n')
+    run_client(bin_prefix, tcp_port, 'default', query, b'SHOW ORPHANED DATABASES\ndefault\nsystem\n')
diff --git a/tests/queries/server.py b/tests/queries/server.py
index 599de2400e3..ed12931e658 100644
--- a/tests/queries/server.py
+++ b/tests/queries/server.py
@@ -37,6 +37,7 @@ class ServerThread(threading.Thread):
         self.tcps_port = port_base + 4
         self.https_port = port_base + 5
         self.odbc_port = port_base + 6
+        self.proxy_port = port_base + 7
 
         self._args = [
             '--config-file={config_path}'.format(config_path=self.server_config),
@@ -44,6 +45,7 @@ class ServerThread(threading.Thread):
             '--tcp_port={tcp_port}'.format(tcp_port=self.tcp_port),
             '--http_port={http_port}'.format(http_port=self.http_port),
             '--interserver_http_port={inter_port}'.format(inter_port=self.inter_port),
+            '--tcp_with_proxy_port={proxy_port}'.format(proxy_port=self.proxy_port),
             # TODO: SSL certificate is not specified '--tcp_port_secure={tcps_port}'.format(tcps_port=self.tcps_port),
         ]
 
@@ -76,8 +78,8 @@ class ServerThread(threading.Thread):
                     print('Successful server response:', s.recv(1024))  # FIXME: read whole buffered response
                     s.shutdown(socket.SHUT_RDWR)
                     s.close()
-                except Exception as e:
-                    print('Failed to connect to server:', e, file=sys.stderr)
+                except Exception:
+                    # Failed to connect to server - try again
                     continue
                 else:
                     break
@@ -96,6 +98,10 @@ class ServerThread(threading.Thread):
 
         self._lock.release()
 
+        if not retries:
+            print('Failed to start server', file=sys.stderr)
+            return
+
         while self._proc.returncode is None:
             self._proc.communicate()
 
@@ -297,6 +303,10 @@ ServerThread.DEFAULT_SERVER_CONFIG = \
         <implementation>testkeeper</implementation>
     </zookeeper>
 
+    <distributed_ddl>
+        <path>/clickhouse/task_queue/ddl</path>
+    </distributed_ddl>
+
     <part_log>
         <database>system</database>
         <table>part_log</table>
@@ -1112,6 +1122,136 @@ ServerThread.DEFAULT_DICTIONARIES_CONFIG = \
             </attribute>
         </structure>
     </dictionary>
+
+    <dictionary>
+        <name>simple_executable_cache_dictionary_no_implicit_key</name>
+        <structure>
+            <id>
+                <name>id</name>
+                <type>UInt64</type>
+            </id>
+
+            <attribute>
+                <name>value</name>
+                <type>String</type>
+                <null_value></null_value>
+            </attribute>
+        </structure>
+        <source>
+            <executable>
+                <command>echo "1\tValue"</command>
+                <format>TabSeparated</format>
+                <implicit_key>false</implicit_key>
+            </executable>
+        </source>
+        <layout>
+            <cache>
+                <size_in_cells>10000</size_in_cells>
+            </cache>
+        </layout>
+        <lifetime>300</lifetime>
+    </dictionary>
+
+    <dictionary>
+        <name>simple_executable_cache_dictionary_implicit_key</name>
+        <structure>
+            <id>
+                <name>id</name>
+                <type>UInt64</type>
+            </id>
+
+            <attribute>
+                <name>value</name>
+                <type>String</type>
+                <null_value></null_value>
+            </attribute>
+        </structure>
+        <source>
+            <executable>
+                <command>echo "Value"</command>
+                <format>TabSeparated</format>
+                <implicit_key>true</implicit_key>
+            </executable>
+        </source>
+        <layout>
+            <cache>
+                <size_in_cells>10000</size_in_cells>
+            </cache>
+        </layout>
+        <lifetime>300</lifetime>
+    </dictionary>
+
+    <dictionary>
+        <name>complex_executable_cache_dictionary_no_implicit_key</name>
+        <structure>
+            <key>
+                <attribute>
+                    <name>id</name>
+                    <type>UInt64</type>
+                    <null_value></null_value>
+                </attribute>
+                <attribute>
+                    <name>id_key</name>
+                    <type>String</type>
+                    <null_value></null_value>
+                </attribute>
+            </key>
+            <attribute>
+                <name>value</name>
+                <type>String</type>
+                <null_value></null_value>
+            </attribute>
+        </structure>
+        <source>
+            <executable>
+                <command>echo "1\tFirstKey\tValue"</command>
+                <format>TabSeparated</format>
+                <implicit_key>false</implicit_key>
+            </executable>
+        </source>
+        <layout>
+            <complex_key_cache>
+                <size_in_cells>10000</size_in_cells>
+            </complex_key_cache>
+        </layout>
+        <lifetime>300</lifetime>
+    </dictionary>
+
+    <dictionary>
+        <name>complex_executable_cache_dictionary_implicit_key</name>
+        <structure>
+            <key>
+                <attribute>
+                    <name>id</name>
+                    <type>UInt64</type>
+                    <null_value></null_value>
+                </attribute>
+                <attribute>
+                    <name>id_key</name>
+                    <type>String</type>
+                    <null_value></null_value>
+                </attribute>
+            </key>
+            <attribute>
+                <name>value</name>
+                <type>String</type>
+                <null_value></null_value>
+            </attribute>
+        </structure>
+        <source>
+            <executable>
+                <command>echo "Value"</command>
+                <format>TabSeparated</format>
+                <implicit_key>true</implicit_key>
+            </executable>
+        </source>
+        <layout>
+            <complex_key_cache>
+                <size_in_cells>10000</size_in_cells>
+            </complex_key_cache>
+        </layout>
+        <lifetime>300</lifetime>
+    </dictionary>
 </yandex>
 """
 
diff --git a/tests/queries/shell_config.sh b/tests/queries/shell_config.sh
index 361f8c444f9..eed77fb107d 100644
--- a/tests/queries/shell_config.sh
+++ b/tests/queries/shell_config.sh
@@ -8,11 +8,12 @@ export CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL
 [ -v CLICKHOUSE_CONFIG_CLIENT ] && CLICKHOUSE_CLIENT_OPT0+=" --config-file=${CLICKHOUSE_CONFIG_CLIENT} "
 [ -v CLICKHOUSE_HOST ] && CLICKHOUSE_CLIENT_OPT0+=" --host=${CLICKHOUSE_HOST} "
 [ -v CLICKHOUSE_PORT_TCP ] && CLICKHOUSE_CLIENT_OPT0+=" --port=${CLICKHOUSE_PORT_TCP} "
+[ -v CLICKHOUSE_PORT_TCP ] && CLICKHOUSE_BENCHMARK_OPT0+=" --port=${CLICKHOUSE_PORT_TCP} "
 [ -v CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL ] && CLICKHOUSE_CLIENT_OPT0+=" --send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL} "
 [ -v CLICKHOUSE_DATABASE ] && CLICKHOUSE_CLIENT_OPT0+=" --database=${CLICKHOUSE_DATABASE} "
-[ -n "$CLICKHOUSE_LOG_COMMENT" ] && CLICKHOUSE_CLIENT_OPT0+=" --log_comment='${CLICKHOUSE_LOG_COMMENT}' "
+[ -v CLICKHOUSE_LOG_COMMENT ] && CLICKHOUSE_CLIENT_OPT0+=" --log_comment='${CLICKHOUSE_LOG_COMMENT}' "
 [ -v CLICKHOUSE_DATABASE ] && CLICKHOUSE_BENCHMARK_OPT0+=" --database=${CLICKHOUSE_DATABASE} "
-[ -n "$CLICKHOUSE_LOG_COMMENT" ] && CLICKHOUSE_BENCHMARK_OPT0+=" --log_comment='${CLICKHOUSE_LOG_COMMENT}' "
+[ -v CLICKHOUSE_LOG_COMMENT ] && CLICKHOUSE_BENCHMARK_OPT0+=" --log_comment='${CLICKHOUSE_LOG_COMMENT}' "
 
 export CLICKHOUSE_BINARY=${CLICKHOUSE_BINARY:="clickhouse"}
 [ -x "$CLICKHOUSE_BINARY-client" ] && CLICKHOUSE_CLIENT_BINARY=${CLICKHOUSE_CLIENT_BINARY:=$CLICKHOUSE_BINARY-client}
@@ -62,7 +63,7 @@ else
   export CLICKHOUSE_URL_PARAMS="database=${CLICKHOUSE_DATABASE}"
 fi
 # Note: missing url encoding of the log comment.
-[ -n "$CLICKHOUSE_LOG_COMMENT" ] && export CLICKHOUSE_URL_PARAMS="${CLICKHOUSE_URL_PARAMS}&log_comment=${CLICKHOUSE_LOG_COMMENT}"
+[ -v CLICKHOUSE_LOG_COMMENT ] && export CLICKHOUSE_URL_PARAMS="${CLICKHOUSE_URL_PARAMS}&log_comment=${CLICKHOUSE_LOG_COMMENT}"
 
 export CLICKHOUSE_URL=${CLICKHOUSE_URL:="${CLICKHOUSE_PORT_HTTP_PROTO}://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTP}/"}
 export CLICKHOUSE_URL_HTTPS=${CLICKHOUSE_URL_HTTPS:="https://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTPS}/"}

From 935870b2c2b8cdc57ba64bb3006e80870acd2a0d Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Wed, 27 Jan 2021 21:05:18 +0300
Subject: [PATCH 0936/1238] Add separate config directive for Buffer profile

If you push data via Buffer engine then all your queries will be done
from one user, however this is not always desired behavior, since this
will not allow to limit queries with max_concurrent_queries_for_user and
similar.
---
 programs/server/config.xml     |  8 +++++++-
 src/Interpreters/Context.cpp   | 12 ++++++++++++
 src/Interpreters/Context.h     |  3 +++
 src/Storages/StorageBuffer.cpp | 27 ++++++++++++++++++---------
 src/Storages/StorageBuffer.h   | 12 ++----------
 5 files changed, 42 insertions(+), 20 deletions(-)

diff --git a/programs/server/config.xml b/programs/server/config.xml
index 849d3dc32ba..ca57987d901 100644
--- a/programs/server/config.xml
+++ b/programs/server/config.xml
@@ -421,9 +421,15 @@
     <!-- Comma-separated list of prefixes for user-defined settings. -->
     <custom_settings_prefixes></custom_settings_prefixes>
 
-    <!-- System profile of settings. This settings are used by internal processes (Buffer storage, Distributed DDL worker and so on). -->
+    <!-- System profile of settings. This settings are used by internal processes (Distributed DDL worker and so on). -->
     <!-- <system_profile>default</system_profile> -->
 
+    <!-- Buffer profile of settings.
+         This settings are used by Buffer storage to flush data to the underlying table.
+         Default: used from system_profile directive.
+    -->
+    <!-- <buffer_profile>default</buffer_profile> -->
+
     <!-- Default database. -->
     <default_database>default</default_database>
 
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 5c99d39dc2e..eec71bbd92a 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -331,6 +331,7 @@ struct ContextShared
     mutable std::optional<ExternalModelsLoader> external_models_loader;
     String default_profile_name;                            /// Default profile name used for default values.
     String system_profile_name;                             /// Profile used by system processes
+    String buffer_profile_name;                             /// Profile used by Buffer engine for flushing to the underlying
     AccessControlManager access_control_manager;
     mutable UncompressedCachePtr uncompressed_cache;        /// The cache of decompressed blocks.
     mutable MarkCachePtr mark_cache;                        /// Cache of marks in compressed files.
@@ -1297,6 +1298,13 @@ Context & Context::getGlobalContext()
     return *global_context;
 }
 
+const Context & Context::getBufferContext() const
+{
+    if (!buffer_context)
+        throw Exception("Logical error: there is no buffer context", ErrorCodes::LOGICAL_ERROR);
+    return *buffer_context;
+}
+
 
 const EmbeddedDictionaries & Context::getEmbeddedDictionaries() const
 {
@@ -2219,6 +2227,10 @@ void Context::setDefaultProfiles(const Poco::Util::AbstractConfiguration & confi
 
     shared->system_profile_name = config.getString("system_profile", shared->default_profile_name);
     setProfile(shared->system_profile_name);
+
+    shared->buffer_profile_name = config.getString("buffer_profile", shared->system_profile_name);
+    buffer_context = std::make_shared<Context>(*this);
+    buffer_context->setProfile(shared->buffer_profile_name);
 }
 
 String Context::getDefaultProfileName() const
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index 98ca3909fea..909b27eaeaa 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -254,6 +254,7 @@ private:
     Context * query_context = nullptr;
     Context * session_context = nullptr;    /// Session context or nullptr. Could be equal to this.
     Context * global_context = nullptr;     /// Global context. Could be equal to this.
+    std::shared_ptr<Context> buffer_context;/// Buffer context. Could be equal to this.
 
 public:
     // Top-level OpenTelemetry trace context for the query. Makes sense only for
@@ -542,6 +543,8 @@ public:
     Context & getGlobalContext();
     bool hasGlobalContext() const { return global_context != nullptr; }
 
+    const Context & getBufferContext() const;
+
     void setQueryContext(Context & context_) { query_context = &context_; }
     void setSessionContext(Context & context_) { session_context = &context_; }
 
diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp
index ce74567c62b..024ad7e001f 100644
--- a/src/Storages/StorageBuffer.cpp
+++ b/src/Storages/StorageBuffer.cpp
@@ -72,14 +72,14 @@ StorageBuffer::StorageBuffer(
     const StorageID & destination_id_,
     bool allow_materialized_)
     : IStorage(table_id_)
-    , global_context(context_.getGlobalContext())
+    , buffer_context(context_.getBufferContext())
     , num_shards(num_shards_), buffers(num_shards_)
     , min_thresholds(min_thresholds_)
     , max_thresholds(max_thresholds_)
     , destination_id(destination_id_)
     , allow_materialized(allow_materialized_)
     , log(&Poco::Logger::get("StorageBuffer (" + table_id_.getFullTableName() + ")"))
-    , bg_pool(global_context.getBufferFlushSchedulePool())
+    , bg_pool(buffer_context.getBufferFlushSchedulePool())
 {
     StorageInMemoryMetadata storage_metadata;
     storage_metadata.setColumns(columns_);
@@ -470,7 +470,7 @@ public:
         StoragePtr destination;
         if (storage.destination_id)
         {
-            destination = DatabaseCatalog::instance().tryGetTable(storage.destination_id, storage.global_context);
+            destination = DatabaseCatalog::instance().tryGetTable(storage.destination_id, storage.buffer_context);
             if (destination.get() == &storage)
                 throw Exception("Destination table is myself. Write will cause infinite loop.", ErrorCodes::INFINITE_LOOP);
         }
@@ -586,9 +586,9 @@ bool StorageBuffer::mayBenefitFromIndexForIn(
 
 void StorageBuffer::startup()
 {
-    if (global_context.getSettingsRef().readonly)
+    if (buffer_context.getSettingsRef().readonly)
     {
-        LOG_WARNING(log, "Storage {} is run with readonly settings, it will not be able to insert data. Set appropriate system_profile to fix this.", getName());
+        LOG_WARNING(log, "Storage {} is run with readonly settings, it will not be able to insert data. Set appropriate buffer_profile to fix this.", getName());
     }
 
     flush_handle = bg_pool.createTask(log->name() + "/Bg", [this]{ backgroundFlush(); });
@@ -605,7 +605,7 @@ void StorageBuffer::shutdown()
 
     try
     {
-        optimize(nullptr /*query*/, getInMemoryMetadataPtr(), {} /*partition*/, false /*final*/, false /*deduplicate*/, {}, global_context);
+        optimize(nullptr /*query*/, getInMemoryMetadataPtr(), {} /*partition*/, false /*final*/, false /*deduplicate*/, {}, buffer_context);
     }
     catch (...)
     {
@@ -646,6 +646,15 @@ bool StorageBuffer::optimize(
     return true;
 }
 
+bool StorageBuffer::supportsPrewhere() const
+{
+    if (!destination_id)
+        return false;
+    auto dest = DatabaseCatalog::instance().tryGetTable(destination_id, buffer_context);
+    if (dest && dest.get() != this)
+        return dest->supportsPrewhere();
+    return false;
+}
 
 bool StorageBuffer::checkThresholds(const Buffer & buffer, time_t current_time, size_t additional_rows, size_t additional_bytes) const
 {
@@ -752,7 +761,7 @@ void StorageBuffer::flushBuffer(Buffer & buffer, bool check_thresholds, bool loc
     Stopwatch watch;
     try
     {
-        writeBlockToDestination(block_to_write, DatabaseCatalog::instance().tryGetTable(destination_id, global_context));
+        writeBlockToDestination(block_to_write, DatabaseCatalog::instance().tryGetTable(destination_id, buffer_context));
         if (reset_block_structure)
             buffer.data.clear();
     }
@@ -834,7 +843,7 @@ void StorageBuffer::writeBlockToDestination(const Block & block, StoragePtr tabl
     for (const auto & column : block_to_write)
         list_of_columns->children.push_back(std::make_shared<ASTIdentifier>(column.name));
 
-    auto insert_context = Context(global_context);
+    auto insert_context = Context(buffer_context);
     insert_context.makeQueryContext();
 
     InterpreterInsertQuery interpreter{insert, insert_context, allow_materialized};
@@ -911,7 +920,7 @@ void StorageBuffer::checkAlterIsPossible(const AlterCommands & commands, const S
 std::optional<UInt64> StorageBuffer::totalRows(const Settings & settings) const
 {
     std::optional<UInt64> underlying_rows;
-    auto underlying = DatabaseCatalog::instance().tryGetTable(destination_id, global_context);
+    auto underlying = DatabaseCatalog::instance().tryGetTable(destination_id, buffer_context);
 
     if (underlying)
         underlying_rows = underlying->totalRows(settings);
diff --git a/src/Storages/StorageBuffer.h b/src/Storages/StorageBuffer.h
index 9656c78637b..46907ca196b 100644
--- a/src/Storages/StorageBuffer.h
+++ b/src/Storages/StorageBuffer.h
@@ -93,15 +93,7 @@ public:
         const Context & context) override;
 
     bool supportsSampling() const override { return true; }
-    bool supportsPrewhere() const override
-    {
-        if (!destination_id)
-            return false;
-        auto dest = DatabaseCatalog::instance().tryGetTable(destination_id, global_context);
-        if (dest && dest.get() != this)
-            return dest->supportsPrewhere();
-        return false;
-    }
+    bool supportsPrewhere() const override;
     bool supportsFinal() const override { return true; }
     bool supportsIndexForIn() const override { return true; }
 
@@ -120,7 +112,7 @@ public:
 
 
 private:
-    const Context & global_context;
+    const Context & buffer_context;
 
     struct Buffer
     {

From 695e3a797ae59854448aa63c57398889217e3a09 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Wed, 10 Feb 2021 21:55:11 +0300
Subject: [PATCH 0937/1238] some optimizations

---
 src/Processors/Transforms/WindowTransform.cpp | 301 ++++++++++--------
 src/Processors/Transforms/WindowTransform.h   |  30 +-
 2 files changed, 184 insertions(+), 147 deletions(-)

diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index 914289bca2f..7a53d328c50 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -12,6 +12,105 @@ namespace ErrorCodes
     extern const int NOT_IMPLEMENTED;
 }
 
+// Compares ORDER BY column values at given rows to find the boundaries of frame:
+// [compared] with [reference] +/- offset. Return value is -1/0/+1, like in
+// sorting predicates -- -1 means [compared] is less than [reference] +/- offset.
+template <typename ColumnType>
+static int compareValuesWithOffset(const IColumn * _compared_column,
+    size_t compared_row, const IColumn * _reference_column,
+    size_t reference_row,
+    uint64_t _offset,
+    bool offset_is_preceding)
+{
+    // Casting the columns to the known type here makes it faster, probably
+    // because the getData call can be devirtualized.
+    const auto * compared_column = assert_cast<const ColumnType *>(
+        _compared_column);
+    const auto * reference_column = assert_cast<const ColumnType *>(
+        _reference_column);
+    const auto offset = static_cast<typename ColumnType::ValueType>(_offset);
+
+    const auto compared_value_data = compared_column->getDataAt(compared_row);
+    assert(compared_value_data.size == sizeof(typename ColumnType::ValueType));
+    auto compared_value = unalignedLoad<typename ColumnType::ValueType>(
+        compared_value_data.data);
+
+    const auto reference_value_data = reference_column->getDataAt(reference_row);
+    assert(reference_value_data.size == sizeof(typename ColumnType::ValueType));
+    auto reference_value = unalignedLoad<typename ColumnType::ValueType>(
+        reference_value_data.data);
+
+    bool is_overflow;
+    bool overflow_to_negative;
+    if (offset_is_preceding)
+    {
+        is_overflow = __builtin_sub_overflow(reference_value, offset,
+            &reference_value);
+        overflow_to_negative = offset > 0;
+    }
+    else
+    {
+        is_overflow = __builtin_add_overflow(reference_value, offset,
+            &reference_value);
+        overflow_to_negative = offset < 0;
+    }
+
+//    fmt::print(stderr,
+//        "compared [{}] = {}, ref [{}] = {}, offset {} preceding {} overflow {} to negative {}\n",
+//        compared_row, toString(compared_value),
+//        reference_row, toString(reference_value),
+//        toString(offset), offset_is_preceding,
+//        is_overflow, overflow_to_negative);
+
+    if (is_overflow)
+    {
+        if (overflow_to_negative)
+        {
+            // Overflow to the negative, [compared] must be greater.
+            return 1;
+        }
+        else
+        {
+            // Overflow to the positive, [compared] must be less.
+            return -1;
+        }
+    }
+    else
+    {
+        // No overflow, compare normally.
+        return compared_value < reference_value ? -1
+            : compared_value == reference_value ? 0 : 1;
+    }
+}
+
+// Helper macros to dispatch on type of the ORDER BY column
+#define APPLY_FOR_ONE_TYPE(FUNCTION, TYPE) \
+else if (typeid_cast<const TYPE *>(column)) \
+{ \
+    /* clang-tidy you're dumb, I can't put FUNCTION in braces here. */ \
+    compare_values_with_offset = FUNCTION<TYPE>; /* NOLINT */ \
+}
+
+#define APPLY_FOR_TYPES(FUNCTION) \
+if (false) /* NOLINT */ \
+{ \
+    /* Do nothing, a starter condition. */ \
+} \
+APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int8>) \
+APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<UInt8>) \
+APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int16>) \
+APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<UInt16>) \
+APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int32>) \
+APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<UInt32>) \
+APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int64>) \
+APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<UInt64>) \
+else \
+{ \
+    throw Exception(ErrorCodes::NOT_IMPLEMENTED, \
+        "The RANGE OFFSET frame for '{}' ORDER BY column is not implemented", \
+        demangle(typeid(*column).name())); \
+}
+
 WindowTransform::WindowTransform(const Block & input_header_,
         const Block & output_header_,
         const WindowDescription & window_description_,
@@ -63,6 +162,20 @@ WindowTransform::WindowTransform(const Block & input_header_,
         order_by_indices.push_back(
             input_header.getPositionByName(column.column_name));
     }
+
+    // Choose a row comparison function for RANGE OFFSET frame based on the
+    // type of the ORDER BY column.
+    if (window_description.frame.type == WindowFrame::FrameType::Range
+        && (window_description.frame.begin_type
+                == WindowFrame::BoundaryType::Offset
+            || window_description.frame.end_type
+                == WindowFrame::BoundaryType::Offset))
+    {
+        assert(order_by_indices.size() == 1);
+        const IColumn * column = input_header.getByPosition(
+            order_by_indices[0]).column.get();
+        APPLY_FOR_TYPES(compareValuesWithOffset)
+    }
 }
 
 WindowTransform::~WindowTransform()
@@ -290,85 +403,22 @@ void WindowTransform::advanceFrameStartRowsOffset()
     assert(offset_left >= 0);
 }
 
-// Compares ORDER BY column values at given rows to find the boundaries of frame:
-// [compared] with [reference] +/- offset. Return value is -1/0/+1, like in
-// sorting predicates -- -1 means [compared] is less than [reference] +/- offset.
-template <typename ColumnType>
-static int compareValuesWithOffset(const ColumnType * compared_column,
-    size_t compared_row, const ColumnType * reference_column,
-    size_t reference_row,
-    typename ColumnType::ValueType offset,
-    bool offset_is_preceding)
-{
-    const auto compared_value_data = compared_column->getDataAt(compared_row);
-    assert(compared_value_data.size == sizeof(typename ColumnType::ValueType));
-    auto compared_value = unalignedLoad<typename ColumnType::ValueType>(
-        compared_value_data.data);
 
-    const auto reference_value_data = reference_column->getDataAt(reference_row);
-    assert(reference_value_data.size == sizeof(typename ColumnType::ValueType));
-    auto reference_value = unalignedLoad<typename ColumnType::ValueType>(
-        reference_value_data.data);
-
-    bool is_overflow;
-    bool overflow_to_negative;
-    if (offset_is_preceding)
-    {
-        is_overflow = __builtin_sub_overflow(reference_value, offset,
-            &reference_value);
-        overflow_to_negative = offset > 0;
-    }
-    else
-    {
-        is_overflow = __builtin_add_overflow(reference_value, offset,
-            &reference_value);
-        overflow_to_negative = offset < 0;
-    }
-
-//    fmt::print(stderr,
-//        "compared [{}] = {}, ref [{}] = {}, offset {} preceding {} overflow {} to negative {}\n",
-//        compared_row, toString(compared_value),
-//        reference_row, toString(reference_value),
-//        toString(offset), offset_is_preceding,
-//        is_overflow, overflow_to_negative);
-
-    if (is_overflow)
-    {
-        if (overflow_to_negative)
-        {
-            // Overflow to the negative, [compared] must be greater.
-            return 1;
-        }
-        else
-        {
-            // Overflow to the positive, [compared] must be less.
-            return -1;
-        }
-    }
-    else
-    {
-        // No overflow, compare normally.
-        return compared_value < reference_value ? -1
-            : compared_value == reference_value ? 0 : 1;
-    }
-}
-
-template <typename ColumnType>
 void WindowTransform::advanceFrameStartRangeOffset()
 {
     // See the comment for advanceFrameEndRangeOffset().
     const int direction = window_description.order_by[0].direction;
     const bool preceding = window_description.frame.begin_preceding
         == (direction > 0);
-    const auto * reference_column = assert_cast<const ColumnType *>(
-        inputAt(current_row)[order_by_indices[0]].get());
+    const auto * reference_column
+        = inputAt(current_row)[order_by_indices[0]].get();
     for (; frame_start < partition_end; advanceRowNumber(frame_start))
     {
         // The first frame value is [current_row] with offset, so we advance
         // while [frames_start] < [current_row] with offset.
-        const auto * compared_column = assert_cast<const ColumnType *>(
-            inputAt(frame_start)[order_by_indices[0]].get());
-        if (compareValuesWithOffset(compared_column, frame_start.row,
+        const auto * compared_column
+            = inputAt(frame_start)[order_by_indices[0]].get();
+        if (compare_values_with_offset(compared_column, frame_start.row,
             reference_column, current_row.row,
             window_description.frame.begin_offset,
             preceding)
@@ -382,43 +432,6 @@ void WindowTransform::advanceFrameStartRangeOffset()
     frame_started = partition_ended;
 }
 
-// Helper macros to dispatch on type of the ORDER BY column
-#define APPLY_FOR_ONE_TYPE(FUNCTION, TYPE) \
-else if (typeid_cast<const TYPE *>(column)) \
-{ \
-    /* clang-tidy you're dumb, I can't put FUNCTION in braces here. */ \
-    FUNCTION<TYPE>(); /* NOLINT */ \
-}
-
-#define APPLY_FOR_TYPES(FUNCTION) \
-if (false) /* NOLINT */ \
-{ \
-    /* Do nothing, a starter condition. */ \
-} \
-APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int8>) \
-APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<UInt8>) \
-APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int16>) \
-APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<UInt16>) \
-APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int32>) \
-APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<UInt32>) \
-APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int64>) \
-APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<UInt64>) \
-else \
-{ \
-    throw Exception(ErrorCodes::NOT_IMPLEMENTED, \
-        "The RANGE OFFSET frame for '{}' ORDER BY column is not implemented", \
-        demangle(typeid(*column).name())); \
-}
-
-void WindowTransform::advanceFrameStartRangeOffsetDispatch()
-{
-    // Dispatch on the type of the ORDER BY column.
-    assert(order_by_indices.size() == 1);
-    const IColumn * column = inputAt(current_row)[order_by_indices[0]].get();
-
-    APPLY_FOR_TYPES(advanceFrameStartRangeOffset)
-}
-
 void WindowTransform::advanceFrameStart()
 {
     if (frame_started)
@@ -451,7 +464,7 @@ void WindowTransform::advanceFrameStart()
                     advanceFrameStartRowsOffset();
                     break;
                 case WindowFrame::FrameType::Range:
-                    advanceFrameStartRangeOffsetDispatch();
+                    advanceFrameStartRangeOffset();
                     break;
                 default:
                     throw Exception(ErrorCodes::NOT_IMPLEMENTED,
@@ -631,7 +644,6 @@ void WindowTransform::advanceFrameEndRowsOffset()
     assert(offset_left >= 0);
 }
 
-template <typename ColumnType>
 void WindowTransform::advanceFrameEndRangeOffset()
 {
     // PRECEDING/FOLLOWING change direction for DESC order.
@@ -639,16 +651,16 @@ void WindowTransform::advanceFrameEndRangeOffset()
     const int direction = window_description.order_by[0].direction;
     const bool preceding = window_description.frame.end_preceding
         == (direction > 0);
-    const auto * reference_column = assert_cast<const ColumnType *>(
-        inputAt(current_row)[order_by_indices[0]].get());
+    const auto * reference_column
+        = inputAt(current_row)[order_by_indices[0]].get();
     for (; frame_end < partition_end; advanceRowNumber(frame_end))
     {
         // The last frame value is current_row with offset, and we need a
         // past-the-end pointer, so we advance while
         // [frame_end] <= [current_row] with offset.
-        const auto * compared_column = assert_cast<const ColumnType *>(
-            inputAt(frame_end)[order_by_indices[0]].get());
-        if (compareValuesWithOffset(compared_column, frame_end.row,
+        const auto * compared_column
+            = inputAt(frame_end)[order_by_indices[0]].get();
+        if (compare_values_with_offset(compared_column, frame_end.row,
             reference_column, current_row.row,
             window_description.frame.end_offset,
             preceding)
@@ -662,15 +674,6 @@ void WindowTransform::advanceFrameEndRangeOffset()
     frame_ended = partition_ended;
 }
 
-void WindowTransform::advanceFrameEndRangeOffsetDispatch()
-{
-    // Dispatch on the type of the ORDER BY column.
-    assert(order_by_indices.size() == 1);
-    const IColumn * column = inputAt(current_row)[order_by_indices[0]].get();
-
-    APPLY_FOR_TYPES(advanceFrameEndRangeOffset)
-}
-
 void WindowTransform::advanceFrameEnd()
 {
     // No reason for this function to be called again after it succeeded.
@@ -693,7 +696,7 @@ void WindowTransform::advanceFrameEnd()
                     advanceFrameEndRowsOffset();
                     break;
                 case WindowFrame::FrameType::Range:
-                    advanceFrameEndRangeOffsetDispatch();
+                    advanceFrameEndRangeOffset();
                     break;
                 default:
                     throw Exception(ErrorCodes::NOT_IMPLEMENTED,
@@ -763,24 +766,43 @@ void WindowTransform::updateAggregationState()
             a->create(buf);
         }
 
-        for (auto row = rows_to_add_start; row < rows_to_add_end;
-            advanceRowNumber(row))
-        {
-            if (row.block != ws.cached_block_number)
-            {
-                const auto & block
-                    = blocks[row.block - first_block_number];
-                ws.argument_columns.clear();
-                for (const auto i : ws.argument_column_indices)
-                {
-                    ws.argument_columns.push_back(block.input_columns[i].get());
-                }
-                ws.cached_block_number = row.block;
-            }
+        // To achieve better performance, we will have to loop over blocks and
+        // rows manually, instead of using advanceRowNumber().
+        // For this purpose, the past-the-end block can be different than the
+        // block of the past-the-end row (it's usually the next block).
+        const auto past_the_end_block = rows_to_add_end.row == 0
+            ? rows_to_add_end.block
+            : rows_to_add_end.block + 1;
 
-//            fmt::print(stderr, "(2) add row {}\n", row);
+        for (auto block_number = rows_to_add_start.block;
+             block_number < past_the_end_block;
+             ++block_number)
+        {
+            auto & block = blockAt(block_number);
+
+            ws.argument_columns.clear();
+            for (const auto i : ws.argument_column_indices)
+            {
+                ws.argument_columns.push_back(block.input_columns[i].get());
+            }
+            ws.cached_block_number = block_number;
+
+            // First and last blocks may be processed partially, and other blocks
+            // are processed in full.
+            const auto first_row = block_number == rows_to_add_start.block
+                ? rows_to_add_start.row : 0;
+            const auto past_the_end_row = block_number == rows_to_add_end.block
+                ? rows_to_add_end.row : block.rows;
+
+            // We should add an addBatch analog that can accept a starting offset.
+            // For now, add the values one by one.
             auto * columns = ws.argument_columns.data();
-            a->add(buf, columns, row.row, arena.get());
+            // Removing arena.get() from the loop makes it faster somehow...
+            auto * _arena = arena.get();
+            for (auto row = first_row; row < past_the_end_row; ++row)
+            {
+                a->add(buf, columns, row, _arena);
+            }
         }
     }
 
@@ -793,6 +815,7 @@ void WindowTransform::writeOutCurrentRow()
     assert(current_row < partition_end);
     assert(current_row.block >= first_block_number);
 
+    const auto & block = blockAt(current_row);
     for (size_t wi = 0; wi < workspaces.size(); ++wi)
     {
         auto & ws = workspaces[wi];
@@ -800,7 +823,7 @@ void WindowTransform::writeOutCurrentRow()
         const auto * a = f.aggregate_function.get();
         auto * buf = ws.aggregate_function_state.data();
 
-        IColumn * result_column = outputAt(current_row)[wi].get();
+        IColumn * result_column = block.output_columns[wi].get();
         // FIXME does it also allocate the result on the arena?
         // We'll have to pass it out with blocks then...
         a->insertResultInto(buf, *result_column, arena.get());
diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h
index 541d4eb87c8..0ba8c8c6010 100644
--- a/src/Processors/Transforms/WindowTransform.h
+++ b/src/Processors/Transforms/WindowTransform.h
@@ -108,8 +108,6 @@ private:
     bool arePeers(const RowNumber & x, const RowNumber & y) const;
 
     void advanceFrameStartRowsOffset();
-    void advanceFrameStartRangeOffsetDispatch();
-    template <typename ColumnType>
     void advanceFrameStartRangeOffset();
     void advanceFrameStart();
 
@@ -117,8 +115,6 @@ private:
     void advanceFrameEndCurrentRow();
     void advanceFrameEndUnbounded();
     void advanceFrameEnd();
-    void advanceFrameEndRangeOffsetDispatch();
-    template <typename ColumnType>
     void advanceFrameEndRangeOffset();
 
     void updateAggregationState();
@@ -134,13 +130,19 @@ private:
     const Columns & inputAt(const RowNumber & x) const
     { return const_cast<WindowTransform *>(this)->inputAt(x); }
 
-    auto & blockAt(const RowNumber & x)
+    auto & blockAt(const uint64_t block_number)
     {
-        assert(x.block >= first_block_number);
-        assert(x.block - first_block_number < blocks.size());
-        return blocks[x.block - first_block_number];
+        assert(block_number >= first_block_number);
+        assert(block_number - first_block_number < blocks.size());
+        return blocks[block_number - first_block_number];
     }
 
+    const auto & blockAt(const uint64_t block_number) const
+    { return const_cast<WindowTransform *>(this)->blockAt(block_number); }
+
+    auto & blockAt(const RowNumber & x)
+    { return blockAt(x.block); }
+
     const auto & blockAt(const RowNumber & x) const
     { return const_cast<WindowTransform *>(this)->blockAt(x); }
 
@@ -299,6 +301,18 @@ public:
     // state after we find the new frame.
     RowNumber prev_frame_start;
     RowNumber prev_frame_end;
+
+    // Comparison function for RANGE OFFSET frames. We choose the appropriate
+    // overload once, based on the type of the ORDER BY column. Choosing it for
+    // each row would be slow.
+    int (* compare_values_with_offset) (
+        const IColumn * compared_column, size_t compared_row,
+        const IColumn * reference_column, size_t reference_row,
+        // We can make it a Field later if we need the Decimals. Now we only
+        // have ints and datetime, and the underlying Field type for them is
+        // uint64_t anyway.
+        uint64_t offset,
+        bool offset_is_preceding);
 };
 
 }

From 6bc0dbe8ff8ed8c0b0c78c721514994257dcc067 Mon Sep 17 00:00:00 2001
From: Alex Karo <alex@karo-dev.ru>
Date: Wed, 10 Feb 2021 22:03:27 +0300
Subject: [PATCH 0938/1238] Fix broken links to "max table size" param in
 backup documentation

---
 docs/en/operations/backup.md | 2 +-
 docs/es/operations/backup.md | 2 +-
 docs/fr/operations/backup.md | 2 +-
 docs/ja/operations/backup.md | 2 +-
 docs/ru/operations/backup.md | 2 +-
 docs/zh/operations/backup.md | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md
index ea37a22c165..f4206f5d70c 100644
--- a/docs/en/operations/backup.md
+++ b/docs/en/operations/backup.md
@@ -5,7 +5,7 @@ toc_title: Data Backup
 
 # Data Backup {#data-backup}
 
-While [replication](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [you can’t just drop tables with a MergeTree-like engine containing more than 50 Gb of data](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). However, these safeguards don’t cover all possible cases and can be circumvented.
+While [replication](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [you can’t just drop tables with a MergeTree-like engine containing more than 50 Gb of data](server-configuration-parameters/settings.md#max-table-size-to-drop). However, these safeguards don’t cover all possible cases and can be circumvented.
 
 In order to effectively mitigate possible human errors, you should carefully prepare a strategy for backing up and restoring your data **in advance**.
 
diff --git a/docs/es/operations/backup.md b/docs/es/operations/backup.md
index a6297070663..be33851574a 100644
--- a/docs/es/operations/backup.md
+++ b/docs/es/operations/backup.md
@@ -5,7 +5,7 @@ toc_title: Copia de seguridad de datos
 
 # Copia de seguridad de datos {#data-backup}
 
-Mientras que la [replicación](../engines/table-engines/mergetree-family/replication.md) proporciona protección contra fallos de hardware, no protege de errores humanos: el borrado accidental de datos, elminar la tabla equivocada o una tabla en el clúster equivocado, y bugs de software que dan como resultado un procesado incorrecto de los datos o la corrupción de los datos. En muchos casos, errores como estos afectarán a todas las réplicas. ClickHouse dispone de salvaguardas para prevenir algunos tipos de errores — por ejemplo, por defecto [no se puede simplemente eliminar tablas con un motor similar a MergeTree que contenga más de 50 Gb de datos](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). Sin embargo, estas salvaguardas no cubren todos los casos posibles y pueden eludirse.
+Mientras que la [replicación](../engines/table-engines/mergetree-family/replication.md) proporciona protección contra fallos de hardware, no protege de errores humanos: el borrado accidental de datos, elminar la tabla equivocada o una tabla en el clúster equivocado, y bugs de software que dan como resultado un procesado incorrecto de los datos o la corrupción de los datos. En muchos casos, errores como estos afectarán a todas las réplicas. ClickHouse dispone de salvaguardas para prevenir algunos tipos de errores — por ejemplo, por defecto [no se puede simplemente eliminar tablas con un motor similar a MergeTree que contenga más de 50 Gb de datos](server-configuration-parameters/settings.md#max-table-size-to-drop). Sin embargo, estas salvaguardas no cubren todos los casos posibles y pueden eludirse.
 
 Para mitigar eficazmente los posibles errores humanos, debe preparar cuidadosamente una estrategia para realizar copias de seguridad y restaurar sus datos **previamente**.
 
diff --git a/docs/fr/operations/backup.md b/docs/fr/operations/backup.md
index 9a463372947..953a96a04eb 100644
--- a/docs/fr/operations/backup.md
+++ b/docs/fr/operations/backup.md
@@ -7,7 +7,7 @@ toc_title: "La Sauvegarde Des Donn\xE9es"
 
 # La Sauvegarde Des Données {#data-backup}
 
-Alors [réplication](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [vous ne pouvez pas simplement supprimer des tables avec un moteur de type MergeTree contenant plus de 50 Go de données](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). Toutefois, ces garanties ne couvrent pas tous les cas possibles et peuvent être contournés.
+Alors [réplication](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [vous ne pouvez pas simplement supprimer des tables avec un moteur de type MergeTree contenant plus de 50 Go de données](server-configuration-parameters/settings.md#max-table-size-to-drop). Toutefois, ces garanties ne couvrent pas tous les cas possibles et peuvent être contournés.
 
 Afin d'atténuer efficacement les erreurs humaines possibles, vous devez préparer soigneusement une stratégie de sauvegarde et de restauration de vos données **préalablement**.
 
diff --git a/docs/ja/operations/backup.md b/docs/ja/operations/backup.md
index 994271371a4..b0cde00e23c 100644
--- a/docs/ja/operations/backup.md
+++ b/docs/ja/operations/backup.md
@@ -7,7 +7,7 @@ toc_title: "\u30C7\u30FC\u30BF\u30D0\u30C3\u30AF\u30A2"
 
 # データバックア {#data-backup}
 
-ながら [複製](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [50Gbを超えるデータを含むMergeTreeのようなエンジンでは、テーブルを削除することはできません](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). しかし、これらの保障措置がカバーしないすべてのケースで回避.
+ながら [複製](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [50Gbを超えるデータを含むMergeTreeのようなエンジンでは、テーブルを削除することはできません](server-configuration-parameters/settings.md#max-table-size-to-drop). しかし、これらの保障措置がカバーしないすべてのケースで回避.
 
 ヒューマンエラーを効果的に軽減するには、データのバックアップと復元のための戦略を慎重に準備する必要があります **事前に**.
 
diff --git a/docs/ru/operations/backup.md b/docs/ru/operations/backup.md
index 0dcb6fd307d..165b54d9b62 100644
--- a/docs/ru/operations/backup.md
+++ b/docs/ru/operations/backup.md
@@ -5,7 +5,7 @@ toc_title: "\u0420\u0435\u0437\u0435\u0440\u0432\u043d\u043e\u0435\u0020\u043a\u
 
 # Резервное копирование данных {#rezervnoe-kopirovanie-dannykh}
 
-[Репликация](../engines/table-engines/mergetree-family/replication.md) обеспечивает защиту от аппаратных сбоев, но не защищает от человеческих ошибок: случайного удаления данных, удаления не той таблицы, которую надо было, или таблицы на не том кластере, а также программных ошибок, которые приводят к неправильной обработке данных или их повреждению. Во многих случаях подобные ошибки влияют на все реплики. ClickHouse имеет встроенные средства защиты для предотвращения некоторых типов ошибок — например, по умолчанию [не получится удалить таблицы \*MergeTree, содержащие более 50 Гб данных, одной командой](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). Однако эти средства защиты не охватывают все возможные случаи и могут быть обойдены.
+[Репликация](../engines/table-engines/mergetree-family/replication.md) обеспечивает защиту от аппаратных сбоев, но не защищает от человеческих ошибок: случайного удаления данных, удаления не той таблицы, которую надо было, или таблицы на не том кластере, а также программных ошибок, которые приводят к неправильной обработке данных или их повреждению. Во многих случаях подобные ошибки влияют на все реплики. ClickHouse имеет встроенные средства защиты для предотвращения некоторых типов ошибок — например, по умолчанию [не получится удалить таблицы \*MergeTree, содержащие более 50 Гб данных, одной командой](server-configuration-parameters/settings.md#max-table-size-to-drop). Однако эти средства защиты не охватывают все возможные случаи и могут быть обойдены.
 
 Для того чтобы эффективно уменьшить возможные человеческие ошибки, следует тщательно подготовить стратегию резервного копирования и восстановления данных **заранее**.
 
diff --git a/docs/zh/operations/backup.md b/docs/zh/operations/backup.md
index 72491bb53ff..1b1993e3ae6 100644
--- a/docs/zh/operations/backup.md
+++ b/docs/zh/operations/backup.md
@@ -7,7 +7,7 @@ toc_title: "\u6570\u636E\u5907\u4EFD"
 
 # 数据备份 {#data-backup}
 
-尽管[副本](../engines/table-engines/mergetree-family/replication.md) 可以预防硬件错误带来的数据丢失, 但是它不能防止人为操作的错误: 意外删除数据, 删除错误的 table 或者删除错误 cluster 上的 table, 可以导致错误数据处理错误或者数据损坏的 bugs. 这类意外可能会影响所有的副本. ClickHouse 有内建的保障措施可以预防一些错误 — 例如, 默认情况下[您不能使用类似MergeTree的引擎删除包含超过50Gb数据的表](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). 但是，这些保障措施不能涵盖所有可能的情况，并且可以规避。
+尽管[副本](../engines/table-engines/mergetree-family/replication.md) 可以预防硬件错误带来的数据丢失, 但是它不能防止人为操作的错误: 意外删除数据, 删除错误的 table 或者删除错误 cluster 上的 table, 可以导致错误数据处理错误或者数据损坏的 bugs. 这类意外可能会影响所有的副本. ClickHouse 有内建的保障措施可以预防一些错误 — 例如, 默认情况下[您不能使用类似MergeTree的引擎删除包含超过50Gb数据的表](server-configuration-parameters/settings.md#max-table-size-to-drop). 但是，这些保障措施不能涵盖所有可能的情况，并且可以规避。
 
 为了有效地减少可能的人为错误，您应该 **提前**准备备份和还原数据的策略.
 

From 891fce3275b0060e2898916c78594f69d2ac46dc Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Wed, 10 Feb 2021 22:23:48 +0300
Subject: [PATCH 0939/1238] Don't allow to drop or rename version column

---
 src/Storages/MergeTree/MergeTreeData.cpp      | 29 ++++++++++++++-----
 .../01714_alter_drop_version.reference        |  1 +
 .../0_stateless/01714_alter_drop_version.sql  | 23 +++++++++++++++
 3 files changed, 46 insertions(+), 7 deletions(-)
 create mode 100644 tests/queries/0_stateless/01714_alter_drop_version.reference
 create mode 100644 tests/queries/0_stateless/01714_alter_drop_version.sql

diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 7f99af1b1a3..8ff6506a281 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -1490,16 +1490,31 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, const S
             getPartitionIDFromQuery(command.partition, global_context);
         }
 
-        /// Some type changes for version column is allowed despite it's a part of sorting key
-        if (command.type == AlterCommand::MODIFY_COLUMN && command.column_name == merging_params.version_column)
+        if (command.column_name == merging_params.version_column)
         {
-            const IDataType * new_type = command.data_type.get();
-            const IDataType * old_type = old_types[command.column_name];
+            /// Some type changes for version column is allowed despite it's a part of sorting key
+            if (command.type == AlterCommand::MODIFY_COLUMN)
+            {
+                const IDataType * new_type = command.data_type.get();
+                const IDataType * old_type = old_types[command.column_name];
 
-            checkVersionColumnTypesConversion(old_type, new_type, command.column_name);
+                checkVersionColumnTypesConversion(old_type, new_type, command.column_name);
 
-            /// No other checks required
-            continue;
+                /// No other checks required
+                continue;
+            }
+            else if (command.type == AlterCommand::DROP_COLUMN)
+            {
+                throw Exception(
+                    "Trying to ALTER DROP version " + backQuoteIfNeed(command.column_name) + " column",
+                    ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN);
+            }
+            else if (command.type == AlterCommand::RENAME_COLUMN)
+            {
+                throw Exception(
+                    "Trying to ALTER RENAME version " + backQuoteIfNeed(command.column_name) + " column",
+                    ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN);
+            }
         }
 
         if (command.type == AlterCommand::MODIFY_ORDER_BY && !is_custom_partitioned)
diff --git a/tests/queries/0_stateless/01714_alter_drop_version.reference b/tests/queries/0_stateless/01714_alter_drop_version.reference
new file mode 100644
index 00000000000..72749c905a3
--- /dev/null
+++ b/tests/queries/0_stateless/01714_alter_drop_version.reference
@@ -0,0 +1 @@
+1	1	1
diff --git a/tests/queries/0_stateless/01714_alter_drop_version.sql b/tests/queries/0_stateless/01714_alter_drop_version.sql
new file mode 100644
index 00000000000..e3d5db33859
--- /dev/null
+++ b/tests/queries/0_stateless/01714_alter_drop_version.sql
@@ -0,0 +1,23 @@
+DROP TABLE IF EXISTS alter_drop_version;
+
+CREATE TABLE alter_drop_version
+(
+    `key` UInt64,
+    `value` String,
+    `ver` Int8
+)
+ENGINE = ReplacingMergeTree(ver)
+ORDER BY key;
+
+INSERT INTO alter_drop_version VALUES (1, '1', 1);
+
+ALTER TABLE alter_drop_version DROP COLUMN ver; --{serverError 524}
+ALTER TABLE alter_drop_version RENAME COLUMN ver TO rev; --{serverError 524}
+
+DETACH TABLE alter_drop_version;
+
+ATTACH TABLE alter_drop_version;
+
+SELECT * FROM alter_drop_version;
+
+DROP TABLE IF EXISTS alter_drop_version;

From 5001b196137ca104efaadd315a2d4768278c4bb7 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Wed, 10 Feb 2021 22:07:52 +0300
Subject: [PATCH 0940/1238] Accept arbitrary numeric types for numbers()
 arguments (for scientific notation)

This is to make the syntax simpler, i.e. avoid explicit cast to UInt64
if you want to use scientific notation (i.e. 1e9 over 1 000 000 000).

v2: use plain evaluateConstantExpression() over
evaluateConstantExpressionOrIdentifierAsLiteral() since identifier will
not work anyway
---
 src/TableFunctions/TableFunctionNumbers.cpp         | 13 ++++++++++++-
 ...702_system_numbers_scientific_notation.reference |  0
 .../01702_system_numbers_scientific_notation.sql    |  5 +++++
 3 files changed, 17 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/01702_system_numbers_scientific_notation.reference
 create mode 100644 tests/queries/0_stateless/01702_system_numbers_scientific_notation.sql

diff --git a/src/TableFunctions/TableFunctionNumbers.cpp b/src/TableFunctions/TableFunctionNumbers.cpp
index 4658165735a..594075b1c82 100644
--- a/src/TableFunctions/TableFunctionNumbers.cpp
+++ b/src/TableFunctions/TableFunctionNumbers.cpp
@@ -6,6 +6,7 @@
 #include <Common/typeid_cast.h>
 #include <Storages/System/StorageSystemNumbers.h>
 #include <Interpreters/evaluateConstantExpression.h>
+#include <Interpreters/convertFieldToType.h>
 #include <Interpreters/Context.h>
 #include <DataTypes/DataTypesNumber.h>
 #include "registerTableFunctions.h"
@@ -17,6 +18,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
 }
 
 
@@ -56,7 +58,16 @@ void registerTableFunctionNumbers(TableFunctionFactory & factory)
 template <bool multithreaded>
 UInt64 TableFunctionNumbers<multithreaded>::evaluateArgument(const Context & context, ASTPtr & argument) const
 {
-    return evaluateConstantExpressionOrIdentifierAsLiteral(argument, context)->as<ASTLiteral &>().value.safeGet<UInt64>();
+    const auto & [field, type] = evaluateConstantExpression(argument, context);
+
+    if (!isNativeNumber(type))
+        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} expression, must be numeric type", type->getName());
+
+    Field converted = convertFieldToType(field, DataTypeUInt64());
+    if (converted.isNull())
+        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The value {} is not representable as UInt64", applyVisitor(FieldVisitorToString(), field));
+
+    return converted.safeGet<UInt64>();
 }
 
 }
diff --git a/tests/queries/0_stateless/01702_system_numbers_scientific_notation.reference b/tests/queries/0_stateless/01702_system_numbers_scientific_notation.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/01702_system_numbers_scientific_notation.sql b/tests/queries/0_stateless/01702_system_numbers_scientific_notation.sql
new file mode 100644
index 00000000000..6e037ee4a2e
--- /dev/null
+++ b/tests/queries/0_stateless/01702_system_numbers_scientific_notation.sql
@@ -0,0 +1,5 @@
+select * from numbers(1e2) format Null;
+select * from numbers_mt(1e2) format Null;
+select * from numbers_mt('100') format Null; -- { serverError 43 }
+select * from numbers_mt(inf) format Null; -- { serverError 43 }
+select * from numbers_mt(nan) format Null; -- { serverError 43 }

From b6dc721e332e30c7e6dde40282441dd59cfa276e Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Wed, 10 Feb 2021 22:27:14 +0300
Subject: [PATCH 0941/1238] Update tests for new numbers(limit) syntax

$ gg -e 'numbers(toUInt64' -e 'numbers_mt(toUInt64' | cut -d: -f1 | sort -u | xargs sed -i -E 's#numbers(_mt|)\(toUInt64\(([^()]*)\)\)#numbers\1(\2)#'
---
 ..._tree_simple_aggregate_function_string.xml |  2 +-
 .../0_stateless/01016_uniqCombined64.sql      |  4 ++--
 .../01017_uniqCombined_memory_usage.sql       | 24 +++++++++----------
 .../01281_group_by_limit_memory_tracking.sh   |  2 +-
 ...3_optimize_aggregation_in_order_memory.sql |  2 +-
 ...emerge_sort_lowered_memory_bytes_ratio.sql |  6 ++---
 .../01641_memory_tracking_insert_optimize.sql |  2 +-
 7 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/tests/performance/aggregating_merge_tree_simple_aggregate_function_string.xml b/tests/performance/aggregating_merge_tree_simple_aggregate_function_string.xml
index c12f26ad595..0c93b4745cf 100644
--- a/tests/performance/aggregating_merge_tree_simple_aggregate_function_string.xml
+++ b/tests/performance/aggregating_merge_tree_simple_aggregate_function_string.xml
@@ -6,7 +6,7 @@
         SETTINGS index_granularity = 8192
         AS
         SELECT CAST(reinterpretAsString(number), 'SimpleAggregateFunction(any, String)') AS key
-        FROM numbers_mt(toUInt64(5e6))
+        FROM numbers_mt(5e6)
         SETTINGS max_insert_threads = 16
     </create_query>
 
diff --git a/tests/queries/0_stateless/01016_uniqCombined64.sql b/tests/queries/0_stateless/01016_uniqCombined64.sql
index 4720b53d15e..acf8135760a 100644
--- a/tests/queries/0_stateless/01016_uniqCombined64.sql
+++ b/tests/queries/0_stateless/01016_uniqCombined64.sql
@@ -5,5 +5,5 @@
 -- test is just to ensure that the result is different (and to document the
 -- outcome).
 
-SELECT uniqCombined(number)   FROM numbers(toUInt64(1e7));
-SELECT uniqCombined64(number) FROM numbers(toUInt64(1e7));
+SELECT uniqCombined(number)   FROM numbers(1e7);
+SELECT uniqCombined64(number) FROM numbers(1e7);
diff --git a/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql b/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql
index bfcfec2b8ba..2ad1edae733 100644
--- a/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql
+++ b/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql
@@ -5,45 +5,45 @@
 -- HashTable for UInt32 (used until (1<<13) elements), hence 8192 elements
 SELECT 'UInt32';
 SET max_memory_usage = 4000000;
-SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(number % 8192) u FROM numbers(toUInt64(8192 * 100)) GROUP BY k); -- { serverError 241 }
+SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(number % 8192) u FROM numbers(8192 * 100) GROUP BY k); -- { serverError 241 }
 SET max_memory_usage = 9830400;
-SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(number % 8192) u FROM numbers(toUInt64(8192 * 100)) GROUP BY k);
+SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(number % 8192) u FROM numbers(8192 * 100) GROUP BY k);
 
 -- HashTable for UInt64 (used until (1<<12) elements), hence 4096 elements
 SELECT 'UInt64';
 SET max_memory_usage = 4000000;
-SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(reinterpretAsString(number % 4096)) u FROM numbers(toUInt64(4096 * 100)) GROUP BY k); -- { serverError 241 }
+SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(reinterpretAsString(number % 4096)) u FROM numbers(4096 * 100) GROUP BY k); -- { serverError 241 }
 SET max_memory_usage = 9830400;
-SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(reinterpretAsString(number % 4096)) u FROM numbers(toUInt64(4096 * 100)) GROUP BY k);
+SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(reinterpretAsString(number % 4096)) u FROM numbers(4096 * 100) GROUP BY k);
 
 SELECT 'K=16';
 
 -- HashTable for UInt32 (used until (1<<12) elements), hence 4096 elements
 SELECT 'UInt32';
 SET max_memory_usage = 2000000;
-SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(16)(number % 4096) u FROM numbers(toUInt64(4096 * 100)) GROUP BY k); -- { serverError 241 }
+SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(16)(number % 4096) u FROM numbers(4096 * 100) GROUP BY k); -- { serverError 241 }
 SET max_memory_usage = 4915200;
-SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(16)(number % 4096) u FROM numbers(toUInt64(4096 * 100)) GROUP BY k);
+SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(16)(number % 4096) u FROM numbers(4096 * 100) GROUP BY k);
 
 -- HashTable for UInt64 (used until (1<<11) elements), hence 2048 elements
 SELECT 'UInt64';
 SET max_memory_usage = 2000000;
-SELECT sum(u) FROM (SELECT intDiv(number, 2048) AS k, uniqCombined(16)(reinterpretAsString(number % 2048)) u FROM numbers(toUInt64(2048 * 100)) GROUP BY k); -- { serverError 241 }
+SELECT sum(u) FROM (SELECT intDiv(number, 2048) AS k, uniqCombined(16)(reinterpretAsString(number % 2048)) u FROM numbers(2048 * 100) GROUP BY k); -- { serverError 241 }
 SET max_memory_usage = 4915200;
-SELECT sum(u) FROM (SELECT intDiv(number, 2048) AS k, uniqCombined(16)(reinterpretAsString(number % 2048)) u FROM numbers(toUInt64(2048 * 100)) GROUP BY k);
+SELECT sum(u) FROM (SELECT intDiv(number, 2048) AS k, uniqCombined(16)(reinterpretAsString(number % 2048)) u FROM numbers(2048 * 100) GROUP BY k);
 
 SELECT 'K=18';
 
 -- HashTable for UInt32 (used until (1<<14) elements), hence 16384 elements
 SELECT 'UInt32';
 SET max_memory_usage = 8000000;
-SELECT sum(u) FROM (SELECT intDiv(number, 16384) AS k, uniqCombined(18)(number % 16384) u FROM numbers(toUInt64(16384 * 100)) GROUP BY k); -- { serverError 241 }
+SELECT sum(u) FROM (SELECT intDiv(number, 16384) AS k, uniqCombined(18)(number % 16384) u FROM numbers(16384 * 100) GROUP BY k); -- { serverError 241 }
 SET max_memory_usage = 19660800;
-SELECT sum(u) FROM (SELECT intDiv(number, 16384) AS k, uniqCombined(18)(number % 16384) u FROM numbers(toUInt64(16384 * 100)) GROUP BY k);
+SELECT sum(u) FROM (SELECT intDiv(number, 16384) AS k, uniqCombined(18)(number % 16384) u FROM numbers(16384 * 100) GROUP BY k);
 
 -- HashTable for UInt64 (used until (1<<13) elements), hence 8192 elements
 SELECT 'UInt64';
 SET max_memory_usage = 8000000;
-SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(18)(reinterpretAsString(number % 8192)) u FROM numbers(toUInt64(8192 * 100)) GROUP BY k); -- { serverError 241 }
+SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(18)(reinterpretAsString(number % 8192)) u FROM numbers(8192 * 100) GROUP BY k); -- { serverError 241 }
 SET max_memory_usage = 19660800;
-SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(18)(reinterpretAsString(number % 8192)) u FROM numbers(toUInt64(8192 * 100)) GROUP BY k);
+SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(18)(reinterpretAsString(number % 8192)) u FROM numbers(8192 * 100) GROUP BY k);
diff --git a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh
index 285e2ab8dad..9909d9b566d 100755
--- a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh
+++ b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh
@@ -33,7 +33,7 @@ function execute_group_by()
         "--max_memory_usage_for_user="$((150<<20))
         "--max_threads=2"
     )
-    execute_null "${opts[@]}" <<<'SELECT uniq(number) FROM numbers_mt(toUInt64(1e6)) GROUP BY number % 5e5'
+    execute_null "${opts[@]}" <<<'SELECT uniq(number) FROM numbers_mt(1e6) GROUP BY number % 5e5'
 }
 
 # This is needed to keep at least one running query for user for the time of test.
diff --git a/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory.sql b/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory.sql
index 6aa38a914f7..87c66609421 100644
--- a/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory.sql
+++ b/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory.sql
@@ -1,7 +1,7 @@
 drop table if exists data_01513;
 create table data_01513 (key String) engine=MergeTree() order by key;
 -- 10e3 groups, 1e3 keys each
-insert into data_01513 select number%10e3 from numbers(toUInt64(2e6));
+insert into data_01513 select number%10e3 from numbers(2e6);
 -- reduce number of parts to 1
 optimize table data_01513 final;
 
diff --git a/tests/queries/0_stateless/01600_remerge_sort_lowered_memory_bytes_ratio.sql b/tests/queries/0_stateless/01600_remerge_sort_lowered_memory_bytes_ratio.sql
index b33b74c918d..5de4210d3f2 100644
--- a/tests/queries/0_stateless/01600_remerge_sort_lowered_memory_bytes_ratio.sql
+++ b/tests/queries/0_stateless/01600_remerge_sort_lowered_memory_bytes_ratio.sql
@@ -10,8 +10,8 @@ set max_block_size=40960;
 --     MergeSortingTransform: Re-merging intermediate ORDER BY data (20 blocks with 819200 rows) to save memory consumption
 --     MergeSortingTransform: Memory usage is lowered from 186.25 MiB to 95.00 MiB
 --     MergeSortingTransform: Re-merging is not useful (memory usage was not lowered by remerge_sort_lowered_memory_bytes_ratio=2.0)
-select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(toUInt64(3e6)) order by k limit 400e3 format Null; -- { serverError 241 }
-select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(toUInt64(3e6)) order by k limit 400e3 settings remerge_sort_lowered_memory_bytes_ratio=2. format Null; -- { serverError 241 }
+select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(3e6) order by k limit 400e3 format Null; -- { serverError 241 }
+select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(3e6) order by k limit 400e3 settings remerge_sort_lowered_memory_bytes_ratio=2. format Null; -- { serverError 241 }
 
 -- remerge_sort_lowered_memory_bytes_ratio 1.9 is good (need at least 1.91/0.98=1.94)
 --     MergeSortingTransform: Re-merging intermediate ORDER BY data (20 blocks with 819200 rows) to save memory consumption
@@ -26,4 +26,4 @@ select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v
 --     MergeSortingTransform: Memory usage is lowered from 188.13 MiB to 95.00 MiB
 --     MergeSortingTransform: Re-merging intermediate ORDER BY data (20 blocks with 809600 rows) to save memory consumption
 --     MergeSortingTransform: Memory usage is lowered from 188.13 MiB to 95.00 MiB
-select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(toUInt64(3e6)) order by k limit 400e3 settings remerge_sort_lowered_memory_bytes_ratio=1.9 format Null;
+select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(3e6) order by k limit 400e3 settings remerge_sort_lowered_memory_bytes_ratio=1.9 format Null;
diff --git a/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql b/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql
index f059da20755..7a92f40b3f0 100644
--- a/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql
+++ b/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql
@@ -5,7 +5,7 @@ create table data_01641 (key Int, value String) engine=MergeTree order by (key,
 -- peak memory usage is 170MiB
 set max_memory_usage='200Mi';
 system stop merges data_01641;
-insert into data_01641 select number, toString(number) from numbers(toUInt64(120e6));
+insert into data_01641 select number, toString(number) from numbers(120e6);
 
 -- peak:
 -- - is 21MiB if background merges already scheduled

From 6f837b41e3c03530ebc3c3f0ad18d8c898722028 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Wed, 10 Feb 2021 22:32:34 +0300
Subject: [PATCH 0942/1238] Update inplaceBlockConversions.h

---
 src/Interpreters/inplaceBlockConversions.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Interpreters/inplaceBlockConversions.h b/src/Interpreters/inplaceBlockConversions.h
index 837fe9153a9..63540e2994d 100644
--- a/src/Interpreters/inplaceBlockConversions.h
+++ b/src/Interpreters/inplaceBlockConversions.h
@@ -17,7 +17,7 @@ class ActionsDAG;
 using ActionsDAGPtr = std::shared_ptr<ActionsDAG>;
 
 /// Create actions which adds missing defaults to block according to required_columns using columns description.
-/// Return nullptr if no cations required.
+/// Return nullptr if no actions required.
 ActionsDAGPtr evaluateMissingDefaults(
     const Block & header,
     const NamesAndTypesList & required_columns,

From e2d5972eca63e42459e467a093e1d4a23ab50829 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Wed, 10 Feb 2021 21:49:33 +0300
Subject: [PATCH 0943/1238] Cover buffer_profile config directive

---
 .../test_buffer_profile/__init__.py           |  0
 .../configs/buffer_profile.xml                |  3 ++
 .../configs/users.d/buffer_profile.xml        |  8 +++
 tests/integration/test_buffer_profile/test.py | 54 +++++++++++++++++++
 4 files changed, 65 insertions(+)
 create mode 100644 tests/integration/test_buffer_profile/__init__.py
 create mode 100644 tests/integration/test_buffer_profile/configs/buffer_profile.xml
 create mode 100644 tests/integration/test_buffer_profile/configs/users.d/buffer_profile.xml
 create mode 100644 tests/integration/test_buffer_profile/test.py

diff --git a/tests/integration/test_buffer_profile/__init__.py b/tests/integration/test_buffer_profile/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/integration/test_buffer_profile/configs/buffer_profile.xml b/tests/integration/test_buffer_profile/configs/buffer_profile.xml
new file mode 100644
index 00000000000..6ce6de70e63
--- /dev/null
+++ b/tests/integration/test_buffer_profile/configs/buffer_profile.xml
@@ -0,0 +1,3 @@
+<yandex>
+    <buffer_profile>buffer_profile</buffer_profile>
+</yandex>
diff --git a/tests/integration/test_buffer_profile/configs/users.d/buffer_profile.xml b/tests/integration/test_buffer_profile/configs/users.d/buffer_profile.xml
new file mode 100644
index 00000000000..2edd2b63dc6
--- /dev/null
+++ b/tests/integration/test_buffer_profile/configs/users.d/buffer_profile.xml
@@ -0,0 +1,8 @@
+<yandex>
+    <profiles>
+        <buffer_profile>
+            <max_partitions_per_insert_block>1</max_partitions_per_insert_block>
+        </buffer_profile>
+    </profiles>
+</yandex>
+
diff --git a/tests/integration/test_buffer_profile/test.py b/tests/integration/test_buffer_profile/test.py
new file mode 100644
index 00000000000..ae9220898ab
--- /dev/null
+++ b/tests/integration/test_buffer_profile/test.py
@@ -0,0 +1,54 @@
+# pylint: disable=unused-argument
+# pylint: disable=redefined-outer-name
+# pylint: disable=line-too-long
+
+import pytest
+
+from helpers.cluster import ClickHouseCluster
+from helpers.client import QueryRuntimeException
+
+cluster = ClickHouseCluster(__file__)
+
+node_default = cluster.add_instance('node_default')
+node_buffer_profile = cluster.add_instance('node_buffer_profile',
+    main_configs=['configs/buffer_profile.xml'],
+    user_configs=['configs/users.d/buffer_profile.xml'])
+
+@pytest.fixture(scope='module', autouse=True)
+def start_cluster():
+    try:
+        cluster.start()
+        yield cluster
+    finally:
+        cluster.shutdown()
+
+def bootstrap(node):
+    node.query("""
+    CREATE TABLE data (key Int) Engine=MergeTree()
+    ORDER BY key
+    PARTITION BY key % 2;
+
+    CREATE TABLE buffer AS data Engine=Buffer(currentDatabase(), data,
+            /* settings for manual flush only */
+            1,    /* num_layers */
+            10e6, /* min_time, placeholder */
+            10e6, /* max_time, placeholder */
+            0,    /* min_rows   */
+            10e6, /* max_rows   */
+            0,    /* min_bytes  */
+            80e6  /* max_bytes  */
+    );
+
+    INSERT INTO buffer SELECT * FROM numbers(100);
+    """)
+
+def test_default_profile():
+    bootstrap(node_default)
+    # flush the buffer
+    node_default.query('OPTIMIZE TABLE buffer')
+
+def test_buffer_profile():
+    bootstrap(node_buffer_profile)
+    with pytest.raises(QueryRuntimeException, match='Too many partitions for single INSERT block'):
+        # flush the buffer
+        node_buffer_profile.query('OPTIMIZE TABLE buffer')

From 809fa7e4cc1b4f204533c84d31cab36b0a8ef68d Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Wed, 10 Feb 2021 23:07:28 +0300
Subject: [PATCH 0944/1238] Sync SYSTEM FLUSH DISTRIBUTED with TRUNCATE

---
 src/Interpreters/InterpreterSystemQuery.cpp | 2 +-
 src/Storages/StorageDistributed.cpp         | 5 ++++-
 src/Storages/StorageDistributed.h           | 2 +-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp
index 86706701141..0e9683de95f 100644
--- a/src/Interpreters/InterpreterSystemQuery.cpp
+++ b/src/Interpreters/InterpreterSystemQuery.cpp
@@ -606,7 +606,7 @@ void InterpreterSystemQuery::flushDistributed(ASTSystemQuery &)
     context.checkAccess(AccessType::SYSTEM_FLUSH_DISTRIBUTED, table_id);
 
     if (auto * storage_distributed = dynamic_cast<StorageDistributed *>(DatabaseCatalog::instance().getTable(table_id, context).get()))
-        storage_distributed->flushClusterNodesAllData();
+        storage_distributed->flushClusterNodesAllData(context);
     else
         throw Exception("Table " + table_id.getNameForLogs() + " is not distributed", ErrorCodes::BAD_ARGUMENTS);
 }
diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp
index 8605013c65d..c08dc38fa2d 100644
--- a/src/Storages/StorageDistributed.cpp
+++ b/src/Storages/StorageDistributed.cpp
@@ -882,8 +882,11 @@ ActionLock StorageDistributed::getActionLock(StorageActionBlockType type)
     return {};
 }
 
-void StorageDistributed::flushClusterNodesAllData()
+void StorageDistributed::flushClusterNodesAllData(const Context & context)
 {
+    /// Sync SYSTEM FLUSH DISTRIBUTED with TRUNCATE
+    auto table_lock = lockForShare(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
+
     std::vector<std::shared_ptr<StorageDistributedDirectoryMonitor>> directory_monitors;
 
     {
diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h
index 928b6297297..4d3869f7c5c 100644
--- a/src/Storages/StorageDistributed.h
+++ b/src/Storages/StorageDistributed.h
@@ -114,7 +114,7 @@ public:
     /// (note that monitors are created lazily, i.e. until at least one INSERT executed)
     std::vector<StorageDistributedDirectoryMonitor::Status> getDirectoryMonitorsStatuses() const;
 
-    void flushClusterNodesAllData();
+    void flushClusterNodesAllData(const Context & context);
 
     ClusterPtr getCluster() const;
 

From 15256d86e59613d36d13c93bbdec960ededcf81e Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Wed, 10 Feb 2021 23:30:40 +0300
Subject: [PATCH 0945/1238] better replica recovery and queue cleanup

---
 src/Common/ZooKeeper/IKeeper.cpp              |   2 +-
 src/Common/ZooKeeper/ZooKeeper.cpp            |  21 ++--
 src/Common/ZooKeeper/ZooKeeper.h              |  11 +-
 src/Databases/DatabaseOnDisk.cpp              |   2 +-
 src/Databases/DatabaseOnDisk.h                |   2 +-
 src/Databases/DatabaseReplicated.cpp          | 109 +++++++++++++++---
 src/Databases/DatabaseReplicated.h            |   2 +
 src/Databases/DatabaseReplicatedWorker.cpp    |   3 +-
 src/Databases/IDatabase.h                     |   2 +-
 .../MySQL/DatabaseConnectionMySQL.cpp         |   6 +-
 src/Databases/MySQL/DatabaseConnectionMySQL.h |   4 +-
 src/Interpreters/DDLWorker.cpp                |  87 ++++++--------
 src/Interpreters/InterpreterDropQuery.cpp     |   2 +-
 .../test_distributed_ddl/cluster.py           |   4 +-
 14 files changed, 165 insertions(+), 92 deletions(-)

diff --git a/src/Common/ZooKeeper/IKeeper.cpp b/src/Common/ZooKeeper/IKeeper.cpp
index ad18fdd992a..94fd291bd12 100644
--- a/src/Common/ZooKeeper/IKeeper.cpp
+++ b/src/Common/ZooKeeper/IKeeper.cpp
@@ -59,7 +59,7 @@ static void addRootPath(String & path, const String & root_path)
         throw Exception("Path cannot be empty", Error::ZBADARGUMENTS);
 
     if (path[0] != '/')
-        throw Exception("Path must begin with /", Error::ZBADARGUMENTS);
+        throw Exception("Path must begin with /, got " + path, Error::ZBADARGUMENTS);
 
     if (root_path.empty())
         return;
diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp
index 7a64609dc22..dc6abca6892 100644
--- a/src/Common/ZooKeeper/ZooKeeper.cpp
+++ b/src/Common/ZooKeeper/ZooKeeper.cpp
@@ -610,7 +610,7 @@ void ZooKeeper::removeChildren(const std::string & path)
 }
 
 
-void ZooKeeper::removeChildrenRecursive(const std::string & path)
+void ZooKeeper::removeChildrenRecursive(const std::string & path, const String & keep_child_node)
 {
     Strings children = getChildren(path);
     while (!children.empty())
@@ -619,14 +619,15 @@ void ZooKeeper::removeChildrenRecursive(const std::string & path)
         for (size_t i = 0; i < MULTI_BATCH_SIZE && !children.empty(); ++i)
         {
             removeChildrenRecursive(path + "/" + children.back());
-            ops.emplace_back(makeRemoveRequest(path + "/" + children.back(), -1));
+            if (likely(keep_child_node.empty() || keep_child_node != children.back()))
+                ops.emplace_back(makeRemoveRequest(path + "/" + children.back(), -1));
             children.pop_back();
         }
         multi(ops);
     }
 }
 
-void ZooKeeper::tryRemoveChildrenRecursive(const std::string & path)
+void ZooKeeper::tryRemoveChildrenRecursive(const std::string & path, const String & keep_child_node)
 {
     Strings children;
     if (tryGetChildren(path, children) != Coordination::Error::ZOK)
@@ -637,14 +638,14 @@ void ZooKeeper::tryRemoveChildrenRecursive(const std::string & path)
         Strings batch;
         for (size_t i = 0; i < MULTI_BATCH_SIZE && !children.empty(); ++i)
         {
-            batch.push_back(path + "/" + children.back());
+            String child_path = path + "/" + children.back();
+            tryRemoveChildrenRecursive(child_path);
+            if (likely(keep_child_node.empty() || keep_child_node != children.back()))
+            {
+                batch.push_back(child_path);
+                ops.emplace_back(zkutil::makeRemoveRequest(child_path, -1));
+            }
             children.pop_back();
-            tryRemoveChildrenRecursive(batch.back());
-
-            Coordination::RemoveRequest request;
-            request.path = batch.back();
-
-            ops.emplace_back(std::make_shared<Coordination::RemoveRequest>(std::move(request)));
         }
 
         /// Try to remove the children with a faster method - in bulk. If this fails,
diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h
index d532da10f2f..fbe1bede91a 100644
--- a/src/Common/ZooKeeper/ZooKeeper.h
+++ b/src/Common/ZooKeeper/ZooKeeper.h
@@ -184,6 +184,12 @@ public:
     /// result would be the same as for the single call.
     void tryRemoveRecursive(const std::string & path);
 
+    /// Similar to removeRecursive(...) and tryRemoveRecursive(...), but does not remove path itself.
+    /// If keep_child_node is not empty, this method will not remove path/keep_child_node (but will remove its subtree).
+    /// It can be useful to keep some child node as a flag which indicates that path is currently removing.
+    void removeChildrenRecursive(const std::string & path, const String & keep_child_node = {});
+    void tryRemoveChildrenRecursive(const std::string & path, const String & keep_child_node = {});
+
     /// Remove all children nodes (non recursive).
     void removeChildren(const std::string & path);
 
@@ -247,9 +253,6 @@ private:
     void init(const std::string & implementation_, const std::string & hosts_, const std::string & identity_,
               int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_);
 
-    void removeChildrenRecursive(const std::string & path);
-    void tryRemoveChildrenRecursive(const std::string & path);
-
     /// The following methods don't throw exceptions but return error codes.
     Coordination::Error createImpl(const std::string & path, const std::string & data, int32_t mode, std::string & path_created);
     Coordination::Error removeImpl(const std::string & path, int32_t version);
@@ -328,7 +331,7 @@ public:
         catch (...)
         {
             ProfileEvents::increment(ProfileEvents::CannotRemoveEphemeralNode);
-            DB::tryLogCurrentException(__PRETTY_FUNCTION__);
+            DB::tryLogCurrentException(__PRETTY_FUNCTION__, "Cannot remove " + path + ": ");
         }
     }
 
diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp
index a03cb33591c..195f57d1bda 100644
--- a/src/Databases/DatabaseOnDisk.cpp
+++ b/src/Databases/DatabaseOnDisk.cpp
@@ -311,7 +311,7 @@ void DatabaseOnDisk::commitCreateTable(const ASTCreateQuery & query, const Stora
     }
 }
 
-void DatabaseOnDisk::detachTablePermanently(const String & table_name)
+void DatabaseOnDisk::detachTablePermanently(const Context &, const String & table_name)
 {
     auto table = detachTable(table_name);
 
diff --git a/src/Databases/DatabaseOnDisk.h b/src/Databases/DatabaseOnDisk.h
index 60a50ac4539..fefe6e91606 100644
--- a/src/Databases/DatabaseOnDisk.h
+++ b/src/Databases/DatabaseOnDisk.h
@@ -41,7 +41,7 @@ public:
         const StoragePtr & table,
         const ASTPtr & query) override;
 
-    void detachTablePermanently(const String & table_name) override;
+    void detachTablePermanently(const Context & context, const String & table_name) override;
 
     void dropTable(
         const Context & context,
diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index a3da271a597..0ac71793e5d 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -39,6 +39,8 @@ namespace ErrorCodes
 }
 
 static constexpr const char * DROPPED_MARK = "DROPPED";
+static constexpr const char * BROKEN_TABLE_PREFIX = "_broken_";
+
 
 zkutil::ZooKeeperPtr DatabaseReplicated::getZooKeeper() const
 {
@@ -306,13 +308,76 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
     if (new_replica && !empty())
         throw Exception(ErrorCodes::LOGICAL_ERROR, "It's new replica, but database is not empty");
 
-    if (!new_replica)
-        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Automatic replica recovery is not implemented");
-
     auto table_name_to_metadata = tryGetConsistentMetadataSnapshot(current_zookeeper, max_log_ptr);
 
+    Strings tables_to_detach;
+    size_t total_tables = 0;
+    auto existing_tables_it = getTablesIterator(global_context, [&](const String & name) { return !startsWith(name, BROKEN_TABLE_PREFIX); });
+    while (existing_tables_it->isValid())
+    {
+        String name = existing_tables_it->name();
+        auto in_zk = table_name_to_metadata.find(name);
+        String local_metadata = readMetadataFile(name);
+        if (in_zk == table_name_to_metadata.end() || in_zk->second != local_metadata)
+        {
+            bool should_detach = true;
+            bool looks_like_replicated = in_zk->second.find("ReplicatedMergeTree") != std::string::npos;
+
+            if (looks_like_replicated)
+            {
+                ParserCreateQuery parser;
+                auto size = global_context.getSettingsRef().max_query_size;
+                auto depth = global_context.getSettingsRef().max_parser_depth;
+                ASTPtr local_create = parseQuery(parser, local_metadata, size, depth);
+                ASTPtr zk_create = parseQuery(parser, in_zk->second, size, depth);
+                if (local_create->as<ASTCreateQuery>()->uuid == zk_create->as<ASTCreateQuery>()->uuid)
+                {
+                    /// For ReplicatedMergeTree tables we can compare only UUIDs to ensure that it's tha same table.
+                    /// Metadata can be different, it's handled on table replication level.
+                    /// TODO maybe we should also compare MergeTree SETTINGS?
+                    should_detach = false;
+                }
+            }
+
+            if (should_detach)
+                tables_to_detach.emplace_back(std::move(name));
+        }
+        existing_tables_it->next();
+        ++total_tables;
+    }
+
+    if (total_tables < tables_to_detach.size() * 2)
+        throw Exception(ErrorCodes::DATABASE_REPLICATION_FAILED, "Too many tables to detach: {} of {}", tables_to_detach.size(), total_tables);
+    else if (!tables_to_detach.empty())
+        LOG_WARNING(log, "Will DETACH PERMANENTLY {} broken tables to recover replica", tables_to_detach.size());
+
+    auto db_guard = DatabaseCatalog::instance().getDDLGuard(getDatabaseName(), "");
+    for (const auto & table_name : tables_to_detach)
+    {
+        String to_name = fmt::format("{}_{}_{}_{}", BROKEN_TABLE_PREFIX, table_name, max_log_ptr, thread_local_rng() % 1000);
+        DDLGuardPtr table_guard = DatabaseCatalog::instance().getDDLGuard(getDatabaseName(), std::min(table_name, to_name));
+        DDLGuardPtr to_table_guard = DatabaseCatalog::instance().getDDLGuard(getDatabaseName(), std::max(table_name, to_name));
+
+        if (isDictionaryExist(table_name))
+        {
+            /// TODO implement DETACH DICTIONARY PERMANENTLY
+            DatabaseAtomic::removeDictionary(global_context, table_name);
+        }
+        else
+        {
+            DatabaseAtomic::renameTable(global_context, table_name, *this, to_name, false, false);
+            DatabaseAtomic::detachTablePermanently(global_context, to_name);
+        }
+    }
+
     for (const auto & name_and_meta : table_name_to_metadata)
     {
+        if (isTableExist(name_and_meta.first, global_context))
+        {
+            assert(name_and_meta.second == readMetadataFile(name_and_meta.first));
+            continue;
+        }
+
         auto query_ast = parseQueryFromMetadataInZooKeeper(name_and_meta.first, name_and_meta.second);
 
         Context query_context = global_context;
@@ -349,7 +414,7 @@ std::map<String, String> DatabaseReplicated::tryGetConsistentMetadataSnapshot(co
             auto res = futures[i].get();
             if (res.error != Coordination::Error::ZOK)
                 break;
-            table_name_to_metadata.emplace(table_names[i], res.data);
+            table_name_to_metadata.emplace(unescapeForFileName(table_names[i]), res.data);
         }
 
         UInt32 new_max_log_ptr = parse<UInt32>(zookeeper->get(zookeeper_path + "/max_log_ptr"));
@@ -451,18 +516,8 @@ void DatabaseReplicated::renameTable(const Context & context, const String & tab
         if (exchange && !to_database.isTableExist(to_table_name, context))
             throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} does not exist", to_table_name);
 
-        String statement;
-        String statement_to;
-        {
-            /// NOTE It's not atomic (however, we have only one thread)
-            ReadBufferFromFile in(getObjectMetadataPath(table_name), 4096);
-            readStringUntilEOF(statement, in);
-            if (exchange)
-            {
-                ReadBufferFromFile in_to(to_database.getObjectMetadataPath(to_table_name), 4096);
-                readStringUntilEOF(statement_to, in_to);
-            }
-        }
+        String statement = readMetadataFile(table_name);
+        String statement_to = readMetadataFile(to_table_name);
         String metadata_zk_path = txn->zookeeper_path + "/metadata/" + escapeForFileName(table_name);
         String metadata_zk_path_to = txn->zookeeper_path + "/metadata/" + escapeForFileName(to_table_name);
         txn->ops.emplace_back(zkutil::makeRemoveRequest(metadata_zk_path, -1));
@@ -481,6 +536,8 @@ void DatabaseReplicated::commitCreateTable(const ASTCreateQuery & query, const S
                        const String & table_metadata_tmp_path, const String & table_metadata_path,
                        const Context & query_context)
 {
+    if (startsWith(query.table, BROKEN_TABLE_PREFIX))
+        throw Exception(ErrorCodes::INCORRECT_QUERY, "It's not allowed to attach broken tables");
     auto txn = query_context.getMetadataTransaction();
     assert(!ddl_worker->isCurrentlyActive() || txn);
     if (txn && txn->is_initial_query)
@@ -533,4 +590,24 @@ void DatabaseReplicated::removeDictionary(const Context & context, const String
     DatabaseAtomic::removeDictionary(context, dictionary_name);
 }
 
+void DatabaseReplicated::detachTablePermanently(const Context & context, const String & table_name)
+{
+    auto txn = context.getMetadataTransaction();
+    assert(!ddl_worker->isCurrentlyActive() || txn);
+    if (txn && txn->is_initial_query)
+    {
+        String metadata_zk_path = zookeeper_path + "/metadata/" + escapeForFileName(table_name);
+        txn->ops.emplace_back(zkutil::makeRemoveRequest(metadata_zk_path, -1));
+    }
+    DatabaseAtomic::detachTablePermanently(context, table_name);
+}
+
+String DatabaseReplicated::readMetadataFile(const String & table_name) const
+{
+    String statement;
+    ReadBufferFromFile in(getObjectMetadataPath(table_name), 4096);
+    readStringUntilEOF(statement, in);
+    return statement;
+}
+
 }
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index fffc2b5c98a..2c998a8bc97 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -62,6 +62,7 @@ public:
                           const String & dictionary_name,
                           const ASTPtr & query) override;
     void removeDictionary(const Context & context, const String & dictionary_name) override;
+    void detachTablePermanently(const Context & context, const String & table_name) override;
 
     void drop(const Context & /*context*/) override;
 
@@ -90,6 +91,7 @@ private:
     std::map<String, String> tryGetConsistentMetadataSnapshot(const ZooKeeperPtr & zookeeper, UInt32 & max_log_ptr);
 
     ASTPtr parseQueryFromMetadataInZooKeeper(const String & node_name, const String & query);
+    String readMetadataFile(const String & table_name) const;
 
     String zookeeper_path;
     String shard_name;
diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp
index 3162979e787..b29a8822c0c 100644
--- a/src/Databases/DatabaseReplicatedWorker.cpp
+++ b/src/Databases/DatabaseReplicatedWorker.cpp
@@ -24,13 +24,14 @@ DatabaseReplicatedDDLWorker::DatabaseReplicatedDDLWorker(DatabaseReplicated * db
 
 void DatabaseReplicatedDDLWorker::initializeMainThread()
 {
-    while (!initialized && !stop_flag)
+    while (!stop_flag)
     {
         try
         {
             auto zookeeper = getAndSetZooKeeper();
             initializeReplication();
             initialized = true;
+            return;
         }
         catch (...)
         {
diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h
index fc821fcab30..3a196f827b7 100644
--- a/src/Databases/IDatabase.h
+++ b/src/Databases/IDatabase.h
@@ -249,7 +249,7 @@ public:
 
     /// Forget about the table without deleting it's data, but rename metadata file to prevent reloading it
     /// with next restart. The database may not support this method.
-    virtual void detachTablePermanently(const String & /*name*/)
+    virtual void detachTablePermanently(const Context & /*context*/, const String & /*name*/)
     {
         throw Exception("There is no DETACH TABLE PERMANENTLY query for Database" + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
     }
diff --git a/src/Databases/MySQL/DatabaseConnectionMySQL.cpp b/src/Databases/MySQL/DatabaseConnectionMySQL.cpp
index 35b016f255b..eeea12ae8f3 100644
--- a/src/Databases/MySQL/DatabaseConnectionMySQL.cpp
+++ b/src/Databases/MySQL/DatabaseConnectionMySQL.cpp
@@ -395,7 +395,7 @@ void DatabaseConnectionMySQL::loadStoredObjects(Context &, bool, bool /*force_at
     }
 }
 
-void DatabaseConnectionMySQL::detachTablePermanently(const String & table_name)
+void DatabaseConnectionMySQL::detachTablePermanently(const Context &, const String & table_name)
 {
     std::lock_guard<std::mutex> lock{mutex};
 
@@ -429,9 +429,9 @@ void DatabaseConnectionMySQL::detachTablePermanently(const String & table_name)
     table_iter->second.second->is_dropped = true;
 }
 
-void DatabaseConnectionMySQL::dropTable(const Context &, const String & table_name, bool /*no_delay*/)
+void DatabaseConnectionMySQL::dropTable(const Context & context, const String & table_name, bool /*no_delay*/)
 {
-    detachTablePermanently(table_name);
+    detachTablePermanently(context, table_name);
 }
 
 DatabaseConnectionMySQL::~DatabaseConnectionMySQL()
diff --git a/src/Databases/MySQL/DatabaseConnectionMySQL.h b/src/Databases/MySQL/DatabaseConnectionMySQL.h
index 3e305fcb20d..d0a5c041d7b 100644
--- a/src/Databases/MySQL/DatabaseConnectionMySQL.h
+++ b/src/Databases/MySQL/DatabaseConnectionMySQL.h
@@ -72,9 +72,9 @@ public:
 
     StoragePtr detachTable(const String & table_name) override;
 
-    void detachTablePermanently(const String & table_name) override;
+    void detachTablePermanently(const Context & context, const String & table_name) override;
 
-    void dropTable(const Context &, const String & table_name, bool no_delay) override;
+    void dropTable(const Context & context, const String & table_name, bool no_delay) override;
 
     void attachTable(const String & table_name, const StoragePtr & storage, const String & relative_table_path) override;
 
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index efaacabf4de..975eaeaca1b 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -315,11 +315,10 @@ void DDLWorker::scheduleTasks()
     {
         /// Main thread of DDLWorker was restarted, probably due to lost connection with ZooKeeper.
         /// We have some unfinished tasks. To avoid duplication of some queries, try to write execution status.
-        bool status_written = task->ops.empty();
         bool task_still_exists = zookeeper->exists(task->entry_path);
+        bool status_written = zookeeper->exists(task->getFinishedNodePath());
         if (task->was_executed && !status_written && task_still_exists)
         {
-            assert(!zookeeper->exists(task->getFinishedNodePath()));
             processTask(*task);
         }
     }
@@ -472,9 +471,16 @@ void DDLWorker::processTask(DDLTaskBase & task)
     String active_node_path = task.getActiveNodePath();
     String finished_node_path = task.getFinishedNodePath();
 
-    String dummy;
-    zookeeper->createAncestors(active_node_path);
-    auto active_node = zkutil::EphemeralNodeHolder::create(active_node_path, *zookeeper, "");
+    auto create_active_res = zookeeper->tryCreate(active_node_path, {}, zkutil::CreateMode::Ephemeral);
+    if (create_active_res != Coordination::Error::ZOK)
+    {
+        if (create_active_res == Coordination::Error::ZNONODE)
+            throw Coordination::Exception(create_active_res, active_node_path);
+        createStatusDirs(task.entry_path, zookeeper);
+        zookeeper->create(active_node_path, {}, zkutil::CreateMode::Ephemeral);
+
+    }
+    auto active_node = zkutil::EphemeralNodeHolder::existing(active_node_path, *zookeeper);
 
     if (!task.was_executed)
     {
@@ -755,7 +761,6 @@ void DDLWorker::cleanupQueue(Int64, const ZooKeeperPtr & zookeeper)
 
         String node_name = *it;
         String node_path = fs::path(queue_dir) / node_name;
-        String lock_path = fs::path(node_path) / "lock";
 
         Coordination::Stat stat;
         String dummy;
@@ -769,39 +774,29 @@ void DDLWorker::cleanupQueue(Int64, const ZooKeeperPtr & zookeeper)
             if (!canRemoveQueueEntry(node_name, stat))
                 continue;
 
-            /// Skip if there are active nodes (it is weak guard)
-            if (zookeeper->exists(fs::path(node_path) / "active", &stat) && stat.numChildren > 0)
+            /// At first we remove entry/active node to prevent staled hosts from executing entry concurrently
+            auto rm_active_res = zookeeper->tryRemove(fs::path(node_path) / "active");
+            if (rm_active_res != Coordination::Error::ZOK && rm_active_res != Coordination::Error::ZNONODE)
             {
-                LOG_INFO(log, "Task {} should be deleted, but there are active workers. Skipping it.", node_name);
-                continue;
-            }
-
-            /// Usage of the lock is not necessary now (tryRemoveRecursive correctly removes node in a presence of concurrent cleaners)
-            /// But the lock will be required to implement system.distributed_ddl_queue table
-            auto lock = createSimpleZooKeeperLock(zookeeper, node_path, "lock", host_fqdn_id);
-            if (!lock->tryLock())
-            {
-                LOG_INFO(log, "Task {} should be deleted, but it is locked. Skipping it.", node_name);
+                if (rm_active_res == Coordination::Error::ZNOTEMPTY)
+                    LOG_DEBUG(log, "Task {} should be deleted, but there are active workers. Skipping it.", node_name);
+                else
+                    LOG_WARNING(log, "Unexpected status code {} on attempt to remove {}/active", rm_active_res, node_name);
                 continue;
             }
 
+            /// Now we can safely delete entry
             LOG_INFO(log, "Task {} is outdated, deleting it", node_name);
 
-            /// Deleting
-            {
-                Strings children = zookeeper->getChildren(node_path);
-                for (const String & child : children)
-                {
-                    if (child != "lock")
-                        zookeeper->tryRemoveRecursive(fs::path(node_path) / child);
-                }
+            /// We recursively delete all nodes except entry/finished to prevent staled hosts from
+            /// creating entry/active node (see createStatusDirs(...))
+            zookeeper->tryRemoveChildrenRecursive(node_path, "finished");
 
-                /// Remove the lock node and its parent atomically
-                Coordination::Requests ops;
-                ops.emplace_back(zkutil::makeRemoveRequest(lock_path, -1));
-                ops.emplace_back(zkutil::makeRemoveRequest(node_path, -1));
-                zookeeper->multi(ops);
-            }
+            /// And then we remove entry and entry/finished in a single transaction
+            Coordination::Requests ops;
+            ops.emplace_back(zkutil::makeRemoveRequest(fs::path(node_path) / "finished", -1));
+            ops.emplace_back(zkutil::makeRemoveRequest(node_path, -1));
+            zookeeper->multi(ops);
         }
         catch (...)
         {
@@ -819,7 +814,7 @@ bool DDLWorker::canRemoveQueueEntry(const String & entry_name, const Coordinatio
 
     /// If too many nodes in task queue (> max_tasks_in_queue), delete oldest one
     UInt32 entry_number = DDLTaskBase::getLogEntryNumber(entry_name);
-    bool node_is_outside_max_window = entry_number < max_id.load(std::memory_order_relaxed) - max_tasks_in_queue;
+    bool node_is_outside_max_window = entry_number + max_tasks_in_queue < max_id.load(std::memory_order_relaxed);
 
     return node_lifetime_is_expired || node_is_outside_max_window;
 }
@@ -828,21 +823,17 @@ bool DDLWorker::canRemoveQueueEntry(const String & entry_name, const Coordinatio
 void DDLWorker::createStatusDirs(const std::string & node_path, const ZooKeeperPtr & zookeeper)
 {
     Coordination::Requests ops;
-    {
-        Coordination::CreateRequest request;
-        request.path = fs::path(node_path) / "active";
-        ops.emplace_back(std::make_shared<Coordination::CreateRequest>(std::move(request)));
-    }
-    {
-        Coordination::CreateRequest request;
-        request.path = fs::path(node_path) / "finished";
-        ops.emplace_back(std::make_shared<Coordination::CreateRequest>(std::move(request)));
-    }
+    ops.emplace_back(zkutil::makeCreateRequest(fs::path(node_path) / "active", {}, zkutil::CreateMode::Persistent));
+    ops.emplace_back(zkutil::makeCreateRequest(fs::path(node_path) / "finished", {}, zkutil::CreateMode::Persistent));
+
     Coordination::Responses responses;
     Coordination::Error code = zookeeper->tryMulti(ops, responses);
-    if (code != Coordination::Error::ZOK
-        && code != Coordination::Error::ZNODEEXISTS)
-        throw Coordination::Exception(code);
+    bool both_created = code == Coordination::Error::ZOK;
+    bool both_already_exists = responses.size() == 2 && responses[0]->error == Coordination::Error::ZNODEEXISTS
+                                                     && responses[1]->error == Coordination::Error::ZNODEEXISTS;
+    if (both_created || both_already_exists)
+        return;
+    throw Coordination::Exception(code);
 }
 
 
@@ -877,8 +868,6 @@ String DDLWorker::enqueueQuery(DDLLogEntry & entry)
 void DDLWorker::initializeMainThread()
 {
     assert(!initialized);
-    assert(max_id == 0);
-    assert(current_tasks.empty());
     setThreadName("DDLWorker");
     LOG_DEBUG(log, "Started DDLWorker thread");
 
@@ -896,7 +885,7 @@ void DDLWorker::initializeMainThread()
             if (!Coordination::isHardwareError(e.code))
             {
                 /// A logical error.
-                LOG_ERROR(log, "ZooKeeper error: {}. Failed to start DDLWorker.",getCurrentExceptionMessage(true));
+                LOG_ERROR(log, "ZooKeeper error: {}. Failed to start DDLWorker.", getCurrentExceptionMessage(true));
                 assert(false);  /// Catch such failures in tests with debug build
             }
 
diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp
index ae76e8efd46..9e63c647f71 100644
--- a/src/Interpreters/InterpreterDropQuery.cpp
+++ b/src/Interpreters/InterpreterDropQuery.cpp
@@ -162,7 +162,7 @@ BlockIO InterpreterDropQuery::executeToTableImpl(const ASTDropQuery & query, Dat
             if (query.permanently)
             {
                 /// Drop table from memory, don't touch data, metadata file renamed and will be skipped during server restart
-                database->detachTablePermanently(table_id.table_name);
+                database->detachTablePermanently(context, table_id.table_name);
             }
             else
             {
diff --git a/tests/integration/test_distributed_ddl/cluster.py b/tests/integration/test_distributed_ddl/cluster.py
index 811eb94bad4..45a159ed2b9 100644
--- a/tests/integration/test_distributed_ddl/cluster.py
+++ b/tests/integration/test_distributed_ddl/cluster.py
@@ -104,8 +104,8 @@ class ClickHouseClusterWithDDLHelpers(ClickHouseCluster):
     def ddl_check_there_are_no_dublicates(instance):
         query = "SELECT max(c), argMax(q, c) FROM (SELECT lower(query) AS q, count() AS c FROM system.query_log WHERE type=2 AND q LIKE '/* ddl_entry=query-%' GROUP BY query)"
         rows = instance.query(query)
-        assert len(rows) > 0 and rows[0][0] == "1", "dublicates on {} {}, query {}".format(instance.name,
-                                                                                           instance.ip_address, query)
+        assert len(rows) > 0 and rows[0][0] == "1", "dublicates on {} {}: {}".format(instance.name,
+                                                                                           instance.ip_address, rows)
 
     @staticmethod
     def insert_reliable(instance, query_insert):

From 53a3e178c7b150f70c13246d54bd938073c6e767 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Wed, 10 Feb 2021 23:32:11 +0300
Subject: [PATCH 0946/1238] See comment from Maxim Akhmedov in
 https://t.me/joinchat/VP49ANS8VOcAgG75 at 2021-02-10 22:31 MSK

---
 .github/workflows/codeql-analysis.yml | 32 ---------------------------
 1 file changed, 32 deletions(-)
 delete mode 100644 .github/workflows/codeql-analysis.yml

diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
deleted file mode 100644
index 633dd47a2d5..00000000000
--- a/.github/workflows/codeql-analysis.yml
+++ /dev/null
@@ -1,32 +0,0 @@
-# See the example here: https://github.com/github/codeql-action
-
-name: "CodeQL Scanning"
-
-on:
-  schedule:
-    - cron: '0 19 * * *'
-jobs:
-  CodeQL-Build:
-
-    runs-on: self-hosted
-    timeout-minutes: 1440
-
-    steps:
-    - name: Checkout repository
-      uses: actions/checkout@v2
-      with:
-        fetch-depth: 2
-        submodules: 'recursive'
-
-    - name: Initialize CodeQL
-      uses: github/codeql-action/init@v1
-
-      with:
-        languages: cpp
-
-    - run: sudo apt-get update && sudo apt-get install -y git cmake python ninja-build gcc-10 g++-10 && mkdir build
-    - run: cd build && CC=gcc-10 CXX=g++-10 cmake ..
-    - run: cd build && ninja
-
-    - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@v1

From 53ea58810eb41e31526682aec5e7de935f6d3414 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 11 Feb 2021 00:25:50 +0300
Subject: [PATCH 0947/1238] Do not allow constant folding of explicitly
 forbidden functions

---
 src/Interpreters/ExpressionAnalyzer.cpp                        | 3 +++
 .../0_stateless/01611_constant_folding_subqueries.reference    | 2 ++
 .../queries/0_stateless/01611_constant_folding_subqueries.sql  | 1 +
 3 files changed, 6 insertions(+)

diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp
index 3f65a6f3f58..984249e15cf 100644
--- a/src/Interpreters/ExpressionAnalyzer.cpp
+++ b/src/Interpreters/ExpressionAnalyzer.cpp
@@ -89,6 +89,9 @@ bool allowEarlyConstantFolding(const ActionsDAG & actions, const Settings & sett
     {
         if (node.type == ActionsDAG::ActionType::FUNCTION && node.function_base)
         {
+            if (!node.function_base->isSuitableForConstantFolding())
+                return false;
+
             auto name = node.function_base->getName();
             if (name == "ignore")
                 return false;
diff --git a/tests/queries/0_stateless/01611_constant_folding_subqueries.reference b/tests/queries/0_stateless/01611_constant_folding_subqueries.reference
index ac91b53b754..d10502c5860 100644
--- a/tests/queries/0_stateless/01611_constant_folding_subqueries.reference
+++ b/tests/queries/0_stateless/01611_constant_folding_subqueries.reference
@@ -7,3 +7,5 @@ EXPLAIN SYNTAX SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUI
 SELECT
     identity(cast(0, \'UInt64\')) AS n,
     toUInt64(10 / n)
+SELECT * FROM (WITH (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n) as q SELECT * FROM system.one WHERE q > 0);
+0
diff --git a/tests/queries/0_stateless/01611_constant_folding_subqueries.sql b/tests/queries/0_stateless/01611_constant_folding_subqueries.sql
index abf67a8ed6a..59f057d1ec5 100644
--- a/tests/queries/0_stateless/01611_constant_folding_subqueries.sql
+++ b/tests/queries/0_stateless/01611_constant_folding_subqueries.sql
@@ -2,3 +2,4 @@
 SELECT * FROM (SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n)) FORMAT CSV;
 SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n) FORMAT CSV;
 EXPLAIN SYNTAX SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n);
+SELECT * FROM (WITH (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n) as q SELECT * FROM system.one WHERE q > 0);

From 6b82e8ad19be4be3ab4ece53a1c81e1afa54f4c5 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 11 Feb 2021 00:37:08 +0300
Subject: [PATCH 0948/1238] Mark ignore() as not suitable for constant folding

---
 src/Functions/ignore.cpp                | 1 +
 src/Interpreters/ExpressionAnalyzer.cpp | 4 ----
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/Functions/ignore.cpp b/src/Functions/ignore.cpp
index 6b02c3a462d..1348144cb05 100644
--- a/src/Functions/ignore.cpp
+++ b/src/Functions/ignore.cpp
@@ -29,6 +29,7 @@ public:
     }
 
     bool useDefaultImplementationForNulls() const override { return false; }
+    bool isSuitableForConstantFolding() const override { return false; }
 
     /// We should never return LowCardinality result, cause we declare that result is always constant zero.
     /// (in getResultIfAlwaysReturnsConstantAndHasArguments)
diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp
index 984249e15cf..8a421d06b72 100644
--- a/src/Interpreters/ExpressionAnalyzer.cpp
+++ b/src/Interpreters/ExpressionAnalyzer.cpp
@@ -91,10 +91,6 @@ bool allowEarlyConstantFolding(const ActionsDAG & actions, const Settings & sett
         {
             if (!node.function_base->isSuitableForConstantFolding())
                 return false;
-
-            auto name = node.function_base->getName();
-            if (name == "ignore")
-                return false;
         }
     }
     return true;

From 3adadeb12bb7d2f4c9405927a28f9f7a49617d46 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 11 Feb 2021 00:46:33 +0300
Subject: [PATCH 0949/1238] Mark 01513_optimize_aggregation_in_order_memory as
 long

https://clickhouse-test-reports.s3.yandex.net/20301/b6dc721e332e30c7e6dde40282441dd59cfa276e/functional_stateless_tests_flaky_check_(address).html#fail1
---
 ... => 01513_optimize_aggregation_in_order_memory_long.reference} | 0
 ...ry.sql => 01513_optimize_aggregation_in_order_memory_long.sql} | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename tests/queries/0_stateless/{01513_optimize_aggregation_in_order_memory.reference => 01513_optimize_aggregation_in_order_memory_long.reference} (100%)
 rename tests/queries/0_stateless/{01513_optimize_aggregation_in_order_memory.sql => 01513_optimize_aggregation_in_order_memory_long.sql} (100%)

diff --git a/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory.reference b/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory_long.reference
similarity index 100%
rename from tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory.reference
rename to tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory_long.reference
diff --git a/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory.sql b/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory_long.sql
similarity index 100%
rename from tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory.sql
rename to tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory_long.sql

From f442b30f3056e495faadae2076df12af0516dfa8 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 11 Feb 2021 01:23:27 +0300
Subject: [PATCH 0950/1238] Fix test

---
 .../CompressedReadBufferFromFile.cpp          |  1 -
 src/Functions/array/arrayDifference.cpp       |  3 +-
 src/Server/TCPHandler.cpp                     | 64 +++++++++++--------
 3 files changed, 37 insertions(+), 31 deletions(-)

diff --git a/src/Compression/CompressedReadBufferFromFile.cpp b/src/Compression/CompressedReadBufferFromFile.cpp
index eba8ba68438..54f360f417b 100644
--- a/src/Compression/CompressedReadBufferFromFile.cpp
+++ b/src/Compression/CompressedReadBufferFromFile.cpp
@@ -108,7 +108,6 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n)
         /// If the decompressed block fits entirely where it needs to be copied.
         if (size_decompressed + additional_size_at_the_end_of_buffer <= n - bytes_read)
         {
-            //std::cerr << "readBig " << file_in.getFileName() << "\n";
             decompress(to + bytes_read, size_decompressed, size_compressed_without_checksum);
             bytes_read += size_decompressed;
             bytes += size_decompressed;
diff --git a/src/Functions/array/arrayDifference.cpp b/src/Functions/array/arrayDifference.cpp
index c02533c2564..2c71c58867f 100644
--- a/src/Functions/array/arrayDifference.cpp
+++ b/src/Functions/array/arrayDifference.cpp
@@ -83,9 +83,9 @@ struct ArrayDifferenceImpl
         }
         res_ptr = ColumnArray::create(std::move(res_nested), array.getOffsetsPtr());
         return true;
-
     }
 
+
     static ColumnPtr execute(const ColumnArray & array, ColumnPtr mapped)
     {
         ColumnPtr res;
@@ -107,7 +107,6 @@ struct ArrayDifferenceImpl
         else
             throw Exception("Unexpected column for arrayDifference: " + mapped->getName(), ErrorCodes::ILLEGAL_COLUMN);
     }
-
 };
 
 struct NameArrayDifference { static constexpr auto name = "arrayDifference"; };
diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp
index 49f6eda3bab..9d6d42d9799 100644
--- a/src/Server/TCPHandler.cpp
+++ b/src/Server/TCPHandler.cpp
@@ -213,9 +213,6 @@ void TCPHandler::runImpl()
                 /// Get blocks of temporary tables
                 readData(connection_settings);
 
-                if (state.io.out)
-                    state.io.out->writeSuffix();
-
                 /// Reset the input stream, as we received an empty block while receiving external table data.
                 /// So, the stream has been marked as cancelled and we can't read from it anymore.
                 state.block_in.reset();
@@ -1183,34 +1180,45 @@ bool TCPHandler::receiveData(bool scalar)
     if (block)
     {
         if (scalar)
+        {
+            /// Scalar value
             query_context->addScalar(temporary_id.table_name, block);
+        }
+        else if (!state.need_receive_data_for_insert && !state.need_receive_data_for_input)
+        {
+            /// Data for external tables
+
+            auto resolved = query_context->tryResolveStorageID(temporary_id, Context::ResolveExternal);
+            StoragePtr storage;
+            /// If such a table does not exist, create it.
+            if (resolved)
+            {
+                storage = DatabaseCatalog::instance().getTable(resolved, *query_context);
+            }
+            else
+            {
+                NamesAndTypesList columns = block.getNamesAndTypesList();
+                std::cerr << columns.toString() << "\n";
+                auto temporary_table = TemporaryTableHolder(*query_context, ColumnsDescription{columns}, {});
+                storage = temporary_table.getTable();
+                query_context->addExternalTable(temporary_id.table_name, std::move(temporary_table));
+            }
+            auto metadata_snapshot = storage->getInMemoryMetadataPtr();
+            /// The data will be written directly to the table.
+            auto temporary_table_out = storage->write(ASTPtr(), metadata_snapshot, *query_context);
+            temporary_table_out->write(block);
+            temporary_table_out->writeSuffix();
+
+        }
+        else if (state.need_receive_data_for_input)
+        {
+            /// 'input' table function.
+            state.block_for_input = block;
+        }
         else
         {
-            /// If there is an insert request, then the data should be written directly to `state.io.out`.
-            /// Otherwise, we write the blocks in the temporary `external_table_name` table.
-            if (!state.need_receive_data_for_insert && !state.need_receive_data_for_input && !state.io.out)
-            {
-                auto resolved = query_context->tryResolveStorageID(temporary_id, Context::ResolveExternal);
-                StoragePtr storage;
-                /// If such a table does not exist, create it.
-                if (resolved)
-                    storage = DatabaseCatalog::instance().getTable(resolved, *query_context);
-                else
-                {
-                    NamesAndTypesList columns = block.getNamesAndTypesList();
-                    auto temporary_table = TemporaryTableHolder(*query_context, ColumnsDescription{columns}, {});
-                    storage = temporary_table.getTable();
-                    query_context->addExternalTable(temporary_id.table_name, std::move(temporary_table));
-                }
-                auto metadata_snapshot = storage->getInMemoryMetadataPtr();
-                /// The data will be written directly to the table.
-                state.io.out = storage->write(ASTPtr(), metadata_snapshot, *query_context);
-            }
-
-            if (state.need_receive_data_for_input)
-                state.block_for_input = block;
-            else
-                state.io.out->write(block);
+            /// INSERT query.
+            state.io.out->write(block);
         }
         return true;
     }

From dbb41ce4e24529e4dbb7b47e0a607e3037a3708b Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 11 Feb 2021 02:55:52 +0300
Subject: [PATCH 0951/1238] Remove debug output

---
 src/Server/TCPHandler.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp
index 9d6d42d9799..d66639ef111 100644
--- a/src/Server/TCPHandler.cpp
+++ b/src/Server/TCPHandler.cpp
@@ -1198,7 +1198,6 @@ bool TCPHandler::receiveData(bool scalar)
             else
             {
                 NamesAndTypesList columns = block.getNamesAndTypesList();
-                std::cerr << columns.toString() << "\n";
                 auto temporary_table = TemporaryTableHolder(*query_context, ColumnsDescription{columns}, {});
                 storage = temporary_table.getTable();
                 query_context->addExternalTable(temporary_id.table_name, std::move(temporary_table));

From e73a6c9660224f59da6413d4205d153f43bd2396 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 11 Feb 2021 03:03:21 +0300
Subject: [PATCH 0952/1238] Temporary disable LIVE VIEW test

---
 ...g_avg.py => 00979_live_view_watch_live_moving_avg.py.disabled} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tests/queries/0_stateless/{00979_live_view_watch_live_moving_avg.py => 00979_live_view_watch_live_moving_avg.py.disabled} (100%)

diff --git a/tests/queries/0_stateless/00979_live_view_watch_live_moving_avg.py b/tests/queries/0_stateless/00979_live_view_watch_live_moving_avg.py.disabled
similarity index 100%
rename from tests/queries/0_stateless/00979_live_view_watch_live_moving_avg.py
rename to tests/queries/0_stateless/00979_live_view_watch_live_moving_avg.py.disabled

From e9586cc44e170090b8faf474c5f76465b60daaa5 Mon Sep 17 00:00:00 2001
From: bharatnc <bharatnc@gmail.com>
Date: Wed, 10 Feb 2021 19:13:19 -0800
Subject: [PATCH 0953/1238] Document ALTER RENAME Column

---
 .../en/sql-reference/statements/alter/column.md | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md
index 0ea4d4b3dc5..5933cb8bce9 100644
--- a/docs/en/sql-reference/statements/alter/column.md
+++ b/docs/en/sql-reference/statements/alter/column.md
@@ -24,6 +24,7 @@ The following actions are supported:
 -   [COMMENT COLUMN](#alter_comment-column) — Adds a text comment to the column.
 -   [MODIFY COLUMN](#alter_modify-column) — Changes column’s type, default expression and TTL.
 -   [MODIFY COLUMN REMOVE](#modify-remove) — Removes one of the column properties.
+-   [RENAME COLUMN](#alter_rename-column) — Renames an existing column.
 
 These actions are described in detail below.
 
@@ -166,6 +167,22 @@ ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
 
 - [REMOVE TTL](ttl.md).
 
+## RENAME COLUMN {#alter_rename-column}
+
+Renames an existing column.
+
+Syntax:
+
+```sql
+ALTER TABLE table_name RENAME COLUMN column_name TO new_column_name;
+```
+
+**Example**
+
+```sql
+ALTER TABLE table_with_ttl RENAME COLUMN column_ttl TO column_ttl_new;
+```
+
 ## Limitations {#alter-query-limitations}
 
 The `ALTER` query lets you create and delete separate elements (columns) in nested data structures, but not whole nested data structures. To add a nested data structure, you can add columns with a name like `name.nested_name` and the type `Array(T)`. A nested data structure is equivalent to multiple array columns with a name that has the same prefix before the dot.

From dac0c0fa9547a3b85c422a35ad9191017595b76e Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 11 Feb 2021 09:56:14 +0300
Subject: [PATCH 0954/1238] Mark 01641_memory_tracking_insert_optimize as long

https://clickhouse-test-reports.s3.yandex.net/20301/3adadeb12bb7d2f4c9405927a28f9f7a49617d46/functional_stateless_tests_flaky_check_(address).html#fail1
---
 ...rence => 01641_memory_tracking_insert_optimize_long.reference} | 0
 ...ptimize.sql => 01641_memory_tracking_insert_optimize_long.sql} | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename tests/queries/0_stateless/{01641_memory_tracking_insert_optimize.reference => 01641_memory_tracking_insert_optimize_long.reference} (100%)
 rename tests/queries/0_stateless/{01641_memory_tracking_insert_optimize.sql => 01641_memory_tracking_insert_optimize_long.sql} (100%)

diff --git a/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.reference b/tests/queries/0_stateless/01641_memory_tracking_insert_optimize_long.reference
similarity index 100%
rename from tests/queries/0_stateless/01641_memory_tracking_insert_optimize.reference
rename to tests/queries/0_stateless/01641_memory_tracking_insert_optimize_long.reference
diff --git a/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql b/tests/queries/0_stateless/01641_memory_tracking_insert_optimize_long.sql
similarity index 100%
rename from tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql
rename to tests/queries/0_stateless/01641_memory_tracking_insert_optimize_long.sql

From 2905df831f9119d414c44a8eedd8df9012825889 Mon Sep 17 00:00:00 2001
From: Olga Revyakina <revolg@yandex-team.ru>
Date: Thu, 11 Feb 2021 10:15:18 +0300
Subject: [PATCH 0955/1238] JSON deteted

---
 docs/en/sql-reference/data-types/map.md       | 40 +++++++++++++---
 .../functions/tuple-map-functions.md          |  4 +-
 docs/ru/sql-reference/data-types/map.md       | 46 ++++++++++++-------
 .../functions/tuple-map-functions.md          |  4 +-
 4 files changed, 66 insertions(+), 28 deletions(-)

diff --git a/docs/en/sql-reference/data-types/map.md b/docs/en/sql-reference/data-types/map.md
index 0f0f69d421d..58634e5b669 100644
--- a/docs/en/sql-reference/data-types/map.md
+++ b/docs/en/sql-reference/data-types/map.md
@@ -5,7 +5,7 @@ toc_title: Map(key, value)
 
 # Map(key, value) {#data_type-map}
 
-`Map(key, value)` data type stores `key:value` pairs in structures like JSON. 
+`Map(key, value)` data type stores `key:value` pairs. 
 
 **Parameters** 
 -   `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md) or [Integer](../../sql-reference/data-types/int-uint.md).
@@ -14,24 +14,50 @@ toc_title: Map(key, value)
 !!! warning "Warning"
     Currently `Map` data type is an experimental feature. To work with it you must set `allow_experimental_map_type = 1`.
 
-To get the value from an `a Map('key', 'value')` column, use `a['key']` syntax.
+To get the value from an `a Map('key', 'value')` column, use `a['key']` syntax. This lookup works now with a linear complexity.
 
-**Example**
+**Examples**
 
-Query:
+Consider the table:
 
 ``` sql
 CREATE TABLE table_map (a Map(String, UInt64)) ENGINE=Memory;
-INSERT INTO table_map VALUES ({'key1':1, 'key2':100}), ({'key1':2,'key2':200}), ({'key1':3,'key2':300});
+INSERT INTO table_map VALUES ({'key1':1, 'key2':10}), ({'key1':2,'key2':20}), ({'key1':3,'key2':30});
+```
+
+Select all `key2` values: 
+
+```sql
 SELECT a['key2'] FROM table_map;
 ```
 Result:
 
 ```text
 ┌─arrayElement(a, 'key2')─┐
+│                      10 │
+│                      20 │
+│                      30 │
+└─────────────────────────┘
+```
+
+If there's no such `key` in the `Map()` column, the query returns zeros for numerical values, empty strings or empty arrays. 
+
+```sql
+INSERT INTO table_map VALUES ({'key3':100}), ({});
+SELECT a['key3'] FROM table_map;
+```
+
+Result:
+
+```text
+┌─arrayElement(a, 'key3')─┐
 │                     100 │
-│                     200 │
-│                     300 │
+│                       0 │
+└─────────────────────────┘
+┌─arrayElement(a, 'key3')─┐
+│                       0 │
+│                       0 │
+│                       0 │
 └─────────────────────────┘
 ```
 
diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md
index b81f971196a..18d008f11f2 100644
--- a/docs/en/sql-reference/functions/tuple-map-functions.md
+++ b/docs/en/sql-reference/functions/tuple-map-functions.md
@@ -7,7 +7,7 @@ toc_title: Working with maps
 
 ## map {#function-map}
 
-Arranges `key:value` pairs into a JSON data structure.
+Arranges `key:value` pairs into [Map(key, value)](../../sql-reference/data-types/map.md) data type.
 
 **Syntax** 
 
@@ -22,7 +22,7 @@ map(key1, value1[, key2, value2, ...])
 
 **Returned value**
 
--   JSON with `key:value` pairs.
+-  Data structure as `key:value` pairs.
 
 Type: [Map(key, value)](../../sql-reference/data-types/map.md).
 
diff --git a/docs/ru/sql-reference/data-types/map.md b/docs/ru/sql-reference/data-types/map.md
index c1391e37133..9c2ffedc4a9 100644
--- a/docs/ru/sql-reference/data-types/map.md
+++ b/docs/ru/sql-reference/data-types/map.md
@@ -5,7 +5,7 @@ toc_title: Map(key, value)
 
 # Map(key, value) {#data_type-map}
 
-Тип данных `Map(key, value)` хранит пары `ключ:значение` в структурах типа JSON. 
+Тип данных `Map(key, value)` хранит пары `ключ:значение`. 
 
 **Параметры** 
 -   `key` — ключ. [String](../../sql-reference/data-types/string.md) или [Integer](../../sql-reference/data-types/int-uint.md).
@@ -14,39 +14,51 @@ toc_title: Map(key, value)
 !!! warning "Предупреждение"
     Сейчас использование типа данных `Map` является экспериментальной возможностью. Чтобы использовать этот тип данных, включите настройку `allow_experimental_map_type = 1`.
 
-Чтобы получить значение из колонки `a Map('key', 'value')`, используйте синтаксис `a['key']`.
+Чтобы получить значение из колонки `a Map('key', 'value')`, используйте синтаксис `a['key']`. В настоящее время такая подстановка работает по алгоритму с линейной сложностью.
 
-**Пример**
+**Примеры**
 
-Запрос:
+Рассмотрим таблицу:
 
 ``` sql
 CREATE TABLE table_map (a Map(String, UInt64)) ENGINE=Memory;
-INSERT INTO table_map VALUES ({'key1':1, 'key2':100}), ({'key1':2,'key2':200}), ({'key1':3,'key2':300});
+INSERT INTO table_map VALUES ({'key1':1, 'key2':10}), ({'key1':2,'key2':20}), ({'key1':3,'key2':30});
+```
+
+Выборка всем значений ключа `key2`: 
+
+```sql
 SELECT a['key2'] FROM table_map;
 ```
 Результат:
 
 ```text
 ┌─arrayElement(a, 'key2')─┐
-│                     100 │
-│                     200 │
-│                     300 │
+│                      10 │
+│                      20 │
+│                      30 │
 └─────────────────────────┘
 ```
 
-## Преобразование типа данных Tuple в Map {#map-and-tuple}
+Если для какого-то ключа `key` в колонке с типом `Map()` нет значения, запрос возвращает нули для числовых колонок, пустые строки или пустые массивы. 
 
-Для преобразования данных с типом `Tuple()` в тип `Map()` можно использовать функцию [CAST](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast):
-
-``` sql
-SELECT CAST(([1, 2, 3], ['Ready', 'Steady', 'Go']), 'Map(UInt8, String)') AS map;
+```sql
+INSERT INTO table_map VALUES ({'key3':100}), ({});
+SELECT a['key3'] FROM table_map;
 ```
 
-``` text
-┌─map───────────────────────────┐
-│ {1:'Ready',2:'Steady',3:'Go'} │
-└───────────────────────────────┘
+Результат:
+
+```text
+┌─arrayElement(a, 'key3')─┐
+│                     100 │
+│                       0 │
+└─────────────────────────┘
+┌─arrayElement(a, 'key3')─┐
+│                       0 │
+│                       0 │
+│                       0 │
+└─────────────────────────┘
 ```
 
 **См. также**
diff --git a/docs/ru/sql-reference/functions/tuple-map-functions.md b/docs/ru/sql-reference/functions/tuple-map-functions.md
index 65e44698008..a36613280a1 100644
--- a/docs/ru/sql-reference/functions/tuple-map-functions.md
+++ b/docs/ru/sql-reference/functions/tuple-map-functions.md
@@ -7,7 +7,7 @@ toc_title: Работа с контейнерами map
 
 ## map {#function-map}
 
-Преобразовывает пары `ключ:значение` в структуру JSON.
+Преобразовывает пары `ключ:значение` в тип данных [Map(key, value)](../../sql-reference/data-types/map.md).
 
 **Синтаксис** 
 
@@ -22,7 +22,7 @@ map(key1, value1[, key2, value2, ...])
 
 **Возвращаемое значение**
 
--   Структура JSON с парами `ключ:значение`.
+-   Структура данных в виде пар `ключ:значение`.
 
 Тип: [Map(key, value)](../../sql-reference/data-types/map.md).
 

From 064deaf3c1bf6dabf461b4cda124fcb2779bbea6 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 11 Feb 2021 01:10:42 +0300
Subject: [PATCH 0956/1238] Fix 00738_lock_for_inner_table flakiness

It is possible to execute DROP just before an INSERT will acquire the
lock for the underlying table, and then the test will fail [1]:

    2021-02-09 13:03:27 00738_lock_for_inner_table:                                             [ FAIL ] 3.18 sec. - having stderror:
    2021-02-09 13:03:27 [3eff0fc65d1a] 2021.02.09 13:03:27.440841 [ 220384 ] {test_00738} <Error> executeQuery: Code: 60, e.displayText() = DB::Exception: Table default.`.inner_id.00000738-1000-4000-8000-000000000001` (9647fbaa-a80d-420e-9240-30f5719a84e7) doesn't exist (version 21.3.1.5956) (from [::1]:56964) (comment: '/usr/share/clickhouse-test/queries/0_stateless/01701_if_tuple_segfault.sql') (in query: INSERT INTO tab_00738 SELECT number FROM numbers(10000000)), Stack trace (when copying this message, always include the lines below):
    2021-02-09 13:03:27
    2021-02-09 13:03:27 0. ./obj-x86_64-linux-gnu/../contrib/libcxx/include/exception:133: std::exception::capture() @ 0x10d0a908 in /usr/bin/clickhouse
    2021-02-09 13:03:27 1. ./obj-x86_64-linux-gnu/../contrib/libcxx/include/exception:111: std::exception::exception() @ 0x10d0a8d5 in /usr/bin/clickhouse
    2021-02-09 13:03:27 2. ./obj-x86_64-linux-gnu/../contrib/poco/Foundation/src/Exception.cpp:27: Poco::Exception::Exception(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, int) @ 0x1e5b4943 in /usr/bin/clickhouse
    2021-02-09 13:03:27 3. ./obj-x86_64-linux-gnu/../src/Common/Exception.cpp:54: DB::Exception::Exception(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, int, bool) @ 0x10cec720 in /usr/bin/clickhouse
    2021-02-09 13:03:27 4. ./obj-x86_64-linux-gnu/../src/Common/Exception.h:38: DB::Exception::Exception<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >(int, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&&) @ 0x10e82041 in /usr/bin/clickhouse
    2021-02-09 13:03:27 5. ./obj-x86_64-linux-gnu/../contrib/libcxx/include/optional:324: void std::__1::__optional_storage_base<DB::Exception, false>::__construct<int const&, char const (&) [23], std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >(int const&, char const (&) [23], std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&&) @ 0x19940df9 in /usr/bin/clickhouse
    2021-02-09 13:03:27 6. ./obj-x86_64-linux-gnu/../contrib/libcxx/include/optional:830: DB::Exception& std::__1::optional<DB::Exception>::emplace<int const&, char const (&) [23], std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, void>(int const&, char const (&) [23], std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&&) @ 0x19939b7a in /usr/bin/clickhouse
    2021-02-09 13:03:27 7. ./obj-x86_64-linux-gnu/../src/Interpreters/DatabaseCatalog.cpp:233: DB::DatabaseCatalog::getTableImpl(DB::StorageID const&, DB::Context const&, std::__1::optional<DB::Exception>*) const @ 0x1992efcf in /usr/bin/clickhouse
    2021-02-09 13:03:27 8. ./obj-x86_64-linux-gnu/../src/Interpreters/DatabaseCatalog.cpp:641: DB::DatabaseCatalog::getTable(DB::StorageID const&, DB::Context const&) const @ 0x19932fba in /usr/bin/clickhouse
    2021-02-09 13:03:27 9. ./obj-x86_64-linux-gnu/../src/Storages/StorageMaterializedView.cpp:376: DB::StorageMaterializedView::getTargetTable() const @ 0x1a5fe2bf in /usr/bin/clickhouse
    2021-02-09 13:03:27 10. ./obj-x86_64-linux-gnu/../src/DataStreams/PushingToViewsBlockOutputStream.cpp:88: DB::PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(std::__1::shared_ptr<DB::IStorage> const&, std::__1::shared_ptr<DB::StorageInMemoryMetadata const> const&, DB::Context const&, std::__1::shared_ptr<DB::IAST> const&, bool) @ 0x19e26530 in /usr/bin/clickhouse

And if you will take a look at the 88 line, you will see that this is
just a timing issue.

  [1]: https://clickhouse-test-reports.s3.yandex.net/19673/7bddaba9208232f54095712f0cbfa44c6a5e2564/functional_stateless_tests_(antlr_debug).html#fail1
---
 .../0_stateless/00738_lock_for_inner_table.sh  | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/tests/queries/0_stateless/00738_lock_for_inner_table.sh b/tests/queries/0_stateless/00738_lock_for_inner_table.sh
index 9540d566ac3..45a28cf2967 100755
--- a/tests/queries/0_stateless/00738_lock_for_inner_table.sh
+++ b/tests/queries/0_stateless/00738_lock_for_inner_table.sh
@@ -5,9 +5,11 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
 
-echo "DROP TABLE IF EXISTS tab_00738;
-DROP TABLE IF EXISTS mv;
-CREATE TABLE tab_00738(a Int) ENGINE = Log;
+echo "DROP TABLE IF EXISTS tab_00738 SYNC;
+DROP TABLE IF EXISTS mv SYNC;
+-- create table with fsync and 20 partitions for slower INSERT
+-- (since increasing number of records will make it significantly slower in debug build, but not in release)
+CREATE TABLE tab_00738(a Int) ENGINE = MergeTree() ORDER BY a PARTITION BY a%20 SETTINGS fsync_after_insert=1;
 CREATE MATERIALIZED VIEW mv UUID '00000738-1000-4000-8000-000000000001' ENGINE = Log AS SELECT a FROM tab_00738;" | ${CLICKHOUSE_CLIENT} -n
 
 ${CLICKHOUSE_CLIENT} --query_id test_00738 --query "INSERT INTO tab_00738 SELECT number FROM numbers(10000000)" &
@@ -20,6 +22,16 @@ function drop()
 function wait_for_query_to_start()
 {
     while [[ $(${CLICKHOUSE_CLIENT} --query "SELECT count() FROM system.processes WHERE query_id = 'test_00738'") == 0 ]]; do sleep 0.001; done
+
+    # The query is already started, but there is no guarantee that it locks the underlying table already.
+    # Wait until PushingToViewsBlockOutputStream will acquire the lock of the underlying table for the INSERT query.
+    # (assume that 0.5 second is enough for this, but this is not 100% correct)
+    sleep 0.5
+
+    # query already finished, fail
+    if [[ $(${CLICKHOUSE_CLIENT} --query "SELECT count() FROM system.processes WHERE query_id = 'test_00738'") == 0 ]]; then
+        exit 2
+    fi
 }
 
 export -f wait_for_query_to_start

From ed7e5a26be84e5041c31e2d7a2374d9ce517aa1c Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 11 Feb 2021 10:16:13 +0300
Subject: [PATCH 0957/1238] Generate UUID based on random current database in
 00738_lock_for_inner_table

---
 tests/queries/0_stateless/00738_lock_for_inner_table.sh | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tests/queries/0_stateless/00738_lock_for_inner_table.sh b/tests/queries/0_stateless/00738_lock_for_inner_table.sh
index 45a28cf2967..9308e3e07db 100755
--- a/tests/queries/0_stateless/00738_lock_for_inner_table.sh
+++ b/tests/queries/0_stateless/00738_lock_for_inner_table.sh
@@ -5,18 +5,22 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
 
+# there are some issues with Atomic database, let's generate it uniq
+# otherwise flaky check will not pass.
+uuid=$(${CLICKHOUSE_CLIENT} --query "SELECT reinterpretAsUUID(currentDatabase())")
+
 echo "DROP TABLE IF EXISTS tab_00738 SYNC;
 DROP TABLE IF EXISTS mv SYNC;
 -- create table with fsync and 20 partitions for slower INSERT
 -- (since increasing number of records will make it significantly slower in debug build, but not in release)
 CREATE TABLE tab_00738(a Int) ENGINE = MergeTree() ORDER BY a PARTITION BY a%20 SETTINGS fsync_after_insert=1;
-CREATE MATERIALIZED VIEW mv UUID '00000738-1000-4000-8000-000000000001' ENGINE = Log AS SELECT a FROM tab_00738;" | ${CLICKHOUSE_CLIENT} -n
+CREATE MATERIALIZED VIEW mv UUID '$uuid' ENGINE = Log AS SELECT a FROM tab_00738;" | ${CLICKHOUSE_CLIENT} -n
 
 ${CLICKHOUSE_CLIENT} --query_id test_00738 --query "INSERT INTO tab_00738 SELECT number FROM numbers(10000000)" &
 
 function drop()
 {
-    ${CLICKHOUSE_CLIENT} --query "DROP TABLE \`.inner_id.00000738-1000-4000-8000-000000000001\`" -n
+    ${CLICKHOUSE_CLIENT} --query "DROP TABLE \`.inner_id.$uuid\`" -n
 }
 
 function wait_for_query_to_start()

From 6845eb36fa5acff1c9eafe82ac651aa8e22db1b0 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 11 Feb 2021 10:19:28 +0300
Subject: [PATCH 0958/1238] Generate query_id based on current database in
 00738_lock_for_inner_table

For flaky checker
---
 tests/queries/0_stateless/00738_lock_for_inner_table.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/queries/0_stateless/00738_lock_for_inner_table.sh b/tests/queries/0_stateless/00738_lock_for_inner_table.sh
index 9308e3e07db..d19288f65d8 100755
--- a/tests/queries/0_stateless/00738_lock_for_inner_table.sh
+++ b/tests/queries/0_stateless/00738_lock_for_inner_table.sh
@@ -16,7 +16,7 @@ DROP TABLE IF EXISTS mv SYNC;
 CREATE TABLE tab_00738(a Int) ENGINE = MergeTree() ORDER BY a PARTITION BY a%20 SETTINGS fsync_after_insert=1;
 CREATE MATERIALIZED VIEW mv UUID '$uuid' ENGINE = Log AS SELECT a FROM tab_00738;" | ${CLICKHOUSE_CLIENT} -n
 
-${CLICKHOUSE_CLIENT} --query_id test_00738 --query "INSERT INTO tab_00738 SELECT number FROM numbers(10000000)" &
+${CLICKHOUSE_CLIENT} --query_id insert_$CLICKHOUSE_DATABASE --query "INSERT INTO tab_00738 SELECT number FROM numbers(10000000)" &
 
 function drop()
 {
@@ -25,7 +25,7 @@ function drop()
 
 function wait_for_query_to_start()
 {
-    while [[ $(${CLICKHOUSE_CLIENT} --query "SELECT count() FROM system.processes WHERE query_id = 'test_00738'") == 0 ]]; do sleep 0.001; done
+    while [[ $(${CLICKHOUSE_CLIENT} --query "SELECT count() FROM system.processes WHERE query_id = 'insert_$CLICKHOUSE_DATABASE'") == 0 ]]; do sleep 0.001; done
 
     # The query is already started, but there is no guarantee that it locks the underlying table already.
     # Wait until PushingToViewsBlockOutputStream will acquire the lock of the underlying table for the INSERT query.
@@ -33,7 +33,7 @@ function wait_for_query_to_start()
     sleep 0.5
 
     # query already finished, fail
-    if [[ $(${CLICKHOUSE_CLIENT} --query "SELECT count() FROM system.processes WHERE query_id = 'test_00738'") == 0 ]]; then
+    if [[ $(${CLICKHOUSE_CLIENT} --query "SELECT count() FROM system.processes WHERE query_id = 'insert_$CLICKHOUSE_DATABASE'") == 0 ]]; then
         exit 2
     fi
 }

From 222a0db3f45a434a2c7f6163498c85835316c9ef Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 11 Feb 2021 10:30:08 +0300
Subject: [PATCH 0959/1238] Update tests expectations for
 early_constant_folding

---
 tests/queries/0_stateless/00597_push_down_predicate.reference   | 1 +
 .../queries/0_stateless/01029_early_constant_folding.reference  | 2 +-
 tests/queries/0_stateless/01029_early_constant_folding.sql      | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/queries/0_stateless/00597_push_down_predicate.reference b/tests/queries/0_stateless/00597_push_down_predicate.reference
index cea533d6ccb..794d9e7af5f 100644
--- a/tests/queries/0_stateless/00597_push_down_predicate.reference
+++ b/tests/queries/0_stateless/00597_push_down_predicate.reference
@@ -115,6 +115,7 @@ FROM
     SELECT
         1 AS id,
         identity(cast(1, \'UInt8\')) AS subquery
+    WHERE subquery = 1
 )
 WHERE subquery = 1
 1	1
diff --git a/tests/queries/0_stateless/01029_early_constant_folding.reference b/tests/queries/0_stateless/01029_early_constant_folding.reference
index 7e2f6c7ce76..8a1d4cec388 100644
--- a/tests/queries/0_stateless/01029_early_constant_folding.reference
+++ b/tests/queries/0_stateless/01029_early_constant_folding.reference
@@ -2,7 +2,7 @@ SELECT 1
 WHERE 0
 SELECT 1
 SELECT 1
-WHERE 0
+WHERE (1 IN (0, 2)) AND (2 = (identity(cast(2, \'UInt8\')) AS subquery))
 SELECT 1
 WHERE 1 IN (
 (
diff --git a/tests/queries/0_stateless/01029_early_constant_folding.sql b/tests/queries/0_stateless/01029_early_constant_folding.sql
index 428c3625295..6336b62e080 100644
--- a/tests/queries/0_stateless/01029_early_constant_folding.sql
+++ b/tests/queries/0_stateless/01029_early_constant_folding.sql
@@ -4,7 +4,7 @@ EXPLAIN SYNTAX SELECT 1 WHERE 1 = 0;
 
 EXPLAIN SYNTAX SELECT 1 WHERE 1 IN (0, 1, 2);
 
-EXPLAIN SYNTAX SELECT 1 WHERE 1 IN (0, 2) AND 2 = (SELECT 2);
+EXPLAIN SYNTAX SELECT 1 WHERE 1 IN (0, 2) AND 2 = ((SELECT 2) AS subquery);
 
 -- no constant folding
 

From 308fbd2ba5b908ba019c077dccaa8e4af825a1e8 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com>
Date: Thu, 11 Feb 2021 10:45:51 +0300
Subject: [PATCH 0960/1238] Update run-fuzzer.sh

---
 docker/test/fuzzer/run-fuzzer.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh
index 9af401238a3..766fec76179 100755
--- a/docker/test/fuzzer/run-fuzzer.sh
+++ b/docker/test/fuzzer/run-fuzzer.sh
@@ -190,7 +190,7 @@ case "$stage" in
         # Lost connection to the server. This probably means that the server died
         # with abort.
         echo "failure" > status.txt
-        if ! grep -ao "Received signal.*\|Logical error.*\|Assertion.*failed\|Failed assertion.*\|.*runtime error: .*\|.*is located.*\|SUMMARY: MemorySanitizer:.*\|SUMMARY: ThreadSanitizer:.*" server.log > description.txt
+        if ! grep -ao "Received signal.*\|Logical error.*\|Assertion.*failed\|Failed assertion.*\|.*runtime error: .*\|.*is located.*\|SUMMARY: MemorySanitizer:.*\|SUMMARY: ThreadSanitizer:.*\|.*_LIBCPP_ASSERT.*" server.log > description.txt
         then
             echo "Lost connection to server. See the logs." > description.txt
         fi

From 363007b9649cd4add7123ca99c160ca91d50ce6f Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Thu, 11 Feb 2021 11:39:39 +0300
Subject: [PATCH 0961/1238] fixes

---
 src/Interpreters/InterpreterSelectQuery.cpp   | 44 ++++++++++++-------
 src/Processors/Transforms/WindowTransform.cpp |  4 +-
 .../01591_window_functions.reference          |  8 ++++
 .../0_stateless/01591_window_functions.sql    |  7 +++
 4 files changed, 44 insertions(+), 19 deletions(-)

diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp
index f78ca478fb8..84de6fa4e6c 100644
--- a/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/src/Interpreters/InterpreterSelectQuery.cpp
@@ -1847,26 +1847,36 @@ static bool windowDescriptionComparator(const WindowDescription * _left,
         {
             return true;
         }
-
-        if (left[i].column_number < right[i].column_number)
-        {
-            return true;
-        }
-
-        if (left[i].direction < right[i].direction)
-        {
-            return true;
-        }
-
-        if (left[i].nulls_direction < right[i].nulls_direction)
-        {
-            return true;
-        }
-
-        if (left[i] != right[i])
+        else if (left[i].column_name > right[i].column_name)
         {
             return false;
         }
+        else if (left[i].column_number < right[i].column_number)
+        {
+            return true;
+        }
+        else if (left[i].column_number > right[i].column_number)
+        {
+            return false;
+        }
+        else if (left[i].direction < right[i].direction)
+        {
+            return true;
+        }
+        else if (left[i].direction > right[i].direction)
+        {
+            return false;
+        }
+        else if (left[i].nulls_direction < right[i].nulls_direction)
+        {
+            return true;
+        }
+        else if (left[i].nulls_direction > right[i].nulls_direction)
+        {
+            return false;
+        }
+
+        assert(left[i] == right[i]);
     }
 
     // Note that we check the length last, because we want to put together the
diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index 7a53d328c50..7fc9b56c3d5 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -798,10 +798,10 @@ void WindowTransform::updateAggregationState()
             // For now, add the values one by one.
             auto * columns = ws.argument_columns.data();
             // Removing arena.get() from the loop makes it faster somehow...
-            auto * _arena = arena.get();
+            auto * arena_ = arena.get();
             for (auto row = first_row; row < past_the_end_row; ++row)
             {
-                a->add(buf, columns, row, _arena);
+                a->add(buf, columns, row, arena_);
             }
         }
     }
diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference
index 46cbaa4e998..217a8571d5f 100644
--- a/tests/queries/0_stateless/01591_window_functions.reference
+++ b/tests/queries/0_stateless/01591_window_functions.reference
@@ -912,3 +912,11 @@ Expression ((Projection + Before ORDER BY))
                   Expression ((Before window functions + (Projection + Before ORDER BY)))
                     SettingQuotaAndLimits (Set limits and quota after reading from storage)
                       ReadFromStorage (SystemNumbers)
+-- A test case for the sort comparator found by fuzzer.
+SELECT
+    max(number) OVER (ORDER BY number DESC NULLS FIRST),
+    max(number) OVER (ORDER BY number ASC NULLS FIRST)
+FROM numbers(2)
+;
+1	0
+1	1
diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql
index 04fd48bde9f..8742562a621 100644
--- a/tests/queries/0_stateless/01591_window_functions.sql
+++ b/tests/queries/0_stateless/01591_window_functions.sql
@@ -308,3 +308,10 @@ from
     (select number, intDiv(number, 3) p, mod(number, 5) o
         from numbers(16)) t
 ;
+
+-- A test case for the sort comparator found by fuzzer.
+SELECT
+    max(number) OVER (ORDER BY number DESC NULLS FIRST),
+    max(number) OVER (ORDER BY number ASC NULLS FIRST)
+FROM numbers(2)
+;

From b49b7f859d0c7edeee539286cdc4051226971e78 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Thu, 11 Feb 2021 12:17:57 +0300
Subject: [PATCH 0962/1238] Simplify startup with fixed config

---
 src/Coordination/InMemoryStateManager.cpp     | 38 +++++++-
 src/Coordination/InMemoryStateManager.h       | 14 ++-
 src/Coordination/NuKeeperServer.cpp           | 39 +-------
 src/Coordination/NuKeeperServer.h             | 15 +---
 .../NuKeeperStorageDispatcher.cpp             | 90 ++-----------------
 .../configs/enable_test_keeper1.xml           |  6 +-
 .../configs/enable_test_keeper2.xml           |  6 +-
 .../configs/enable_test_keeper3.xml           |  6 +-
 8 files changed, 67 insertions(+), 147 deletions(-)

diff --git a/src/Coordination/InMemoryStateManager.cpp b/src/Coordination/InMemoryStateManager.cpp
index 15a1f7aa622..d90c7e46f0d 100644
--- a/src/Coordination/InMemoryStateManager.cpp
+++ b/src/Coordination/InMemoryStateManager.cpp
@@ -1,16 +1,46 @@
 #include <Coordination/InMemoryStateManager.h>
+#include <Common/Exception.h>
 
 namespace DB
 {
 
-InMemoryStateManager::InMemoryStateManager(int my_server_id_, const std::string & endpoint_)
+namespace ErrorCodes
+{
+    extern const int RAFT_ERROR;
+}
+
+InMemoryStateManager::InMemoryStateManager(
+    int my_server_id_,
+    const std::string & config_prefix,
+    const Poco::Util::AbstractConfiguration & config)
     : my_server_id(my_server_id_)
-    , endpoint(endpoint_)
     , log_store(nuraft::cs_new<InMemoryLogStore>())
-    , server_config(nuraft::cs_new<nuraft::srv_config>(my_server_id, endpoint))
     , cluster_config(nuraft::cs_new<nuraft::cluster_config>())
 {
-    cluster_config->get_servers().push_back(server_config);
+    Poco::Util::AbstractConfiguration::Keys keys;
+    config.keys(config_prefix, keys);
+
+    for (const auto & server_key : keys)
+    {
+        std::string full_prefix = config_prefix + "." + server_key;
+        int server_id = config.getInt(full_prefix + ".id");
+        std::string hostname = config.getString(full_prefix + ".hostname");
+        int port = config.getInt(full_prefix + ".port");
+        bool can_become_leader = config.getBool(full_prefix + ".can_become_leader", true);
+        int32_t priority = config.getInt(full_prefix + ".priority", 1);
+
+        auto endpoint = hostname + ":" + std::to_string(port);
+        auto peer_config = nuraft::cs_new<nuraft::srv_config>(server_id, 0, endpoint, "", !can_become_leader, priority);
+        if (server_id == my_server_id)
+        {
+            my_server_config = peer_config;
+            my_port = port;
+        }
+
+        cluster_config->get_servers().push_back(peer_config);
+    }
+    if (!my_server_config)
+        throw Exception(ErrorCodes::RAFT_ERROR, "Our server id {} not found in raft_configuration section");
 }
 
 void InMemoryStateManager::save_config(const nuraft::cluster_config & config)
diff --git a/src/Coordination/InMemoryStateManager.h b/src/Coordination/InMemoryStateManager.h
index 7446073c9c9..b48b5188f36 100644
--- a/src/Coordination/InMemoryStateManager.h
+++ b/src/Coordination/InMemoryStateManager.h
@@ -4,6 +4,7 @@
 #include <string>
 #include <Coordination/InMemoryLogStore.h>
 #include <libnuraft/nuraft.hxx> // Y_IGNORE
+#include <Poco/Util/AbstractConfiguration.h>
 
 namespace DB
 {
@@ -11,7 +12,10 @@ namespace DB
 class InMemoryStateManager : public nuraft::state_mgr
 {
 public:
-    InMemoryStateManager(int server_id_, const std::string & endpoint_);
+    InMemoryStateManager(
+        int server_id_,
+        const std::string & config_prefix,
+        const Poco::Util::AbstractConfiguration & config);
 
     nuraft::ptr<nuraft::cluster_config> load_config() override { return cluster_config; }
 
@@ -25,15 +29,17 @@ public:
 
     Int32 server_id() override { return my_server_id; }
 
-    nuraft::ptr<nuraft::srv_config> get_srv_config() const { return server_config; }
+    nuraft::ptr<nuraft::srv_config> get_srv_config() const { return my_server_config; }
 
     void system_exit(const int /* exit_code */) override {}
 
+    int getPort() const { return my_port; }
+
 private:
     int my_server_id;
-    std::string endpoint;
+    int my_port;
     nuraft::ptr<InMemoryLogStore> log_store;
-    nuraft::ptr<nuraft::srv_config> server_config;
+    nuraft::ptr<nuraft::srv_config> my_server_config;
     nuraft::ptr<nuraft::cluster_config> cluster_config;
     nuraft::ptr<nuraft::srv_state> server_state;
 };
diff --git a/src/Coordination/NuKeeperServer.cpp b/src/Coordination/NuKeeperServer.cpp
index 0d4bdcc60fe..c7deebfdb96 100644
--- a/src/Coordination/NuKeeperServer.cpp
+++ b/src/Coordination/NuKeeperServer.cpp
@@ -19,33 +19,18 @@ namespace ErrorCodes
 }
 
 NuKeeperServer::NuKeeperServer(
-    int server_id_, const std::string & hostname_, int port_,
+    int server_id_,
     const CoordinationSettingsPtr & coordination_settings_,
+    const Poco::Util::AbstractConfiguration & config,
     ResponsesQueue & responses_queue_)
     : server_id(server_id_)
-    , hostname(hostname_)
-    , port(port_)
-    , endpoint(hostname + ":" + std::to_string(port))
     , coordination_settings(coordination_settings_)
     , state_machine(nuraft::cs_new<NuKeeperStateMachine>(responses_queue_, coordination_settings))
-    , state_manager(nuraft::cs_new<InMemoryStateManager>(server_id, endpoint))
+    , state_manager(nuraft::cs_new<InMemoryStateManager>(server_id, "test_keeper_server.raft_configuration", config))
     , responses_queue(responses_queue_)
 {
 }
 
-void NuKeeperServer::addServer(int server_id_, const std::string & server_uri_, bool can_become_leader_, int32_t priority)
-{
-    nuraft::srv_config config(server_id_, 0, server_uri_, "", /* learner = */ !can_become_leader_, priority);
-    auto ret1 = raft_instance->add_srv(config);
-    auto code = ret1->get_result_code();
-    if (code == nuraft::cmd_result_code::TIMEOUT
-        || code == nuraft::cmd_result_code::BAD_REQUEST
-        || code == nuraft::cmd_result_code::NOT_LEADER
-        || code == nuraft::cmd_result_code::FAILED)
-        throw Exception(ErrorCodes::RAFT_ERROR, "Cannot add server to RAFT quorum with code {}, message '{}'", ret1->get_result_code(), ret1->get_result_str());
-}
-
-
 void NuKeeperServer::startup(bool should_build_quorum)
 {
     nuraft::raft_params params;
@@ -69,7 +54,7 @@ void NuKeeperServer::startup(bool should_build_quorum)
     };
 
     raft_instance = launcher.init(
-        state_machine, state_manager, nuraft::cs_new<LoggerWrapper>("RaftInstance", coordination_settings->raft_logs_level), port,
+        state_machine, state_manager, nuraft::cs_new<LoggerWrapper>("RaftInstance", coordination_settings->raft_logs_level), state_manager->getPort(),
         asio_opts, params, init_options);
 
     if (!raft_instance)
@@ -170,7 +155,6 @@ bool NuKeeperServer::isLeaderAlive() const
     return raft_instance->is_leader_alive();
 }
 
-
 nuraft::cb_func::ReturnCode NuKeeperServer::callbackFunc(nuraft::cb_func::Type type, nuraft::cb_func::Param * /* param */)
 {
     if (type == nuraft::cb_func::Type::BecomeFresh || type == nuraft::cb_func::Type::BecomeLeader)
@@ -182,21 +166,6 @@ nuraft::cb_func::ReturnCode NuKeeperServer::callbackFunc(nuraft::cb_func::Type t
     return nuraft::cb_func::ReturnCode::Ok;
 }
 
-bool NuKeeperServer::waitForServer(int32_t id) const
-{
-    /// FIXME
-    for (size_t i = 0; i < 30; ++i)
-    {
-        if (raft_instance->get_srv_config(id) != nullptr)
-            return true;
-        LOG_DEBUG(&Poco::Logger::get("NuRaftInit"), "Waiting for server {} to join the cluster", id);
-        std::this_thread::sleep_for(std::chrono::milliseconds(100));
-    }
-
-    LOG_DEBUG(&Poco::Logger::get("NuRaftInit"), "Cannot wait for server {}", id);
-    return false;
-}
-
 void NuKeeperServer::waitInit()
 {
     std::unique_lock lock(initialized_mutex);
diff --git a/src/Coordination/NuKeeperServer.h b/src/Coordination/NuKeeperServer.h
index ce6dd2f0fbb..a37d4d9127a 100644
--- a/src/Coordination/NuKeeperServer.h
+++ b/src/Coordination/NuKeeperServer.h
@@ -16,17 +16,11 @@ class NuKeeperServer
 private:
     int server_id;
 
-    std::string hostname;
-
-    int port;
-
-    std::string endpoint;
-
     CoordinationSettingsPtr coordination_settings;
 
     nuraft::ptr<NuKeeperStateMachine> state_machine;
 
-    nuraft::ptr<nuraft::state_mgr> state_manager;
+    nuraft::ptr<InMemoryStateManager> state_manager;
 
     nuraft::raft_launcher launcher;
 
@@ -44,8 +38,9 @@ private:
 
 public:
     NuKeeperServer(
-        int server_id_, const std::string & hostname_, int port_,
+        int server_id_,
         const CoordinationSettingsPtr & coordination_settings_,
+        const Poco::Util::AbstractConfiguration & config,
         ResponsesQueue & responses_queue_);
 
     void startup(bool should_build_quorum);
@@ -56,14 +51,10 @@ public:
 
     std::unordered_set<int64_t> getDeadSessions();
 
-    void addServer(int server_id_, const std::string & server_uri, bool can_become_leader_, int32_t priority);
-
     bool isLeader() const;
 
     bool isLeaderAlive() const;
 
-    bool waitForServer(int32_t server_id) const;
-
     void waitInit();
 
     void shutdown();
diff --git a/src/Coordination/NuKeeperStorageDispatcher.cpp b/src/Coordination/NuKeeperStorageDispatcher.cpp
index 300604e0f6e..9dc420830ad 100644
--- a/src/Coordination/NuKeeperStorageDispatcher.cpp
+++ b/src/Coordination/NuKeeperStorageDispatcher.cpp
@@ -103,97 +103,21 @@ bool NuKeeperStorageDispatcher::putRequest(const Coordination::ZooKeeperRequestP
     return true;
 }
 
-namespace
-{
-    bool shouldBuildQuorum(int32_t myid, int32_t my_priority, bool my_can_become_leader, const std::vector<std::tuple<int, std::string, int, bool, int32_t>> & server_configs)
-    {
-        if (!my_can_become_leader)
-            return false;
-
-        int32_t minid = myid;
-        bool has_equal_priority = false;
-        for (const auto & [id, hostname, port, can_become_leader, priority] : server_configs)
-        {
-            if (my_priority < priority)
-                return false;
-            else if (my_priority == priority)
-                has_equal_priority = true;
-            minid = std::min(minid, id);
-        }
-
-        if (has_equal_priority)
-            return minid == myid;
-        else
-            return true;
-    }
-}
-
 void NuKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfiguration & config)
 {
     LOG_DEBUG(log, "Initializing storage dispatcher");
     int myid = config.getInt("test_keeper_server.server_id");
-    std::string myhostname;
-    int myport;
-    int32_t my_priority = 1;
+
     coordination_settings->loadFromConfig("test_keeper_server.coordination_settings", config);
 
-    Poco::Util::AbstractConfiguration::Keys keys;
-    config.keys("test_keeper_server.raft_configuration", keys);
-    bool my_can_become_leader = true;
-
-    std::vector<std::tuple<int, std::string, int, bool, int32_t>> server_configs;
-    std::vector<int32_t> ids;
-    for (const auto & server_key : keys)
-    {
-        int server_id = config.getInt("test_keeper_server.raft_configuration." + server_key + ".id");
-        std::string hostname = config.getString("test_keeper_server.raft_configuration." + server_key + ".hostname");
-        int port = config.getInt("test_keeper_server.raft_configuration." + server_key + ".port");
-        bool can_become_leader = config.getBool("test_keeper_server.raft_configuration." + server_key + ".can_become_leader", true);
-        int32_t priority = config.getInt("test_keeper_server.raft_configuration." + server_key + ".priority", 1);
-        if (server_id == myid)
-        {
-            myhostname = hostname;
-            myport = port;
-            my_can_become_leader = can_become_leader;
-            my_priority = priority;
-        }
-        else
-        {
-            server_configs.emplace_back(server_id, hostname, port, can_become_leader, priority);
-        }
-        ids.push_back(server_id);
-    }
-
-    server = std::make_unique<NuKeeperServer>(myid, myhostname, myport, coordination_settings, responses_queue);
+    server = std::make_unique<NuKeeperServer>(myid, coordination_settings, config, responses_queue);
     try
     {
-        bool should_build_quorum = shouldBuildQuorum(myid, my_priority, my_can_become_leader, server_configs);
-        server->startup(should_build_quorum);
-        if (should_build_quorum)
-        {
-
-            server->waitInit();
-            for (const auto & [id, hostname, port, can_become_leader, priority] : server_configs)
-            {
-                LOG_DEBUG(log, "Adding server with id {} ({}:{})", id, hostname, port);
-                do
-                {
-                    server->addServer(id, hostname + ":" + std::to_string(port), can_become_leader, priority);
-                }
-                while (!server->waitForServer(id));
-
-                LOG_DEBUG(log, "Server with id {} ({}:{}) added to cluster", id, hostname, port);
-            }
-
-            if (server_configs.size() > 1)
-                LOG_DEBUG(log, "All servers were added to quorum");
-        }
-        else
-        {
-            LOG_DEBUG(log, "Waiting as follower");
-            server->waitInit();
-            LOG_DEBUG(log, "Follower became fresh");
-        }
+        LOG_DEBUG(log, "Waiting server to initialize");
+        server->startup(true);
+        LOG_DEBUG(log, "Server intialized, waiting for quorum");
+        server->waitInit();
+        LOG_DEBUG(log, "Quorum initialized");
     }
     catch (...)
     {
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
index 3ae44f926d0..6ff7b1f2b79 100644
--- a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
@@ -15,21 +15,21 @@
                 <hostname>node1</hostname>
                 <port>44444</port>
                 <can_become_leader>true</can_become_leader>
-                <priority>3</priority>
+                <priority>100</priority>
             </server>
             <server>
                 <id>2</id>
                 <hostname>node2</hostname>
                 <port>44444</port>
                 <can_become_leader>true</can_become_leader>
-                <priority>2</priority>
+                <priority>20</priority>
             </server>
             <server>
                 <id>3</id>
                 <hostname>node3</hostname>
                 <port>44444</port>
                 <can_become_leader>true</can_become_leader>
-                <priority>1</priority>
+                <priority>10</priority>
             </server>
         </raft_configuration>
     </test_keeper_server>
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
index 7674c755511..65956104f2b 100644
--- a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
@@ -15,21 +15,21 @@
                 <hostname>node1</hostname>
                 <port>44444</port>
                 <can_become_leader>true</can_become_leader>
-                <priority>3</priority>
+                <priority>100</priority>
             </server>
             <server>
                 <id>2</id>
                 <hostname>node2</hostname>
                 <port>44444</port>
                 <can_become_leader>true</can_become_leader>
-                <priority>2</priority>
+                <priority>20</priority>
             </server>
             <server>
                 <id>3</id>
                 <hostname>node3</hostname>
                 <port>44444</port>
                 <can_become_leader>true</can_become_leader>
-                <priority>1</priority>
+                <priority>10</priority>
             </server>
         </raft_configuration>
     </test_keeper_server>
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
index 59dde3bc1b1..d2279ef00a4 100644
--- a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
@@ -15,21 +15,21 @@
                 <hostname>node1</hostname>
                 <port>44444</port>
                 <can_become_leader>true</can_become_leader>
-                <priority>3</priority>
+                <priority>100</priority>
             </server>
             <server>
                 <id>2</id>
                 <hostname>node2</hostname>
                 <port>44444</port>
                 <can_become_leader>true</can_become_leader>
-                <priority>2</priority>
+                <priority>20</priority>
             </server>
             <server>
                 <id>3</id>
                 <hostname>node3</hostname>
                 <port>44444</port>
                 <can_become_leader>true</can_become_leader>
-                <priority>1</priority>
+                <priority>10</priority>
             </server>
         </raft_configuration>
     </test_keeper_server>

From 74630acff59879b76cf682d0957151d7cae75044 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Thu, 11 Feb 2021 12:49:49 +0300
Subject: [PATCH 0963/1238] More debug in test

---
 .../NuKeeperStorageDispatcher.cpp             |  1 +
 .../configs/enable_test_keeper1.xml           |  6 +--
 .../configs/enable_test_keeper2.xml           |  6 +--
 .../configs/enable_test_keeper3.xml           |  6 +--
 .../test_testkeeper_multinode/test.py         | 37 ++++++++++++++-----
 5 files changed, 37 insertions(+), 19 deletions(-)

diff --git a/src/Coordination/NuKeeperStorageDispatcher.cpp b/src/Coordination/NuKeeperStorageDispatcher.cpp
index 9dc420830ad..76db01eb70f 100644
--- a/src/Coordination/NuKeeperStorageDispatcher.cpp
+++ b/src/Coordination/NuKeeperStorageDispatcher.cpp
@@ -116,6 +116,7 @@ void NuKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfigurati
         LOG_DEBUG(log, "Waiting server to initialize");
         server->startup(true);
         LOG_DEBUG(log, "Server intialized, waiting for quorum");
+
         server->waitInit();
         LOG_DEBUG(log, "Quorum initialized");
     }
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
index 6ff7b1f2b79..3ae44f926d0 100644
--- a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
@@ -15,21 +15,21 @@
                 <hostname>node1</hostname>
                 <port>44444</port>
                 <can_become_leader>true</can_become_leader>
-                <priority>100</priority>
+                <priority>3</priority>
             </server>
             <server>
                 <id>2</id>
                 <hostname>node2</hostname>
                 <port>44444</port>
                 <can_become_leader>true</can_become_leader>
-                <priority>20</priority>
+                <priority>2</priority>
             </server>
             <server>
                 <id>3</id>
                 <hostname>node3</hostname>
                 <port>44444</port>
                 <can_become_leader>true</can_become_leader>
-                <priority>10</priority>
+                <priority>1</priority>
             </server>
         </raft_configuration>
     </test_keeper_server>
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
index 65956104f2b..7674c755511 100644
--- a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
@@ -15,21 +15,21 @@
                 <hostname>node1</hostname>
                 <port>44444</port>
                 <can_become_leader>true</can_become_leader>
-                <priority>100</priority>
+                <priority>3</priority>
             </server>
             <server>
                 <id>2</id>
                 <hostname>node2</hostname>
                 <port>44444</port>
                 <can_become_leader>true</can_become_leader>
-                <priority>20</priority>
+                <priority>2</priority>
             </server>
             <server>
                 <id>3</id>
                 <hostname>node3</hostname>
                 <port>44444</port>
                 <can_become_leader>true</can_become_leader>
-                <priority>10</priority>
+                <priority>1</priority>
             </server>
         </raft_configuration>
     </test_keeper_server>
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
index d2279ef00a4..59dde3bc1b1 100644
--- a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
@@ -15,21 +15,21 @@
                 <hostname>node1</hostname>
                 <port>44444</port>
                 <can_become_leader>true</can_become_leader>
-                <priority>100</priority>
+                <priority>3</priority>
             </server>
             <server>
                 <id>2</id>
                 <hostname>node2</hostname>
                 <port>44444</port>
                 <can_become_leader>true</can_become_leader>
-                <priority>20</priority>
+                <priority>2</priority>
             </server>
             <server>
                 <id>3</id>
                 <hostname>node3</hostname>
                 <port>44444</port>
                 <can_become_leader>true</can_become_leader>
-                <priority>10</priority>
+                <priority>1</priority>
             </server>
         </raft_configuration>
     </test_keeper_server>
diff --git a/tests/integration/test_testkeeper_multinode/test.py b/tests/integration/test_testkeeper_multinode/test.py
index e2b0537d5ec..7063c42f31a 100644
--- a/tests/integration/test_testkeeper_multinode/test.py
+++ b/tests/integration/test_testkeeper_multinode/test.py
@@ -307,6 +307,19 @@ def test_blocade_leader(started_cluster):
     assert node3.query("SELECT COUNT() FROM t1") == "310\n"
 
 
+def dump_zk(node, zk_path, replica_path):
+    print(node.query("SELECT * FROM system.replication_queue FORMAT Vertical"))
+    print("Replicas")
+    print(node.query("SELECT * FROM system.replicas FORMAT Vertical"))
+    print("Replica 2 info")
+    print(node.query("SELECT * FROM system.zookeeper WHERE path = '{}' FORMAT Vertical".format(zk_path)))
+    print("Queue")
+    print(node.query("SELECT * FROM system.zookeeper WHERE path = '{}/queue' FORMAT Vertical".format(replica_path)))
+    print("Log")
+    print(node.query("SELECT * FROM system.zookeeper WHERE path = '{}/log' FORMAT Vertical".format(zk_path)))
+    print("Parts")
+    print(node.query("SELECT name FROM system.zookeeper WHERE path = '{}/parts' FORMAT Vertical".format(replica_path)))
+
 # in extremely rare case it can take more than 5 minutes in debug build with sanitizer
 @pytest.mark.timeout(600)
 def test_blocade_leader_twice(started_cluster):
@@ -339,6 +352,8 @@ def test_blocade_leader_twice(started_cluster):
                 print("Got exception node2", smaller_exception(ex))
                 time.sleep(0.5)
         else:
+            for num, node in enumerate([node1, node2, node3]):
+                dump_zk(node, '/clickhouse/t2', '/clickhouse/t2/replicas/{}'.format(num + 1))
             assert False, "Cannot reconnect for node2"
 
         for i in range(100):
@@ -354,6 +369,8 @@ def test_blocade_leader_twice(started_cluster):
                 print("Got exception node3", smaller_exception(ex))
                 time.sleep(0.5)
         else:
+            for num, node in enumerate([node1, node2, node3]):
+                dump_zk(node, '/clickhouse/t2', '/clickhouse/t2/replicas/{}'.format(num + 1))
             assert False, "Cannot reconnect for node3"
 
 
@@ -389,6 +406,8 @@ def test_blocade_leader_twice(started_cluster):
                 print("Got exception node{}".format(n + 1), smaller_exception(ex))
                 time.sleep(0.5)
         else:
+            for num, node in enumerate([node1, node2, node3]):
+                dump_zk(node, '/clickhouse/t2', '/clickhouse/t2/replicas/{}'.format(num + 1))
             assert False, "Cannot reconnect for node{}".format(n + 1)
 
     for n, node in enumerate([node1, node2, node3]):
@@ -400,12 +419,14 @@ def test_blocade_leader_twice(started_cluster):
                 print("Got exception node{}".format(n + 1), smaller_exception(ex))
                 time.sleep(0.5)
         else:
+            for num, node in enumerate([node1, node2, node3]):
+                dump_zk(node, '/clickhouse/t2', '/clickhouse/t2/replicas/{}'.format(num + 1))
             assert False, "Cannot reconnect for node{}".format(n + 1)
 
     for n, node in enumerate([node1, node2, node3]):
         for i in range(100):
             try:
-                node.query("SYSTEM RESTART REPLICA t2", timeout=10)
+                node.query("SYSTEM RESTART REPLICA t2")
                 node.query("SYSTEM SYNC REPLICA t2", timeout=10)
                 break
             except Exception as ex:
@@ -417,18 +438,14 @@ def test_blocade_leader_twice(started_cluster):
                 print("Got exception node{}".format(n + 1), smaller_exception(ex))
                 time.sleep(0.5)
         else:
+            for num, node in enumerate([node1, node2, node3]):
+                dump_zk(node, '/clickhouse/t2', '/clickhouse/t2/replicas/{}'.format(num + 1))
             assert False, "Cannot reconnect for node{}".format(n + 1)
 
     assert node1.query("SELECT COUNT() FROM t2") == "510\n"
     if node2.query("SELECT COUNT() FROM t2") != "510\n":
-        print(node2.query("SELECT * FROM system.replication_queue FORMAT Vertical"))
-        print("Replicas")
-        print(node2.query("SELECT * FROM system.replicas FORMAT Vertical"))
-        print("Replica 2 info")
-        print(node2.query("SELECT * FROM system.zookeeper WHERE path = '/clickhouse/t2/replicas/2' FORMAT Vertical"))
-        print("Queue")
-        print(node2.query("SELECT * FROM system.zookeeper WHERE path = '/clickhouse/t2/replicas/2/queue' FORMAT Vertical"))
-        print("Log")
-        print(node2.query("SELECT * FROM system.zookeeper WHERE path = '/clickhouse/t2/log' FORMAT Vertical"))
+        for num, node in enumerate([node1, node2, node3]):
+            dump_zk(node, '/clickhouse/t2', '/clickhouse/t2/replicas/{}'.format(num + 1))
+
     assert node2.query("SELECT COUNT() FROM t2") == "510\n"
     assert node3.query("SELECT COUNT() FROM t2") == "510\n"

From 0acd01836148b9e8cfb97b04bf31f9cd899a56dc Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Thu, 11 Feb 2021 12:58:02 +0300
Subject: [PATCH 0964/1238] Fix typo

---
 src/Coordination/NuKeeperStorageDispatcher.cpp     |  2 +-
 .../integration/test_testkeeper_multinode/test.py  | 14 +++++++++++++-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/Coordination/NuKeeperStorageDispatcher.cpp b/src/Coordination/NuKeeperStorageDispatcher.cpp
index 76db01eb70f..042f0d2ffb9 100644
--- a/src/Coordination/NuKeeperStorageDispatcher.cpp
+++ b/src/Coordination/NuKeeperStorageDispatcher.cpp
@@ -115,7 +115,7 @@ void NuKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfigurati
     {
         LOG_DEBUG(log, "Waiting server to initialize");
         server->startup(true);
-        LOG_DEBUG(log, "Server intialized, waiting for quorum");
+        LOG_DEBUG(log, "Server initialized, waiting for quorum");
 
         server->waitInit();
         LOG_DEBUG(log, "Quorum initialized");
diff --git a/tests/integration/test_testkeeper_multinode/test.py b/tests/integration/test_testkeeper_multinode/test.py
index 7063c42f31a..f161c28ee83 100644
--- a/tests/integration/test_testkeeper_multinode/test.py
+++ b/tests/integration/test_testkeeper_multinode/test.py
@@ -242,6 +242,8 @@ def test_blocade_leader(started_cluster):
                 print("Got exception node2", smaller_exception(ex))
                 time.sleep(0.5)
         else:
+            for num, node in enumerate([node1, node2, node3]):
+                dump_zk(node, '/clickhouse/t1', '/clickhouse/t1/replicas/{}'.format(num + 1))
             assert False, "Cannot insert anything node2"
 
         for i in range(100):
@@ -257,6 +259,8 @@ def test_blocade_leader(started_cluster):
                 print("Got exception node3", smaller_exception(ex))
                 time.sleep(0.5)
         else:
+            for num, node in enumerate([node1, node2, node3]):
+                dump_zk(node, '/clickhouse/t1', '/clickhouse/t1/replicas/{}'.format(num + 1))
             assert False, "Cannot insert anything node3"
 
     for n, node in enumerate([node1, node2, node3]):
@@ -283,12 +287,14 @@ def test_blocade_leader(started_cluster):
             print("Got exception node1", smaller_exception(ex))
             time.sleep(0.5)
     else:
+        for num, node in enumerate([node1, node2, node3]):
+            dump_zk(node, '/clickhouse/t1', '/clickhouse/t1/replicas/{}'.format(num + 1))
         assert False, "Cannot insert anything node1"
 
     for n, node in enumerate([node1, node2, node3]):
         for i in range(100):
             try:
-                node.query("SYSTEM RESTART REPLICA t1", timeout=10)
+                node.query("SYSTEM RESTART REPLICA t1")
                 node.query("SYSTEM SYNC REPLICA t1", timeout=10)
                 break
             except Exception as ex:
@@ -300,8 +306,14 @@ def test_blocade_leader(started_cluster):
                 print("Got exception node{}".format(n + 1), smaller_exception(ex))
                 time.sleep(0.5)
         else:
+            for num, node in enumerate([node1, node2, node3]):
+                dump_zk(node, '/clickhouse/t1', '/clickhouse/t1/replicas/{}'.format(num + 1))
             assert False, "Cannot sync replica node{}".format(n+1)
 
+    if node1.query("SELECT COUNT() FROM t1") != "310\n":
+        for num, node in enumerate([node1, node2, node3]):
+            dump_zk(node, '/clickhouse/t1', '/clickhouse/t1/replicas/{}'.format(num + 1))
+
     assert node1.query("SELECT COUNT() FROM t1") == "310\n"
     assert node2.query("SELECT COUNT() FROM t1") == "310\n"
     assert node3.query("SELECT COUNT() FROM t1") == "310\n"

From 99a471e047877b953920ff2d3ab8e73e5030c6be Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Thu, 11 Feb 2021 13:25:10 +0300
Subject: [PATCH 0965/1238] Add ability to start as follower

---
 src/Coordination/InMemoryStateManager.cpp                   | 6 ++++++
 src/Coordination/InMemoryStateManager.h                     | 6 ++++++
 src/Coordination/NuKeeperServer.cpp                         | 4 ++--
 src/Coordination/NuKeeperServer.h                           | 2 +-
 src/Coordination/NuKeeperStorageDispatcher.cpp              | 2 +-
 .../configs/enable_test_keeper1.xml                         | 2 ++
 .../configs/enable_test_keeper2.xml                         | 2 ++
 .../configs/enable_test_keeper3.xml                         | 2 ++
 8 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/src/Coordination/InMemoryStateManager.cpp b/src/Coordination/InMemoryStateManager.cpp
index d90c7e46f0d..a6db3271bc1 100644
--- a/src/Coordination/InMemoryStateManager.cpp
+++ b/src/Coordination/InMemoryStateManager.cpp
@@ -28,6 +28,9 @@ InMemoryStateManager::InMemoryStateManager(
         int port = config.getInt(full_prefix + ".port");
         bool can_become_leader = config.getBool(full_prefix + ".can_become_leader", true);
         int32_t priority = config.getInt(full_prefix + ".priority", 1);
+        bool start_as_follower = config.getBool(full_prefix + ".start_as_follower", false);
+        if (start_as_follower)
+            start_as_follower_servers.insert(server_id);
 
         auto endpoint = hostname + ":" + std::to_string(port);
         auto peer_config = nuraft::cs_new<nuraft::srv_config>(server_id, 0, endpoint, "", !can_become_leader, priority);
@@ -41,6 +44,9 @@ InMemoryStateManager::InMemoryStateManager(
     }
     if (!my_server_config)
         throw Exception(ErrorCodes::RAFT_ERROR, "Our server id {} not found in raft_configuration section");
+
+    if (start_as_follower_servers.size() == cluster_config->get_servers().size())
+        throw Exception(ErrorCodes::RAFT_ERROR, "At least one of servers should be able to start as leader (without <start_as_follower>)");
 }
 
 void InMemoryStateManager::save_config(const nuraft::cluster_config & config)
diff --git a/src/Coordination/InMemoryStateManager.h b/src/Coordination/InMemoryStateManager.h
index b48b5188f36..a4537602b36 100644
--- a/src/Coordination/InMemoryStateManager.h
+++ b/src/Coordination/InMemoryStateManager.h
@@ -35,9 +35,15 @@ public:
 
     int getPort() const { return my_port; }
 
+    bool shouldStartAsFollower() const
+    {
+        return start_as_follower_servers.count(my_server_id);
+    }
+
 private:
     int my_server_id;
     int my_port;
+    std::unordered_set<int> start_as_follower_servers;
     nuraft::ptr<InMemoryLogStore> log_store;
     nuraft::ptr<nuraft::srv_config> my_server_config;
     nuraft::ptr<nuraft::cluster_config> cluster_config;
diff --git a/src/Coordination/NuKeeperServer.cpp b/src/Coordination/NuKeeperServer.cpp
index c7deebfdb96..7464a06e86f 100644
--- a/src/Coordination/NuKeeperServer.cpp
+++ b/src/Coordination/NuKeeperServer.cpp
@@ -31,7 +31,7 @@ NuKeeperServer::NuKeeperServer(
 {
 }
 
-void NuKeeperServer::startup(bool should_build_quorum)
+void NuKeeperServer::startup()
 {
     nuraft::raft_params params;
     params.heart_beat_interval_ = coordination_settings->heart_beat_interval_ms.totalMilliseconds();
@@ -47,7 +47,7 @@ void NuKeeperServer::startup(bool should_build_quorum)
 
     nuraft::asio_service::options asio_opts{};
     nuraft::raft_server::init_options init_options;
-    init_options.skip_initial_election_timeout_ = !should_build_quorum;
+    init_options.skip_initial_election_timeout_ = state_manager->shouldStartAsFollower();
     init_options.raft_callback_ = [this] (nuraft::cb_func::Type type, nuraft::cb_func::Param * param)
     {
         return callbackFunc(type, param);
diff --git a/src/Coordination/NuKeeperServer.h b/src/Coordination/NuKeeperServer.h
index a37d4d9127a..a8d269eb9eb 100644
--- a/src/Coordination/NuKeeperServer.h
+++ b/src/Coordination/NuKeeperServer.h
@@ -43,7 +43,7 @@ public:
         const Poco::Util::AbstractConfiguration & config,
         ResponsesQueue & responses_queue_);
 
-    void startup(bool should_build_quorum);
+    void startup();
 
     void putRequest(const NuKeeperStorage::RequestForSession & request);
 
diff --git a/src/Coordination/NuKeeperStorageDispatcher.cpp b/src/Coordination/NuKeeperStorageDispatcher.cpp
index 042f0d2ffb9..570087757ad 100644
--- a/src/Coordination/NuKeeperStorageDispatcher.cpp
+++ b/src/Coordination/NuKeeperStorageDispatcher.cpp
@@ -114,7 +114,7 @@ void NuKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfigurati
     try
     {
         LOG_DEBUG(log, "Waiting server to initialize");
-        server->startup(true);
+        server->startup();
         LOG_DEBUG(log, "Server initialized, waiting for quorum");
 
         server->waitInit();
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
index 3ae44f926d0..4ad76889d1e 100644
--- a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
@@ -22,6 +22,7 @@
                 <hostname>node2</hostname>
                 <port>44444</port>
                 <can_become_leader>true</can_become_leader>
+                <start_as_follower>true</start_as_follower>
                 <priority>2</priority>
             </server>
             <server>
@@ -29,6 +30,7 @@
                 <hostname>node3</hostname>
                 <port>44444</port>
                 <can_become_leader>true</can_become_leader>
+                <start_as_follower>true</start_as_follower>
                 <priority>1</priority>
             </server>
         </raft_configuration>
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
index 7674c755511..a1954a1e639 100644
--- a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
@@ -22,6 +22,7 @@
                 <hostname>node2</hostname>
                 <port>44444</port>
                 <can_become_leader>true</can_become_leader>
+                <start_as_follower>true</start_as_follower>
                 <priority>2</priority>
             </server>
             <server>
@@ -29,6 +30,7 @@
                 <hostname>node3</hostname>
                 <port>44444</port>
                 <can_become_leader>true</can_become_leader>
+                <start_as_follower>true</start_as_follower>
                 <priority>1</priority>
             </server>
         </raft_configuration>
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
index 59dde3bc1b1..88d2358138f 100644
--- a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
+++ b/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
@@ -22,6 +22,7 @@
                 <hostname>node2</hostname>
                 <port>44444</port>
                 <can_become_leader>true</can_become_leader>
+                <start_as_follower>true</start_as_follower>
                 <priority>2</priority>
             </server>
             <server>
@@ -29,6 +30,7 @@
                 <hostname>node3</hostname>
                 <port>44444</port>
                 <can_become_leader>true</can_become_leader>
+                <start_as_follower>true</start_as_follower>
                 <priority>1</priority>
             </server>
         </raft_configuration>

From d7dccb8d2c6a74fc6a660a70a0ccdce9c6fdacb0 Mon Sep 17 00:00:00 2001
From: Nikita Mikhailov <jakalletti@jakalletti-build.sas.yp-c.yandex.net>
Date: Thu, 11 Feb 2021 13:43:12 +0300
Subject: [PATCH 0966/1238] better

---
 .../01701_parallel_parsing_infinite_segmentation.sh           | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh b/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh
index e9033a08632..f677ff93620 100755
--- a/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh
+++ b/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh
@@ -4,6 +4,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh                                                                                                                                                                                                                        
 . "$CURDIR"/../shell_config.sh   
 
-python3 -c "print('{{\"a\":\"{}\", \"b\":\"{}\"}}'.format('clickhouse'* 10000000, 'dbms' * 100000000))" > big_json.json
+python3 -c "for i in range(10):print('{{\"a\":\"{}\", \"b\":\"{}\"'.format('clickhouse'* 1000000, 'dbms' * 1000000))" > big_json.json
+python3 -c "for i in range(100):print('{{\"a\":\"{}\", \"b\":\"{}\"}}'.format('clickhouse'* 100000, 'dbms' * 100000))" > big_json.json
+
 
 ${CLICKHOUSE_LOCAL} --input_format_parallel_parsing=1 --max_memory_usage=0 -q "select count() from file('big_json.json', 'JSONEachRow', 'a String, b String')" 2>&1 | grep -q "min_chunk_bytes_for_parallel_parsing" && echo "Ok." || echo "FAIL" ||:
\ No newline at end of file

From 325363896946e85e48b8b5b186191dffb68eb07a Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Thu, 11 Feb 2021 14:46:18 +0300
Subject: [PATCH 0967/1238] Fix backoff for failed background tasks in
 replicated merge tree

---
 .../MergeTree/BackgroundJobsExecutor.cpp      | 16 +++++++++---
 .../MergeTree/BackgroundJobsExecutor.h        |  4 ++-
 src/Storages/MergeTree/MergeTreeData.cpp      |  2 +-
 src/Storages/StorageMergeTree.cpp             |  7 +++--
 src/Storages/StorageReplicatedMergeTree.cpp   |  2 +-
 .../tests/gtest_background_executor.cpp       |  2 +-
 ...ground_checker_blather_zookeeper.reference |  1 +
 ...5_background_checker_blather_zookeeper.sql | 26 +++++++++++++++++++
 8 files changed, 51 insertions(+), 9 deletions(-)
 create mode 100644 tests/queries/0_stateless/01715_background_checker_blather_zookeeper.reference
 create mode 100644 tests/queries/0_stateless/01715_background_checker_blather_zookeeper.sql

diff --git a/src/Storages/MergeTree/BackgroundJobsExecutor.cpp b/src/Storages/MergeTree/BackgroundJobsExecutor.cpp
index 3e3f693addd..8e5a0e8a3b8 100644
--- a/src/Storages/MergeTree/BackgroundJobsExecutor.cpp
+++ b/src/Storages/MergeTree/BackgroundJobsExecutor.cpp
@@ -98,11 +98,21 @@ try
                 {
                     try /// We don't want exceptions in background pool
                     {
-                        job();
+                        bool job_success = job();
                         /// Job done, decrement metric and reset no_work counter
                         CurrentMetrics::values[pool_config.tasks_metric]--;
-                        /// Job done, new empty space in pool, schedule background task
-                        runTaskWithoutDelay();
+
+                        if (job_success)
+                        {
+                            /// Job done, new empty space in pool, schedule background task
+                            runTaskWithoutDelay();
+                        }
+                        else
+                        {
+                            /// Job done, but failed, schedule with backoff
+                            scheduleTask(/* with_backoff = */ true);
+                        }
+
                     }
                     catch (...)
                     {
diff --git a/src/Storages/MergeTree/BackgroundJobsExecutor.h b/src/Storages/MergeTree/BackgroundJobsExecutor.h
index 85067188f09..da22c752e1b 100644
--- a/src/Storages/MergeTree/BackgroundJobsExecutor.h
+++ b/src/Storages/MergeTree/BackgroundJobsExecutor.h
@@ -36,10 +36,12 @@ enum class PoolType
     FETCH,
 };
 
+using BackgroundJobFunc = std::function<bool()>;
+
 /// Result from background job providers. Function which will be executed in pool and pool type.
 struct JobAndPool
 {
-    ThreadPool::Job job;
+    BackgroundJobFunc job;
     PoolType pool_type;
 };
 
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index c6e77a56db6..4458b5735bb 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -3796,7 +3796,7 @@ std::optional<JobAndPool> MergeTreeData::getDataMovingJob()
 
     return JobAndPool{[this, moving_tagger] () mutable
     {
-        moveParts(moving_tagger);
+        return moveParts(moving_tagger);
     }, PoolType::MOVE};
 }
 
diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp
index 11a159d4a6c..202e909af0f 100644
--- a/src/Storages/StorageMergeTree.cpp
+++ b/src/Storages/StorageMergeTree.cpp
@@ -962,9 +962,11 @@ std::optional<JobAndPool> StorageMergeTree::getDataProcessingJob()
         return JobAndPool{[this, metadata_snapshot, merge_entry, mutate_entry, share_lock] () mutable
         {
             if (merge_entry)
-                mergeSelectedParts(metadata_snapshot, false, {}, *merge_entry, share_lock);
+                return mergeSelectedParts(metadata_snapshot, false, {}, *merge_entry, share_lock);
             else if (mutate_entry)
-                mutateSelectedPart(metadata_snapshot, *mutate_entry, share_lock);
+                return mutateSelectedPart(metadata_snapshot, *mutate_entry, share_lock);
+
+            __builtin_unreachable();
         }, PoolType::MERGE_MUTATE};
     }
     else if (auto lock = time_after_previous_cleanup.compareAndRestartDeferred(1))
@@ -978,6 +980,7 @@ std::optional<JobAndPool> StorageMergeTree::getDataProcessingJob()
             clearOldWriteAheadLogs();
             clearOldMutations();
             clearEmptyParts();
+            return true;
         }, PoolType::MERGE_MUTATE};
     }
     return {};
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index 53104efeb43..097b7679899 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -2682,7 +2682,7 @@ std::optional<JobAndPool> StorageReplicatedMergeTree::getDataProcessingJob()
 
     return JobAndPool{[this, selected_entry] () mutable
     {
-        processQueueEntry(selected_entry);
+        return processQueueEntry(selected_entry);
     }, pool_type};
 }
 
diff --git a/src/Storages/tests/gtest_background_executor.cpp b/src/Storages/tests/gtest_background_executor.cpp
index bf9a305ccc9..0ddf2d9ea2a 100644
--- a/src/Storages/tests/gtest_background_executor.cpp
+++ b/src/Storages/tests/gtest_background_executor.cpp
@@ -32,7 +32,7 @@ protected:
 
     std::optional<JobAndPool> getBackgroundJob() override
     {
-        return JobAndPool{[] { std::this_thread::sleep_for(1s); counter++; }, PoolType::MERGE_MUTATE};
+        return JobAndPool{[] { std::this_thread::sleep_for(1s); counter++; return true; }, PoolType::MERGE_MUTATE};
     }
 };
 
diff --git a/tests/queries/0_stateless/01715_background_checker_blather_zookeeper.reference b/tests/queries/0_stateless/01715_background_checker_blather_zookeeper.reference
new file mode 100644
index 00000000000..d00491fd7e5
--- /dev/null
+++ b/tests/queries/0_stateless/01715_background_checker_blather_zookeeper.reference
@@ -0,0 +1 @@
+1
diff --git a/tests/queries/0_stateless/01715_background_checker_blather_zookeeper.sql b/tests/queries/0_stateless/01715_background_checker_blather_zookeeper.sql
new file mode 100644
index 00000000000..a1868dddf22
--- /dev/null
+++ b/tests/queries/0_stateless/01715_background_checker_blather_zookeeper.sql
@@ -0,0 +1,26 @@
+DROP TABLE IF EXISTS i20203_1;
+DROP TABLE IF EXISTS i20203_2;
+
+CREATE TABLE i20203_1 (a Int8)
+ENGINE = ReplicatedMergeTree('/clickhouse/tables/01715_background_checker/i20203','r1')
+ORDER BY tuple();
+
+CREATE TABLE i20203_2 (a Int8)
+ENGINE = ReplicatedMergeTree('/clickhouse/tables/01715_background_checker/i20203','r2')
+ORDER BY tuple();
+
+DETACH TABLE i20203_2;
+INSERT INTO i20203_1 VALUES (2);
+
+DETACH TABLE i20203_1;
+ATTACH TABLE i20203_2;
+
+-- sleep 10 seconds
+SELECT number from numbers(10) where sleepEachRow(1) Format Null;
+
+SELECT num_tries < 50
+FROM system.replication_queue
+WHERE table = 'i20203_2' AND database = currentDatabase();
+
+DROP TABLE IF EXISTS i20203_1;
+DROP TABLE IF EXISTS i20203_2;

From 47c8537f63e87e08cc9d931c32b60949790768f6 Mon Sep 17 00:00:00 2001
From: filimonov <1549571+filimonov@users.noreply.github.com>
Date: Thu, 11 Feb 2021 12:56:26 +0100
Subject: [PATCH 0968/1238] Add libnss_files to alpine image

It seems it's needed to make some of DNS-related features work
properly in certain scenarios (things like getting proper FQDN, reverse DNS lookup).
---
 docker/server/alpine-build.sh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docker/server/alpine-build.sh b/docker/server/alpine-build.sh
index 0142149b5bd..329888f2fcb 100755
--- a/docker/server/alpine-build.sh
+++ b/docker/server/alpine-build.sh
@@ -54,8 +54,10 @@ docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libm.so.6       "${CONTAIN
 docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libpthread.so.0 "${CONTAINER_ROOT_FOLDER}/lib"
 docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/librt.so.1      "${CONTAINER_ROOT_FOLDER}/lib"
 docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libnss_dns.so.2 "${CONTAINER_ROOT_FOLDER}/lib"
+docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libnss_files.so.2 "${CONTAINER_ROOT_FOLDER}/lib"
 docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libresolv.so.2  "${CONTAINER_ROOT_FOLDER}/lib"
 docker cp -L "${ubuntu20image}":/lib64/ld-linux-x86-64.so.2           "${CONTAINER_ROOT_FOLDER}/lib64"
+docker cp -L "${ubuntu20image}":/etc/nsswitch.conf                    "${CONTAINER_ROOT_FOLDER}/etc"
 
 docker build "$DOCKER_BUILD_FOLDER" -f Dockerfile.alpine -t "${DOCKER_IMAGE}:${VERSION}-alpine" --pull
 rm -rf "$CONTAINER_ROOT_FOLDER"

From e325ab2538145b35ae80429e8c64293635897ee7 Mon Sep 17 00:00:00 2001
From: Nikita Mikhailov <jakalletti@jakalletti-build.sas.yp-c.yandex.net>
Date: Thu, 11 Feb 2021 15:00:14 +0300
Subject: [PATCH 0969/1238] fix test

---
 .../01701_parallel_parsing_infinite_segmentation.sh         | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh b/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh
index f677ff93620..b82e179495e 100755
--- a/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh
+++ b/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh
@@ -5,7 +5,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 . "$CURDIR"/../shell_config.sh   
 
 python3 -c "for i in range(10):print('{{\"a\":\"{}\", \"b\":\"{}\"'.format('clickhouse'* 1000000, 'dbms' * 1000000))" > big_json.json
-python3 -c "for i in range(100):print('{{\"a\":\"{}\", \"b\":\"{}\"}}'.format('clickhouse'* 100000, 'dbms' * 100000))" > big_json.json
+python3 -c "for i in range(100):print('{{\"a\":\"{}\", \"b\":\"{}\"}}'.format('clickhouse'* 100000, 'dbms' * 100000))" >> big_json.json
 
 
-${CLICKHOUSE_LOCAL} --input_format_parallel_parsing=1 --max_memory_usage=0 -q "select count() from file('big_json.json', 'JSONEachRow', 'a String, b String')" 2>&1 | grep -q "min_chunk_bytes_for_parallel_parsing" && echo "Ok." || echo "FAIL" ||:
\ No newline at end of file
+${CLICKHOUSE_LOCAL} --input_format_parallel_parsing=1 --max_memory_usage=0 -q "select count() from file('big_json.json', 'JSONEachRow', 'a String, b String')" 2>&1 | grep -q "min_chunk_bytes_for_parallel_parsing" && echo "Ok." || echo "FAIL" ||:
+
+rm big_json.json
\ No newline at end of file

From 447fcfa1c9763431d81a0e9af85f2588fd092555 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Thu, 11 Feb 2021 15:12:01 +0300
Subject: [PATCH 0970/1238] Fix build

---
 src/Coordination/InMemoryStateManager.cpp  | 10 ++++++++++
 src/Coordination/InMemoryStateManager.h    |  5 +++++
 src/Coordination/tests/gtest_for_build.cpp |  3 +--
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/Coordination/InMemoryStateManager.cpp b/src/Coordination/InMemoryStateManager.cpp
index a6db3271bc1..69e93578cc1 100644
--- a/src/Coordination/InMemoryStateManager.cpp
+++ b/src/Coordination/InMemoryStateManager.cpp
@@ -9,6 +9,16 @@ namespace ErrorCodes
     extern const int RAFT_ERROR;
 }
 
+InMemoryStateManager::InMemoryStateManager(int server_id_, const std::string & host, int port)
+    : my_server_id(server_id_)
+    , my_port(port)
+    , log_store(nuraft::cs_new<InMemoryLogStore>())
+    , cluster_config(nuraft::cs_new<nuraft::cluster_config>())
+{
+    auto peer_config = nuraft::cs_new<nuraft::srv_config>(my_server_id, host + ":" + std::to_string(port));
+    cluster_config->get_servers().push_back(peer_config);
+}
+
 InMemoryStateManager::InMemoryStateManager(
     int my_server_id_,
     const std::string & config_prefix,
diff --git a/src/Coordination/InMemoryStateManager.h b/src/Coordination/InMemoryStateManager.h
index a4537602b36..2a5c2f00dba 100644
--- a/src/Coordination/InMemoryStateManager.h
+++ b/src/Coordination/InMemoryStateManager.h
@@ -17,6 +17,11 @@ public:
         const std::string & config_prefix,
         const Poco::Util::AbstractConfiguration & config);
 
+    InMemoryStateManager(
+        int server_id_,
+        const std::string & host,
+        int port);
+
     nuraft::ptr<nuraft::cluster_config> load_config() override { return cluster_config; }
 
     void save_config(const nuraft::cluster_config & config) override;
diff --git a/src/Coordination/tests/gtest_for_build.cpp b/src/Coordination/tests/gtest_for_build.cpp
index c6f29831618..ed9777350c5 100644
--- a/src/Coordination/tests/gtest_for_build.cpp
+++ b/src/Coordination/tests/gtest_for_build.cpp
@@ -27,7 +27,6 @@
 TEST(CoordinationTest, BuildTest)
 {
     DB::InMemoryLogStore store;
-    DB::InMemoryStateManager state_manager(1, "localhost:12345");
     DB::SummingStateMachine machine;
     EXPECT_EQ(1, 1);
 }
@@ -74,7 +73,7 @@ struct SimpliestRaftServer
         , port(port_)
         , endpoint(hostname + ":" + std::to_string(port))
         , state_machine(nuraft::cs_new<StateMachine>())
-        , state_manager(nuraft::cs_new<DB::InMemoryStateManager>(server_id, endpoint))
+        , state_manager(nuraft::cs_new<DB::InMemoryStateManager>(server_id, hostname, port))
     {
         nuraft::raft_params params;
         params.heart_beat_interval_ = 100;

From 525400bc415c7ad8111a957627e86718a359722e Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Thu, 11 Feb 2021 16:29:30 +0300
Subject: [PATCH 0971/1238] window function rank() and friends

---
 src/AggregateFunctions/IAggregateFunction.h   |  15 ++
 .../registerAggregateFunctions.cpp            |   4 +
 src/Processors/Transforms/WindowTransform.cpp | 191 ++++++++++++++++--
 src/Processors/Transforms/WindowTransform.h   |   9 +
 .../01591_window_functions.reference          |  42 ++++
 .../0_stateless/01591_window_functions.sql    |  12 ++
 6 files changed, 259 insertions(+), 14 deletions(-)

diff --git a/src/AggregateFunctions/IAggregateFunction.h b/src/AggregateFunctions/IAggregateFunction.h
index f1bbfa40aac..d15ff4e8a78 100644
--- a/src/AggregateFunctions/IAggregateFunction.h
+++ b/src/AggregateFunctions/IAggregateFunction.h
@@ -26,6 +26,7 @@ class ReadBuffer;
 class WriteBuffer;
 class IColumn;
 class IDataType;
+class IWindowFunction;
 
 using DataTypePtr = std::shared_ptr<const IDataType>;
 using DataTypes = std::vector<DataTypePtr>;
@@ -215,6 +216,20 @@ public:
     const DataTypes & getArgumentTypes() const { return argument_types; }
     const Array & getParameters() const { return parameters; }
 
+    // Any aggregate function can be calculated over a window, but there are some
+    // window functions such as rank() that require a different interface, e.g.
+    // because they don't respect the window frame, or need to be notified when
+    // a new peer group starts. They pretend to be normal aggregate functions,
+    // but will fail if you actually try to use them in Aggregator. The
+    // WindowTransform recognizes these functions and handles them differently.
+    // We could have a separate factory for window functions, and make all
+    // aggregate functions implement IWindowFunction interface and so on. This
+    // would be more logically correct, but more complex. We only have a handful
+    // of true window functions, so this hack-ish interface suffices.
+    virtual IWindowFunction * asWindowFunction() { return nullptr; }
+    virtual const IWindowFunction * asWindowFunction() const
+    { return const_cast<IAggregateFunction *>(this)->asWindowFunction(); }
+
 protected:
     DataTypes argument_types;
     Array parameters;
diff --git a/src/AggregateFunctions/registerAggregateFunctions.cpp b/src/AggregateFunctions/registerAggregateFunctions.cpp
index 1900d5d46c6..ae26fdc5d40 100644
--- a/src/AggregateFunctions/registerAggregateFunctions.cpp
+++ b/src/AggregateFunctions/registerAggregateFunctions.cpp
@@ -58,6 +58,8 @@ void registerAggregateFunctionCombinatorOrFill(AggregateFunctionCombinatorFactor
 void registerAggregateFunctionCombinatorResample(AggregateFunctionCombinatorFactory &);
 void registerAggregateFunctionCombinatorDistinct(AggregateFunctionCombinatorFactory &);
 
+void registerWindowFunctions(AggregateFunctionFactory & factory);
+
 
 void registerAggregateFunctions()
 {
@@ -103,6 +105,8 @@ void registerAggregateFunctions()
         registerAggregateFunctionMannWhitney(factory);
         registerAggregateFunctionWelchTTest(factory);
         registerAggregateFunctionStudentTTest(factory);
+
+        registerWindowFunctions(factory);
     }
 
     {
diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index 7fc9b56c3d5..45692e9cc7a 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -4,6 +4,9 @@
 
 #include <Common/Arena.h>
 
+#include <DataTypes/DataTypesNumber.h>
+#include <AggregateFunctions/AggregateFunctionFactory.h>
+
 namespace DB
 {
 
@@ -12,6 +15,18 @@ namespace ErrorCodes
     extern const int NOT_IMPLEMENTED;
 }
 
+// Interface for true window functions. It's not much of an interface, they just
+// accept the guts of WindowTransform and do 'something'. Given a small number of
+// true window functions, and the fact that the WindowTransform internals are
+// pretty much well defined in domain terms (e.g. frame boundaries), this is
+// somewhat acceptable. 
+class IWindowFunction {
+public:
+    virtual ~IWindowFunction() {}
+
+    virtual void windowInsertResultInto(IColumn & to, const WindowTransform * transform) = 0;
+};
+
 // Compares ORDER BY column values at given rows to find the boundaries of frame:
 // [compared] with [reference] +/- offset. Return value is -1/0/+1, like in
 // sorting predicates -- -1 means [compared] is less than [reference] +/- offset.
@@ -142,9 +157,14 @@ WindowTransform::WindowTransform(const Block & input_header_,
                 input_header.getPositionByName(argument_name));
         }
 
-        workspace.aggregate_function_state.reset(aggregate_function->sizeOfData(),
-            aggregate_function->alignOfData());
-        aggregate_function->create(workspace.aggregate_function_state.data());
+        workspace.window_function_impl = aggregate_function->asWindowFunction();
+        if (!workspace.window_function_impl)
+        {
+            workspace.aggregate_function_state.reset(
+                aggregate_function->sizeOfData(),
+                aggregate_function->alignOfData());
+            aggregate_function->create(workspace.aggregate_function_state.data());
+        }
 
         workspaces.push_back(std::move(workspace));
     }
@@ -183,8 +203,11 @@ WindowTransform::~WindowTransform()
     // Some states may be not created yet if the creation failed.
     for (auto & ws : workspaces)
     {
-        ws.window_function.aggregate_function->destroy(
-            ws.aggregate_function_state.data());
+        if (!ws.window_function_impl)
+        {
+            ws.window_function.aggregate_function->destroy(
+                ws.aggregate_function_state.data());
+        }
     }
 }
 
@@ -756,6 +779,12 @@ void WindowTransform::updateAggregationState()
 
     for (auto & ws : workspaces)
     {
+        if (ws.window_function_impl)
+        {
+            // No need to do anything for true window functions.
+            continue;
+        }
+
         const auto * a = ws.window_function.aggregate_function.get();
         auto * buf = ws.aggregate_function_state.data();
 
@@ -798,10 +827,10 @@ void WindowTransform::updateAggregationState()
             // For now, add the values one by one.
             auto * columns = ws.argument_columns.data();
             // Removing arena.get() from the loop makes it faster somehow...
-            auto * arena_ = arena.get();
+            auto * arena_ptr = arena.get();
             for (auto row = first_row; row < past_the_end_row; ++row)
             {
-                a->add(buf, columns, row, arena_);
+                a->add(buf, columns, row, arena_ptr);
             }
         }
     }
@@ -819,14 +848,21 @@ void WindowTransform::writeOutCurrentRow()
     for (size_t wi = 0; wi < workspaces.size(); ++wi)
     {
         auto & ws = workspaces[wi];
-        const auto & f = ws.window_function;
-        const auto * a = f.aggregate_function.get();
-        auto * buf = ws.aggregate_function_state.data();
-
         IColumn * result_column = block.output_columns[wi].get();
-        // FIXME does it also allocate the result on the arena?
-        // We'll have to pass it out with blocks then...
-        a->insertResultInto(buf, *result_column, arena.get());
+
+        if (ws.window_function_impl)
+        {
+            ws.window_function_impl->windowInsertResultInto(*result_column, this);
+        }
+        else
+        {
+            const auto & f = ws.window_function;
+            const auto * a = f.aggregate_function.get();
+            auto * buf = ws.aggregate_function_state.data();
+            // FIXME does it also allocate the result on the arena?
+            // We'll have to pass it out with blocks then...
+            a->insertResultInto(buf, *result_column, arena.get());
+        }
     }
 }
 
@@ -893,6 +929,8 @@ void WindowTransform::appendChunk(Chunk & chunk)
             if (!arePeers(peer_group_start, current_row))
             {
                 peer_group_start = current_row;
+                peer_group_start_row_number = current_row_number;
+                ++peer_group_number;
             }
 
             // Advance the frame start.
@@ -950,6 +988,7 @@ void WindowTransform::appendChunk(Chunk & chunk)
             // The peer group start is updated at the beginning of the loop,
             // because current_row might now be past-the-end.
             advanceRowNumber(current_row);
+            ++current_row_number;
             first_not_ready_row = current_row;
             frame_ended = false;
             frame_started = false;
@@ -983,7 +1022,10 @@ void WindowTransform::appendChunk(Chunk & chunk)
         prev_frame_start = partition_start;
         prev_frame_end = partition_start;
         assert(current_row == partition_start);
+        current_row_number = 1;
         peer_group_start = partition_start;
+        peer_group_start_row_number = 1;
+        peer_group_number = 1;
 
 //        fmt::print(stderr, "reinitialize agg data at start of {}\n",
 //            new_partition_start);
@@ -991,6 +1033,11 @@ void WindowTransform::appendChunk(Chunk & chunk)
         // has started.
         for (auto & ws : workspaces)
         {
+            if (ws.window_function_impl)
+            {
+                continue;
+            }
+
             const auto & f = ws.window_function;
             const auto * a = f.aggregate_function.get();
             auto * buf = ws.aggregate_function_state.data();
@@ -1008,6 +1055,11 @@ void WindowTransform::appendChunk(Chunk & chunk)
 
         for (auto & ws : workspaces)
         {
+            if (ws.window_function_impl)
+            {
+                continue;
+            }
+
             const auto & f = ws.window_function;
             const auto * a = f.aggregate_function.get();
             auto * buf = ws.aggregate_function_state.data();
@@ -1175,5 +1227,116 @@ void WindowTransform::work()
     }
 }
 
+// A basic implementation for a true window function. It pretends to be an
+// aggregate function, but refuses to work as such.
+struct WindowFunction
+    : public IAggregateFunctionHelper<WindowFunction>
+    , public IWindowFunction
+{
+    std::string name;
+
+    WindowFunction(const std::string & name_, const DataTypes & argument_types_,
+               const Array & parameters_)
+        : IAggregateFunctionHelper<WindowFunction>(argument_types_, parameters_)
+        , name(name_)
+    {}
+
+    IWindowFunction * asWindowFunction() override { return this; }
+
+    [[noreturn]] void fail() const
+    {
+        throw Exception(ErrorCodes::BAD_ARGUMENTS,
+            "The function '{}' can only be used as a window function, not as an aggregate function",
+            getName());
+    }
+
+    String getName() const override { return name; }
+    void create(AggregateDataPtr __restrict) const override { fail(); }
+    void destroy(AggregateDataPtr __restrict) const noexcept override {}
+    bool hasTrivialDestructor() const override { return true; }
+    size_t sizeOfData() const override { return 0; }
+    size_t alignOfData() const override { return 1; }
+    void add(AggregateDataPtr __restrict, const IColumn **, size_t, Arena *) const override { fail(); }
+    void merge(AggregateDataPtr __restrict, ConstAggregateDataPtr, Arena *) const override { fail(); }
+    void serialize(ConstAggregateDataPtr __restrict, WriteBuffer &) const override { fail(); }
+    void deserialize(AggregateDataPtr __restrict, ReadBuffer &, Arena *) const override { fail(); }
+    void insertResultInto(AggregateDataPtr __restrict, IColumn &, Arena *) const override { fail(); }
+};
+
+struct WindowFunctionRank final : public WindowFunction
+{
+    WindowFunctionRank(const std::string & name_,
+            const DataTypes & argument_types_, const Array & parameters_)
+        : WindowFunction(name_, argument_types_, parameters_)
+    {}
+
+    DataTypePtr getReturnType() const override
+    { return std::make_shared<DataTypeUInt64>(); }
+
+    void windowInsertResultInto(IColumn & to, const WindowTransform * transform) override
+    {
+        assert_cast<ColumnUInt64 &>(to).getData().push_back(
+            transform->peer_group_start_row_number);
+    }
+};
+
+struct WindowFunctionDenseRank final : public WindowFunction
+{
+    WindowFunctionDenseRank(const std::string & name_,
+            const DataTypes & argument_types_, const Array & parameters_)
+        : WindowFunction(name_, argument_types_, parameters_)
+    {}
+
+    DataTypePtr getReturnType() const override
+    { return std::make_shared<DataTypeUInt64>(); }
+
+    void windowInsertResultInto(IColumn & to, const WindowTransform * transform) override
+    {
+        assert_cast<ColumnUInt64 &>(to).getData().push_back(
+            transform->peer_group_number);
+    }
+};
+
+struct WindowFunctionRowNumber final : public WindowFunction
+{
+    WindowFunctionRowNumber(const std::string & name_,
+            const DataTypes & argument_types_, const Array & parameters_)
+        : WindowFunction(name_, argument_types_, parameters_)
+    {}
+
+    DataTypePtr getReturnType() const override
+    { return std::make_shared<DataTypeUInt64>(); }
+
+    void windowInsertResultInto(IColumn & to, const WindowTransform * transform) override
+    {
+        assert_cast<ColumnUInt64 &>(to).getData().push_back(
+            transform->current_row_number);
+    }
+};
+
+
+void registerWindowFunctions(AggregateFunctionFactory & factory)
+{
+    factory.registerFunction("rank", [](const std::string & name,
+            const DataTypes & argument_types, const Array & parameters)
+        {
+            return std::make_shared<WindowFunctionRank>(name, argument_types,
+                parameters);
+        });
+
+    factory.registerFunction("dense_rank", [](const std::string & name,
+            const DataTypes & argument_types, const Array & parameters)
+        {
+            return std::make_shared<WindowFunctionDenseRank>(name, argument_types,
+                parameters);
+        });
+
+    factory.registerFunction("row_number", [](const std::string & name,
+            const DataTypes & argument_types, const Array & parameters)
+        {
+            return std::make_shared<WindowFunctionRowNumber>(name, argument_types,
+                parameters);
+        });
+}
 
 }
diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h
index 0ba8c8c6010..0d6ea066050 100644
--- a/src/Processors/Transforms/WindowTransform.h
+++ b/src/Processors/Transforms/WindowTransform.h
@@ -22,6 +22,10 @@ struct WindowFunctionWorkspace
     WindowFunctionDescription window_function;
     AlignedBuffer aggregate_function_state;
     std::vector<size_t> argument_column_indices;
+    // This field is set for pure window functions. When set, we ignore the
+    // window_function.aggregate_function, and work through this interface
+    // instead.
+    IWindowFunction * window_function_impl = nullptr;
 
     // Argument columns. Be careful, this is a per-block cache.
     std::vector<const IColumn *> argument_columns;
@@ -282,6 +286,11 @@ public:
     // frames may be earlier.
     RowNumber peer_group_start;
 
+    // Row and group numbers in partition for calculating rank() and friends.
+    uint64_t current_row_number = 1;
+    uint64_t peer_group_start_row_number = 1;
+    uint64_t peer_group_number = 1;
+
     // The frame is [frame_start, frame_end) if frame_ended && frame_started,
     // and unknown otherwise. Note that when we move to the next row, both the
     // frame_start and the frame_end may jump forward by an unknown amount of
diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference
index 217a8571d5f..7faae9f6959 100644
--- a/tests/queries/0_stateless/01591_window_functions.reference
+++ b/tests/queries/0_stateless/01591_window_functions.reference
@@ -920,3 +920,45 @@ FROM numbers(2)
 ;
 1	0
 1	1
+-- some true window functions -- rank and friends
+select number, p, o,
+    count(*) over w,
+    rank() over w,
+    dense_rank() over w,
+    row_number() over w
+from (select number, intDiv(number, 5) p, mod(number, 3) o
+    from numbers(31) order by o, number) t
+window w as (partition by p order by o)
+order by p, o, number
+settings max_block_size = 2;
+0	0	0	2	1	1	1
+3	0	0	2	1	1	2
+1	0	1	4	3	2	3
+4	0	1	4	3	2	4
+2	0	2	5	5	3	5
+6	1	0	2	1	1	1
+9	1	0	2	1	1	2
+7	1	1	3	3	2	3
+5	1	2	5	4	3	4
+8	1	2	5	4	3	5
+12	2	0	1	1	1	1
+10	2	1	3	2	2	2
+13	2	1	3	2	2	3
+11	2	2	5	4	3	4
+14	2	2	5	4	3	5
+15	3	0	2	1	1	2
+18	3	0	2	1	1	1
+16	3	1	4	3	2	3
+19	3	1	4	3	2	4
+17	3	2	5	5	3	5
+21	4	0	2	1	1	1
+24	4	0	2	1	1	2
+22	4	1	3	3	2	3
+20	4	2	5	4	3	5
+23	4	2	5	4	3	4
+27	5	0	1	1	1	1
+25	5	1	3	2	2	2
+28	5	1	3	2	2	3
+26	5	2	5	4	3	4
+29	5	2	5	4	3	5
+30	6	0	1	1	1	1
diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql
index 8742562a621..66b2c6f862a 100644
--- a/tests/queries/0_stateless/01591_window_functions.sql
+++ b/tests/queries/0_stateless/01591_window_functions.sql
@@ -315,3 +315,15 @@ SELECT
     max(number) OVER (ORDER BY number ASC NULLS FIRST)
 FROM numbers(2)
 ;
+
+-- some true window functions -- rank and friends
+select number, p, o,
+    count(*) over w,
+    rank() over w,
+    dense_rank() over w,
+    row_number() over w
+from (select number, intDiv(number, 5) p, mod(number, 3) o
+    from numbers(31) order by o, number) t
+window w as (partition by p order by o)
+order by p, o, number
+settings max_block_size = 2;

From c32ed77976f3aeabfddceac506eca5a7a1907f21 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 11 Feb 2021 16:37:46 +0300
Subject: [PATCH 0972/1238] Some queries become too fast

---
 tests/performance/decimal_aggregates.xml | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/performance/decimal_aggregates.xml b/tests/performance/decimal_aggregates.xml
index 615c3201843..f7bc2ac1868 100644
--- a/tests/performance/decimal_aggregates.xml
+++ b/tests/performance/decimal_aggregates.xml
@@ -11,7 +11,7 @@
     <query>SELECT min(d32), max(d32), argMin(x, d32), argMax(x, d32) FROM t</query>
     <query>SELECT min(d64), max(d64), argMin(x, d64), argMax(x, d64) FROM t</query>
     <query>SELECT min(d128), max(d128), argMin(x, d128), argMax(x, d128) FROM t</query>
-    
+
     <query>SELECT avg(d32), sum(d32), sumWithOverflow(d32) FROM t</query>
     <query>SELECT avg(d64), sum(d64), sumWithOverflow(d64) FROM t</query>
     <query>SELECT avg(d128), sum(d128), sumWithOverflow(d128) FROM t</query>
@@ -19,11 +19,11 @@
     <query>SELECT uniq(d32), uniqCombined(d32), uniqExact(d32), uniqHLL12(d32) FROM     (SELECT * FROM t LIMIT 10000000)</query>
     <query>SELECT uniq(d64), uniqCombined(d64), uniqExact(d64), uniqHLL12(d64) FROM     (SELECT * FROM t LIMIT 10000000)</query>
     <query>SELECT uniq(d128), uniqCombined(d128), uniqExact(d128), uniqHLL12(d128) FROM (SELECT * FROM t LIMIT 1000000)</query>
-    
+
     <query>SELECT median(d32), medianExact(d32), medianExactWeighted(d32, 2) FROM    (SELECT * FROM t LIMIT 10000000)</query>
     <query>SELECT median(d64), medianExact(d64), medianExactWeighted(d64, 2) FROM    (SELECT * FROM t LIMIT 1000000)</query>
     <query>SELECT median(d128), medianExact(d128), medianExactWeighted(d128, 2) FROM (SELECT * FROM t LIMIT 1000000)</query>
-    
+
     <query>SELECT quantile(d32), quantileExact(d32), quantileExactWeighted(d32, 2) FROM    (SELECT * FROM t LIMIT 10000000)</query>
     <query>SELECT quantile(d64), quantileExact(d64), quantileExactWeighted(d64, 2) FROM    (SELECT * FROM t LIMIT 1000000)</query>
     <query>SELECT quantile(d128), quantileExact(d128), quantileExactWeighted(d128, 2) FROM (SELECT * FROM t LIMIT 1000000)</query>
@@ -31,8 +31,8 @@
     <query>SELECT quantilesExact(0.1, 0.9)(d32), quantilesExactWeighted(0.1, 0.9)(d32, 2) FROM   (SELECT * FROM t LIMIT 10000000)</query>
     <query>SELECT quantilesExact(0.1, 0.9)(d64), quantilesExactWeighted(0.1, 0.9)(d64, 2) FROM   (SELECT * FROM t LIMIT 1000000)</query>
     <query>SELECT quantilesExact(0.1, 0.9)(d128), quantilesExactWeighted(0.1, 0.9)(d128, 2) FROM (SELECT * FROM t LIMIT 1000000)</query>
-    
+
     <query>SELECT varPop(d32), varSamp(d32), stddevPop(d32) FROM t</query>
-    <query>SELECT varPop(d64), varSamp(d64), stddevPop(d64) FROM    (SELECT * FROM t LIMIT 1000000)</query>
-    <query>SELECT varPop(d128), varSamp(d128), stddevPop(d128) FROM (SELECT * FROM t LIMIT 1000000)</query>
+    <query>SELECT varPop(d64), varSamp(d64), stddevPop(d64) FROM    (SELECT * FROM t LIMIT 10000000)</query>
+    <query>SELECT varPop(d128), varSamp(d128), stddevPop(d128) FROM (SELECT * FROM t LIMIT 10000000)</query>
 </test>

From d539948fe72f3ee7c7e90a49cdffbc93d0a3749c Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 7 Feb 2021 04:41:31 +0300
Subject: [PATCH 0973/1238] In memory compression: a prototype

---
 src/Columns/ColumnVector.cpp    | 51 ++++++++++++++++++
 src/Columns/ColumnVector.h      |  2 +
 src/Columns/IColumn.h           | 11 ++++
 src/Storages/MemorySettings.cpp | 36 +++++++++++++
 src/Storages/MemorySettings.h   | 26 +++++++++
 src/Storages/StorageMemory.cpp  | 96 +++++++++++++++++++++------------
 src/Storages/StorageMemory.h    | 16 +++++-
 src/Storages/StorageSet.cpp     | 11 ++--
 8 files changed, 207 insertions(+), 42 deletions(-)
 create mode 100644 src/Storages/MemorySettings.cpp
 create mode 100644 src/Storages/MemorySettings.h

diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp
index a075c10a8a9..59c8b5cf33b 100644
--- a/src/Columns/ColumnVector.cpp
+++ b/src/Columns/ColumnVector.cpp
@@ -16,6 +16,9 @@
 #include <common/unaligned.h>
 #include <ext/bit_cast.h>
 #include <ext/scope_guard.h>
+#include <lz4.h>
+#include <Compression/LZ4_decompress_faster.h>
+#include <IO/BufferWithOwnMemory.h>
 
 #include <cmath>
 #include <cstring>
@@ -32,6 +35,8 @@ namespace ErrorCodes
     extern const int PARAMETER_OUT_OF_BOUND;
     extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
     extern const int LOGICAL_ERROR;
+    extern const int CANNOT_COMPRESS;
+    extern const int CANNOT_DECOMPRESS;
 }
 
 template <typename T>
@@ -520,6 +525,52 @@ void ColumnVector<T>::getExtremes(Field & min, Field & max) const
     max = NearestFieldType<T>(cur_max);
 }
 
+
+#pragma GCC diagnostic ignored "-Wold-style-cast"
+
+template <typename T>
+LazyColumn ColumnVector<T>::compress() const
+{
+    size_t source_size = data.size() * sizeof(T);
+    size_t max_dest_size = LZ4_COMPRESSBOUND(source_size);
+
+    if (max_dest_size > std::numeric_limits<int>::max())
+        throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress column of size {}", formatReadableSizeWithBinarySuffix(source_size));
+
+    auto compressed = std::make_shared<Memory<>>(max_dest_size);
+
+    auto compressed_size = LZ4_compress_default(
+        reinterpret_cast<const char *>(data.data()),
+        compressed->data(),
+        source_size,
+        max_dest_size);
+
+    if (compressed_size <= 0)
+        throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress column");
+
+    /// If compression is inefficient.
+    if (static_cast<size_t>(compressed_size) * 2 > source_size)
+        return IColumn::compress();
+
+    /// Shrink to fit.
+    auto shrank = std::make_shared<Memory<>>(compressed_size);
+    memcpy(shrank->data(), compressed->data(), compressed_size);
+
+    return [compressed = std::move(shrank), column_size = data.size()]
+    {
+        auto res = ColumnVector<T>::create(column_size);
+        auto processed_size = LZ4_decompress_fast(
+            compressed->data(),
+            reinterpret_cast<char *>(res->getData().data()),
+            column_size * sizeof(T));
+
+        if (processed_size <= 0)
+            throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress column");
+
+        return res;
+    };
+}
+
 /// Explicit template instantiations - to avoid code bloat in headers.
 template class ColumnVector<UInt8>;
 template class ColumnVector<UInt16>;
diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h
index 1b13859bdee..4f1cbcafcbc 100644
--- a/src/Columns/ColumnVector.h
+++ b/src/Columns/ColumnVector.h
@@ -298,6 +298,8 @@ public:
         return typeid(rhs) == typeid(ColumnVector<T>);
     }
 
+    LazyColumn compress() const override;
+
     /// Replace elements that match the filter with zeroes. If inverted replaces not matched elements.
     void applyZeroMap(const IColumn::Filter & filt, bool inverted = false);
 
diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h
index 824b5411744..d441e9f7c4e 100644
--- a/src/Columns/IColumn.h
+++ b/src/Columns/IColumn.h
@@ -357,6 +357,14 @@ public:
         throw Exception("Method structureEquals is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
     }
 
+    /// Compress column in memory to some representation that allows to decompress it back.
+    using Lazy = std::function<Ptr()>;
+    virtual Lazy compress() const
+    {
+        /// No compression by default, just wrap the object.
+        return [column = getPtr()] { return column; };
+    }
+
 
     static MutablePtr mutate(Ptr ptr)
     {
@@ -462,6 +470,9 @@ using MutableColumns = std::vector<MutableColumnPtr>;
 using ColumnRawPtrs = std::vector<const IColumn *>;
 //using MutableColumnRawPtrs = std::vector<IColumn *>;
 
+using LazyColumn = IColumn::Lazy;
+using LazyColumns = std::vector<LazyColumn>;
+
 template <typename ... Args>
 struct IsMutableColumns;
 
diff --git a/src/Storages/MemorySettings.cpp b/src/Storages/MemorySettings.cpp
new file mode 100644
index 00000000000..f5e182b3484
--- /dev/null
+++ b/src/Storages/MemorySettings.cpp
@@ -0,0 +1,36 @@
+#include <Storages/MemorySettings.h>
+#include <Parsers/ASTCreateQuery.h>
+#include <Parsers/ASTSetQuery.h>
+#include <Parsers/ASTFunction.h>
+#include <Common/Exception.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int UNKNOWN_SETTING;
+}
+
+IMPLEMENT_SETTINGS_TRAITS(memorySettingsTraits, MEMORY_SETTINGS)
+
+void MemorySettings::loadFromQuery(ASTStorage & storage_def)
+{
+    if (storage_def.settings)
+    {
+        try
+        {
+            applyChanges(storage_def.settings->changes);
+        }
+        catch (Exception & e)
+        {
+            if (e.code() == ErrorCodes::UNKNOWN_SETTING)
+                e.addMessage("for storage " + storage_def.engine->name);
+            throw;
+        }
+    }
+}
+
+}
+
diff --git a/src/Storages/MemorySettings.h b/src/Storages/MemorySettings.h
new file mode 100644
index 00000000000..4a1ba57475f
--- /dev/null
+++ b/src/Storages/MemorySettings.h
@@ -0,0 +1,26 @@
+#pragma once
+
+#include <Core/BaseSettings.h>
+
+
+namespace DB
+{
+class ASTStorage;
+
+
+#define MEMORY_SETTINGS(M) \
+    M(Bool, compress, true, "Compress data in memory", 0) \
+
+DECLARE_SETTINGS_TRAITS(memorySettingsTraits, MEMORY_SETTINGS)
+
+
+/** Settings for the Memory engine.
+  * Could be loaded from a CREATE TABLE query (SETTINGS clause).
+  */
+struct MemorySettings : public BaseSettings<memorySettingsTraits>
+{
+    void loadFromQuery(ASTStorage & storage_def);
+};
+
+}
+
diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp
index 4530d93c274..a67eea0f28a 100644
--- a/src/Storages/StorageMemory.cpp
+++ b/src/Storages/StorageMemory.cpp
@@ -6,6 +6,7 @@
 #include <Interpreters/MutationsInterpreter.h>
 #include <Storages/StorageFactory.h>
 #include <Storages/StorageMemory.h>
+#include <Storages/MemorySettings.h>
 
 #include <IO/WriteHelpers.h>
 #include <Processors/Sources/SourceWithProgress.h>
@@ -23,7 +24,7 @@ namespace ErrorCodes
 
 class MemorySource : public SourceWithProgress
 {
-    using InitializerFunc = std::function<void(std::shared_ptr<const Blocks> &)>;
+    using InitializerFunc = std::function<void(std::shared_ptr<const LazyBlocks> &)>;
 public:
     /// Blocks are stored in std::list which may be appended in another thread.
     /// We use pointer to the beginning of the list and its current size.
@@ -34,7 +35,7 @@ public:
         Names column_names_,
         const StorageMemory & storage,
         const StorageMetadataPtr & metadata_snapshot,
-        std::shared_ptr<const Blocks> data_,
+        std::shared_ptr<const LazyBlocks> data_,
         std::shared_ptr<std::atomic<size_t>> parallel_execution_index_,
         InitializerFunc initializer_func_ = {})
         : SourceWithProgress(metadata_snapshot->getSampleBlockForColumns(column_names_, storage.getVirtuals(), storage.getStorageID()))
@@ -43,6 +44,8 @@ public:
         , parallel_execution_index(parallel_execution_index_)
         , initializer_func(std::move(initializer_func_))
     {
+        for (const auto & elem : column_names_and_types)
+            column_positions.push_back(metadata_snapshot->getSampleBlock().getPositionByName(elem.getNameInStorage()));
     }
 
     String getName() const override { return "Memory"; }
@@ -63,21 +66,25 @@ protected:
             return {};
         }
 
-        const Block & src = (*data)[current_index];
+        const LazyBlock & src = (*data)[current_index];
         Columns columns;
         columns.reserve(columns.size());
 
         /// Add only required columns to `res`.
+        size_t i = 0;
         for (const auto & elem : column_names_and_types)
         {
-            auto current_column = src.getByName(elem.getNameInStorage()).column;
+            auto current_column = src[column_positions[i]]();
             if (elem.isSubcolumn())
                 columns.emplace_back(elem.getTypeInStorage()->getSubcolumn(elem.getSubcolumnName(), *current_column));
             else
                 columns.emplace_back(std::move(current_column));
+
+            ++i;
         }
 
-        return Chunk(std::move(columns), src.rows());
+        size_t rows = columns.at(0)->size();
+        return Chunk(std::move(columns), rows);
     }
 
 private:
@@ -95,9 +102,10 @@ private:
 
     const NamesAndTypesList column_names_and_types;
     size_t execution_index = 0;
-    std::shared_ptr<const Blocks> data;
+    std::shared_ptr<const LazyBlocks> data;
     std::shared_ptr<std::atomic<size_t>> parallel_execution_index;
     InitializerFunc initializer_func;
+    std::vector<size_t> column_positions;
 };
 
 
@@ -149,8 +157,12 @@ private:
 };
 
 
-StorageMemory::StorageMemory(const StorageID & table_id_, ColumnsDescription columns_description_, ConstraintsDescription constraints_)
-    : IStorage(table_id_), data(std::make_unique<const Blocks>())
+StorageMemory::StorageMemory(
+    const StorageID & table_id_,
+    ColumnsDescription columns_description_,
+    ConstraintsDescription constraints_,
+    bool compress_)
+    : IStorage(table_id_), data(std::make_unique<const LazyBlocks>()), compress(compress_)
 {
     StorageInMemoryMetadata storage_metadata;
     storage_metadata.setColumns(std::move(columns_description_));
@@ -186,7 +198,7 @@ Pipe StorageMemory::read(
             metadata_snapshot,
             nullptr /* data */,
             nullptr /* parallel execution index */,
-            [this](std::shared_ptr<const Blocks> & data_to_initialize)
+            [this](std::shared_ptr<const LazyBlocks> & data_to_initialize)
             {
                 data_to_initialize = data.get();
             }));
@@ -219,18 +231,18 @@ BlockOutputStreamPtr StorageMemory::write(const ASTPtr & /*query*/, const Storag
 
 void StorageMemory::drop()
 {
-    data.set(std::make_unique<Blocks>());
+    data.set(std::make_unique<LazyBlocks>());
     total_size_bytes.store(0, std::memory_order_relaxed);
     total_size_rows.store(0, std::memory_order_relaxed);
 }
 
-static inline void updateBlockData(Block & old_block, const Block & new_block)
+static inline void updateBlockData(LazyBlock & old_block, const LazyBlock & new_block, const Block & old_header, const Block & new_header)
 {
-    for (const auto & it : new_block)
+    size_t i = 0;
+    for (const auto & it : new_header)
     {
-        auto col_name = it.name;
-        auto & col_with_type_name = old_block.getByName(col_name);
-        col_with_type_name.column = it.column;
+        old_block[old_header.getPositionByName(it.name)] = new_block[i];
+        ++i;
     }
 }
 
@@ -242,36 +254,47 @@ void StorageMemory::mutate(const MutationCommands & commands, const Context & co
     auto storage_ptr = DatabaseCatalog::instance().getTable(storage, context);
     auto interpreter = std::make_unique<MutationsInterpreter>(storage_ptr, metadata_snapshot, commands, context, true);
     auto in = interpreter->execute();
+    Block old_header = metadata_snapshot->getSampleBlock();
+    Block mutation_header = in->getHeader();
 
     in->readPrefix();
-    Blocks out;
-    Block block;
-    while ((block = in->read()))
+    LazyBlocks out;
+    while (Block block = in->read())
     {
-        out.push_back(block);
+        LazyColumns lazy_columns;
+
+        for (const auto & elem : block)
+        {
+            if (compress)
+                lazy_columns.emplace_back(elem.column->compress());
+            else
+                lazy_columns.emplace_back([=]{ return elem.column; });
+        }
+
+        out.emplace_back(std::move(lazy_columns));
     }
     in->readSuffix();
 
-    std::unique_ptr<Blocks> new_data;
+    std::unique_ptr<LazyBlocks> new_data;
 
-    // all column affected
+    /// All columns affected.
     if (interpreter->isAffectingAllColumns())
     {
-        new_data = std::make_unique<Blocks>(out);
+        new_data = std::make_unique<LazyBlocks>(out);
     }
     else
     {
-        /// just some of the column affected, we need update it with new column
-        new_data = std::make_unique<Blocks>(*(data.get()));
+        /// Just some of the columns affected, we need update it with new column.
+        new_data = std::make_unique<LazyBlocks>(*(data.get()));
         auto data_it = new_data->begin();
         auto out_it = out.begin();
 
         while (data_it != new_data->end())
         {
-            /// Mutation does not change the number of blocks
+            /// Mutation does not change the number of blocks.
             assert(out_it != out.end());
 
-            updateBlockData(*data_it, *out_it);
+            updateBlockData(*data_it, *out_it, old_header, mutation_header);
             ++data_it;
             ++out_it;
         }
@@ -279,7 +302,7 @@ void StorageMemory::mutate(const MutationCommands & commands, const Context & co
         assert(out_it == out.end());
     }
 
-    size_t rows = 0;
+/*    size_t rows = 0;
     size_t bytes = 0;
     for (const auto & buffer : *new_data)
     {
@@ -287,7 +310,8 @@ void StorageMemory::mutate(const MutationCommands & commands, const Context & co
         bytes += buffer.bytes();
     }
     total_size_bytes.store(rows, std::memory_order_relaxed);
-    total_size_rows.store(bytes, std::memory_order_relaxed);
+    total_size_rows.store(bytes, std::memory_order_relaxed);*/
+
     data.set(std::move(new_data));
 }
 
@@ -295,7 +319,7 @@ void StorageMemory::mutate(const MutationCommands & commands, const Context & co
 void StorageMemory::truncate(
     const ASTPtr &, const StorageMetadataPtr &, const Context &, TableExclusiveLockHolder &)
 {
-    data.set(std::make_unique<Blocks>());
+    data.set(std::make_unique<LazyBlocks>());
     total_size_bytes.store(0, std::memory_order_relaxed);
     total_size_rows.store(0, std::memory_order_relaxed);
 }
@@ -317,13 +341,19 @@ void registerStorageMemory(StorageFactory & factory)
     factory.registerStorage("Memory", [](const StorageFactory::Arguments & args)
     {
         if (!args.engine_args.empty())
-            throw Exception(
-                "Engine " + args.engine_name + " doesn't support any arguments (" + toString(args.engine_args.size()) + " given)",
-                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+                "Engine {} doesn't support any arguments ({} given)",
+                args.engine_name, args.engine_args.size());
 
-        return StorageMemory::create(args.table_id, args.columns, args.constraints);
+        bool has_settings = args.storage_def->settings;
+        MemorySettings settings;
+        if (has_settings)
+            settings.loadFromQuery(*args.storage_def);
+
+        return StorageMemory::create(args.table_id, args.columns, args.constraints, settings.compress);
     },
     {
+        .supports_settings = true,
         .supports_parallel_insert = true,
     });
 }
diff --git a/src/Storages/StorageMemory.h b/src/Storages/StorageMemory.h
index dc695427156..97ddfa93d9a 100644
--- a/src/Storages/StorageMemory.h
+++ b/src/Storages/StorageMemory.h
@@ -15,6 +15,11 @@
 namespace DB
 {
 
+/// Lazy block contains possibly compressed columns. LazyColumn is std::function that reconstructs Column on call.
+using LazyBlock = LazyColumns;
+using LazyBlocks = std::vector<LazyBlock>;
+
+
 /** Implements storage in the RAM.
   * Suitable for temporary data.
   * It does not support keys.
@@ -95,7 +100,8 @@ public:
 
 private:
     /// MultiVersion data storage, so that we can copy the list of blocks to readers.
-    MultiVersion<Blocks> data;
+
+    MultiVersion<LazyBlocks> data;
 
     mutable std::mutex mutex;
 
@@ -104,8 +110,14 @@ private:
     std::atomic<size_t> total_size_bytes = 0;
     std::atomic<size_t> total_size_rows = 0;
 
+    bool compress;
+
 protected:
-    StorageMemory(const StorageID & table_id_, ColumnsDescription columns_description_, ConstraintsDescription constraints_);
+    StorageMemory(
+        const StorageID & table_id_,
+        ColumnsDescription columns_description_,
+        ConstraintsDescription constraints_,
+        bool compress_ = false);
 };
 
 }
diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp
index e518c7da0e4..d64042f0c1e 100644
--- a/src/Storages/StorageSet.cpp
+++ b/src/Storages/StorageSet.cpp
@@ -242,15 +242,12 @@ void registerStorageSet(StorageFactory & factory)
                 ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
 
         bool has_settings = args.storage_def->settings;
-
-        auto set_settings = std::make_unique<SetSettings>();
+        SetSettings set_settings;
         if (has_settings)
-        {
-            set_settings->loadFromQuery(*args.storage_def);
-        }
+            set_settings.loadFromQuery(*args.storage_def);
 
-        DiskPtr disk = args.context.getDisk(set_settings->disk);
-        return StorageSet::create(disk, args.relative_data_path, args.table_id, args.columns, args.constraints, set_settings->persistent);
+        DiskPtr disk = args.context.getDisk(set_settings.disk);
+        return StorageSet::create(disk, args.relative_data_path, args.table_id, args.columns, args.constraints, set_settings.persistent);
     }, StorageFactory::StorageFeatures{ .supports_settings = true, });
 }
 

From 280f459f71513752696a2fcc9753aae4a7e342b2 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 7 Feb 2021 05:40:06 +0300
Subject: [PATCH 0974/1238] Fix quadratic INSERT

---
 src/Storages/StorageMemory.cpp | 37 ++++++++++++++++++++++------------
 1 file changed, 24 insertions(+), 13 deletions(-)

diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp
index a67eea0f28a..20c8a44efd4 100644
--- a/src/Storages/StorageMemory.cpp
+++ b/src/Storages/StorageMemory.cpp
@@ -125,23 +125,32 @@ public:
     void write(const Block & block) override
     {
         metadata_snapshot->check(block, true);
-        new_blocks.emplace_back(block);
+
+        inserted_bytes += block.allocatedBytes();
+        inserted_rows += block.rows();
+
+        Block sample = metadata_snapshot->getSampleBlock();
+
+        LazyColumns lazy_columns;
+        lazy_columns.reserve(sample.columns());
+
+        for (const auto & elem : sample)
+        {
+            const ColumnPtr & column = block.getByName(elem.name).column;
+
+            if (storage.compress)
+                lazy_columns.emplace_back(column->compress());
+            else
+                lazy_columns.emplace_back([=]{ return column; });
+        }
+
+        new_blocks.emplace_back(std::move(lazy_columns));
     }
 
     void writeSuffix() override
     {
-        size_t inserted_bytes = 0;
-        size_t inserted_rows = 0;
-
-        for (const auto & block : new_blocks)
-        {
-            inserted_bytes += block.allocatedBytes();
-            inserted_rows += block.rows();
-        }
-
         std::lock_guard lock(storage.mutex);
-
-        auto new_data = std::make_unique<Blocks>(*(storage.data.get()));
+        auto new_data = std::make_unique<LazyBlocks>(*(storage.data.get()));
         new_data->insert(new_data->end(), new_blocks.begin(), new_blocks.end());
 
         storage.data.set(std::move(new_data));
@@ -150,7 +159,9 @@ public:
     }
 
 private:
-    Blocks new_blocks;
+    LazyBlocks new_blocks;
+    size_t inserted_bytes = 0;
+    size_t inserted_rows = 0;
 
     StorageMemory & storage;
     StorageMetadataPtr metadata_snapshot;

From 58f1d4d910a2b6d34f484ff742df85e421276391 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 7 Feb 2021 06:00:31 +0300
Subject: [PATCH 0975/1238] Add comment to config

---
 programs/server/config.xml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/programs/server/config.xml b/programs/server/config.xml
index 849d3dc32ba..571a8c6cf75 100644
--- a/programs/server/config.xml
+++ b/programs/server/config.xml
@@ -284,6 +284,11 @@
          In bytes. Cache is single for server. Memory is allocated only on demand.
          Cache is used when 'use_uncompressed_cache' user setting turned on (off by default).
          Uncompressed cache is advantageous only for very short queries and in rare cases.
+
+         Note: uncompressed cache is pointless for lz4, because memory bandwidth is slower than multi-core decompression.
+         Enabling it will only make queries slower.
+         If number of CPU cores is in order of 100 and memory bandwidth is in range of 100-200 GB/sec,
+         there is a chance it is also being pointless for zstd.
       -->
     <uncompressed_cache_size>8589934592</uncompressed_cache_size>
 

From 4d650a2a5621723f4466db263a8602cb04e6d40b Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 7 Feb 2021 06:03:13 +0300
Subject: [PATCH 0976/1238] Adjust config

---
 programs/server/users.xml | 3 ---
 src/Core/Settings.h       | 2 +-
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/programs/server/users.xml b/programs/server/users.xml
index 3223d855651..ef66891a6a0 100644
--- a/programs/server/users.xml
+++ b/programs/server/users.xml
@@ -7,9 +7,6 @@
             <!-- Maximum memory usage for processing single query, in bytes. -->
             <max_memory_usage>10000000000</max_memory_usage>
 
-            <!-- Use cache of uncompressed blocks of data. Meaningful only for processing many of very short queries. -->
-            <use_uncompressed_cache>0</use_uncompressed_cache>
-
             <!-- How to choose between replicas during distributed query processing.
                  random - choose random replica from set of replicas with minimum number of errors
                  nearest_hostname - from set of replicas with minimum number of errors, choose replica
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 9bb9ad30f15..72b89e5e8db 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -69,7 +69,7 @@ class IColumn;
     M(UInt64, s3_max_redirects, 10, "Max number of S3 redirects hops allowed.", 0) \
     M(UInt64, s3_max_connections, 1024, "The maximum number of connections per server.", 0) \
     M(Bool, extremes, false, "Calculate minimums and maximums of the result columns. They can be output in JSON-formats.", IMPORTANT) \
-    M(Bool, use_uncompressed_cache, true, "Whether to use the cache of uncompressed blocks.", 0) \
+    M(Bool, use_uncompressed_cache, false, "Whether to use the cache of uncompressed blocks.", 0) \
     M(Bool, replace_running_query, false, "Whether the running request should be canceled with the same id as the new one.", 0) \
     M(UInt64, background_buffer_flush_schedule_pool_size, 16, "Number of threads performing background flush for tables with Buffer engine. Only has meaning at server startup.", 0) \
     M(UInt64, background_pool_size, 16, "Number of threads performing background work for tables (for example, merging in merge tree). Only has meaning at server startup.", 0) \

From 9b4f6fb711eafd236933c26b868c11741d572376 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Wed, 10 Feb 2021 21:53:31 +0300
Subject: [PATCH 0977/1238] Don't compress small blocks

---
 src/Columns/ColumnVector.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp
index 59c8b5cf33b..32658eb3e34 100644
--- a/src/Columns/ColumnVector.cpp
+++ b/src/Columns/ColumnVector.cpp
@@ -532,6 +532,11 @@ template <typename T>
 LazyColumn ColumnVector<T>::compress() const
 {
     size_t source_size = data.size() * sizeof(T);
+
+    /// Don't compress small blocks.
+    if (source_size < 4096) /// A wild guess.
+        return IColumn::compress();
+
     size_t max_dest_size = LZ4_COMPRESSBOUND(source_size);
 
     if (max_dest_size > std::numeric_limits<int>::max())

From 772073a0db4fa98266c38e52dccd052956104923 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com>
Date: Thu, 11 Feb 2021 17:01:09 +0300
Subject: [PATCH 0978/1238] Update WindowTransform.cpp

---
 src/Processors/Transforms/WindowTransform.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index 7fc9b56c3d5..570f7002813 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -798,10 +798,10 @@ void WindowTransform::updateAggregationState()
             // For now, add the values one by one.
             auto * columns = ws.argument_columns.data();
             // Removing arena.get() from the loop makes it faster somehow...
-            auto * arena_ = arena.get();
+            auto * arena_ptr = arena.get();
             for (auto row = first_row; row < past_the_end_row; ++row)
             {
-                a->add(buf, columns, row, arena_);
+                a->add(buf, columns, row, arena_ptr);
             }
         }
     }

From ecbcf47f28733271f5795cdf6e8fecdc314042c0 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Thu, 11 Feb 2021 18:07:42 +0300
Subject: [PATCH 0979/1238] lag/lead stubs + cleanup

---
 src/Interpreters/ExpressionAnalyzer.cpp       |   5 +-
 src/Processors/Transforms/WindowTransform.cpp | 121 ++++++++++++++----
 src/Processors/Transforms/WindowTransform.h   |  10 +-
 .../01591_window_functions.reference          |   6 +
 .../0_stateless/01591_window_functions.sql    |   7 +
 5 files changed, 122 insertions(+), 27 deletions(-)

diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp
index 3f65a6f3f58..b47d78d7568 100644
--- a/src/Interpreters/ExpressionAnalyzer.cpp
+++ b/src/Interpreters/ExpressionAnalyzer.cpp
@@ -540,7 +540,10 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions)
         !context.getSettingsRef().allow_experimental_window_functions)
     {
         throw Exception(ErrorCodes::NOT_IMPLEMENTED,
-            "Window functions are not implemented (while processing '{}')",
+            "The support for window functions is experimental and will change"
+            " in backwards-incompatible ways in the future releases. Set"
+            " allow_experimental_window_functions = 1 to enable it."
+            " While processing '{}'",
             syntax->window_function_asts[0]->formatForErrorMessage());
     }
 
diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index 45692e9cc7a..90c5deba395 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -1,17 +1,18 @@
 #include <Processors/Transforms/WindowTransform.h>
 
-#include <Interpreters/ExpressionActions.h>
-
-#include <Common/Arena.h>
-
-#include <DataTypes/DataTypesNumber.h>
 #include <AggregateFunctions/AggregateFunctionFactory.h>
+#include <Common/Arena.h>
+#include <DataTypes/DataTypesNumber.h>
+#include <Interpreters/ExpressionActions.h>
+#include <Interpreters/convertFieldToType.h>
+
 
 namespace DB
 {
 
 namespace ErrorCodes
 {
+    extern const int BAD_ARGUMENTS;
     extern const int NOT_IMPLEMENTED;
 }
 
@@ -19,11 +20,12 @@ namespace ErrorCodes
 // accept the guts of WindowTransform and do 'something'. Given a small number of
 // true window functions, and the fact that the WindowTransform internals are
 // pretty much well defined in domain terms (e.g. frame boundaries), this is
-// somewhat acceptable. 
+// somewhat acceptable.
 class IWindowFunction {
 public:
     virtual ~IWindowFunction() {}
 
+    // Must insert the result for current_row.
     virtual void windowInsertResultInto(IColumn & to, const WindowTransform * transform) = 0;
 };
 
@@ -140,18 +142,15 @@ WindowTransform::WindowTransform(const Block & input_header_,
     for (const auto & f : functions)
     {
         WindowFunctionWorkspace workspace;
-        workspace.window_function = f;
-
-        const auto & aggregate_function
-            = workspace.window_function.aggregate_function;
+        workspace.aggregate_function = f.aggregate_function;
+        const auto & aggregate_function = workspace.aggregate_function;
         if (!arena && aggregate_function->allocatesMemoryInArena())
         {
             arena = std::make_unique<Arena>();
         }
 
-        workspace.argument_column_indices.reserve(
-            workspace.window_function.argument_names.size());
-        for (const auto & argument_name : workspace.window_function.argument_names)
+        workspace.argument_column_indices.reserve(f.argument_names.size());
+        for (const auto & argument_name : f.argument_names)
         {
             workspace.argument_column_indices.push_back(
                 input_header.getPositionByName(argument_name));
@@ -205,7 +204,7 @@ WindowTransform::~WindowTransform()
     {
         if (!ws.window_function_impl)
         {
-            ws.window_function.aggregate_function->destroy(
+            ws.aggregate_function->destroy(
                 ws.aggregate_function_state.data());
         }
     }
@@ -785,7 +784,7 @@ void WindowTransform::updateAggregationState()
             continue;
         }
 
-        const auto * a = ws.window_function.aggregate_function.get();
+        const auto * a = ws.aggregate_function.get();
         auto * buf = ws.aggregate_function_state.data();
 
         if (reset_aggregation)
@@ -856,8 +855,7 @@ void WindowTransform::writeOutCurrentRow()
         }
         else
         {
-            const auto & f = ws.window_function;
-            const auto * a = f.aggregate_function.get();
+            const auto * a = ws.aggregate_function.get();
             auto * buf = ws.aggregate_function_state.data();
             // FIXME does it also allocate the result on the arena?
             // We'll have to pass it out with blocks then...
@@ -891,8 +889,8 @@ void WindowTransform::appendChunk(Chunk & chunk)
                         ->convertToFullColumnIfConst();
             }
 
-            block.output_columns.push_back(ws.window_function.aggregate_function
-                ->getReturnType()->createColumn());
+            block.output_columns.push_back(ws.aggregate_function->getReturnType()
+                ->createColumn());
         }
 
         // Even in case of `count() over ()` we should have a dummy input column.
@@ -1038,8 +1036,7 @@ void WindowTransform::appendChunk(Chunk & chunk)
                 continue;
             }
 
-            const auto & f = ws.window_function;
-            const auto * a = f.aggregate_function.get();
+            const auto * a = ws.aggregate_function.get();
             auto * buf = ws.aggregate_function_state.data();
 
             a->destroy(buf);
@@ -1060,8 +1057,7 @@ void WindowTransform::appendChunk(Chunk & chunk)
                 continue;
             }
 
-            const auto & f = ws.window_function;
-            const auto * a = f.aggregate_function.get();
+            const auto * a = ws.aggregate_function.get();
             auto * buf = ws.aggregate_function_state.data();
 
             a->create(buf);
@@ -1314,6 +1310,71 @@ struct WindowFunctionRowNumber final : public WindowFunction
     }
 };
 
+struct WindowFunctionLagLead final : public WindowFunction
+{
+    bool is_lag = false;
+    // Always positive.
+    uint64_t offset_rows = 1;
+    Field default_value;
+
+    WindowFunctionLagLead(const std::string & name_,
+        const DataTypes & argument_types_, const Array & parameters_,
+            bool is_lag_)
+        : WindowFunction(name_, argument_types_, parameters_)
+        , is_lag(is_lag_)
+    {
+        // offset and default are in parameters
+        if (argument_types.size() != 1)
+        {
+            throw Exception(ErrorCodes::BAD_ARGUMENTS,
+                "The window function {} must have exactly one argument -- the value column. The offset and the default value must be specified as parameters, i.e. `{}(offset, default)(column)`",
+                getName(), getName());
+        }
+
+        if (parameters.size() > 2)
+        {
+            throw Exception(ErrorCodes::BAD_ARGUMENTS,
+                "The window function {} accepts at most two parameters, {} given",
+                getName(), parameters.size());
+        }
+
+        if (parameters.size() >= 1)
+        {
+            if (!isInt64FieldType(parameters[0].getType())
+                || parameters[0].get<Int64>() < 0)
+            {
+                throw Exception(ErrorCodes::BAD_ARGUMENTS,
+                    "The first parameter of the window function {} must be a nonnegative integer specifying the number of offset rows. Got '{}' instead",
+                    getName(), toString(parameters[0]));
+            }
+
+            offset_rows = parameters[0].get<UInt64>();
+        }
+
+        if (parameters.size() >= 2)
+        {
+            default_value = convertFieldToTypeOrThrow(parameters[1],
+                *argument_types[0]);
+        }
+    }
+
+    DataTypePtr getReturnType() const override { return argument_types[0]; }
+
+    void windowInsertResultInto(IColumn &, const WindowTransform *) override
+    {
+        // These functions are a mess... they ignore the frame, so we need to
+        // either materialize the whole partition (not practical if it's big),
+        // or track a separate frame for these functions, which would  make the
+        // window transform completely impenetrable to human mind. Our best bet
+        // is probably rewriting, say, `lag(value, offset)` to
+        // `any(value) over rows between offset preceding and offset preceding`,
+        // at the query planning stage. We can keep this class as a stub for
+        // parsing, anyway.
+        throw Exception(ErrorCodes::NOT_IMPLEMENTED,
+            "The window function {} is not implemented",
+            getName());
+    }
+};
 
 void registerWindowFunctions(AggregateFunctionFactory & factory)
 {
@@ -1337,6 +1398,20 @@ void registerWindowFunctions(AggregateFunctionFactory & factory)
             return std::make_shared<WindowFunctionRowNumber>(name, argument_types,
                 parameters);
         });
+
+    factory.registerFunction("lag", [](const std::string & name,
+            const DataTypes & argument_types, const Array & parameters)
+        {
+            return std::make_shared<WindowFunctionLagLead>(name, argument_types,
+                parameters, true /* is_lag */);
+        });
+
+    factory.registerFunction("lead", [](const std::string & name,
+            const DataTypes & argument_types, const Array & parameters)
+        {
+            return std::make_shared<WindowFunctionLagLead>(name, argument_types,
+                parameters, false /* is_lag */);
+        });
 }
 
 }
diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h
index 0d6ea066050..5001b984e9a 100644
--- a/src/Processors/Transforms/WindowTransform.h
+++ b/src/Processors/Transforms/WindowTransform.h
@@ -19,14 +19,18 @@ class Arena;
 // Runtime data for computing one window function.
 struct WindowFunctionWorkspace
 {
-    WindowFunctionDescription window_function;
-    AlignedBuffer aggregate_function_state;
-    std::vector<size_t> argument_column_indices;
+    AggregateFunctionPtr aggregate_function;
+
     // This field is set for pure window functions. When set, we ignore the
     // window_function.aggregate_function, and work through this interface
     // instead.
     IWindowFunction * window_function_impl = nullptr;
 
+    std::vector<size_t> argument_column_indices;
+
+    // Will not be initialized for a pure window function.
+    AlignedBuffer aggregate_function_state;
+
     // Argument columns. Be careful, this is a per-block cache.
     std::vector<const IColumn *> argument_columns;
     uint64_t cached_block_number = std::numeric_limits<uint64_t>::max();
diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference
index 7faae9f6959..c29c496397b 100644
--- a/tests/queries/0_stateless/01591_window_functions.reference
+++ b/tests/queries/0_stateless/01591_window_functions.reference
@@ -962,3 +962,9 @@ settings max_block_size = 2;
 26	5	2	5	4	3	4
 29	5	2	5	4	3	5
 30	6	0	1	1	1	1
+-- very bad functions, not implemented yet
+select
+    lag(1, 5)(number) over (),
+    lead(2)(number) over (),
+    lag(number) over ()
+from numbers(2); -- { serverError 48 }
diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql
index 66b2c6f862a..11fb2295b27 100644
--- a/tests/queries/0_stateless/01591_window_functions.sql
+++ b/tests/queries/0_stateless/01591_window_functions.sql
@@ -327,3 +327,10 @@ from (select number, intDiv(number, 5) p, mod(number, 3) o
 window w as (partition by p order by o)
 order by p, o, number
 settings max_block_size = 2;
+
+-- very bad functions, not implemented yet
+select
+    lag(1, 5)(number) over (),
+    lead(2)(number) over (),
+    lag(number) over ()
+from numbers(2); -- { serverError 48 }

From 9afb16759e559fddd17f3cb43d615a31675e95b9 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Thu, 11 Feb 2021 18:14:38 +0300
Subject: [PATCH 0980/1238] fix

---
 src/Databases/DatabaseReplicated.cpp |  3 +-
 src/Interpreters/DDLWorker.cpp       | 45 ++++++++++++++++++++++++----
 2 files changed, 40 insertions(+), 8 deletions(-)

diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index 0ac71793e5d..b8ce48a4d5c 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -351,7 +351,6 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
     else if (!tables_to_detach.empty())
         LOG_WARNING(log, "Will DETACH PERMANENTLY {} broken tables to recover replica", tables_to_detach.size());
 
-    auto db_guard = DatabaseCatalog::instance().getDDLGuard(getDatabaseName(), "");
     for (const auto & table_name : tables_to_detach)
     {
         String to_name = fmt::format("{}_{}_{}_{}", BROKEN_TABLE_PREFIX, table_name, max_log_ptr, thread_local_rng() % 1000);
@@ -517,12 +516,12 @@ void DatabaseReplicated::renameTable(const Context & context, const String & tab
             throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} does not exist", to_table_name);
 
         String statement = readMetadataFile(table_name);
-        String statement_to = readMetadataFile(to_table_name);
         String metadata_zk_path = txn->zookeeper_path + "/metadata/" + escapeForFileName(table_name);
         String metadata_zk_path_to = txn->zookeeper_path + "/metadata/" + escapeForFileName(to_table_name);
         txn->ops.emplace_back(zkutil::makeRemoveRequest(metadata_zk_path, -1));
         if (exchange)
         {
+            String statement_to = readMetadataFile(to_table_name);
             txn->ops.emplace_back(zkutil::makeRemoveRequest(metadata_zk_path_to, -1));
             txn->ops.emplace_back(zkutil::makeCreateRequest(metadata_zk_path, statement_to, zkutil::CreateMode::Persistent));
         }
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 975eaeaca1b..9a398df07b5 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -788,15 +788,33 @@ void DDLWorker::cleanupQueue(Int64, const ZooKeeperPtr & zookeeper)
             /// Now we can safely delete entry
             LOG_INFO(log, "Task {} is outdated, deleting it", node_name);
 
-            /// We recursively delete all nodes except entry/finished to prevent staled hosts from
-            /// creating entry/active node (see createStatusDirs(...))
+            /// We recursively delete all nodes except node_path/finished to prevent staled hosts from
+            /// creating node_path/active node (see createStatusDirs(...))
             zookeeper->tryRemoveChildrenRecursive(node_path, "finished");
 
-            /// And then we remove entry and entry/finished in a single transaction
+            /// And then we remove node_path and node_path/finished in a single transaction
             Coordination::Requests ops;
+            Coordination::Responses res;
+            ops.emplace_back(zkutil::makeCheckRequest(node_path, -1));  /// See a comment below
             ops.emplace_back(zkutil::makeRemoveRequest(fs::path(node_path) / "finished", -1));
             ops.emplace_back(zkutil::makeRemoveRequest(node_path, -1));
-            zookeeper->multi(ops);
+            auto rm_entry_res = zookeeper->tryMulti(ops, res);
+            if (rm_entry_res == Coordination::Error::ZNONODE)
+            {
+                /// Most likely both node_path/finished and node_path were removed concurrently.
+                bool entry_removed_concurrently = res[0]->error == Coordination::Error::ZNONODE;
+                if (entry_removed_concurrently)
+                    continue;
+
+                /// Possible rare case: initiator node has lost connection after enqueueing entry and failed to create status dirs.
+                /// No one has started to process the entry, so node_path/active and node_path/finished nodes were never created, node_path has no children.
+                /// Entry became outdated, but we cannot remove remove it in a transaction with node_path/finished.
+                assert(res[0]->error == Coordination::Error::ZOK && res[1]->error == Coordination::Error::ZNONODE);
+                rm_entry_res = zookeeper->tryRemove(node_path);
+                assert(rm_entry_res != Coordination::Error::ZNOTEMPTY);
+                continue;
+            }
+            zkutil::KeeperMultiException::check(rm_entry_res, ops, res);
         }
         catch (...)
         {
@@ -828,12 +846,27 @@ void DDLWorker::createStatusDirs(const std::string & node_path, const ZooKeeperP
 
     Coordination::Responses responses;
     Coordination::Error code = zookeeper->tryMulti(ops, responses);
+
     bool both_created = code == Coordination::Error::ZOK;
+
+    /// Failed on attempt to create node_path/active because it exists, so node_path/finished must exist too
     bool both_already_exists = responses.size() == 2 && responses[0]->error == Coordination::Error::ZNODEEXISTS
-                                                     && responses[1]->error == Coordination::Error::ZNODEEXISTS;
+                                                     && responses[1]->error == Coordination::Error::ZRUNTIMEINCONSISTENCY;
+    assert(!both_already_exists || (zookeeper->exists(fs::path(node_path) / "active") && zookeeper->exists(fs::path(node_path) / "finished")));
+
+    /// Failed on attempt to create node_path/finished, but node_path/active does not exist
+    bool is_currently_deleting = responses.size() == 2 && responses[0]->error == Coordination::Error::ZOK
+                                                       && responses[1]->error == Coordination::Error::ZNODEEXISTS;
     if (both_created || both_already_exists)
         return;
-    throw Coordination::Exception(code);
+
+    if (is_currently_deleting)
+        throw Exception(ErrorCodes::UNFINISHED, "Cannot create status dirs for {}, "
+                        "most likely because someone is deleting it concurrently", node_path);
+
+    /// Connection lost or entry was removed
+    assert(Coordination::isHardwareError(code) || code == Coordination::Error::ZNONODE);
+    zkutil::KeeperMultiException::check(code, ops, responses);
 }
 
 
From 0854dccfdef61a1b8247f37bc2c5f716ed834b2d Mon Sep 17 00:00:00 2001
From: Ivan <5627721+abyss7@users.noreply.github.com>
Date: Thu, 11 Feb 2021 18:21:21 +0300
Subject: [PATCH 0981/1238] Fix ANTLR parser tests

---
 src/Interpreters/RewriteCountVariantsVisitor.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Interpreters/RewriteCountVariantsVisitor.cpp b/src/Interpreters/RewriteCountVariantsVisitor.cpp
index f7cce82a478..63bf5fce2e8 100644
--- a/src/Interpreters/RewriteCountVariantsVisitor.cpp
+++ b/src/Interpreters/RewriteCountVariantsVisitor.cpp
@@ -23,7 +23,7 @@ void RewriteCountVariantsVisitor::visit(ASTPtr & node)
 
 void RewriteCountVariantsVisitor::visit(ASTFunction & func)
 {
-    if (func.arguments->children.empty() || func.arguments->children.size() > 1 || !func.arguments->children[0])
+    if (!func.arguments || func.arguments->children.empty() || func.arguments->children.size() > 1 || !func.arguments->children[0])
         return;
 
     auto name = Poco::toLower(func.name);

From bf4af9713c7f81e372769ef84d089ea97fe39862 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Thu, 11 Feb 2021 18:29:08 +0300
Subject: [PATCH 0982/1238] Fix tests

---
 .../01715_background_checker_blather_zookeeper.sql          | 6 ++++--
 tests/queries/skip_list.json                                | 1 +
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/tests/queries/0_stateless/01715_background_checker_blather_zookeeper.sql b/tests/queries/0_stateless/01715_background_checker_blather_zookeeper.sql
index a1868dddf22..66b53369517 100644
--- a/tests/queries/0_stateless/01715_background_checker_blather_zookeeper.sql
+++ b/tests/queries/0_stateless/01715_background_checker_blather_zookeeper.sql
@@ -2,11 +2,11 @@ DROP TABLE IF EXISTS i20203_1;
 DROP TABLE IF EXISTS i20203_2;
 
 CREATE TABLE i20203_1 (a Int8)
-ENGINE = ReplicatedMergeTree('/clickhouse/tables/01715_background_checker/i20203','r1')
+ENGINE = ReplicatedMergeTree('/clickhouse/01715_background_checker_i20203', 'r1')
 ORDER BY tuple();
 
 CREATE TABLE i20203_2 (a Int8)
-ENGINE = ReplicatedMergeTree('/clickhouse/tables/01715_background_checker/i20203','r2')
+ENGINE = ReplicatedMergeTree('/clickhouse/01715_background_checker_i20203', 'r2')
 ORDER BY tuple();
 
 DETACH TABLE i20203_2;
@@ -22,5 +22,7 @@ SELECT num_tries < 50
 FROM system.replication_queue
 WHERE table = 'i20203_2' AND database = currentDatabase();
 
+ATTACH TABLE i20203_1;
+
 DROP TABLE IF EXISTS i20203_1;
 DROP TABLE IF EXISTS i20203_2;
diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json
index d76603bf633..53fcfe8b13f 100644
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@@ -572,6 +572,7 @@
         "01603_rename_overwrite_bug",
         "01646_system_restart_replicas_smoke", // system restart replicas is a global query
         "01676_dictget_in_default_expression",
+        "01715_background_checker_blather_zookeeper",
         "attach",
         "ddl_dictionaries",
         "dictionary",

From d8f9a8d3cd899b9f50fdcc1bf59938ff198863ca Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Thu, 11 Feb 2021 18:41:54 +0300
Subject: [PATCH 0983/1238] first_value and last_value

---
 .../AggregateFunctionAny.cpp                   |  8 ++++++++
 src/Processors/Transforms/WindowTransform.cpp  |  2 +-
 .../01591_window_functions.reference           | 18 ++++++++++++++++++
 .../0_stateless/01591_window_functions.sql     |  9 +++++++++
 4 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/src/AggregateFunctions/AggregateFunctionAny.cpp b/src/AggregateFunctions/AggregateFunctionAny.cpp
index 0aeb2548af9..8b18abae884 100644
--- a/src/AggregateFunctions/AggregateFunctionAny.cpp
+++ b/src/AggregateFunctions/AggregateFunctionAny.cpp
@@ -34,6 +34,14 @@ void registerAggregateFunctionsAny(AggregateFunctionFactory & factory)
     factory.registerFunction("any", { createAggregateFunctionAny, properties });
     factory.registerFunction("anyLast", { createAggregateFunctionAnyLast, properties });
     factory.registerFunction("anyHeavy", { createAggregateFunctionAnyHeavy, properties });
+
+    // Synonyms for use as window functions.
+    factory.registerFunction("first_value",
+        { createAggregateFunctionAny, properties },
+        AggregateFunctionFactory::CaseInsensitive);
+    factory.registerFunction("last_value",
+        { createAggregateFunctionAnyLast, properties },
+        AggregateFunctionFactory::CaseInsensitive);
 }
 
 }
diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index 90c5deba395..995efd8fae4 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -1367,7 +1367,7 @@ struct WindowFunctionLagLead final : public WindowFunction
         // or track a separate frame for these functions, which would  make the
         // window transform completely impenetrable to human mind. Our best bet
         // is probably rewriting, say, `lag(value, offset)` to
-        // `any(value) over rows between offset preceding and offset preceding`,
+        // `any(value) over (rows between offset preceding and offset preceding)`,
         // at the query planning stage. We can keep this class as a stub for
         // parsing, anyway.
         throw Exception(ErrorCodes::NOT_IMPLEMENTED,
diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference
index c29c496397b..b0ddff0a824 100644
--- a/tests/queries/0_stateless/01591_window_functions.reference
+++ b/tests/queries/0_stateless/01591_window_functions.reference
@@ -968,3 +968,21 @@ select
     lead(2)(number) over (),
     lag(number) over ()
 from numbers(2); -- { serverError 48 }
+select
+    number,
+    fIrSt_VaLue(number) over w,
+    lAsT_vAlUe(number) over w
+from numbers(10)
+window w as (order by number range between 1 preceding and 1 following)
+order by number
+;
+0	0	1
+1	0	2
+2	1	3
+3	2	4
+4	3	5
+5	4	6
+6	5	7
+7	6	8
+8	7	9
+9	8	9
diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql
index 11fb2295b27..009807721d2 100644
--- a/tests/queries/0_stateless/01591_window_functions.sql
+++ b/tests/queries/0_stateless/01591_window_functions.sql
@@ -334,3 +334,12 @@ select
     lead(2)(number) over (),
     lag(number) over ()
 from numbers(2); -- { serverError 48 }
+
+select
+    number,
+    fIrSt_VaLue(number) over w,
+    lAsT_vAlUe(number) over w
+from numbers(10)
+window w as (order by number range between 1 preceding and 1 following)
+order by number
+;

From a2943fd196e05de9e8c38dd056fb9a42b1ba7fd6 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Thu, 11 Feb 2021 18:47:52 +0300
Subject: [PATCH 0984/1238] cleanpu

---
 src/Processors/Transforms/WindowTransform.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index 995efd8fae4..918b72b3dc5 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -21,7 +21,8 @@ namespace ErrorCodes
 // true window functions, and the fact that the WindowTransform internals are
 // pretty much well defined in domain terms (e.g. frame boundaries), this is
 // somewhat acceptable.
-class IWindowFunction {
+class IWindowFunction
+{
 public:
     virtual ~IWindowFunction() {}
 

From 369dc613c432ad89924e65e34c1152282df3f732 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Thu, 11 Feb 2021 19:05:17 +0300
Subject: [PATCH 0985/1238] Decimal binary operation constants fix

---
 src/Functions/FunctionBinaryArithmetic.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h
index f61c9c91d00..bb85ae32622 100644
--- a/src/Functions/FunctionBinaryArithmetic.h
+++ b/src/Functions/FunctionBinaryArithmetic.h
@@ -894,9 +894,8 @@ class FunctionBinaryArithmetic : public IFunction
             const NativeResultType const_b = helperGetOrConvert<T1, ResultDataType>(col_right_const, right);
 
             const ResultType res = check_decimal_overflow
-                // the arguments are already scaled after conversion
-                ? OpImplCheck::template process<left_is_decimal, right_is_decimal>(const_a, const_b, 1, 1)
-                : OpImpl::template process<left_is_decimal, right_is_decimal>(const_a, const_b, 1, 1);
+                ? OpImplCheck::template process<left_is_decimal, right_is_decimal>(const_a, const_b, scale_a, scale_b)
+                : OpImpl::template process<left_is_decimal, right_is_decimal>(const_a, const_b, scale_a, scale_b);
 
             if constexpr (result_is_decimal)
                 return ResultDataType(type.getPrecision(), type.getScale()).createColumnConst(

From 248a06f930e57f50938f99395fbb6ce8ec17b109 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Thu, 11 Feb 2021 19:11:07 +0300
Subject: [PATCH 0986/1238] Added test

---
 .../0_stateless/01711_decimal_multiplication.reference        | 4 ++++
 tests/queries/0_stateless/01711_decimal_multiplication.sql    | 4 ++++
 2 files changed, 8 insertions(+)
 create mode 100644 tests/queries/0_stateless/01711_decimal_multiplication.reference
 create mode 100644 tests/queries/0_stateless/01711_decimal_multiplication.sql

diff --git a/tests/queries/0_stateless/01711_decimal_multiplication.reference b/tests/queries/0_stateless/01711_decimal_multiplication.reference
new file mode 100644
index 00000000000..37869329ca4
--- /dev/null
+++ b/tests/queries/0_stateless/01711_decimal_multiplication.reference
@@ -0,0 +1,4 @@
+2.0000
+2.0000
+2.0000
+2.0000
diff --git a/tests/queries/0_stateless/01711_decimal_multiplication.sql b/tests/queries/0_stateless/01711_decimal_multiplication.sql
new file mode 100644
index 00000000000..10d23599b4d
--- /dev/null
+++ b/tests/queries/0_stateless/01711_decimal_multiplication.sql
@@ -0,0 +1,4 @@
+SELECT materialize(toDecimal64(4,4)) - materialize(toDecimal32(2,2));
+SELECT toDecimal64(4,4) - materialize(toDecimal32(2,2));
+SELECT materialize(toDecimal64(4,4)) - toDecimal32(2,2);
+SELECT toDecimal64(4,4) - toDecimal32(2,2);

From 62b3bf7b57bafbe3557135cbc49e9359d2a1d904 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Thu, 11 Feb 2021 19:20:57 +0300
Subject: [PATCH 0987/1238] some tests and speedup

---
 src/Processors/Transforms/WindowTransform.cpp | 12 ++++++----
 tests/performance/window_functions.xml        | 24 +++++++++++++++++++
 2 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index 918b72b3dc5..1b35bb5f4f8 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -156,6 +156,7 @@ WindowTransform::WindowTransform(const Block & input_header_,
             workspace.argument_column_indices.push_back(
                 input_header.getPositionByName(argument_name));
         }
+        workspace.argument_columns.assign(f.argument_names.size(), nullptr);
 
         workspace.window_function_impl = aggregate_function->asWindowFunction();
         if (!workspace.window_function_impl)
@@ -809,12 +810,15 @@ void WindowTransform::updateAggregationState()
         {
             auto & block = blockAt(block_number);
 
-            ws.argument_columns.clear();
-            for (const auto i : ws.argument_column_indices)
+            if (ws.cached_block_number != block_number)
             {
-                ws.argument_columns.push_back(block.input_columns[i].get());
+                for (size_t i = 0; i < ws.argument_column_indices.size(); ++i)
+                {
+                    ws.argument_columns[i] = block.input_columns[
+                        ws.argument_column_indices[i]].get();
+                }
+                ws.cached_block_number = block_number;
             }
-            ws.cached_block_number = block_number;
 
             // First and last blocks may be processed partially, and other blocks
             // are processed in full.
diff --git a/tests/performance/window_functions.xml b/tests/performance/window_functions.xml
index 74df2b64a3b..622e349d060 100644
--- a/tests/performance/window_functions.xml
+++ b/tests/performance/window_functions.xml
@@ -86,4 +86,28 @@
         format Null
     </query>
 
+    <!-- Some synthetic tests.-->
+    <query>
+        select
+            min(number) over w,
+            count(*) over w,
+            max(number) over w
+        from
+            (select number, intDiv(number, 1111) p, mod(number, 111) o
+                from numbers(10000000)) t
+        window w as (partition by p order by o)
+        format Null
+    </query>
+
+    <query>
+        select
+            first_value(number) over w,
+            dense_rank() over w
+        from
+            (select number, intDiv(number, 1111) p, mod(number, 111) o
+                from numbers(10000000)) t
+        window w as (partition by p order by o)
+        format Null
+    </query>
+
 </test>

From a77415781f13af0c32a19e12f550186674e8b563 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Thu, 11 Feb 2021 19:48:27 +0300
Subject: [PATCH 0988/1238] reserve the result columns in advance

---
 src/Processors/Transforms/WindowTransform.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index 1b35bb5f4f8..5bd11db2cc2 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -883,6 +883,10 @@ void WindowTransform::appendChunk(Chunk & chunk)
         auto & block = blocks.back();
         block.input_columns = chunk.detachColumns();
 
+        // Even in case of `count() over ()` we should have a dummy input column.
+        // Not sure how reliable this is...
+        block.rows = block.input_columns[0]->size();
+
         for (auto & ws : workspaces)
         {
             // Aggregate functions can't work with constant columns, so we have to
@@ -896,11 +900,8 @@ void WindowTransform::appendChunk(Chunk & chunk)
 
             block.output_columns.push_back(ws.aggregate_function->getReturnType()
                 ->createColumn());
+            block.output_columns.back()->reserve(block.rows);
         }
-
-        // Even in case of `count() over ()` we should have a dummy input column.
-        // Not sure how reliable this is...
-        block.rows = block.input_columns[0]->size();
     }
 
     // Start the calculations. First, advance the partition end.

From 29073854009e3894113e5693093236376c68b8e4 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 11 Feb 2021 21:07:37 +0300
Subject: [PATCH 0989/1238] Avoid invalid dereference in RANGE_HASHED()
 dictionary

UBsan report the following [1], when query does not contains any columns
from the dictionary:

```sql
SELECT
    toUInt32(toUInt32(NULL, toUInt32(NULL, inf, NULL), NULL)),
    toUInt32(toUInt32(toUInt32(toUInt32(toUInt32(NULL, 1., NULL)), toUInt32(toUInt32(NULL, 0.5, NULL)), toUInt32(NULL, NULL)), toUInt32(toUInt32(NULL, 1., NULL)), toUInt32(NULL, NULL)), toUInt32(toUInt32(toUInt32(toUInt32(NULL, 1000.0001220703125, NULL)), toUInt32(toUInt32(NULL, 10.000100135803223, NULL)), toUInt32(NULL, NULL)), NULL, NULL, NULL))
FROM somedict
```

```
std::__1::vector<DB::ColumnWithTypeAndName, std::__1::allocator<DB::ColumnWithTypeAndName> >::back() @ 0x128c07a6 in /workspace/clickhouse
./obj-x86_64-linux-gnu/../src/Dictionaries/RangeDictionaryBlockInputStream.h:0: DB::RangeDictionaryBlockInputStream<DB::RangeHashedDictionary, unsigned short, unsigned long>::fillBlock(DB::PODArray<unsigned long, 4096ul, Allocator<false, false>, 15ul, 16ul> const&, DB::PODArray<unsigned short, 4096ul, Allocator<false, false>, 15ul, 16ul> const&, DB::PODArray<unsigned short, 4096ul, Allocator<false, false>, 15ul, 16ul> const&) const @ 0x1692335e in /workspace/clickhouse
./obj-x86_64-linux-gnu/../src/Dictionaries/RangeDictionaryBlockInputStream.h:0: DB::RangeDictionaryBlockInputStream<DB::RangeHashedDictionary, unsigned short, unsigned long>::getBlock(unsigned long, unsigned long) const @ 0x16922f96 in /workspace/clickhouse
./obj-x86_64-linux-gnu/../src/Dictionaries/DictionaryBlockInputStreamBase.cpp:23: DB::DictionaryBlockInputStreamBase::getHeader() const @ 0x166ab57c in /workspace/clickhouse
```

  [1]: https://clickhouse-test-reports.s3.yandex.net/19451/64c0bf98290362fa216c05b070aa122a12af3c25/fuzzer_ubsan/report.html#fail1
---
 src/Dictionaries/RangeDictionaryBlockInputStream.h     | 10 ++++++----
 .../01125_dict_ddl_cannot_add_column.reference         |  1 +
 .../0_stateless/01125_dict_ddl_cannot_add_column.sql   |  5 ++++-
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/Dictionaries/RangeDictionaryBlockInputStream.h b/src/Dictionaries/RangeDictionaryBlockInputStream.h
index 3da43c85c45..ccd77d49e0f 100644
--- a/src/Dictionaries/RangeDictionaryBlockInputStream.h
+++ b/src/Dictionaries/RangeDictionaryBlockInputStream.h
@@ -47,7 +47,8 @@ private:
         const std::string & default_name,
         const std::unordered_set<std::string> & column_names_set,
         const PaddedPODArray<T> & values,
-        ColumnsWithTypeAndName & columns) const;
+        ColumnsWithTypeAndName & columns,
+        bool force = false) const;
 
     Block fillBlock(
         const PaddedPODArray<Key> & ids_to_fill,
@@ -121,13 +122,14 @@ void RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::addSpecial
     const std::string & default_name,
     const std::unordered_set<std::string> & column_names_set,
     const PaddedPODArray<T> & values,
-    ColumnsWithTypeAndName & columns) const
+    ColumnsWithTypeAndName & columns,
+    bool force) const
 {
     std::string name = default_name;
     if (attribute)
         name = attribute->name;
 
-    if (column_names_set.find(name) != column_names_set.end())
+    if (force || column_names_set.find(name) != column_names_set.end())
         columns.emplace_back(getColumnFromPODArray(values), type, name);
 }
 
@@ -159,7 +161,7 @@ Block RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::fillBlock
 
     std::unordered_set<std::string> names(column_names.begin(), column_names.end());
 
-    addSpecialColumn(structure.id, std::make_shared<DataTypeUInt64>(), "ID", names, ids_to_fill, columns);
+    addSpecialColumn(structure.id, std::make_shared<DataTypeUInt64>(), "ID", names, ids_to_fill, columns, true);
     auto ids_column = columns.back().column;
     addSpecialColumn(structure.range_min, structure.range_max->type, "Range Start", names, block_start_dates, columns);
     addSpecialColumn(structure.range_max, structure.range_max->type, "Range End", names, block_end_dates, columns);
diff --git a/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.reference b/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.reference
index 1a9e5685a6a..71be9c3fb5b 100644
--- a/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.reference
+++ b/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.reference
@@ -1,3 +1,4 @@
 1	2019-01-05	2020-01-10	1
+1
 date_table
 somedict
diff --git a/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.sql b/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.sql
index 6ad76ee5a7e..471fd7959a9 100644
--- a/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.sql
+++ b/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.sql
@@ -29,6 +29,9 @@ LIFETIME(MIN 300 MAX 360);
 
 SELECT * from somedict;
 
+-- No dictionary columns
+SELECT 1 FROM somedict;
+
 SHOW TABLES;
 
-DROP DATABASE IF EXISTS database_for_dict;
+DROP DATABASE database_for_dict;

From f6cfcd4da9da90394bcdce3bb7100ed90a2c3804 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 11 Feb 2021 21:04:14 +0300
Subject: [PATCH 0990/1238] Fix null dereference with join_use_nulls=1

Found with MSan [1], the following query triggers null dereference:

```sql
SELECT
    Y.id - 1
FROM X
RIGHT JOIN Y ON (X.id + 1) = Y.id
SETTINGS join_use_nulls=1; -- { serverError 53 }
```

```
Received signal 11
(version 21.3.1.5916, build id: 2E9E84AA32AEAAC7C8B6EB45DA3EC0B4F15E9ED4) (from thread 100) (query_id: 9ab8cb0d-be8d-445e-8498-930a7268488b) Received signal Segmentation fault (11)
Address: 0x10 Access: read. Address not mapped to object.
Stack trace: 0x2d079d65 0x29bf1f30 0x12b12220 0x12b13098 0x12b17b08 0x12b20459 0x2ae37913 0x2ae352d9 0x2c746072 0x2c7585dd 0x2c7483e3 0x2c74e63d 0x2c7483e3 0x2c74e63d 0x2c7483e3 0x2c74e63d 0x2c7483e3 0x2c74e63d 0x2c7483e3 0x2c74e63d 0x2c7483e3 0x2c74e63d 0x2c7483e3 0x2c74e63d 0x2c7483e3 0x2c74e63d 0x2c7483e3 0x2c74e63d
4. ./obj-x86_64-linux-gnu/../contrib/boost/boost/smart_ptr/intrusive_ptr.hpp:0: DB::ColumnConst::ColumnConst(COW<DB::IColumn>::immutable_ptr<DB::IColumn> const&, unsigned long) @ 0x2d079d65 in /workspace/clickhouse
5. ./obj-x86_64-linux-gnu/../src/Common/COW.h:0: DB::createBlockWithNestedColumns(std::__1::vector<DB::ColumnWithTypeAndName, std::__1::allocator<DB::ColumnWithTypeAndName> > const&) @ 0x29bf1f30 in /workspace/clickhouse
6. DB::FunctionOverloadResolverAdaptor::getReturnTypeDefaultImplementationForNulls(std::__1::vector<DB::ColumnWithTypeAndName, std::__1::allocator<DB::ColumnWithTypeAndName> > const&, std::__1::function<std::__1::shared_ptr<DB::IDataType const> (std::__1::vector<DB::ColumnWithTypeAndName, std::__1::allocator<DB::ColumnWithTypeAndName> > const&)> const&) @ 0x12b12220 in /workspace/clickhouse
7. DB::FunctionOverloadResolverAdaptor::getReturnTypeWithoutLowCardinality(std::__1::vector<DB::ColumnWithTypeAndName, std::__1::allocator<DB::ColumnWithTypeAndName> > const&) const @ 0x12b13098 in /workspace/clickhouse
8. DB::FunctionOverloadResolverAdaptor::getReturnType(std::__1::vector<DB::ColumnWithTypeAndName, std::__1::allocator<DB::ColumnWithTypeAndName> > const&) const @ 0x12b17b08 in /workspace/clickhouse
9. DB::FunctionOverloadResolverAdaptor::build(std::__1::vector<DB::ColumnWithTypeAndName, std::__1::allocator<DB::ColumnWithTypeAndName> > const&) const @ 0x12b20459 in /workspace/clickhouse
```

  [1]: https://clickhouse-test-reports.s3.yandex.net/19451/64c0bf98290362fa216c05b070aa122a12af3c25/fuzzer_msan/report.html#fail1
---
 src/Functions/FunctionHelpers.cpp                 | 15 +++++++++++++--
 .../0_stateless/01710_join_use_nulls.reference    |  0
 .../queries/0_stateless/01710_join_use_nulls.sql  | 15 +++++++++++++++
 3 files changed, 28 insertions(+), 2 deletions(-)
 create mode 100644 tests/queries/0_stateless/01710_join_use_nulls.reference
 create mode 100644 tests/queries/0_stateless/01710_join_use_nulls.sql

diff --git a/src/Functions/FunctionHelpers.cpp b/src/Functions/FunctionHelpers.cpp
index d64646ecaf1..17c28ee3343 100644
--- a/src/Functions/FunctionHelpers.cpp
+++ b/src/Functions/FunctionHelpers.cpp
@@ -70,8 +70,19 @@ ColumnsWithTypeAndName createBlockWithNestedColumns(const ColumnsWithTypeAndName
             }
             else if (const auto * const_column = checkAndGetColumn<ColumnConst>(*col.column))
             {
-                const auto & nested_col = checkAndGetColumn<ColumnNullable>(const_column->getDataColumn())->getNestedColumnPtr();
-                res.emplace_back(ColumnWithTypeAndName{ ColumnConst::create(nested_col, col.column->size()), nested_type, col.name});
+                const auto * nullable_column = checkAndGetColumn<ColumnNullable>(const_column->getDataColumn());
+
+                ColumnPtr nullable_res;
+                if (nullable_column)
+                {
+                    const auto & nested_col = nullable_column->getNestedColumnPtr();
+                    nullable_res = ColumnConst::create(nested_col, col.column->size());
+                }
+                else
+                {
+                    nullable_res = makeNullable(col.column);
+                }
+                res.emplace_back(ColumnWithTypeAndName{ nullable_res, nested_type, col.name });
             }
             else
                 throw Exception("Illegal column for DataTypeNullable", ErrorCodes::ILLEGAL_COLUMN);
diff --git a/tests/queries/0_stateless/01710_join_use_nulls.reference b/tests/queries/0_stateless/01710_join_use_nulls.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/01710_join_use_nulls.sql b/tests/queries/0_stateless/01710_join_use_nulls.sql
new file mode 100644
index 00000000000..2845af8b8ed
--- /dev/null
+++ b/tests/queries/0_stateless/01710_join_use_nulls.sql
@@ -0,0 +1,15 @@
+DROP TABLE IF EXISTS X;
+DROP TABLE IF EXISTS Y;
+
+CREATE TABLE X (id Int) ENGINE=Memory;
+CREATE TABLE Y (id Int) ENGINE=Memory;
+
+-- Type mismatch of columns to JOIN by: plus(id, 1) Int64 at left, Y.id Int32 at right.
+SELECT
+    Y.id - 1
+FROM X
+RIGHT JOIN Y ON (X.id + 1) = Y.id
+SETTINGS join_use_nulls=1; -- { serverError 53 }
+
+DROP TABLE X;
+DROP TABLE Y;

From d3549aca95c1bcdc2b65617afd35f71ee51be4a9 Mon Sep 17 00:00:00 2001
From: Dmitriy <sevirov@yandex-team.ru>
Date: Thu, 11 Feb 2021 21:42:15 +0300
Subject: [PATCH 0991/1238] Fix the description of the table
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Поправил описание таблицы.
---
 .../en/operations/system-tables/opentelemetry_span_log.md | 8 ++++++--
 .../ru/operations/system-tables/opentelemetry_span_log.md | 8 ++++++--
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/docs/en/operations/system-tables/opentelemetry_span_log.md b/docs/en/operations/system-tables/opentelemetry_span_log.md
index 64fd549458a..e45a989742c 100644
--- a/docs/en/operations/system-tables/opentelemetry_span_log.md
+++ b/docs/en/operations/system-tables/opentelemetry_span_log.md
@@ -18,16 +18,20 @@ Columns:
 
 -   `finish_date` ([Date](../../sql-reference/data-types/date.md)) — The finish date of the `trace span`.
 
--   `attribute.names` ([Array(String)](../../sql-reference/data-types/array.md)) — [Attribute](https://opentelemetry.io/docs/go/instrumentation/#attributes) names depending on the `trace span`. They are filled in according to the recommendations in the [OpenTelemetry](https://opentelemetry.io/) standard.
+-   `attribute.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — [Attribute](https://opentelemetry.io/docs/go/instrumentation/#attributes) names depending on the `trace span`. They are filled in according to the recommendations in the [OpenTelemetry](https://opentelemetry.io/) standard.
 
--   `attribute.values` ([Array(String)](../../sql-reference/data-types/array.md)) — Attribute values depending on the `trace span`. They are filled in according to the recommendations in the `OpenTelemetry` standard.
+-   `attribute.values` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Attribute values depending on the `trace span`. They are filled in according to the recommendations in the `OpenTelemetry` standard.
 
 **Example**
 
+Query:
+
 ``` sql
 SELECT * FROM system.opentelemetry_span_log LIMIT 1 FORMAT Vertical;
 ```
 
+Result:
+
 ``` text
 Row 1:
 ──────
diff --git a/docs/ru/operations/system-tables/opentelemetry_span_log.md b/docs/ru/operations/system-tables/opentelemetry_span_log.md
index 5c577eb691d..96555064b0e 100644
--- a/docs/ru/operations/system-tables/opentelemetry_span_log.md
+++ b/docs/ru/operations/system-tables/opentelemetry_span_log.md
@@ -18,16 +18,20 @@
 
 -   `finish_date` ([Date](../../sql-reference/data-types/date.md)) — дата окончания `trace span`.
 
--   `attribute.names` ([Array(String)](../../sql-reference/data-types/array.md)) — имена [атрибутов](https://opentelemetry.io/docs/go/instrumentation/#attributes) в зависимости от `trace span`. Заполняются согласно рекомендациям в стандарте [OpenTelemetry](https://opentelemetry.io/).
+-   `attribute.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — имена [атрибутов](https://opentelemetry.io/docs/go/instrumentation/#attributes) в зависимости от `trace span`. Заполняются согласно рекомендациям в стандарте [OpenTelemetry](https://opentelemetry.io/).
 
--   `attribute.values` ([Array(String)](../../sql-reference/data-types/array.md)) — значения атрибутов в зависимости от `trace span`. Заполняются согласно рекомендациям в стандарте `OpenTelemetry`.
+-   `attribute.values` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — значения атрибутов в зависимости от `trace span`. Заполняются согласно рекомендациям в стандарте `OpenTelemetry`.
 
 **Пример**
 
+Запрос:
+
 ``` sql
 SELECT * FROM system.opentelemetry_span_log LIMIT 1 FORMAT Vertical;
 ```
 
+Результат:
+
 ``` text
 Row 1:
 ──────

From ce1524c4ebaca545feeaa1493d5ae8e66af8dab9 Mon Sep 17 00:00:00 2001
From: sevirov <72220289+sevirov@users.noreply.github.com>
Date: Thu, 11 Feb 2021 22:06:30 +0300
Subject: [PATCH 0992/1238] Update docs/en/operations/settings/settings.md

Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com>
---
 docs/en/operations/settings/settings.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 0554ea79ecd..8f1cb186449 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -2599,7 +2599,7 @@ Sets the probability that the ClickHouse can start a trace for executed queries
 Possible values:
 
 -   0 — The trace for a executed queries is disabled (if no parent trace context is supplied).
--   (0, 1) — The probability with which the ClickHouse can start a trace for executed queries (if no parent trace context is supplied). For example, if the setting value is `0,5`, ClickHouse can start a trace on average for half of the queries.
+-  Positive floating-point number in the range [0..1]. For example, if the setting value is `0,5`, ClickHouse can start a trace on average for half of the queries.
 -   1 — The trace for all executed queries is enabled.
 
 Default value: `0`.

From 6271709efacad598431127808dae44cd1ac6e0bb Mon Sep 17 00:00:00 2001
From: Dmitriy <sevirov@yandex-team.ru>
Date: Thu, 11 Feb 2021 22:23:19 +0300
Subject: [PATCH 0993/1238] Fix the description of the setting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Поправил описание настройки.
---
 docs/en/operations/settings/settings.md | 2 +-
 docs/ru/operations/settings/settings.md | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 8f1cb186449..6f028b00a5b 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -2599,7 +2599,7 @@ Sets the probability that the ClickHouse can start a trace for executed queries
 Possible values:
 
 -   0 — The trace for a executed queries is disabled (if no parent trace context is supplied).
--  Positive floating-point number in the range [0..1]. For example, if the setting value is `0,5`, ClickHouse can start a trace on average for half of the queries.
+-   Positive floating-point number in the range [0..1]. For example, if the setting value is `0,5`, ClickHouse can start a trace on average for half of the queries.
 -   1 — The trace for all executed queries is enabled.
 
 Default value: `0`.
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index 47e2666e652..434157401fa 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -2475,12 +2475,12 @@ SELECT SUM(-1), MAX(0) FROM system.one WHERE 0;
 
 ## opentelemetry_start_trace_probability {#opentelemetry-start-trace-probability}
 
-Задает вероятность того, что ClickHouse начнет трассировку для выполненных запросов (если не указан [родительский контекст](https://www.w3.org/TR/trace-context/) трассировки).
+Задает вероятность того, что ClickHouse начнет трассировку для выполненных запросов (если не указан [входящий контекст](https://www.w3.org/TR/trace-context/) трассировки).
 
 Возможные значения:
 
--   0 — трассировка для выполненных запросов отключена (если не указан родительский контекст трассировки).
--   (0, 1) — вероятность, с которой ClickHouse начнет трассировку для выполненных запросов (если не указан родительский контекст трассировки). Например, при значении настройки, равной `0,5`, ClickHouse начнет трассировку в среднем для половины запросов.
+-   0 — трассировка для выполненных запросов отключена (если не указан входящий контекст трассировки).
+-   Положительное число с плавающей точкой в диапазоне [0..1]. Например, при значении настройки, равной `0,5`, ClickHouse начнет трассировку в среднем для половины запросов.
 -   1 — трассировка для всех выполненных запросов включена.
 
 Значение по умолчанию: `0`.

From 3993ad6f01c6f2f3ffd6eafba9eaad30999f316d Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 11 Feb 2021 22:21:46 +0300
Subject: [PATCH 0994/1238] Fix test_system_merges by using mutations_sync=1

After early_constant_folding started to ignore not only ignore(), but
all functions with isSuitableForConstantFolding() == false, there became
more sleep(2) calls for this test:
- MergeTreeDataSelectExecutor::readFromParts -> DB::KeyCondition::KeyCondition
- MergeTreeDataMergerMutator::mutatePartToTemporaryPart -> DB::isStorageTouchedByMutations -> FilterTransform::transform
- MergeTreeDataMergerMutator::mutatePartToTemporaryPart -> DB::MergeTreeDataMergerMutator::mutateAllPartColumns -> FilterTransform::transform

While before it was optimized to 0 during WHERE analysis.
---
 tests/integration/test_system_merges/test.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/integration/test_system_merges/test.py b/tests/integration/test_system_merges/test.py
index 1f2da606cd1..672b637f783 100644
--- a/tests/integration/test_system_merges/test.py
+++ b/tests/integration/test_system_merges/test.py
@@ -134,7 +134,9 @@ def test_mutation_simple(started_cluster, replicated):
         result_part = "all_{}_{}_0_{}".format(starting_block, starting_block, starting_block + 1)
 
         def alter():
-            node1.query("ALTER TABLE {name} UPDATE a = 42 WHERE sleep(2) OR 1".format(name=name))
+            node1.query("ALTER TABLE {name} UPDATE a = 42 WHERE sleep(2) OR 1".format(name=name), settings={
+                'mutations_sync': 1,
+            })
 
         t = threading.Thread(target=alter)
         t.start()
@@ -159,8 +161,6 @@ def test_mutation_simple(started_cluster, replicated):
         ]
         t.join()
 
-        time.sleep(1.5)
-
         assert node_check.query("SELECT * FROM system.merges WHERE table = '{name}'".format(name=table_name)) == ""
 
     finally:

From 9a9f88c5bb26d330f7f64bc2f7ff8fd89f79641b Mon Sep 17 00:00:00 2001
From: lehasm <lehasm@gmail.com>
Date: Thu, 11 Feb 2021 23:16:01 +0300
Subject: [PATCH 0995/1238] test markdown

---
 .../sql-reference/aggregate-functions/reference/studentttest.md  | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/en/sql-reference/aggregate-functions/reference/studentttest.md b/docs/en/sql-reference/aggregate-functions/reference/studentttest.md
index f868e976039..fde6a2ecc01 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/studentttest.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/studentttest.md
@@ -24,6 +24,7 @@ The null hypothesis is that means of populations are equal. Normal distribution
 **Returned values**
 
 [Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
+
 -   calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md).
 -   calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
 

From df181b534e53d64196dfede15a491387cf4f9c63 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Thu, 11 Feb 2021 23:29:01 +0300
Subject: [PATCH 0996/1238] Better connection reset

---
 tests/integration/test_testkeeper_multinode/test.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/tests/integration/test_testkeeper_multinode/test.py b/tests/integration/test_testkeeper_multinode/test.py
index f161c28ee83..c9bde5c5a02 100644
--- a/tests/integration/test_testkeeper_multinode/test.py
+++ b/tests/integration/test_testkeeper_multinode/test.py
@@ -12,7 +12,7 @@ node1 = cluster.add_instance('node1', main_configs=['configs/enable_test_keeper1
 node2 = cluster.add_instance('node2', main_configs=['configs/enable_test_keeper2.xml', 'configs/log_conf.xml', 'configs/use_test_keeper.xml'], stay_alive=True)
 node3 = cluster.add_instance('node3', main_configs=['configs/enable_test_keeper3.xml', 'configs/log_conf.xml', 'configs/use_test_keeper.xml'], stay_alive=True)
 
-from kazoo.client import KazooClient
+from kazoo.client import KazooClient, KazooState
 
 @pytest.fixture(scope="module")
 def started_cluster():
@@ -29,11 +29,13 @@ def smaller_exception(ex):
 
 def get_fake_zk(nodename, timeout=30.0):
     _fake_zk_instance = KazooClient(hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout)
-    def reset_last_zxid_listener(state):
+    def reset_listener(state):
+        nonlocal _fake_zk_instance
         print("Fake zk callback called for state", state)
-        _fake_zk_instance.last_zxid = 0
+        if state != KazooState.CONNECTED:
+            _fake_zk_instance._reset()
 
-        _fake_zk_instance.add_listener(reset_last_zxid_listener)
+    _fake_zk_instance.add_listener(reset_listener)
     _fake_zk_instance.start()
     return _fake_zk_instance
 
@@ -135,7 +137,7 @@ def test_session_expiration(started_cluster):
     try:
         node1_zk = get_fake_zk("node1")
         node2_zk = get_fake_zk("node2")
-        node3_zk = get_fake_zk("node3", timeout=3.0)
+        node3_zk = get_fake_zk("node3", timeout=5.0)
 
         node3_zk.create("/test_ephemeral_node", b"world", ephemeral=True)
 

From b61ce427a883952db600113e5788b1ab6b5a6a65 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 11 Feb 2021 23:59:00 +0300
Subject: [PATCH 0997/1238] Whitespaces

---
 src/Storages/LiveView/StorageLiveView.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp
index cd96ab4ad40..bfec7bffc8c 100644
--- a/src/Storages/LiveView/StorageLiveView.cpp
+++ b/src/Storages/LiveView/StorageLiveView.cpp
@@ -512,8 +512,8 @@ Pipe StorageLiveView::read(
 
     else if (is_periodically_refreshed)
     {
-        Seconds current_time = std::chrono::duration_cast<Seconds> (std::chrono::system_clock::now().time_since_epoch());
-        Seconds blocks_time = std::chrono::duration_cast<Seconds> (getBlocksTime().time_since_epoch());
+        Seconds current_time = std::chrono::duration_cast<Seconds>(std::chrono::system_clock::now().time_since_epoch());
+        Seconds blocks_time = std::chrono::duration_cast<Seconds>(getBlocksTime().time_since_epoch());
 
         if ((current_time - periodic_live_view_refresh) >= blocks_time)
             refresh(false);

From 5355175e49b425b754785c411c548c058fd9d100 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Fri, 12 Feb 2021 00:26:14 +0300
Subject: [PATCH 0998/1238] Development

---
 src/Columns/IColumn.h | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h
index d441e9f7c4e..7697bd116bf 100644
--- a/src/Columns/IColumn.h
+++ b/src/Columns/IColumn.h
@@ -358,11 +358,18 @@ public:
     }
 
     /// Compress column in memory to some representation that allows to decompress it back.
-    using Lazy = std::function<Ptr()>;
-    virtual Lazy compress() const
+    /// Return itself if compression is not applicable for this column type.
+    virtual ColumnPtr compress() const
     {
-        /// No compression by default, just wrap the object.
-        return [column = getPtr()] { return column; };
+        /// No compression by default.
+        return getPtr();
+    }
+
+    /// If it's CompressedColumn, decompress it and return.
+    /// Otherwise return itself.
+    virtual ColumnPtr decompress() const
+    {
+        return getPtr();
     }
 
 
@@ -468,10 +475,7 @@ using Columns = std::vector<ColumnPtr>;
 using MutableColumns = std::vector<MutableColumnPtr>;
 
 using ColumnRawPtrs = std::vector<const IColumn *>;
-//using MutableColumnRawPtrs = std::vector<IColumn *>;
 
-using LazyColumn = IColumn::Lazy;
-using LazyColumns = std::vector<LazyColumn>;
 
 template <typename ... Args>
 struct IsMutableColumns;

From b276eac197de02175b15e93ad8ce8e5dd2a541b9 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Fri, 12 Feb 2021 00:54:50 +0300
Subject: [PATCH 0999/1238] Common types template instantiations

---
 src/Columns/ColumnDecimal.cpp      |  6 ++++++
 src/Columns/ColumnDecimal.h        |  6 ++++++
 src/Columns/ColumnVector.cpp       |  1 +
 src/Columns/ColumnVector.h         | 17 +++++++++++++++++
 src/Common/Allocator.cpp           |  5 +++++
 src/Common/Allocator.h             |  5 +++++
 src/Common/PODArray.cpp            | 10 ++++++++++
 src/Common/PODArray.h              | 10 ++++++++++
 src/Common/PODArray_fwd.h          |  2 +-
 src/DataTypes/DataTypeNumberBase.h | 16 ++++++++++++++++
 10 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp
index f6261079287..dc565f5590c 100644
--- a/src/Columns/ColumnDecimal.cpp
+++ b/src/Columns/ColumnDecimal.cpp
@@ -30,6 +30,12 @@ namespace ErrorCodes
     extern const int LOGICAL_ERROR;
 }
 
+template class DecimalPaddedPODArray<Decimal32>;
+template class DecimalPaddedPODArray<Decimal64>;
+template class DecimalPaddedPODArray<Decimal128>;
+template class DecimalPaddedPODArray<Decimal256>;
+template class DecimalPaddedPODArray<DateTime64>;
+
 template <typename T>
 int ColumnDecimal<T>::compareAt(size_t n, size_t m, const IColumn & rhs_, int) const
 {
diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h
index 1578633c13d..3844a2af141 100644
--- a/src/Columns/ColumnDecimal.h
+++ b/src/Columns/ColumnDecimal.h
@@ -50,6 +50,12 @@ private:
     UInt32 scale;
 };
 
+extern template class DecimalPaddedPODArray<Decimal32>;
+extern template class DecimalPaddedPODArray<Decimal64>;
+extern template class DecimalPaddedPODArray<Decimal128>;
+extern template class DecimalPaddedPODArray<Decimal256>;
+extern template class DecimalPaddedPODArray<DateTime64>;
+
 /// A ColumnVector for Decimals
 template <typename T>
 class ColumnDecimal final : public COWHelper<ColumnVectorHelper, ColumnDecimal<T>>
diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp
index a075c10a8a9..ec26500d057 100644
--- a/src/Columns/ColumnVector.cpp
+++ b/src/Columns/ColumnVector.cpp
@@ -535,4 +535,5 @@ template class ColumnVector<Int128>;
 template class ColumnVector<Int256>;
 template class ColumnVector<Float32>;
 template class ColumnVector<Float64>;
+
 }
diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h
index 1b13859bdee..0872aa5859e 100644
--- a/src/Columns/ColumnVector.h
+++ b/src/Columns/ColumnVector.h
@@ -345,4 +345,21 @@ ColumnPtr ColumnVector<T>::indexImpl(const PaddedPODArray<Type> & indexes, size_
     return res;
 }
 
+/// Prevent template instantiation of ColumnVector for common types
+
+extern template class ColumnVector<UInt8>;
+extern template class ColumnVector<UInt16>;
+extern template class ColumnVector<UInt32>;
+extern template class ColumnVector<UInt64>;
+extern template class ColumnVector<UInt128>;
+extern template class ColumnVector<UInt256>;
+extern template class ColumnVector<Int8>;
+extern template class ColumnVector<Int16>;
+extern template class ColumnVector<Int32>;
+extern template class ColumnVector<Int64>;
+extern template class ColumnVector<Int128>;
+extern template class ColumnVector<Int256>;
+extern template class ColumnVector<Float32>;
+extern template class ColumnVector<Float64>;
+
 }
diff --git a/src/Common/Allocator.cpp b/src/Common/Allocator.cpp
index 08c275abfc2..5a66ddb63a2 100644
--- a/src/Common/Allocator.cpp
+++ b/src/Common/Allocator.cpp
@@ -19,3 +19,8 @@
       */
     __attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 16384;
 #endif
+
+template class Allocator<false, false>;
+template class Allocator<true, false>;
+template class Allocator<false, true>;
+template class Allocator<true, true>;
diff --git a/src/Common/Allocator.h b/src/Common/Allocator.h
index a499f4a442b..118ba7b1680 100644
--- a/src/Common/Allocator.h
+++ b/src/Common/Allocator.h
@@ -353,6 +353,11 @@ constexpr size_t allocatorInitialBytes<AllocatorWithStackMemory<
     Base, initial_bytes, Alignment>> = initial_bytes;
 
 
+extern template class Allocator<false, false>;
+extern template class Allocator<true, false>;
+extern template class Allocator<false, true>;
+extern template class Allocator<true, true>;
+
 #if !__clang__
 #pragma GCC diagnostic pop
 #endif
diff --git a/src/Common/PODArray.cpp b/src/Common/PODArray.cpp
index e0b17c8125c..c1edc5bafad 100644
--- a/src/Common/PODArray.cpp
+++ b/src/Common/PODArray.cpp
@@ -6,4 +6,14 @@ namespace DB
 /// Used for left padding of PODArray when empty
 const char empty_pod_array[empty_pod_array_size]{};
 
+template class PODArray<UInt8, 4096, Allocator<false>, 15, 16>;
+template class PODArray<UInt16, 4096, Allocator<false>, 15, 16>;
+template class PODArray<UInt32, 4096, Allocator<false>, 15, 16>;
+template class PODArray<UInt64, 4096, Allocator<false>, 15, 16>;
+
+template class PODArray<Int8, 4096, Allocator<false>, 15, 16>;
+template class PODArray<Int16, 4096, Allocator<false>, 15, 16>;
+template class PODArray<Int32, 4096, Allocator<false>, 15, 16>;
+template class PODArray<Int64, 4096, Allocator<false>, 15, 16>;
+
 }
diff --git a/src/Common/PODArray.h b/src/Common/PODArray.h
index f0cc9df11cd..19b1d61fe85 100644
--- a/src/Common/PODArray.h
+++ b/src/Common/PODArray.h
@@ -725,4 +725,14 @@ void swap(PODArray<T, initial_bytes, TAllocator, pad_right_> & lhs, PODArray<T,
 }
 #pragma GCC diagnostic pop
 
+extern template class PODArray<UInt8, 4096, Allocator<false>, 15, 16>;
+extern template class PODArray<UInt16, 4096, Allocator<false>, 15, 16>;
+extern template class PODArray<UInt32, 4096, Allocator<false>, 15, 16>;
+extern template class PODArray<UInt64, 4096, Allocator<false>, 15, 16>;
+
+extern template class PODArray<Int8, 4096, Allocator<false>, 15, 16>;
+extern template class PODArray<Int16, 4096, Allocator<false>, 15, 16>;
+extern template class PODArray<Int32, 4096, Allocator<false>, 15, 16>;
+extern template class PODArray<Int64, 4096, Allocator<false>, 15, 16>;
+
 }
diff --git a/src/Common/PODArray_fwd.h b/src/Common/PODArray_fwd.h
index f817d2f6dde..22f9230c01c 100644
--- a/src/Common/PODArray_fwd.h
+++ b/src/Common/PODArray_fwd.h
@@ -3,8 +3,8 @@
   * This file contains some using-declarations that define various kinds of
   * PODArray.
   */
-#pragma once
 
+#include <common/types.h>
 #include <Common/Allocator_fwd.h>
 
 namespace DB
diff --git a/src/DataTypes/DataTypeNumberBase.h b/src/DataTypes/DataTypeNumberBase.h
index cbbc203bf4f..7727929ce4d 100644
--- a/src/DataTypes/DataTypeNumberBase.h
+++ b/src/DataTypes/DataTypeNumberBase.h
@@ -1,5 +1,6 @@
 #pragma once
 
+#include <Common/UInt128.h>
 #include <DataTypes/IDataType.h>
 #include <DataTypes/DataTypeWithSimpleSerialization.h>
 
@@ -70,4 +71,19 @@ public:
     bool canBeInsideLowCardinality() const override { return true; }
 };
 
+extern template class DataTypeNumberBase<UInt8>;
+extern template class DataTypeNumberBase<UInt16>;
+extern template class DataTypeNumberBase<UInt32>;
+extern template class DataTypeNumberBase<UInt64>;
+extern template class DataTypeNumberBase<UInt128>; // base for UUID
+extern template class DataTypeNumberBase<UInt256>;
+extern template class DataTypeNumberBase<Int16>;
+extern template class DataTypeNumberBase<Int8>;
+extern template class DataTypeNumberBase<Int32>;
+extern template class DataTypeNumberBase<Int64>;
+extern template class DataTypeNumberBase<Int128>;
+extern template class DataTypeNumberBase<Int256>;
+extern template class DataTypeNumberBase<Float32>;
+extern template class DataTypeNumberBase<Float64>;
+
 }

From 5a4a5fda208e0887ec4ee32588648058c03eb935 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Fri, 12 Feb 2021 01:04:55 +0300
Subject: [PATCH 1000/1238] Update ColumnVector.h

---
 src/Columns/ColumnVector.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h
index 0872aa5859e..586fced88a6 100644
--- a/src/Columns/ColumnVector.h
+++ b/src/Columns/ColumnVector.h
@@ -345,7 +345,7 @@ ColumnPtr ColumnVector<T>::indexImpl(const PaddedPODArray<Type> & indexes, size_
     return res;
 }
 
-/// Prevent template instantiation of ColumnVector for common types
+/// Prevent implicit template instantiation of ColumnVector for common types
 
 extern template class ColumnVector<UInt8>;
 extern template class ColumnVector<UInt16>;

From c9cf63e958f058098e83c8a46391d249229954db Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Fri, 12 Feb 2021 01:23:40 +0300
Subject: [PATCH 1001/1238] fix

---
 src/Databases/DatabaseAtomic.cpp              |  6 ++
 src/Databases/DatabaseAtomic.h                |  1 +
 src/Databases/DatabaseOnDisk.cpp              | 17 +++--
 src/Databases/DatabaseReplicated.cpp          | 56 ++++++++++++----
 src/Databases/DatabaseReplicatedWorker.cpp    |  4 +-
 src/Interpreters/DDLWorker.cpp                |  2 +-
 .../test_replicated_database/test.py          | 66 ++++++++++++++++++-
 7 files changed, 130 insertions(+), 22 deletions(-)

diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp
index e6bc3bfcd44..2065e036863 100644
--- a/src/Databases/DatabaseAtomic.cpp
+++ b/src/Databases/DatabaseAtomic.cpp
@@ -362,6 +362,12 @@ void DatabaseAtomic::assertDetachedTableNotInUse(const UUID & uuid)
               ", because it was detached but still used by some query. Retry later.", ErrorCodes::TABLE_ALREADY_EXISTS);
 }
 
+void DatabaseAtomic::setDetachedTableNotInUseForce(const UUID & uuid)
+{
+    std::unique_lock lock{mutex};
+    detached_tables.erase(uuid);
+}
+
 DatabaseAtomic::DetachedTables DatabaseAtomic::cleanupDetachedTables()
 {
     DetachedTables not_in_use;
diff --git a/src/Databases/DatabaseAtomic.h b/src/Databases/DatabaseAtomic.h
index be7227ed8f9..09cdf269b35 100644
--- a/src/Databases/DatabaseAtomic.h
+++ b/src/Databases/DatabaseAtomic.h
@@ -58,6 +58,7 @@ public:
     void tryRemoveSymlink(const String & table_name);
 
     void waitDetachedTableNotInUse(const UUID & uuid) override;
+    void setDetachedTableNotInUseForce(const UUID & uuid);
 
 protected:
     void commitAlterTable(const StorageID & table_id, const String & table_metadata_tmp_path, const String & table_metadata_path, const String & statement, const Context & query_context) override;
diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp
index 195f57d1bda..24bab42cad2 100644
--- a/src/Databases/DatabaseOnDisk.cpp
+++ b/src/Databases/DatabaseOnDisk.cpp
@@ -407,6 +407,8 @@ void DatabaseOnDisk::renameTable(
             from_ordinary_to_atomic = true;
         else if (typeid_cast<DatabaseAtomic *>(this) && typeid_cast<DatabaseOrdinary *>(&to_database))
             from_atomic_to_ordinary = true;
+        else if (dynamic_cast<DatabaseAtomic *>(this) && typeid_cast<DatabaseOrdinary *>(&to_database) && getEngineName() == "Replicated")
+            from_atomic_to_ordinary = true;
         else
             throw Exception("Moving tables between databases of different engines is not supported", ErrorCodes::NOT_IMPLEMENTED);
     }
@@ -418,6 +420,7 @@ void DatabaseOnDisk::renameTable(
     /// DatabaseLazy::detachTable may return nullptr even if table exists, so we need tryGetTable for this case.
     StoragePtr table = tryGetTable(table_name, global_context);
     detachTable(table_name);
+    UUID prev_uuid = UUIDHelpers::Nil;
     try
     {
         table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
@@ -430,7 +433,7 @@ void DatabaseOnDisk::renameTable(
         if (from_ordinary_to_atomic)
             create.uuid = UUIDHelpers::generateV4();
         if (from_atomic_to_ordinary)
-            create.uuid = UUIDHelpers::Nil;
+            std::swap(create.uuid, prev_uuid);
 
         if (auto * target_db = dynamic_cast<DatabaseOnDisk *>(&to_database))
             target_db->checkMetadataFilenameAvailability(to_table_name);
@@ -455,12 +458,16 @@ void DatabaseOnDisk::renameTable(
 
     Poco::File(table_metadata_path).remove();
 
-    /// Special case: usually no actions with symlinks are required when detaching/attaching table,
-    /// but not when moving from Atomic database to Ordinary
-    if (from_atomic_to_ordinary && table->storesDataOnDisk())
+    if (from_atomic_to_ordinary)
     {
         auto & atomic_db = assert_cast<DatabaseAtomic &>(*this);
-        atomic_db.tryRemoveSymlink(table_name);
+        /// Special case: usually no actions with symlinks are required when detaching/attaching table,
+        /// but not when moving from Atomic database to Ordinary
+        if (table->storesDataOnDisk())
+            atomic_db.tryRemoveSymlink(table_name);
+        /// Forget about UUID, now it's possible to reuse it for new table
+        DatabaseCatalog::instance().removeUUIDMappingFinally(prev_uuid);
+        atomic_db.setDetachedTableNotInUseForce(prev_uuid);
     }
 }
 
diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index b8ce48a4d5c..1756d33958d 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -39,7 +39,7 @@ namespace ErrorCodes
 }
 
 static constexpr const char * DROPPED_MARK = "DROPPED";
-static constexpr const char * BROKEN_TABLE_PREFIX = "_broken_";
+static constexpr const char * BROKEN_TABLES_SUFFIX = "_broken_tables";
 
 
 zkutil::ZooKeeperPtr DatabaseReplicated::getZooKeeper() const
@@ -312,7 +312,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
 
     Strings tables_to_detach;
     size_t total_tables = 0;
-    auto existing_tables_it = getTablesIterator(global_context, [&](const String & name) { return !startsWith(name, BROKEN_TABLE_PREFIX); });
+    auto existing_tables_it = getTablesIterator(global_context, {});
     while (existing_tables_it->isValid())
     {
         String name = existing_tables_it->name();
@@ -345,30 +345,64 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
         existing_tables_it->next();
         ++total_tables;
     }
+    existing_tables_it.reset();
 
+    String db_name = getDatabaseName();
+    String to_db_name = getDatabaseName() + BROKEN_TABLES_SUFFIX;
     if (total_tables < tables_to_detach.size() * 2)
-        throw Exception(ErrorCodes::DATABASE_REPLICATION_FAILED, "Too many tables to detach: {} of {}", tables_to_detach.size(), total_tables);
+        throw Exception(ErrorCodes::DATABASE_REPLICATION_FAILED, "Too many tables to recreate: {} of {}", tables_to_detach.size(), total_tables);
     else if (!tables_to_detach.empty())
-        LOG_WARNING(log, "Will DETACH PERMANENTLY {} broken tables to recover replica", tables_to_detach.size());
+    {
+        LOG_WARNING(log, "Will recreate {} broken tables to recover replica", tables_to_detach.size());
+        /// It's too dangerous to automatically drop tables, so we will move them to special database.
+        /// We use Ordinary engine for destination database, because it's the only way to discard table UUID
+        /// and make possible creation of new table with the same UUID.
+        String query = fmt::format("CREATE DATABASE IF NOT EXISTS {} ENGINE=Ordinary", backQuoteIfNeed(to_db_name));
+        Context query_context = global_context;
+        executeQuery(query, query_context, true);
+    }
 
+    size_t dropped_dicts = 0;
+    size_t moved_tables = 0;
+    std::vector<UUID> dropped_tables;
     for (const auto & table_name : tables_to_detach)
     {
-        String to_name = fmt::format("{}_{}_{}_{}", BROKEN_TABLE_PREFIX, table_name, max_log_ptr, thread_local_rng() % 1000);
-        DDLGuardPtr table_guard = DatabaseCatalog::instance().getDDLGuard(getDatabaseName(), std::min(table_name, to_name));
-        DDLGuardPtr to_table_guard = DatabaseCatalog::instance().getDDLGuard(getDatabaseName(), std::max(table_name, to_name));
+        String to_name = fmt::format("{}_{}_{}", table_name, max_log_ptr, thread_local_rng() % 1000);
+        assert(db_name < to_db_name);
+        DDLGuardPtr table_guard = DatabaseCatalog::instance().getDDLGuard(db_name, table_name);
+        DDLGuardPtr to_table_guard = DatabaseCatalog::instance().getDDLGuard(to_db_name, to_name);
+        if (getDatabaseName() != db_name)
+            throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database was renamed, will retry");
 
         if (isDictionaryExist(table_name))
         {
-            /// TODO implement DETACH DICTIONARY PERMANENTLY
+            LOG_DEBUG(log, "Will DROP DICTIONARY {}", backQuoteIfNeed(table_name));
             DatabaseAtomic::removeDictionary(global_context, table_name);
+            ++dropped_dicts;
+        }
+        else if (!tryGetTable(table_name, global_context)->storesDataOnDisk())
+        {
+            LOG_DEBUG(log, "Will DROP TABLE {}, because it does not store data on disk and can be safely dropped", backQuoteIfNeed(table_name));
+            dropped_tables.push_back(tryGetTableUUID(table_name));
+            tryGetTable(table_name, global_context)->shutdown();
+            DatabaseAtomic::dropTable(global_context, table_name, true);
         }
         else
         {
-            DatabaseAtomic::renameTable(global_context, table_name, *this, to_name, false, false);
-            DatabaseAtomic::detachTablePermanently(global_context, to_name);
+            LOG_DEBUG(log, "Will RENAME TABLE {} TO {}.{}", backQuoteIfNeed(table_name), backQuoteIfNeed(to_db_name), backQuoteIfNeed(to_name));
+            auto to_db_ptr = DatabaseCatalog::instance().getDatabase(to_db_name);
+            DatabaseAtomic::renameTable(global_context, table_name, *to_db_ptr, to_name, false, false);
+            ++moved_tables;
         }
     }
 
+    if (!tables_to_detach.empty())
+        LOG_WARNING(log, "Cleaned {} outdated objects: dropped {} dictionaries and {} tables, moved {} tables",
+                    tables_to_detach.size(), dropped_dicts, dropped_tables.size(), moved_tables);
+
+    for (const auto & id : dropped_tables)
+        DatabaseCatalog::instance().waitTableFinallyDropped(id);
+
     for (const auto & name_and_meta : table_name_to_metadata)
     {
         if (isTableExist(name_and_meta.first, global_context))
@@ -535,8 +569,6 @@ void DatabaseReplicated::commitCreateTable(const ASTCreateQuery & query, const S
                        const String & table_metadata_tmp_path, const String & table_metadata_path,
                        const Context & query_context)
 {
-    if (startsWith(query.table, BROKEN_TABLE_PREFIX))
-        throw Exception(ErrorCodes::INCORRECT_QUERY, "It's not allowed to attach broken tables");
     auto txn = query_context.getMetadataTransaction();
     assert(!ddl_worker->isCurrentlyActive() || txn);
     if (txn && txn->is_initial_query)
diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp
index b29a8822c0c..5a350783dcb 100644
--- a/src/Databases/DatabaseReplicatedWorker.cpp
+++ b/src/Databases/DatabaseReplicatedWorker.cpp
@@ -81,7 +81,7 @@ String DatabaseReplicatedDDLWorker::enqueueQuery(DDLLogEntry & entry)
     return node_path;
 }
 
-String DatabaseReplicatedDDLWorker::tryEnqueueAndExecuteEntry(DDLLogEntry & entry, const Context & query_context)
+String DatabaseReplicatedDDLWorker::tryEnqueueAndExecuteEntry(DDLLogEntry & entry, const Context & /*query_context*/)
 {
     /// NOTE Possibly it would be better to execute initial query on the most up-to-date node,
     /// but it requires more complex logic around /try node.
@@ -106,7 +106,7 @@ String DatabaseReplicatedDDLWorker::tryEnqueueAndExecuteEntry(DDLLogEntry & entr
     task->is_initial_query = true;
 
     LOG_DEBUG(log, "Waiting for worker thread to process all entries before {}", entry_name);
-    UInt64 timeout = query_context.getSettingsRef().distributed_ddl_task_timeout;
+    UInt64 timeout = 600;
     {
         std::unique_lock lock{mutex};
         bool processed = wait_current_task_change.wait_for(lock, std::chrono::seconds(timeout), [&]()
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 9a398df07b5..242ee7ea0e1 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -474,7 +474,7 @@ void DDLWorker::processTask(DDLTaskBase & task)
     auto create_active_res = zookeeper->tryCreate(active_node_path, {}, zkutil::CreateMode::Ephemeral);
     if (create_active_res != Coordination::Error::ZOK)
     {
-        if (create_active_res == Coordination::Error::ZNONODE)
+        if (create_active_res != Coordination::Error::ZNONODE)
             throw Coordination::Exception(create_active_res, active_node_path);
         createStatusDirs(task.entry_path, zookeeper);
         zookeeper->create(active_node_path, {}, zkutil::CreateMode::Ephemeral);
diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py
index 04646507ed7..faeb436f279 100644
--- a/tests/integration/test_replicated_database/test.py
+++ b/tests/integration/test_replicated_database/test.py
@@ -3,7 +3,8 @@ import re
 import pytest
 
 from helpers.cluster import ClickHouseCluster
-from helpers.test_tools import assert_eq_with_retry
+from helpers.test_tools import assert_eq_with_retry, assert_logs_contain
+from helpers.network import PartitionManager
 
 cluster = ClickHouseCluster(__file__)
 
@@ -162,7 +163,7 @@ def test_alters_from_different_replicas(started_cluster):
     assert main_node.query("SELECT shard_num, replica_num, host_name FROM system.clusters WHERE cluster='testdb'") == expected
 
     # test_drop_and_create_replica
-    main_node.query("DROP DATABASE testdb")
+    main_node.query("DROP DATABASE testdb SYNC")
     main_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica1');")
 
     expected = "CREATE TABLE testdb.concurrent_test\\n(\\n    `CounterID` UInt32,\\n    `StartDate` Date,\\n    `UserID` UInt32,\\n" \
@@ -183,3 +184,64 @@ def test_alters_from_different_replicas(started_cluster):
 
     assert_eq_with_retry(dummy_node, "SELECT CounterID, StartDate, UserID FROM testdb.dist ORDER BY CounterID", expected)
 
+def test_recover_staled_replica(started_cluster):
+    main_node.query("CREATE DATABASE recover ENGINE = Replicated('/clickhouse/databases/recover', 'shard1', 'replica1');")
+    started_cluster.get_kazoo_client('zoo1').set('/clickhouse/databases/recover/logs_to_keep', b'10')
+    dummy_node.query("CREATE DATABASE recover ENGINE = Replicated('/clickhouse/databases/recover', 'shard1', 'replica2');")
+
+    settings = {"distributed_ddl_task_timeout": 0}
+    main_node.query("CREATE TABLE recover.t1 (n int) ENGINE=Memory", settings=settings)
+    dummy_node.query("CREATE TABLE recover.t2 (s String) ENGINE=Memory", settings=settings)
+    main_node.query("CREATE TABLE recover.mt1 (n int) ENGINE=MergeTree order by n", settings=settings)
+    dummy_node.query("CREATE TABLE recover.mt2 (n int) ENGINE=MergeTree order by n", settings=settings)
+    main_node.query("CREATE TABLE recover.rmt1 (n int) ENGINE=ReplicatedMergeTree order by n", settings=settings)
+    dummy_node.query("CREATE TABLE recover.rmt2 (n int) ENGINE=ReplicatedMergeTree order by n", settings=settings)
+    main_node.query("CREATE DICTIONARY recover.d1 (n int DEFAULT 0, m int DEFAULT 1) PRIMARY KEY n SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'rmt1' PASSWORD '' DB 'recover')) LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT())")
+    dummy_node.query("CREATE DICTIONARY recover.d2 (n int DEFAULT 0, m int DEFAULT 1) PRIMARY KEY n SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'rmt2' PASSWORD '' DB 'recover')) LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT())")
+
+    for table in ['t1', 't2', 'mt1', 'mt2', 'rmt1', 'rmt2']:
+        main_node.query("INSERT INTO recover.{} VALUES (42)".format(table))
+    for table in ['t1', 't2', 'mt1', 'mt2']:
+        dummy_node.query("INSERT INTO recover.{} VALUES (42)".format(table))
+    for table in ['rmt1', 'rmt2']:
+        main_node.query("SYSTEM SYNC REPLICA recover.{}".format(table))
+
+    with PartitionManager() as pm:
+        pm.drop_instance_zk_connections(dummy_node)
+        dummy_node.query_and_get_error("RENAME TABLE recover.t1 TO recover.m1")
+        main_node.query("RENAME TABLE recover.t1 TO recover.m1", settings=settings)
+        main_node.query("ALTER TABLE recover.mt1  ADD COLUMN m int", settings=settings)
+        main_node.query("ALTER TABLE recover.rmt1 ADD COLUMN m int", settings=settings)
+        main_node.query("DROP DICTIONARY recover.d2", settings=settings)
+        main_node.query("CREATE DICTIONARY recover.d2 (n int DEFAULT 0, m int DEFAULT 1) PRIMARY KEY n SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'rmt1' PASSWORD '' DB 'recover')) LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT());", settings=settings)
+
+        main_node.query("CREATE TABLE recover.tmp AS recover.m1", settings=settings)
+        main_node.query("DROP TABLE recover.tmp", settings=settings)
+        main_node.query("CREATE TABLE recover.tmp AS recover.m1", settings=settings)
+        main_node.query("DROP TABLE recover.tmp", settings=settings)
+        main_node.query("CREATE TABLE recover.tmp AS recover.m1", settings=settings)
+        main_node.query("DROP TABLE recover.tmp", settings=settings)
+        main_node.query("CREATE TABLE recover.tmp AS recover.m1", settings=settings)
+
+    assert main_node.query("SELECT name FROM system.tables WHERE database='recover' ORDER BY name") == "d1\nd2\nm1\nmt1\nmt2\nrmt1\nrmt2\nt2\ntmp\n"
+    query = "SELECT name, uuid, create_table_query FROM system.tables WHERE database='recover' ORDER BY name"
+    expected = main_node.query(query)
+    assert_eq_with_retry(dummy_node, query, expected)
+
+    for table in ['m1', 't2', 'mt1', 'mt2', 'rmt1', 'rmt2', 'd1', 'd2']:
+        assert main_node.query("SELECT (*,).1 FROM recover.{}".format(table)) == "42\n"
+    for table in ['t2', 'rmt1', 'rmt2', 'd1', 'd2', 'mt2']:
+        assert dummy_node.query("SELECT (*,).1 FROM recover.{}".format(table)) == "42\n"
+    for table in ['m1', 'mt1']:
+        assert dummy_node.query("SELECT count() FROM recover.{}".format(table)) == "0\n"
+
+    assert dummy_node.query("SELECT count() FROM system.tables WHERE database='recover_broken_tables'") == "1\n"
+    table = dummy_node.query("SHOW TABLES FROM recover_broken_tables").strip()
+    assert "mt1_22_" in table
+    assert dummy_node.query("SELECT (*,).1 FROM recover_broken_tables.{}".format(table)) == "42\n"
+
+    expected = "Cleaned 3 outdated objects: dropped 1 dictionaries and 1 tables, moved 1 tables"
+    assert_logs_contain(dummy_node, expected)
+
+    dummy_node.query("DROP TABLE recover.tmp")
+

From ed7270dd8bf96e2d67a766f0833d275978791838 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Fri, 12 Feb 2021 03:25:00 +0300
Subject: [PATCH 1002/1238] Better interface

---
 src/Columns/ColumnCompressed.cpp |  61 ++++++++++++++++
 src/Columns/ColumnCompressed.h   | 120 +++++++++++++++++++++++++++++++
 src/Columns/ColumnVector.cpp     |  54 ++++----------
 src/Columns/ColumnVector.h       |   2 +-
 src/Columns/IColumn.h            |   4 +-
 src/Storages/StorageMemory.cpp   | 118 ++++++++++++++----------------
 src/Storages/StorageMemory.h     |   7 +-
 7 files changed, 251 insertions(+), 115 deletions(-)
 create mode 100644 src/Columns/ColumnCompressed.cpp
 create mode 100644 src/Columns/ColumnCompressed.h

diff --git a/src/Columns/ColumnCompressed.cpp b/src/Columns/ColumnCompressed.cpp
new file mode 100644
index 00000000000..d7d30745868
--- /dev/null
+++ b/src/Columns/ColumnCompressed.cpp
@@ -0,0 +1,61 @@
+#include <Columns/ColumnCompressed.h>
+
+#pragma GCC diagnostic ignored "-Wold-style-cast"
+
+#include <lz4.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int CANNOT_COMPRESS;
+    extern const int CANNOT_DECOMPRESS;
+}
+
+
+std::shared_ptr<Memory<>> ColumnCompressed::compressBuffer(const void * data, size_t data_size)
+{
+    size_t max_dest_size = LZ4_COMPRESSBOUND(data_size);
+
+    if (max_dest_size > std::numeric_limits<int>::max())
+        throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress column of size {}", formatReadableSizeWithBinarySuffix(data_size));
+
+    Memory<> compressed(max_dest_size);
+
+    auto compressed_size = LZ4_compress_default(
+        reinterpret_cast<const char *>(data),
+        compressed.data(),
+        data_size,
+        max_dest_size);
+
+    if (compressed_size <= 0)
+        throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress column");
+
+    /// If compression is inefficient.
+    if (static_cast<size_t>(compressed_size) * 2 > data_size)
+        return {};
+
+    /// Shrink to fit.
+    auto shrank = std::make_shared<Memory<>>(compressed_size);
+    memcpy(shrank->data(), compressed.data(), compressed_size);
+
+    return shrank;
+}
+
+
+void ColumnCompressed::decompressBuffer(
+    const void * compressed_data, void * decompressed_data, size_t compressed_size, size_t decompressed_size)
+{
+    auto processed_size = LZ4_decompress_safe(
+        reinterpret_cast<const char *>(compressed_data),
+        reinterpret_cast<char *>(decompressed_data),
+        compressed_size,
+        decompressed_size);
+
+    if (processed_size <= 0)
+        throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress column");
+}
+
+}
diff --git a/src/Columns/ColumnCompressed.h b/src/Columns/ColumnCompressed.h
new file mode 100644
index 00000000000..bd70005ac5d
--- /dev/null
+++ b/src/Columns/ColumnCompressed.h
@@ -0,0 +1,120 @@
+#pragma once
+
+#include <optional>
+#include <Core/Field.h>
+#include <Columns/IColumn.h>
+#include <IO/BufferWithOwnMemory.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int LOGICAL_ERROR;
+}
+
+
+/** Wrapper for compressed column data.
+  * The only supported operations are:
+  * - decompress (reconstruct the source column)
+  * - get size in rows or bytes.
+  *
+  * It is needed to implement in-memory compression
+  * - to keep compressed data in Block or pass around.
+  *
+  * It's often beneficial to store compressed data in-memory and decompress on the fly
+  * because it allows to lower memory throughput. More specifically, if:
+  *
+  * decompression speed * num CPU cores >= memory read throughput
+  *
+  * Also in-memory compression allows to keep more data in RAM.
+  */
+class ColumnCompressed : public COWHelper<IColumn, ColumnCompressed>
+{
+public:
+    using Lazy = std::function<ColumnPtr()>;
+
+    ColumnCompressed(size_t rows_, size_t bytes_, Lazy lazy_)
+        : rows(rows_), bytes(bytes_), lazy(lazy_)
+    {
+    }
+
+    const char * getFamilyName() const override { return "Compressed"; }
+
+    size_t size() const override { return rows; }
+    size_t byteSize() const override { return bytes; }
+    size_t allocatedBytes() const override { return bytes; }
+
+    ColumnPtr decompress() const override
+    {
+        return lazy();
+    }
+
+    /** Wrap uncompressed column without compression.
+      * Method can be used when compression is not worth doing.
+      * But returning CompressedColumn is still needed to keep uniform block structure.
+      */
+    static ColumnPtr wrap(ColumnPtr column)
+    {
+        return ColumnCompressed::create(
+            column->size(),
+            column->allocatedBytes(),
+            [column = std::move(column)]{ return column; });
+    }
+
+    /// Helper methods for compression.
+
+    /// If data is not worth to be compressed - returns nullptr. Note: shared_ptr is to allow to be captured by std::function.
+    static std::shared_ptr<Memory<>> compressBuffer(const void * data, size_t data_size);
+
+    static void decompressBuffer(
+        const void * compressed_data, void * decompressed_data, size_t compressed_size, size_t decompressed_size);
+
+    /// All other methods throw exception.
+
+    TypeIndex getDataType() const override { throwMustBeDecompressed(); }
+    Field operator[](size_t) const override { throwMustBeDecompressed(); }
+    void get(size_t, Field &) const override { throwMustBeDecompressed(); }
+    StringRef getDataAt(size_t) const override { throwMustBeDecompressed(); }
+    void insert(const Field &) override { throwMustBeDecompressed(); }
+    void insertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); }
+    void insertData(const char *, size_t) override { throwMustBeDecompressed(); }
+    void insertDefault() override { throwMustBeDecompressed(); }
+    void popBack(size_t) override { throwMustBeDecompressed(); }
+    StringRef serializeValueIntoArena(size_t, Arena &, char const *&) const override { throwMustBeDecompressed(); }
+    const char * deserializeAndInsertFromArena(const char *) override { throwMustBeDecompressed(); }
+    void updateHashWithValue(size_t, SipHash &) const override { throwMustBeDecompressed(); }
+    void updateWeakHash32(WeakHash32 &) const override { throwMustBeDecompressed(); }
+    void updateHashFast(SipHash &) const override { throwMustBeDecompressed(); }
+    ColumnPtr filter(const Filter &, ssize_t) const override { throwMustBeDecompressed(); }
+    ColumnPtr permute(const Permutation &, size_t) const override { throwMustBeDecompressed(); }
+    ColumnPtr index(const IColumn &, size_t) const override { throwMustBeDecompressed(); }
+    int compareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeDecompressed(); }
+    void compareColumn(const IColumn &, size_t, PaddedPODArray<UInt64> *, PaddedPODArray<Int8> &, int, int) const override
+    {
+        throwMustBeDecompressed();
+    }
+    void getPermutation(bool, size_t, int, Permutation &) const override { throwMustBeDecompressed(); }
+    void updatePermutation(bool, size_t, int, Permutation &, EqualRanges &) const override { throwMustBeDecompressed(); }
+    ColumnPtr replicate(const Offsets &) const override { throwMustBeDecompressed(); }
+    MutableColumns scatter(ColumnIndex, const Selector &) const override { throwMustBeDecompressed(); }
+    void gather(ColumnGathererStream &) override { throwMustBeDecompressed(); }
+    void getExtremes(Field &, Field &) const override { throwMustBeDecompressed(); }
+    size_t byteSizeAt(size_t) const override { throwMustBeDecompressed(); }
+
+protected:
+    size_t rows;
+    size_t bytes;
+
+    Lazy lazy;
+
+private:
+    [[noreturn]] void throwMustBeDecompressed() const
+    {
+        throw Exception("ColumnCompressed must be decompressed before use", ErrorCodes::LOGICAL_ERROR);
+    }
+};
+
+}
+
diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp
index 32658eb3e34..324b23eabcc 100644
--- a/src/Columns/ColumnVector.cpp
+++ b/src/Columns/ColumnVector.cpp
@@ -2,6 +2,7 @@
 
 #include <pdqsort.h>
 #include <Columns/ColumnsCommon.h>
+#include <Columns/ColumnCompressed.h>
 #include <DataStreams/ColumnGathererStream.h>
 #include <IO/WriteHelpers.h>
 #include <Common/Arena.h>
@@ -16,9 +17,6 @@
 #include <common/unaligned.h>
 #include <ext/bit_cast.h>
 #include <ext/scope_guard.h>
-#include <lz4.h>
-#include <Compression/LZ4_decompress_faster.h>
-#include <IO/BufferWithOwnMemory.h>
 
 #include <cmath>
 #include <cstring>
@@ -529,51 +527,27 @@ void ColumnVector<T>::getExtremes(Field & min, Field & max) const
 #pragma GCC diagnostic ignored "-Wold-style-cast"
 
 template <typename T>
-LazyColumn ColumnVector<T>::compress() const
+ColumnPtr ColumnVector<T>::compress() const
 {
     size_t source_size = data.size() * sizeof(T);
 
     /// Don't compress small blocks.
     if (source_size < 4096) /// A wild guess.
-        return IColumn::compress();
+        return ColumnCompressed::wrap(this->getPtr());
 
-    size_t max_dest_size = LZ4_COMPRESSBOUND(source_size);
+    auto compressed = ColumnCompressed::compressBuffer(data.data(), source_size);
 
-    if (max_dest_size > std::numeric_limits<int>::max())
-        throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress column of size {}", formatReadableSizeWithBinarySuffix(source_size));
+    if (!compressed)
+        return ColumnCompressed::wrap(this->getPtr());
 
-    auto compressed = std::make_shared<Memory<>>(max_dest_size);
-
-    auto compressed_size = LZ4_compress_default(
-        reinterpret_cast<const char *>(data.data()),
-        compressed->data(),
-        source_size,
-        max_dest_size);
-
-    if (compressed_size <= 0)
-        throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress column");
-
-    /// If compression is inefficient.
-    if (static_cast<size_t>(compressed_size) * 2 > source_size)
-        return IColumn::compress();
-
-    /// Shrink to fit.
-    auto shrank = std::make_shared<Memory<>>(compressed_size);
-    memcpy(shrank->data(), compressed->data(), compressed_size);
-
-    return [compressed = std::move(shrank), column_size = data.size()]
-    {
-        auto res = ColumnVector<T>::create(column_size);
-        auto processed_size = LZ4_decompress_fast(
-            compressed->data(),
-            reinterpret_cast<char *>(res->getData().data()),
-            column_size * sizeof(T));
-
-        if (processed_size <= 0)
-            throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress column");
-
-        return res;
-    };
+    return ColumnCompressed::create(data.size(), compressed->size(),
+        [compressed = std::move(compressed), column_size = data.size()]
+        {
+            auto res = ColumnVector<T>::create(column_size);
+            ColumnCompressed::decompressBuffer(
+                compressed->data(), res->getData().data(), compressed->size(), column_size * sizeof(T));
+            return res;
+        });
 }
 
 /// Explicit template instantiations - to avoid code bloat in headers.
diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h
index 4f1cbcafcbc..623a828a110 100644
--- a/src/Columns/ColumnVector.h
+++ b/src/Columns/ColumnVector.h
@@ -298,7 +298,7 @@ public:
         return typeid(rhs) == typeid(ColumnVector<T>);
     }
 
-    LazyColumn compress() const override;
+    ColumnPtr compress() const override;
 
     /// Replace elements that match the filter with zeroes. If inverted replaces not matched elements.
     void applyZeroMap(const IColumn::Filter & filt, bool inverted = false);
diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h
index 7697bd116bf..2b4b633f9a5 100644
--- a/src/Columns/IColumn.h
+++ b/src/Columns/IColumn.h
@@ -359,7 +359,7 @@ public:
 
     /// Compress column in memory to some representation that allows to decompress it back.
     /// Return itself if compression is not applicable for this column type.
-    virtual ColumnPtr compress() const
+    virtual Ptr compress() const
     {
         /// No compression by default.
         return getPtr();
@@ -367,7 +367,7 @@ public:
 
     /// If it's CompressedColumn, decompress it and return.
     /// Otherwise return itself.
-    virtual ColumnPtr decompress() const
+    virtual Ptr decompress() const
     {
         return getPtr();
     }
diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp
index 20c8a44efd4..01f70db5edd 100644
--- a/src/Storages/StorageMemory.cpp
+++ b/src/Storages/StorageMemory.cpp
@@ -24,7 +24,7 @@ namespace ErrorCodes
 
 class MemorySource : public SourceWithProgress
 {
-    using InitializerFunc = std::function<void(std::shared_ptr<const LazyBlocks> &)>;
+    using InitializerFunc = std::function<void(std::shared_ptr<const Blocks> &)>;
 public:
     /// Blocks are stored in std::list which may be appended in another thread.
     /// We use pointer to the beginning of the list and its current size.
@@ -35,7 +35,7 @@ public:
         Names column_names_,
         const StorageMemory & storage,
         const StorageMetadataPtr & metadata_snapshot,
-        std::shared_ptr<const LazyBlocks> data_,
+        std::shared_ptr<const Blocks> data_,
         std::shared_ptr<std::atomic<size_t>> parallel_execution_index_,
         InitializerFunc initializer_func_ = {})
         : SourceWithProgress(metadata_snapshot->getSampleBlockForColumns(column_names_, storage.getVirtuals(), storage.getStorageID()))
@@ -44,8 +44,6 @@ public:
         , parallel_execution_index(parallel_execution_index_)
         , initializer_func(std::move(initializer_func_))
     {
-        for (const auto & elem : column_names_and_types)
-            column_positions.push_back(metadata_snapshot->getSampleBlock().getPositionByName(elem.getNameInStorage()));
     }
 
     String getName() const override { return "Memory"; }
@@ -66,25 +64,23 @@ protected:
             return {};
         }
 
-        const LazyBlock & src = (*data)[current_index];
+        const Block & src = (*data)[current_index];
         Columns columns;
         columns.reserve(columns.size());
 
         /// Add only required columns to `res`.
-        size_t i = 0;
         for (const auto & elem : column_names_and_types)
         {
-            auto current_column = src[column_positions[i]]();
+            auto current_column = src.getByName(elem.getNameInStorage()).column;
+            current_column = current_column->decompress();
+
             if (elem.isSubcolumn())
                 columns.emplace_back(elem.getTypeInStorage()->getSubcolumn(elem.getSubcolumnName(), *current_column));
             else
                 columns.emplace_back(std::move(current_column));
-
-            ++i;
         }
 
-        size_t rows = columns.at(0)->size();
-        return Chunk(std::move(columns), rows);
+        return Chunk(std::move(columns), src.rows());
     }
 
 private:
@@ -102,10 +98,9 @@ private:
 
     const NamesAndTypesList column_names_and_types;
     size_t execution_index = 0;
-    std::shared_ptr<const LazyBlocks> data;
+    std::shared_ptr<const Blocks> data;
     std::shared_ptr<std::atomic<size_t>> parallel_execution_index;
     InitializerFunc initializer_func;
-    std::vector<size_t> column_positions;
 };
 
 
@@ -126,31 +121,34 @@ public:
     {
         metadata_snapshot->check(block, true);
 
-        inserted_bytes += block.allocatedBytes();
-        inserted_rows += block.rows();
-
-        Block sample = metadata_snapshot->getSampleBlock();
-
-        LazyColumns lazy_columns;
-        lazy_columns.reserve(sample.columns());
-
-        for (const auto & elem : sample)
+        if (storage.compress)
         {
-            const ColumnPtr & column = block.getByName(elem.name).column;
+            Block compressed_block;
+            for (auto & elem : block)
+                compressed_block.insert({ elem.column->compress(), elem.type, elem.name });
 
-            if (storage.compress)
-                lazy_columns.emplace_back(column->compress());
-            else
-                lazy_columns.emplace_back([=]{ return column; });
+            new_blocks.emplace_back(compressed_block);
+        }
+        else
+        {
+            new_blocks.emplace_back(block);
         }
-
-        new_blocks.emplace_back(std::move(lazy_columns));
     }
 
     void writeSuffix() override
     {
+        size_t inserted_bytes = 0;
+        size_t inserted_rows = 0;
+
+        for (const auto & block : new_blocks)
+        {
+            inserted_bytes += block.allocatedBytes();
+            inserted_rows += block.rows();
+        }
+
         std::lock_guard lock(storage.mutex);
-        auto new_data = std::make_unique<LazyBlocks>(*(storage.data.get()));
+
+        auto new_data = std::make_unique<Blocks>(*(storage.data.get()));
         new_data->insert(new_data->end(), new_blocks.begin(), new_blocks.end());
 
         storage.data.set(std::move(new_data));
@@ -159,9 +157,7 @@ public:
     }
 
 private:
-    LazyBlocks new_blocks;
-    size_t inserted_bytes = 0;
-    size_t inserted_rows = 0;
+    Blocks new_blocks;
 
     StorageMemory & storage;
     StorageMetadataPtr metadata_snapshot;
@@ -173,7 +169,7 @@ StorageMemory::StorageMemory(
     ColumnsDescription columns_description_,
     ConstraintsDescription constraints_,
     bool compress_)
-    : IStorage(table_id_), data(std::make_unique<const LazyBlocks>()), compress(compress_)
+    : IStorage(table_id_), data(std::make_unique<const Blocks>()), compress(compress_)
 {
     StorageInMemoryMetadata storage_metadata;
     storage_metadata.setColumns(std::move(columns_description_));
@@ -209,7 +205,7 @@ Pipe StorageMemory::read(
             metadata_snapshot,
             nullptr /* data */,
             nullptr /* parallel execution index */,
-            [this](std::shared_ptr<const LazyBlocks> & data_to_initialize)
+            [this](std::shared_ptr<const Blocks> & data_to_initialize)
             {
                 data_to_initialize = data.get();
             }));
@@ -242,18 +238,18 @@ BlockOutputStreamPtr StorageMemory::write(const ASTPtr & /*query*/, const Storag
 
 void StorageMemory::drop()
 {
-    data.set(std::make_unique<LazyBlocks>());
+    data.set(std::make_unique<Blocks>());
     total_size_bytes.store(0, std::memory_order_relaxed);
     total_size_rows.store(0, std::memory_order_relaxed);
 }
 
-static inline void updateBlockData(LazyBlock & old_block, const LazyBlock & new_block, const Block & old_header, const Block & new_header)
+static inline void updateBlockData(Block & old_block, const Block & new_block)
 {
-    size_t i = 0;
-    for (const auto & it : new_header)
+    for (const auto & it : new_block)
     {
-        old_block[old_header.getPositionByName(it.name)] = new_block[i];
-        ++i;
+        auto col_name = it.name;
+        auto & col_with_type_name = old_block.getByName(col_name);
+        col_with_type_name.column = it.column;
     }
 }
 
@@ -265,47 +261,39 @@ void StorageMemory::mutate(const MutationCommands & commands, const Context & co
     auto storage_ptr = DatabaseCatalog::instance().getTable(storage, context);
     auto interpreter = std::make_unique<MutationsInterpreter>(storage_ptr, metadata_snapshot, commands, context, true);
     auto in = interpreter->execute();
-    Block old_header = metadata_snapshot->getSampleBlock();
-    Block mutation_header = in->getHeader();
 
     in->readPrefix();
-    LazyBlocks out;
+    Blocks out;
     while (Block block = in->read())
     {
-        LazyColumns lazy_columns;
+        if (compress)
+            for (auto & elem : block)
+                elem.column = elem.column->compress();
 
-        for (const auto & elem : block)
-        {
-            if (compress)
-                lazy_columns.emplace_back(elem.column->compress());
-            else
-                lazy_columns.emplace_back([=]{ return elem.column; });
-        }
-
-        out.emplace_back(std::move(lazy_columns));
+        out.push_back(block);
     }
     in->readSuffix();
 
-    std::unique_ptr<LazyBlocks> new_data;
+    std::unique_ptr<Blocks> new_data;
 
-    /// All columns affected.
+    // all column affected
     if (interpreter->isAffectingAllColumns())
     {
-        new_data = std::make_unique<LazyBlocks>(out);
+        new_data = std::make_unique<Blocks>(out);
     }
     else
     {
-        /// Just some of the columns affected, we need update it with new column.
-        new_data = std::make_unique<LazyBlocks>(*(data.get()));
+        /// just some of the column affected, we need update it with new column
+        new_data = std::make_unique<Blocks>(*(data.get()));
         auto data_it = new_data->begin();
         auto out_it = out.begin();
 
         while (data_it != new_data->end())
         {
-            /// Mutation does not change the number of blocks.
+            /// Mutation does not change the number of blocks
             assert(out_it != out.end());
 
-            updateBlockData(*data_it, *out_it, old_header, mutation_header);
+            updateBlockData(*data_it, *out_it);
             ++data_it;
             ++out_it;
         }
@@ -313,7 +301,7 @@ void StorageMemory::mutate(const MutationCommands & commands, const Context & co
         assert(out_it == out.end());
     }
 
-/*    size_t rows = 0;
+    size_t rows = 0;
     size_t bytes = 0;
     for (const auto & buffer : *new_data)
     {
@@ -321,8 +309,7 @@ void StorageMemory::mutate(const MutationCommands & commands, const Context & co
         bytes += buffer.bytes();
     }
     total_size_bytes.store(rows, std::memory_order_relaxed);
-    total_size_rows.store(bytes, std::memory_order_relaxed);*/
-
+    total_size_rows.store(bytes, std::memory_order_relaxed);
     data.set(std::move(new_data));
 }
 
@@ -330,7 +317,7 @@ void StorageMemory::mutate(const MutationCommands & commands, const Context & co
 void StorageMemory::truncate(
     const ASTPtr &, const StorageMetadataPtr &, const Context &, TableExclusiveLockHolder &)
 {
-    data.set(std::make_unique<LazyBlocks>());
+    data.set(std::make_unique<Blocks>());
     total_size_bytes.store(0, std::memory_order_relaxed);
     total_size_rows.store(0, std::memory_order_relaxed);
 }
@@ -364,7 +351,6 @@ void registerStorageMemory(StorageFactory & factory)
         return StorageMemory::create(args.table_id, args.columns, args.constraints, settings.compress);
     },
     {
-        .supports_settings = true,
         .supports_parallel_insert = true,
     });
 }
diff --git a/src/Storages/StorageMemory.h b/src/Storages/StorageMemory.h
index 97ddfa93d9a..91cf616c57d 100644
--- a/src/Storages/StorageMemory.h
+++ b/src/Storages/StorageMemory.h
@@ -15,11 +15,6 @@
 namespace DB
 {
 
-/// Lazy block contains possibly compressed columns. LazyColumn is std::function that reconstructs Column on call.
-using LazyBlock = LazyColumns;
-using LazyBlocks = std::vector<LazyBlock>;
-
-
 /** Implements storage in the RAM.
   * Suitable for temporary data.
   * It does not support keys.
@@ -101,7 +96,7 @@ public:
 private:
     /// MultiVersion data storage, so that we can copy the list of blocks to readers.
 
-    MultiVersion<LazyBlocks> data;
+    MultiVersion<Blocks> data;
 
     mutable std::mutex mutex;
 

From 71d84b9f67381f3456609caf548a928c5c88cbda Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Fri, 12 Feb 2021 03:52:53 +0300
Subject: [PATCH 1003/1238] Fix style

---
 src/Columns/ColumnVector.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp
index 324b23eabcc..1374b049ccf 100644
--- a/src/Columns/ColumnVector.cpp
+++ b/src/Columns/ColumnVector.cpp
@@ -33,8 +33,6 @@ namespace ErrorCodes
     extern const int PARAMETER_OUT_OF_BOUND;
     extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
     extern const int LOGICAL_ERROR;
-    extern const int CANNOT_COMPRESS;
-    extern const int CANNOT_DECOMPRESS;
 }
 
 template <typename T>

From 170daa5d6514a2a8c78f408ae40c62edc08a15c8 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Fri, 12 Feb 2021 05:33:39 +0300
Subject: [PATCH 1004/1238] Generate ya.make

---
 src/Columns/ya.make  | 1 +
 src/Storages/ya.make | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/Columns/ya.make b/src/Columns/ya.make
index 2affaeb0fc6..def9dfd4cb7 100644
--- a/src/Columns/ya.make
+++ b/src/Columns/ya.make
@@ -19,6 +19,7 @@ SRCS(
     Collator.cpp
     ColumnAggregateFunction.cpp
     ColumnArray.cpp
+    ColumnCompressed.cpp
     ColumnConst.cpp
     ColumnDecimal.cpp
     ColumnFixedString.cpp
diff --git a/src/Storages/ya.make b/src/Storages/ya.make
index dbf37e58695..e3e1807c566 100644
--- a/src/Storages/ya.make
+++ b/src/Storages/ya.make
@@ -24,6 +24,7 @@ SRCS(
     KeyDescription.cpp
     LiveView/StorageLiveView.cpp
     LiveView/TemporaryLiveViewCleaner.cpp
+    MemorySettings.cpp
     MergeTree/ActiveDataPartSet.cpp
     MergeTree/AllMergeSelector.cpp
     MergeTree/BackgroundJobsExecutor.cpp

From bb2a11bcfd94c525238a768ac10bdeaa1fb1d2b5 Mon Sep 17 00:00:00 2001
From: Olga Revyakina <revolg@yandex-team.ru>
Date: Fri, 12 Feb 2021 07:43:33 +0300
Subject: [PATCH 1005/1238] Misspelling

---
 docs/ru/sql-reference/data-types/map.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/sql-reference/data-types/map.md b/docs/ru/sql-reference/data-types/map.md
index 9c2ffedc4a9..6cb8ccf1143 100644
--- a/docs/ru/sql-reference/data-types/map.md
+++ b/docs/ru/sql-reference/data-types/map.md
@@ -25,7 +25,7 @@ CREATE TABLE table_map (a Map(String, UInt64)) ENGINE=Memory;
 INSERT INTO table_map VALUES ({'key1':1, 'key2':10}), ({'key1':2,'key2':20}), ({'key1':3,'key2':30});
 ```
 
-Выборка всем значений ключа `key2`: 
+Выборка всех значений ключа `key2`: 
 
 ```sql
 SELECT a['key2'] FROM table_map;

From 275a7870bcee5ce55e8ad28b93ab17207a3a7ac7 Mon Sep 17 00:00:00 2001
From: gyuton <40863448+gyuton@users.noreply.github.com>
Date: Fri, 12 Feb 2021 09:21:54 +0300
Subject: [PATCH 1006/1238] Update
 docs/ru/sql-reference/functions/ip-address-functions.md

Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
 docs/ru/sql-reference/functions/ip-address-functions.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/sql-reference/functions/ip-address-functions.md b/docs/ru/sql-reference/functions/ip-address-functions.md
index 68895aac7a6..75ad103a7e6 100644
--- a/docs/ru/sql-reference/functions/ip-address-functions.md
+++ b/docs/ru/sql-reference/functions/ip-address-functions.md
@@ -279,7 +279,7 @@ SELECT isIPv4String('0.0.0.0');
 └─────────────────────────┘
 ```
 
-## isIPv6String {#isipv4string}
+## isIPv6String {#isipv6string}
 
 Определяет, является ли строка адресом IPv6 или нет.
 

From 5ef59032c30f9cc45c6155790245d19637a029c0 Mon Sep 17 00:00:00 2001
From: gyuton <40863448+gyuton@users.noreply.github.com>
Date: Fri, 12 Feb 2021 09:21:59 +0300
Subject: [PATCH 1007/1238] Update
 docs/en/sql-reference/functions/ip-address-functions.md

Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
 docs/en/sql-reference/functions/ip-address-functions.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/functions/ip-address-functions.md b/docs/en/sql-reference/functions/ip-address-functions.md
index ab64fdc74d5..616b912b32c 100644
--- a/docs/en/sql-reference/functions/ip-address-functions.md
+++ b/docs/en/sql-reference/functions/ip-address-functions.md
@@ -301,7 +301,7 @@ Result:
 └─────────────────────────┘
 ```
 
-## isIPv6String {#isipv4string}
+## isIPv6String {#isipv6string}
 
 Determines whether the input string is an IPv6 address or not. 
 

From 90ba831301c2a63be079dcd741795fc137df84ca Mon Sep 17 00:00:00 2001
From: George <gyuton@yandex-team.ru>
Date: Fri, 12 Feb 2021 09:43:31 +0300
Subject: [PATCH 1008/1238] Fixes

---
 docs/en/sql-reference/functions/ip-address-functions.md | 4 ++--
 docs/ru/sql-reference/functions/ip-address-functions.md | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/en/sql-reference/functions/ip-address-functions.md b/docs/en/sql-reference/functions/ip-address-functions.md
index 616b912b32c..3d03b57bb50 100644
--- a/docs/en/sql-reference/functions/ip-address-functions.md
+++ b/docs/en/sql-reference/functions/ip-address-functions.md
@@ -267,7 +267,7 @@ SELECT toIPv6('127.0.0.1')
 
 ## isIPv4String {#isipv4string}
 
-Determines whether the input string is an IPv4 address or not.
+Determines whether the input string is an IPv4 address or not. Also will return `0` if `string` is IPv6 address.
 
 **Syntax**
 
@@ -303,7 +303,7 @@ Result:
 
 ## isIPv6String {#isipv6string}
 
-Determines whether the input string is an IPv6 address or not. 
+Determines whether the input string is an IPv6 address or not. Also will return `0` if `string` is IPv4 address.
 
 **Syntax**
 
diff --git a/docs/ru/sql-reference/functions/ip-address-functions.md b/docs/ru/sql-reference/functions/ip-address-functions.md
index 75ad103a7e6..6b477e642f1 100644
--- a/docs/ru/sql-reference/functions/ip-address-functions.md
+++ b/docs/ru/sql-reference/functions/ip-address-functions.md
@@ -245,7 +245,7 @@ SELECT
 
 ## isIPv4String {#isipv4string}
 
-Определяет, является ли строка адресом IPv4 или нет.
+Определяет, является ли строка адресом IPv4 или нет. Также вернет `0`, если `string` — адрес IPv6.
 
 **Синтаксис**
 
@@ -281,7 +281,7 @@ SELECT isIPv4String('0.0.0.0');
 
 ## isIPv6String {#isipv6string}
 
-Определяет, является ли строка адресом IPv6 или нет.
+Определяет, является ли строка адресом IPv6 или нет. Также вернет `0`, если `string` — адрес IPv4.
 
 **Синтаксис**
 

From c883b7d154d8f4b87129a2a458ca07187fd900eb Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Fri, 12 Feb 2021 11:50:20 +0300
Subject: [PATCH 1009/1238] Split tests to make them stable

---
 .../__init__.py                               |   0
 .../configs/enable_test_keeper1.xml           |   0
 .../configs/enable_test_keeper2.xml           |   0
 .../configs/enable_test_keeper3.xml           |   0
 .../configs/log_conf.xml                      |   0
 .../configs/use_test_keeper.xml               |   0
 .../test.py                                   | 198 ++-------------
 .../__init__.py                               |   1 +
 .../configs/enable_test_keeper1.xml           |  38 +++
 .../configs/enable_test_keeper2.xml           |  38 +++
 .../configs/enable_test_keeper3.xml           |  38 +++
 .../configs/log_conf.xml                      |  12 +
 .../configs/use_test_keeper.xml               |  16 ++
 .../test_testkeeper_multinode_simple/test.py  | 239 ++++++++++++++++++
 14 files changed, 408 insertions(+), 172 deletions(-)
 rename tests/integration/{test_testkeeper_multinode => test_testkeeper_multinode_blocade_leader}/__init__.py (100%)
 rename tests/integration/{test_testkeeper_multinode => test_testkeeper_multinode_blocade_leader}/configs/enable_test_keeper1.xml (100%)
 rename tests/integration/{test_testkeeper_multinode => test_testkeeper_multinode_blocade_leader}/configs/enable_test_keeper2.xml (100%)
 rename tests/integration/{test_testkeeper_multinode => test_testkeeper_multinode_blocade_leader}/configs/enable_test_keeper3.xml (100%)
 rename tests/integration/{test_testkeeper_multinode => test_testkeeper_multinode_blocade_leader}/configs/log_conf.xml (100%)
 rename tests/integration/{test_testkeeper_multinode => test_testkeeper_multinode_blocade_leader}/configs/use_test_keeper.xml (100%)
 rename tests/integration/{test_testkeeper_multinode => test_testkeeper_multinode_blocade_leader}/test.py (69%)
 create mode 100644 tests/integration/test_testkeeper_multinode_simple/__init__.py
 create mode 100644 tests/integration/test_testkeeper_multinode_simple/configs/enable_test_keeper1.xml
 create mode 100644 tests/integration/test_testkeeper_multinode_simple/configs/enable_test_keeper2.xml
 create mode 100644 tests/integration/test_testkeeper_multinode_simple/configs/enable_test_keeper3.xml
 create mode 100644 tests/integration/test_testkeeper_multinode_simple/configs/log_conf.xml
 create mode 100644 tests/integration/test_testkeeper_multinode_simple/configs/use_test_keeper.xml
 create mode 100644 tests/integration/test_testkeeper_multinode_simple/test.py

diff --git a/tests/integration/test_testkeeper_multinode/__init__.py b/tests/integration/test_testkeeper_multinode_blocade_leader/__init__.py
similarity index 100%
rename from tests/integration/test_testkeeper_multinode/__init__.py
rename to tests/integration/test_testkeeper_multinode_blocade_leader/__init__.py
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml b/tests/integration/test_testkeeper_multinode_blocade_leader/configs/enable_test_keeper1.xml
similarity index 100%
rename from tests/integration/test_testkeeper_multinode/configs/enable_test_keeper1.xml
rename to tests/integration/test_testkeeper_multinode_blocade_leader/configs/enable_test_keeper1.xml
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml b/tests/integration/test_testkeeper_multinode_blocade_leader/configs/enable_test_keeper2.xml
similarity index 100%
rename from tests/integration/test_testkeeper_multinode/configs/enable_test_keeper2.xml
rename to tests/integration/test_testkeeper_multinode_blocade_leader/configs/enable_test_keeper2.xml
diff --git a/tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml b/tests/integration/test_testkeeper_multinode_blocade_leader/configs/enable_test_keeper3.xml
similarity index 100%
rename from tests/integration/test_testkeeper_multinode/configs/enable_test_keeper3.xml
rename to tests/integration/test_testkeeper_multinode_blocade_leader/configs/enable_test_keeper3.xml
diff --git a/tests/integration/test_testkeeper_multinode/configs/log_conf.xml b/tests/integration/test_testkeeper_multinode_blocade_leader/configs/log_conf.xml
similarity index 100%
rename from tests/integration/test_testkeeper_multinode/configs/log_conf.xml
rename to tests/integration/test_testkeeper_multinode_blocade_leader/configs/log_conf.xml
diff --git a/tests/integration/test_testkeeper_multinode/configs/use_test_keeper.xml b/tests/integration/test_testkeeper_multinode_blocade_leader/configs/use_test_keeper.xml
similarity index 100%
rename from tests/integration/test_testkeeper_multinode/configs/use_test_keeper.xml
rename to tests/integration/test_testkeeper_multinode_blocade_leader/configs/use_test_keeper.xml
diff --git a/tests/integration/test_testkeeper_multinode/test.py b/tests/integration/test_testkeeper_multinode_blocade_leader/test.py
similarity index 69%
rename from tests/integration/test_testkeeper_multinode/test.py
rename to tests/integration/test_testkeeper_multinode_blocade_leader/test.py
index c9bde5c5a02..899f7212660 100644
--- a/tests/integration/test_testkeeper_multinode/test.py
+++ b/tests/integration/test_testkeeper_multinode_blocade_leader/test.py
@@ -27,6 +27,30 @@ def started_cluster():
 def smaller_exception(ex):
     return '\n'.join(str(ex).split('\n')[0:2])
 
+def wait_node(node):
+    for _ in range(100):
+        zk = None
+        try:
+            node.query("SELECT * FROM system.zookeeper WHERE path = '/'")
+            zk = get_fake_zk(node.name, timeout=30.0)
+            zk.create("/test", sequence=True)
+            print("node", node.name, "ready")
+            break
+        except Exception as ex:
+            time.sleep(0.2)
+            print("Waiting until", node.name, "will be ready, exception", ex)
+        finally:
+            if zk:
+                zk.stop()
+                zk.close()
+    else:
+        raise Exception("Can't wait node", node.name, "to become ready")
+
+def wait_nodes():
+    for node in [node1, node2, node3]:
+        wait_node(node)
+
+
 def get_fake_zk(nodename, timeout=30.0):
     _fake_zk_instance = KazooClient(hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout)
     def reset_listener(state):
@@ -39,182 +63,11 @@ def get_fake_zk(nodename, timeout=30.0):
     _fake_zk_instance.start()
     return _fake_zk_instance
 
-def test_read_write_multinode(started_cluster):
-    try:
-        node1_zk = get_fake_zk("node1")
-        node2_zk = get_fake_zk("node2")
-        node3_zk = get_fake_zk("node3")
-
-        node1_zk.create("/test_read_write_multinode_node1", b"somedata1")
-        node2_zk.create("/test_read_write_multinode_node2", b"somedata2")
-        node3_zk.create("/test_read_write_multinode_node3", b"somedata3")
-
-        # stale reads are allowed
-        while node1_zk.exists("/test_read_write_multinode_node2") is None:
-            time.sleep(0.1)
-
-        while node1_zk.exists("/test_read_write_multinode_node3") is None:
-            time.sleep(0.1)
-
-        while node2_zk.exists("/test_read_write_multinode_node3") is None:
-            time.sleep(0.1)
-
-        assert node3_zk.get("/test_read_write_multinode_node1")[0] == b"somedata1"
-        assert node2_zk.get("/test_read_write_multinode_node1")[0] == b"somedata1"
-        assert node1_zk.get("/test_read_write_multinode_node1")[0] == b"somedata1"
-
-        assert node3_zk.get("/test_read_write_multinode_node2")[0] == b"somedata2"
-        assert node2_zk.get("/test_read_write_multinode_node2")[0] == b"somedata2"
-        assert node1_zk.get("/test_read_write_multinode_node2")[0] == b"somedata2"
-
-        assert node3_zk.get("/test_read_write_multinode_node3")[0] == b"somedata3"
-        assert node2_zk.get("/test_read_write_multinode_node3")[0] == b"somedata3"
-        assert node1_zk.get("/test_read_write_multinode_node3")[0] == b"somedata3"
-
-    finally:
-        try:
-            for zk_conn in [node1_zk, node2_zk, node3_zk]:
-                zk_conn.stop()
-                zk_conn.close()
-        except:
-            pass
-
-
-def test_watch_on_follower(started_cluster):
-    try:
-        node1_zk = get_fake_zk("node1")
-        node2_zk = get_fake_zk("node2")
-        node3_zk = get_fake_zk("node3")
-
-        node1_zk.create("/test_data_watches")
-        node2_zk.set("/test_data_watches", b"hello")
-        node3_zk.set("/test_data_watches", b"world")
-
-        node1_data = None
-        def node1_callback(event):
-            print("node1 data watch called")
-            nonlocal node1_data
-            node1_data = event
-
-        node1_zk.get("/test_data_watches", watch=node1_callback)
-
-        node2_data = None
-        def node2_callback(event):
-            print("node2 data watch called")
-            nonlocal node2_data
-            node2_data = event
-
-        node2_zk.get("/test_data_watches", watch=node2_callback)
-
-        node3_data = None
-        def node3_callback(event):
-            print("node3 data watch called")
-            nonlocal node3_data
-            node3_data = event
-
-        node3_zk.get("/test_data_watches", watch=node3_callback)
-
-        node1_zk.set("/test_data_watches", b"somevalue")
-        time.sleep(3)
-
-        print(node1_data)
-        print(node2_data)
-        print(node3_data)
-
-        assert node1_data == node2_data
-        assert node3_data == node2_data
-
-    finally:
-        try:
-            for zk_conn in [node1_zk, node2_zk, node3_zk]:
-                zk_conn.stop()
-                zk_conn.close()
-        except:
-            pass
-
-
-def test_session_expiration(started_cluster):
-    try:
-        node1_zk = get_fake_zk("node1")
-        node2_zk = get_fake_zk("node2")
-        node3_zk = get_fake_zk("node3", timeout=5.0)
-
-        node3_zk.create("/test_ephemeral_node", b"world", ephemeral=True)
-
-        with PartitionManager() as pm:
-            pm.partition_instances(node3, node2)
-            pm.partition_instances(node3, node1)
-            node3_zk.stop()
-            node3_zk.close()
-            for _ in range(100):
-                if node1_zk.exists("/test_ephemeral_node") is None and node2_zk.exists("/test_ephemeral_node") is None:
-                    break
-                time.sleep(0.1)
-
-        assert node1_zk.exists("/test_ephemeral_node") is None
-        assert node2_zk.exists("/test_ephemeral_node") is None
-
-    finally:
-        try:
-            for zk_conn in [node1_zk, node2_zk, node3_zk]:
-                try:
-                    zk_conn.stop()
-                    zk_conn.close()
-                except:
-                    pass
-        except:
-            pass
-
-
-def test_follower_restart(started_cluster):
-    try:
-        node1_zk = get_fake_zk("node1")
-
-        node1_zk.create("/test_restart_node", b"hello")
-
-        node3.restart_clickhouse(kill=True)
-
-        node3_zk = get_fake_zk("node3")
-
-        # got data from log
-        assert node3_zk.get("/test_restart_node")[0] == b"hello"
-
-    finally:
-        try:
-            for zk_conn in [node1_zk, node3_zk]:
-                try:
-                    zk_conn.stop()
-                    zk_conn.close()
-                except:
-                    pass
-        except:
-            pass
-
-
-def test_simple_replicated_table(started_cluster):
-    # something may be wrong after partition in other tests
-    # so create with retry
-    for i, node in enumerate([node1, node2, node3]):
-        for i in range(100):
-            try:
-                node.query("CREATE TABLE t (value UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/t', '{}') ORDER BY tuple()".format(i + 1))
-                break
-            except:
-                time.sleep(0.1)
-
-    node2.query("INSERT INTO t SELECT number FROM numbers(10)")
-
-    node1.query("SYSTEM SYNC REPLICA t", timeout=10)
-    node3.query("SYSTEM SYNC REPLICA t", timeout=10)
-
-    assert node1.query("SELECT COUNT() FROM t") == "10\n"
-    assert node2.query("SELECT COUNT() FROM t") == "10\n"
-    assert node3.query("SELECT COUNT() FROM t") == "10\n"
-
 
 # in extremely rare case it can take more than 5 minutes in debug build with sanitizer
 @pytest.mark.timeout(600)
 def test_blocade_leader(started_cluster):
+    wait_nodes()
     for i, node in enumerate([node1, node2, node3]):
         node.query("CREATE TABLE t1 (value UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/t1', '{}') ORDER BY tuple()".format(i + 1))
 
@@ -337,6 +190,7 @@ def dump_zk(node, zk_path, replica_path):
 # in extremely rare case it can take more than 5 minutes in debug build with sanitizer
 @pytest.mark.timeout(600)
 def test_blocade_leader_twice(started_cluster):
+    wait_nodes()
     for i, node in enumerate([node1, node2, node3]):
         node.query("CREATE TABLE t2 (value UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/t2', '{}') ORDER BY tuple()".format(i + 1))
 
diff --git a/tests/integration/test_testkeeper_multinode_simple/__init__.py b/tests/integration/test_testkeeper_multinode_simple/__init__.py
new file mode 100644
index 00000000000..e5a0d9b4834
--- /dev/null
+++ b/tests/integration/test_testkeeper_multinode_simple/__init__.py
@@ -0,0 +1 @@
+#!/usr/bin/env python3
diff --git a/tests/integration/test_testkeeper_multinode_simple/configs/enable_test_keeper1.xml b/tests/integration/test_testkeeper_multinode_simple/configs/enable_test_keeper1.xml
new file mode 100644
index 00000000000..4ad76889d1e
--- /dev/null
+++ b/tests/integration/test_testkeeper_multinode_simple/configs/enable_test_keeper1.xml
@@ -0,0 +1,38 @@
+<yandex>
+    <test_keeper_server>
+        <tcp_port>9181</tcp_port>
+        <server_id>1</server_id>
+
+        <coordination_settings>
+            <operation_timeout_ms>5000</operation_timeout_ms>
+            <session_timeout_ms>10000</session_timeout_ms>
+            <raft_logs_level>trace</raft_logs_level>
+        </coordination_settings>
+
+        <raft_configuration>
+            <server>
+                <id>1</id>
+                <hostname>node1</hostname>
+                <port>44444</port>
+                <can_become_leader>true</can_become_leader>
+                <priority>3</priority>
+            </server>
+            <server>
+                <id>2</id>
+                <hostname>node2</hostname>
+                <port>44444</port>
+                <can_become_leader>true</can_become_leader>
+                <start_as_follower>true</start_as_follower>
+                <priority>2</priority>
+            </server>
+            <server>
+                <id>3</id>
+                <hostname>node3</hostname>
+                <port>44444</port>
+                <can_become_leader>true</can_become_leader>
+                <start_as_follower>true</start_as_follower>
+                <priority>1</priority>
+            </server>
+        </raft_configuration>
+    </test_keeper_server>
+</yandex>
diff --git a/tests/integration/test_testkeeper_multinode_simple/configs/enable_test_keeper2.xml b/tests/integration/test_testkeeper_multinode_simple/configs/enable_test_keeper2.xml
new file mode 100644
index 00000000000..a1954a1e639
--- /dev/null
+++ b/tests/integration/test_testkeeper_multinode_simple/configs/enable_test_keeper2.xml
@@ -0,0 +1,38 @@
+<yandex>
+    <test_keeper_server>
+        <tcp_port>9181</tcp_port>
+        <server_id>2</server_id>
+
+        <coordination_settings>
+            <operation_timeout_ms>5000</operation_timeout_ms>
+            <session_timeout_ms>10000</session_timeout_ms>
+            <raft_logs_level>trace</raft_logs_level>
+        </coordination_settings>
+
+        <raft_configuration>
+            <server>
+                <id>1</id>
+                <hostname>node1</hostname>
+                <port>44444</port>
+                <can_become_leader>true</can_become_leader>
+                <priority>3</priority>
+            </server>
+            <server>
+                <id>2</id>
+                <hostname>node2</hostname>
+                <port>44444</port>
+                <can_become_leader>true</can_become_leader>
+                <start_as_follower>true</start_as_follower>
+                <priority>2</priority>
+            </server>
+            <server>
+                <id>3</id>
+                <hostname>node3</hostname>
+                <port>44444</port>
+                <can_become_leader>true</can_become_leader>
+                <start_as_follower>true</start_as_follower>
+                <priority>1</priority>
+            </server>
+        </raft_configuration>
+    </test_keeper_server>
+</yandex>
diff --git a/tests/integration/test_testkeeper_multinode_simple/configs/enable_test_keeper3.xml b/tests/integration/test_testkeeper_multinode_simple/configs/enable_test_keeper3.xml
new file mode 100644
index 00000000000..88d2358138f
--- /dev/null
+++ b/tests/integration/test_testkeeper_multinode_simple/configs/enable_test_keeper3.xml
@@ -0,0 +1,38 @@
+<yandex>
+    <test_keeper_server>
+        <tcp_port>9181</tcp_port>
+        <server_id>3</server_id>
+
+        <coordination_settings>
+            <operation_timeout_ms>5000</operation_timeout_ms>
+            <session_timeout_ms>10000</session_timeout_ms>
+            <raft_logs_level>trace</raft_logs_level>
+        </coordination_settings>
+
+        <raft_configuration>
+            <server>
+                <id>1</id>
+                <hostname>node1</hostname>
+                <port>44444</port>
+                <can_become_leader>true</can_become_leader>
+                <priority>3</priority>
+            </server>
+            <server>
+                <id>2</id>
+                <hostname>node2</hostname>
+                <port>44444</port>
+                <can_become_leader>true</can_become_leader>
+                <start_as_follower>true</start_as_follower>
+                <priority>2</priority>
+            </server>
+            <server>
+                <id>3</id>
+                <hostname>node3</hostname>
+                <port>44444</port>
+                <can_become_leader>true</can_become_leader>
+                <start_as_follower>true</start_as_follower>
+                <priority>1</priority>
+            </server>
+        </raft_configuration>
+    </test_keeper_server>
+</yandex>
diff --git a/tests/integration/test_testkeeper_multinode_simple/configs/log_conf.xml b/tests/integration/test_testkeeper_multinode_simple/configs/log_conf.xml
new file mode 100644
index 00000000000..318a6bca95d
--- /dev/null
+++ b/tests/integration/test_testkeeper_multinode_simple/configs/log_conf.xml
@@ -0,0 +1,12 @@
+<yandex>
+    <shutdown_wait_unfinished>3</shutdown_wait_unfinished>
+    <logger>
+        <level>trace</level>
+        <log>/var/log/clickhouse-server/log.log</log>
+        <errorlog>/var/log/clickhouse-server/log.err.log</errorlog>
+        <size>1000M</size>
+        <count>10</count>
+        <stderr>/var/log/clickhouse-server/stderr.log</stderr>
+        <stdout>/var/log/clickhouse-server/stdout.log</stdout>
+    </logger>
+</yandex>
diff --git a/tests/integration/test_testkeeper_multinode_simple/configs/use_test_keeper.xml b/tests/integration/test_testkeeper_multinode_simple/configs/use_test_keeper.xml
new file mode 100644
index 00000000000..b6139005d2f
--- /dev/null
+++ b/tests/integration/test_testkeeper_multinode_simple/configs/use_test_keeper.xml
@@ -0,0 +1,16 @@
+<yandex>
+    <zookeeper>
+        <node index="1">
+            <host>node1</host>
+            <port>9181</port>
+        </node>
+        <node index="2">
+            <host>node2</host>
+            <port>9181</port>
+        </node>
+        <node index="3">
+            <host>node3</host>
+            <port>9181</port>
+        </node>
+    </zookeeper>
+</yandex>
diff --git a/tests/integration/test_testkeeper_multinode_simple/test.py b/tests/integration/test_testkeeper_multinode_simple/test.py
new file mode 100644
index 00000000000..a7ece4bbd56
--- /dev/null
+++ b/tests/integration/test_testkeeper_multinode_simple/test.py
@@ -0,0 +1,239 @@
+import pytest
+from helpers.cluster import ClickHouseCluster
+import random
+import string
+import os
+import time
+from multiprocessing.dummy import Pool
+from helpers.network import PartitionManager
+
+cluster = ClickHouseCluster(__file__)
+node1 = cluster.add_instance('node1', main_configs=['configs/enable_test_keeper1.xml', 'configs/log_conf.xml', 'configs/use_test_keeper.xml'], stay_alive=True)
+node2 = cluster.add_instance('node2', main_configs=['configs/enable_test_keeper2.xml', 'configs/log_conf.xml', 'configs/use_test_keeper.xml'], stay_alive=True)
+node3 = cluster.add_instance('node3', main_configs=['configs/enable_test_keeper3.xml', 'configs/log_conf.xml', 'configs/use_test_keeper.xml'], stay_alive=True)
+
+from kazoo.client import KazooClient, KazooState
+
+@pytest.fixture(scope="module")
+def started_cluster():
+    try:
+        cluster.start()
+
+        yield cluster
+
+    finally:
+        cluster.shutdown()
+
+def smaller_exception(ex):
+    return '\n'.join(str(ex).split('\n')[0:2])
+
+def wait_node(node):
+    for _ in range(100):
+        zk = None
+        try:
+            node.query("SELECT * FROM system.zookeeper WHERE path = '/'")
+            zk = get_fake_zk(node.name, timeout=30.0)
+            zk.create("/test", sequence=True)
+            print("node", node.name, "ready")
+            break
+        except Exception as ex:
+            time.sleep(0.2)
+            print("Waiting until", node.name, "will be ready, exception", ex)
+        finally:
+            if zk:
+                zk.stop()
+                zk.close()
+    else:
+        raise Exception("Can't wait node", node.name, "to become ready")
+
+def wait_nodes():
+    for node in [node1, node2, node3]:
+        wait_node(node)
+
+
+def get_fake_zk(nodename, timeout=30.0):
+    _fake_zk_instance = KazooClient(hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout)
+    def reset_listener(state):
+        nonlocal _fake_zk_instance
+        print("Fake zk callback called for state", state)
+        if state != KazooState.CONNECTED:
+            _fake_zk_instance._reset()
+
+    _fake_zk_instance.add_listener(reset_listener)
+    _fake_zk_instance.start()
+    return _fake_zk_instance
+
+def test_read_write_multinode(started_cluster):
+    try:
+        wait_nodes()
+        node1_zk = get_fake_zk("node1")
+        node2_zk = get_fake_zk("node2")
+        node3_zk = get_fake_zk("node3")
+
+        node1_zk.create("/test_read_write_multinode_node1", b"somedata1")
+        node2_zk.create("/test_read_write_multinode_node2", b"somedata2")
+        node3_zk.create("/test_read_write_multinode_node3", b"somedata3")
+
+        # stale reads are allowed
+        while node1_zk.exists("/test_read_write_multinode_node2") is None:
+            time.sleep(0.1)
+
+        while node1_zk.exists("/test_read_write_multinode_node3") is None:
+            time.sleep(0.1)
+
+        while node2_zk.exists("/test_read_write_multinode_node3") is None:
+            time.sleep(0.1)
+
+        assert node3_zk.get("/test_read_write_multinode_node1")[0] == b"somedata1"
+        assert node2_zk.get("/test_read_write_multinode_node1")[0] == b"somedata1"
+        assert node1_zk.get("/test_read_write_multinode_node1")[0] == b"somedata1"
+
+        assert node3_zk.get("/test_read_write_multinode_node2")[0] == b"somedata2"
+        assert node2_zk.get("/test_read_write_multinode_node2")[0] == b"somedata2"
+        assert node1_zk.get("/test_read_write_multinode_node2")[0] == b"somedata2"
+
+        assert node3_zk.get("/test_read_write_multinode_node3")[0] == b"somedata3"
+        assert node2_zk.get("/test_read_write_multinode_node3")[0] == b"somedata3"
+        assert node1_zk.get("/test_read_write_multinode_node3")[0] == b"somedata3"
+
+    finally:
+        try:
+            for zk_conn in [node1_zk, node2_zk, node3_zk]:
+                zk_conn.stop()
+                zk_conn.close()
+        except:
+            pass
+
+
+def test_watch_on_follower(started_cluster):
+    try:
+        wait_nodes()
+        node1_zk = get_fake_zk("node1")
+        node2_zk = get_fake_zk("node2")
+        node3_zk = get_fake_zk("node3")
+
+        node1_zk.create("/test_data_watches")
+        node2_zk.set("/test_data_watches", b"hello")
+        node3_zk.set("/test_data_watches", b"world")
+
+        node1_data = None
+        def node1_callback(event):
+            print("node1 data watch called")
+            nonlocal node1_data
+            node1_data = event
+
+        node1_zk.get("/test_data_watches", watch=node1_callback)
+
+        node2_data = None
+        def node2_callback(event):
+            print("node2 data watch called")
+            nonlocal node2_data
+            node2_data = event
+
+        node2_zk.get("/test_data_watches", watch=node2_callback)
+
+        node3_data = None
+        def node3_callback(event):
+            print("node3 data watch called")
+            nonlocal node3_data
+            node3_data = event
+
+        node3_zk.get("/test_data_watches", watch=node3_callback)
+
+        node1_zk.set("/test_data_watches", b"somevalue")
+        time.sleep(3)
+
+        print(node1_data)
+        print(node2_data)
+        print(node3_data)
+
+        assert node1_data == node2_data
+        assert node3_data == node2_data
+
+    finally:
+        try:
+            for zk_conn in [node1_zk, node2_zk, node3_zk]:
+                zk_conn.stop()
+                zk_conn.close()
+        except:
+            pass
+
+
+def test_session_expiration(started_cluster):
+    try:
+        wait_nodes()
+        node1_zk = get_fake_zk("node1")
+        node2_zk = get_fake_zk("node2")
+        node3_zk = get_fake_zk("node3", timeout=3.0)
+        print("Node3 session id", node3_zk._session_id)
+
+        node3_zk.create("/test_ephemeral_node", b"world", ephemeral=True)
+
+        with PartitionManager() as pm:
+            pm.partition_instances(node3, node2)
+            pm.partition_instances(node3, node1)
+            node3_zk.stop()
+            node3_zk.close()
+            for _ in range(100):
+                if node1_zk.exists("/test_ephemeral_node") is None and node2_zk.exists("/test_ephemeral_node") is None:
+                    break
+                print("Node1 exists", node1_zk.exists("/test_ephemeral_node"))
+                print("Node2 exists", node2_zk.exists("/test_ephemeral_node"))
+                time.sleep(0.1)
+                node1_zk.sync("/")
+                node2_zk.sync("/")
+
+        assert node1_zk.exists("/test_ephemeral_node") is None
+        assert node2_zk.exists("/test_ephemeral_node") is None
+
+    finally:
+        try:
+            for zk_conn in [node1_zk, node2_zk, node3_zk]:
+                try:
+                    zk_conn.stop()
+                    zk_conn.close()
+                except:
+                    pass
+        except:
+            pass
+
+
+def test_follower_restart(started_cluster):
+    try:
+        wait_nodes()
+        node1_zk = get_fake_zk("node1")
+
+        node1_zk.create("/test_restart_node", b"hello")
+
+        node3.restart_clickhouse(kill=True)
+
+        node3_zk = get_fake_zk("node3")
+
+        # got data from log
+        assert node3_zk.get("/test_restart_node")[0] == b"hello"
+
+    finally:
+        try:
+            for zk_conn in [node1_zk, node3_zk]:
+                try:
+                    zk_conn.stop()
+                    zk_conn.close()
+                except:
+                    pass
+        except:
+            pass
+
+
+def test_simple_replicated_table(started_cluster):
+    wait_nodes()
+    for i, node in enumerate([node1, node2, node3]):
+        node.query("CREATE TABLE t (value UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/t', '{}') ORDER BY tuple()".format(i + 1))
+
+    node2.query("INSERT INTO t SELECT number FROM numbers(10)")
+
+    node1.query("SYSTEM SYNC REPLICA t", timeout=10)
+    node3.query("SYSTEM SYNC REPLICA t", timeout=10)
+
+    assert node1.query("SELECT COUNT() FROM t") == "10\n"
+    assert node2.query("SELECT COUNT() FROM t") == "10\n"
+    assert node3.query("SELECT COUNT() FROM t") == "10\n"

From f9527738c9ce98e09e5329434e04ae3de54998a3 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Fri, 12 Feb 2021 12:12:04 +0300
Subject: [PATCH 1010/1238] Added comments

---
 src/Columns/ColumnDecimal.cpp      |  1 +
 src/Columns/ColumnDecimal.h        | 12 ++++++++++++
 src/Common/Allocator.h             |  1 +
 src/Common/PODArray.h              |  2 ++
 src/DataTypes/DataTypeNumberBase.h |  2 ++
 5 files changed, 18 insertions(+)

diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp
index dc565f5590c..ddc971032b6 100644
--- a/src/Columns/ColumnDecimal.cpp
+++ b/src/Columns/ColumnDecimal.cpp
@@ -376,4 +376,5 @@ template class ColumnDecimal<Decimal64>;
 template class ColumnDecimal<Decimal128>;
 template class ColumnDecimal<Decimal256>;
 template class ColumnDecimal<DateTime64>;
+
 }
diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h
index 3844a2af141..ef841292a7d 100644
--- a/src/Columns/ColumnDecimal.h
+++ b/src/Columns/ColumnDecimal.h
@@ -50,6 +50,8 @@ private:
     UInt32 scale;
 };
 
+/// Prevent implicit template instantiation of DecimalPaddedPODArray for common decimal types
+
 extern template class DecimalPaddedPODArray<Decimal32>;
 extern template class DecimalPaddedPODArray<Decimal64>;
 extern template class DecimalPaddedPODArray<Decimal128>;
@@ -221,4 +223,14 @@ ColumnPtr ColumnDecimal<T>::indexImpl(const PaddedPODArray<Type> & indexes, size
     return res;
 }
 
+
+/// Prevent implicit template instantiation of ColumnDecimal for common decimal types
+
+extern template class ColumnDecimal<Decimal32>;
+extern template class ColumnDecimal<Decimal64>;
+extern template class ColumnDecimal<Decimal128>;
+extern template class ColumnDecimal<Decimal256>;
+extern template class ColumnDecimal<DateTime64>;
+
+
 }
diff --git a/src/Common/Allocator.h b/src/Common/Allocator.h
index 118ba7b1680..e3c6ddf9ff4 100644
--- a/src/Common/Allocator.h
+++ b/src/Common/Allocator.h
@@ -352,6 +352,7 @@ template<typename Base, size_t initial_bytes, size_t Alignment>
 constexpr size_t allocatorInitialBytes<AllocatorWithStackMemory<
     Base, initial_bytes, Alignment>> = initial_bytes;
 
+/// Prevent implicit template instantiation of Allocator
 
 extern template class Allocator<false, false>;
 extern template class Allocator<true, false>;
diff --git a/src/Common/PODArray.h b/src/Common/PODArray.h
index 19b1d61fe85..8e05dfea8b3 100644
--- a/src/Common/PODArray.h
+++ b/src/Common/PODArray.h
@@ -725,6 +725,8 @@ void swap(PODArray<T, initial_bytes, TAllocator, pad_right_> & lhs, PODArray<T,
 }
 #pragma GCC diagnostic pop
 
+/// Prevent implicit template instantiation of PODArray for common numeric types
+
 extern template class PODArray<UInt8, 4096, Allocator<false>, 15, 16>;
 extern template class PODArray<UInt16, 4096, Allocator<false>, 15, 16>;
 extern template class PODArray<UInt32, 4096, Allocator<false>, 15, 16>;
diff --git a/src/DataTypes/DataTypeNumberBase.h b/src/DataTypes/DataTypeNumberBase.h
index 7727929ce4d..1491eabfbd5 100644
--- a/src/DataTypes/DataTypeNumberBase.h
+++ b/src/DataTypes/DataTypeNumberBase.h
@@ -71,6 +71,8 @@ public:
     bool canBeInsideLowCardinality() const override { return true; }
 };
 
+/// Prevent implicit template instantiation of DataTypeNumberBase for common numeric types
+
 extern template class DataTypeNumberBase<UInt8>;
 extern template class DataTypeNumberBase<UInt16>;
 extern template class DataTypeNumberBase<UInt32>;

From 6aecb62416ece880cbb8ee3a803e14d841388dde Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Fri, 12 Feb 2021 12:17:10 +0300
Subject: [PATCH 1011/1238] Replace database with ordinary

---
 .../test.py                                   | 98 ++++++++++---------
 1 file changed, 50 insertions(+), 48 deletions(-)

diff --git a/tests/integration/test_testkeeper_multinode_blocade_leader/test.py b/tests/integration/test_testkeeper_multinode_blocade_leader/test.py
index 899f7212660..3b2867ef3c7 100644
--- a/tests/integration/test_testkeeper_multinode_blocade_leader/test.py
+++ b/tests/integration/test_testkeeper_multinode_blocade_leader/test.py
@@ -69,16 +69,17 @@ def get_fake_zk(nodename, timeout=30.0):
 def test_blocade_leader(started_cluster):
     wait_nodes()
     for i, node in enumerate([node1, node2, node3]):
-        node.query("CREATE TABLE t1 (value UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/t1', '{}') ORDER BY tuple()".format(i + 1))
+        node.query("CREATE DATABASE IF NOT EXISTS ordinary ENGINE=Ordinary")
+        node.query("CREATE TABLE ordinary.t1 (value UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/t1', '{}') ORDER BY tuple()".format(i + 1))
 
-    node2.query("INSERT INTO t1 SELECT number FROM numbers(10)")
+    node2.query("INSERT INTO ordinary.t1 SELECT number FROM numbers(10)")
 
-    node1.query("SYSTEM SYNC REPLICA t1", timeout=10)
-    node3.query("SYSTEM SYNC REPLICA t1", timeout=10)
+    node1.query("SYSTEM SYNC REPLICA ordinary.t1", timeout=10)
+    node3.query("SYSTEM SYNC REPLICA ordinary.t1", timeout=10)
 
-    assert node1.query("SELECT COUNT() FROM t1") == "10\n"
-    assert node2.query("SELECT COUNT() FROM t1") == "10\n"
-    assert node3.query("SELECT COUNT() FROM t1") == "10\n"
+    assert node1.query("SELECT COUNT() FROM ordinary.t1") == "10\n"
+    assert node2.query("SELECT COUNT() FROM ordinary.t1") == "10\n"
+    assert node3.query("SELECT COUNT() FROM ordinary.t1") == "10\n"
 
     with PartitionManager() as pm:
         pm.partition_instances(node2, node1)
@@ -86,12 +87,12 @@ def test_blocade_leader(started_cluster):
 
         for i in range(100):
             try:
-                node2.query("SYSTEM RESTART REPLICA t1")
-                node2.query("INSERT INTO t1 SELECT rand() FROM numbers(100)")
+                node2.query("SYSTEM RESTART REPLICA ordinary.t1")
+                node2.query("INSERT INTO ordinary.t1 SELECT rand() FROM numbers(100)")
                 break
             except Exception as ex:
                 try:
-                    node2.query("ATTACH TABLE t1")
+                    node2.query("ATTACH TABLE ordinary.t1")
                 except Exception as attach_ex:
                     print("Got exception node2", smaller_exception(attach_ex))
                 print("Got exception node2", smaller_exception(ex))
@@ -103,12 +104,12 @@ def test_blocade_leader(started_cluster):
 
         for i in range(100):
             try:
-                node3.query("SYSTEM RESTART REPLICA t1")
-                node3.query("INSERT INTO t1 SELECT rand() FROM numbers(100)")
+                node3.query("SYSTEM RESTART REPLICA ordinary.t1")
+                node3.query("INSERT INTO ordinary.t1 SELECT rand() FROM numbers(100)")
                 break
             except Exception as ex:
                 try:
-                    node3.query("ATTACH TABLE t1")
+                    node3.query("ATTACH TABLE ordinary.t1")
                 except Exception as attach_ex:
                     print("Got exception node3", smaller_exception(attach_ex))
                 print("Got exception node3", smaller_exception(ex))
@@ -121,11 +122,11 @@ def test_blocade_leader(started_cluster):
     for n, node in enumerate([node1, node2, node3]):
         for i in range(100):
             try:
-                node.query("SYSTEM RESTART REPLICA t1")
+                node.query("SYSTEM RESTART REPLICA ordinary.t1")
                 break
             except Exception as ex:
                 try:
-                    node.query("ATTACH TABLE t1")
+                    node.query("ATTACH TABLE ordinary.t1")
                 except Exception as attach_ex:
                     print("Got exception node{}".format(n + 1), smaller_exception(attach_ex))
 
@@ -136,7 +137,7 @@ def test_blocade_leader(started_cluster):
 
     for i in range(100):
         try:
-            node1.query("INSERT INTO t1 SELECT rand() FROM numbers(100)")
+            node1.query("INSERT INTO ordinary.t1 SELECT rand() FROM numbers(100)")
             break
         except Exception as ex:
             print("Got exception node1", smaller_exception(ex))
@@ -149,12 +150,12 @@ def test_blocade_leader(started_cluster):
     for n, node in enumerate([node1, node2, node3]):
         for i in range(100):
             try:
-                node.query("SYSTEM RESTART REPLICA t1")
-                node.query("SYSTEM SYNC REPLICA t1", timeout=10)
+                node.query("SYSTEM RESTART REPLICA ordinary.t1")
+                node.query("SYSTEM SYNC REPLICA ordinary.t1", timeout=10)
                 break
             except Exception as ex:
                 try:
-                    node.query("ATTACH TABLE t1")
+                    node.query("ATTACH TABLE ordinary.t1")
                 except Exception as attach_ex:
                     print("Got exception node{}".format(n + 1), smaller_exception(attach_ex))
 
@@ -165,13 +166,13 @@ def test_blocade_leader(started_cluster):
                 dump_zk(node, '/clickhouse/t1', '/clickhouse/t1/replicas/{}'.format(num + 1))
             assert False, "Cannot sync replica node{}".format(n+1)
 
-    if node1.query("SELECT COUNT() FROM t1") != "310\n":
+    if node1.query("SELECT COUNT() FROM ordinary.t1") != "310\n":
         for num, node in enumerate([node1, node2, node3]):
             dump_zk(node, '/clickhouse/t1', '/clickhouse/t1/replicas/{}'.format(num + 1))
 
-    assert node1.query("SELECT COUNT() FROM t1") == "310\n"
-    assert node2.query("SELECT COUNT() FROM t1") == "310\n"
-    assert node3.query("SELECT COUNT() FROM t1") == "310\n"
+    assert node1.query("SELECT COUNT() FROM ordinary.t1") == "310\n"
+    assert node2.query("SELECT COUNT() FROM ordinary.t1") == "310\n"
+    assert node3.query("SELECT COUNT() FROM ordinary.t1") == "310\n"
 
 
 def dump_zk(node, zk_path, replica_path):
@@ -192,16 +193,17 @@ def dump_zk(node, zk_path, replica_path):
 def test_blocade_leader_twice(started_cluster):
     wait_nodes()
     for i, node in enumerate([node1, node2, node3]):
-        node.query("CREATE TABLE t2 (value UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/t2', '{}') ORDER BY tuple()".format(i + 1))
+        node.query("CREATE DATABASE IF NOT EXISTS ordinary ENGINE=Ordinary")
+        node.query("CREATE TABLE ordinary.t2 (value UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/t2', '{}') ORDER BY tuple()".format(i + 1))
 
-    node2.query("INSERT INTO t2 SELECT number FROM numbers(10)")
+    node2.query("INSERT INTO ordinary.t2 SELECT number FROM numbers(10)")
 
-    node1.query("SYSTEM SYNC REPLICA t2", timeout=10)
-    node3.query("SYSTEM SYNC REPLICA t2", timeout=10)
+    node1.query("SYSTEM SYNC REPLICA ordinary.t2", timeout=10)
+    node3.query("SYSTEM SYNC REPLICA ordinary.t2", timeout=10)
 
-    assert node1.query("SELECT COUNT() FROM t2") == "10\n"
-    assert node2.query("SELECT COUNT() FROM t2") == "10\n"
-    assert node3.query("SELECT COUNT() FROM t2") == "10\n"
+    assert node1.query("SELECT COUNT() FROM ordinary.t2") == "10\n"
+    assert node2.query("SELECT COUNT() FROM ordinary.t2") == "10\n"
+    assert node3.query("SELECT COUNT() FROM ordinary.t2") == "10\n"
 
     with PartitionManager() as pm:
         pm.partition_instances(node2, node1)
@@ -209,12 +211,12 @@ def test_blocade_leader_twice(started_cluster):
 
         for i in range(100):
             try:
-                node2.query("SYSTEM RESTART REPLICA t2")
-                node2.query("INSERT INTO t2 SELECT rand() FROM numbers(100)")
+                node2.query("SYSTEM RESTART REPLICA ordinary.t2")
+                node2.query("INSERT INTO ordinary.t2 SELECT rand() FROM numbers(100)")
                 break
             except Exception as ex:
                 try:
-                    node2.query("ATTACH TABLE t2")
+                    node2.query("ATTACH TABLE ordinary.t2")
                 except Exception as attach_ex:
                     print("Got exception node2", smaller_exception(attach_ex))
                 print("Got exception node2", smaller_exception(ex))
@@ -226,12 +228,12 @@ def test_blocade_leader_twice(started_cluster):
 
         for i in range(100):
             try:
-                node3.query("SYSTEM RESTART REPLICA t2")
-                node3.query("INSERT INTO t2 SELECT rand() FROM numbers(100)")
+                node3.query("SYSTEM RESTART REPLICA ordinary.t2")
+                node3.query("INSERT INTO ordinary.t2 SELECT rand() FROM numbers(100)")
                 break
             except Exception as ex:
                 try:
-                    node3.query("ATTACH TABLE t2")
+                    node3.query("ATTACH TABLE ordinary.t2")
                 except Exception as attach_ex:
                     print("Got exception node3", smaller_exception(attach_ex))
                 print("Got exception node3", smaller_exception(ex))
@@ -247,14 +249,14 @@ def test_blocade_leader_twice(started_cluster):
 
         for i in range(10):
             try:
-                node3.query("INSERT INTO t2 SELECT rand() FROM numbers(100)")
+                node3.query("INSERT INTO ordinary.t2 SELECT rand() FROM numbers(100)")
                 assert False, "Node3 became leader?"
             except Exception as ex:
                 time.sleep(0.5)
 
         for i in range(10):
             try:
-                node2.query("INSERT INTO t2 SELECT rand() FROM numbers(100)")
+                node2.query("INSERT INTO ordinary.t2 SELECT rand() FROM numbers(100)")
                 assert False, "Node2 became leader?"
             except Exception as ex:
                 time.sleep(0.5)
@@ -263,11 +265,11 @@ def test_blocade_leader_twice(started_cluster):
     for n, node in enumerate([node1, node2, node3]):
         for i in range(100):
             try:
-                node.query("SYSTEM RESTART REPLICA t2")
+                node.query("SYSTEM RESTART REPLICA ordinary.t2")
                 break
             except Exception as ex:
                 try:
-                    node.query("ATTACH TABLE t2")
+                    node.query("ATTACH TABLE ordinary.t2")
                 except Exception as attach_ex:
                     print("Got exception node{}".format(n + 1), smaller_exception(attach_ex))
 
@@ -281,7 +283,7 @@ def test_blocade_leader_twice(started_cluster):
     for n, node in enumerate([node1, node2, node3]):
         for i in range(100):
             try:
-                node.query("INSERT INTO t2 SELECT rand() FROM numbers(100)")
+                node.query("INSERT INTO ordinary.t2 SELECT rand() FROM numbers(100)")
                 break
             except Exception as ex:
                 print("Got exception node{}".format(n + 1), smaller_exception(ex))
@@ -294,12 +296,12 @@ def test_blocade_leader_twice(started_cluster):
     for n, node in enumerate([node1, node2, node3]):
         for i in range(100):
             try:
-                node.query("SYSTEM RESTART REPLICA t2")
-                node.query("SYSTEM SYNC REPLICA t2", timeout=10)
+                node.query("SYSTEM RESTART REPLICA ordinary.t2")
+                node.query("SYSTEM SYNC REPLICA ordinary.t2", timeout=10)
                 break
             except Exception as ex:
                 try:
-                    node.query("ATTACH TABLE t2")
+                    node.query("ATTACH TABLE ordinary.t2")
                 except Exception as attach_ex:
                     print("Got exception node{}".format(n + 1), smaller_exception(attach_ex))
 
@@ -310,10 +312,10 @@ def test_blocade_leader_twice(started_cluster):
                 dump_zk(node, '/clickhouse/t2', '/clickhouse/t2/replicas/{}'.format(num + 1))
             assert False, "Cannot reconnect for node{}".format(n + 1)
 
-    assert node1.query("SELECT COUNT() FROM t2") == "510\n"
-    if node2.query("SELECT COUNT() FROM t2") != "510\n":
+    assert node1.query("SELECT COUNT() FROM ordinary.t2") == "510\n"
+    if node2.query("SELECT COUNT() FROM ordinary.t2") != "510\n":
         for num, node in enumerate([node1, node2, node3]):
             dump_zk(node, '/clickhouse/t2', '/clickhouse/t2/replicas/{}'.format(num + 1))
 
-    assert node2.query("SELECT COUNT() FROM t2") == "510\n"
-    assert node3.query("SELECT COUNT() FROM t2") == "510\n"
+    assert node2.query("SELECT COUNT() FROM ordinary.t2") == "510\n"
+    assert node3.query("SELECT COUNT() FROM ordinary.t2") == "510\n"

From 824475b224a3a317355750093f2593ac3f63f488 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Fri, 12 Feb 2021 13:37:27 +0300
Subject: [PATCH 1012/1238] cleanup

---
 src/Processors/Transforms/WindowTransform.cpp | 99 ++++---------------
 .../01591_window_functions.reference          | 17 +++-
 .../0_stateless/01591_window_functions.sql    | 12 ++-
 3 files changed, 37 insertions(+), 91 deletions(-)

diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index 5bd11db2cc2..0013e0061e2 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -24,7 +24,7 @@ namespace ErrorCodes
 class IWindowFunction
 {
 public:
-    virtual ~IWindowFunction() {}
+    virtual ~IWindowFunction() = default;
 
     // Must insert the result for current_row.
     virtual void windowInsertResultInto(IColumn & to, const WindowTransform * transform) = 0;
@@ -1316,74 +1316,25 @@ struct WindowFunctionRowNumber final : public WindowFunction
     }
 };
 
-struct WindowFunctionLagLead final : public WindowFunction
-{
-    bool is_lag = false;
-    // Always positive.
-    uint64_t offset_rows = 1;
-    Field default_value;
-
-    WindowFunctionLagLead(const std::string & name_,
-        const DataTypes & argument_types_, const Array & parameters_,
-            bool is_lag_)
-        : WindowFunction(name_, argument_types_, parameters_)
-        , is_lag(is_lag_)
-    {
-        // offset and default are in parameters
-        if (argument_types.size() != 1)
-        {
-            throw Exception(ErrorCodes::BAD_ARGUMENTS,
-                "The window function {} must have exactly one argument -- the value column. The offset and the default value must be specified as parameters, i.e. `{}(offset, default)(column)`",
-                getName(), getName());
-        }
-
-        if (parameters.size() > 2)
-        {
-            throw Exception(ErrorCodes::BAD_ARGUMENTS,
-                "The window function {} accepts at most two parameters, {} given",
-                getName(), parameters.size());
-        }
-
-        if (parameters.size() >= 1)
-        {
-            if (!isInt64FieldType(parameters[0].getType())
-                || parameters[0].get<Int64>() < 0)
-            {
-                throw Exception(ErrorCodes::BAD_ARGUMENTS,
-                    "The first parameter of the window function {} must be a nonnegative integer specifying the number of offset rows. Got '{}' instead",
-                    getName(), toString(parameters[0]));
-            }
-
-            offset_rows = parameters[0].get<UInt64>();
-        }
-
-        if (parameters.size() >= 2)
-        {
-            default_value = convertFieldToTypeOrThrow(parameters[1],
-                *argument_types[0]);
-        }
-    }
-
-    DataTypePtr getReturnType() const override { return argument_types[0]; }
-
-    void windowInsertResultInto(IColumn &, const WindowTransform *) override
-    {
-        // These functions are a mess... they ignore the frame, so we need to
-        // either materialize the whole partition (not practical if it's big),
-        // or track a separate frame for these functions, which would  make the
-        // window transform completely impenetrable to human mind. Our best bet
-        // is probably rewriting, say, `lag(value, offset)` to
-        // `any(value) over (rows between offset preceding and offset preceding)`,
-        // at the query planning stage. We can keep this class as a stub for
-        // parsing, anyway.
-        throw Exception(ErrorCodes::NOT_IMPLEMENTED,
-            "The window function {} is not implemented",
-            getName());
-    }
-};
-
 void registerWindowFunctions(AggregateFunctionFactory & factory)
 {
+    // Why didn't I implement lag/lead yet? Because they are a mess. I imagine
+    // they are from the older generation of window functions, when the concept
+    // of frame was not yet invented, so they ignore the frame and use the
+    // partition instead. This means we have to track a separate frame for
+    // these functions, which would  make the window transform completely
+    // impenetrable to human mind. We can't just get away with materializing
+    // the whole partition like Postgres does, because using a linear amount
+    // of additional memory is not an option when we have a lot of data. We must
+    // be able to process at least the lag/lead in streaming fashion.
+    // Our best bet is probably rewriting, say `lag(value, offset)` to
+    // `any(value) over (rows between offset preceding and offset preceding)`,
+    // at the query planning stage.
+    // Functions like cume_dist() do require materializing the entire
+    // partition, but it's probably also simpler to implement them by rewriting
+    // to a (rows between unbounded preceding and unbounded following) frame,
+    // instead of adding separate logic for them.
+
     factory.registerFunction("rank", [](const std::string & name,
             const DataTypes & argument_types, const Array & parameters)
         {
@@ -1404,20 +1355,6 @@ void registerWindowFunctions(AggregateFunctionFactory & factory)
             return std::make_shared<WindowFunctionRowNumber>(name, argument_types,
                 parameters);
         });
-
-    factory.registerFunction("lag", [](const std::string & name,
-            const DataTypes & argument_types, const Array & parameters)
-        {
-            return std::make_shared<WindowFunctionLagLead>(name, argument_types,
-                parameters, true /* is_lag */);
-        });
-
-    factory.registerFunction("lead", [](const std::string & name,
-            const DataTypes & argument_types, const Array & parameters)
-        {
-            return std::make_shared<WindowFunctionLagLead>(name, argument_types,
-                parameters, false /* is_lag */);
-        });
 }
 
 }
diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference
index b0ddff0a824..d2543f0db75 100644
--- a/tests/queries/0_stateless/01591_window_functions.reference
+++ b/tests/queries/0_stateless/01591_window_functions.reference
@@ -962,12 +962,19 @@ settings max_block_size = 2;
 26	5	2	5	4	3	4
 29	5	2	5	4	3	5
 30	6	0	1	1	1	1
--- very bad functions, not implemented yet
+-- our replacement for lag/lead
 select
-    lag(1, 5)(number) over (),
-    lead(2)(number) over (),
-    lag(number) over ()
-from numbers(2); -- { serverError 48 }
+    anyOrNull(number)
+        over (order by number rows between 1 preceding and 1 preceding),
+    anyOrNull(number)
+        over (order by number rows between 1 following and 1 following)
+from numbers(5);
+\N	1
+0	2
+1	3
+2	4
+3	\N
+-- case-insensitive SQL-standard synonyms for any and anyLast
 select
     number,
     fIrSt_VaLue(number) over w,
diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql
index 009807721d2..03bd8371e23 100644
--- a/tests/queries/0_stateless/01591_window_functions.sql
+++ b/tests/queries/0_stateless/01591_window_functions.sql
@@ -328,13 +328,15 @@ window w as (partition by p order by o)
 order by p, o, number
 settings max_block_size = 2;
 
--- very bad functions, not implemented yet
+-- our replacement for lag/lead
 select
-    lag(1, 5)(number) over (),
-    lead(2)(number) over (),
-    lag(number) over ()
-from numbers(2); -- { serverError 48 }
+    anyOrNull(number)
+        over (order by number rows between 1 preceding and 1 preceding),
+    anyOrNull(number)
+        over (order by number rows between 1 following and 1 following)
+from numbers(5);
 
+-- case-insensitive SQL-standard synonyms for any and anyLast
 select
     number,
     fIrSt_VaLue(number) over w,

From 999062e926401066cb663a6fc5ffefb7942c8702 Mon Sep 17 00:00:00 2001
From: Nikita Mikhailov <jakalletti@jakalletti-build.sas.yp-c.yandex.net>
Date: Fri, 12 Feb 2021 13:45:18 +0300
Subject: [PATCH 1013/1238] fix test

---
 .../01701_parallel_parsing_infinite_segmentation.sh       | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh b/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh
index b82e179495e..d3e634eb560 100755
--- a/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh
+++ b/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh
@@ -4,10 +4,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh                                                                                                                                                                                                                        
 . "$CURDIR"/../shell_config.sh   
 
-python3 -c "for i in range(10):print('{{\"a\":\"{}\", \"b\":\"{}\"'.format('clickhouse'* 1000000, 'dbms' * 1000000))" > big_json.json
-python3 -c "for i in range(100):print('{{\"a\":\"{}\", \"b\":\"{}\"}}'.format('clickhouse'* 100000, 'dbms' * 100000))" >> big_json.json
+${CLICKHOUSE_CLIENT} -q "create table insert_big_json(a String, b String) engine=MergeTree() order by tuple()";
 
-
-${CLICKHOUSE_LOCAL} --input_format_parallel_parsing=1 --max_memory_usage=0 -q "select count() from file('big_json.json', 'JSONEachRow', 'a String, b String')" 2>&1 | grep -q "min_chunk_bytes_for_parallel_parsing" && echo "Ok." || echo "FAIL" ||:
-
-rm big_json.json
\ No newline at end of file
+python3 -c "[print('{{\"a\":\"{}\", \"b\":\"{}\"'.format('clickhouse'* 1000000, 'dbms' * 1000000)) for i in range(10)]; [print('{{\"a\":\"{}\", \"b\":\"{}\"}}'.format('clickhouse'* 100000, 'dbms' * 100000)) for i in range(10)]" 2>/dev/null  | ${CLICKHOUSE_CLIENT} --input_format_parallel_parsing=1 --max_memory_usage=0 -q "insert into insert_big_json FORMAT JSONEachRow" 2>&1 | grep -q "min_chunk_bytes_for_parallel_parsing" && echo "Ok." || echo "FAIL" ||:
\ No newline at end of file

From 3174c575623dfd633efb65f059d834e1a1c29370 Mon Sep 17 00:00:00 2001
From: Nikita Mikhaylov <mikhaylovnikitka@gmail.com>
Date: Fri, 12 Feb 2021 15:29:19 +0300
Subject: [PATCH 1014/1238] Update src/Formats/JSONEachRowUtils.cpp

Co-authored-by: tavplubix <tavplubix@gmail.com>
---
 src/Formats/JSONEachRowUtils.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Formats/JSONEachRowUtils.cpp b/src/Formats/JSONEachRowUtils.cpp
index 407e3f37c5c..56bef9e09ea 100644
--- a/src/Formats/JSONEachRowUtils.cpp
+++ b/src/Formats/JSONEachRowUtils.cpp
@@ -23,7 +23,7 @@ std::pair<bool, size_t> fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, D
         if (current_object_size > 10 * min_chunk_size)
             throw ParsingException("Size of JSON object is extremely large. Expected not greater than " +
             std::to_string(min_chunk_size) + " bytes, but current is " + std::to_string(current_object_size) +
-            " bytes. Increase the value setting 'min_chunk_bytes_for_parallel_parsing' or check your data manually", ErrorCodes::INCORRECT_DATA);
+            " bytes per row. Increase the value setting 'min_chunk_bytes_for_parallel_parsing' or check your data manually, most likely JSON is malformed", ErrorCodes::INCORRECT_DATA);
 
         if (quotes)
         {

From 7d02d58390f7e3e85461a3e14da4c81a601a1ddc Mon Sep 17 00:00:00 2001
From: Nikita Mikhaylov <mikhaylovnikitka@gmail.com>
Date: Fri, 12 Feb 2021 16:14:34 +0300
Subject: [PATCH 1015/1238] bump CI

---
 tests/queries/0_stateless/01602_runningConcurrency.sql | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/queries/0_stateless/01602_runningConcurrency.sql b/tests/queries/0_stateless/01602_runningConcurrency.sql
index 40fdc54ba7a..55b3aae867a 100644
--- a/tests/queries/0_stateless/01602_runningConcurrency.sql
+++ b/tests/queries/0_stateless/01602_runningConcurrency.sql
@@ -47,3 +47,5 @@ SELECT runningConcurrency(toDate('2000-01-01'), toDateTime('2000-01-01 00:00:00'
 
 -- begin > end
 SELECT runningConcurrency(toDate('2000-01-02'), toDate('2000-01-01')); -- { serverError 117 }
+
+                                                       

From c925e34e73819d803b4ef6c5f879b9bda9d14349 Mon Sep 17 00:00:00 2001
From: Vladimir <vdimir@yandex-team.ru>
Date: Fri, 12 Feb 2021 16:52:33 +0300
Subject: [PATCH 1016/1238] Bit more complicated example for isIPv4String

---
 .../functions/ip-address-functions.md         | 29 +++++++++++--------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/docs/en/sql-reference/functions/ip-address-functions.md b/docs/en/sql-reference/functions/ip-address-functions.md
index 3d03b57bb50..0c1f675304b 100644
--- a/docs/en/sql-reference/functions/ip-address-functions.md
+++ b/docs/en/sql-reference/functions/ip-address-functions.md
@@ -267,7 +267,7 @@ SELECT toIPv6('127.0.0.1')
 
 ## isIPv4String {#isipv4string}
 
-Determines whether the input string is an IPv4 address or not. Also will return `0` if `string` is IPv6 address.
+Determines whether the input string is an IPv4 address or not. If `string` is IPv6 address returns `0`.
 
 **Syntax**
 
@@ -281,7 +281,7 @@ isIPv4String(string)
 
 **Returned value**
 
--   `1` if `string` is IPv4 address, `0` if not.
+-   `1` if `string` is IPv4 address, `0` otherwise.
 
 Type: [UInt8](../../sql-reference/data-types/int-uint.md).
 
@@ -290,20 +290,22 @@ Type: [UInt8](../../sql-reference/data-types/int-uint.md).
 Query:
 
 ```sql
-SELECT isIPv4String('0.0.0.0');
+SELECT addr, isIPv4String(addr) FROM ( SELECT ['0.0.0.0', '127.0.0.1', '::ffff:127.0.0.1'] AS addr ) ARRAY JOIN addr
 ```
 
 Result:
 
 ``` text
-┌─isIPv4String('0.0.0.0')─┐
-│                       1 │
-└─────────────────────────┘
+┌─addr─────────────┬─isIPv4String(addr)─┐
+│ 0.0.0.0          │                  1 │
+│ 127.0.0.1        │                  1 │
+│ ::ffff:127.0.0.1 │                  0 │
+└──────────────────┴────────────────────┘
 ```
 
 ## isIPv6String {#isipv6string}
 
-Determines whether the input string is an IPv6 address or not. Also will return `0` if `string` is IPv4 address.
+Determines whether the input string is an IPv6 address or not. If `string` is IPv4 address returns `0`.
 
 **Syntax**
 
@@ -317,7 +319,7 @@ isIPv6String(string)
 
 **Returned value**
 
--   `1` if `string` is IPv6 address, `0` if not.
+-   `1` if `string` is IPv6 address, `0` otherwise.
 
 Type: [UInt8](../../sql-reference/data-types/int-uint.md).
 
@@ -326,15 +328,18 @@ Type: [UInt8](../../sql-reference/data-types/int-uint.md).
 Query:
 
 ``` sql
-SELECT isIPv6String('::ffff:127.0.0.1');
+SELECT addr, isIPv6String(addr) FROM ( SELECT ['::', '1111::ffff', '::ffff:127.0.0.1', '127.0.0.1'] AS addr ) ARRAY JOIN addr
 ```
 
 Result:
 
 ``` text
-┌─isIPv6String('::ffff:127.0.0.1')─┐
-│                                1 │
-└──────────────────────────────────┘
+┌─addr─────────────┬─isIPv6String(addr)─┐
+│ ::               │                  1 │
+│ 1111::ffff       │                  1 │
+│ ::ffff:127.0.0.1 │                  1 │
+│ 127.0.0.1        │                  0 │
+└──────────────────┴────────────────────┘
 ```
 
 [Original article](https://clickhouse.tech/docs/en/query_language/functions/ip_address_functions/) <!--hide-->

From 0bd16745de4d6b19c4cce6eaf6fc73a295d1d5fb Mon Sep 17 00:00:00 2001
From: Vladimir <vdimir@yandex-team.ru>
Date: Fri, 12 Feb 2021 16:53:44 +0300
Subject: [PATCH 1017/1238] Bit more complicated example for isIPv4String - ru

---
 .../functions/ip-address-functions.md         | 25 +++++++++++--------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/docs/ru/sql-reference/functions/ip-address-functions.md b/docs/ru/sql-reference/functions/ip-address-functions.md
index 6b477e642f1..52f0a92bc9f 100644
--- a/docs/ru/sql-reference/functions/ip-address-functions.md
+++ b/docs/ru/sql-reference/functions/ip-address-functions.md
@@ -259,7 +259,7 @@ isIPv4String(string)
 
 **Возвращаемое значение**
 
--   `1` если `string` является адресом IPv4 , `0` если нет.
+-   `1` если `string` является адресом IPv4 , иначе — `0`.
 
 Тип: [UInt8](../../sql-reference/data-types/int-uint.md).
 
@@ -268,15 +268,17 @@ isIPv4String(string)
 Запрос:
 
 ```sql
-SELECT isIPv4String('0.0.0.0');
+SELECT addr, isIPv4String(addr) FROM ( SELECT ['0.0.0.0', '127.0.0.1', '::ffff:127.0.0.1'] AS addr ) ARRAY JOIN addr
 ```
 
 Результат:
 
 ``` text
-┌─isIPv4String('0.0.0.0')─┐
-│                       1 │
-└─────────────────────────┘
+┌─addr─────────────┬─isIPv4String(addr)─┐
+│ 0.0.0.0          │                  1 │
+│ 127.0.0.1        │                  1 │
+│ ::ffff:127.0.0.1 │                  0 │
+└──────────────────┴────────────────────┘
 ```
 
 ## isIPv6String {#isipv6string}
@@ -295,7 +297,7 @@ isIPv6String(string)
 
 **Возвращаемое значение**
 
--   `1` если `string` является адресом IPv6 , `0` если нет.
+-   `1` если `string` является адресом IPv6 , иначе — `0`.
 
 Тип: [UInt8](../../sql-reference/data-types/int-uint.md).
 
@@ -304,15 +306,18 @@ isIPv6String(string)
 Запрос:
 
 ``` sql
-SELECT isIPv6String('::ffff:127.0.0.1');
+SELECT addr, isIPv6String(addr) FROM ( SELECT ['::', '1111::ffff', '::ffff:127.0.0.1', '127.0.0.1'] AS addr ) ARRAY JOIN addr
 ```
 
 Результат:
 
 ``` text
-┌─isIPv6String('::ffff:127.0.0.1')─┐
-│                                1 │
-└──────────────────────────────────┘
+┌─addr─────────────┬─isIPv6String(addr)─┐
+│ ::               │                  1 │
+│ 1111::ffff       │                  1 │
+│ ::ffff:127.0.0.1 │                  1 │
+│ 127.0.0.1        │                  0 │
+└──────────────────┴────────────────────┘
 ```
 
 [Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/ip_address_functions/) <!--hide-->

From 06b21c207fb98075097a94c3424a4e0950349f2d Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com>
Date: Fri, 12 Feb 2021 17:22:03 +0300
Subject: [PATCH 1018/1238] Update index.md

---
 docs/en/sql-reference/window-functions/index.md | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md
index 5a6f13226a5..4fb279f1ad1 100644
--- a/docs/en/sql-reference/window-functions/index.md
+++ b/docs/en/sql-reference/window-functions/index.md
@@ -10,13 +10,18 @@ This is an experimental feature that is currently in development and is not read
 for general use. It will change in unpredictable backwards-incompatible ways in
 the future releases. Set `allow_experimental_window_functions = 1` to enable it.
 
-ClickHouse currently supports calculation of aggregate functions over a window.
-Pure window functions such as `rank`, `lag`, `lead` and so on are not yet supported.
+ClickHouse supports the standard grammar for defining windows and window functions. The following features are currently supported:
 
-The window can be specified either with an `OVER` clause or with a separate
-`WINDOW` clause.
-
-Only two variants of frame are supported, `ROWS` and `RANGE`. Offsets for the `RANGE` frame are not yet supported.
+| Feature | Support or workaround |
+| --------| ----------|
+| ad hoc window specification (`count(*) over (partition by id order by time desc)`) | yes |
+| `WINDOW` clause (`select ... from table window w as (partiton by id)`) | yes |
+| `ROWS` frame | yes |
+| `RANGE` frame | yes, it is the default |
+| `GROUPS` frame | no |
+| Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | all aggregate functions are supported |
+| `rank()`, `dense_rank()`, `row_number()` | yes |
+| `lag/lead(value, offset)` | no, replace with `any(value) over (.... rows between <offset> preceding and <offset> following)`| 
 
 ## References
 

From a0c1bfd9bde05edf4dc05afb24d205c896ad95b6 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com>
Date: Fri, 12 Feb 2021 17:36:21 +0300
Subject: [PATCH 1019/1238] Update index.md

---
 docs/en/sql-reference/window-functions/index.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md
index 4fb279f1ad1..72421daca1c 100644
--- a/docs/en/sql-reference/window-functions/index.md
+++ b/docs/en/sql-reference/window-functions/index.md
@@ -21,7 +21,7 @@ ClickHouse supports the standard grammar for defining windows and window functio
 | `GROUPS` frame | no |
 | Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | all aggregate functions are supported |
 | `rank()`, `dense_rank()`, `row_number()` | yes |
-| `lag/lead(value, offset)` | no, replace with `any(value) over (.... rows between <offset> preceding and <offset> following)`| 
+| `lag/lead(value, offset)` | no, replace with `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead`| 
 
 ## References
 

From ef2b40cf8931993b81cdc8704bf09116736969b8 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com>
Date: Fri, 12 Feb 2021 17:37:22 +0300
Subject: [PATCH 1020/1238] Update index.md

---
 docs/en/sql-reference/window-functions/index.md | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md
index 72421daca1c..46f7ed3824e 100644
--- a/docs/en/sql-reference/window-functions/index.md
+++ b/docs/en/sql-reference/window-functions/index.md
@@ -26,22 +26,33 @@ ClickHouse supports the standard grammar for defining windows and window functio
 ## References
 
 ### GitHub Issues
+
 The roadmap for the initial support of window functions is [in this issue](https://github.com/ClickHouse/ClickHouse/issues/18097).
 
 All GitHub issues related to window funtions have the [comp-window-functions](https://github.com/ClickHouse/ClickHouse/labels/comp-window-functions) tag.
 
 ### Tests
+
 These tests contain the examples of the currently supported grammar:
+
 https://github.com/ClickHouse/ClickHouse/blob/master/tests/performance/window_functions.xml
+
 https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/01591_window_functions.sql
 
 ### Postgres Docs
+
 https://www.postgresql.org/docs/current/sql-select.html#SQL-WINDOW
+
 https://www.postgresql.org/docs/devel/sql-expressions.html#SYNTAX-WINDOW-FUNCTIONS
+
 https://www.postgresql.org/docs/devel/functions-window.html
+
 https://www.postgresql.org/docs/devel/tutorial-window.html
 
 ### MySQL Docs
+
 https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html
+
 https://dev.mysql.com/doc/refman/8.0/en/window-functions-usage.html
+
 https://dev.mysql.com/doc/refman/8.0/en/window-functions-frames.html

From a25ce1c166eaf05723ff029afc4db48ab6d36719 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Fri, 12 Feb 2021 19:13:57 +0300
Subject: [PATCH 1021/1238] Revert "Fix access control manager destruction
 order"

---
 src/Interpreters/Context.cpp | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index ca4a313da62..5c99d39dc2e 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -331,7 +331,7 @@ struct ContextShared
     mutable std::optional<ExternalModelsLoader> external_models_loader;
     String default_profile_name;                            /// Default profile name used for default values.
     String system_profile_name;                             /// Profile used by system processes
-    std::unique_ptr<AccessControlManager> access_control_manager;
+    AccessControlManager access_control_manager;
     mutable UncompressedCachePtr uncompressed_cache;        /// The cache of decompressed blocks.
     mutable MarkCachePtr mark_cache;                        /// Cache of marks in compressed files.
     ProcessList process_list;                               /// Executing queries at the moment.
@@ -388,8 +388,7 @@ struct ContextShared
     Context::ConfigReloadCallback config_reload_callback;
 
     ContextShared()
-        : access_control_manager(std::make_unique<AccessControlManager>())
-        , macros(std::make_unique<Macros>())
+        : macros(std::make_unique<Macros>())
     {
         /// TODO: make it singleton (?)
         static std::atomic<size_t> num_calls{0};
@@ -435,7 +434,6 @@ struct ContextShared
         /// Preemptive destruction is important, because these objects may have a refcount to ContextShared (cyclic reference).
         /// TODO: Get rid of this.
 
-        access_control_manager.reset();
         system_logs.reset();
         embedded_dictionaries.reset();
         external_dictionaries_loader.reset();
@@ -642,7 +640,7 @@ void Context::setConfig(const ConfigurationPtr & config)
 {
     auto lock = getLock();
     shared->config = config;
-    shared->access_control_manager->setExternalAuthenticatorsConfig(*shared->config);
+    shared->access_control_manager.setExternalAuthenticatorsConfig(*shared->config);
 }
 
 const Poco::Util::AbstractConfiguration & Context::getConfigRef() const
@@ -654,25 +652,25 @@ const Poco::Util::AbstractConfiguration & Context::getConfigRef() const
 
 AccessControlManager & Context::getAccessControlManager()
 {
-    return *shared->access_control_manager;
+    return shared->access_control_manager;
 }
 
 const AccessControlManager & Context::getAccessControlManager() const
 {
-    return *shared->access_control_manager;
+    return shared->access_control_manager;
 }
 
 void Context::setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config)
 {
     auto lock = getLock();
-    shared->access_control_manager->setExternalAuthenticatorsConfig(config);
+    shared->access_control_manager.setExternalAuthenticatorsConfig(config);
 }
 
 void Context::setUsersConfig(const ConfigurationPtr & config)
 {
     auto lock = getLock();
     shared->users_config = config;
-    shared->access_control_manager->setUsersConfig(*shared->users_config);
+    shared->access_control_manager.setUsersConfig(*shared->users_config);
 }
 
 ConfigurationPtr Context::getUsersConfig()

From f64f9b672b472eaf0a0f76447a21cf30c361f816 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Fri, 12 Feb 2021 19:22:01 +0300
Subject: [PATCH 1022/1238] fix

---
 src/Databases/DatabaseOrdinary.cpp            |  5 ---
 src/Databases/DatabaseReplicated.cpp          |  3 +-
 src/Databases/DatabaseReplicatedWorker.cpp    | 10 ++++-
 src/Databases/DatabaseReplicatedWorker.h      |  2 +
 src/Interpreters/DDLWorker.cpp                | 37 +++++++++++++++++--
 src/Interpreters/DDLWorker.h                  |  2 +-
 .../test_distributed_ddl/cluster.py           |  4 +-
 .../integration/test_distributed_ddl/test.py  |  2 +-
 .../test_replicated_alter.py                  |  2 +-
 9 files changed, 50 insertions(+), 17 deletions(-)

diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp
index d859578eb46..a94668dacf7 100644
--- a/src/Databases/DatabaseOrdinary.cpp
+++ b/src/Databases/DatabaseOrdinary.cpp
@@ -33,11 +33,6 @@ static constexpr size_t PRINT_MESSAGE_EACH_N_OBJECTS = 256;
 static constexpr size_t PRINT_MESSAGE_EACH_N_SECONDS = 5;
 static constexpr size_t METADATA_FILE_BUFFER_SIZE = 32768;
 
-namespace ErrorCodes
-{
-    extern const int NOT_IMPLEMENTED;
-}
-
 namespace
 {
     void tryAttachTable(
diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index 1756d33958d..d365ea24bbf 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -33,6 +33,7 @@ namespace ErrorCodes
     extern const int REPLICA_IS_ALREADY_EXIST;
     extern const int DATABASE_REPLICATION_FAILED;
     extern const int UNKNOWN_DATABASE;
+    extern const int UNKNOWN_TABLE;
     extern const int NOT_IMPLEMENTED;
     extern const int INCORRECT_QUERY;
     extern const int ALL_CONNECTION_TRIES_FAILED;
@@ -332,7 +333,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
                 ASTPtr zk_create = parseQuery(parser, in_zk->second, size, depth);
                 if (local_create->as<ASTCreateQuery>()->uuid == zk_create->as<ASTCreateQuery>()->uuid)
                 {
-                    /// For ReplicatedMergeTree tables we can compare only UUIDs to ensure that it's tha same table.
+                    /// For ReplicatedMergeTree tables we can compare only UUIDs to ensure that it's the same table.
                     /// Metadata can be different, it's handled on table replication level.
                     /// TODO maybe we should also compare MergeTree SETTINGS?
                     should_detach = false;
diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp
index 5a350783dcb..521ba5b7cb2 100644
--- a/src/Databases/DatabaseReplicatedWorker.cpp
+++ b/src/Databases/DatabaseReplicatedWorker.cpp
@@ -41,6 +41,12 @@ void DatabaseReplicatedDDLWorker::initializeMainThread()
     }
 }
 
+void DatabaseReplicatedDDLWorker::shutdown()
+{
+    DDLWorker::shutdown();
+    wait_current_task_change.notify_all();
+}
+
 void DatabaseReplicatedDDLWorker::initializeReplication()
 {
     /// Check if we need to recover replica.
@@ -120,8 +126,8 @@ String DatabaseReplicatedDDLWorker::tryEnqueueAndExecuteEntry(DDLLogEntry & entr
                             "most likely because replica is busy with previous queue entries");
     }
 
-    if (zookeeper->expired())
-        throw Exception(ErrorCodes::DATABASE_REPLICATION_FAILED, "ZooKeeper session expired, try again");
+    if (zookeeper->expired() || stop_flag)
+        throw Exception(ErrorCodes::DATABASE_REPLICATION_FAILED, "ZooKeeper session expired or replication stopped, try again");
 
     processTask(*task);
 
diff --git a/src/Databases/DatabaseReplicatedWorker.h b/src/Databases/DatabaseReplicatedWorker.h
index 33806df88ba..1eafe2489e7 100644
--- a/src/Databases/DatabaseReplicatedWorker.h
+++ b/src/Databases/DatabaseReplicatedWorker.h
@@ -15,6 +15,8 @@ public:
 
     String tryEnqueueAndExecuteEntry(DDLLogEntry & entry, const Context & query_context);
 
+    void shutdown() override;
+
 private:
     void initializeMainThread() override;
     void initializeReplication();
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 242ee7ea0e1..1f4c7932329 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -471,16 +471,42 @@ void DDLWorker::processTask(DDLTaskBase & task)
     String active_node_path = task.getActiveNodePath();
     String finished_node_path = task.getFinishedNodePath();
 
+    /// It will tryRemove(...) on exception
+    auto active_node = zkutil::EphemeralNodeHolder::existing(active_node_path, *zookeeper);
+
+    /// Try fast path
     auto create_active_res = zookeeper->tryCreate(active_node_path, {}, zkutil::CreateMode::Ephemeral);
     if (create_active_res != Coordination::Error::ZOK)
     {
-        if (create_active_res != Coordination::Error::ZNONODE)
+        if (create_active_res != Coordination::Error::ZNONODE && create_active_res != Coordination::Error::ZNODEEXISTS)
+        {
+            assert(Coordination::isHardwareError(create_active_res));
             throw Coordination::Exception(create_active_res, active_node_path);
-        createStatusDirs(task.entry_path, zookeeper);
+        }
+
+        /// Status dirs were not created in enqueueQuery(...) or someone is removing entry
+        if (create_active_res == Coordination::Error::ZNONODE)
+            createStatusDirs(task.entry_path, zookeeper);
+
+        if (create_active_res == Coordination::Error::ZNODEEXISTS)
+        {
+            /// Connection has been lost and now we are retrying to write query status,
+            /// but our previous ephemeral node still exists.
+            assert(task.was_executed);
+            zkutil::EventPtr eph_node_disappeared = std::make_shared<Poco::Event>();
+            String dummy;
+            if (zookeeper->tryGet(active_node_path, dummy, nullptr, eph_node_disappeared))
+            {
+                constexpr int timeout_ms = 5000;
+                if (!eph_node_disappeared->tryWait(timeout_ms))
+                    throw Exception(ErrorCodes::LOGICAL_ERROR, "Ephemeral node {} still exists, "
+                                    "probably it's owned by someone else", active_node_path);
+            }
+        }
+
         zookeeper->create(active_node_path, {}, zkutil::CreateMode::Ephemeral);
 
     }
-    auto active_node = zkutil::EphemeralNodeHolder::existing(active_node_path, *zookeeper);
 
     if (!task.was_executed)
     {
@@ -560,10 +586,12 @@ void DDLWorker::processTask(DDLTaskBase & task)
     if (!status_written)
     {
         zookeeper->multi(task.ops);
-        active_node->reset();
         task.ops.clear();
     }
 
+    /// Active node was removed in multi ops
+    active_node->reset();
+
     task.completely_processed = true;
 }
 
@@ -947,6 +975,7 @@ void DDLWorker::runMainThread()
         current_tasks.clear();
         last_skipped_entry_name.reset();
         max_id = 0;
+        LOG_INFO(log, "Cleaned DDLWorker state");
     };
 
     setThreadName("DDLWorker");
diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h
index 1ae4f815b44..03c80e3f669 100644
--- a/src/Interpreters/DDLWorker.h
+++ b/src/Interpreters/DDLWorker.h
@@ -57,7 +57,7 @@ public:
     }
 
     void startup();
-    void shutdown();
+    virtual void shutdown();
 
     bool isCurrentlyActive() const { return initialized && !stop_flag; }
 
diff --git a/tests/integration/test_distributed_ddl/cluster.py b/tests/integration/test_distributed_ddl/cluster.py
index 45a159ed2b9..24f11fec547 100644
--- a/tests/integration/test_distributed_ddl/cluster.py
+++ b/tests/integration/test_distributed_ddl/cluster.py
@@ -10,8 +10,8 @@ from helpers.test_tools import TSV
 
 
 class ClickHouseClusterWithDDLHelpers(ClickHouseCluster):
-    def __init__(self, base_path, config_dir):
-        ClickHouseCluster.__init__(self, base_path)
+    def __init__(self, base_path, config_dir, testcase_name):
+        ClickHouseCluster.__init__(self, base_path, name=testcase_name)
 
         self.test_config_dir = config_dir
 
diff --git a/tests/integration/test_distributed_ddl/test.py b/tests/integration/test_distributed_ddl/test.py
index f0e78dfec41..58e1d0d06f7 100755
--- a/tests/integration/test_distributed_ddl/test.py
+++ b/tests/integration/test_distributed_ddl/test.py
@@ -14,7 +14,7 @@ from .cluster import ClickHouseClusterWithDDLHelpers
 
 @pytest.fixture(scope="module", params=["configs", "configs_secure"])
 def test_cluster(request):
-    cluster = ClickHouseClusterWithDDLHelpers(__file__, request.param)
+    cluster = ClickHouseClusterWithDDLHelpers(__file__, request.param, request.param)
 
     try:
         cluster.prepare()
diff --git a/tests/integration/test_distributed_ddl/test_replicated_alter.py b/tests/integration/test_distributed_ddl/test_replicated_alter.py
index bd95f5660b7..148ad5fca5e 100644
--- a/tests/integration/test_distributed_ddl/test_replicated_alter.py
+++ b/tests/integration/test_distributed_ddl/test_replicated_alter.py
@@ -12,7 +12,7 @@ from .cluster import ClickHouseClusterWithDDLHelpers
 
 @pytest.fixture(scope="module", params=["configs", "configs_secure"])
 def test_cluster(request):
-    cluster = ClickHouseClusterWithDDLHelpers(__file__, request.param)
+    cluster = ClickHouseClusterWithDDLHelpers(__file__, request.param, "alters_" + request.param)
 
     try:
         # TODO: Fix ON CLUSTER alters when nodes have different configs. Need to canonicalize node identity.

From 9cbda346bb4388014896ef2af92414bc8cccd782 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Fri, 12 Feb 2021 19:49:27 +0300
Subject: [PATCH 1023/1238] Add test for already working code

---
 .../01716_drop_rename_sign_column.reference        |  0
 .../0_stateless/01716_drop_rename_sign_column.sql  | 14 ++++++++++++++
 2 files changed, 14 insertions(+)
 create mode 100644 tests/queries/0_stateless/01716_drop_rename_sign_column.reference
 create mode 100644 tests/queries/0_stateless/01716_drop_rename_sign_column.sql

diff --git a/tests/queries/0_stateless/01716_drop_rename_sign_column.reference b/tests/queries/0_stateless/01716_drop_rename_sign_column.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/01716_drop_rename_sign_column.sql b/tests/queries/0_stateless/01716_drop_rename_sign_column.sql
new file mode 100644
index 00000000000..c9119ee2b46
--- /dev/null
+++ b/tests/queries/0_stateless/01716_drop_rename_sign_column.sql
@@ -0,0 +1,14 @@
+DROP TABLE IF EXISTS signed_table;
+
+CREATE TABLE signed_table (
+    k UInt32,
+    v String,
+    s Int8
+) ENGINE CollapsingMergeTree(s) ORDER BY k;
+
+INSERT INTO signed_table(k, v, s) VALUES (1, 'a', 1);
+
+ALTER TABLE signed_table DROP COLUMN s; --{serverError 524}
+ALTER TABLE signed_table RENAME COLUMN s TO s1; --{serverError 524}
+
+DROP TABLE IF EXISTS signed_table;

From 2a52aa8ca30146c8eede353d5a4886781d82d53d Mon Sep 17 00:00:00 2001
From: Nikita Mikhailov <jakalletti@jakalletti-build.sas.yp-c.yandex.net>
Date: Fri, 12 Feb 2021 20:25:40 +0300
Subject: [PATCH 1024/1238] fix test

---
 CMakeLists.txt                                               | 1 -
 src/Functions/ya.make                                        | 1 +
 tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh | 1 +
 tests/queries/skip_list.json                                 | 1 +
 4 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9002f1df140..853b2df7aca 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -490,7 +490,6 @@ include (cmake/find/rapidjson.cmake)
 include (cmake/find/fastops.cmake)
 include (cmake/find/odbc.cmake)
 include (cmake/find/rocksdb.cmake)
-include (cmake/find/libpqxx.cmake)
 include (cmake/find/nuraft.cmake)
 
 
diff --git a/src/Functions/ya.make b/src/Functions/ya.make
index 7f9c7add0b8..173c71ee557 100644
--- a/src/Functions/ya.make
+++ b/src/Functions/ya.make
@@ -39,6 +39,7 @@ SRCS(
     CRC.cpp
     FunctionFQDN.cpp
     FunctionFactory.cpp
+    FunctionFile.cpp
     FunctionHelpers.cpp
     FunctionJoinGet.cpp
     FunctionsAES.cpp
diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
index 02b0beee550..43e1e11a193 100755
--- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
+++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
@@ -9,6 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # Now we can get the user_files_path by use the table file function for trick. also we can get it by query as:
 #  "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')"
 user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 |grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
+
 mkdir -p ${user_files_path}/
 echo -n aaaaaaaaa > ${user_files_path}/a.txt
 echo -n bbbbbbbbb > ${user_files_path}/b.txt
diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json
index 53fcfe8b13f..7a0bd3375f3 100644
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@@ -581,5 +581,6 @@
         "memory_leak",
         "memory_limit",
         "polygon_dicts" // they use an explicitly specified database
+        "01658_read_file_to_stringcolumn"
     ]
 }

From 609ced42ef5948f7e8ad9af7e275f3cc88ab5320 Mon Sep 17 00:00:00 2001
From: Nikita Mikhailov <jakalletti@jakalletti-build.sas.yp-c.yandex.net>
Date: Fri, 12 Feb 2021 20:27:55 +0300
Subject: [PATCH 1025/1238] better

---
 CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 853b2df7aca..9002f1df140 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -490,6 +490,7 @@ include (cmake/find/rapidjson.cmake)
 include (cmake/find/fastops.cmake)
 include (cmake/find/odbc.cmake)
 include (cmake/find/rocksdb.cmake)
+include (cmake/find/libpqxx.cmake)
 include (cmake/find/nuraft.cmake)
 
 
From 801d109234f68baceb7894f0008790248192d723 Mon Sep 17 00:00:00 2001
From: Nikita Mikhailov <jakalletti@jakalletti-build.sas.yp-c.yandex.net>
Date: Fri, 12 Feb 2021 22:05:31 +0300
Subject: [PATCH 1026/1238] fix

---
 tests/queries/skip_list.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json
index 7a0bd3375f3..f3a21092aa0 100644
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@@ -580,7 +580,7 @@
         "live_view",
         "memory_leak",
         "memory_limit",
-        "polygon_dicts" // they use an explicitly specified database
+        "polygon_dicts", // they use an explicitly specified database
         "01658_read_file_to_stringcolumn"
     ]
 }

From 184ec67dac727f89702ce12db5d7b51a8dfc2f25 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Fri, 12 Feb 2021 22:23:50 +0300
Subject: [PATCH 1027/1238] better ddl queue cleanup

---
 src/Common/ZooKeeper/ZooKeeper.cpp            |  21 +--
 src/Common/ZooKeeper/ZooKeeper.h              |  11 +-
 src/Interpreters/DDLWorker.cpp                | 149 +++++++++++-------
 .../test_distributed_ddl/cluster.py           |   8 +-
 .../integration/test_distributed_ddl/test.py  |   2 +-
 .../test_replicated_alter.py                  |   2 +-
 6 files changed, 114 insertions(+), 79 deletions(-)

diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp
index 4537d5ad8cd..a1c6eb9b481 100644
--- a/src/Common/ZooKeeper/ZooKeeper.cpp
+++ b/src/Common/ZooKeeper/ZooKeeper.cpp
@@ -602,7 +602,7 @@ void ZooKeeper::removeChildren(const std::string & path)
 }
 
 
-void ZooKeeper::removeChildrenRecursive(const std::string & path)
+void ZooKeeper::removeChildrenRecursive(const std::string & path, const String & keep_child_node)
 {
     Strings children = getChildren(path);
     while (!children.empty())
@@ -611,14 +611,15 @@ void ZooKeeper::removeChildrenRecursive(const std::string & path)
         for (size_t i = 0; i < MULTI_BATCH_SIZE && !children.empty(); ++i)
         {
             removeChildrenRecursive(path + "/" + children.back());
-            ops.emplace_back(makeRemoveRequest(path + "/" + children.back(), -1));
+            if (likely(keep_child_node.empty() || keep_child_node != children.back()))
+                ops.emplace_back(makeRemoveRequest(path + "/" + children.back(), -1));
             children.pop_back();
         }
         multi(ops);
     }
 }
 
-void ZooKeeper::tryRemoveChildrenRecursive(const std::string & path)
+void ZooKeeper::tryRemoveChildrenRecursive(const std::string & path, const String & keep_child_node)
 {
     Strings children;
     if (tryGetChildren(path, children) != Coordination::Error::ZOK)
@@ -629,14 +630,14 @@ void ZooKeeper::tryRemoveChildrenRecursive(const std::string & path)
         Strings batch;
         for (size_t i = 0; i < MULTI_BATCH_SIZE && !children.empty(); ++i)
         {
-            batch.push_back(path + "/" + children.back());
+            String child_path = path + "/" + children.back();
+            tryRemoveChildrenRecursive(child_path);
+            if (likely(keep_child_node.empty() || keep_child_node != children.back()))
+            {
+                batch.push_back(child_path);
+                ops.emplace_back(zkutil::makeRemoveRequest(child_path, -1));
+            }
             children.pop_back();
-            tryRemoveChildrenRecursive(batch.back());
-
-            Coordination::RemoveRequest request;
-            request.path = batch.back();
-
-            ops.emplace_back(std::make_shared<Coordination::RemoveRequest>(std::move(request)));
         }
 
         /// Try to remove the children with a faster method - in bulk. If this fails,
diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h
index 0d9dc104c48..90d15e2ac4a 100644
--- a/src/Common/ZooKeeper/ZooKeeper.h
+++ b/src/Common/ZooKeeper/ZooKeeper.h
@@ -184,6 +184,12 @@ public:
     /// result would be the same as for the single call.
     void tryRemoveRecursive(const std::string & path);
 
+    /// Similar to removeRecursive(...) and tryRemoveRecursive(...), but does not remove path itself.
+    /// If keep_child_node is not empty, this method will not remove path/keep_child_node (but will remove its subtree).
+    /// It can be useful to keep some child node as a flag which indicates that path is currently removing.
+    void removeChildrenRecursive(const std::string & path, const String & keep_child_node = {});
+    void tryRemoveChildrenRecursive(const std::string & path, const String & keep_child_node = {});
+
     /// Remove all children nodes (non recursive).
     void removeChildren(const std::string & path);
 
@@ -246,9 +252,6 @@ private:
     void init(const std::string & implementation_, const std::string & hosts_, const std::string & identity_,
               int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_);
 
-    void removeChildrenRecursive(const std::string & path);
-    void tryRemoveChildrenRecursive(const std::string & path);
-
     /// The following methods don't throw exceptions but return error codes.
     Coordination::Error createImpl(const std::string & path, const std::string & data, int32_t mode, std::string & path_created);
     Coordination::Error removeImpl(const std::string & path, int32_t version);
@@ -320,7 +323,7 @@ public:
         catch (...)
         {
             ProfileEvents::increment(ProfileEvents::CannotRemoveEphemeralNode);
-            DB::tryLogCurrentException(__PRETTY_FUNCTION__);
+            DB::tryLogCurrentException(__PRETTY_FUNCTION__, "Cannot remove " + path + ": ");
         }
     }
 
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 05370a6a3b7..fc460a5584c 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -652,15 +652,10 @@ void DDLWorker::enqueueTask(DDLTaskPtr task_ptr)
             {
                 recoverZooKeeper();
             }
-            else if (e.code == Coordination::Error::ZNONODE)
-            {
-                LOG_ERROR(log, "ZooKeeper error: {}", getCurrentExceptionMessage(true));
-                // TODO: retry?
-            }
             else
             {
                 LOG_ERROR(log, "Unexpected ZooKeeper error: {}.", getCurrentExceptionMessage(true));
-                return;
+                throw;
             }
         }
         catch (...)
@@ -695,25 +690,44 @@ void DDLWorker::processTask(DDLTask & task)
 
     LOG_DEBUG(log, "Processing task {} ({})", task.entry_name, task.entry.query);
 
-    String dummy;
     String active_node_path = task.entry_path + "/active/" + task.host_id_str;
     String finished_node_path = task.entry_path + "/finished/" + task.host_id_str;
 
-    auto code = zookeeper->tryCreate(active_node_path, "", zkutil::CreateMode::Ephemeral, dummy);
+    /// It will tryRemove(...) on exception
+    auto active_node = zkutil::EphemeralNodeHolder::existing(active_node_path, *zookeeper);
 
-    if (code == Coordination::Error::ZOK || code == Coordination::Error::ZNODEEXISTS)
+    /// Try fast path
+    auto create_active_res = zookeeper->tryCreate(active_node_path, {}, zkutil::CreateMode::Ephemeral);
+    if (create_active_res != Coordination::Error::ZOK)
     {
-        // Ok
+        if (create_active_res != Coordination::Error::ZNONODE && create_active_res != Coordination::Error::ZNODEEXISTS)
+        {
+            assert(Coordination::isHardwareError(create_active_res));
+            throw Coordination::Exception(create_active_res, active_node_path);
+        }
+
+        /// Status dirs were not created in enqueueQuery(...) or someone is removing entry
+        if (create_active_res == Coordination::Error::ZNONODE)
+            createStatusDirs(task.entry_path, zookeeper);
+
+        if (create_active_res == Coordination::Error::ZNODEEXISTS)
+        {
+            /// Connection has been lost and now we are retrying to write query status,
+            /// but our previous ephemeral node still exists.
+            assert(task.was_executed);
+            zkutil::EventPtr eph_node_disappeared = std::make_shared<Poco::Event>();
+            String dummy;
+            if (zookeeper->tryGet(active_node_path, dummy, nullptr, eph_node_disappeared))
+            {
+                constexpr int timeout_ms = 5000;
+                if (!eph_node_disappeared->tryWait(timeout_ms))
+                    throw Exception(ErrorCodes::LOGICAL_ERROR, "Ephemeral node {} still exists, "
+                                    "probably it's owned by someone else", active_node_path);
+            }
+        }
+
+        zookeeper->create(active_node_path, {}, zkutil::CreateMode::Ephemeral);
     }
-    else if (code == Coordination::Error::ZNONODE)
-    {
-        /// There is no parent
-        createStatusDirs(task.entry_path, zookeeper);
-        if (Coordination::Error::ZOK != zookeeper->tryCreate(active_node_path, "", zkutil::CreateMode::Ephemeral, dummy))
-            throw Coordination::Exception(code, active_node_path);
-    }
-    else
-        throw Coordination::Exception(code, active_node_path);
 
     if (!task.was_executed)
     {
@@ -969,7 +983,6 @@ void DDLWorker::cleanupQueue(Int64 current_time_seconds, const ZooKeeperPtr & zo
 
         String node_name = *it;
         String node_path = fs::path(queue_dir) / node_name;
-        String lock_path = fs::path(node_path) / "lock";
 
         Coordination::Stat stat;
         String dummy;
@@ -991,19 +1004,14 @@ void DDLWorker::cleanupQueue(Int64 current_time_seconds, const ZooKeeperPtr & zo
             if (!node_lifetime_is_expired && !node_is_outside_max_window)
                 continue;
 
-            /// Skip if there are active nodes (it is weak guard)
-            if (zookeeper->exists(fs::path(node_path) / "active", &stat) && stat.numChildren > 0)
+            /// At first we remove entry/active node to prevent staled hosts from executing entry concurrently
+            auto rm_active_res = zookeeper->tryRemove(fs::path(node_path) / "active");
+            if (rm_active_res != Coordination::Error::ZOK && rm_active_res != Coordination::Error::ZNONODE)
             {
-                LOG_INFO(log, "Task {} should be deleted, but there are active workers. Skipping it.", node_name);
-                continue;
-            }
-
-            /// Usage of the lock is not necessary now (tryRemoveRecursive correctly removes node in a presence of concurrent cleaners)
-            /// But the lock will be required to implement system.distributed_ddl_queue table
-            auto lock = createSimpleZooKeeperLock(zookeeper, node_path, "lock", host_fqdn_id);
-            if (!lock->tryLock())
-            {
-                LOG_INFO(log, "Task {} should be deleted, but it is locked. Skipping it.", node_name);
+                if (rm_active_res == Coordination::Error::ZNOTEMPTY)
+                    LOG_DEBUG(log, "Task {} should be deleted, but there are active workers. Skipping it.", node_name);
+                else
+                    LOG_WARNING(log, "Unexpected status code {} on attempt to remove {}/active", rm_active_res, node_name);
                 continue;
             }
 
@@ -1012,21 +1020,33 @@ void DDLWorker::cleanupQueue(Int64 current_time_seconds, const ZooKeeperPtr & zo
             else if (node_is_outside_max_window)
                 LOG_INFO(log, "Task {} is outdated, deleting it", node_name);
 
-            /// Deleting
-            {
-                Strings children = zookeeper->getChildren(node_path);
-                for (const String & child : children)
-                {
-                    if (child != "lock")
-                        zookeeper->tryRemoveRecursive(fs::path(node_path) / child);
-                }
+            /// We recursively delete all nodes except node_path/finished to prevent staled hosts from
+            /// creating node_path/active node (see createStatusDirs(...))
+            zookeeper->tryRemoveChildrenRecursive(node_path, "finished");
 
-                /// Remove the lock node and its parent atomically
-                Coordination::Requests ops;
-                ops.emplace_back(zkutil::makeRemoveRequest(lock_path, -1));
-                ops.emplace_back(zkutil::makeRemoveRequest(node_path, -1));
-                zookeeper->multi(ops);
+            /// And then we remove node_path and node_path/finished in a single transaction
+            Coordination::Requests ops;
+            Coordination::Responses res;
+            ops.emplace_back(zkutil::makeCheckRequest(node_path, -1));  /// See a comment below
+            ops.emplace_back(zkutil::makeRemoveRequest(fs::path(node_path) / "finished", -1));
+            ops.emplace_back(zkutil::makeRemoveRequest(node_path, -1));
+            auto rm_entry_res = zookeeper->tryMulti(ops, res);
+            if (rm_entry_res == Coordination::Error::ZNONODE)
+            {
+                /// Most likely both node_path/finished and node_path were removed concurrently.
+                bool entry_removed_concurrently = res[0]->error == Coordination::Error::ZNONODE;
+                if (entry_removed_concurrently)
+                    continue;
+
+                /// Possible rare case: initiator node has lost connection after enqueueing entry and failed to create status dirs.
+                /// No one has started to process the entry, so node_path/active and node_path/finished nodes were never created, node_path has no children.
+                /// Entry became outdated, but we cannot remove remove it in a transaction with node_path/finished.
+                assert(res[0]->error == Coordination::Error::ZOK && res[1]->error == Coordination::Error::ZNONODE);
+                rm_entry_res = zookeeper->tryRemove(node_path);
+                assert(rm_entry_res != Coordination::Error::ZNOTEMPTY);
+                continue;
             }
+            zkutil::KeeperMultiException::check(rm_entry_res, ops, res);
         }
         catch (...)
         {
@@ -1040,21 +1060,32 @@ void DDLWorker::cleanupQueue(Int64 current_time_seconds, const ZooKeeperPtr & zo
 void DDLWorker::createStatusDirs(const std::string & node_path, const ZooKeeperPtr & zookeeper)
 {
     Coordination::Requests ops;
-    {
-        Coordination::CreateRequest request;
-        request.path = fs::path(node_path) / "active";
-        ops.emplace_back(std::make_shared<Coordination::CreateRequest>(std::move(request)));
-    }
-    {
-        Coordination::CreateRequest request;
-        request.path = fs::path(node_path) / "finished";
-        ops.emplace_back(std::make_shared<Coordination::CreateRequest>(std::move(request)));
-    }
+    ops.emplace_back(zkutil::makeCreateRequest(fs::path(node_path) / "active", {}, zkutil::CreateMode::Persistent));
+    ops.emplace_back(zkutil::makeCreateRequest(fs::path(node_path) / "finished", {}, zkutil::CreateMode::Persistent));
+
     Coordination::Responses responses;
     Coordination::Error code = zookeeper->tryMulti(ops, responses);
-    if (code != Coordination::Error::ZOK
-        && code != Coordination::Error::ZNODEEXISTS)
-        throw Coordination::Exception(code);
+
+    bool both_created = code == Coordination::Error::ZOK;
+
+    /// Failed on attempt to create node_path/active because it exists, so node_path/finished must exist too
+    bool both_already_exists = responses.size() == 2 && responses[0]->error == Coordination::Error::ZNODEEXISTS
+                                                     && responses[1]->error == Coordination::Error::ZRUNTIMEINCONSISTENCY;
+    assert(!both_already_exists || (zookeeper->exists(fs::path(node_path) / "active") && zookeeper->exists(fs::path(node_path) / "finished")));
+
+    /// Failed on attempt to create node_path/finished, but node_path/active does not exist
+    bool is_currently_deleting = responses.size() == 2 && responses[0]->error == Coordination::Error::ZOK
+                                                       && responses[1]->error == Coordination::Error::ZNODEEXISTS;
+    if (both_created || both_already_exists)
+        return;
+
+    if (is_currently_deleting)
+        throw Exception(ErrorCodes::UNFINISHED, "Cannot create status dirs for {}, "
+                        "most likely because someone is deleting it concurrently", node_path);
+
+    /// Connection lost or entry was removed
+    assert(Coordination::isHardwareError(code) || code == Coordination::Error::ZNONODE);
+    zkutil::KeeperMultiException::check(code, ops, responses);
 }
 
 
@@ -1114,7 +1145,7 @@ void DDLWorker::runMainThread()
             if (!Coordination::isHardwareError(e.code))
             {
                 /// A logical error.
-                LOG_ERROR(log, "ZooKeeper error: {}. Failed to start DDLWorker.",getCurrentExceptionMessage(true));
+                LOG_ERROR(log, "ZooKeeper error: {}. Failed to start DDLWorker.", getCurrentExceptionMessage(true));
                 reset_state(false);
                 assert(false);  /// Catch such failures in tests with debug build
             }
diff --git a/tests/integration/test_distributed_ddl/cluster.py b/tests/integration/test_distributed_ddl/cluster.py
index 811eb94bad4..24f11fec547 100644
--- a/tests/integration/test_distributed_ddl/cluster.py
+++ b/tests/integration/test_distributed_ddl/cluster.py
@@ -10,8 +10,8 @@ from helpers.test_tools import TSV
 
 
 class ClickHouseClusterWithDDLHelpers(ClickHouseCluster):
-    def __init__(self, base_path, config_dir):
-        ClickHouseCluster.__init__(self, base_path)
+    def __init__(self, base_path, config_dir, testcase_name):
+        ClickHouseCluster.__init__(self, base_path, name=testcase_name)
 
         self.test_config_dir = config_dir
 
@@ -104,8 +104,8 @@ class ClickHouseClusterWithDDLHelpers(ClickHouseCluster):
     def ddl_check_there_are_no_dublicates(instance):
         query = "SELECT max(c), argMax(q, c) FROM (SELECT lower(query) AS q, count() AS c FROM system.query_log WHERE type=2 AND q LIKE '/* ddl_entry=query-%' GROUP BY query)"
         rows = instance.query(query)
-        assert len(rows) > 0 and rows[0][0] == "1", "dublicates on {} {}, query {}".format(instance.name,
-                                                                                           instance.ip_address, query)
+        assert len(rows) > 0 and rows[0][0] == "1", "dublicates on {} {}: {}".format(instance.name,
+                                                                                           instance.ip_address, rows)
 
     @staticmethod
     def insert_reliable(instance, query_insert):
diff --git a/tests/integration/test_distributed_ddl/test.py b/tests/integration/test_distributed_ddl/test.py
index f0e78dfec41..58e1d0d06f7 100755
--- a/tests/integration/test_distributed_ddl/test.py
+++ b/tests/integration/test_distributed_ddl/test.py
@@ -14,7 +14,7 @@ from .cluster import ClickHouseClusterWithDDLHelpers
 
 @pytest.fixture(scope="module", params=["configs", "configs_secure"])
 def test_cluster(request):
-    cluster = ClickHouseClusterWithDDLHelpers(__file__, request.param)
+    cluster = ClickHouseClusterWithDDLHelpers(__file__, request.param, request.param)
 
     try:
         cluster.prepare()
diff --git a/tests/integration/test_distributed_ddl/test_replicated_alter.py b/tests/integration/test_distributed_ddl/test_replicated_alter.py
index bd95f5660b7..148ad5fca5e 100644
--- a/tests/integration/test_distributed_ddl/test_replicated_alter.py
+++ b/tests/integration/test_distributed_ddl/test_replicated_alter.py
@@ -12,7 +12,7 @@ from .cluster import ClickHouseClusterWithDDLHelpers
 
 @pytest.fixture(scope="module", params=["configs", "configs_secure"])
 def test_cluster(request):
-    cluster = ClickHouseClusterWithDDLHelpers(__file__, request.param)
+    cluster = ClickHouseClusterWithDDLHelpers(__file__, request.param, "alters_" + request.param)
 
     try:
         # TODO: Fix ON CLUSTER alters when nodes have different configs. Need to canonicalize node identity.

From 939a3e95550140f34a0a3b98231ebef6541e4d34 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Fri, 12 Feb 2021 22:28:00 +0300
Subject: [PATCH 1028/1238] Fix tests for better parallel run

---
 .../01650_drop_part_and_deduplication_zookeeper.sql    | 10 +++++-----
 tests/queries/skip_list.json                           |  1 +
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/tests/queries/0_stateless/01650_drop_part_and_deduplication_zookeeper.sql b/tests/queries/0_stateless/01650_drop_part_and_deduplication_zookeeper.sql
index 50596680618..c3e459dfc49 100644
--- a/tests/queries/0_stateless/01650_drop_part_and_deduplication_zookeeper.sql
+++ b/tests/queries/0_stateless/01650_drop_part_and_deduplication_zookeeper.sql
@@ -5,7 +5,7 @@ CREATE TABLE partitioned_table (
     partitioner UInt8,
     value String
 )
-ENGINE ReplicatedMergeTree('/clickhouse/test/01650_drop_part_and_deduplication/partitioned_table', '1')
+ENGINE ReplicatedMergeTree('/clickhouse/01650_drop_part_and_deduplication_partitioned_table', '1')
 ORDER BY key
 PARTITION BY partitioner;
 
@@ -16,24 +16,24 @@ INSERT INTO partitioned_table VALUES (11, 1, 'AA'), (22, 2, 'BB'), (33, 3, 'CC')
 
 SELECT partition_id, name FROM system.parts WHERE table = 'partitioned_table' AND database = currentDatabase() ORDER BY name;
 
-SELECT substring(name, 1, 2), value FROM system.zookeeper WHERE path='/clickhouse/test/01650_drop_part_and_deduplication/partitioned_table/blocks/' ORDER BY value;
+SELECT substring(name, 1, 2), value FROM system.zookeeper WHERE path='/clickhouse/01650_drop_part_and_deduplication_partitioned_table/blocks/' ORDER BY value;
 
 INSERT INTO partitioned_table VALUES (33, 3, 'CC'); -- must be deduplicated
 
 SELECT partition_id, name FROM system.parts WHERE table = 'partitioned_table' AND database = currentDatabase() ORDER BY name;
 
-SELECT substring(name, 1, 2), value FROM system.zookeeper WHERE path='/clickhouse/test/01650_drop_part_and_deduplication/partitioned_table/blocks/' ORDER BY value;
+SELECT substring(name, 1, 2), value FROM system.zookeeper WHERE path='/clickhouse/01650_drop_part_and_deduplication_partitioned_table/blocks/' ORDER BY value;
 
 ALTER TABLE partitioned_table DROP PART '3_1_1_0';
 
 SELECT partition_id, name FROM system.parts WHERE table = 'partitioned_table' AND database = currentDatabase() ORDER BY name;
 
-SELECT substring(name, 1, 2), value FROM system.zookeeper WHERE path='/clickhouse/test/01650_drop_part_and_deduplication/partitioned_table/blocks/' ORDER BY value;
+SELECT substring(name, 1, 2), value FROM system.zookeeper WHERE path='/clickhouse/01650_drop_part_and_deduplication_partitioned_table/blocks/' ORDER BY value;
 
 INSERT INTO partitioned_table VALUES (33, 3, 'CC'); -- mustn't be deduplicated
 
 SELECT partition_id, name FROM system.parts WHERE table = 'partitioned_table' AND database = currentDatabase() ORDER BY name;
 
-SELECT substring(name, 1, 2), value FROM system.zookeeper WHERE path='/clickhouse/test/01650_drop_part_and_deduplication/partitioned_table/blocks/' ORDER BY value;
+SELECT substring(name, 1, 2), value FROM system.zookeeper WHERE path='/clickhouse/01650_drop_part_and_deduplication_partitioned_table/blocks/' ORDER BY value;
 
 DROP TABLE IF EXISTS partitioned_table;
diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json
index 53fcfe8b13f..07250cd9c90 100644
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@@ -573,6 +573,7 @@
         "01646_system_restart_replicas_smoke", // system restart replicas is a global query
         "01676_dictget_in_default_expression",
         "01715_background_checker_blather_zookeeper",
+        "01700_system_zookeeper_path_in",
         "attach",
         "ddl_dictionaries",
         "dictionary",

From eff5bdf3321c4f9ed01017254a914a065a314cc5 Mon Sep 17 00:00:00 2001
From: lehasm <lehasm@gmail.com>
Date: Fri, 12 Feb 2021 22:28:03 +0300
Subject: [PATCH 1029/1238] Documented decodeXMLComponent function

---
 .../functions/string-functions.md             | 44 +++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md
index 2b93dd924a3..fa9c84fa9af 100644
--- a/docs/en/sql-reference/functions/string-functions.md
+++ b/docs/en/sql-reference/functions/string-functions.md
@@ -600,4 +600,48 @@ Hello, &quot;world&quot;!
 &apos;foo&apos;
 ```
 
+## decodeXMLComponent {#decode-xml-component}
+
+Replaces XML predefined entities with characters. 
+Predefined entities are `&quot;` `&amp;` `&apos;` `&gt;` `&lt;`
+This function also replaces numeric character references with Unicode characters.
+Both decimal (like `&#10003;`) and hexadecimal (`&#x2713;`) forms are supported.
+
+**Syntax**
+
+``` sql
+decodeXMLComponent(x)
+```
+
+**Parameters**
+
+-   `x` — A sequence of characters. [String](../../sql-reference/data-types/string.md).
+
+**Returned value**
+
+-   The sequence of characters after replacement.
+
+Type: [String](../../sql-reference/data-types/string.md).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT decodeXMLComponent('&apos;foo&apos;');
+SELECT decodeXMLComponent('&lt; &#x3A3; &gt;');
+```
+
+Result:
+
+``` text
+'foo' 
+< Σ >
+```
+
+**See Also**
+
+-   [List of XML and HTML character entity references](https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references)
+
+
 [Original article](https://clickhouse.tech/docs/en/query_language/functions/string_functions/) <!--hide-->

From 051c9533b91a9f61ed7f10d94e723d785617ccb6 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Fri, 12 Feb 2021 22:42:20 +0300
Subject: [PATCH 1030/1238] Fix dependent test

---
 .../01700_system_zookeeper_path_in.reference  | 23 +++++++++++------
 .../01700_system_zookeeper_path_in.sql        | 25 ++++++++++++++-----
 2 files changed, 35 insertions(+), 13 deletions(-)

diff --git a/tests/queries/0_stateless/01700_system_zookeeper_path_in.reference b/tests/queries/0_stateless/01700_system_zookeeper_path_in.reference
index 78462f9fc0e..2fc177c812e 100644
--- a/tests/queries/0_stateless/01700_system_zookeeper_path_in.reference
+++ b/tests/queries/0_stateless/01700_system_zookeeper_path_in.reference
@@ -1,7 +1,16 @@
-clickhouse
-task_queue
-clickhouse
-task_queue
-clickhouse
-task_queue
-ddl
+block_numbers
+blocks
+1
+========
+block_numbers
+blocks
+1
+========
+block_numbers
+blocks
+========
+1
+failed_parts
+last_part
+leader_election-0000000000
+parallel
diff --git a/tests/queries/0_stateless/01700_system_zookeeper_path_in.sql b/tests/queries/0_stateless/01700_system_zookeeper_path_in.sql
index a5c7488ef97..d4126098c7c 100644
--- a/tests/queries/0_stateless/01700_system_zookeeper_path_in.sql
+++ b/tests/queries/0_stateless/01700_system_zookeeper_path_in.sql
@@ -1,6 +1,19 @@
-SELECT name FROM system.zookeeper WHERE path = '/';
-SELECT name FROM system.zookeeper WHERE path = 'clickhouse';
-SELECT name FROM system.zookeeper WHERE path IN ('/');
-SELECT name FROM system.zookeeper WHERE path IN ('clickhouse');
-SELECT name FROM system.zookeeper WHERE path IN ('/','/clickhouse');
-SELECT name FROM system.zookeeper WHERE path IN (SELECT concat('/clickhouse/',name) FROM system.zookeeper WHERE (path = '/clickhouse/'));
\ No newline at end of file
+DROP TABLE IF EXISTS sample_table;
+
+CREATE TABLE sample_table (
+    key UInt64
+)
+ENGINE ReplicatedMergeTree('/clickhouse/01700_system_zookeeper_path_in', '1')
+ORDER BY tuple();
+
+SELECT name FROM system.zookeeper WHERE path = '/clickhouse/01700_system_zookeeper_path_in' AND name like 'block%' ORDER BY name;
+SELECT name FROM system.zookeeper WHERE path = '/clickhouse/01700_system_zookeeper_path_in/replicas' ORDER BY name;
+SELECT '========';
+SELECT name FROM system.zookeeper WHERE path IN ('/clickhouse/01700_system_zookeeper_path_in') AND name LIKE 'block%' ORDER BY name;
+SELECT name FROM system.zookeeper WHERE path IN ('/clickhouse/01700_system_zookeeper_path_in/replicas') ORDER BY name;
+SELECT '========';
+SELECT name FROM system.zookeeper WHERE path IN ('/clickhouse/01700_system_zookeeper_path_in','/clickhouse/01700_system_zookeeper_path_in/replicas') AND name LIKE 'block%' ORDER BY name;
+SELECT '========';
+SELECT name FROM system.zookeeper WHERE path IN (SELECT concat('/clickhouse/01700_system_zookeeper_path_in/', name) FROM system.zookeeper WHERE (path = '/clickhouse/01700_system_zookeeper_path_in')) ORDER BY name;
+
+DROP TABLE IF EXISTS sample_table;

From d9b85874c0139a3936cc15d85c3869ec22959a36 Mon Sep 17 00:00:00 2001
From: lehasm <lehasm@gmail.com>
Date: Fri, 12 Feb 2021 22:52:02 +0300
Subject: [PATCH 1031/1238] welchttest, mannwhitneyutest markup fixed

---
 .../aggregate-functions/reference/mannwhitneyutest.md            | 1 +
 .../en/sql-reference/aggregate-functions/reference/welchttest.md | 1 +
 2 files changed, 2 insertions(+)

diff --git a/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md b/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md
index 012df7052aa..bc808ab0a9e 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md
@@ -30,6 +30,7 @@ The null hypothesis is that two populations are stochastically equal. Also one-s
 **Returned values**
 
 [Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
+
 -   calculated U-statistic. [Float64](../../../sql-reference/data-types/float.md).
 -   calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
 
diff --git a/docs/en/sql-reference/aggregate-functions/reference/welchttest.md b/docs/en/sql-reference/aggregate-functions/reference/welchttest.md
index 3fe1c9d58b9..44c320c4565 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/welchttest.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/welchttest.md
@@ -24,6 +24,7 @@ The null hypothesis is that means of populations are equal. Normal distribution
 **Returned values**
 
 [Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
+
 -   calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md).
 -   calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
 

From 00ac1e691abbae0f656a4d913ac489d52ad9c3e4 Mon Sep 17 00:00:00 2001
From: lehasm <lehasm@gmail.com>
Date: Fri, 12 Feb 2021 23:01:47 +0300
Subject: [PATCH 1032/1238] studentttest, welchttest, mannwhitneyutest markup
 fixed (ru)

---
 .../aggregate-functions/reference/mannwhitneyutest.md            | 1 +
 .../sql-reference/aggregate-functions/reference/studentttest.md  | 1 +
 .../ru/sql-reference/aggregate-functions/reference/welchttest.md | 1 +
 3 files changed, 3 insertions(+)

diff --git a/docs/ru/sql-reference/aggregate-functions/reference/mannwhitneyutest.md b/docs/ru/sql-reference/aggregate-functions/reference/mannwhitneyutest.md
index fb73fff5f00..a4647ecfb34 100644
--- a/docs/ru/sql-reference/aggregate-functions/reference/mannwhitneyutest.md
+++ b/docs/ru/sql-reference/aggregate-functions/reference/mannwhitneyutest.md
@@ -31,6 +31,7 @@ mannWhitneyUTest[(alternative[, continuity_correction])](sample_data, sample_ind
 **Возвращаемые значения**
 
 [Кортеж](../../../sql-reference/data-types/tuple.md) с двумя элементами:
+
 -   вычисленное значение критерия Манна — Уитни. [Float64](../../../sql-reference/data-types/float.md).
 -   вычисленное p-значение. [Float64](../../../sql-reference/data-types/float.md).
 
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/studentttest.md b/docs/ru/sql-reference/aggregate-functions/reference/studentttest.md
index 5361e06c5e2..77378de95d1 100644
--- a/docs/ru/sql-reference/aggregate-functions/reference/studentttest.md
+++ b/docs/ru/sql-reference/aggregate-functions/reference/studentttest.md
@@ -24,6 +24,7 @@ studentTTest(sample_data, sample_index)
 **Возвращаемые значения**
 
 [Кортеж](../../../sql-reference/data-types/tuple.md) с двумя элементами:
+
 -   вычисленное значение критерия Стьюдента. [Float64](../../../sql-reference/data-types/float.md).
 -   вычисленное p-значение. [Float64](../../../sql-reference/data-types/float.md).
 
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/welchttest.md b/docs/ru/sql-reference/aggregate-functions/reference/welchttest.md
index 1f36b2d04ee..16c122d1b49 100644
--- a/docs/ru/sql-reference/aggregate-functions/reference/welchttest.md
+++ b/docs/ru/sql-reference/aggregate-functions/reference/welchttest.md
@@ -24,6 +24,7 @@ welchTTest(sample_data, sample_index)
 **Возвращаемые значения**
 
 [Кортеж](../../../sql-reference/data-types/tuple.md) с двумя элементами:
+
 -   вычисленное значение критерия Уэлча. [Float64](../../../sql-reference/data-types/float.md).
 -   вычисленное p-значение. [Float64](../../../sql-reference/data-types/float.md).
 

From 1c656830fc32606cbc52699beb775f80b7094243 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 13 Feb 2021 00:26:12 +0300
Subject: [PATCH 1033/1238] Fix clang-tidy

---
 src/Storages/StorageMemory.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp
index 01f70db5edd..d7b0ae055ab 100644
--- a/src/Storages/StorageMemory.cpp
+++ b/src/Storages/StorageMemory.cpp
@@ -124,7 +124,7 @@ public:
         if (storage.compress)
         {
             Block compressed_block;
-            for (auto & elem : block)
+            for (const auto & elem : block)
                 compressed_block.insert({ elem.column->compress(), elem.type, elem.name });
 
             new_blocks.emplace_back(compressed_block);
@@ -351,6 +351,7 @@ void registerStorageMemory(StorageFactory & factory)
         return StorageMemory::create(args.table_id, args.columns, args.constraints, settings.compress);
     },
     {
+        .supports_settings = true,
         .supports_parallel_insert = true,
     });
 }

From 453450985f9b5452779b6b4a7ec6c0a44105e3dc Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 13 Feb 2021 00:26:25 +0300
Subject: [PATCH 1034/1238] Performance improvement by Nikolai Kochetov

---
 src/Storages/StorageMemory.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/Storages/StorageMemory.h b/src/Storages/StorageMemory.h
index 91cf616c57d..db71c13ca99 100644
--- a/src/Storages/StorageMemory.h
+++ b/src/Storages/StorageMemory.h
@@ -45,6 +45,8 @@ public:
     /// Smaller blocks (e.g. 64K rows) are better for CPU cache.
     bool prefersLargeBlocks() const override { return false; }
 
+    bool hasEvenlyDistributedRead() const override { return true; }
+
     BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, const Context & context) override;
 
     void drop() override;

From b5826121db6379acb5eb54e800ba73bd8cf0cd06 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 13 Feb 2021 00:29:06 +0300
Subject: [PATCH 1035/1238] Fix Arcadia

---
 src/Columns/ya.make    | 1 +
 src/Columns/ya.make.in | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/Columns/ya.make b/src/Columns/ya.make
index def9dfd4cb7..061391b5214 100644
--- a/src/Columns/ya.make
+++ b/src/Columns/ya.make
@@ -13,6 +13,7 @@ PEERDIR(
     clickhouse/src/Common
     contrib/libs/icu
     contrib/libs/pdqsort
+    contrib/libs/lz4
 )
 
 SRCS(
diff --git a/src/Columns/ya.make.in b/src/Columns/ya.make.in
index 677a5bcbd70..4422d222ce1 100644
--- a/src/Columns/ya.make.in
+++ b/src/Columns/ya.make.in
@@ -12,6 +12,7 @@ PEERDIR(
     clickhouse/src/Common
     contrib/libs/icu
     contrib/libs/pdqsort
+    contrib/libs/lz4
 )
 
 SRCS(

From edd5844bede6295e7747796a6e7cf0540b6fea7f Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 13 Feb 2021 00:29:47 +0300
Subject: [PATCH 1036/1238] Print stack trace on SIGTRAP

---
 base/daemon/BaseDaemon.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp
index 491ffe6a775..367fa0446ba 100644
--- a/base/daemon/BaseDaemon.cpp
+++ b/base/daemon/BaseDaemon.cpp
@@ -784,7 +784,7 @@ void BaseDaemon::initializeTerminationAndSignalProcessing()
     /// Setup signal handlers.
     /// SIGTSTP is added for debugging purposes. To output a stack trace of any running thread at anytime.
 
-    addSignalHandler({SIGABRT, SIGSEGV, SIGILL, SIGBUS, SIGSYS, SIGFPE, SIGPIPE, SIGTSTP}, signalHandler, &handled_signals);
+    addSignalHandler({SIGABRT, SIGSEGV, SIGILL, SIGBUS, SIGSYS, SIGFPE, SIGPIPE, SIGTSTP, SIGTRAP}, signalHandler, &handled_signals);
     addSignalHandler({SIGHUP, SIGUSR1}, closeLogsSignalHandler, &handled_signals);
     addSignalHandler({SIGINT, SIGQUIT, SIGTERM}, terminateRequestedSignalHandler, &handled_signals);
 

From 3f8336963b83f4054c5bcc1ad7a4ab4128d59616 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 13 Feb 2021 00:30:58 +0300
Subject: [PATCH 1037/1238] Non significant change in AggregationCommon

---
 src/Interpreters/AggregationCommon.h | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/Interpreters/AggregationCommon.h b/src/Interpreters/AggregationCommon.h
index 9b0872d3df1..f70ab282e6f 100644
--- a/src/Interpreters/AggregationCommon.h
+++ b/src/Interpreters/AggregationCommon.h
@@ -77,12 +77,8 @@ static inline T ALWAYS_INLINE packFixed(
     const ColumnRawPtrs * low_cardinality_positions [[maybe_unused]] = nullptr,
     const Sizes * low_cardinality_sizes [[maybe_unused]] = nullptr)
 {
-    union
-    {
-        T key;
-        char bytes[sizeof(key)] = {};
-    };
-
+    T key{};
+    char * bytes = reinterpret_cast<char *>(&key);
     size_t offset = 0;
 
     for (size_t j = 0; j < keys_size; ++j)

From 643dcc5ec22b0dd78d7ca5d1c693d574f35f99b2 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov <github@blinkov.ru>
Date: Sat, 13 Feb 2021 01:46:13 +0300
Subject: [PATCH 1038/1238] Update README.md

---
 README.md | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/README.md b/README.md
index 1c6a021c00c..3329a98877f 100644
--- a/README.md
+++ b/README.md
@@ -13,6 +13,3 @@ ClickHouse® is an open-source column-oriented database management system that a
 * [Code Browser](https://clickhouse.tech/codebrowser/html_report/ClickHouse/index.html) with syntax highlight and navigation.
 * [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any.
 * You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person.
-
-## Upcoming Events
-* [Chinese ClickHouse Meetup (online)](http://hdxu.cn/8KxZE) on 6 February 2021.

From 1c55be261c449f93984f2dbf9b962a1123f394e1 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 13 Feb 2021 03:45:06 +0300
Subject: [PATCH 1039/1238] Fix UBSan report in arrayDifference

---
 src/Functions/array/arrayDifference.cpp       | 33 +++++++++++++++----
 .../01716_array_difference_overflow.reference |  1 +
 .../01716_array_difference_overflow.sql       |  2 ++
 3 files changed, 29 insertions(+), 7 deletions(-)
 create mode 100644 tests/queries/0_stateless/01716_array_difference_overflow.reference
 create mode 100644 tests/queries/0_stateless/01716_array_difference_overflow.sql

diff --git a/src/Functions/array/arrayDifference.cpp b/src/Functions/array/arrayDifference.cpp
index 2c71c58867f..b4b30079a4e 100644
--- a/src/Functions/array/arrayDifference.cpp
+++ b/src/Functions/array/arrayDifference.cpp
@@ -47,6 +47,29 @@ struct ArrayDifferenceImpl
     }
 
 
+    template <typename Element, typename Result>
+    static void NO_SANITIZE_UNDEFINED impl(const Element * __restrict src, Result * __restrict dst, size_t begin, size_t end)
+    {
+        /// First element is zero, then the differences of ith and i-1th elements.
+
+        Element prev{};
+        for (size_t pos = begin; pos < end; ++pos)
+        {
+            if (pos == begin)
+            {
+                dst[pos] = 0;
+                prev = src[pos];
+            }
+            else
+            {
+                Element curr = src[pos];
+                dst[pos] = curr - prev;
+                prev = curr;
+            }
+        }
+    }
+
+
     template <typename Element, typename Result>
     static bool executeType(const ColumnPtr & mapped, const ColumnArray & array, ColumnPtr & res_ptr)
     {
@@ -73,14 +96,10 @@ struct ArrayDifferenceImpl
         size_t pos = 0;
         for (auto offset : offsets)
         {
-            // skip empty arrays
-            if (pos < offset)
-            {
-                res_values[pos] = 0;
-                for (++pos; pos < offset; ++pos)
-                    res_values[pos] = static_cast<Result>(data[pos]) - static_cast<Result>(data[pos - 1]);
-            }
+            impl(data.data(), res_values.data(), pos, offset);
+            pos = offset;
         }
+
         res_ptr = ColumnArray::create(std::move(res_nested), array.getOffsetsPtr());
         return true;
     }
diff --git a/tests/queries/0_stateless/01716_array_difference_overflow.reference b/tests/queries/0_stateless/01716_array_difference_overflow.reference
new file mode 100644
index 00000000000..5297534679e
--- /dev/null
+++ b/tests/queries/0_stateless/01716_array_difference_overflow.reference
@@ -0,0 +1 @@
+[0,9223372036854710272]
diff --git a/tests/queries/0_stateless/01716_array_difference_overflow.sql b/tests/queries/0_stateless/01716_array_difference_overflow.sql
new file mode 100644
index 00000000000..3d153725294
--- /dev/null
+++ b/tests/queries/0_stateless/01716_array_difference_overflow.sql
@@ -0,0 +1,2 @@
+-- Overflow is Ok and behaves as the CPU does it.
+SELECT arrayDifference([65536, -9223372036854775808]);

From 1546f5bcb961d0e60b7c5934e6d1add981df3298 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 13 Feb 2021 03:54:38 +0300
Subject: [PATCH 1040/1238] Suppress UBSan report in Decimal comparison

---
 base/common/arithmeticOverflow.h                          | 7 +++++++
 src/Core/DecimalComparison.h                              | 8 +++++---
 .../0_stateless/01716_decimal_comparison_ubsan.reference  | 1 +
 .../0_stateless/01716_decimal_comparison_ubsan.sql        | 2 ++
 4 files changed, 15 insertions(+), 3 deletions(-)
 create mode 100644 tests/queries/0_stateless/01716_decimal_comparison_ubsan.reference
 create mode 100644 tests/queries/0_stateless/01716_decimal_comparison_ubsan.sql

diff --git a/base/common/arithmeticOverflow.h b/base/common/arithmeticOverflow.h
index 8df037a14af..38f2cf29605 100644
--- a/base/common/arithmeticOverflow.h
+++ b/base/common/arithmeticOverflow.h
@@ -156,4 +156,11 @@ namespace common
             return false;
         return (x * y) / y != x;
     }
+
+    /// Multiply and ignore overflow.
+    template <typename T1, typename T2>
+    inline auto NO_SANITIZE_UNDEFINED mulIgnoreOverflow(T1 x, T2 y)
+    {
+        return x * y;
+    }
 }
diff --git a/src/Core/DecimalComparison.h b/src/Core/DecimalComparison.h
index aaf471cefd8..8279d01d35a 100644
--- a/src/Core/DecimalComparison.h
+++ b/src/Core/DecimalComparison.h
@@ -21,7 +21,7 @@ namespace ErrorCodes
     extern const int DECIMAL_OVERFLOW;
 }
 
-///
+
 inline bool allowDecimalComparison(const DataTypePtr & left_type, const DataTypePtr & right_type)
 {
     if (isColumnedAsDecimal(left_type))
@@ -30,7 +30,9 @@ inline bool allowDecimalComparison(const DataTypePtr & left_type, const DataType
             return true;
     }
     else if (isNotDecimalButComparableToDecimal(left_type) && isColumnedAsDecimal(right_type))
+    {
         return true;
+    }
     return false;
 }
 
@@ -252,9 +254,9 @@ private:
         else
         {
             if constexpr (scale_left)
-                x *= scale;
+                x = common::mulIgnoreOverflow(x, scale);
             if constexpr (scale_right)
-                y *= scale;
+                y = common::mulIgnoreOverflow(y, scale);
         }
 
         return Op::apply(x, y);
diff --git a/tests/queries/0_stateless/01716_decimal_comparison_ubsan.reference b/tests/queries/0_stateless/01716_decimal_comparison_ubsan.reference
new file mode 100644
index 00000000000..573541ac970
--- /dev/null
+++ b/tests/queries/0_stateless/01716_decimal_comparison_ubsan.reference
@@ -0,0 +1 @@
+0
diff --git a/tests/queries/0_stateless/01716_decimal_comparison_ubsan.sql b/tests/queries/0_stateless/01716_decimal_comparison_ubsan.sql
new file mode 100644
index 00000000000..f68d9de1995
--- /dev/null
+++ b/tests/queries/0_stateless/01716_decimal_comparison_ubsan.sql
@@ -0,0 +1,2 @@
+SET decimal_check_overflow = 0;
+SELECT toDecimal64(0, 8) = 9223372036854775807;

From c760d5224d042304211a0f6ab157bde4a56a7a3d Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 13 Feb 2021 03:56:38 +0300
Subject: [PATCH 1041/1238] Suppress UBSan report in Decimal comparison

---
 base/common/arithmeticOverflow.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/base/common/arithmeticOverflow.h b/base/common/arithmeticOverflow.h
index 38f2cf29605..fd557fd5b2d 100644
--- a/base/common/arithmeticOverflow.h
+++ b/base/common/arithmeticOverflow.h
@@ -1,6 +1,8 @@
 #pragma once
 
 #include <common/extended_types.h>
+#include <common/defines.h>
+
 
 namespace common
 {

From cbeda6c60e6fdf90803636844aa4dd18d94f1e3e Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Fri, 12 Feb 2021 23:04:45 +0300
Subject: [PATCH 1042/1238] Fix LOGICAL_ERROR for join_use_nulls=1 when JOIN
 contains const from SELECT

---
 src/Interpreters/TableJoin.cpp                     | 10 +++++++++-
 tests/queries/0_stateless/01710_join_use_nulls.sql |  6 ++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp
index 2d3bffa8234..c1777711d9e 100644
--- a/src/Interpreters/TableJoin.cpp
+++ b/src/Interpreters/TableJoin.cpp
@@ -230,8 +230,16 @@ void TableJoin::addJoinedColumn(const NameAndTypePair & joined_column)
 void TableJoin::addJoinedColumnsAndCorrectNullability(ColumnsWithTypeAndName & columns) const
 {
     for (auto & col : columns)
+    {
         if (leftBecomeNullable(col.type))
-            col.type = makeNullable(col.type);
+        {
+            /// No need to nullify constants
+            if (!(col.column && isColumnConst(*col.column)))
+            {
+                col.type = makeNullable(col.type);
+            }
+        }
+    }
 
     for (const auto & col : columns_added_by_join)
     {
diff --git a/tests/queries/0_stateless/01710_join_use_nulls.sql b/tests/queries/0_stateless/01710_join_use_nulls.sql
index 2845af8b8ed..5486010183a 100644
--- a/tests/queries/0_stateless/01710_join_use_nulls.sql
+++ b/tests/queries/0_stateless/01710_join_use_nulls.sql
@@ -11,5 +11,11 @@ FROM X
 RIGHT JOIN Y ON (X.id + 1) = Y.id
 SETTINGS join_use_nulls=1; -- { serverError 53 }
 
+-- Logical error: 'Arguments of 'plus' have incorrect data types: '2' of type 'UInt8', '1' of type 'UInt8''.
+-- Because 1 became toNullable(1), i.e.:
+--     2 UInt8 Const(size = 1, UInt8(size = 1))
+--     1 UInt8 Const(size = 1, Nullable(size = 1, UInt8(size = 1), UInt8(size = 1)))
+SELECT 2+1 FROM system.one X RIGHT JOIN system.one Y ON X.dummy+1 = Y.dummy SETTINGS join_use_nulls = 1; -- { serverError 53 }
+
 DROP TABLE X;
 DROP TABLE Y;

From 4aa46ce3d60007819ffc43b674bbb8e4fdf75df7 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sat, 13 Feb 2021 10:46:29 +0300
Subject: [PATCH 1043/1238] More tests for join_use_nulls

All of them already works, but just in case
---
 .../queries/0_stateless/01710_join_use_nulls.reference |  3 +++
 tests/queries/0_stateless/01710_join_use_nulls.sql     | 10 +++++-----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/tests/queries/0_stateless/01710_join_use_nulls.reference b/tests/queries/0_stateless/01710_join_use_nulls.reference
index e69de29bb2d..8bd111e0416 100644
--- a/tests/queries/0_stateless/01710_join_use_nulls.reference
+++ b/tests/queries/0_stateless/01710_join_use_nulls.reference
@@ -0,0 +1,3 @@
+3
+1
+1
diff --git a/tests/queries/0_stateless/01710_join_use_nulls.sql b/tests/queries/0_stateless/01710_join_use_nulls.sql
index 5486010183a..b024227d4e2 100644
--- a/tests/queries/0_stateless/01710_join_use_nulls.sql
+++ b/tests/queries/0_stateless/01710_join_use_nulls.sql
@@ -5,17 +5,17 @@ CREATE TABLE X (id Int) ENGINE=Memory;
 CREATE TABLE Y (id Int) ENGINE=Memory;
 
 -- Type mismatch of columns to JOIN by: plus(id, 1) Int64 at left, Y.id Int32 at right.
-SELECT
-    Y.id - 1
-FROM X
-RIGHT JOIN Y ON (X.id + 1) = Y.id
-SETTINGS join_use_nulls=1; -- { serverError 53 }
+SELECT Y.id - 1 FROM X RIGHT JOIN Y ON (X.id + 1) = Y.id SETTINGS join_use_nulls=1; -- { serverError 53 }
+SELECT Y.id - 1 FROM X RIGHT JOIN Y ON (X.id + 1) = toInt64(Y.id) SETTINGS join_use_nulls=1;
 
 -- Logical error: 'Arguments of 'plus' have incorrect data types: '2' of type 'UInt8', '1' of type 'UInt8''.
 -- Because 1 became toNullable(1), i.e.:
 --     2 UInt8 Const(size = 1, UInt8(size = 1))
 --     1 UInt8 Const(size = 1, Nullable(size = 1, UInt8(size = 1), UInt8(size = 1)))
 SELECT 2+1 FROM system.one X RIGHT JOIN system.one Y ON X.dummy+1 = Y.dummy SETTINGS join_use_nulls = 1; -- { serverError 53 }
+SELECT 2+1 FROM system.one X RIGHT JOIN system.one Y ON X.dummy+1 = toUInt16(Y.dummy) SETTINGS join_use_nulls = 1;
+SELECT X.dummy+1 FROM system.one X RIGHT JOIN system.one Y ON X.dummy = Y.dummy SETTINGS join_use_nulls = 1;
+SELECT Y.dummy+1 FROM system.one X RIGHT JOIN system.one Y ON X.dummy = Y.dummy SETTINGS join_use_nulls = 1;
 
 DROP TABLE X;
 DROP TABLE Y;

From fa329808e57315c0ab0692220bdc69d185231753 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sat, 13 Feb 2021 13:12:55 +0300
Subject: [PATCH 1044/1238] Call next() from sync()/finalize() in WriteBuffer

---
 src/IO/WriteBuffer.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/IO/WriteBuffer.h b/src/IO/WriteBuffer.h
index d425f813d7b..24529fad8c0 100644
--- a/src/IO/WriteBuffer.h
+++ b/src/IO/WriteBuffer.h
@@ -95,8 +95,15 @@ public:
         ++pos;
     }
 
-    virtual void sync() {}
-    virtual void finalize() {}
+    virtual void sync()
+    {
+        next();
+    }
+
+    virtual void finalize()
+    {
+        next();
+    }
 
 private:
     /** Write the data in the buffer (from the beginning of the buffer to the current position).

From 06e8065ee65fabfed101da03eef993913f096450 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sat, 13 Feb 2021 13:15:36 +0300
Subject: [PATCH 1045/1238] Add missing sync of underlying files

---
 base/daemon/BaseDaemon.cpp                     | 1 +
 src/Access/DiskAccessStorage.cpp               | 2 ++
 src/Common/tests/compact_array.cpp             | 1 +
 utils/convert-month-partitioned-parts/main.cpp | 1 +
 4 files changed, 5 insertions(+)

diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp
index 491ffe6a775..d96af1297e6 100644
--- a/base/daemon/BaseDaemon.cpp
+++ b/base/daemon/BaseDaemon.cpp
@@ -562,6 +562,7 @@ void debugIncreaseOOMScore()
     {
         DB::WriteBufferFromFile buf("/proc/self/oom_score_adj");
         buf.write(new_score.c_str(), new_score.size());
+        buf.close();
     }
     catch (const Poco::Exception & e)
     {
diff --git a/src/Access/DiskAccessStorage.cpp b/src/Access/DiskAccessStorage.cpp
index 426c27ea799..80594f66dfc 100644
--- a/src/Access/DiskAccessStorage.cpp
+++ b/src/Access/DiskAccessStorage.cpp
@@ -217,6 +217,7 @@ namespace
         /// Write the file.
         WriteBufferFromFile out{tmp_file_path.string()};
         out.write(file_contents.data(), file_contents.size());
+        out.close();
 
         /// Rename.
         std::filesystem::rename(tmp_file_path, file_path);
@@ -274,6 +275,7 @@ namespace
             writeStringBinary(name, out);
             writeUUIDText(id, out);
         }
+        out.close();
     }
 
 
diff --git a/src/Common/tests/compact_array.cpp b/src/Common/tests/compact_array.cpp
index 91fb59d543f..a63859ac712 100644
--- a/src/Common/tests/compact_array.cpp
+++ b/src/Common/tests/compact_array.cpp
@@ -50,6 +50,7 @@ struct Test
             {
                 DB::WriteBufferFromFile wb(filename);
                 wb.write(reinterpret_cast<const char *>(&store), sizeof(store));
+                wb.close();
             }
 
             {
diff --git a/utils/convert-month-partitioned-parts/main.cpp b/utils/convert-month-partitioned-parts/main.cpp
index bce1e08077c..97eba631f1e 100644
--- a/utils/convert-month-partitioned-parts/main.cpp
+++ b/utils/convert-month-partitioned-parts/main.cpp
@@ -97,6 +97,7 @@ void run(String part_path, String date_column, String dest_path)
     Poco::File(new_tmp_part_path_str + "checksums.txt").setWriteable();
     WriteBufferFromFile checksums_out(new_tmp_part_path_str + "checksums.txt", 4096);
     checksums.write(checksums_out);
+    checksums.close();
 
     Poco::File(new_tmp_part_path).renameTo(new_part_path.toString());
 }

From 592f62d0afe4c3320744b6be6099ad022d3d65bc Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sat, 13 Feb 2021 12:29:59 +0300
Subject: [PATCH 1046/1238] Remove superfluous out->next() call in HTTPHandler

---
 src/Server/HTTPHandler.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp
index eb4d6119c6f..211a910a52f 100644
--- a/src/Server/HTTPHandler.cpp
+++ b/src/Server/HTTPHandler.cpp
@@ -715,7 +715,6 @@ void HTTPHandler::trySendExceptionToClient(const std::string & s, int exception_
             writeChar('\n', *used_output.out_maybe_compressed);
 
             used_output.out_maybe_compressed->next();
-            used_output.out->next();
             used_output.out->finalize();
         }
     }

From 33f54cdb3c64fe72bffc79f5c6a082049a5d0012 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sat, 13 Feb 2021 12:04:03 +0300
Subject: [PATCH 1047/1238] Fix abnormal server termination when http client
 goes away

In [1] stress tests found:

    2021.02.12 14:20:58.800988 [ 17728 ] {de3e7894-b401-4f7d-8530-90cd5ab06682} <Debug> executeQuery: (from [::1]:45792, using production parser) (comment: /usr/share/clickhouse-test/queries/0_stateless/01520_client_print_query_id.expect) SELECT * FROM numbers(34599)
    2021.02.12 14:20:58.916484 [ 17728 ] {de3e7894-b401-4f7d-8530-90cd5ab06682} <Trace> ContextAccess (default): Access granted: CREATE TEMPORARY TABLE ON *.*
    2021.02.12 14:20:59.071980 [ 17728 ] {de3e7894-b401-4f7d-8530-90cd5ab06682} <Trace> InterpreterSelectQuery: FetchColumns -> Complete
    2021.02.12 14:21:10.708202 [ 17728 ] {de3e7894-b401-4f7d-8530-90cd5ab06682} <Information> executeQuery: Read 34599 rows, 270.30 KiB in 11.876294055 sec., 2913 rows/sec., 22.76 KiB/sec.
    2021.02.12 14:22:10.506261 [ 17728 ] {de3e7894-b401-4f7d-8530-90cd5ab06682} <Debug> DynamicQueryHandler: Done processing query
    2021.02.12 14:22:18.238037 [ 375 ] {} <Fatal> BaseDaemon: (version 21.3.1.5996, build id: 8DBCED54529C989F7AD4D991F51410774D55DE6C) (from thread 17728) Terminate called for uncaught exception:
    Code: 24, e.displayText() = DB::Exception: Cannot write to ostream at offset 262994, Stack trace (when copying this message, always include the lines below):

    0. ./obj-x86_64-linux-gnu/../contrib/libcxx/include/exception:0: Poco::Exception::Exception(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, int) @ 0x15c976cb in /usr/bin/clickhouse
    1. ./obj-x86_64-linux-gnu/../src/Common/Exception.cpp:56: DB::Exception::Exception(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, int, bool) @ 0x8c9320e in /usr/bin/clickhouse
    2. ./obj-x86_64-linux-gnu/../src/IO/WriteBufferFromOStream.cpp:0: DB::WriteBufferFromOStream::nextImpl() @ 0x8d54da5 in /usr/bin/clickhouse
    3. ./obj-x86_64-linux-gnu/../src/IO/BufferBase.h:39: DB::WriteBufferFromOStream::~WriteBufferFromOStream() @ 0x8d551d7 in /usr/bin/clickhouse
    4. ./obj-x86_64-linux-gnu/../src/IO/WriteBufferFromOStream.cpp:44: DB::Write

    2021.02.12 14:22:18.811071 [ 18134 ] {} <Fatal> BaseDaemon: ########################################
    2021.02.12 14:22:18.878935 [ 18134 ] {} <Fatal> BaseDaemon: (version 21.3.1.5996, build id: 8DBCED54529C989F7AD4D991F51410774D55DE6C) (from thread 17728) (query_id: de3e7894-b401-4f7d-8530-90cd5ab06682) Received signal Aborted (6)
    2021.02.12 14:22:18.943148 [ 18134 ] {} <Fatal> BaseDaemon:
    2021.02.12 14:22:19.007073 [ 18134 ] {} <Fatal> BaseDaemon: Stack trace: 0x7f109932018b 0x7f10992ff859 0x8bb33ae 0x8e301dd 0x17dac8c4 0x17dac7c7 0x8c3fe0b 0x8d552c5 0x8d552ea 0x11a29914 0x11a2a2ca 0x12f96092 0x12f8c65e 0x12f84300 0x15b84110 0x15bc0913 0x15bc103f 0x15d29a12 0x15d27fb0 0x15d267b8 0x8badbad 0x7f10994d5609 0x7f10993fc293
    2021.02.12 14:22:19.255998 [ 18134 ] {} <Fatal> BaseDaemon: 5. raise @ 0x4618b in /usr/lib/x86_64-linux-gnu/libc-2.31.so
    2021.02.12 14:22:19.270203 [ 18134 ] {} <Fatal> BaseDaemon: 6. abort @ 0x25859 in /usr/lib/x86_64-linux-gnu/libc-2.31.so
    2021.02.12 14:22:50.108918 [ 370 ] {} <Fatal> Application: Child process was terminated by signal 6.

  [1]: https://clickhouse-test-reports.s3.yandex.net/19580/6aecb62416ece880cbb8ee3a803e14d841388dde/stress_test_(thread).html#fail1

Verified locally by commenting out->next() call in
WriteBufferFromHTTPServerResponse::nextImpl(), adding a sleep(1) and
canceling HTTP request before it finished, the stacktrace as follow:

    [ 6351 ] {} <Fatal> BaseDaemon: (version 21.3.1.1, build id: 9B40466BF3D2F5AED78A52A995A4A2FD3116787C) (from thread 6677) Terminate called for uncaught exception:
    Code: 24, e.displayText() = DB::Exception: Cannot write to ostream at offset 4, Stack trace (when copying this message, always include the lines below):

    0. /src/ch/clickhouse/.cmake/../src/Common/StackTrace.cpp:298: StackTrace::tryCapture() @ 0x30a52a in /src/ch/clickhouse/.cmake/src/libclickhouse_common_iod.so
    1. /src/ch/clickhouse/.cmake/../src/Common/StackTrace.cpp:260: StackTrace::StackTrace() @ 0x30a4e5 in /src/ch/clickhouse/.cmake/src/libclickhouse_common_iod.so
    2. /src/ch/clickhouse/.cmake/../src/Common/Exception.cpp:53: DB::Exception::Exception(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, int, bool) @ 0x2a61ae in /src/ch/clickhouse/.cmake/src/libclickhouse_common_iod.so
    3. /src/ch/clickhouse/.cmake/../src/IO/WriteBufferFromOStream.cpp:22: DB::WriteBufferFromOStream::nextImpl() @ 0x3b468a in /src/ch/clickhouse/.cmake/src/libclickhouse_common_iod.so
    4. /src/ch/clickhouse/.cmake/../src/IO/WriteBuffer.h:47

    <snip>
    [ 8966 ] {} <Fatal> BaseDaemon: 7. __cxxabiv1::__terminate(void (*)()) @ 0x1784ca in /src/ch/clickhouse/.cmake/contrib/replxx-cmake/libreplxxd.so
    <snip>
    [ 8966 ] {} <Fatal> BaseDaemon: 10. /src/ch/clickhouse/.cmake/../src/IO/WriteBufferFromOStream.cpp:0: DB::WriteBufferFromOStream::~WriteBufferFromOStream() @ 0x3b48c1 in /src/ch/clickhouse/.cmake/src/libclickhouse_common_iod.so
    [ 8966 ] {} <Fatal> BaseDaemon: 11. /src/ch/clickhouse/.cmake/../src/IO/WriteBufferFromOStream.cpp:44: DB::WriteBufferFromOStream::~WriteBufferFromOStream() @ 0x3b48ec in /src/ch/clickhouse/.cmake/src/libclickhouse_common_iod.so
    <snip>
    [ 8966 ] {} <Fatal> BaseDaemon: 14. /src/ch/clickhouse/.cmake/../src/IO/WriteBufferFromHTTPServerResponse.cpp:218: DB::WriteBufferFromHTTPServerResponse::~WriteBufferFromHTTPServerResponse() @ 0x3b33cd in /src/ch/clickhouse/.cmake/src/libclickhouse_common_iod.so
    <snip>
    [ 8966 ] {} <Fatal> BaseDaemon: 22. /src/ch/clickhouse/.cmake/../src/Server/HTTPHandler.h:43: DB::HTTPHandler::Output::~Output() @ 0x260421 in /src/ch/clickhouse/.cmake/src/libclickhouse_serverd.so
    [ 8966 ] {} <Fatal> BaseDaemon: 23. /src/ch/clickhouse/.cmake/../src/Server/HTTPHandler.cpp:778: DB::HTTPHandler::handleRequest(Poco::Net::HTTPServerRequest&, Poco::Net::HTTPServerResponse&) @ 0x253fd4 in /src/ch/clickhouse/.cmake/src/libclickhouse_serverd.so
---
 src/IO/WriteBufferFromHTTPServerResponse.cpp | 12 ++++++------
 src/Server/HTTPHandler.cpp                   |  3 +++
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/IO/WriteBufferFromHTTPServerResponse.cpp b/src/IO/WriteBufferFromHTTPServerResponse.cpp
index fb9a6a99d2b..ac2eeac1652 100644
--- a/src/IO/WriteBufferFromHTTPServerResponse.cpp
+++ b/src/IO/WriteBufferFromHTTPServerResponse.cpp
@@ -188,14 +188,14 @@ void WriteBufferFromHTTPServerResponse::onProgress(const Progress & progress)
 
 void WriteBufferFromHTTPServerResponse::finalize()
 {
-    if (offset())
+    next();
+    if (out)
     {
-        next();
-
-        if (out)
-            out.reset();
+        out->next();
+        out.reset();
     }
-    else
+
+    if (!offset())
     {
         /// If no remaining data, just send headers.
         std::lock_guard lock(mutex);
diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp
index 211a910a52f..e9a77c3b433 100644
--- a/src/Server/HTTPHandler.cpp
+++ b/src/Server/HTTPHandler.cpp
@@ -774,6 +774,9 @@ void HTTPHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Ne
 
         trySendExceptionToClient(exception_message, exception_code, request, response, used_output);
     }
+
+    if (used_output.out)
+        used_output.out->finalize();
 }
 
 DynamicQueryHandler::DynamicQueryHandler(IServer & server_, const std::string & param_name_)

From 69d4120982fa2b7cae35da83532c8318f44bfc8f Mon Sep 17 00:00:00 2001
From: Amos Bird <amosbird@gmail.com>
Date: Fri, 12 Feb 2021 10:22:18 +0800
Subject: [PATCH 1048/1238] Disable table function view in expression

---
 src/Parsers/ASTFunction.cpp                               | 8 ++++++++
 .../0_stateless/01715_table_function_view_fix.reference   | 0
 .../queries/0_stateless/01715_table_function_view_fix.sql | 1 +
 3 files changed, 9 insertions(+)
 create mode 100644 tests/queries/0_stateless/01715_table_function_view_fix.reference
 create mode 100644 tests/queries/0_stateless/01715_table_function_view_fix.sql

diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp
index 806b8e6c5b9..29ac01eefc5 100644
--- a/src/Parsers/ASTFunction.cpp
+++ b/src/Parsers/ASTFunction.cpp
@@ -15,8 +15,16 @@
 namespace DB
 {
 
+namespace ErrorCodes
+{
+    extern const int UNEXPECTED_EXPRESSION;
+}
+
 void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr) const
 {
+    if (name == "view")
+        throw Exception("Table function view cannot be used as an expression", ErrorCodes::UNEXPECTED_EXPRESSION);
+
     writeString(name, ostr);
 
     if (parameters)
diff --git a/tests/queries/0_stateless/01715_table_function_view_fix.reference b/tests/queries/0_stateless/01715_table_function_view_fix.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/01715_table_function_view_fix.sql b/tests/queries/0_stateless/01715_table_function_view_fix.sql
new file mode 100644
index 00000000000..21da116f6ba
--- /dev/null
+++ b/tests/queries/0_stateless/01715_table_function_view_fix.sql
@@ -0,0 +1 @@
+SELECT view(SELECT 1); -- { serverError 183 }

From a551edd8d6e308569433a9158df1ee31a60844de Mon Sep 17 00:00:00 2001
From: Amos Bird <amosbird@gmail.com>
Date: Sat, 13 Feb 2021 13:18:14 +0800
Subject: [PATCH 1049/1238] Do not parse view function in expression

---
 src/Parsers/ASTFunction.cpp                   |  8 --
 src/Parsers/ExpressionElementParsers.cpp      | 81 ++++++++++++-------
 src/Parsers/ExpressionElementParsers.h        | 16 +++-
 src/Parsers/ExpressionListParsers.cpp         | 17 +++-
 src/Parsers/ExpressionListParsers.h           | 22 ++++-
 src/Parsers/ParserTablesInSelectQuery.cpp     |  2 +-
 .../01715_table_function_view_fix.sql         |  2 +-
 7 files changed, 98 insertions(+), 50 deletions(-)

diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp
index 29ac01eefc5..806b8e6c5b9 100644
--- a/src/Parsers/ASTFunction.cpp
+++ b/src/Parsers/ASTFunction.cpp
@@ -15,16 +15,8 @@
 namespace DB
 {
 
-namespace ErrorCodes
-{
-    extern const int UNEXPECTED_EXPRESSION;
-}
-
 void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr) const
 {
-    if (name == "view")
-        throw Exception("Table function view cannot be used as an expression", ErrorCodes::UNEXPECTED_EXPRESSION);
-
     writeString(name, ostr);
 
     if (parameters)
diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp
index e7cd85798b9..3d868812304 100644
--- a/src/Parsers/ExpressionElementParsers.cpp
+++ b/src/Parsers/ExpressionElementParsers.cpp
@@ -266,7 +266,7 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
     ParserIdentifier id_parser;
     ParserKeyword distinct("DISTINCT");
     ParserKeyword all("ALL");
-    ParserExpressionList contents(false);
+    ParserExpressionList contents(false, is_table_function);
     ParserSelectWithUnionQuery select;
     ParserKeyword over("OVER");
 
@@ -278,6 +278,12 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
     ASTPtr expr_list_args;
     ASTPtr expr_list_params;
 
+    if (is_table_function)
+    {
+        if (ParserTableFunctionView().parse(pos, node, expected))
+            return true;
+    }
+
     if (!id_parser.parse(pos, identifier, expected))
         return false;
 
@@ -312,36 +318,6 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
         }
     }
 
-    if (!has_distinct && !has_all)
-    {
-        auto old_pos = pos;
-        auto maybe_an_subquery = pos->type == TokenType::OpeningRoundBracket;
-
-        if (select.parse(pos, query, expected))
-        {
-            auto & select_ast = query->as<ASTSelectWithUnionQuery &>();
-            if (select_ast.list_of_selects->children.size() == 1 && maybe_an_subquery)
-            {
-                // It's an subquery. Bail out.
-                pos = old_pos;
-            }
-            else
-            {
-                if (pos->type != TokenType::ClosingRoundBracket)
-                    return false;
-                ++pos;
-                auto function_node = std::make_shared<ASTFunction>();
-                tryGetIdentifierNameInto(identifier, function_node->name);
-                auto expr_list_with_single_query = std::make_shared<ASTExpressionList>();
-                expr_list_with_single_query->children.push_back(query);
-                function_node->arguments = expr_list_with_single_query;
-                function_node->children.push_back(function_node->arguments);
-                node = function_node;
-                return true;
-            }
-        }
-    }
-
     const char * contents_begin = pos->begin;
     if (!contents.parse(pos, expr_list_args, expected))
         return false;
@@ -477,6 +453,49 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
     return true;
 }
 
+bool ParserTableFunctionView::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+    ParserIdentifier id_parser;
+    ParserKeyword view("VIEW");
+    ParserSelectWithUnionQuery select;
+
+    ASTPtr identifier;
+    ASTPtr query;
+
+    if (!view.ignore(pos, expected))
+        return false;
+
+    if (pos->type != TokenType::OpeningRoundBracket)
+        return false;
+
+    ++pos;
+
+    bool maybe_an_subquery = pos->type == TokenType::OpeningRoundBracket;
+
+    if (!select.parse(pos, query, expected))
+        return false;
+
+    auto & select_ast = query->as<ASTSelectWithUnionQuery &>();
+    if (select_ast.list_of_selects->children.size() == 1 && maybe_an_subquery)
+    {
+        // It's an subquery. Bail out.
+        return false;
+    }
+
+    if (pos->type != TokenType::ClosingRoundBracket)
+        return false;
+    ++pos;
+    auto function_node = std::make_shared<ASTFunction>();
+    tryGetIdentifierNameInto(identifier, function_node->name);
+    auto expr_list_with_single_query = std::make_shared<ASTExpressionList>();
+    expr_list_with_single_query->children.push_back(query);
+    function_node->name = "view";
+    function_node->arguments = expr_list_with_single_query;
+    function_node->children.push_back(function_node->arguments);
+    node = function_node;
+    return true;
+}
+
 bool ParserWindowReference::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
 {
     ASTFunction * function = dynamic_cast<ASTFunction *>(node.get());
diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h
index ba18fc2cddd..b6194f981fe 100644
--- a/src/Parsers/ExpressionElementParsers.h
+++ b/src/Parsers/ExpressionElementParsers.h
@@ -149,11 +149,25 @@ protected:
 class ParserFunction : public IParserBase
 {
 public:
-    ParserFunction(bool allow_function_parameters_ = true) : allow_function_parameters(allow_function_parameters_) {}
+    ParserFunction(bool allow_function_parameters_ = true, bool is_table_function_ = false)
+        : allow_function_parameters(allow_function_parameters_), is_table_function(is_table_function_)
+    {
+    }
+
 protected:
     const char * getName() const override { return "function"; }
     bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
     bool allow_function_parameters;
+    bool is_table_function;
+};
+
+// A special function parser for view table function.
+// It parses an SELECT query as its argument and doesn't support getColumnName().
+class ParserTableFunctionView : public IParserBase
+{
+protected:
+    const char * getName() const override { return "function"; }
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
 };
 
 // Window reference (the thing that goes after OVER) for window function.
diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp
index afe85f069c7..e9ad65af471 100644
--- a/src/Parsers/ExpressionListParsers.cpp
+++ b/src/Parsers/ExpressionListParsers.cpp
@@ -468,6 +468,14 @@ bool ParserLambdaExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
 }
 
 
+bool ParserTableFunctionExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+    if (ParserTableFunctionView().parse(pos, node, expected))
+        return true;
+    return elem_parser.parse(pos, node, expected);
+}
+
+
 bool ParserPrefixUnaryOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
 {
     /// try to find any of the valid operators
@@ -570,9 +578,10 @@ bool ParserTupleElementExpression::parseImpl(Pos & pos, ASTPtr & node, Expected
 }
 
 
-ParserExpressionWithOptionalAlias::ParserExpressionWithOptionalAlias(bool allow_alias_without_as_keyword)
-    : impl(std::make_unique<ParserWithOptionalAlias>(std::make_unique<ParserExpression>(),
-                                                     allow_alias_without_as_keyword))
+ParserExpressionWithOptionalAlias::ParserExpressionWithOptionalAlias(bool allow_alias_without_as_keyword, bool is_table_function)
+    : impl(std::make_unique<ParserWithOptionalAlias>(
+        is_table_function ? ParserPtr(std::make_unique<ParserTableFunctionExpression>()) : ParserPtr(std::make_unique<ParserExpression>()),
+        allow_alias_without_as_keyword))
 {
 }
 
@@ -580,7 +589,7 @@ ParserExpressionWithOptionalAlias::ParserExpressionWithOptionalAlias(bool allow_
 bool ParserExpressionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
 {
     return ParserList(
-        std::make_unique<ParserExpressionWithOptionalAlias>(allow_alias_without_as_keyword),
+        std::make_unique<ParserExpressionWithOptionalAlias>(allow_alias_without_as_keyword, is_table_function),
         std::make_unique<ParserToken>(TokenType::Comma))
         .parse(pos, node, expected);
 }
diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h
index 90b27950873..2371e006c09 100644
--- a/src/Parsers/ExpressionListParsers.h
+++ b/src/Parsers/ExpressionListParsers.h
@@ -436,13 +436,26 @@ protected:
 };
 
 
+// It's used to parse expressions in table function.
+class ParserTableFunctionExpression : public IParserBase
+{
+private:
+    ParserLambdaExpression elem_parser;
+
+protected:
+    const char * getName() const override { return "table function expression"; }
+
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+};
+
+
 using ParserExpression = ParserLambdaExpression;
 
 
 class ParserExpressionWithOptionalAlias : public IParserBase
 {
 public:
-    ParserExpressionWithOptionalAlias(bool allow_alias_without_as_keyword);
+    explicit ParserExpressionWithOptionalAlias(bool allow_alias_without_as_keyword, bool is_table_function = false);
 protected:
     ParserPtr impl;
 
@@ -459,11 +472,12 @@ protected:
 class ParserExpressionList : public IParserBase
 {
 public:
-    ParserExpressionList(bool allow_alias_without_as_keyword_)
-        : allow_alias_without_as_keyword(allow_alias_without_as_keyword_) {}
+    explicit ParserExpressionList(bool allow_alias_without_as_keyword_, bool is_table_function_ = false)
+        : allow_alias_without_as_keyword(allow_alias_without_as_keyword_), is_table_function(is_table_function_) {}
 
 protected:
     bool allow_alias_without_as_keyword;
+    bool is_table_function; // This expression list is used by a table function
 
     const char * getName() const override { return "list of expressions"; }
     bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
@@ -473,7 +487,7 @@ protected:
 class ParserNotEmptyExpressionList : public IParserBase
 {
 public:
-    ParserNotEmptyExpressionList(bool allow_alias_without_as_keyword)
+    explicit ParserNotEmptyExpressionList(bool allow_alias_without_as_keyword)
         : nested_parser(allow_alias_without_as_keyword) {}
 private:
     ParserExpressionList nested_parser;
diff --git a/src/Parsers/ParserTablesInSelectQuery.cpp b/src/Parsers/ParserTablesInSelectQuery.cpp
index 1264acefe64..2e20279dbe1 100644
--- a/src/Parsers/ParserTablesInSelectQuery.cpp
+++ b/src/Parsers/ParserTablesInSelectQuery.cpp
@@ -22,7 +22,7 @@ bool ParserTableExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
     auto res = std::make_shared<ASTTableExpression>();
 
     if (!ParserWithOptionalAlias(std::make_unique<ParserSubquery>(), true).parse(pos, res->subquery, expected)
-        && !ParserWithOptionalAlias(std::make_unique<ParserFunction>(), true).parse(pos, res->table_function, expected)
+        && !ParserWithOptionalAlias(std::make_unique<ParserFunction>(true, true), true).parse(pos, res->table_function, expected)
         && !ParserWithOptionalAlias(std::make_unique<ParserCompoundIdentifier>(false, true), true).parse(pos, res->database_and_table_name, expected))
         return false;
 
diff --git a/tests/queries/0_stateless/01715_table_function_view_fix.sql b/tests/queries/0_stateless/01715_table_function_view_fix.sql
index 21da116f6ba..de5150b7b70 100644
--- a/tests/queries/0_stateless/01715_table_function_view_fix.sql
+++ b/tests/queries/0_stateless/01715_table_function_view_fix.sql
@@ -1 +1 @@
-SELECT view(SELECT 1); -- { serverError 183 }
+SELECT view(SELECT 1); -- { clientError 62 }

From 68f23b7087a87dd1960dcee06e66835d9d237a52 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sat, 13 Feb 2021 13:59:09 +0300
Subject: [PATCH 1050/1238] Improve logging during MergeTree reading

- Remove "Not using primary index on part {}" message (too noisy)
- Add number of total marks before filtering by primary key into the
  common message
- Make "Index {} has dropped {} / {} granules." not per-part, but
  per-query
---
 .../MergeTree/MergeTreeDataSelectExecutor.cpp | 61 +++++++++++++++----
 .../MergeTree/MergeTreeDataSelectExecutor.h   |  2 +
 2 files changed, 51 insertions(+), 12 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index d41faa1ed46..d23413f4a84 100644
--- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -175,6 +175,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
     Names virt_column_names;
     Names real_column_names;
 
+    size_t total_parts = parts.size();
     bool part_column_queried = false;
     bool part_uuid_column_queried = false;
 
@@ -550,7 +551,21 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
     if (select.prewhere())
         prewhere_column = select.prewhere()->getColumnName();
 
-    std::vector<std::pair<MergeTreeIndexPtr, MergeTreeIndexConditionPtr>> useful_indices;
+    struct DataSkippingIndexAndCondition
+    {
+        MergeTreeIndexPtr index;
+        MergeTreeIndexConditionPtr condition;
+        std::atomic<size_t> total_granules;
+        std::atomic<size_t> granules_dropped;
+
+        DataSkippingIndexAndCondition(MergeTreeIndexPtr index_, MergeTreeIndexConditionPtr condition_)
+            : index(index_)
+            , condition(condition_)
+            , total_granules(0)
+            , granules_dropped(0)
+        {}
+    };
+    std::list<DataSkippingIndexAndCondition> useful_indices;
 
     for (const auto & index : metadata_snapshot->getSecondaryIndices())
     {
@@ -579,7 +594,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
 
         std::unordered_set<std::string> useful_indices_names;
         for (const auto & useful_index : useful_indices)
-            useful_indices_names.insert(useful_index.first->index.name);
+            useful_indices_names.insert(useful_index.index->index.name);
 
         for (const auto & index_name : forced_indices)
         {
@@ -595,6 +610,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
     RangesInDataParts parts_with_ranges(parts.size());
     size_t sum_marks = 0;
     std::atomic<size_t> sum_marks_pk = 0;
+    std::atomic<size_t> total_marks_pk = 0;
+
     size_t sum_ranges = 0;
 
     /// Let's find what range to read from each part.
@@ -615,6 +632,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
 
             RangesInDataPart ranges(part, part_index);
 
+            total_marks_pk.fetch_add(part->index_granularity.getMarksCount(), std::memory_order_relaxed);
+
             if (metadata_snapshot->hasPrimaryKey())
                 ranges.ranges = markRangesFromPKRange(part, metadata_snapshot, key_condition, settings, log);
             else
@@ -630,9 +649,20 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
 
             sum_marks_pk.fetch_add(ranges.getMarksCount(), std::memory_order_relaxed);
 
-            for (const auto & index_and_condition : useful_indices)
+            for (auto & index_and_condition : useful_indices)
+            {
+                size_t total_granules = 0;
+                size_t granules_dropped = 0;
                 ranges.ranges = filterMarksUsingIndex(
-                        index_and_condition.first, index_and_condition.second, part, ranges.ranges, settings, reader_settings, log);
+                    index_and_condition.index, index_and_condition.condition,
+                    part, ranges.ranges,
+                    settings, reader_settings,
+                    total_granules, granules_dropped,
+                    log);
+
+                index_and_condition.total_granules.fetch_add(total_granules, std::memory_order_relaxed);
+                index_and_condition.granules_dropped.fetch_add(granules_dropped, std::memory_order_relaxed);
+            }
 
             if (!ranges.ranges.empty())
             {
@@ -697,7 +727,19 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
         parts_with_ranges.resize(next_part);
     }
 
-    LOG_DEBUG(log, "Selected {} parts by partition key, {} parts by primary key, {} marks by primary key, {} marks to read from {} ranges", parts.size(), parts_with_ranges.size(), sum_marks_pk.load(std::memory_order_relaxed), sum_marks, sum_ranges);
+    for (const auto & index_and_condition : useful_indices)
+    {
+        const auto & index_name = index_and_condition.index->index.name;
+        LOG_DEBUG(log, "Index {} has dropped {}/{} granules.",
+            backQuote(index_name),
+            index_and_condition.granules_dropped, index_and_condition.total_granules);
+    }
+
+    LOG_DEBUG(log, "Selected {}/{} parts by partition key, {} parts by primary key, {}/{} marks by primary key, {} marks to read from {} ranges",
+        parts.size(), total_parts, parts_with_ranges.size(),
+        sum_marks_pk.load(std::memory_order_relaxed),
+        total_marks_pk.load(std::memory_order_relaxed),
+        sum_marks, sum_ranges);
 
     if (parts_with_ranges.empty())
         return std::make_unique<QueryPlan>();
@@ -1595,8 +1637,6 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(
     /// If index is not used.
     if (key_condition.alwaysUnknownOrTrue())
     {
-        LOG_TRACE(log, "Not using primary index on part {}", part->name);
-
         if (has_final_mark)
             res.push_back(MarkRange(0, marks_count - 1));
         else
@@ -1769,6 +1809,8 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex(
     const MarkRanges & ranges,
     const Settings & settings,
     const MergeTreeReaderSettings & reader_settings,
+    size_t & total_granules,
+    size_t & granules_dropped,
     Poco::Logger * log)
 {
     if (!part->volume->getDisk()->exists(part->getFullRelativePath() + index_helper->getFileName() + ".idx"))
@@ -1785,9 +1827,6 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex(
         part->index_granularity_info.fixed_index_granularity,
         part->index_granularity_info.index_granularity_bytes);
 
-    size_t granules_dropped = 0;
-    size_t total_granules = 0;
-
     size_t marks_count = part->getMarksCount();
     size_t final_mark = part->index_granularity.hasFinalMark();
     size_t index_marks_count = (marks_count - final_mark + index_granularity - 1) / index_granularity;
@@ -1839,8 +1878,6 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex(
         last_index_mark = index_range.end - 1;
     }
 
-    LOG_DEBUG(log, "Index {} has dropped {} / {} granules.", backQuote(index_helper->index.name), granules_dropped, total_granules);
-
     return res;
 }
 
diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h
index 04a3be3d3f0..7692424dfb5 100644
--- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h
+++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h
@@ -113,6 +113,8 @@ private:
         const MarkRanges & ranges,
         const Settings & settings,
         const MergeTreeReaderSettings & reader_settings,
+        size_t & total_granules,
+        size_t & granules_dropped,
         Poco::Logger * log);
 
     /// Select the parts in which there can be data that satisfy `minmax_idx_condition` and that match the condition on `_part`,

From ff647ad176ad32718fcfc87677effa0ab37e3f10 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sat, 13 Feb 2021 16:32:50 +0300
Subject: [PATCH 1051/1238] Update 01508_partition_pruning expectations

---
 .../01508_partition_pruning.reference         | 120 +++++++++---------
 1 file changed, 60 insertions(+), 60 deletions(-)

diff --git a/tests/queries/0_stateless/01508_partition_pruning.reference b/tests/queries/0_stateless/01508_partition_pruning.reference
index 0cc40d23b41..70f529c6058 100644
--- a/tests/queries/0_stateless/01508_partition_pruning.reference
+++ b/tests/queries/0_stateless/01508_partition_pruning.reference
@@ -1,244 +1,244 @@
 --------- tMM ----------------------------
 select uniqExact(_part), count() from tMM where toDate(d)=toDate('2020-09-15');
 0	0
-Selected 0 parts by partition key, 0 parts by primary key, 0 marks by primary key, 0 marks to read from 0 ranges
+Selected 0/6 parts by partition key, 0 parts by primary key, 0/0 marks by primary key, 0 marks to read from 0 ranges
 
 select uniqExact(_part), count() from tMM where toDate(d)=toDate('2020-09-01');
 2	2880
-Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges
+Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges
 
 select uniqExact(_part), count() from tMM where toDate(d)=toDate('2020-10-15');
 1	1440
-Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges
+Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges
 
 select uniqExact(_part), count() from tMM where toDate(d)='2020-09-15';
 0	0
-Selected 0 parts by partition key, 0 parts by primary key, 0 marks by primary key, 0 marks to read from 0 ranges
+Selected 0/6 parts by partition key, 0 parts by primary key, 0/0 marks by primary key, 0 marks to read from 0 ranges
 
 select uniqExact(_part), count() from tMM where toYYYYMM(d)=202009;
 2	10000
-Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges
+Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges
 
 select uniqExact(_part), count() from tMM where toYYYYMMDD(d)=20200816;
 2	2880
-Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges
+Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges
 
 select uniqExact(_part), count() from tMM where toYYYYMMDD(d)=20201015;
 1	1440
-Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges
+Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges
 
 select uniqExact(_part), count() from tMM where toDate(d)='2020-10-15';
 1	1440
-Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges
+Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges
 
 select uniqExact(_part), count() from tMM where d >= '2020-09-01 00:00:00' and d<'2020-10-15 00:00:00';
 3	15000
-Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges
+Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges
 
 select uniqExact(_part), count() from tMM where d >= '2020-01-16 00:00:00' and d < toDateTime('2021-08-17 00:00:00');
 6	30000
-Selected 6 parts by partition key, 6 parts by primary key, 6 marks by primary key, 6 marks to read from 6 ranges
+Selected 6/6 parts by partition key, 6 parts by primary key, 6/12 marks by primary key, 6 marks to read from 6 ranges
 
 select uniqExact(_part), count() from tMM where d >= '2020-09-16 00:00:00' and d < toDateTime('2020-10-01 00:00:00');
 0	0
-Selected 0 parts by partition key, 0 parts by primary key, 0 marks by primary key, 0 marks to read from 0 ranges
+Selected 0/6 parts by partition key, 0 parts by primary key, 0/0 marks by primary key, 0 marks to read from 0 ranges
 
 select uniqExact(_part), count() from tMM where d >= '2020-09-12 00:00:00' and d < '2020-10-16 00:00:00';
 2	6440
-Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges
+Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges
 
 select uniqExact(_part), count() from tMM where toStartOfDay(d) >= '2020-09-12 00:00:00';
 2	10000
-Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges
+Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges
 
 select uniqExact(_part), count() from tMM where toStartOfDay(d) = '2020-09-01 00:00:00';
 2	2880
-Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges
+Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges
 
 select uniqExact(_part), count() from tMM where toStartOfDay(d) = '2020-10-01 00:00:00';
 1	1440
-Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges
+Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges
 
 select uniqExact(_part), count() from tMM where toStartOfDay(d) >= '2020-09-15 00:00:00' and d < '2020-10-16 00:00:00';
 2	6440
-Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges
+Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges
 
 select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010;
 4	20000
-Selected 4 parts by partition key, 4 parts by primary key, 4 marks by primary key, 4 marks to read from 4 ranges
+Selected 4/6 parts by partition key, 4 parts by primary key, 4/8 marks by primary key, 4 marks to read from 4 ranges
 
 select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202009;
 2	10000
-Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges
+Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges
 
 select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010 and toStartOfDay(d) = '2020-10-01 00:00:00';
 1	1440
-Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges
+Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges
 
 select uniqExact(_part), count() from tMM where toYYYYMM(d) >= 202009 and toStartOfDay(d) < '2020-10-02 00:00:00';
 3	11440
-Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges
+Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges
 
 select uniqExact(_part), count() from tMM where toYYYYMM(d) > 202009 and toStartOfDay(d) < '2020-10-02 00:00:00';
 1	1440
-Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges
+Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges
 
 select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202009 and toStartOfDay(d) < '2020-10-02 00:00:00';
 3	11440
-Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges
+Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges
 
 select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010 and toStartOfDay(d) < '2020-10-02 00:00:00';
 1	1440
-Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges
+Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges
 
 select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010;
 2	10000
-Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges
+Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges
 
 select uniqExact(_part), count() from tMM where toYYYYMM(d-1)+1 = 202010;
 3	9999
-Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges
+Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges
 
 select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-15';
 2	10000
-Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges
+Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges
 
 select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-01';
 4	20000
-Selected 4 parts by partition key, 4 parts by primary key, 4 marks by primary key, 4 marks to read from 4 ranges
+Selected 4/6 parts by partition key, 4 parts by primary key, 4/8 marks by primary key, 4 marks to read from 4 ranges
 
 select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-01' and toStartOfMonth(d) < '2020-10-01';
 2	10000
-Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges
+Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges
 
 select uniqExact(_part), count() from tMM where toYYYYMM(d-1)+1 = 202010;
 2	9999
-Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges
+Selected 2/3 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges
 
 select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010;
 1	10000
-Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges
+Selected 1/3 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges
 
 select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010;
 2	20000
-Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges
+Selected 2/3 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges
 
 --------- tDD ----------------------------
 select uniqExact(_part), count() from tDD where toDate(d)=toDate('2020-09-24');
 1	10000
-Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges
+Selected 1/4 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges
 
 select uniqExact(_part), count() FROM tDD WHERE toDate(d) = toDate('2020-09-24');
 1	10000
-Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges
+Selected 1/4 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges
 
 select uniqExact(_part), count() FROM tDD WHERE toDate(d) = '2020-09-24';
 1	10000
-Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges
+Selected 1/4 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges
 
 select uniqExact(_part), count() FROM tDD WHERE toDate(d) >= '2020-09-23' and toDate(d) <= '2020-09-26';
 3	40000
-Selected 3 parts by partition key, 3 parts by primary key, 4 marks by primary key, 4 marks to read from 3 ranges
+Selected 3/4 parts by partition key, 3 parts by primary key, 4/7 marks by primary key, 4 marks to read from 3 ranges
 
 select uniqExact(_part), count() FROM tDD WHERE toYYYYMMDD(d) >= 20200923 and toDate(d) <= '2020-09-26';
 3	40000
-Selected 3 parts by partition key, 3 parts by primary key, 4 marks by primary key, 4 marks to read from 3 ranges
+Selected 3/4 parts by partition key, 3 parts by primary key, 4/7 marks by primary key, 4 marks to read from 3 ranges
 
 --------- sDD ----------------------------
 select uniqExact(_part), count() from sDD;
 6	30000
-Selected 6 parts by partition key, 6 parts by primary key, 6 marks by primary key, 6 marks to read from 6 ranges
+Selected 6/6 parts by partition key, 6 parts by primary key, 6/12 marks by primary key, 6 marks to read from 6 ranges
 
 select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1)+1 = 202010;
 3	9999
-Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges
+Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges
 
 select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) = 202010;
 2	9999
-Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges
+Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges
 
 select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) = 202110;
 0	0
-Selected 0 parts by partition key, 0 parts by primary key, 0 marks by primary key, 0 marks to read from 0 ranges
+Selected 0/6 parts by partition key, 0 parts by primary key, 0/0 marks by primary key, 0 marks to read from 0 ranges
 
 select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC'))+1 > 202009 and toStartOfDay(toDateTime(intDiv(d,1000),'UTC')) < toDateTime('2020-10-02 00:00:00','UTC');
 3	11440
-Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges
+Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges
 
 select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC'))+1 > 202009 and toDateTime(intDiv(d,1000),'UTC') < toDateTime('2020-10-01 00:00:00','UTC');
 2	10000
-Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges
+Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges
 
 select uniqExact(_part), count() from sDD where d >= 1598918400000;
 4	20000
-Selected 4 parts by partition key, 4 parts by primary key, 4 marks by primary key, 4 marks to read from 4 ranges
+Selected 4/6 parts by partition key, 4 parts by primary key, 4/8 marks by primary key, 4 marks to read from 4 ranges
 
 select uniqExact(_part), count() from sDD where d >= 1598918400000 and toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) < 202010;
 3	10001
-Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges
+Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges
 
 --------- xMM ----------------------------
 select uniqExact(_part), count() from xMM where toStartOfDay(d) >= '2020-10-01 00:00:00';
 2	10000
-Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges
+Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges
 
 select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00';
 3	10001
-Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges
+Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges
 
 select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00';
 2	10000
-Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges
+Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges
 
 select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a=1;
 1	1
-Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges
+Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges
 
 select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a<>3;
 2	5001
-Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges
+Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges
 
 select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00' and a<>3;
 1	5000
-Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges
+Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges
 
 select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-11-01 00:00:00' and a = 1;
 2	10000
-Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges
+Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges
 
 select uniqExact(_part), count() from xMM where a = 1;
 3	15000
-Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges
+Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges
 
 select uniqExact(_part), count() from xMM where a = 66;
 0	0
-Selected 0 parts by partition key, 0 parts by primary key, 0 marks by primary key, 0 marks to read from 0 ranges
+Selected 0/6 parts by partition key, 0 parts by primary key, 0/0 marks by primary key, 0 marks to read from 0 ranges
 
 select uniqExact(_part), count() from xMM where a <> 66;
 6	30000
-Selected 6 parts by partition key, 6 parts by primary key, 6 marks by primary key, 6 marks to read from 6 ranges
+Selected 6/6 parts by partition key, 6 parts by primary key, 6/12 marks by primary key, 6 marks to read from 6 ranges
 
 select uniqExact(_part), count() from xMM where a = 2;
 2	10000
-Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges
+Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges
 
 select uniqExact(_part), count() from xMM where a = 1;
 2	15000
-Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges
+Selected 2/5 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges
 
 select uniqExact(_part), count() from xMM where toStartOfDay(d) >= '2020-10-01 00:00:00';
 1	10000
-Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges
+Selected 1/5 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges
 
 select uniqExact(_part), count() from xMM where a <> 66;
 5	30000
-Selected 5 parts by partition key, 5 parts by primary key, 5 marks by primary key, 5 marks to read from 5 ranges
+Selected 5/5 parts by partition key, 5 parts by primary key, 5/10 marks by primary key, 5 marks to read from 5 ranges
 
 select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a<>3;
 2	5001
-Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges
+Selected 2/5 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges
 
 select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00' and a<>3;
 1	5000
-Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges
+Selected 1/5 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges
 

From 790c210e51d66f1d06077d1921b32045da0fa4af Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sat, 13 Feb 2021 20:31:37 +0300
Subject: [PATCH 1052/1238] Mark 01508_partition_pruning as long

https://clickhouse-test-reports.s3.yandex.net/20466/ff647ad176ad32718fcfc87677effa0ab37e3f10/functional_stateless_tests_flaky_check_(address).html#fail1
---
 ....queries => 01508_partition_pruning_long.queries} |  0
 ...erence => 01508_partition_pruning_long.reference} |  0
 ...on_pruning.sh => 01508_partition_pruning_long.sh} | 12 +++++-------
 tests/queries/skip_list.json                         |  4 ++--
 4 files changed, 7 insertions(+), 9 deletions(-)
 rename tests/queries/0_stateless/{01508_partition_pruning.queries => 01508_partition_pruning_long.queries} (100%)
 rename tests/queries/0_stateless/{01508_partition_pruning.reference => 01508_partition_pruning_long.reference} (100%)
 rename tests/queries/0_stateless/{01508_partition_pruning.sh => 01508_partition_pruning_long.sh} (88%)

diff --git a/tests/queries/0_stateless/01508_partition_pruning.queries b/tests/queries/0_stateless/01508_partition_pruning_long.queries
similarity index 100%
rename from tests/queries/0_stateless/01508_partition_pruning.queries
rename to tests/queries/0_stateless/01508_partition_pruning_long.queries
diff --git a/tests/queries/0_stateless/01508_partition_pruning.reference b/tests/queries/0_stateless/01508_partition_pruning_long.reference
similarity index 100%
rename from tests/queries/0_stateless/01508_partition_pruning.reference
rename to tests/queries/0_stateless/01508_partition_pruning_long.reference
diff --git a/tests/queries/0_stateless/01508_partition_pruning.sh b/tests/queries/0_stateless/01508_partition_pruning_long.sh
similarity index 88%
rename from tests/queries/0_stateless/01508_partition_pruning.sh
rename to tests/queries/0_stateless/01508_partition_pruning_long.sh
index b5ec6388d5c..1b3c524ac77 100755
--- a/tests/queries/0_stateless/01508_partition_pruning.sh
+++ b/tests/queries/0_stateless/01508_partition_pruning_long.sh
@@ -4,8 +4,8 @@
 # Description of test result:
 #   Test the correctness of the partition
 #   pruning
-#   
-#   Script executes queries from a file 01508_partition_pruning.queries  (1 line = 1 query) 
+#
+#   Script executes queries from a file 01508_partition_pruning_long.queries  (1 line = 1 query)
 #   Queries are started with 'select' (but NOT with 'SELECT') are executed with log_level=debug
 #-------------------------------------------------------------------------------------------
 
@@ -18,7 +18,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 #export CURDIR=.
 
 
-queries="${CURDIR}/01508_partition_pruning.queries"
+queries="${CURDIR}/01508_partition_pruning_long.queries"
 while IFS= read -r sql
 do
   [ -z "$sql" ] && continue
@@ -30,9 +30,7 @@ do
     ${CLICKHOUSE_CLIENT} --query "$sql" 2>&1 | grep -oh "Selected .* parts by partition key, *. parts by primary key, .* marks by primary key, .* marks to read from .* ranges.*$"
     CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/--send_logs_level=debug/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/g')
     echo ""
-  else  
+  else
     ${CLICKHOUSE_CLIENT} --query "$sql"
-  fi  
+  fi
 done < "$queries"
-
-
diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json
index 07250cd9c90..e4e7504ba41 100644
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@@ -103,7 +103,7 @@
         "00738_lock_for_inner_table"
     ],
     "polymorphic-parts": [
-        "01508_partition_pruning", /// bug, shoud be fixed
+        "01508_partition_pruning_long", /// bug, shoud be fixed
         "01482_move_to_prewhere_and_cast" /// bug, shoud be fixed
     ],
     "antlr": [
@@ -267,7 +267,7 @@
         "01501_clickhouse_client_INSERT_exception",
         "01504_compression_multiple_streams",
         "01508_explain_header",
-        "01508_partition_pruning",
+        "01508_partition_pruning_long",
         "01509_check_parallel_quorum_inserts",
         "01509_parallel_quorum_and_merge",
         "01515_mv_and_array_join_optimisation_bag",

From 179a0f9d8bfd540e730abacbe9c11d945ac3b405 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 13 Feb 2021 00:26:25 +0300
Subject: [PATCH 1053/1238] Performance improvement by Nikolai Kochetov

---
 src/Storages/StorageMemory.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/Storages/StorageMemory.h b/src/Storages/StorageMemory.h
index dc695427156..79ced856231 100644
--- a/src/Storages/StorageMemory.h
+++ b/src/Storages/StorageMemory.h
@@ -45,6 +45,8 @@ public:
     /// Smaller blocks (e.g. 64K rows) are better for CPU cache.
     bool prefersLargeBlocks() const override { return false; }
 
+    bool hasEvenlyDistributedRead() const override { return true; }
+
     BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, const Context & context) override;
 
     void drop() override;

From 652ede5af91e3a8ab7e3afe4dd50f3c45c00d14f Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Sun, 14 Feb 2021 00:06:40 +0300
Subject: [PATCH 1054/1238] add test

---
 tests/queries/0_stateless/01177_group_array_moving.reference | 2 ++
 tests/queries/0_stateless/01177_group_array_moving.sql       | 2 ++
 2 files changed, 4 insertions(+)
 create mode 100644 tests/queries/0_stateless/01177_group_array_moving.reference
 create mode 100644 tests/queries/0_stateless/01177_group_array_moving.sql

diff --git a/tests/queries/0_stateless/01177_group_array_moving.reference b/tests/queries/0_stateless/01177_group_array_moving.reference
new file mode 100644
index 00000000000..d74c84bb94f
--- /dev/null
+++ b/tests/queries/0_stateless/01177_group_array_moving.reference
@@ -0,0 +1,2 @@
+[-9223372036854775808,0,-9223372036854775808,0,-9223372036854775808,0]	[18446744073709551615,18446744073709551614,18446744073709551613,18446744073709551612,18446744073709551611,18446744073709551610]	[0,9223372036854775807,9223372036854775805,9223372036854775805,18446744073709551612,18446744073709551610]
+[-35888607147294850,-71777214294589700,-107665821441884540,-143554428589179400,-179443035736474240,-215331642883769100]	[17592202821648,35184405643296,52776608464944,70368811286592,87961014108240,105553216929888]	[0,1,3,3,4,6]
diff --git a/tests/queries/0_stateless/01177_group_array_moving.sql b/tests/queries/0_stateless/01177_group_array_moving.sql
new file mode 100644
index 00000000000..b1969e204fc
--- /dev/null
+++ b/tests/queries/0_stateless/01177_group_array_moving.sql
@@ -0,0 +1,2 @@
+SELECT groupArrayMovingSum(257)(-9223372036854775808), groupArrayMovingSum(1048575)(18446744073709551615), groupArrayMovingSum(9223372036854775807)(number * 9223372036854775807) FROM remote('127.0.0.{1..2}', numbers(3));
+SELECT groupArrayMovingAvg(257)(-9223372036854775808), groupArrayMovingAvg(1048575)(18446744073709551615), groupArrayMovingAvg(9223372036854775807)(number * 9223372036854775807) FROM remote('127.0.0.{1..2}', numbers(3));

From b0f2a84306f34eb3d69fdbe40f841fc91bff8149 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Sun, 14 Feb 2021 01:12:10 +0300
Subject: [PATCH 1055/1238] fix bad test

---
 tests/queries/0_stateless/01669_columns_declaration_serde.sql | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/queries/0_stateless/01669_columns_declaration_serde.sql b/tests/queries/0_stateless/01669_columns_declaration_serde.sql
index 8e3354d63cd..a6bf1184e9f 100644
--- a/tests/queries/0_stateless/01669_columns_declaration_serde.sql
+++ b/tests/queries/0_stateless/01669_columns_declaration_serde.sql
@@ -22,12 +22,12 @@ DROP TABLE IF EXISTS test_r1;
 DROP TABLE IF EXISTS test_r2;
 
 CREATE TABLE test_r1 (x UInt64, "\\" String DEFAULT '\r\n\t\\' || '
-') ENGINE = ReplicatedMergeTree('/clickhouse/test', 'r1') ORDER BY "\\";
+') ENGINE = ReplicatedMergeTree('/clickhouse/test_01669', 'r1') ORDER BY "\\";
 
 INSERT INTO test_r1 ("\\") VALUES ('\\');
 
 CREATE TABLE test_r2 (x UInt64, "\\" String DEFAULT '\r\n\t\\' || '
-') ENGINE = ReplicatedMergeTree('/clickhouse/test', 'r2') ORDER BY "\\";
+') ENGINE = ReplicatedMergeTree('/clickhouse/test_01669', 'r2') ORDER BY "\\";
 
 SYSTEM SYNC REPLICA test_r2;
 

From 5bdc57004682a5e0236ec630546d20ad752c2fde Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 14 Feb 2021 01:56:04 +0300
Subject: [PATCH 1056/1238] Improve performance of GROUP BY multiple fixed size
 keys

---
 src/Common/ColumnsHashing.h               | 71 ++++++++++++++++++++++-
 src/Interpreters/AggregationCommon.h      | 32 ++++++++++
 src/Interpreters/Aggregator.h             |  8 ++-
 tests/performance/group_by_fixed_keys.xml |  7 +++
 4 files changed, 116 insertions(+), 2 deletions(-)
 create mode 100644 tests/performance/group_by_fixed_keys.xml

diff --git a/src/Common/ColumnsHashing.h b/src/Common/ColumnsHashing.h
index b1d25c98955..1ac753fbae5 100644
--- a/src/Common/ColumnsHashing.h
+++ b/src/Common/ColumnsHashing.h
@@ -455,7 +455,14 @@ template <>
 struct LowCardinalityKeys<false> {};
 
 /// For the case when all keys are of fixed length, and they fit in N (for example, 128) bits.
-template <typename Value, typename Key, typename Mapped, bool has_nullable_keys_ = false, bool has_low_cardinality_ = false, bool use_cache = true, bool need_offset = false>
+template <
+    typename Value,
+    typename Key,
+    typename Mapped,
+    bool has_nullable_keys_ = false,
+    bool has_low_cardinality_ = false,
+    bool use_cache = true,
+    bool need_offset = false>
 struct HashMethodKeysFixed
     : private columns_hashing_impl::BaseStateKeysFixed<Key, has_nullable_keys_>
     , public columns_hashing_impl::HashMethodBase<HashMethodKeysFixed<Value, Key, Mapped, has_nullable_keys_, has_low_cardinality_, use_cache, need_offset>, Value, Mapped, use_cache, need_offset>
@@ -471,6 +478,12 @@ struct HashMethodKeysFixed
     Sizes key_sizes;
     size_t keys_size;
 
+    /// SSSE3 shuffle method can be used. Shuffle masks will be calculated and stored here.
+#if defined(__SSSE3__) && !defined(MEMORY_SANITIZER)
+    std::unique_ptr<uint8_t[]> masks;
+    std::unique_ptr<const char*[]> columns_data;
+#endif
+
     HashMethodKeysFixed(const ColumnRawPtrs & key_columns, const Sizes & key_sizes_, const HashMethodContextPtr &)
         : Base(key_columns), key_sizes(std::move(key_sizes_)), keys_size(key_columns.size())
     {
@@ -491,6 +504,58 @@ struct HashMethodKeysFixed
                     low_cardinality_keys.nested_columns[i] = key_columns[i];
             }
         }
+
+#if defined(__SSSE3__) && !defined(MEMORY_SANITIZER)
+        if constexpr (!has_low_cardinality && !has_nullable_keys && sizeof(Key) <= 16)
+        {
+            /** The task is to "pack" multiple fixed-size fields into single larger Key.
+              * Example: pack UInt8, UInt32, UInt16, UInt64 into UInt128 key:
+              * [- ---- -- -------- -] - the resulting uint128 key
+              *  ^  ^   ^   ^       ^
+              *  u8 u32 u16 u64    zero
+              *
+              * We can do it with the help of SSSE3 shuffle instruction.
+              *
+              * There will be a mask for every GROUP BY element (keys_size masks in total).
+              * Every mask has 16 bytes but only sizeof(Key) bytes are used (other we don't care).
+              *
+              * Every byte in the mask has the following meaning:
+              * - if it is 0..15, take the element at this index from source register and place here in the result;
+              * - if it is 0xFF - set the elemend in the result to zero.
+              *
+              * Example:
+              * We want to copy UInt32 to offset 1 in the destination and set other bytes in the destination as zero.
+              * The corresponding mask will be: FF, 0, 1, 2, 3, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF
+              *
+              * The max size of destination is 16 bytes, because we cannot process more with SSSE3.
+              *
+              * The method is disabled under MSan, because it's allowed
+              * to load into SSE register and process up to 15 bytes of uninitialized memory in columns padding.
+              * We don't use this uninitialized memory but MSan cannot look "into" the shuffle instruction.
+              *
+              * 16-bytes masks can be placed overlapping, only first sizeof(Key) bytes are relevant in each mask.
+              * We initialize them to 0xFF and then set the needed elements.
+              */
+            size_t total_masks_size = sizeof(Key) * keys_size + (16 - sizeof(Key));
+            masks.reset(new uint8_t[total_masks_size]);
+            memset(masks.get(), 0xFF, total_masks_size);
+
+            size_t offset = 0;
+            for (size_t i = 0; i < keys_size; ++i)
+            {
+                for (size_t j = 0; j < key_sizes[i]; ++j)
+                {
+                    masks[i * sizeof(Key) + offset] = j;
+                    ++offset;
+                }
+            }
+
+            columns_data.reset(new const char*[keys_size]);
+
+            for (size_t i = 0; i < keys_size; ++i)
+                columns_data[i] = Base::getActualColumns()[i]->getRawData().data;
+        }
+#endif
     }
 
     ALWAYS_INLINE Key getKeyHolder(size_t row, Arena &) const
@@ -506,6 +571,10 @@ struct HashMethodKeysFixed
                 return packFixed<Key, true>(row, keys_size, low_cardinality_keys.nested_columns, key_sizes,
                                             &low_cardinality_keys.positions, &low_cardinality_keys.position_sizes);
 
+#if defined(__SSSE3__) && !defined(MEMORY_SANITIZER)
+            if constexpr (!has_low_cardinality && !has_nullable_keys && sizeof(Key) <= 16)
+                return packFixedShuffle<Key>(columns_data.get(), keys_size, key_sizes.data(), row, masks.get());
+#endif
             return packFixed<Key>(row, keys_size, Base::getActualColumns(), key_sizes);
         }
     }
diff --git a/src/Interpreters/AggregationCommon.h b/src/Interpreters/AggregationCommon.h
index f70ab282e6f..ca9b00184fb 100644
--- a/src/Interpreters/AggregationCommon.h
+++ b/src/Interpreters/AggregationCommon.h
@@ -15,6 +15,10 @@
 #include <Columns/ColumnFixedString.h>
 #include <Columns/ColumnLowCardinality.h>
 
+#if defined(__SSSE3__) && !defined(MEMORY_SANITIZER)
+#include <tmmintrin.h>
+#endif
+
 
 template <>
 struct DefaultHash<StringRef> : public StringRefHash {};
@@ -255,4 +259,32 @@ static inline StringRef ALWAYS_INLINE serializeKeysToPoolContiguous(
 }
 
 
+/** Pack elements with shuffle instruction.
+  * See the explanation in ColumnsHashing.h
+  */
+#if defined(__SSSE3__) && !defined(MEMORY_SANITIZER)
+template <typename T>
+static T ALWAYS_INLINE packFixedShuffle(
+    const char * __restrict * __restrict srcs,
+    size_t num_srcs,
+    const size_t * __restrict elem_sizes,
+    size_t idx,
+    const uint8_t * __restrict masks)
+{
+    __m128i res{};
+
+    for (size_t i = 0; i < num_srcs; ++i)
+    {
+        res = _mm_xor_si128(res,
+            _mm_shuffle_epi8(
+                _mm_loadu_si128(reinterpret_cast<const __m128i *>(srcs[i] + elem_sizes[i] * idx)),
+                _mm_loadu_si128(reinterpret_cast<const __m128i *>(&masks[i * sizeof(T)]))));
+    }
+
+    T out;
+    __builtin_memcpy(&out, &res, sizeof(T));
+    return out;
+}
+#endif
+
 }
diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h
index 2a1224b0b48..c5bcc1eb27f 100644
--- a/src/Interpreters/Aggregator.h
+++ b/src/Interpreters/Aggregator.h
@@ -365,7 +365,13 @@ struct AggregationMethodKeysFixed
     template <typename Other>
     AggregationMethodKeysFixed(const Other & other) : data(other.data) {}
 
-    using State = ColumnsHashing::HashMethodKeysFixed<typename Data::value_type, Key, Mapped, has_nullable_keys, has_low_cardinality, use_cache>;
+    using State = ColumnsHashing::HashMethodKeysFixed<
+        typename Data::value_type,
+        Key,
+        Mapped,
+        has_nullable_keys,
+        has_low_cardinality,
+        use_cache>;
 
     static const bool low_cardinality_optimization = false;
 
diff --git a/tests/performance/group_by_fixed_keys.xml b/tests/performance/group_by_fixed_keys.xml
new file mode 100644
index 00000000000..0be29ff11ac
--- /dev/null
+++ b/tests/performance/group_by_fixed_keys.xml
@@ -0,0 +1,7 @@
+<test>
+    <query>WITH toUInt8(number) AS k, toUInt64(k) AS k1, k AS k2 SELECT k1, k2, count() FROM numbers(100000000) GROUP BY k1, k2</query>
+    <query>WITH toUInt8(number) AS k, toUInt16(k) AS k1, toUInt32(k) AS k2, k AS k3 SELECT k1, k2, k3, count() FROM numbers(100000000) GROUP BY k1, k2, k3</query>
+    <query>WITH toUInt8(number) AS k, k AS k1, k + 1 AS k2 SELECT k1, k2, count() FROM numbers(100000000) GROUP BY k1, k2</query>
+    <query>WITH toUInt8(number) AS k, k AS k1, k + 1 AS k2, k + 2 AS k3, k + 3 AS k4 SELECT k1, k2, k3, k4, count() FROM numbers(100000000) GROUP BY k1, k2, k3, k4</query>
+    <query>WITH toUInt8(number) AS k, toUInt64(k) AS k1, k1 + 1 AS k2 SELECT k1, k2, count() FROM numbers(100000000) GROUP BY k1, k2</query>
+</test>

From 9b319af9651e130650b6c3438900d58eab98a63c Mon Sep 17 00:00:00 2001
From: Ramazan Polat <ramazanpolat@gmail.com>
Date: Sun, 14 Feb 2021 02:09:34 +0300
Subject: [PATCH 1057/1238] Added the RENAME COLUMN statement

---
 .../en/sql-reference/statements/alter/column.md | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md
index 0ea4d4b3dc5..0fa2c492bee 100644
--- a/docs/en/sql-reference/statements/alter/column.md
+++ b/docs/en/sql-reference/statements/alter/column.md
@@ -20,6 +20,7 @@ The following actions are supported:
 
 -   [ADD COLUMN](#alter_add-column) — Adds a new column to the table.
 -   [DROP COLUMN](#alter_drop-column) — Deletes the column.
+-   [RENAME COLUMN](#alter_rename-column) — Renames the column.
 -   [CLEAR COLUMN](#alter_clear-column) — Resets column values.
 -   [COMMENT COLUMN](#alter_comment-column) — Adds a text comment to the column.
 -   [MODIFY COLUMN](#alter_modify-column) — Changes column’s type, default expression and TTL.
@@ -78,6 +79,22 @@ Example:
 ALTER TABLE visits DROP COLUMN browser
 ```
 
+## RENAME COLUMN {#alter_rename-column}
+
+``` sql
+RENAME COLUMN [IF EXISTS] name to new_name
+```
+
+Renames the column `name` to `new_name`. If the `IF EXISTS` clause is specified, the query won’t return an error if the column doesn’t exist. Since renaming does not involve the underlying data, the query is completed almost instantly.
+
+**NOTE**: Columns specified in the key expression of the table (either with `ORDER BY` or `PRIMARY KEY`) cannot be renamed. Trying to change these columns will produce `SQL Error [524]`. 
+
+Example:
+
+``` sql
+ALTER TABLE visits RENAME COLUMN webBrowser TO browser
+```
+
 ## CLEAR COLUMN {#alter_clear-column}
 
 ``` sql

From 320ce101e11ae24d28432757af78d4f59017d1c2 Mon Sep 17 00:00:00 2001
From: Habibullah Oladepo <habibullaho@yahoo.com>
Date: Sun, 14 Feb 2021 00:26:10 +0100
Subject: [PATCH 1058/1238] Minor link fix in delete-old-data.md

Minor link fix in delete-old-data.md
---
 docs/en/faq/operations/delete-old-data.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/faq/operations/delete-old-data.md b/docs/en/faq/operations/delete-old-data.md
index 5addc455602..fdf1f1f290e 100644
--- a/docs/en/faq/operations/delete-old-data.md
+++ b/docs/en/faq/operations/delete-old-data.md
@@ -39,4 +39,4 @@ More details on [manipulating partitions](../../sql-reference/statements/alter/p
 
 It’s rather radical to drop all data from a table, but in some cases it might be exactly what you need.
 
-More details on [table truncation](../../sql-reference/statements/alter/partition.md#alter_drop-partition).
+More details on [table truncation](../../sql-reference/statements/truncate.md).

From b13d1f31422fe52f944ca95fe11276791434815d Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 14 Feb 2021 04:34:42 +0300
Subject: [PATCH 1059/1238] Fix integration test

---
 tests/integration/test_settings_profile/test.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/integration/test_settings_profile/test.py b/tests/integration/test_settings_profile/test.py
index 3ceef9f25cf..1945875bf53 100644
--- a/tests/integration/test_settings_profile/test.py
+++ b/tests/integration/test_settings_profile/test.py
@@ -46,7 +46,7 @@ def reset_after_test():
 
 
 def test_smoke():
-    # Set settings and constraints via CREATE SETTINGS PROFILE ... TO user 
+    # Set settings and constraints via CREATE SETTINGS PROFILE ... TO user
     instance.query(
         "CREATE SETTINGS PROFILE xyz SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000 TO robin")
     assert instance.query(
@@ -194,13 +194,13 @@ def test_show_profiles():
 
     assert instance.query("SHOW CREATE PROFILE xyz") == "CREATE SETTINGS PROFILE xyz\n"
     assert instance.query(
-        "SHOW CREATE SETTINGS PROFILE default") == "CREATE SETTINGS PROFILE default SETTINGS max_memory_usage = 10000000000, use_uncompressed_cache = 0, load_balancing = \\'random\\'\n"
+        "SHOW CREATE SETTINGS PROFILE default") == "CREATE SETTINGS PROFILE default SETTINGS max_memory_usage = 10000000000, load_balancing = \\'random\\'\n"
     assert instance.query(
-        "SHOW CREATE PROFILES") == "CREATE SETTINGS PROFILE default SETTINGS max_memory_usage = 10000000000, use_uncompressed_cache = 0, load_balancing = \\'random\\'\n" \
+        "SHOW CREATE PROFILES") == "CREATE SETTINGS PROFILE default SETTINGS max_memory_usage = 10000000000, load_balancing = \\'random\\'\n" \
                                    "CREATE SETTINGS PROFILE readonly SETTINGS readonly = 1\n" \
                                    "CREATE SETTINGS PROFILE xyz\n"
 
-    expected_access = "CREATE SETTINGS PROFILE default SETTINGS max_memory_usage = 10000000000, use_uncompressed_cache = 0, load_balancing = \\'random\\'\n" \
+    expected_access = "CREATE SETTINGS PROFILE default SETTINGS max_memory_usage = 10000000000, load_balancing = \\'random\\'\n" \
                       "CREATE SETTINGS PROFILE readonly SETTINGS readonly = 1\n" \
                       "CREATE SETTINGS PROFILE xyz\n"
     assert expected_access in instance.query("SHOW ACCESS")
@@ -210,7 +210,7 @@ def test_allow_ddl():
     assert "it's necessary to have grant" in instance.query_and_get_error("CREATE TABLE tbl(a Int32) ENGINE=Log", user="robin")
     assert "it's necessary to have grant" in instance.query_and_get_error("GRANT CREATE ON tbl TO robin", user="robin")
     assert "DDL queries are prohibited" in instance.query_and_get_error("CREATE TABLE tbl(a Int32) ENGINE=Log", settings={"allow_ddl": 0})
-    
+
     instance.query("GRANT CREATE ON tbl TO robin")
     instance.query("CREATE TABLE tbl(a Int32) ENGINE=Log", user="robin")
     instance.query("DROP TABLE tbl")

From 55c17ac93f83746e1ddc92172d45ceeb5973de9f Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 14 Feb 2021 06:11:12 +0300
Subject: [PATCH 1060/1238] Fix UBSan report in intDiv

---
 src/Functions/DivisionUtils.h                            | 9 ++++++++-
 .../01717_int_div_float_too_large_ubsan.reference        | 0
 .../0_stateless/01717_int_div_float_too_large_ubsan.sql  | 1 +
 3 files changed, 9 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/01717_int_div_float_too_large_ubsan.reference
 create mode 100644 tests/queries/0_stateless/01717_int_div_float_too_large_ubsan.sql

diff --git a/src/Functions/DivisionUtils.h b/src/Functions/DivisionUtils.h
index d0df7e41af1..ff5636bf9fb 100644
--- a/src/Functions/DivisionUtils.h
+++ b/src/Functions/DivisionUtils.h
@@ -100,7 +100,14 @@ struct DivideIntegralImpl
                     throw Exception("Cannot perform integer division on infinite or too large floating point numbers",
                         ErrorCodes::ILLEGAL_DIVISION);
 
-            return static_cast<Result>(checkedDivision(CastA(a), CastB(b)));
+            auto res = checkedDivision(CastA(a), CastB(b));
+
+            if constexpr (std::is_floating_point_v<decltype(res)>)
+                if (isNaN(res) || res > std::numeric_limits<Result>::max() || res < std::numeric_limits<Result>::lowest())
+                    throw Exception("Cannot perform integer division, because it will produce infinite or too large number",
+                        ErrorCodes::ILLEGAL_DIVISION);
+
+            return static_cast<Result>(res);
         }
     }
 
diff --git a/tests/queries/0_stateless/01717_int_div_float_too_large_ubsan.reference b/tests/queries/0_stateless/01717_int_div_float_too_large_ubsan.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/01717_int_div_float_too_large_ubsan.sql b/tests/queries/0_stateless/01717_int_div_float_too_large_ubsan.sql
new file mode 100644
index 00000000000..f3353cd3b8d
--- /dev/null
+++ b/tests/queries/0_stateless/01717_int_div_float_too_large_ubsan.sql
@@ -0,0 +1 @@
+SELECT intDiv(9223372036854775807, 0.9998999834060669); -- { serverError 153 }

From ed49367fc750d0d50edaa4dde3cc7cb56598c305 Mon Sep 17 00:00:00 2001
From: Amos Bird <amosbird@gmail.com>
Date: Sun, 14 Feb 2021 14:20:23 +0800
Subject: [PATCH 1061/1238] Fix global-with with subqueries

---
 src/Interpreters/InterpreterSelectWithUnionQuery.cpp      | 8 ++++++--
 src/Interpreters/InterpreterSelectWithUnionQuery.h        | 3 ++-
 src/Interpreters/getTableExpressions.cpp                  | 2 +-
 .../0_stateless/01717_global_with_subquery_fix.reference  | 0
 .../0_stateless/01717_global_with_subquery_fix.sql        | 1 +
 5 files changed, 10 insertions(+), 4 deletions(-)
 create mode 100644 tests/queries/0_stateless/01717_global_with_subquery_fix.reference
 create mode 100644 tests/queries/0_stateless/01717_global_with_subquery_fix.sql

diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp
index e6610df43ff..59fcff61936 100644
--- a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp
+++ b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp
@@ -329,7 +329,7 @@ InterpreterSelectWithUnionQuery::buildCurrentChildInterpreter(const ASTPtr & ast
 
 InterpreterSelectWithUnionQuery::~InterpreterSelectWithUnionQuery() = default;
 
-Block InterpreterSelectWithUnionQuery::getSampleBlock(const ASTPtr & query_ptr_, const Context & context_)
+Block InterpreterSelectWithUnionQuery::getSampleBlock(const ASTPtr & query_ptr_, const Context & context_, bool is_subquery)
 {
     auto & cache = context_.getSampleBlockCache();
     /// Using query string because query_ptr changes for every internal SELECT
@@ -339,7 +339,11 @@ Block InterpreterSelectWithUnionQuery::getSampleBlock(const ASTPtr & query_ptr_,
         return cache[key];
     }
 
-    return cache[key] = InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().analyze()).getSampleBlock();
+    if (is_subquery)
+        return cache[key]
+            = InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().subquery().analyze()).getSampleBlock();
+    else
+        return cache[key] = InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().analyze()).getSampleBlock();
 }
 
 
diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.h b/src/Interpreters/InterpreterSelectWithUnionQuery.h
index cd089a51970..f4062b2005e 100644
--- a/src/Interpreters/InterpreterSelectWithUnionQuery.h
+++ b/src/Interpreters/InterpreterSelectWithUnionQuery.h
@@ -35,7 +35,8 @@ public:
 
     static Block getSampleBlock(
         const ASTPtr & query_ptr_,
-        const Context & context_);
+        const Context & context_,
+        bool is_subquery = false);
 
     virtual void ignoreWithTotals() override;
 
diff --git a/src/Interpreters/getTableExpressions.cpp b/src/Interpreters/getTableExpressions.cpp
index 766ce257530..a4e971c302c 100644
--- a/src/Interpreters/getTableExpressions.cpp
+++ b/src/Interpreters/getTableExpressions.cpp
@@ -84,7 +84,7 @@ static NamesAndTypesList getColumnsFromTableExpression(
     if (table_expression.subquery)
     {
         const auto & subquery = table_expression.subquery->children.at(0);
-        names_and_type_list = InterpreterSelectWithUnionQuery::getSampleBlock(subquery, context).getNamesAndTypesList();
+        names_and_type_list = InterpreterSelectWithUnionQuery::getSampleBlock(subquery, context, true).getNamesAndTypesList();
     }
     else if (table_expression.table_function)
     {
diff --git a/tests/queries/0_stateless/01717_global_with_subquery_fix.reference b/tests/queries/0_stateless/01717_global_with_subquery_fix.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/01717_global_with_subquery_fix.sql b/tests/queries/0_stateless/01717_global_with_subquery_fix.sql
new file mode 100644
index 00000000000..14c4ac3e4ca
--- /dev/null
+++ b/tests/queries/0_stateless/01717_global_with_subquery_fix.sql
@@ -0,0 +1 @@
+WITH (SELECT count(distinct colU) from tabA) AS withA, (SELECT count(distinct colU) from tabA) AS withB SELECT withA / withB AS ratio FROM (SELECT date AS period, colX FROM (SELECT date, if(colA IN (SELECT colB FROM tabC), 0, colA) AS colX FROM tabB) AS tempB GROUP BY period, colX) AS main; -- {serverError 60}

From 17dce001362e9a178681756ae0498ef36b134008 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Sun, 14 Feb 2021 10:45:52 +0300
Subject: [PATCH 1062/1238] Temporary disable
 00992_system_parts_race_condition_zookeeper

---
 tests/queries/skip_list.json | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json
index e4e7504ba41..ee25bee6a0a 100644
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@@ -10,6 +10,7 @@
         "00152_insert_different_granularity",
         "00151_replace_partition_with_different_granularity",
         "00157_cache_dictionary",
+        "00992_system_parts_race_condition_zookeeper", /// TODO remove me (alesapin)
         "01193_metadata_loading",
         "01473_event_time_microseconds",
         "01526_max_untracked_memory", /// requires TraceCollector, does not available under sanitizers
@@ -25,6 +26,7 @@
         "memory_profiler",
         "odbc_roundtrip",
         "01103_check_cpu_instructions_at_startup",
+        "00992_system_parts_race_condition_zookeeper", /// TODO remove me (alesapin)
         "01473_event_time_microseconds",
         "01526_max_untracked_memory", /// requires TraceCollector, does not available under sanitizers
         "01193_metadata_loading"
@@ -35,6 +37,7 @@
         "memory_profiler",
         "01103_check_cpu_instructions_at_startup",
         "00900_orc_load",
+        "00992_system_parts_race_condition_zookeeper", /// TODO remove me (alesapin)
         "01473_event_time_microseconds",
         "01526_max_untracked_memory", /// requires TraceCollector, does not available under sanitizers
         "01193_metadata_loading"
@@ -46,6 +49,7 @@
         "01103_check_cpu_instructions_at_startup",
         "01086_odbc_roundtrip", /// can't pass because odbc libraries are not instrumented
         "00877_memory_limit_for_new_delete", /// memory limits don't work correctly under msan because it replaces malloc/free
+        "00992_system_parts_race_condition_zookeeper", /// TODO remove me (alesapin)
         "01473_event_time_microseconds",
         "01526_max_untracked_memory", /// requires TraceCollector, does not available under sanitizers
         "01193_metadata_loading"
@@ -57,6 +61,7 @@
         "00980_alter_settings_race",
         "00834_kill_mutation_replicated_zookeeper",
         "00834_kill_mutation",
+        "00992_system_parts_race_condition_zookeeper", /// TODO remove me (alesapin)
         "01200_mutations_memory_consumption",
         "01103_check_cpu_instructions_at_startup",
         "01037_polygon_dicts_",
@@ -82,6 +87,7 @@
         "00505_secure",
         "00505_shard_secure",
         "odbc_roundtrip",
+        "00992_system_parts_race_condition_zookeeper", /// TODO remove me (alesapin)
         "01103_check_cpu_instructions_at_startup",
         "01114_mysql_database_engine_segfault",
         "00834_cancel_http_readonly_queries_on_client_close",
@@ -95,16 +101,19 @@
         "01455_time_zones"
     ],
     "release-build": [
+        "00992_system_parts_race_condition_zookeeper" /// TODO remove me (alesapin)
     ],
     "database-ordinary": [
         "00604_show_create_database",
         "00609_mv_index_in_in",
         "00510_materizlized_view_and_deduplication_zookeeper",
-        "00738_lock_for_inner_table"
+        "00738_lock_for_inner_table",
+        "00992_system_parts_race_condition_zookeeper" /// TODO remove me (alesapin)
     ],
     "polymorphic-parts": [
         "01508_partition_pruning_long", /// bug, shoud be fixed
-        "01482_move_to_prewhere_and_cast" /// bug, shoud be fixed
+        "01482_move_to_prewhere_and_cast", /// bug, shoud be fixed
+        "00992_system_parts_race_condition_zookeeper" /// TODO remove me (alesapin)
     ],
     "antlr": [
         "00186_very_long_arrays",
@@ -144,6 +153,7 @@
         "00982_array_enumerate_uniq_ranked",
         "00984_materialized_view_to_columns",
         "00988_constraints_replication_zookeeper",
+        "00992_system_parts_race_condition_zookeeper", /// TODO remove me (alesapin)
         "00995_order_by_with_fill",
         "01001_enums_in_in_section",
         "01011_group_uniq_array_memsan",

From 607b57ea2842fee07a3a20c42f0b4aabc9623186 Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@yandex-team.ru>
Date: Sun, 14 Feb 2021 10:57:52 +0300
Subject: [PATCH 1063/1238] Update version_date.tsv after release 21.2.3.15

---
 utils/list-versions/version_date.tsv | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index 8d05f5fff46..f4616027512 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -1,3 +1,4 @@
+v21.2.3.15-stable	2021-02-14
 v21.2.2.8-stable	2021-02-07
 v21.1.3.32-stable	2021-02-03
 v21.1.2.15-stable	2021-01-18

From 09a5b7a05535b7fd5725bd80f5f13ad9bf05de7a Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@yandex-team.ru>
Date: Sun, 14 Feb 2021 11:35:34 +0300
Subject: [PATCH 1064/1238] Update version_date.tsv after release 21.1.4.46

---
 utils/list-versions/version_date.tsv | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index f4616027512..43a1b3eba50 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -1,5 +1,6 @@
 v21.2.3.15-stable	2021-02-14
 v21.2.2.8-stable	2021-02-07
+v21.1.4.46-stable	2021-02-14
 v21.1.3.32-stable	2021-02-03
 v21.1.2.15-stable	2021-01-18
 v20.12.5.18-stable	2021-02-03

From 37807e1a18a3bef186b97eb845faa943fa98f537 Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@yandex-team.ru>
Date: Sun, 14 Feb 2021 11:51:46 +0300
Subject: [PATCH 1065/1238] Update version_date.tsv after release 20.12.6.29

---
 utils/list-versions/version_date.tsv | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index 43a1b3eba50..d0d782e77ec 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -3,6 +3,7 @@ v21.2.2.8-stable	2021-02-07
 v21.1.4.46-stable	2021-02-14
 v21.1.3.32-stable	2021-02-03
 v21.1.2.15-stable	2021-01-18
+v20.12.6.29-stable	2021-02-14
 v20.12.5.18-stable	2021-02-03
 v20.12.5.14-stable	2020-12-28
 v20.12.4.5-stable	2020-12-24

From 48d38e497871556ce6bf3de2b18f8140a5474dbd Mon Sep 17 00:00:00 2001
From: damozhaeva <68770561+damozhaeva@users.noreply.github.com>
Date: Sun, 14 Feb 2021 14:19:26 +0300
Subject: [PATCH 1066/1238] Update docs/ru/operations/settings/settings.md

Co-authored-by: Ilya Yatsishin <2159081+qoega@users.noreply.github.com>
---
 docs/ru/operations/settings/settings.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index 716345a9560..bd7fa97db5d 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -1985,7 +1985,7 @@ SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1;
 
 ## output_format_pretty_grid_charset {#output-format-pretty-grid-charset}
 
-ППозволяет изменить кодировку, которая используется для рисования таблицы при выводе результатов запросов. Доступны следующие кодировки: UTF-8, ASCII.
+Позволяет изменить кодировку, которая используется для отрисовки таблицы при выводе результатов запросов. Доступны следующие кодировки: UTF-8, ASCII.
 
 **Пример**
 

From 89f2cf52f3798b7280391d86a170da6651e2857a Mon Sep 17 00:00:00 2001
From: tavplubix <avtokmakov@yandex-team.ru>
Date: Sun, 14 Feb 2021 14:24:54 +0300
Subject: [PATCH 1067/1238] Update skip_list.json

---
 tests/queries/skip_list.json | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json
index 07250cd9c90..0b4ac2b581b 100644
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@@ -574,6 +574,7 @@
         "01676_dictget_in_default_expression",
         "01715_background_checker_blather_zookeeper",
         "01700_system_zookeeper_path_in",
+        "01669_columns_declaration_serde",
         "attach",
         "ddl_dictionaries",
         "dictionary",

From 7f9436381f175eae6326bc7ddc9970f31849e499 Mon Sep 17 00:00:00 2001
From: Daria Mozhaeva <dmozhaeva@yandex-team.ru>
Date: Sun, 14 Feb 2021 14:48:26 +0300
Subject: [PATCH 1068/1238] fixed conflict

---
 docs/en/operations/settings/settings.md | 79 +------------------------
 1 file changed, 1 insertion(+), 78 deletions(-)

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index c91ed1f2400..50108531310 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -311,12 +311,8 @@ Enables or disables parsing enum values as enum ids for TSV input format.
 
 Possible values:
 
-<<<<<<< HEAD
-Enables or disables using default values if input data contain `NULL`, but the data type of the corresponding column in not `Nullable(T)` (for text input formats).
-=======
 -   0 — Enum values are parsed as values.
--   1 — Enum values are parsed as enum IDs
->>>>>>> 6325b15a63335e2efd7de1ae92d2907493a07a9c
+-   1 — Enum values are parsed as enum IDs.
 
 Default value: 0.
 
@@ -1318,15 +1314,7 @@ See also:
 
 ## insert_quorum_timeout {#settings-insert_quorum_timeout}
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-Write to a quorum timeout in seconds. If the timeout has passed and no write has taken place yet, ClickHouse will generate an exception and the client must repeat the query to write the same block to the same or any other replica.
-=======
 Write to a quorum timeout in milliseconds. If the timeout has passed and no write has taken place yet, ClickHouse will generate an exception and the client must repeat the query to write the same block to the same or any other replica.
->>>>>>> 6325b15a63335e2efd7de1ae92d2907493a07a9c
-=======
-Write to a quorum timeout in milliseconds. If the timeout has passed and no write has taken place yet, ClickHouse will generate an exception and the client must repeat the query to write the same block to the same or any other replica.
->>>>>>> 547db452d63ba42b88e82cbe9a2aa1f5c683403f
 
 Default value: 600 000 milliseconds (ten minutes).
 
@@ -1625,11 +1613,7 @@ Default value: 0.
 -   Type: seconds
 -   Default value: 60 seconds
 
-<<<<<<< HEAD
-Controls how fast errors in distributed tables are zeroed. If a replica is unavailable for some time, accumulates 5 errors, and distributed\_replica\_error\_half\_life is set to 1 second, then the replica is considered normal 3 seconds after the last error.
-=======
 Controls how fast errors in distributed tables are zeroed. If a replica is unavailable for some time, accumulates 5 errors, and distributed_replica_error_half_life is set to 1 second, then the replica is considered normal 3 seconds after the last error.
->>>>>>> 6325b15a63335e2efd7de1ae92d2907493a07a9c
 
 See also:
 
@@ -1875,8 +1859,6 @@ Default value: `0`.
 -   [Distributed Table Engine](../../engines/table-engines/special/distributed.md#distributed)
 -   [Managing Distributed Tables](../../sql-reference/statements/system.md#query-language-system-distributed)
 
-<<<<<<< HEAD
-=======
 ## insert_distributed_one_random_shard {#insert_distributed_one_random_shard}
 
 Enables or disables random shard insertion into a [Distributed](../../engines/table-engines/special/distributed.md#distributed) table when there is no distributed key.
@@ -1907,7 +1889,6 @@ Default value: `1`.
 
 ## background_buffer_flush_schedule_pool_size {#background_buffer_flush_schedule_pool_size}
 
->>>>>>> 6325b15a63335e2efd7de1ae92d2907493a07a9c
 Sets the number of threads performing background flush in [Buffer](../../engines/table-engines/special/buffer.md)-engine tables. This setting is applied at the ClickHouse server start and can’t be changed in a user session.
 
 Possible values:
@@ -2224,11 +2205,7 @@ Default value: `0`.
 
 ## lock_acquire_timeout {#lock_acquire_timeout}
 
-<<<<<<< HEAD
-Defines how many seconds a locking request waits before failing. 
-=======
 Defines how many seconds a locking request waits before failing.
->>>>>>> 6325b15a63335e2efd7de1ae92d2907493a07a9c
 
 Locking timeout is used to protect from deadlocks while executing read/write operations with tables. When the timeout expires and the locking request fails, the ClickHouse server throws an exception "Locking attempt timed out! Possible deadlock avoided. Client should retry." with error code `DEADLOCK_AVOIDED`.
 
@@ -2615,58 +2592,4 @@ Possible values:
 
 Default value: `16`.
 
-## optimize_on_insert {#optimize-on-insert}
-
-Enables or disables data transformation before the insertion, as if merge was done on this block (according to table engine).
-
-Possible values:
-
--   0 — Disabled.
--   1 — Enabled.
-
-Default value: 1.
-
-**Example**
-
-The difference between enabled and disabled:
-
-Query:
-
-```sql
-SET optimize_on_insert = 1;
-
-CREATE TABLE test1 (`FirstTable` UInt32) ENGINE = ReplacingMergeTree ORDER BY FirstTable;
-
-INSERT INTO test1 SELECT number % 2 FROM numbers(5);
-
-SELECT * FROM test1;
-
-SET optimize_on_insert = 0;
-
-CREATE TABLE test2 (`SecondTable` UInt32) ENGINE = ReplacingMergeTree ORDER BY SecondTable;
-
-INSERT INTO test2 SELECT number % 2 FROM numbers(5);
-
-SELECT * FROM test2;
-```
-
-Result:
-
-``` text
-┌─FirstTable─┐
-│          0 │
-│          1 │
-└────────────┘
-
-┌─SecondTable─┐
-│           0 │
-│           0 │
-│           0 │
-│           1 │
-│           1 │
-└─────────────┘
-```
-
-Note that this setting influences [Materialized view](../../sql-reference/statements/create/view.md#materialized) and [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md) behaviour.
-
 [Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->

From 00bb72735eef1b11f406a8e139d4667d8c7e8b4d Mon Sep 17 00:00:00 2001
From: Daria Mozhaeva <dmozhaeva@yandex-team.ru>
Date: Sun, 14 Feb 2021 15:55:40 +0300
Subject: [PATCH 1069/1238] add text

---
 docs/en/operations/settings/settings.md | 54 +++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 50108531310..40a68491682 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -2592,4 +2592,58 @@ Possible values:
 
 Default value: `16`.
 
+## optimize_on_insert {#optimize-on-insert}
+
+Enables or disables data transformation before the insertion, as if merge was done on this block (according to table engine).
+
+Possible values:
+
+-   0 — Disabled.
+-   1 — Enabled.
+
+Default value: 1.
+
+**Example**
+
+The difference between enabled and disabled:
+
+Query:
+
+```sql
+SET optimize_on_insert = 1;
+
+CREATE TABLE test1 (`FirstTable` UInt32) ENGINE = ReplacingMergeTree ORDER BY FirstTable;
+
+INSERT INTO test1 SELECT number % 2 FROM numbers(5);
+
+SELECT * FROM test1;
+
+SET optimize_on_insert = 0;
+
+CREATE TABLE test2 (`SecondTable` UInt32) ENGINE = ReplacingMergeTree ORDER BY SecondTable;
+
+INSERT INTO test2 SELECT number % 2 FROM numbers(5);
+
+SELECT * FROM test2;
+```
+
+Result:
+
+``` text
+┌─FirstTable─┐
+│          0 │
+│          1 │
+└────────────┘
+
+┌─SecondTable─┐
+│           0 │
+│           0 │
+│           0 │
+│           1 │
+│           1 │
+└─────────────┘
+```
+
+Note that this setting influences [Materialized view](../../sql-reference/statements/create/view.md#materialized) and [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md) behaviour.
+
 [Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->

From f409a6d4a71d85919185ac12df9e001747d4e763 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 14 Feb 2021 19:00:47 +0300
Subject: [PATCH 1070/1238] Fix build

---
 src/Interpreters/AggregationCommon.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Interpreters/AggregationCommon.h b/src/Interpreters/AggregationCommon.h
index ca9b00184fb..aafec9a7929 100644
--- a/src/Interpreters/AggregationCommon.h
+++ b/src/Interpreters/AggregationCommon.h
@@ -264,7 +264,7 @@ static inline StringRef ALWAYS_INLINE serializeKeysToPoolContiguous(
   */
 #if defined(__SSSE3__) && !defined(MEMORY_SANITIZER)
 template <typename T>
-static T ALWAYS_INLINE packFixedShuffle(
+static T inline packFixedShuffle(
     const char * __restrict * __restrict srcs,
     size_t num_srcs,
     const size_t * __restrict elem_sizes,

From d529db54980642028851ec3fa84af3d15127542a Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 14 Feb 2021 23:57:25 +0300
Subject: [PATCH 1071/1238] Adjustments

---
 src/Functions/DivisionUtils.h                          | 10 ++++++----
 .../01717_int_div_float_too_large_ubsan.sql            |  1 +
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/Functions/DivisionUtils.h b/src/Functions/DivisionUtils.h
index ff5636bf9fb..2b4c07b1cff 100644
--- a/src/Functions/DivisionUtils.h
+++ b/src/Functions/DivisionUtils.h
@@ -6,11 +6,11 @@
 #include <Common/NaNUtils.h>
 #include <DataTypes/NumberTraits.h>
 
-
 #if !defined(ARCADIA_BUILD)
 #    include <Common/config.h>
 #endif
 
+
 namespace DB
 {
 
@@ -90,20 +90,22 @@ struct DivideIntegralImpl
         }
         else
         {
+            /// Comparisons are not strict to avoid rounding issues when operand is implicitly casted to float.
+
             if constexpr (std::is_floating_point_v<A>)
-                if (isNaN(a) || a > std::numeric_limits<CastA>::max() || a < std::numeric_limits<CastA>::lowest())
+                if (isNaN(a) || a >= std::numeric_limits<CastA>::max() || a <= std::numeric_limits<CastA>::lowest())
                     throw Exception("Cannot perform integer division on infinite or too large floating point numbers",
                         ErrorCodes::ILLEGAL_DIVISION);
 
             if constexpr (std::is_floating_point_v<B>)
-                if (isNaN(b) || b > std::numeric_limits<CastB>::max() || b < std::numeric_limits<CastB>::lowest())
+                if (isNaN(b) || b >= std::numeric_limits<CastB>::max() || b <= std::numeric_limits<CastB>::lowest())
                     throw Exception("Cannot perform integer division on infinite or too large floating point numbers",
                         ErrorCodes::ILLEGAL_DIVISION);
 
             auto res = checkedDivision(CastA(a), CastB(b));
 
             if constexpr (std::is_floating_point_v<decltype(res)>)
-                if (isNaN(res) || res > std::numeric_limits<Result>::max() || res < std::numeric_limits<Result>::lowest())
+                if (isNaN(res) || res >= std::numeric_limits<Result>::max() || res <= std::numeric_limits<Result>::lowest())
                     throw Exception("Cannot perform integer division, because it will produce infinite or too large number",
                         ErrorCodes::ILLEGAL_DIVISION);
 
diff --git a/tests/queries/0_stateless/01717_int_div_float_too_large_ubsan.sql b/tests/queries/0_stateless/01717_int_div_float_too_large_ubsan.sql
index f3353cd3b8d..c4f26a079f0 100644
--- a/tests/queries/0_stateless/01717_int_div_float_too_large_ubsan.sql
+++ b/tests/queries/0_stateless/01717_int_div_float_too_large_ubsan.sql
@@ -1 +1,2 @@
 SELECT intDiv(9223372036854775807, 0.9998999834060669); -- { serverError 153 }
+SELECT intDiv(9223372036854775807, 1.);  -- { serverError 153 }

From 96dc69609c9def6dc5f457e67529e106f55ffccd Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Mon, 15 Feb 2021 00:00:18 +0300
Subject: [PATCH 1072/1238] Fix Arcadia

---
 src/Columns/ya.make    | 1 +
 src/Columns/ya.make.in | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/Columns/ya.make b/src/Columns/ya.make
index 061391b5214..54dd02609ff 100644
--- a/src/Columns/ya.make
+++ b/src/Columns/ya.make
@@ -7,6 +7,7 @@ ADDINCL(
     contrib/libs/icu/common
     contrib/libs/icu/i18n
     contrib/libs/pdqsort
+    contrib/libs/lz4
 )
 
 PEERDIR(
diff --git a/src/Columns/ya.make.in b/src/Columns/ya.make.in
index 4422d222ce1..846e2c6c3bd 100644
--- a/src/Columns/ya.make.in
+++ b/src/Columns/ya.make.in
@@ -6,6 +6,7 @@ ADDINCL(
     contrib/libs/icu/common
     contrib/libs/icu/i18n
     contrib/libs/pdqsort
+    contrib/libs/lz4
 )
 
 PEERDIR(

From 320fd6b264db77de1ef335c0025c5487868e9ddb Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Mon, 15 Feb 2021 03:04:46 +0300
Subject: [PATCH 1073/1238] startup without zk

---
 src/Databases/DatabaseReplicated.cpp          | 169 ++++++++++++------
 src/Databases/DatabaseReplicated.h            |   2 +
 src/Databases/DatabaseReplicatedWorker.cpp    |   2 +
 src/Interpreters/DDLWorker.cpp                |   2 +-
 .../test_replicated_database/test.py          |  49 ++++-
 5 files changed, 156 insertions(+), 68 deletions(-)

diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index d365ea24bbf..24a193d9134 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -82,37 +82,6 @@ DatabaseReplicated::DatabaseReplicated(
     /// If zookeeper chroot prefix is used, path should start with '/', because chroot concatenates without it.
     if (zookeeper_path.front() != '/')
         zookeeper_path = "/" + zookeeper_path;
-
-    if (!context_.hasZooKeeper())
-    {
-        throw Exception("Can't create replicated database without ZooKeeper", ErrorCodes::NO_ZOOKEEPER);
-    }
-    //FIXME it will fail on startup if zk is not available
-
-    auto current_zookeeper = global_context.getZooKeeper();
-
-    if (!current_zookeeper->exists(zookeeper_path))
-    {
-        /// Create new database, multiple nodes can execute it concurrently
-        createDatabaseNodesInZooKeeper(current_zookeeper);
-    }
-
-    replica_path = zookeeper_path + "/replicas/" + getFullReplicaName();
-
-    String replica_host_id;
-    if (current_zookeeper->tryGet(replica_path, replica_host_id))
-    {
-        String host_id = getHostID(global_context, db_uuid);
-        if (replica_host_id != host_id)
-            throw Exception(ErrorCodes::REPLICA_IS_ALREADY_EXIST,
-                            "Replica {} of shard {} of replicated database at {} already exists. Replica host ID: '{}', current host ID: '{}'",
-                            replica_name, shard_name, zookeeper_path, replica_host_id, host_id);
-    }
-    else
-    {
-        /// Throws if replica with the same name was created concurrently
-        createReplicaNodesInZooKeeper(current_zookeeper);
-    }
 }
 
 String DatabaseReplicated::getFullReplicaName() const
@@ -203,6 +172,50 @@ ClusterPtr DatabaseReplicated::getCluster() const
     return std::make_shared<Cluster>(global_context.getSettingsRef(), shards, username, password, global_context.getTCPPort(), false);
 }
 
+void DatabaseReplicated::tryConnectToZooKeeper(bool force_attach)
+{
+    try
+    {
+        if (!global_context.hasZooKeeper())
+        {
+            throw Exception("Can't create replicated database without ZooKeeper", ErrorCodes::NO_ZOOKEEPER);
+        }
+
+        auto current_zookeeper = global_context.getZooKeeper();
+
+        if (!current_zookeeper->exists(zookeeper_path))
+        {
+            /// Create new database, multiple nodes can execute it concurrently
+            createDatabaseNodesInZooKeeper(current_zookeeper);
+        }
+
+        replica_path = zookeeper_path + "/replicas/" + getFullReplicaName();
+
+        String replica_host_id;
+        if (current_zookeeper->tryGet(replica_path, replica_host_id))
+        {
+            String host_id = getHostID(global_context, db_uuid);
+            if (replica_host_id != host_id)
+                throw Exception(ErrorCodes::REPLICA_IS_ALREADY_EXIST,
+                                "Replica {} of shard {} of replicated database at {} already exists. Replica host ID: '{}', current host ID: '{}'",
+                                replica_name, shard_name, zookeeper_path, replica_host_id, host_id);
+        }
+        else
+        {
+            /// Throws if replica with the same name already exists
+            createReplicaNodesInZooKeeper(current_zookeeper);
+        }
+
+        is_readonly = false;
+    }
+    catch(...)
+    {
+        if (!force_attach)
+            throw;
+        tryLogCurrentException(log);
+    }
+}
+
 bool DatabaseReplicated::createDatabaseNodesInZooKeeper(const zkutil::ZooKeeperPtr & current_zookeeper)
 {
     current_zookeeper->createAncestors(zookeeper_path);
@@ -256,6 +269,8 @@ void DatabaseReplicated::createReplicaNodesInZooKeeper(const zkutil::ZooKeeperPt
 
 void DatabaseReplicated::loadStoredObjects(Context & context, bool has_force_restore_data_flag, bool force_attach)
 {
+    tryConnectToZooKeeper(force_attach);
+
     DatabaseAtomic::loadStoredObjects(context, has_force_restore_data_flag, force_attach);
 
     ddl_worker = std::make_unique<DatabaseReplicatedDDLWorker>(this, global_context);
@@ -264,6 +279,9 @@ void DatabaseReplicated::loadStoredObjects(Context & context, bool has_force_res
 
 BlockIO DatabaseReplicated::propose(const ASTPtr & query, const Context & query_context)
 {
+    if (is_readonly)
+        throw Exception(ErrorCodes::NO_ZOOKEEPER, "Database is in readonly mode, because it cannot connect to ZooKeeper");
+
     if (query_context.getClientInfo().query_kind != ClientInfo::QueryKind::INITIAL_QUERY)
         throw Exception(ErrorCodes::INCORRECT_QUERY, "It's not initial query. ON CLUSTER is not allowed for Replicated database.");
 
@@ -297,6 +315,24 @@ BlockIO DatabaseReplicated::propose(const ASTPtr & query, const Context & query_
     return io;
 }
 
+static UUID getTableUUIDIfReplicated(const String & metadata, const Context & context)
+{
+    bool looks_like_replicated = metadata.find("ReplicatedMergeTree") != std::string::npos;
+    if (!looks_like_replicated)
+        return UUIDHelpers::Nil;
+
+    ParserCreateQuery parser;
+    auto size = context.getSettingsRef().max_query_size;
+    auto depth = context.getSettingsRef().max_parser_depth;
+    ASTPtr query = parseQuery(parser, metadata, size, depth);
+    const ASTCreateQuery & create = query->as<const ASTCreateQuery &>();
+    if (!create.storage || !create.storage->engine)
+        return UUIDHelpers::Nil;
+    if (!startsWith(create.storage->engine->name, "Replicated") || !endsWith(create.storage->engine->name, "MergeTree"))
+        return UUIDHelpers::Nil;
+    assert(create.uuid != UUIDHelpers::Nil);
+    return create.uuid;
+}
 
 void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 our_log_ptr, UInt32 max_log_ptr)
 {
@@ -311,42 +347,44 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
 
     auto table_name_to_metadata = tryGetConsistentMetadataSnapshot(current_zookeeper, max_log_ptr);
 
+    /// For ReplicatedMergeTree tables we can compare only UUIDs to ensure that it's the same table.
+    /// Metadata can be different, it's handled on table replication level.
+    /// We need to handle only renamed tables.
+    /// TODO maybe we should also update MergeTree SETTINGS if required?
+    std::unordered_map<UUID, String> zk_replicated_id_to_name;
+    for (const auto & zk_table : table_name_to_metadata)
+    {
+        UUID zk_replicated_id = getTableUUIDIfReplicated(zk_table.second, global_context);
+        if (zk_replicated_id != UUIDHelpers::Nil)
+            zk_replicated_id_to_name.emplace(zk_replicated_id, zk_table.first);
+    }
+
     Strings tables_to_detach;
+    std::vector<std::pair<String, String>> replicated_tables_to_rename;
     size_t total_tables = 0;
-    auto existing_tables_it = getTablesIterator(global_context, {});
-    while (existing_tables_it->isValid())
+    std::vector<UUID> replicated_ids;
+    for (auto existing_tables_it = getTablesIterator(global_context, {}); existing_tables_it->isValid(); existing_tables_it->next(), ++total_tables)
     {
         String name = existing_tables_it->name();
-        auto in_zk = table_name_to_metadata.find(name);
-        String local_metadata = readMetadataFile(name);
-        if (in_zk == table_name_to_metadata.end() || in_zk->second != local_metadata)
+        UUID local_replicated_id = UUIDHelpers::Nil;
+        if (existing_tables_it->table()->supportsReplication())
         {
-            bool should_detach = true;
-            bool looks_like_replicated = in_zk->second.find("ReplicatedMergeTree") != std::string::npos;
-
-            if (looks_like_replicated)
+            local_replicated_id = existing_tables_it->table()->getStorageID().uuid;
+            auto it = zk_replicated_id_to_name.find(local_replicated_id);
+            if (it != zk_replicated_id_to_name.end())
             {
-                ParserCreateQuery parser;
-                auto size = global_context.getSettingsRef().max_query_size;
-                auto depth = global_context.getSettingsRef().max_parser_depth;
-                ASTPtr local_create = parseQuery(parser, local_metadata, size, depth);
-                ASTPtr zk_create = parseQuery(parser, in_zk->second, size, depth);
-                if (local_create->as<ASTCreateQuery>()->uuid == zk_create->as<ASTCreateQuery>()->uuid)
-                {
-                    /// For ReplicatedMergeTree tables we can compare only UUIDs to ensure that it's the same table.
-                    /// Metadata can be different, it's handled on table replication level.
-                    /// TODO maybe we should also compare MergeTree SETTINGS?
-                    should_detach = false;
-                }
+                if (name != it->second)
+                    replicated_tables_to_rename.emplace_back(name, it->second);
+                continue;
             }
+        }
 
-            if (should_detach)
+        auto in_zk = table_name_to_metadata.find(name);
+        if (in_zk == table_name_to_metadata.end() || in_zk->second != readMetadataFile(name))
+        {
                 tables_to_detach.emplace_back(std::move(name));
         }
-        existing_tables_it->next();
-        ++total_tables;
     }
-    existing_tables_it.reset();
 
     String db_name = getDatabaseName();
     String to_db_name = getDatabaseName() + BROKEN_TABLES_SUFFIX;
@@ -375,17 +413,18 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
         if (getDatabaseName() != db_name)
             throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database was renamed, will retry");
 
+        auto table = tryGetTable(table_name, global_context);
         if (isDictionaryExist(table_name))
         {
             LOG_DEBUG(log, "Will DROP DICTIONARY {}", backQuoteIfNeed(table_name));
             DatabaseAtomic::removeDictionary(global_context, table_name);
             ++dropped_dicts;
         }
-        else if (!tryGetTable(table_name, global_context)->storesDataOnDisk())
+        else if (!table->storesDataOnDisk())
         {
             LOG_DEBUG(log, "Will DROP TABLE {}, because it does not store data on disk and can be safely dropped", backQuoteIfNeed(table_name));
             dropped_tables.push_back(tryGetTableUUID(table_name));
-            tryGetTable(table_name, global_context)->shutdown();
+            table->shutdown();
             DatabaseAtomic::dropTable(global_context, table_name, true);
         }
         else
@@ -401,6 +440,20 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
         LOG_WARNING(log, "Cleaned {} outdated objects: dropped {} dictionaries and {} tables, moved {} tables",
                     tables_to_detach.size(), dropped_dicts, dropped_tables.size(), moved_tables);
 
+    /// Now database is cleared from outdated tables, let's rename ReplicatedMergeTree tables to actual names
+    for (const auto & old_to_new : replicated_tables_to_rename)
+    {
+        const String & from = old_to_new.first;
+        const String & to = old_to_new.second;
+
+        LOG_DEBUG(log, "Will RENAME TABLE {} TO {}", backQuoteIfNeed(from), backQuoteIfNeed(to));
+        /// TODO Maybe we should do it in two steps: rename all tables to temporary names and then rename them to actual names?
+        DDLGuardPtr table_guard = DatabaseCatalog::instance().getDDLGuard(db_name, std::min(from, to));
+        DDLGuardPtr to_table_guard = DatabaseCatalog::instance().getDDLGuard(db_name, std::max(from, to));
+        DatabaseAtomic::renameTable(global_context, from, *this, to, false, false);
+    }
+
+
     for (const auto & id : dropped_tables)
         DatabaseCatalog::instance().waitTableFinallyDropped(id);
 
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index 2c998a8bc97..43a6ce15376 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -84,6 +84,7 @@ public:
     friend struct DatabaseReplicatedTask;
     friend class DatabaseReplicatedDDLWorker;
 private:
+    void tryConnectToZooKeeper(bool force_attach);
     bool createDatabaseNodesInZooKeeper(const ZooKeeperPtr & current_zookeeper);
     void createReplicaNodesInZooKeeper(const ZooKeeperPtr & current_zookeeper);
 
@@ -100,6 +101,7 @@ private:
 
     zkutil::ZooKeeperPtr getZooKeeper() const;
 
+    std::atomic_bool is_readonly = true;
     std::unique_ptr<DatabaseReplicatedDDLWorker> ddl_worker;
 };
 
diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp
index 521ba5b7cb2..8751c125383 100644
--- a/src/Databases/DatabaseReplicatedWorker.cpp
+++ b/src/Databases/DatabaseReplicatedWorker.cpp
@@ -29,6 +29,8 @@ void DatabaseReplicatedDDLWorker::initializeMainThread()
         try
         {
             auto zookeeper = getAndSetZooKeeper();
+            if (database->is_readonly)
+                database->tryConnectToZooKeeper(false);
             initializeReplication();
             initialized = true;
             return;
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 1f4c7932329..ac365dbb8d4 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -211,7 +211,7 @@ void DDLWorker::shutdown()
 
 DDLWorker::~DDLWorker()
 {
-    shutdown();
+    DDLWorker::shutdown();
 }
 
 
diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py
index faeb436f279..0db6884fbb7 100644
--- a/tests/integration/test_replicated_database/test.py
+++ b/tests/integration/test_replicated_database/test.py
@@ -196,14 +196,16 @@ def test_recover_staled_replica(started_cluster):
     dummy_node.query("CREATE TABLE recover.mt2 (n int) ENGINE=MergeTree order by n", settings=settings)
     main_node.query("CREATE TABLE recover.rmt1 (n int) ENGINE=ReplicatedMergeTree order by n", settings=settings)
     dummy_node.query("CREATE TABLE recover.rmt2 (n int) ENGINE=ReplicatedMergeTree order by n", settings=settings)
+    main_node.query("CREATE TABLE recover.rmt3 (n int) ENGINE=ReplicatedMergeTree order by n", settings=settings)
+    dummy_node.query("CREATE TABLE recover.rmt5 (n int) ENGINE=ReplicatedMergeTree order by n", settings=settings)
     main_node.query("CREATE DICTIONARY recover.d1 (n int DEFAULT 0, m int DEFAULT 1) PRIMARY KEY n SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'rmt1' PASSWORD '' DB 'recover')) LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT())")
     dummy_node.query("CREATE DICTIONARY recover.d2 (n int DEFAULT 0, m int DEFAULT 1) PRIMARY KEY n SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'rmt2' PASSWORD '' DB 'recover')) LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT())")
 
-    for table in ['t1', 't2', 'mt1', 'mt2', 'rmt1', 'rmt2']:
+    for table in ['t1', 't2', 'mt1', 'mt2', 'rmt1', 'rmt2', 'rmt3', 'rmt5']:
         main_node.query("INSERT INTO recover.{} VALUES (42)".format(table))
     for table in ['t1', 't2', 'mt1', 'mt2']:
         dummy_node.query("INSERT INTO recover.{} VALUES (42)".format(table))
-    for table in ['rmt1', 'rmt2']:
+    for table in ['rmt1', 'rmt2', 'rmt3', 'rmt5']:
         main_node.query("SYSTEM SYNC REPLICA recover.{}".format(table))
 
     with PartitionManager() as pm:
@@ -212,6 +214,8 @@ def test_recover_staled_replica(started_cluster):
         main_node.query("RENAME TABLE recover.t1 TO recover.m1", settings=settings)
         main_node.query("ALTER TABLE recover.mt1  ADD COLUMN m int", settings=settings)
         main_node.query("ALTER TABLE recover.rmt1 ADD COLUMN m int", settings=settings)
+        main_node.query("RENAME TABLE recover.rmt3 TO recover.rmt4", settings=settings)
+        main_node.query("DROP TABLE recover.rmt5", settings=settings)
         main_node.query("DROP DICTIONARY recover.d2", settings=settings)
         main_node.query("CREATE DICTIONARY recover.d2 (n int DEFAULT 0, m int DEFAULT 1) PRIMARY KEY n SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'rmt1' PASSWORD '' DB 'recover')) LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT());", settings=settings)
 
@@ -223,25 +227,52 @@ def test_recover_staled_replica(started_cluster):
         main_node.query("DROP TABLE recover.tmp", settings=settings)
         main_node.query("CREATE TABLE recover.tmp AS recover.m1", settings=settings)
 
-    assert main_node.query("SELECT name FROM system.tables WHERE database='recover' ORDER BY name") == "d1\nd2\nm1\nmt1\nmt2\nrmt1\nrmt2\nt2\ntmp\n"
+    assert main_node.query("SELECT name FROM system.tables WHERE database='recover' ORDER BY name") == "d1\nd2\nm1\nmt1\nmt2\nrmt1\nrmt2\nrmt4\nt2\ntmp\n"
     query = "SELECT name, uuid, create_table_query FROM system.tables WHERE database='recover' ORDER BY name"
     expected = main_node.query(query)
     assert_eq_with_retry(dummy_node, query, expected)
 
-    for table in ['m1', 't2', 'mt1', 'mt2', 'rmt1', 'rmt2', 'd1', 'd2']:
+    for table in ['m1', 't2', 'mt1', 'mt2', 'rmt1', 'rmt2', 'rmt4', 'd1', 'd2']:
         assert main_node.query("SELECT (*,).1 FROM recover.{}".format(table)) == "42\n"
-    for table in ['t2', 'rmt1', 'rmt2', 'd1', 'd2', 'mt2']:
+    for table in ['t2', 'rmt1', 'rmt2', 'rmt4', 'd1', 'd2', 'mt2']:
         assert dummy_node.query("SELECT (*,).1 FROM recover.{}".format(table)) == "42\n"
     for table in ['m1', 'mt1']:
         assert dummy_node.query("SELECT count() FROM recover.{}".format(table)) == "0\n"
 
-    assert dummy_node.query("SELECT count() FROM system.tables WHERE database='recover_broken_tables'") == "1\n"
-    table = dummy_node.query("SHOW TABLES FROM recover_broken_tables").strip()
-    assert "mt1_22_" in table
+    assert dummy_node.query("SELECT count() FROM system.tables WHERE database='recover_broken_tables'") == "2\n"
+    table = dummy_node.query("SHOW TABLES FROM recover_broken_tables LIKE 'mt1_26_%'").strip()
+    assert dummy_node.query("SELECT (*,).1 FROM recover_broken_tables.{}".format(table)) == "42\n"
+    table = dummy_node.query("SHOW TABLES FROM recover_broken_tables LIKE 'rmt5_26_%'").strip()
     assert dummy_node.query("SELECT (*,).1 FROM recover_broken_tables.{}".format(table)) == "42\n"
 
-    expected = "Cleaned 3 outdated objects: dropped 1 dictionaries and 1 tables, moved 1 tables"
+    expected = "Cleaned 4 outdated objects: dropped 1 dictionaries and 1 tables, moved 2 tables"
     assert_logs_contain(dummy_node, expected)
 
     dummy_node.query("DROP TABLE recover.tmp")
+    assert_eq_with_retry(main_node, "SELECT count() FROM system.tables WHERE database='recover' AND name='tmp'", "0\n")
 
+def test_startup_without_zk(started_cluster):
+    main_node.query("DROP DATABASE IF EXISTS testdb SYNC")
+    main_node.query("DROP DATABASE IF EXISTS recover SYNC")
+    with PartitionManager() as pm:
+        pm.drop_instance_zk_connections(main_node)
+        err = main_node.query_and_get_error("CREATE DATABASE startup ENGINE = Replicated('/clickhouse/databases/startup', 'shard1', 'replica1');")
+        assert "ZooKeeper" in err
+    main_node.query("CREATE DATABASE startup ENGINE = Replicated('/clickhouse/databases/startup', 'shard1', 'replica1');")
+    #main_node.query("CREATE TABLE startup.rmt (n int) ENGINE=ReplicatedMergeTree order by n")
+    main_node.query("CREATE TABLE startup.rmt (n int) ENGINE=MergeTree order by n")
+    main_node.query("INSERT INTO startup.rmt VALUES (42)")
+    with PartitionManager() as pm:
+        pm.drop_instance_zk_connections(main_node)
+        main_node.restart_clickhouse(stop_start_wait_sec=30)
+        assert main_node.query("SELECT (*,).1 FROM startup.rmt") == "42\n"
+
+    for _ in range(10):
+        try:
+            main_node.query("CREATE TABLE startup.m (n int) ENGINE=Memory")
+            break
+        except:
+            time.sleep(1)
+
+    main_node.query("EXCHANGE TABLES startup.rmt AND startup.m")
+    assert main_node.query("SELECT (*,).1 FROM startup.m") == "42\n"

From c5b96a522e29b38eae3f6c2d945540dd234e3c34 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Mon, 15 Feb 2021 04:57:34 +0300
Subject: [PATCH 1074/1238] Add a test for #8654

---
 tests/queries/0_stateless/01718_subtract_seconds_date.reference | 2 ++
 tests/queries/0_stateless/01718_subtract_seconds_date.sql       | 2 ++
 2 files changed, 4 insertions(+)
 create mode 100644 tests/queries/0_stateless/01718_subtract_seconds_date.reference
 create mode 100644 tests/queries/0_stateless/01718_subtract_seconds_date.sql

diff --git a/tests/queries/0_stateless/01718_subtract_seconds_date.reference b/tests/queries/0_stateless/01718_subtract_seconds_date.reference
new file mode 100644
index 00000000000..97e3da8cc48
--- /dev/null
+++ b/tests/queries/0_stateless/01718_subtract_seconds_date.reference
@@ -0,0 +1,2 @@
+2021-02-14 23:59:59
+10
diff --git a/tests/queries/0_stateless/01718_subtract_seconds_date.sql b/tests/queries/0_stateless/01718_subtract_seconds_date.sql
new file mode 100644
index 00000000000..6bffcd4db5a
--- /dev/null
+++ b/tests/queries/0_stateless/01718_subtract_seconds_date.sql
@@ -0,0 +1,2 @@
+SELECT subtractSeconds(toDate('2021-02-15'), 1);
+SELECT subtractSeconds(today(), 1) - subtractSeconds(today(), 11);

From 4c7923e6dff96fed33939d7c342b865811ea0228 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Mon, 15 Feb 2021 05:17:30 +0300
Subject: [PATCH 1075/1238] Add a test for #10893

---
 .../0_stateless/01719_join_timezone.reference |  3 ++
 .../0_stateless/01719_join_timezone.sql       | 45 +++++++++++++++++++
 2 files changed, 48 insertions(+)
 create mode 100644 tests/queries/0_stateless/01719_join_timezone.reference
 create mode 100644 tests/queries/0_stateless/01719_join_timezone.sql

diff --git a/tests/queries/0_stateless/01719_join_timezone.reference b/tests/queries/0_stateless/01719_join_timezone.reference
new file mode 100644
index 00000000000..c2702a38012
--- /dev/null
+++ b/tests/queries/0_stateless/01719_join_timezone.reference
@@ -0,0 +1,3 @@
+2020-05-13 13:38:45	2020-05-13 16:38:45
+2020-05-13 13:38:45	2020-05-13 16:38:45
+2020-05-13 13:38:45	2020-05-13 16:38:45
diff --git a/tests/queries/0_stateless/01719_join_timezone.sql b/tests/queries/0_stateless/01719_join_timezone.sql
new file mode 100644
index 00000000000..cbf0c27fcfc
--- /dev/null
+++ b/tests/queries/0_stateless/01719_join_timezone.sql
@@ -0,0 +1,45 @@
+DROP TABLE IF EXISTS test;
+
+CREATE TABLE test (timestamp DateTime('UTC'), i UInt8) Engine=MergeTree() PARTITION BY toYYYYMM(timestamp) ORDER BY (i);
+INSERT INTO test values ('2020-05-13 16:38:45', 1);
+
+SELECT
+    toTimeZone(timestamp, 'America/Sao_Paulo') AS converted,
+    timestamp AS original
+FROM test
+LEFT JOIN (SELECT 2 AS x) AS anything ON x = i
+WHERE timestamp >= toDateTime('2020-05-13T00:00:00', 'America/Sao_Paulo');
+
+/* This was incorrect result in previous ClickHouse versions:
+┌─converted───────────┬─original────────────┐
+│ 2020-05-13 16:38:45 │ 2020-05-13 16:38:45 │ <-- toTimeZone is ignored.
+└─────────────────────┴─────────────────────┘
+*/
+
+SELECT
+    toTimeZone(timestamp, 'America/Sao_Paulo') AS converted,
+    timestamp AS original
+FROM test
+-- LEFT JOIN (SELECT 2 AS x) AS anything ON x = i -- Removing the join fixes the issue.
+WHERE timestamp >= toDateTime('2020-05-13T00:00:00', 'America/Sao_Paulo');
+
+/*
+┌─converted───────────┬─original────────────┐
+│ 2020-05-13 13:38:45 │ 2020-05-13 16:38:45 │ <-- toTimeZone works.
+└─────────────────────┴─────────────────────┘
+*/
+
+SELECT
+    toTimeZone(timestamp, 'America/Sao_Paulo') AS converted,
+    timestamp AS original
+FROM test
+LEFT JOIN (SELECT 2 AS x) AS anything ON x = i
+WHERE timestamp >= '2020-05-13T00:00:00'; -- Not using toDateTime in the WHERE also fixes the issue.
+
+/*
+┌─converted───────────┬─original────────────┐
+│ 2020-05-13 13:38:45 │ 2020-05-13 16:38:45 │ <-- toTimeZone works.
+└─────────────────────┴─────────────────────┘
+*/
+
+DROP TABLE test;

From c1550814ca770a0ecb9aec0de8eeb77dee266ca4 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Mon, 15 Feb 2021 10:30:08 +0300
Subject: [PATCH 1076/1238] Disable snapshots for tests

---
 src/Coordination/NuKeeperStateMachine.cpp  |  2 ++
 tests/config/config.d/test_keeper_port.xml |  2 ++
 tests/queries/skip_list.json               | 14 ++------------
 3 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/src/Coordination/NuKeeperStateMachine.cpp b/src/Coordination/NuKeeperStateMachine.cpp
index d282f57ce73..0061645c75c 100644
--- a/src/Coordination/NuKeeperStateMachine.cpp
+++ b/src/Coordination/NuKeeperStateMachine.cpp
@@ -166,6 +166,8 @@ void NuKeeperStateMachine::create_snapshot(
         }
 
     }
+
+    LOG_DEBUG(log, "Created snapshot {}", s.get_last_log_idx());
     nuraft::ptr<std::exception> except(nullptr);
     bool ret = true;
     when_done(ret, except);
diff --git a/tests/config/config.d/test_keeper_port.xml b/tests/config/config.d/test_keeper_port.xml
index 6ca00a972d4..97c6d7c2e33 100644
--- a/tests/config/config.d/test_keeper_port.xml
+++ b/tests/config/config.d/test_keeper_port.xml
@@ -6,6 +6,8 @@
         <coordination_settings>
             <operation_timeout_ms>10000</operation_timeout_ms>
             <session_timeout_ms>30000</session_timeout_ms>
+            <snapshot_distance>0</snapshot_distance>
+            <reserved_log_items>0</reserved_log_items>
         </coordination_settings>
 
         <raft_configuration>
diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json
index ee25bee6a0a..e4e7504ba41 100644
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@@ -10,7 +10,6 @@
         "00152_insert_different_granularity",
         "00151_replace_partition_with_different_granularity",
         "00157_cache_dictionary",
-        "00992_system_parts_race_condition_zookeeper", /// TODO remove me (alesapin)
         "01193_metadata_loading",
         "01473_event_time_microseconds",
         "01526_max_untracked_memory", /// requires TraceCollector, does not available under sanitizers
@@ -26,7 +25,6 @@
         "memory_profiler",
         "odbc_roundtrip",
         "01103_check_cpu_instructions_at_startup",
-        "00992_system_parts_race_condition_zookeeper", /// TODO remove me (alesapin)
         "01473_event_time_microseconds",
         "01526_max_untracked_memory", /// requires TraceCollector, does not available under sanitizers
         "01193_metadata_loading"
@@ -37,7 +35,6 @@
         "memory_profiler",
         "01103_check_cpu_instructions_at_startup",
         "00900_orc_load",
-        "00992_system_parts_race_condition_zookeeper", /// TODO remove me (alesapin)
         "01473_event_time_microseconds",
         "01526_max_untracked_memory", /// requires TraceCollector, does not available under sanitizers
         "01193_metadata_loading"
@@ -49,7 +46,6 @@
         "01103_check_cpu_instructions_at_startup",
         "01086_odbc_roundtrip", /// can't pass because odbc libraries are not instrumented
         "00877_memory_limit_for_new_delete", /// memory limits don't work correctly under msan because it replaces malloc/free
-        "00992_system_parts_race_condition_zookeeper", /// TODO remove me (alesapin)
         "01473_event_time_microseconds",
         "01526_max_untracked_memory", /// requires TraceCollector, does not available under sanitizers
         "01193_metadata_loading"
@@ -61,7 +57,6 @@
         "00980_alter_settings_race",
         "00834_kill_mutation_replicated_zookeeper",
         "00834_kill_mutation",
-        "00992_system_parts_race_condition_zookeeper", /// TODO remove me (alesapin)
         "01200_mutations_memory_consumption",
         "01103_check_cpu_instructions_at_startup",
         "01037_polygon_dicts_",
@@ -87,7 +82,6 @@
         "00505_secure",
         "00505_shard_secure",
         "odbc_roundtrip",
-        "00992_system_parts_race_condition_zookeeper", /// TODO remove me (alesapin)
         "01103_check_cpu_instructions_at_startup",
         "01114_mysql_database_engine_segfault",
         "00834_cancel_http_readonly_queries_on_client_close",
@@ -101,19 +95,16 @@
         "01455_time_zones"
     ],
     "release-build": [
-        "00992_system_parts_race_condition_zookeeper" /// TODO remove me (alesapin)
     ],
     "database-ordinary": [
         "00604_show_create_database",
         "00609_mv_index_in_in",
         "00510_materizlized_view_and_deduplication_zookeeper",
-        "00738_lock_for_inner_table",
-        "00992_system_parts_race_condition_zookeeper" /// TODO remove me (alesapin)
+        "00738_lock_for_inner_table"
     ],
     "polymorphic-parts": [
         "01508_partition_pruning_long", /// bug, shoud be fixed
-        "01482_move_to_prewhere_and_cast", /// bug, shoud be fixed
-        "00992_system_parts_race_condition_zookeeper" /// TODO remove me (alesapin)
+        "01482_move_to_prewhere_and_cast" /// bug, shoud be fixed
     ],
     "antlr": [
         "00186_very_long_arrays",
@@ -153,7 +144,6 @@
         "00982_array_enumerate_uniq_ranked",
         "00984_materialized_view_to_columns",
         "00988_constraints_replication_zookeeper",
-        "00992_system_parts_race_condition_zookeeper", /// TODO remove me (alesapin)
         "00995_order_by_with_fill",
         "01001_enums_in_in_section",
         "01011_group_uniq_array_memsan",

From 02198d091ed5539e6683c607a6ee169edb09041c Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Mon, 15 Feb 2021 10:45:19 +0300
Subject: [PATCH 1077/1238] Add proper checks while parsing directory names for
 async INSERT (fixes SIGSEGV)

---
 src/Storages/Distributed/DirectoryMonitor.cpp | 39 ++++++++++++++++---
 1 file changed, 33 insertions(+), 6 deletions(-)

diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp
index bf15ca22ca9..6fe98c53b3e 100644
--- a/src/Storages/Distributed/DirectoryMonitor.cpp
+++ b/src/Storages/Distributed/DirectoryMonitor.cpp
@@ -48,6 +48,7 @@ namespace ErrorCodes
     extern const int TOO_LARGE_SIZE_COMPRESSED;
     extern const int ATTEMPT_TO_READ_AFTER_EOF;
     extern const int EMPTY_DATA_PASSED;
+    extern const int INCORRECT_FILE_NAME;
 }
 
 
@@ -56,14 +57,26 @@ namespace
     constexpr const std::chrono::minutes decrease_error_count_period{5};
 
     template <typename PoolFactory>
-    ConnectionPoolPtrs createPoolsForAddresses(const std::string & name, PoolFactory && factory)
+    ConnectionPoolPtrs createPoolsForAddresses(const std::string & name, PoolFactory && factory, Poco::Logger * log)
     {
         ConnectionPoolPtrs pools;
 
         for (auto it = boost::make_split_iterator(name, boost::first_finder(",")); it != decltype(it){}; ++it)
         {
             Cluster::Address address = Cluster::Address::fromFullString(boost::copy_range<std::string>(*it));
-            pools.emplace_back(factory(address));
+            try
+            {
+                pools.emplace_back(factory(address));
+            }
+            catch (const Exception & e)
+            {
+                if (e.code() == ErrorCodes::INCORRECT_FILE_NAME)
+                {
+                    tryLogCurrentException(log);
+                    continue;
+                }
+                throw;
+            }
         }
 
         return pools;
@@ -351,16 +364,30 @@ void StorageDistributedDirectoryMonitor::run()
 
 ConnectionPoolPtr StorageDistributedDirectoryMonitor::createPool(const std::string & name, const StorageDistributed & storage)
 {
-    const auto pool_factory = [&storage] (const Cluster::Address & address) -> ConnectionPoolPtr
+    const auto pool_factory = [&storage, &name] (const Cluster::Address & address) -> ConnectionPoolPtr
     {
         const auto & cluster = storage.getCluster();
         const auto & shards_info = cluster->getShardsInfo();
         const auto & shards_addresses = cluster->getShardsAddresses();
 
-        /// check new format shard{shard_index}_number{number_index}
+        /// check new format shard{shard_index}_number{replica_index}
+        /// (shard_index and replica_index starts from 1)
         if (address.shard_index != 0)
         {
-            return shards_info[address.shard_index - 1].per_replica_pools[address.replica_index - 1];
+            if (!address.replica_index)
+                throw Exception(ErrorCodes::INCORRECT_FILE_NAME,
+                    "Wrong replica_index ({})", address.replica_index, name);
+
+            if (address.shard_index > shards_info.size())
+                throw Exception(ErrorCodes::INCORRECT_FILE_NAME,
+                    "No shard with shard_index={} ({})", address.shard_index, name);
+
+            const auto & shard_info = shards_info[address.shard_index - 1];
+            if (address.replica_index > shard_info.per_replica_pools.size())
+                throw Exception(ErrorCodes::INCORRECT_FILE_NAME,
+                    "No shard with replica_index={} ({})", address.replica_index, name);
+
+            return shard_info.per_replica_pools[address.replica_index - 1];
         }
 
         /// existing connections pool have a higher priority
@@ -398,7 +425,7 @@ ConnectionPoolPtr StorageDistributedDirectoryMonitor::createPool(const std::stri
             address.secure);
     };
 
-    auto pools = createPoolsForAddresses(name, pool_factory);
+    auto pools = createPoolsForAddresses(name, pool_factory, storage.log);
 
     const auto settings = storage.global_context.getSettings();
     return pools.size() == 1 ? pools.front() : std::make_shared<ConnectionPoolWithFailover>(pools,

From 9686649b0229cc4f492dbf646d6342d587f02657 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Mon, 15 Feb 2021 12:42:50 +0300
Subject: [PATCH 1078/1238] Fix non-zero session reconnect in integration test

---
 tests/integration/test_testkeeper_back_to_back/test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/integration/test_testkeeper_back_to_back/test.py b/tests/integration/test_testkeeper_back_to_back/test.py
index 0f2c1ed19a5..8ec54f1a883 100644
--- a/tests/integration/test_testkeeper_back_to_back/test.py
+++ b/tests/integration/test_testkeeper_back_to_back/test.py
@@ -29,8 +29,8 @@ def get_fake_zk():
         def reset_last_zxid_listener(state):
             print("Fake zk callback called for state", state)
             global _fake_zk_instance
-            # reset last_zxid -- fake server doesn't support it
-            _fake_zk_instance.last_zxid = 0
+            if state != KazooState.CONNECTED:
+                _fake_zk_instance._reset()
 
         _fake_zk_instance.add_listener(reset_last_zxid_listener)
         _fake_zk_instance.start()

From 9c7cf9e92e8c75bc670abf070397c3aacbcf3193 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Mon, 15 Feb 2021 13:26:34 +0300
Subject: [PATCH 1079/1238] remove some debug code

---
 docker/test/stateful/run.sh                   |  4 +++
 docker/test/stateless/run.sh                  |  4 +++
 programs/server/Server.cpp                    |  4 ++-
 src/Core/Settings.h                           |  3 ++
 src/Databases/DatabaseReplicated.cpp          |  3 +-
 src/Databases/DatabaseReplicated.h            |  1 -
 src/Databases/DatabaseReplicatedWorker.cpp    |  4 +--
 src/Interpreters/DDLWorker.cpp                | 15 ++++-----
 src/Interpreters/DDLWorker.h                  |  5 +--
 src/Interpreters/InterpreterCreateQuery.cpp   | 21 ++++--------
 src/Interpreters/executeDDLQueryOnCluster.cpp | 12 +------
 tests/ci/ci_config.json                       | 24 ++++++++++++++
 tests/clickhouse-test                         | 17 +++++++---
 tests/config/install.sh                       |  3 ++
 tests/config/users.d/database_replicated.xml  | 10 ++++++
 .../test_materialize_mysql_database/test.py   |  2 +-
 .../configs/settings.xml                      | 12 +++++++
 .../test_replicated_database/test.py          | 10 +++---
 tests/queries/skip_list.json                  | 33 ++++---------------
 19 files changed, 109 insertions(+), 78 deletions(-)
 create mode 100644 tests/config/users.d/database_replicated.xml
 create mode 100644 tests/integration/test_replicated_database/configs/settings.xml

diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh
index f2fcefd604f..7779f0e9dc2 100755
--- a/docker/test/stateful/run.sh
+++ b/docker/test/stateful/run.sh
@@ -60,4 +60,8 @@ fi
 # more idiologically correct.
 read -ra ADDITIONAL_OPTIONS <<< "${ADDITIONAL_OPTIONS:-}"
 
+if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
+    ADDITIONAL_OPTIONS+=('--replicated-database')
+fi
+
 clickhouse-test --testname --shard --zookeeper --no-stateless --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh
index 575be721a54..d078f3739fd 100755
--- a/docker/test/stateless/run.sh
+++ b/docker/test/stateless/run.sh
@@ -57,6 +57,10 @@ function run_tests()
         ADDITIONAL_OPTIONS+=('4')
     fi
 
+    if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
+        ADDITIONAL_OPTIONS+=('--replicated-database')
+    fi
+
     clickhouse-test --testname --shard --zookeeper --hung-check --print-time \
             --test-runs "$NUM_TRIES" \
             "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 2bb5181d348..400796981d5 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -100,6 +100,7 @@ namespace CurrentMetrics
     extern const Metric Revision;
     extern const Metric VersionInteger;
     extern const Metric MemoryTracking;
+    extern const Metric MaxDDLEntryID;
 }
 
 
@@ -997,7 +998,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
         int pool_size = config().getInt("distributed_ddl.pool_size", 1);
         if (pool_size < 1)
             throw Exception("distributed_ddl.pool_size should be greater then 0", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
-        global_context->setDDLWorker(std::make_unique<DDLWorker>(pool_size, ddl_zookeeper_path, *global_context, &config(), "distributed_ddl"));
+        global_context->setDDLWorker(std::make_unique<DDLWorker>(pool_size, ddl_zookeeper_path, *global_context, &config(),
+                                                                 "distributed_ddl", "DDLWorker", &CurrentMetrics::MaxDDLEntryID));
     }
 
     std::unique_ptr<DNSCacheUpdater> dns_cache_updater;
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 96571cedd3f..ba4fcdda48c 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -420,6 +420,9 @@ class IColumn;
     M(Bool, optimize_rewrite_sum_if_to_count_if, true, "Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent", 0) \
     M(UInt64, insert_shard_id, 0, "If non zero, when insert into a distributed table, the data will be inserted into the shard `insert_shard_id` synchronously. Possible values range from 1 to `shards_number` of corresponding distributed table", 0) \
     M(Bool, allow_experimental_query_deduplication, false, "Allow sending parts' UUIDs for a query in order to deduplicate data parts if any", 0) \
+    M(Bool, allow_experimental_database_replicated, false, "Allow to create databases with Replicated engine", 0) \
+    M(UInt64, database_replicated_initial_query_timeout_sec, 300, "How long initial DDL query should wait for Replicated database to precess previous DDL queue entries", 0) \
+    M(Bool, database_replicated_ddl_output, true, "Return table with query execution status as a result of DDL query", 0) \
     \
     /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
     \
diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index 24a193d9134..dc1203e8cc9 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -311,7 +311,8 @@ BlockIO DatabaseReplicated::propose(const ASTPtr & query, const Context & query_
 
     Strings hosts_to_wait = getZooKeeper()->getChildren(zookeeper_path + "/replicas");
     auto stream = std::make_shared<DDLQueryStatusInputStream>(node_path, entry, query_context, hosts_to_wait);
-    io.in = std::move(stream);
+    if (query_context.getSettingsRef().database_replicated_ddl_output)
+        io.in = std::move(stream);
     return io;
 }
 
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index 43a6ce15376..2ae97b0d82a 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -80,7 +80,6 @@ public:
 
     ClusterPtr getCluster() const;
 
-    //FIXME
     friend struct DatabaseReplicatedTask;
     friend class DatabaseReplicatedDDLWorker;
 private:
diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp
index 8751c125383..ff15878b136 100644
--- a/src/Databases/DatabaseReplicatedWorker.cpp
+++ b/src/Databases/DatabaseReplicatedWorker.cpp
@@ -89,7 +89,7 @@ String DatabaseReplicatedDDLWorker::enqueueQuery(DDLLogEntry & entry)
     return node_path;
 }
 
-String DatabaseReplicatedDDLWorker::tryEnqueueAndExecuteEntry(DDLLogEntry & entry, const Context & /*query_context*/)
+String DatabaseReplicatedDDLWorker::tryEnqueueAndExecuteEntry(DDLLogEntry & entry, const Context & query_context)
 {
     /// NOTE Possibly it would be better to execute initial query on the most up-to-date node,
     /// but it requires more complex logic around /try node.
@@ -114,7 +114,7 @@ String DatabaseReplicatedDDLWorker::tryEnqueueAndExecuteEntry(DDLLogEntry & entr
     task->is_initial_query = true;
 
     LOG_DEBUG(log, "Waiting for worker thread to process all entries before {}", entry_name);
-    UInt64 timeout = 600;
+    UInt64 timeout = query_context.getSettingsRef().database_replicated_initial_query_timeout_sec;
     {
         std::unique_lock lock{mutex};
         bool processed = wait_current_task_change.wait_for(lock, std::chrono::seconds(timeout), [&]()
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index ac365dbb8d4..f08f47b1c0e 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -32,10 +32,6 @@
 
 namespace fs = std::filesystem;
 
-namespace CurrentMetrics
-{
-    extern const Metric MaxDDLEntryID;
-}
 
 namespace DB
 {
@@ -152,12 +148,14 @@ std::unique_ptr<ZooKeeperLock> createSimpleZooKeeperLock(
 
 
 DDLWorker::DDLWorker(int pool_size_, const std::string & zk_root_dir, const Context & context_, const Poco::Util::AbstractConfiguration * config, const String & prefix,
-                     const String & logger_name)
+                     const String & logger_name, const CurrentMetrics::Metric * max_entry_metric_)
     : context(context_)
     , log(&Poco::Logger::get(logger_name))
     , pool_size(pool_size_)
+    , max_entry_metric(max_entry_metric_)
 {
-    CurrentMetrics::set(CurrentMetrics::MaxDDLEntryID, 0);
+    if (max_entry_metric)
+        CurrentMetrics::set(*max_entry_metric, 0);
 
     if (1 < pool_size)
     {
@@ -456,7 +454,8 @@ void DDLWorker::updateMaxDDLEntryID(const String & entry_name)
     {
         if (max_id.compare_exchange_weak(prev_id, id))
         {
-            CurrentMetrics::set(CurrentMetrics::MaxDDLEntryID, id);
+            if (max_entry_metric)
+                CurrentMetrics::set(*max_entry_metric, id);
             break;
         }
     }
@@ -596,7 +595,7 @@ void DDLWorker::processTask(DDLTaskBase & task)
 }
 
 
-bool DDLWorker::taskShouldBeExecutedOnLeader(const ASTPtr ast_ddl, const StoragePtr storage)
+bool DDLWorker::taskShouldBeExecutedOnLeader(const ASTPtr & ast_ddl, const StoragePtr storage)
 {
     /// Pure DROP queries have to be executed on each node separately
     if (auto * query = ast_ddl->as<ASTDropQuery>(); query && query->kind != ASTDropQuery::Kind::Truncate)
diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h
index 03c80e3f669..0985884eef7 100644
--- a/src/Interpreters/DDLWorker.h
+++ b/src/Interpreters/DDLWorker.h
@@ -43,7 +43,7 @@ class DDLWorker
 {
 public:
     DDLWorker(int pool_size_, const std::string & zk_root_dir, const Context & context_, const Poco::Util::AbstractConfiguration * config, const String & prefix,
-              const String & logger_name = "DDLWorker");
+              const String & logger_name = "DDLWorker", const CurrentMetrics::Metric * max_entry_metric_ = nullptr);
     virtual ~DDLWorker();
 
     /// Pushes query into DDL queue, returns path to created node
@@ -81,7 +81,7 @@ protected:
     void updateMaxDDLEntryID(const String & entry_name);
 
     /// Check that query should be executed on leader replica only
-    static bool taskShouldBeExecutedOnLeader(const ASTPtr ast_ddl, StoragePtr storage);
+    static bool taskShouldBeExecutedOnLeader(const ASTPtr & ast_ddl, StoragePtr storage);
 
     /// Executes query only on leader replica in case of replicated table.
     /// Queries like TRUNCATE/ALTER .../OPTIMIZE have to be executed only on one node of shard.
@@ -144,6 +144,7 @@ protected:
     size_t max_tasks_in_queue = 1000;
 
     std::atomic<UInt64> max_id = 0;
+    const CurrentMetrics::Metric * max_entry_metric;
 };
 
 
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index bbe8526ae5b..2021c1f1d60 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -138,20 +138,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create)
         bool old_style_database = context.getSettingsRef().default_database_engine.value == DefaultDatabaseEngine::Ordinary;
         auto engine = std::make_shared<ASTFunction>();
         auto storage = std::make_shared<ASTStorage>();
-
-        //FIXME revert it before merge
-        engine->name = "Atomic";
-        if (old_style_database)
-        {
-            if (database_name == "test")
-                engine->name = "Ordinary";      // for stateful tests
-            else
-                engine = makeASTFunction("Replicated",
-                                     std::make_shared<ASTLiteral>(fmt::format("/clickhouse/db/{}/", create.database)),
-                                     std::make_shared<ASTLiteral>("s1"),
-                                     std::make_shared<ASTLiteral>("r" + toString(getpid())));
-        }
-
+        engine->name = old_style_database ? "Ordinary" : "Atomic";
         engine->no_empty_args = true;
         storage->set(storage->engine, engine);
         create.set(create.storage, storage);
@@ -221,6 +208,12 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create)
                         "Enable allow_experimental_database_materialize_mysql to use it.", ErrorCodes::UNKNOWN_DATABASE_ENGINE);
     }
 
+    if (create.storage->engine->name == "Replicated" && !context.getSettingsRef().allow_experimental_database_replicated && !internal)
+    {
+        throw Exception("Replicated is an experimental database engine. "
+                        "Enable allow_experimental_database_replicated to use it.", ErrorCodes::UNKNOWN_DATABASE_ENGINE);
+    }
+
     DatabasePtr database = DatabaseFactory::get(create, metadata_path / "", context);
 
     if (create.uuid != UUIDHelpers::Nil)
diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp
index 2774f78663e..1937fbaf905 100644
--- a/src/Interpreters/executeDDLQueryOnCluster.cpp
+++ b/src/Interpreters/executeDDLQueryOnCluster.cpp
@@ -205,10 +205,6 @@ DDLQueryStatusInputStream::DDLQueryStatusInputStream(const String & zk_node_path
     addTotalRowsApprox(waiting_hosts.size());
 
     timeout_seconds = context.getSettingsRef().distributed_ddl_task_timeout;
-
-    //FIXME revert it before merge
-    if (context.getSettingsRef().default_database_engine.value == DefaultDatabaseEngine::Ordinary)
-        timeout_seconds = 10;
 }
 
 Block DDLQueryStatusInputStream::readImpl()
@@ -252,7 +248,6 @@ Block DDLQueryStatusInputStream::readImpl()
             sleepForMilliseconds(std::min<size_t>(1000, 50 * (try_number + 1)));
         }
 
-        /// TODO: add shared lock
         if (!zookeeper->exists(node_path))
         {
             throw Exception(ErrorCodes::UNFINISHED,
@@ -301,12 +296,7 @@ Block DDLQueryStatusInputStream::readImpl()
         res = sample.cloneWithColumns(std::move(columns));
     }
 
-    //FIXME revert it before merge
-    bool is_functional_tests = !by_hostname && context.getSettingsRef().default_database_engine.value == DefaultDatabaseEngine::Ordinary;
-    if (is_functional_tests)
-        return {};
-    else
-        return res;
+    return res;
 }
 
 Strings DDLQueryStatusInputStream::getChildrenAllowNoNode(const std::shared_ptr<zkutil::ZooKeeper> & zookeeper, const String & node_path)
diff --git a/tests/ci/ci_config.json b/tests/ci/ci_config.json
index 44b35d61601..0e467319285 100644
--- a/tests/ci/ci_config.json
+++ b/tests/ci/ci_config.json
@@ -261,6 +261,18 @@
                 "with_coverage": false
             }
         },
+        "Functional stateful tests (release, DatabaseReplicated)": {
+            "required_build_properties": {
+                "compiler": "clang-11",
+                "package_type": "deb",
+                "build_type": "relwithdebuginfo",
+                "sanitizer": "none",
+                "bundled": "bundled",
+                "splitted": "unsplitted",
+                "clang-tidy": "disable",
+                "with_coverage": false
+            }
+        },
         "Functional stateless tests (address)": {
             "required_build_properties": {
                 "compiler": "clang-11",
@@ -381,6 +393,18 @@
                 "with_coverage": false
             }
         },
+        "Functional stateless tests (release, DatabaseReplicated)": {
+            "required_build_properties": {
+                "compiler": "clang-11",
+                "package_type": "deb",
+                "build_type": "relwithdebuginfo",
+                "sanitizer": "none",
+                "bundled": "bundled",
+                "splitted": "unsplitted",
+                "clang-tidy": "disable",
+                "with_coverage": false
+            }
+        },
         "Stress test (address)": {
             "required_build_properties": {
                 "compiler": "clang-11",
diff --git a/tests/clickhouse-test b/tests/clickhouse-test
index b2f3f73b6c0..64a93416c41 100755
--- a/tests/clickhouse-test
+++ b/tests/clickhouse-test
@@ -105,7 +105,9 @@ def remove_control_characters(s):
     s = re.sub(r"[\x00-\x08\x0b\x0e-\x1f\x7f]", "", s)
     return s
 
-def get_db_engine(args):
+def get_db_engine(args, database_name):
+    if args.replicated_database:
+        return " ENGINE=Replicated('/test/clickhouse/db/{}', 's1', 'r1')".format(database_name)
     if args.db_engine:
         return " ENGINE=" + args.db_engine
     return ""   # Will use default engine
@@ -128,7 +130,7 @@ def run_single_test(args, ext, server_logs_level, client_options, case_file, std
 
         clickhouse_proc_create = Popen(shlex.split(args.client), stdin=PIPE, stdout=PIPE, stderr=PIPE, universal_newlines=True)
         try:
-            clickhouse_proc_create.communicate(("CREATE DATABASE " + database + get_db_engine(args)), timeout=args.timeout)
+            clickhouse_proc_create.communicate(("CREATE DATABASE " + database + get_db_engine(args, database)), timeout=args.timeout)
         except TimeoutExpired:
             total_time = (datetime.now() - start_time).total_seconds()
             return clickhouse_proc_create, "", "Timeout creating database {} before test".format(database), total_time
@@ -532,6 +534,8 @@ class BuildFlags():
     RELEASE = 'release-build'
     DATABASE_ORDINARY = 'database-ordinary'
     POLYMORPHIC_PARTS = 'polymorphic-parts'
+    ANTLR = 'antlr'
+    DATABASE_REPLICATED = 'database-replicated'
 
 
 def collect_build_flags(client):
@@ -613,7 +617,9 @@ def main(args):
 
     build_flags = collect_build_flags(args.client)
     if args.antlr:
-        build_flags.append('antlr')
+        build_flags.append(BuildFlags.ANTLR)
+    if args.replicated_database:
+        build_flags.append(BuildFlags.DATABASE_REPLICATED)
 
     if args.use_skip_list:
         tests_to_skip_from_list = collect_tests_to_skip(args.skip_list_path, build_flags)
@@ -666,10 +672,10 @@ def main(args):
 
     if args.database and args.database != "test":
         clickhouse_proc_create = Popen(shlex.split(args.client), stdin=PIPE, stdout=PIPE, stderr=PIPE, universal_newlines=True)
-        clickhouse_proc_create.communicate(("CREATE DATABASE IF NOT EXISTS " + args.database + get_db_engine(args)))
+        clickhouse_proc_create.communicate(("CREATE DATABASE IF NOT EXISTS " + args.database + get_db_engine(args, args.database)))
 
     clickhouse_proc_create = Popen(shlex.split(args.client), stdin=PIPE, stdout=PIPE, stderr=PIPE, universal_newlines=True)
-    clickhouse_proc_create.communicate(("CREATE DATABASE IF NOT EXISTS test" + get_db_engine(args)))
+    clickhouse_proc_create.communicate(("CREATE DATABASE IF NOT EXISTS test" + get_db_engine(args, 'test')))
 
     def is_test_from_dir(suite_dir, case):
         case_file = os.path.join(suite_dir, case)
@@ -923,6 +929,7 @@ if __name__ == '__main__':
     parser.add_argument('--skip-list-path', help="Path to skip-list file")
     parser.add_argument('--use-skip-list', action='store_true', default=False, help="Use skip list to skip tests if found")
     parser.add_argument('--db-engine', help='Database engine name')
+    parser.add_argument('--replicated-database', action='store_true', default=False, help='Run tests with Replicated database engine')
 
     parser.add_argument('--antlr', action='store_true', default=False, dest='antlr', help='Use new ANTLR parser in tests')
     parser.add_argument('--no-stateless', action='store_true', help='Disable all stateless tests')
diff --git a/tests/config/install.sh b/tests/config/install.sh
index 9965e1fb1ad..de6ba2a7a09 100755
--- a/tests/config/install.sh
+++ b/tests/config/install.sh
@@ -61,5 +61,8 @@ fi
 if [[ -n "$USE_DATABASE_ORDINARY" ]] && [[ "$USE_DATABASE_ORDINARY" -eq 1 ]]; then
     ln -sf $SRC_PATH/users.d/database_ordinary.xml $DEST_SERVER_PATH/users.d/
 fi
+if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
+    ln -sf $SRC_PATH/users.d/database_replicated.xml $DEST_SERVER_PATH/users.d/
+fi
 
 ln -sf $SRC_PATH/client_config.xml $DEST_CLIENT_PATH/config.xml
diff --git a/tests/config/users.d/database_replicated.xml b/tests/config/users.d/database_replicated.xml
new file mode 100644
index 00000000000..23801d00154
--- /dev/null
+++ b/tests/config/users.d/database_replicated.xml
@@ -0,0 +1,10 @@
+<yandex>
+    <profiles>
+        <default>
+            <allow_experimental_database_replicated>1</allow_experimental_database_replicated>
+            <database_replicated_ddl_output>0</database_replicated_ddl_output>
+            <database_replicated_initial_query_timeout_sec>30</database_replicated_initial_query_timeout_sec>
+            <distributed_ddl_task_timeout>30</distributed_ddl_task_timeout>
+        </default>
+    </profiles>
+</yandex>
diff --git a/tests/integration/test_materialize_mysql_database/test.py b/tests/integration/test_materialize_mysql_database/test.py
index 0175ec78587..e55772d9e1d 100644
--- a/tests/integration/test_materialize_mysql_database/test.py
+++ b/tests/integration/test_materialize_mysql_database/test.py
@@ -14,7 +14,7 @@ DOCKER_COMPOSE_PATH = get_docker_compose_path()
 
 cluster = ClickHouseCluster(__file__)
 
-node_db_ordinary = cluster.add_instance('node1', user_configs=["configs/users.xml"], with_mysql=False, stay_alive=True, with_zookeeper=True)    #FIXME
+node_db_ordinary = cluster.add_instance('node1', user_configs=["configs/users.xml"], with_mysql=False, stay_alive=True)
 node_db_atomic = cluster.add_instance('node2', user_configs=["configs/users_db_atomic.xml"], with_mysql=False, stay_alive=True)
 
 
diff --git a/tests/integration/test_replicated_database/configs/settings.xml b/tests/integration/test_replicated_database/configs/settings.xml
new file mode 100644
index 00000000000..e0f7e8691e6
--- /dev/null
+++ b/tests/integration/test_replicated_database/configs/settings.xml
@@ -0,0 +1,12 @@
+<yandex>
+    <profiles>
+        <default>
+            <allow_experimental_database_replicated>1</allow_experimental_database_replicated>
+        </default>
+    </profiles>
+    <users>
+        <default>
+            <profile>default</profile>
+        </default>
+    </users>
+</yandex>
diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py
index 0db6884fbb7..99e7d6077f8 100644
--- a/tests/integration/test_replicated_database/test.py
+++ b/tests/integration/test_replicated_database/test.py
@@ -8,11 +8,11 @@ from helpers.network import PartitionManager
 
 cluster = ClickHouseCluster(__file__)
 
-main_node = cluster.add_instance('main_node', main_configs=['configs/config.xml'], with_zookeeper=True, stay_alive=True, macros={"shard": 1, "replica": 1})
-dummy_node = cluster.add_instance('dummy_node', main_configs=['configs/config.xml'], with_zookeeper=True, stay_alive=True, macros={"shard": 1, "replica": 2})
-competing_node = cluster.add_instance('competing_node', main_configs=['configs/config.xml'], with_zookeeper=True, macros={"shard": 1, "replica": 3})
-snapshotting_node = cluster.add_instance('snapshotting_node', main_configs=['configs/config.xml'], with_zookeeper=True, macros={"shard": 2, "replica": 1})
-snapshot_recovering_node = cluster.add_instance('snapshot_recovering_node', main_configs=['configs/config.xml'], with_zookeeper=True, macros={"shard": 2, "replica": 2})
+main_node = cluster.add_instance('main_node', main_configs=['configs/config.xml'], user_configs=['configs/settings.xml'], with_zookeeper=True, stay_alive=True, macros={"shard": 1, "replica": 1})
+dummy_node = cluster.add_instance('dummy_node', main_configs=['configs/config.xml'], user_configs=['configs/settings.xml'], with_zookeeper=True, stay_alive=True, macros={"shard": 1, "replica": 2})
+competing_node = cluster.add_instance('competing_node', main_configs=['configs/config.xml'], user_configs=['configs/settings.xml'], with_zookeeper=True, macros={"shard": 1, "replica": 3})
+snapshotting_node = cluster.add_instance('snapshotting_node', main_configs=['configs/config.xml'], user_configs=['configs/settings.xml'], with_zookeeper=True, macros={"shard": 2, "replica": 1})
+snapshot_recovering_node = cluster.add_instance('snapshot_recovering_node', main_configs=['configs/config.xml'], user_configs=['configs/settings.xml'], with_zookeeper=True, macros={"shard": 2, "replica": 2})
 
 all_nodes = [main_node, dummy_node, competing_node, snapshotting_node, snapshot_recovering_node]
 
diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json
index 2317cdcecac..db7b0631b97 100644
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@@ -100,10 +100,15 @@
         "00604_show_create_database",
         "00609_mv_index_in_in",
         "00510_materizlized_view_and_deduplication_zookeeper",
-        "memory_tracking",     /// FIXME remove it before merge
+        "00738_lock_for_inner_table"
+    ],
+    "database-replicated": [
         "memory_tracking",
         "memory_usage",
+        "live_view",
         "01188_attach_table_from_pat",
+        "01415_sticking_mutations",
+        "01130_in_memory_parts",
         "01110_dictionary_layout_without_arguments",
         "01018_ddl_dictionaries_create",
         "01018_ddl_dictionaries_select",
@@ -167,7 +172,6 @@
         "01493_alter_remove_properties_zookeeper",
         "01475_read_subcolumns_storages",
         "01475_read_subcolumns",
-        "01463_test_alter_live_view_refresh",
         "01451_replicated_detach_drop_part",
         "01451_detach_drop_part",
         "01440_big_int_exotic_casts",
@@ -180,9 +184,6 @@
         "01355_alter_column_with_order",
         "01291_geo_types",
         "01270_optimize_skip_unused_shards_low_cardinality",
-        "01237_live_view_over_distributed_with_subquery_select_table_alias",
-        "01236_distributed_over_live_view_over_distributed",
-        "01235_live_view_over_distributed",
         "01182_materialized_view_different_structure",
         "01150_ddl_guard_rwr",
         "01148_zookeeper_path_macros_unfolding",
@@ -194,7 +195,6 @@
         "01073_attach_if_not_exists",
         "01072_optimize_skip_unused_shards_const_expr_eval",
         "01071_prohibition_secondary_index_with_old_format_merge_tree",
-        "01071_live_view_detach_dependency",
         "01062_alter_on_mutataion_zookeeper",
         "01060_shutdown_table_after_detach",
         "01056_create_table_as",
@@ -207,27 +207,6 @@
         "00989_parallel_parts_loading",
         "00980_zookeeper_merge_tree_alter_settings",
         "00980_merge_alter_settings",
-        "00980_create_temporary_live_view",
-        "00978_live_view_watch",
-        "00977_live_view_watch_events",
-        "00976_live_view_select_version",
-        "00975_live_view_create",
-        "00974_live_view_select_with_aggregation",
-        "00973_live_view_with_subquery_select_with_aggregation_in_subquery",
-        "00973_live_view_with_subquery_select_with_aggregation",
-        "00973_live_view_with_subquery_select_table_alias",
-        "00973_live_view_with_subquery_select_nested_with_aggregation_table_alias",
-        "00973_live_view_with_subquery_select_nested_with_aggregation",
-        "00973_live_view_with_subquery_select_nested",
-        "00973_live_view_with_subquery_select_join_no_alias",
-        "00973_live_view_with_subquery_select_join",
-        "00973_live_view_with_subquery_select",
-        "00973_live_view_select_prewhere",
-        "00973_live_view_select",
-        "00972_live_view_select_1",
-        "00969_live_view_watch_format_jsoneachrowwithprogress",
-        "00968_live_view_select_format_jsoneachrowwithprogress",
-        "00961_temporary_live_view_watch",
         "00955_test_final_mark",
         "00933_reserved_word",
         "00926_zookeeper_adaptive_index_granularity_replicated_merge_tree",

From ac476ad83e526d8afec591189f10c5933edf68e7 Mon Sep 17 00:00:00 2001
From: Nikita Mikhailov <jakalletti@jakalletti-build.sas.yp-c.yandex.net>
Date: Mon, 15 Feb 2021 14:27:16 +0300
Subject: [PATCH 1080/1238] done

---
 .../1_stateful/00158_cache_dictionary_has.reference       | 6 +++---
 tests/queries/1_stateful/00158_cache_dictionary_has.sql   | 8 ++++----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/tests/queries/1_stateful/00158_cache_dictionary_has.reference b/tests/queries/1_stateful/00158_cache_dictionary_has.reference
index f8d5cd4f53d..ad4bce6bec5 100644
--- a/tests/queries/1_stateful/00158_cache_dictionary_has.reference
+++ b/tests/queries/1_stateful/00158_cache_dictionary_has.reference
@@ -1,6 +1,6 @@
+100
 6410
-6410
-25323
+100
 25323
-1774655
+100
 1774655
diff --git a/tests/queries/1_stateful/00158_cache_dictionary_has.sql b/tests/queries/1_stateful/00158_cache_dictionary_has.sql
index 063e7843fd4..8461728c58e 100644
--- a/tests/queries/1_stateful/00158_cache_dictionary_has.sql
+++ b/tests/queries/1_stateful/00158_cache_dictionary_has.sql
@@ -6,15 +6,15 @@ CREATE DICTIONARY db_dict.cache_hits
 PRIMARY KEY WatchID
 SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'hits' PASSWORD '' DB 'test'))
 LIFETIME(MIN 300 MAX 600)
-LAYOUT(CACHE(SIZE_IN_CELLS 100000 QUERY_WAIT_TIMEOUT_MILLISECONDS 600000));
+LAYOUT(CACHE(SIZE_IN_CELLS 100 QUERY_WAIT_TIMEOUT_MILLISECONDS 600000));
 
-SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) as flag FROM test.hits PREWHERE WatchID % 1400 == 0);
+SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) as flag FROM test.hits PREWHERE WatchID % 1400 == 0 LIMIT 100);
 SELECT count() from test.hits PREWHERE WatchID % 1400 == 0;
 
-SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) as flag FROM test.hits PREWHERE WatchID % 350 == 0);
+SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) as flag FROM test.hits PREWHERE WatchID % 350 == 0 LIMIT 100);
 SELECT count() from test.hits PREWHERE WatchID % 350 == 0;
 
-SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) as flag FROM test.hits PREWHERE WatchID % 5 == 0);
+SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) as flag FROM test.hits PREWHERE WatchID % 5 == 0 LIMIT 100);
 SELECT count() from test.hits PREWHERE WatchID % 5 == 0;
 
 DROP DICTIONARY IF EXISTS db_dict.cache_hits;

From 40e8bbc49a7dda0f5db49125dd26d28630e45a5f Mon Sep 17 00:00:00 2001
From: Nikita Mikhailov <jakalletti@jakalletti-build.sas.yp-c.yandex.net>
Date: Mon, 15 Feb 2021 14:32:17 +0300
Subject: [PATCH 1081/1238] done

---
 utils/convert-month-partitioned-parts/main.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/utils/convert-month-partitioned-parts/main.cpp b/utils/convert-month-partitioned-parts/main.cpp
index 97eba631f1e..bce1e08077c 100644
--- a/utils/convert-month-partitioned-parts/main.cpp
+++ b/utils/convert-month-partitioned-parts/main.cpp
@@ -97,7 +97,6 @@ void run(String part_path, String date_column, String dest_path)
     Poco::File(new_tmp_part_path_str + "checksums.txt").setWriteable();
     WriteBufferFromFile checksums_out(new_tmp_part_path_str + "checksums.txt", 4096);
     checksums.write(checksums_out);
-    checksums.close();
 
     Poco::File(new_tmp_part_path).renameTo(new_part_path.toString());
 }

From 780cf3dbff59422cd67f063b16c81121e7ddf487 Mon Sep 17 00:00:00 2001
From: Nikita Mikhailov <jakalletti@jakalletti-build.sas.yp-c.yandex.net>
Date: Mon, 15 Feb 2021 14:37:07 +0300
Subject: [PATCH 1082/1238] better

---
 utils/convert-month-partitioned-parts/main.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/utils/convert-month-partitioned-parts/main.cpp b/utils/convert-month-partitioned-parts/main.cpp
index bce1e08077c..0a697937eb6 100644
--- a/utils/convert-month-partitioned-parts/main.cpp
+++ b/utils/convert-month-partitioned-parts/main.cpp
@@ -97,6 +97,8 @@ void run(String part_path, String date_column, String dest_path)
     Poco::File(new_tmp_part_path_str + "checksums.txt").setWriteable();
     WriteBufferFromFile checksums_out(new_tmp_part_path_str + "checksums.txt", 4096);
     checksums.write(checksums_out);
+    checksums_in.close();
+    checksums_out.close();
 
     Poco::File(new_tmp_part_path).renameTo(new_part_path.toString());
 }

From 3f86ce4c67371cb87263367e7eea0cc0dafaabb4 Mon Sep 17 00:00:00 2001
From: tavplubix <avtokmakov@yandex-team.ru>
Date: Mon, 15 Feb 2021 15:04:30 +0300
Subject: [PATCH 1083/1238] Update StorageReplicatedMergeTree.cpp

---
 src/Storages/StorageReplicatedMergeTree.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index 097b7679899..518577c473c 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -751,7 +751,7 @@ void StorageReplicatedMergeTree::drop()
         auto zookeeper = global_context.getZooKeeper();
 
         /// If probably there is metadata in ZooKeeper, we don't allow to drop the table.
-        if (is_readonly || !zookeeper)
+        if (!zookeeper)
             throw Exception("Can't drop readonly replicated table (need to drop data in ZooKeeper as well)", ErrorCodes::TABLE_IS_READ_ONLY);
 
         shutdown();

From d615b8e516569ddf69ad92cd3b73f6591c0b7248 Mon Sep 17 00:00:00 2001
From: Nikita Mikhailov <jakalletti@jakalletti-build.sas.yp-c.yandex.net>
Date: Mon, 15 Feb 2021 16:10:14 +0300
Subject: [PATCH 1084/1238] more checks

(cherry picked from commit b45168ecaf37d0061edfd12c67a8c5300d45d2e3)
---
 src/Formats/JSONEachRowUtils.cpp                    | 11 ++++++++---
 src/IO/BufferWithOwnMemory.h                        |  6 +++---
 src/Processors/Formats/Impl/CSVRowInputFormat.cpp   | 13 +++++++++----
 .../Formats/Impl/RegexpRowInputFormat.cpp           |  5 ++++-
 .../Formats/Impl/TabSeparatedRowInputFormat.cpp     |  8 +++++---
 5 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/src/Formats/JSONEachRowUtils.cpp b/src/Formats/JSONEachRowUtils.cpp
index 56bef9e09ea..28ba625d9fb 100644
--- a/src/Formats/JSONEachRowUtils.cpp
+++ b/src/Formats/JSONEachRowUtils.cpp
@@ -6,6 +6,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int INCORRECT_DATA;
+    extern const int LOGICAL_ERROR;
 }
 
 std::pair<bool, size_t> fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size)
@@ -28,7 +29,9 @@ std::pair<bool, size_t> fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, D
         if (quotes)
         {
             pos = find_first_symbols<'\\', '"'>(pos, in.buffer().end());
-            if (pos == in.buffer().end())
+            if (pos > in.buffer().end())
+                throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR);
+            else if (pos == in.buffer().end())
                 continue;
             if (*pos == '\\')
             {
@@ -45,9 +48,11 @@ std::pair<bool, size_t> fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, D
         else
         {
             pos = find_first_symbols<'{', '}', '\\', '"'>(pos, in.buffer().end());
-            if (pos == in.buffer().end())
+            if (pos > in.buffer().end())
+                throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR);
+            else if (pos == in.buffer().end())
                 continue;
-            if (*pos == '{')
+            else if (*pos == '{')
             {
                 ++balance;
                 ++pos;
diff --git a/src/IO/BufferWithOwnMemory.h b/src/IO/BufferWithOwnMemory.h
index 782eea84ed7..f8cc8b7febb 100644
--- a/src/IO/BufferWithOwnMemory.h
+++ b/src/IO/BufferWithOwnMemory.h
@@ -35,10 +35,10 @@ struct Memory : boost::noncopyable, Allocator
     char * m_data = nullptr;
     size_t alignment = 0;
 
-    Memory() {}
+    Memory() = default;
 
     /// If alignment != 0, then allocate memory aligned to specified value.
-    Memory(size_t size_, size_t alignment_ = 0) : m_capacity(size_), m_size(m_capacity), alignment(alignment_)
+    explicit Memory(size_t size_, size_t alignment_ = 0) : m_capacity(size_), m_size(m_capacity), alignment(alignment_)
     {
         alloc();
     }
@@ -140,7 +140,7 @@ protected:
     Memory<> memory;
 public:
     /// If non-nullptr 'existing_memory' is passed, then buffer will not create its own memory and will use existing_memory without ownership.
-    BufferWithOwnMemory(size_t size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0)
+    explicit BufferWithOwnMemory(size_t size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0)
         : Base(nullptr, 0), memory(existing_memory ? 0 : size, alignment)
     {
         Base::set(existing_memory ? existing_memory : memory.data(), size);
diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp
index 8422f09e364..f7f08411dfa 100644
--- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp
@@ -15,6 +15,7 @@ namespace ErrorCodes
 {
     extern const int BAD_ARGUMENTS;
     extern const int INCORRECT_DATA;
+    extern const int LOGICAL_ERROR;
 }
 
 
@@ -436,9 +437,11 @@ static std::pair<bool, size_t> fileSegmentationEngineCSVImpl(ReadBuffer & in, DB
         if (quotes)
         {
             pos = find_first_symbols<'"'>(pos, in.buffer().end());
-            if (pos == in.buffer().end())
+            if (pos > in.buffer().end())
+                throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR);
+            else if (pos == in.buffer().end())
                 continue;
-            if (*pos == '"')
+            else if (*pos == '"')
             {
                 ++pos;
                 if (loadAtPosition(in, memory, pos) && *pos == '"')
@@ -450,9 +453,11 @@ static std::pair<bool, size_t> fileSegmentationEngineCSVImpl(ReadBuffer & in, DB
         else
         {
             pos = find_first_symbols<'"', '\r', '\n'>(pos, in.buffer().end());
-            if (pos == in.buffer().end())
+            if (pos > in.buffer().end())
+                throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR);
+            else if (pos == in.buffer().end())
                 continue;
-            if (*pos == '"')
+            else if (*pos == '"')
             {
                 quotes = true;
                 ++pos;
diff --git a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp
index 6e14a1dc3c8..108f4d9d321 100644
--- a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp
@@ -11,6 +11,7 @@ namespace ErrorCodes
 {
     extern const int INCORRECT_DATA;
     extern const int BAD_ARGUMENTS;
+    extern const int LOGICAL_ERROR;
 }
 
 RegexpRowInputFormat::RegexpRowInputFormat(
@@ -182,7 +183,9 @@ static std::pair<bool, size_t> fileSegmentationEngineRegexpImpl(ReadBuffer & in,
     while (loadAtPosition(in, memory, pos) && need_more_data)
     {
         pos = find_first_symbols<'\n', '\r'>(pos, in.buffer().end());
-        if (pos == in.buffer().end())
+        if (pos > in.buffer().end())
+                throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR);
+        else if (pos == in.buffer().end())
             continue;
 
         // Support DOS-style newline ("\r\n")
diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp
index 69a5e61caf2..96b01a5bd9b 100644
--- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp
@@ -15,6 +15,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int INCORRECT_DATA;
+    extern const int LOGICAL_ERROR;
 }
 
 
@@ -433,10 +434,11 @@ static std::pair<bool, size_t> fileSegmentationEngineTabSeparatedImpl(ReadBuffer
     {
         pos = find_first_symbols<'\\', '\r', '\n'>(pos, in.buffer().end());
 
-        if (pos == in.buffer().end())
+        if (pos > in.buffer().end())
+                throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR);
+        else if (pos == in.buffer().end())
             continue;
-
-        if (*pos == '\\')
+        else if (*pos == '\\')
         {
             ++pos;
             if (loadAtPosition(in, memory, pos))

From 812641f5a70f0912d809961f10bc6a9d39d2cb1c Mon Sep 17 00:00:00 2001
From: Nikita Mikhailov <jakalletti@jakalletti-build.sas.yp-c.yandex.net>
Date: Mon, 15 Feb 2021 16:38:31 +0300
Subject: [PATCH 1085/1238] add test to arcadia skip list

---
 tests/queries/0_stateless/arcadia_skip_list.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/queries/0_stateless/arcadia_skip_list.txt b/tests/queries/0_stateless/arcadia_skip_list.txt
index 38d5d3871f5..b141443a979 100644
--- a/tests/queries/0_stateless/arcadia_skip_list.txt
+++ b/tests/queries/0_stateless/arcadia_skip_list.txt
@@ -189,6 +189,7 @@
 01650_fetch_patition_with_macro_in_zk_path
 01651_bugs_from_15889
 01655_agg_if_nullable
+01658_read_file_to_stringcolumn
 01182_materialized_view_different_structure
 01660_sum_ubsan
 01669_columns_declaration_serde

From d38198dade3b79bcfecbee338d719e38d2c68501 Mon Sep 17 00:00:00 2001
From: lehasm <lehasm@gmail.com>
Date: Mon, 15 Feb 2021 18:58:46 +0300
Subject: [PATCH 1086/1238] ru translation

---
 .../functions/string-functions.md             | 43 +++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/docs/ru/sql-reference/functions/string-functions.md b/docs/ru/sql-reference/functions/string-functions.md
index aeb0652cc18..b1c4012e9f9 100644
--- a/docs/ru/sql-reference/functions/string-functions.md
+++ b/docs/ru/sql-reference/functions/string-functions.md
@@ -597,4 +597,47 @@ Hello, &quot;world&quot;!
 &apos;foo&apos;
 ```
 
+
+## decodeXMLComponent {#decode-xml-component}
+
+Заменяет символами предопределенные мнемоники XML: `&quot;` `&amp;` `&apos;` `&gt;` `&lt;`
+Также эта функция заменяет числовые ссылки соответствующими символами юникод.
+Поддерживаются десятичная (например, `&#10003;`) и шестнадцатеричная (`&#x2713;`) формы.
+
+**Синтаксис**
+
+``` sql
+decodeXMLComponent(x)
+```
+
+**Параметры**
+
+-   `x` — последовательность символов. [String](../../sql-reference/data-types/string.md).
+
+**Возвращаемое значение**
+
+-   Строка с произведенными заменами.
+
+Тип: [String](../../sql-reference/data-types/string.md).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT decodeXMLComponent('&apos;foo&apos;');
+SELECT decodeXMLComponent('&lt; &#x3A3; &gt;');
+```
+
+Результат:
+
+``` text
+'foo' 
+< Σ >
+```
+
+**Смотрите также**
+
+-   [Мнемоники в HTML](https://ru.wikipedia.org/wiki/%D0%9C%D0%BD%D0%B5%D0%BC%D0%BE%D0%BD%D0%B8%D0%BA%D0%B8_%D0%B2_HTML)
+
 [Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/string_functions/) <!--hide-->

From 8d11d09615bd89670594972ab36dfb6f29dafeea Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Mon, 15 Feb 2021 21:00:50 +0300
Subject: [PATCH 1087/1238] Add a test for malformed directores for Distributed
 async INSERT

---
 .../__init__.py                               |  0
 .../configs/remote_servers.xml                | 13 ++++++
 .../test.py                                   | 43 +++++++++++++++++++
 3 files changed, 56 insertions(+)
 create mode 100644 tests/integration/test_insert_distributed_async_extra_dirs/__init__.py
 create mode 100644 tests/integration/test_insert_distributed_async_extra_dirs/configs/remote_servers.xml
 create mode 100644 tests/integration/test_insert_distributed_async_extra_dirs/test.py

diff --git a/tests/integration/test_insert_distributed_async_extra_dirs/__init__.py b/tests/integration/test_insert_distributed_async_extra_dirs/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/integration/test_insert_distributed_async_extra_dirs/configs/remote_servers.xml b/tests/integration/test_insert_distributed_async_extra_dirs/configs/remote_servers.xml
new file mode 100644
index 00000000000..1df72377ce6
--- /dev/null
+++ b/tests/integration/test_insert_distributed_async_extra_dirs/configs/remote_servers.xml
@@ -0,0 +1,13 @@
+<yandex>
+    <remote_servers>
+        <test_cluster>
+            <shard>
+                <replica>
+                    <host>node</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+        </test_cluster>
+    </remote_servers>
+</yandex>
+
diff --git a/tests/integration/test_insert_distributed_async_extra_dirs/test.py b/tests/integration/test_insert_distributed_async_extra_dirs/test.py
new file mode 100644
index 00000000000..8365fce298d
--- /dev/null
+++ b/tests/integration/test_insert_distributed_async_extra_dirs/test.py
@@ -0,0 +1,43 @@
+# pylint: disable=unused-argument
+# pylint: disable=redefined-outer-name
+# pylint: disable=line-too-long
+
+import pytest
+
+from helpers.cluster import ClickHouseCluster
+
+cluster = ClickHouseCluster(__file__)
+
+node = cluster.add_instance('node', main_configs=['configs/remote_servers.xml'], stay_alive=True)
+
+@pytest.fixture(scope='module', autouse=True)
+def start_cluster():
+    try:
+        cluster.start()
+        yield cluster
+    finally:
+        cluster.shutdown()
+
+def test_insert_distributed_async_send_success():
+    node.query('CREATE TABLE data (key Int, value String) Engine=Null()')
+    node.query("""
+    CREATE TABLE dist AS data
+    Engine=Distributed(
+        test_cluster,
+        currentDatabase(),
+        data,
+        key
+    )
+    """)
+
+    node.exec_in_container(['bash', '-c', 'mkdir /var/lib/clickhouse/data/default/dist/shard10000_replica10000'])
+    node.exec_in_container(['bash', '-c', 'touch /var/lib/clickhouse/data/default/dist/shard10000_replica10000/1.bin'])
+
+    node.exec_in_container(['bash', '-c', 'mkdir /var/lib/clickhouse/data/default/dist/shard1_replica10000'])
+    node.exec_in_container(['bash', '-c', 'touch /var/lib/clickhouse/data/default/dist/shard1_replica10000/1.bin'])
+
+    node.exec_in_container(['bash', '-c', 'mkdir /var/lib/clickhouse/data/default/dist/shard10000_replica1'])
+    node.exec_in_container(['bash', '-c', 'touch /var/lib/clickhouse/data/default/dist/shard10000_replica1/1.bin'])
+
+    # will check that clickhouse-server is alive
+    node.restart_clickhouse()

From 6734df2a014fd8b3b587592ecfe21244f06ef0c4 Mon Sep 17 00:00:00 2001
From: lehasm <lehasm@gmail.com>
Date: Mon, 15 Feb 2021 21:25:32 +0300
Subject: [PATCH 1088/1238] Unnecessary new lines removed

---
 docs/en/sql-reference/functions/string-functions.md | 6 ++----
 docs/ru/sql-reference/functions/string-functions.md | 3 +--
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md
index fa9c84fa9af..03f6237bfe8 100644
--- a/docs/en/sql-reference/functions/string-functions.md
+++ b/docs/en/sql-reference/functions/string-functions.md
@@ -602,10 +602,8 @@ Hello, &quot;world&quot;!
 
 ## decodeXMLComponent {#decode-xml-component}
 
-Replaces XML predefined entities with characters. 
-Predefined entities are `&quot;` `&amp;` `&apos;` `&gt;` `&lt;`
-This function also replaces numeric character references with Unicode characters.
-Both decimal (like `&#10003;`) and hexadecimal (`&#x2713;`) forms are supported.
+Replaces XML predefined entities with characters. Predefined entities are `&quot;` `&amp;` `&apos;` `&gt;` `&lt;`
+This function also replaces numeric character references with Unicode characters. Both decimal (like `&#10003;`) and hexadecimal (`&#x2713;`) forms are supported.
 
 **Syntax**
 
diff --git a/docs/ru/sql-reference/functions/string-functions.md b/docs/ru/sql-reference/functions/string-functions.md
index b1c4012e9f9..236583c211a 100644
--- a/docs/ru/sql-reference/functions/string-functions.md
+++ b/docs/ru/sql-reference/functions/string-functions.md
@@ -601,8 +601,7 @@ Hello, &quot;world&quot;!
 ## decodeXMLComponent {#decode-xml-component}
 
 Заменяет символами предопределенные мнемоники XML: `&quot;` `&amp;` `&apos;` `&gt;` `&lt;`
-Также эта функция заменяет числовые ссылки соответствующими символами юникод.
-Поддерживаются десятичная (например, `&#10003;`) и шестнадцатеричная (`&#x2713;`) формы.
+Также эта функция заменяет числовые ссылки соответствующими символами юникод. Поддерживаются десятичная (например, `&#10003;`) и шестнадцатеричная (`&#x2713;`) формы.
 
 **Синтаксис**
 

From e3003add577d26444a6056a55cea30ca8b3285a6 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Mon, 15 Feb 2021 01:12:02 +0300
Subject: [PATCH 1089/1238] HashTable fix bug during resize with nonstandard
 grower

---
 src/Common/HashTable/HashTable.h      |  3 +-
 src/Common/tests/gtest_hash_table.cpp | 48 +++++++++++++++++++++++++++
 2 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/src/Common/HashTable/HashTable.h b/src/Common/HashTable/HashTable.h
index bf159e27731..892bd0b2ba9 100644
--- a/src/Common/HashTable/HashTable.h
+++ b/src/Common/HashTable/HashTable.h
@@ -539,7 +539,8 @@ protected:
           *    after transferring all the elements from the old halves you need to     [         o   x    ]
           *    process tail from the collision resolution chain immediately after it   [        o    x    ]
           */
-        for (; !buf[i].isZero(*this); ++i)
+        size_t new_size = grower.bufSize();
+        for (; i < new_size && !buf[i].isZero(*this); ++i)
         {
             size_t updated_place_value = reinsert(buf[i], buf[i].getHash(*this));
 
diff --git a/src/Common/tests/gtest_hash_table.cpp b/src/Common/tests/gtest_hash_table.cpp
index 41255dcbba1..1c673166ca9 100644
--- a/src/Common/tests/gtest_hash_table.cpp
+++ b/src/Common/tests/gtest_hash_table.cpp
@@ -317,3 +317,51 @@ TEST(HashTable, SerializationDeserialization)
         ASSERT_EQ(convertToSet(cont), convertToSet(deserialized));
     }
 }
+
+template <typename T>
+struct IdentityHash
+{
+    size_t operator()(T x) const { return x; }
+};
+
+struct OneElementResizeGrower
+{
+    /// If collision resolution chains are contiguous, we can implement erase operation by moving the elements.
+    static constexpr auto performs_linear_probing_with_single_step = true;
+
+    static constexpr size_t initial_count = 1;
+
+    size_t bufSize() const { return buf_size; }
+
+    size_t place(size_t x) const { return x % buf_size; }
+
+    size_t next(size_t pos) const { return (pos + 1) % buf_size; }
+
+    bool overflow(size_t elems) const { return elems >= buf_size; }
+
+    void increaseSize() { ++buf_size; }
+
+    void set(size_t) { }
+
+    void setBufSize(size_t buf_size_) { buf_size = buf_size_; }
+
+    size_t buf_size = initial_count;
+};
+
+TEST(HashTable, Resize)
+{
+    {
+        /// Test edge case if after resize all cells are resized in end of buf and will take half of
+        /// hash table place.
+        using HashSet = HashSet<int, IdentityHash<int>, OneElementResizeGrower>;
+        HashSet cont;
+
+        cont.insert(3);
+        cont.insert(1);
+
+        std::set<int> expected = {1, 3};
+        std::set<int> actual = convertToSet(cont);
+
+        ASSERT_EQ(actual, expected);
+    }
+}

From d08dcb1958a565ad62d2e688413c3942c20e91f6 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com>
Date: Mon, 15 Feb 2021 22:35:49 +0300
Subject: [PATCH 1090/1238] Update docs/en/operations/settings/settings.md

---
 docs/en/operations/settings/settings.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index f64c623415b..963f9fa18bd 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -2598,7 +2598,7 @@ Sets the probability that the ClickHouse can start a trace for executed queries
 
 Possible values:
 
--   0 — The trace for a executed queries is disabled (if no parent trace context is supplied).
+-   0 — The trace for all executed queries is disabled (if no parent trace context is supplied).
 -   Positive floating-point number in the range [0..1]. For example, if the setting value is `0,5`, ClickHouse can start a trace on average for half of the queries.
 -   1 — The trace for all executed queries is enabled.
 

From 7f21a216941ae6557e8ac5f75d9093635ec71919 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com>
Date: Mon, 15 Feb 2021 22:40:55 +0300
Subject: [PATCH 1091/1238] Update index.md

---
 docs/en/sql-reference/window-functions/index.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md
index 46f7ed3824e..07a7f2f6978 100644
--- a/docs/en/sql-reference/window-functions/index.md
+++ b/docs/en/sql-reference/window-functions/index.md
@@ -15,6 +15,7 @@ ClickHouse supports the standard grammar for defining windows and window functio
 | Feature | Support or workaround |
 | --------| ----------|
 | ad hoc window specification (`count(*) over (partition by id order by time desc)`) | yes |
+| expressions involving window functions, e.g. `(count(*) over ()) / 2)` | not supported, wrap in a subquery ([feature request](https://github.com/ClickHouse/ClickHouse/issues/19857)) |
 | `WINDOW` clause (`select ... from table window w as (partiton by id)`) | yes |
 | `ROWS` frame | yes |
 | `RANGE` frame | yes, it is the default |

From 2de6d550cc04d62c8189ca225c4016efe8c1847a Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com>
Date: Mon, 15 Feb 2021 22:42:10 +0300
Subject: [PATCH 1092/1238] Update index.md

---
 docs/en/sql-reference/window-functions/index.md | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md
index 07a7f2f6978..0a19b4a8da4 100644
--- a/docs/en/sql-reference/window-functions/index.md
+++ b/docs/en/sql-reference/window-functions/index.md
@@ -14,15 +14,15 @@ ClickHouse supports the standard grammar for defining windows and window functio
 
 | Feature | Support or workaround |
 | --------| ----------|
-| ad hoc window specification (`count(*) over (partition by id order by time desc)`) | yes |
+| ad hoc window specification (`count(*) over (partition by id order by time desc)`) | supported |
 | expressions involving window functions, e.g. `(count(*) over ()) / 2)` | not supported, wrap in a subquery ([feature request](https://github.com/ClickHouse/ClickHouse/issues/19857)) |
-| `WINDOW` clause (`select ... from table window w as (partiton by id)`) | yes |
-| `ROWS` frame | yes |
-| `RANGE` frame | yes, it is the default |
-| `GROUPS` frame | no |
+| `WINDOW` clause (`select ... from table window w as (partiton by id)`) | supported |
+| `ROWS` frame | supported |
+| `RANGE` frame | supported, the default |
+| `GROUPS` frame | not supported |
 | Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | all aggregate functions are supported |
-| `rank()`, `dense_rank()`, `row_number()` | yes |
-| `lag/lead(value, offset)` | no, replace with `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead`| 
+| `rank()`, `dense_rank()`, `row_number()` | supported |
+| `lag/lead(value, offset)` | not supported, replace with `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead`| 
 
 ## References
 

From c9dd1aa58b831835a801bb886c77ccc712febcd9 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com>
Date: Mon, 15 Feb 2021 22:56:26 +0300
Subject: [PATCH 1093/1238] Update index.md

---
 docs/en/sql-reference/window-functions/index.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md
index 0a19b4a8da4..cbf03a44d46 100644
--- a/docs/en/sql-reference/window-functions/index.md
+++ b/docs/en/sql-reference/window-functions/index.md
@@ -19,6 +19,7 @@ ClickHouse supports the standard grammar for defining windows and window functio
 | `WINDOW` clause (`select ... from table window w as (partiton by id)`) | supported |
 | `ROWS` frame | supported |
 | `RANGE` frame | supported, the default |
+| `INTERVAL` syntax for `DateTime` `RANGE OFFSET` frame | not supported, specify the number of seconds instead |
 | `GROUPS` frame | not supported |
 | Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | all aggregate functions are supported |
 | `rank()`, `dense_rank()`, `row_number()` | supported |

From cf57c3b4a2b1741a8f12ee41ddb29659e06876de Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Mon, 15 Feb 2021 23:00:59 +0300
Subject: [PATCH 1094/1238] update comments

---
 src/Common/ZooKeeper/ZooKeeper.cpp   |  8 ------
 src/Databases/DatabaseFactory.cpp    | 12 ++++++---
 src/Databases/DatabaseReplicated.cpp | 35 +++++++++++++++++-------
 src/Databases/DatabaseReplicated.h   | 40 ++++++++--------------------
 tests/queries/skip_list.json         |  1 +
 5 files changed, 46 insertions(+), 50 deletions(-)

diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp
index dc6abca6892..a1c6eb9b481 100644
--- a/src/Common/ZooKeeper/ZooKeeper.cpp
+++ b/src/Common/ZooKeeper/ZooKeeper.cpp
@@ -551,14 +551,6 @@ Coordination::Error ZooKeeper::trySet(const std::string & path, const std::strin
 
 Coordination::Error ZooKeeper::multiImpl(const Coordination::Requests & requests, Coordination::Responses & responses)
 {
-    String desc;
-    for (const auto & r : requests)
-    {
-        auto & r_ref = *r;
-        desc += String(typeid(r_ref).name()) + "\t" + r->getPath() + "\n";
-    }
-    LOG_TRACE(&Poco::Logger::get("ZKTX"), "zk multi {}", desc);
-
     if (requests.empty())
         return Coordination::Error::ZOK;
 
diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp
index cbe1b8bb02a..ca2b9bb083e 100644
--- a/src/Databases/DatabaseFactory.cpp
+++ b/src/Databases/DatabaseFactory.cpp
@@ -14,6 +14,7 @@
 #include <Poco/File.h>
 #include <Poco/Path.h>
 #include <Interpreters/Context.h>
+#include <Common/Macros.h>
 
 #if !defined(ARCADIA_BUILD)
 #    include "config_core.h"
@@ -196,10 +197,13 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
 
         const auto & arguments = engine->arguments->children;
 
-        //TODO allow macros in arguments
-        const auto & zookeeper_path = safeGetLiteralValue<String>(arguments[0], "Replicated");
-        const auto & shard_name  = safeGetLiteralValue<String>(arguments[1], "Replicated");
-        const auto & replica_name  = safeGetLiteralValue<String>(arguments[2], "Replicated");
+        String zookeeper_path = safeGetLiteralValue<String>(arguments[0], "Replicated");
+        String shard_name = safeGetLiteralValue<String>(arguments[1], "Replicated");
+        String replica_name  = safeGetLiteralValue<String>(arguments[2], "Replicated");
+
+        zookeeper_path = context.getMacros()->expand(zookeeper_path);
+        shard_name = context.getMacros()->expand(shard_name);
+        replica_name = context.getMacros()->expand(replica_name);
 
         return std::make_shared<DatabaseReplicated>(database_name, metadata_path, uuid, zookeeper_path, shard_name, replica_name, context);
     }
diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index dc1203e8cc9..441880ae616 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -208,10 +208,13 @@ void DatabaseReplicated::tryConnectToZooKeeper(bool force_attach)
 
         is_readonly = false;
     }
-    catch(...)
+    catch (...)
     {
         if (!force_attach)
             throw;
+
+        /// It's server startup, ignore error.
+        /// Worker thread will try to setup ZooKeeper connection
         tryLogCurrentException(log);
     }
 }
@@ -234,10 +237,11 @@ bool DatabaseReplicated::createDatabaseNodesInZooKeeper(const zkutil::ZooKeeperP
     Coordination::Responses responses;
     auto res = current_zookeeper->tryMulti(ops, responses);
     if (res == Coordination::Error::ZOK)
-        return true;
+        return true;    /// Created new database (it's the first replica)
     if (res == Coordination::Error::ZNODEEXISTS)
-        return false;
+        return false;   /// Database exists, we will add new replica
 
+    /// Other codes are unexpected, will throw
     zkutil::KeeperMultiException::check(res, ops, responses);
     assert(false);
     __builtin_unreachable();
@@ -285,6 +289,7 @@ BlockIO DatabaseReplicated::propose(const ASTPtr & query, const Context & query_
     if (query_context.getClientInfo().query_kind != ClientInfo::QueryKind::INITIAL_QUERY)
         throw Exception(ErrorCodes::INCORRECT_QUERY, "It's not initial query. ON CLUSTER is not allowed for Replicated database.");
 
+    /// Replicas will set correct name of current database in query context (database name can be different on replicas)
     if (auto * ddl_query = query->as<ASTQueryWithTableAndOutput>())
         ddl_query->database.clear();
 
@@ -337,6 +342,11 @@ static UUID getTableUUIDIfReplicated(const String & metadata, const Context & co
 
 void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 our_log_ptr, UInt32 max_log_ptr)
 {
+    /// Let's compare local (possibly outdated) metadata with (most actual) metadata stored in ZooKeeper
+    /// and try to update the set of local tables.
+    /// We could drop all local tables and create the new ones just like it's new replica.
+    /// But it will cause all ReplicatedMergeTree tables to fetch all data parts again and data in other tables will be lost.
+
     bool new_replica = our_log_ptr == 0;
     if (new_replica)
         LOG_INFO(log, "Will create new replica from log pointer {}", max_log_ptr);
@@ -350,7 +360,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
 
     /// For ReplicatedMergeTree tables we can compare only UUIDs to ensure that it's the same table.
     /// Metadata can be different, it's handled on table replication level.
-    /// We need to handle only renamed tables.
+    /// We need to handle renamed tables only.
     /// TODO maybe we should also update MergeTree SETTINGS if required?
     std::unordered_map<UUID, String> zk_replicated_id_to_name;
     for (const auto & zk_table : table_name_to_metadata)
@@ -360,6 +370,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
             zk_replicated_id_to_name.emplace(zk_replicated_id, zk_table.first);
     }
 
+    /// We will drop or move tables which exist only in local metadata
     Strings tables_to_detach;
     std::vector<std::pair<String, String>> replicated_tables_to_rename;
     size_t total_tables = 0;
@@ -370,12 +381,16 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
         UUID local_replicated_id = UUIDHelpers::Nil;
         if (existing_tables_it->table()->supportsReplication())
         {
+            /// Check if replicated tables have the same UUID
             local_replicated_id = existing_tables_it->table()->getStorageID().uuid;
             auto it = zk_replicated_id_to_name.find(local_replicated_id);
             if (it != zk_replicated_id_to_name.end())
             {
                 if (name != it->second)
+                {
+                    /// Need just update table name
                     replicated_tables_to_rename.emplace_back(name, it->second);
+                }
                 continue;
             }
         }
@@ -383,7 +398,8 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
         auto in_zk = table_name_to_metadata.find(name);
         if (in_zk == table_name_to_metadata.end() || in_zk->second != readMetadataFile(name))
         {
-                tables_to_detach.emplace_back(std::move(name));
+            /// Local table does not exits in ZooKeeper or has different metadata
+            tables_to_detach.emplace_back(std::move(name));
         }
     }
 
@@ -407,16 +423,14 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
     std::vector<UUID> dropped_tables;
     for (const auto & table_name : tables_to_detach)
     {
-        String to_name = fmt::format("{}_{}_{}", table_name, max_log_ptr, thread_local_rng() % 1000);
-        assert(db_name < to_db_name);
         DDLGuardPtr table_guard = DatabaseCatalog::instance().getDDLGuard(db_name, table_name);
-        DDLGuardPtr to_table_guard = DatabaseCatalog::instance().getDDLGuard(to_db_name, to_name);
         if (getDatabaseName() != db_name)
             throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database was renamed, will retry");
 
         auto table = tryGetTable(table_name, global_context);
         if (isDictionaryExist(table_name))
         {
+            /// We can safely drop any dictionaries because they do not store data
             LOG_DEBUG(log, "Will DROP DICTIONARY {}", backQuoteIfNeed(table_name));
             DatabaseAtomic::removeDictionary(global_context, table_name);
             ++dropped_dicts;
@@ -430,7 +444,11 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
         }
         else
         {
+            /// Table probably stores some data. Let's move it to another database.
+            String to_name = fmt::format("{}_{}_{}", table_name, max_log_ptr, thread_local_rng() % 1000);
             LOG_DEBUG(log, "Will RENAME TABLE {} TO {}.{}", backQuoteIfNeed(table_name), backQuoteIfNeed(to_db_name), backQuoteIfNeed(to_name));
+            assert(db_name < to_db_name);
+            DDLGuardPtr to_table_guard = DatabaseCatalog::instance().getDDLGuard(to_db_name, to_name);
             auto to_db_ptr = DatabaseCatalog::instance().getDatabase(to_db_name);
             DatabaseAtomic::renameTable(global_context, table_name, *to_db_ptr, to_name, false, false);
             ++moved_tables;
@@ -454,7 +472,6 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
         DatabaseAtomic::renameTable(global_context, from, *this, to, false, false);
     }
 
-
     for (const auto & id : dropped_tables)
         DatabaseCatalog::instance().waitTableFinallyDropped(id);
 
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index 2ae97b0d82a..83efb24a49d 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -18,28 +18,6 @@ using ZooKeeperPtr = std::shared_ptr<zkutil::ZooKeeper>;
 class Cluster;
 using ClusterPtr = std::shared_ptr<Cluster>;
 
-/** DatabaseReplicated engine
-  * supports replication of metadata
-  * via DDL log being written to ZooKeeper
-  * and executed on all of the replicas
-  * for a given database.
-  *
-  * One Clickhouse server can have multiple
-  * replicated databases running and updating
-  * at the same time.
-  * 
-  * The engine has two parameters ZooKeeper path and 
-  * replica name.
-  * The same ZooKeeper path corresponds to the same
-  * database. Replica names MUST be different for all replicas
-  * of the same database.
-  *
-  * Using this engine, creation of Replicated tables
-  * requires no ZooKeeper path and replica name parameters.
-  * Table's replica name is the same as database replica name.
-  * Table's ZooKeeper path is a concatenation of database
-  * ZooKeeper path, /tables/, and UUID of the table.
-  */
 class DatabaseReplicated : public DatabaseAtomic
 {
 public:
@@ -49,6 +27,9 @@ public:
 
     ~DatabaseReplicated() override;
 
+    String getEngineName() const override { return "Replicated"; }
+
+    /// If current query is initial, then the following methods add metadata updating ZooKeeper operations to current MetadataTransaction.
     void dropTable(const Context &, const String & table_name, bool no_delay) override;
     void renameTable(const Context & context, const String & table_name, IDatabase & to_database,
                      const String & to_table_name, bool exchange, bool dictionary) override;
@@ -64,22 +45,23 @@ public:
     void removeDictionary(const Context & context, const String & dictionary_name) override;
     void detachTablePermanently(const Context & context, const String & table_name) override;
 
-    void drop(const Context & /*context*/) override;
-
-    String getEngineName() const override { return "Replicated"; }
-
+    /// Try to execute DLL query on current host as initial query. If query is succeed,
+    /// then it will be executed on all replicas.
     BlockIO propose(const ASTPtr & query, const Context & query_context);
 
     void stopReplication();
-    void shutdown() override;
-
-    void loadStoredObjects(Context & context, bool has_force_restore_data_flag, bool force_attach) override;
 
     String getFullReplicaName() const;
     static std::pair<String, String> parseFullReplicaName(const String & name);
 
+    /// Returns cluster consisting of database replicas
     ClusterPtr getCluster() const;
 
+    void drop(const Context & /*context*/) override;
+
+    void loadStoredObjects(Context & context, bool has_force_restore_data_flag, bool force_attach) override;
+    void shutdown() override;
+
     friend struct DatabaseReplicatedTask;
     friend class DatabaseReplicatedDDLWorker;
 private:
diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json
index db7b0631b97..f28e2dd7226 100644
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@@ -296,6 +296,7 @@
         "01015_attach_part",
         "01015_database_bad_tables",
         "01017_uniqCombined_memory_usage",
+        "01018_ddl_dictionaries_concurrent_requrests",  /// Cannot parse ATTACH DICTIONARY IF NOT EXISTS
         "01019_alter_materialized_view_atomic",
         "01019_alter_materialized_view_consistent",
         "01019_alter_materialized_view_query",

From 5273242f8608d09bb2280c04d7670b768c21235c Mon Sep 17 00:00:00 2001
From: vdimir <vdimir@yandex-team.ru>
Date: Mon, 15 Feb 2021 23:26:29 +0300
Subject: [PATCH 1095/1238] Minor changes move ON to WHERE for INNER JOIN

---
 src/Interpreters/CollectJoinOnKeysVisitor.cpp | 81 +++++++++----------
 src/Interpreters/CollectJoinOnKeysVisitor.h   |  1 -
 src/Interpreters/TreeRewriter.cpp             |  9 +--
 3 files changed, 44 insertions(+), 47 deletions(-)

diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.cpp b/src/Interpreters/CollectJoinOnKeysVisitor.cpp
index ec413fe08fc..9033dd0f0f8 100644
--- a/src/Interpreters/CollectJoinOnKeysVisitor.cpp
+++ b/src/Interpreters/CollectJoinOnKeysVisitor.cpp
@@ -16,6 +16,26 @@ namespace ErrorCodes
     extern const int LOGICAL_ERROR;
 }
 
+namespace
+{
+
+void addAndTerm(ASTPtr & ast, const ASTPtr & term)
+{
+    if (!ast)
+        ast = term;
+    else
+        ast = makeASTFunction("and", ast, term);
+}
+
+/// If this is an inner join and the expression related to less than 2 tables, then move it to WHERE
+bool canMoveToWhere(std::pair<size_t, size_t> table_numbers, ASTTableJoin::Kind kind)
+{
+    return kind == ASTTableJoin::Kind::Inner &&
+        (table_numbers.first == table_numbers.second || table_numbers.first == 0 || table_numbers.second == 0);
+}
+
+}
+
 void CollectJoinOnKeysMatcher::Data::addJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast,
                                                  const std::pair<size_t, size_t> & table_no)
 {
@@ -80,57 +100,36 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as
         ASTPtr right = func.arguments->children.at(1);
         auto table_numbers = getTableNumbers(left, right, data);
 
-        if (table_numbers.first != table_numbers.second && table_numbers.first > 0 && table_numbers.second > 0)
-            data.new_on_expression_valid = true;
-
-        /**
-          * if this is an inner join and the expression related to less than 2 tables, then move it to WHERE
-          */
-        if (data.kind == ASTTableJoin::Kind::Inner
-            && (table_numbers.first == table_numbers.second || table_numbers.first == 0 || table_numbers.second == 0))
+        if (canMoveToWhere(table_numbers, data.kind))
         {
-            if (!data.new_where_conditions)
-                data.new_where_conditions = ast->clone();
-            else
-                data.new_where_conditions = makeASTFunction("and", data.new_where_conditions, ast->clone());
+            addAndTerm(data.new_where_conditions, ast);
         }
         else
         {
+            if (data.kind == ASTTableJoin::Kind::Inner)
+            {
+                addAndTerm(data.new_on_expression, ast);
+            }
             data.addJoinKeys(left, right, table_numbers);
-            if (!data.new_on_expression)
-                data.new_on_expression = ast->clone();
-            else
-                data.new_on_expression = makeASTFunction("and", data.new_on_expression, ast->clone());
         }
     }
-    else if (inequality != ASOF::Inequality::None)
+    else if (inequality != ASOF::Inequality::None && !data.is_asof)
     {
-        if (!data.is_asof)
+        ASTPtr left = func.arguments->children.at(0);
+        ASTPtr right = func.arguments->children.at(1);
+        auto table_numbers = getTableNumbers(left, right, data);
+        if (canMoveToWhere(table_numbers, data.kind))
         {
-            ASTPtr left = func.arguments->children.at(0);
-            ASTPtr right = func.arguments->children.at(1);
-            auto table_numbers = getTableNumbers(left, right, data);
-
-            if (table_numbers.first != table_numbers.second && table_numbers.first > 0 && table_numbers.second > 0)
-                data.new_on_expression_valid = true;
-
-            if (data.kind == ASTTableJoin::Kind::Inner
-                && (table_numbers.first == table_numbers.second || table_numbers.first == 0 || table_numbers.second == 0))
-            {
-                if (!data.new_where_conditions)
-                    data.new_where_conditions = ast->clone();
-                else
-                    data.new_where_conditions = makeASTFunction("and", data.new_where_conditions, ast->clone());
-
-                return;
-            }
-            else
-            {
-                throw Exception("JOIN ON inequalities are not supported. Unexpected '" + queryToString(ast) + "'",
-                    ErrorCodes::NOT_IMPLEMENTED);
-            }
+            addAndTerm(data.new_where_conditions, ast);
         }
-
+        else
+        {
+            throw Exception("JOIN ON inequalities are not supported. Unexpected '" + queryToString(ast) + "'",
+                ErrorCodes::NOT_IMPLEMENTED);
+        }
+    }
+    else if (inequality != ASOF::Inequality::None && data.is_asof)
+    {
         if (data.asof_left_key || data.asof_right_key)
             throw Exception("ASOF JOIN expects exactly one inequality in ON section. Unexpected '" + queryToString(ast) + "'",
                 ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.h b/src/Interpreters/CollectJoinOnKeysVisitor.h
index 64547baf7d7..aa2fd80d07c 100644
--- a/src/Interpreters/CollectJoinOnKeysVisitor.h
+++ b/src/Interpreters/CollectJoinOnKeysVisitor.h
@@ -37,7 +37,6 @@ public:
         ASTPtr new_on_expression{};
         ASTPtr new_where_conditions{};
         bool has_some{false};
-        bool new_on_expression_valid{false};
 
         void addJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, const std::pair<size_t, size_t> & table_no);
         void addAsofJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, const std::pair<size_t, size_t> & table_no,
diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp
index 9f788703704..22356622f8d 100644
--- a/src/Interpreters/TreeRewriter.cpp
+++ b/src/Interpreters/TreeRewriter.cpp
@@ -424,11 +424,10 @@ void collectJoinedColumns(TableJoin & analyzed_join, const ASTSelectQuery & sele
             throw Exception("Cannot get JOIN keys from JOIN ON section: " + queryToString(table_join.on_expression),
                             ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
         if (is_asof)
+        {
             data.asofToJoinKeys();
-        else if (!data.new_on_expression_valid)
-            throw Exception("JOIN expects left and right joined keys from two joined table in ON section. Unexpected '" + queryToString(data.new_on_expression) + "'",
-                ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
-        else if (data.new_where_conditions != nullptr)
+        }
+        else if (data.new_where_conditions && data.new_on_expression)
         {
             table_join.on_expression = data.new_on_expression;
             new_where_conditions = data.new_where_conditions;
@@ -823,7 +822,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
     setJoinStrictness(*select_query, settings.join_default_strictness, settings.any_join_distinct_right_table_keys,
                         result.analyzed_join->table_join);
 
-    ASTPtr new_where_condition;
+    ASTPtr new_where_condition = nullptr;
     collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_columns, result.aliases, new_where_condition);
     if (new_where_condition)
         moveJoinedKeyToWhere(select_query, new_where_condition);

From 5a5542dd5c6de677044e4da0b33a9a171aeb3bba Mon Sep 17 00:00:00 2001
From: Anna <annaconda@bk.ru>
Date: Tue, 16 Feb 2021 00:03:02 +0300
Subject: [PATCH 1096/1238] Minor fixes

---
 docs/_description_templates/template-function.md     | 4 +---
 docs/_description_templates/template-system-table.md | 4 ++++
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/docs/_description_templates/template-function.md b/docs/_description_templates/template-function.md
index b69d7ed5309..2ff0ee586e8 100644
--- a/docs/_description_templates/template-function.md
+++ b/docs/_description_templates/template-function.md
@@ -19,9 +19,7 @@ More text (Optional).
 
 **Returned value(s)**
 
--   Returned values list.
-
-Type: [Type](relative/path/to/type/dscr.md#type).
+-   Returned values list. [Type name](relative/path/to/type/dscr.md#type).
 
 **Example**
 
diff --git a/docs/_description_templates/template-system-table.md b/docs/_description_templates/template-system-table.md
index 3fdf9788d79..f2decc4bb6d 100644
--- a/docs/_description_templates/template-system-table.md
+++ b/docs/_description_templates/template-system-table.md
@@ -8,10 +8,14 @@ Columns:
 
 **Example**
 
+Query:
+
 ``` sql
 SELECT * FROM system.table_name
 ```
 
+Result:
+
 ``` text
 Some output. It shouldn't be too long.
 ```

From ce1f10904e820a538a4210e7a8aea92ea9021882 Mon Sep 17 00:00:00 2001
From: Anna <annaconda@bk.ru>
Date: Tue, 16 Feb 2021 00:22:10 +0300
Subject: [PATCH 1097/1238] Global replacement `Parameters` to `Arguments`

---
 .../template-function.md                      | 10 +++-
 .../functions/array-functions.md              | 44 ++++++++--------
 .../sql-reference/functions/bit-functions.md  |  8 +--
 .../functions/bitmap-functions.md             | 38 +++++++-------
 .../functions/conditional-functions.md        |  4 +-
 .../functions/date-time-functions.md          | 26 +++++-----
 .../functions/encoding-functions.md           |  4 +-
 .../functions/encryption-functions.md         |  8 +--
 .../functions/ext-dict-functions.md           | 10 ++--
 .../functions/functions-for-nulls.md          | 14 ++---
 .../en/sql-reference/functions/geo/geohash.md |  2 +-
 docs/en/sql-reference/functions/geo/h3.md     | 10 ++--
 .../sql-reference/functions/hash-functions.md | 34 ++++++------
 .../sql-reference/functions/introspection.md  |  8 +--
 .../functions/ip-address-functions.md         |  4 +-
 .../sql-reference/functions/json-functions.md |  2 +-
 .../functions/machine-learning-functions.md   |  2 +-
 .../sql-reference/functions/math-functions.md | 18 +++----
 .../functions/other-functions.md              | 52 +++++++++----------
 .../functions/random-functions.md             |  4 +-
 .../functions/rounding-functions.md           |  4 +-
 .../functions/splitting-merging-functions.md  |  6 +--
 .../functions/string-functions.md             | 22 ++++----
 .../functions/string-search-functions.md      | 24 ++++-----
 .../functions/tuple-functions.md              |  2 +-
 .../functions/tuple-map-functions.md          |  8 +--
 .../functions/type-conversion-functions.md    | 24 ++++-----
 .../sql-reference/functions/url-functions.md  |  6 +--
 .../functions/ym-dict-functions.md            |  2 +-
 29 files changed, 203 insertions(+), 197 deletions(-)

diff --git a/docs/_description_templates/template-function.md b/docs/_description_templates/template-function.md
index 2ff0ee586e8..a0074a76ef6 100644
--- a/docs/_description_templates/template-function.md
+++ b/docs/_description_templates/template-function.md
@@ -12,14 +12,20 @@ Alias: `<alias name>`. (Optional)
 
 More text (Optional).
 
-**Parameters** (Optional)
+**Arguments** (Optional)
 
 -   `x` — Description. [Type name](relative/path/to/type/dscr.md#type).
 -   `y` — Description. [Type name](relative/path/to/type/dscr.md#type).
 
+**Parameters** (Optional, only for parametric aggregate functions)
+
+-   `z` — Description. [Type name](relative/path/to/type/dscr.md#type).
+
 **Returned value(s)**
 
--   Returned values list. [Type name](relative/path/to/type/dscr.md#type).
+-   Returned values list. 
+
+Type: [Type name](relative/path/to/type/dscr.md#type).
 
 **Example**
 
diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md
index d5b357795d7..c9c418d57a4 100644
--- a/docs/en/sql-reference/functions/array-functions.md
+++ b/docs/en/sql-reference/functions/array-functions.md
@@ -61,7 +61,7 @@ Combines arrays passed as arguments.
 arrayConcat(arrays)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `arrays` – Arbitrary number of arguments of [Array](../../sql-reference/data-types/array.md) type.
     **Example**
@@ -111,7 +111,7 @@ Checks whether one array is a subset of another.
 hasAll(set, subset)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `set` – Array of any type with a set of elements.
 -   `subset` – Array of any type with elements that should be tested to be a subset of `set`.
@@ -149,7 +149,7 @@ Checks whether two arrays have intersection by some elements.
 hasAny(array1, array2)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `array1` – Array of any type with a set of elements.
 -   `array2` – Array of any type with a set of elements.
@@ -191,7 +191,7 @@ For Example:
 - `hasSubstr([1,2,3,4], [2,3])` returns 1. However, `hasSubstr([1,2,3,4], [3,2])` will return `0`.
 - `hasSubstr([1,2,3,4], [1,2,3])` returns 1. However, `hasSubstr([1,2,3,4], [1,2,4])` will return `0`.
 
-**Parameters**
+**Arguments**
 
 -   `array1` – Array of any type with a set of elements.
 -   `array2` – Array of any type with a set of elements.
@@ -369,7 +369,7 @@ Removes the last item from the array.
 arrayPopBack(array)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `array` – Array.
 
@@ -393,7 +393,7 @@ Removes the first item from the array.
 arrayPopFront(array)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `array` – Array.
 
@@ -417,7 +417,7 @@ Adds one item to the end of the array.
 arrayPushBack(array, single_value)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `array` – Array.
 -   `single_value` – A single value. Only numbers can be added to an array with numbers, and only strings can be added to an array of strings. When adding numbers, ClickHouse automatically sets the `single_value` type for the data type of the array. For more information about the types of data in ClickHouse, see “[Data types](../../sql-reference/data-types/index.md#data_types)”. Can be `NULL`. The function adds a `NULL` element to an array, and the type of array elements converts to `Nullable`.
@@ -442,7 +442,7 @@ Adds one element to the beginning of the array.
 arrayPushFront(array, single_value)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `array` – Array.
 -   `single_value` – A single value. Only numbers can be added to an array with numbers, and only strings can be added to an array of strings. When adding numbers, ClickHouse automatically sets the `single_value` type for the data type of the array. For more information about the types of data in ClickHouse, see “[Data types](../../sql-reference/data-types/index.md#data_types)”. Can be `NULL`. The function adds a `NULL` element to an array, and the type of array elements converts to `Nullable`.
@@ -467,7 +467,7 @@ Changes the length of the array.
 arrayResize(array, size[, extender])
 ```
 
-**Parameters:**
+**Arguments:**
 
 -   `array` — Array.
 -   `size` — Required length of the array.
@@ -509,7 +509,7 @@ Returns a slice of the array.
 arraySlice(array, offset[, length])
 ```
 
-**Parameters**
+**Arguments**
 
 -   `array` – Array of data.
 -   `offset` – Indent from the edge of the array. A positive value indicates an offset on the left, and a negative value is an indent on the right. Numbering of the array items begins with 1.
@@ -751,7 +751,7 @@ Calculates the difference between adjacent array elements. Returns an array wher
 arrayDifference(array)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `array` – [Array](https://clickhouse.tech/docs/en/data_types/array/).
 
@@ -803,7 +803,7 @@ Takes an array, returns an array containing the distinct elements only.
 arrayDistinct(array)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `array` – [Array](https://clickhouse.tech/docs/en/data_types/array/).
 
@@ -871,7 +871,7 @@ Applies an aggregate function to array elements and returns its result. The name
 arrayReduce(agg_func, arr1, arr2, ..., arrN)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `agg_func` — The name of an aggregate function which should be a constant [string](../../sql-reference/data-types/string.md).
 -   `arr` — Any number of [array](../../sql-reference/data-types/array.md) type columns as the parameters of the aggregation function.
@@ -936,7 +936,7 @@ Applies an aggregate function to array elements in given ranges and returns an a
 arrayReduceInRanges(agg_func, ranges, arr1, arr2, ..., arrN)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `agg_func` — The name of an aggregate function which should be a constant [string](../../sql-reference/data-types/string.md).
 -   `ranges` — The ranges to aggretate which should be an [array](../../sql-reference/data-types/array.md) of [tuples](../../sql-reference/data-types/tuple.md) which containing the index and the length of each range.
@@ -1007,7 +1007,7 @@ flatten(array_of_arrays)
 
 Alias: `flatten`.
 
-**Parameters**
+**Arguments**
 
 -   `array_of_arrays` — [Array](../../sql-reference/data-types/array.md) of arrays. For example, `[[1,2,3], [4,5]]`.
 
@@ -1033,7 +1033,7 @@ Removes consecutive duplicate elements from an array. The order of result values
 arrayCompact(arr)
 ```
 
-**Parameters**
+**Arguments**
 
 `arr` — The [array](../../sql-reference/data-types/array.md) to inspect.
 
@@ -1069,7 +1069,7 @@ Combines multiple arrays into a single array. The resulting array contains the c
 arrayZip(arr1, arr2, ..., arrN)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `arrN` — [Array](../../sql-reference/data-types/array.md).
 
@@ -1107,7 +1107,7 @@ Calculate AUC (Area Under the Curve, which is a concept in machine learning, see
 arrayAUC(arr_scores, arr_labels)
 ```
 
-**Parameters**
+**Arguments**
 - `arr_scores` — scores prediction model gives.
 - `arr_labels` — labels of samples, usually 1 for positive sample and 0 for negtive sample.
 
@@ -1302,7 +1302,7 @@ Note that the `arrayMin` is a [higher-order function](../../sql-reference/functi
 arrayMin([func,] arr)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md).
 -   `arr` — Array. [Array](../../sql-reference/data-types/array.md).
@@ -1357,7 +1357,7 @@ Note that the `arrayMax` is a [higher-order function](../../sql-reference/functi
 arrayMax([func,] arr)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md).
 -   `arr` — Array. [Array](../../sql-reference/data-types/array.md).
@@ -1412,7 +1412,7 @@ Note that the `arraySum` is a [higher-order function](../../sql-reference/functi
 arraySum([func,] arr)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md).
 -   `arr` — Array. [Array](../../sql-reference/data-types/array.md).   
@@ -1467,7 +1467,7 @@ Note that the `arrayAvg` is a [higher-order function](../../sql-reference/functi
 arrayAvg([func,] arr)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md).
 -   `arr` — Array. [Array](../../sql-reference/data-types/array.md).   
diff --git a/docs/en/sql-reference/functions/bit-functions.md b/docs/en/sql-reference/functions/bit-functions.md
index 57c2ae42ada..a3d0c82d8ab 100644
--- a/docs/en/sql-reference/functions/bit-functions.md
+++ b/docs/en/sql-reference/functions/bit-functions.md
@@ -35,7 +35,7 @@ Takes any integer and converts it into [binary form](https://en.wikipedia.org/wi
 SELECT bitTest(number, index)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `number` – integer number.
 -   `index` – position of bit.
@@ -100,7 +100,7 @@ The conjuction for bitwise operations:
 SELECT bitTestAll(number, index1, index2, index3, index4, ...)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `number` – integer number.
 -   `index1`, `index2`, `index3`, `index4` – positions of bit. For example, for set of positions (`index1`, `index2`, `index3`, `index4`) is true if and only if all of its positions are true (`index1` ⋀ `index2`, ⋀ `index3` ⋀ `index4`).
@@ -165,7 +165,7 @@ The disjunction for bitwise operations:
 SELECT bitTestAny(number, index1, index2, index3, index4, ...)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `number` – integer number.
 -   `index1`, `index2`, `index3`, `index4` – positions of bit.
@@ -220,7 +220,7 @@ Calculates the number of bits set to one in the binary representation of a numbe
 bitCount(x)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `x` — [Integer](../../sql-reference/data-types/int-uint.md) or [floating-point](../../sql-reference/data-types/float.md) number. The function uses the value representation in memory. It allows supporting floating-point numbers.
 
diff --git a/docs/en/sql-reference/functions/bitmap-functions.md b/docs/en/sql-reference/functions/bitmap-functions.md
index a66098beffb..bfff70576f2 100644
--- a/docs/en/sql-reference/functions/bitmap-functions.md
+++ b/docs/en/sql-reference/functions/bitmap-functions.md
@@ -21,7 +21,7 @@ Build a bitmap from unsigned integer array.
 bitmapBuild(array)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `array` – unsigned integer array.
 
@@ -45,7 +45,7 @@ Convert bitmap to integer array.
 bitmapToArray(bitmap)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `bitmap` – bitmap object.
 
@@ -69,7 +69,7 @@ Return subset in specified range (not include the range_end).
 bitmapSubsetInRange(bitmap, range_start, range_end)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild).
 -   `range_start` – range start point. Type: [UInt32](../../sql-reference/data-types/int-uint.md).
@@ -97,7 +97,7 @@ Creates a subset of bitmap with n elements taken between `range_start` and `card
 bitmapSubsetLimit(bitmap, range_start, cardinality_limit)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild).
 -   `range_start` – The subset starting point. Type: [UInt32](../../sql-reference/data-types/int-uint.md).
@@ -133,7 +133,7 @@ Checks whether the bitmap contains an element.
 bitmapContains(haystack, needle)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `haystack` – [Bitmap object](#bitmap_functions-bitmapbuild), where the function searches.
 -   `needle` – Value that the function searches. Type: [UInt32](../../sql-reference/data-types/int-uint.md).
@@ -167,7 +167,7 @@ bitmapHasAny(bitmap1, bitmap2)
 
 If you are sure that `bitmap2` contains strictly one element, consider using the [bitmapContains](#bitmap_functions-bitmapcontains) function. It works more efficiently.
 
-**Parameters**
+**Arguments**
 
 -   `bitmap*` – bitmap object.
 
@@ -197,7 +197,7 @@ If the second argument is an empty bitmap then returns 1.
 bitmapHasAll(bitmap,bitmap)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `bitmap` – bitmap object.
 
@@ -221,7 +221,7 @@ Retrun bitmap cardinality of type UInt64.
 bitmapCardinality(bitmap)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `bitmap` – bitmap object.
 
@@ -243,7 +243,7 @@ Retrun the smallest value of type UInt64 in the set, UINT32_MAX if the set is em
 
     bitmapMin(bitmap)
 
-**Parameters**
+**Arguments**
 
 -   `bitmap` – bitmap object.
 
@@ -263,7 +263,7 @@ Retrun the greatest value of type UInt64 in the set, 0 if the set is empty.
 
     bitmapMax(bitmap)
 
-**Parameters**
+**Arguments**
 
 -   `bitmap` – bitmap object.
 
@@ -283,7 +283,7 @@ Transform an array of values in a bitmap to another array of values, the result
 
     bitmapTransform(bitmap, from_array, to_array)
 
-**Parameters**
+**Arguments**
 
 -   `bitmap` – bitmap object.
 -   `from_array` – UInt32 array. For idx in range \[0, from_array.size()), if bitmap contains from_array\[idx\], then replace it with to_array\[idx\]. Note that the result depends on array ordering if there are common elements between from_array and to_array.
@@ -307,7 +307,7 @@ Two bitmap and calculation, the result is a new bitmap.
 bitmapAnd(bitmap,bitmap)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `bitmap` – bitmap object.
 
@@ -331,7 +331,7 @@ Two bitmap or calculation, the result is a new bitmap.
 bitmapOr(bitmap,bitmap)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `bitmap` – bitmap object.
 
@@ -355,7 +355,7 @@ Two bitmap xor calculation, the result is a new bitmap.
 bitmapXor(bitmap,bitmap)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `bitmap` – bitmap object.
 
@@ -379,7 +379,7 @@ Two bitmap andnot calculation, the result is a new bitmap.
 bitmapAndnot(bitmap,bitmap)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `bitmap` – bitmap object.
 
@@ -403,7 +403,7 @@ Two bitmap and calculation, return cardinality of type UInt64.
 bitmapAndCardinality(bitmap,bitmap)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `bitmap` – bitmap object.
 
@@ -427,7 +427,7 @@ Two bitmap or calculation, return cardinality of type UInt64.
 bitmapOrCardinality(bitmap,bitmap)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `bitmap` – bitmap object.
 
@@ -451,7 +451,7 @@ Two bitmap xor calculation, return cardinality of type UInt64.
 bitmapXorCardinality(bitmap,bitmap)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `bitmap` – bitmap object.
 
@@ -475,7 +475,7 @@ Two bitmap andnot calculation, return cardinality of type UInt64.
 bitmapAndnotCardinality(bitmap,bitmap)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `bitmap` – bitmap object.
 
diff --git a/docs/en/sql-reference/functions/conditional-functions.md b/docs/en/sql-reference/functions/conditional-functions.md
index 446a4729ff2..2d57cbb3bd5 100644
--- a/docs/en/sql-reference/functions/conditional-functions.md
+++ b/docs/en/sql-reference/functions/conditional-functions.md
@@ -17,7 +17,7 @@ SELECT if(cond, then, else)
 
 If the condition `cond` evaluates to a non-zero value, returns the result of the expression `then`, and the result of the expression `else`, if present, is skipped. If the `cond` is zero or `NULL`, then the result of the `then` expression is skipped and the result of the `else` expression, if present, is returned.
 
-**Parameters**
+**Arguments**
 
 -   `cond` – The condition for evaluation that can be zero or not. The type is UInt8, Nullable(UInt8) or NULL.
 -   `then` - The expression to return if condition is met.
@@ -117,7 +117,7 @@ Allows you to write the [CASE](../../sql-reference/operators/index.md#operator_c
 
 Syntax: `multiIf(cond_1, then_1, cond_2, then_2, ..., else)`
 
-**Parameters:**
+**Arguments:**
 
 -   `cond_N` — The condition for the function to return `then_N`.
 -   `then_N` — The result of the function when executed.
diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index 4a73bdb2546..f26e1bee6c9 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -186,7 +186,7 @@ Truncates sub-seconds.
 toStartOfSecond(value[, timezone])
 ```
 
-**Parameters**
+**Arguments**
 
 -   `value` — Date and time. [DateTime64](../../sql-reference/data-types/datetime64.md).
 -   `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md).
@@ -328,7 +328,7 @@ For mode values with a meaning of “contains January 1”, the week contains Ja
 toWeek(date, [, mode][, Timezone])
 ```
 
-**Parameters**
+**Arguments**
 
 -   `date` – Date or DateTime.
 -   `mode` – Optional parameter, Range of values is \[0,9\], default is 0.
@@ -378,7 +378,7 @@ date_trunc(unit, value[, timezone])
 
 Alias: `dateTrunc`. 
 
-**Parameters**
+**Arguments**
 
 -   `unit` — The type of interval to truncate the result. [String Literal](../syntax.md#syntax-string-literal).
     Possible values:
@@ -447,7 +447,7 @@ date_add(unit, value, date)
 
 Aliases: `dateAdd`, `DATE_ADD`. 
 
-**Parameters**
+**Arguments**
 
 -   `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md).
 
@@ -484,7 +484,7 @@ date_diff('unit', startdate, enddate, [timezone])
 
 Aliases: `dateDiff`, `DATE_DIFF`. 
 
-**Parameters**
+**Arguments**
 
 -   `unit` — The type of interval for result [String](../../sql-reference/data-types/string.md).
 
@@ -530,7 +530,7 @@ date_sub(unit, value, date)
 
 Aliases: `dateSub`, `DATE_SUB`. 
 
-**Parameters**
+**Arguments**
 
 -   `unit` — The type of interval to subtract. [String](../../sql-reference/data-types/string.md).
 
@@ -570,7 +570,7 @@ timestamp_add(date, INTERVAL value unit)
 
 Aliases: `timeStampAdd`, `TIMESTAMP_ADD`. 
 
-**Parameters**
+**Arguments**
     
 -   `date` — Date or Date with time - [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
 -   `value` - Value in specified unit - [Int](../../sql-reference/data-types/int-uint.md)
@@ -606,7 +606,7 @@ timestamp_sub(unit, value, date)
 
 Aliases: `timeStampSub`, `TIMESTAMP_SUB`. 
 
-**Parameters**
+**Arguments**
 
 -   `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md).
 
@@ -640,7 +640,7 @@ Returns the current date and time.
 now([timezone])
 ```
 
-**Parameters**
+**Arguments**
 
 -   `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md).
 
@@ -855,7 +855,7 @@ Converts a [Proleptic Gregorian calendar](https://en.wikipedia.org/wiki/Prolepti
 toModifiedJulianDay(date)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `date` — Date in text form. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
 
@@ -891,7 +891,7 @@ Similar to [toModifiedJulianDay()](#tomodifiedjulianday), but instead of raising
 toModifiedJulianDayOrNull(date)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `date` — Date in text form. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
 
@@ -927,7 +927,7 @@ Converts a [Modified Julian Day](https://en.wikipedia.org/wiki/Julian_day#Varian
 fromModifiedJulianDay(day)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `day` — Modified Julian Day number. [Any integral types](../../sql-reference/data-types/int-uint.md).
 
@@ -963,7 +963,7 @@ Similar to [fromModifiedJulianDayOrNull()](#frommodifiedjuliandayornull), but in
 fromModifiedJulianDayOrNull(day)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `day` — Modified Julian Day number. [Any integral types](../../sql-reference/data-types/int-uint.md).
 
diff --git a/docs/en/sql-reference/functions/encoding-functions.md b/docs/en/sql-reference/functions/encoding-functions.md
index bc3f5ca4345..31e84c08b39 100644
--- a/docs/en/sql-reference/functions/encoding-functions.md
+++ b/docs/en/sql-reference/functions/encoding-functions.md
@@ -15,7 +15,7 @@ Returns the string with the length as the number of passed arguments and each by
 char(number_1, [number_2, ..., number_n]);
 ```
 
-**Parameters**
+**Arguments**
 
 -   `number_1, number_2, ..., number_n` — Numerical arguments interpreted as integers. Types: [Int](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md).
 
@@ -107,7 +107,7 @@ For `String` and `FixedString`, all bytes are simply encoded as two hexadecimal
 
 Values of floating point and Decimal types are encoded as their representation in memory. As we support little endian architecture, they are encoded in little endian. Zero leading/trailing bytes are not omitted.
 
-**Parameters**
+**Arguments**
 
 -   `arg` — A value to convert to hexadecimal. Types: [String](../../sql-reference/data-types/string.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md), [Decimal](../../sql-reference/data-types/decimal.md), [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
 
diff --git a/docs/en/sql-reference/functions/encryption-functions.md b/docs/en/sql-reference/functions/encryption-functions.md
index 9e360abfe26..0dd7469b25e 100644
--- a/docs/en/sql-reference/functions/encryption-functions.md
+++ b/docs/en/sql-reference/functions/encryption-functions.md
@@ -31,7 +31,7 @@ This function encrypts data using these modes:
 encrypt('mode', 'plaintext', 'key' [, iv, aad])
 ```
 
-**Parameters**
+**Arguments**
 
 -   `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string).
 -   `plaintext` — Text thats need to be encrypted. [String](../../sql-reference/data-types/string.md#string).
@@ -127,7 +127,7 @@ Supported encryption modes:
 aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv])
 ```
 
-**Parameters**
+**Arguments**
 
 -   `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string).
 -   `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string).
@@ -238,7 +238,7 @@ This function decrypts ciphertext into a plaintext using these modes:
 decrypt('mode', 'ciphertext', 'key' [, iv, aad])
 ```
 
-**Parameters**
+**Arguments**
 
 -   `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string).
 -   `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string).
@@ -317,7 +317,7 @@ Supported decryption modes:
 aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])
 ```
 
-**Parameters**
+**Arguments**
 
 -   `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string).
 -   `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string).
diff --git a/docs/en/sql-reference/functions/ext-dict-functions.md b/docs/en/sql-reference/functions/ext-dict-functions.md
index 7df6ef54f2a..834fcdf8282 100644
--- a/docs/en/sql-reference/functions/ext-dict-functions.md
+++ b/docs/en/sql-reference/functions/ext-dict-functions.md
@@ -19,7 +19,7 @@ dictGet('dict_name', 'attr_name', id_expr)
 dictGetOrDefault('dict_name', 'attr_name', id_expr, default_value_expr)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal).
 -   `attr_name` — Name of the column of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal).
@@ -108,7 +108,7 @@ Checks whether a key is present in a dictionary.
 dictHas('dict_name', id_expr)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal).
 -   `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md) or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration.
@@ -130,7 +130,7 @@ Creates an array, containing all the parents of a key in the [hierarchical dicti
 dictGetHierarchy('dict_name', key)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal).
 -   `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value.
@@ -149,7 +149,7 @@ Checks the ancestor of a key through the whole hierarchical chain in the diction
 dictIsIn('dict_name', child_id_expr, ancestor_id_expr)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal).
 -   `child_id_expr` — Key to be checked. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value.
@@ -185,7 +185,7 @@ dictGet[Type]('dict_name', 'attr_name', id_expr)
 dictGet[Type]OrDefault('dict_name', 'attr_name', id_expr, default_value_expr)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal).
 -   `attr_name` — Name of the column of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal).
diff --git a/docs/en/sql-reference/functions/functions-for-nulls.md b/docs/en/sql-reference/functions/functions-for-nulls.md
index c32af7194fb..df75e96c8fb 100644
--- a/docs/en/sql-reference/functions/functions-for-nulls.md
+++ b/docs/en/sql-reference/functions/functions-for-nulls.md
@@ -13,7 +13,7 @@ Checks whether the argument is [NULL](../../sql-reference/syntax.md#null-literal
 isNull(x)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `x` — A value with a non-compound data type.
 
@@ -53,7 +53,7 @@ Checks whether the argument is [NULL](../../sql-reference/syntax.md#null-literal
 isNotNull(x)
 ```
 
-**Parameters:**
+**Arguments:**
 
 -   `x` — A value with a non-compound data type.
 
@@ -93,7 +93,7 @@ Checks from left to right whether `NULL` arguments were passed and returns the f
 coalesce(x,...)
 ```
 
-**Parameters:**
+**Arguments:**
 
 -   Any number of parameters of a non-compound type. All parameters must be compatible by data type.
 
@@ -136,7 +136,7 @@ Returns an alternative value if the main argument is `NULL`.
 ifNull(x,alt)
 ```
 
-**Parameters:**
+**Arguments:**
 
 -   `x` — The value to check for `NULL`.
 -   `alt` — The value that the function returns if `x` is `NULL`.
@@ -176,7 +176,7 @@ Returns `NULL` if the arguments are equal.
 nullIf(x, y)
 ```
 
-**Parameters:**
+**Arguments:**
 
 `x`, `y` — Values for comparison. They must be compatible types, or ClickHouse will generate an exception.
 
@@ -215,7 +215,7 @@ Results in a value of type [Nullable](../../sql-reference/data-types/nullable.md
 assumeNotNull(x)
 ```
 
-**Parameters:**
+**Arguments:**
 
 -   `x` — The original value.
 
@@ -277,7 +277,7 @@ Converts the argument type to `Nullable`.
 toNullable(x)
 ```
 
-**Parameters:**
+**Arguments:**
 
 -   `x` — The value of any non-compound type.
 
diff --git a/docs/en/sql-reference/functions/geo/geohash.md b/docs/en/sql-reference/functions/geo/geohash.md
index 6f288a7687d..c27eab0b421 100644
--- a/docs/en/sql-reference/functions/geo/geohash.md
+++ b/docs/en/sql-reference/functions/geo/geohash.md
@@ -72,7 +72,7 @@ Returns an array of [geohash](#geohash)-encoded strings of given precision that
 geohashesInBox(longitude_min, latitude_min, longitude_max, latitude_max, precision)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `longitude_min` — Minimum longitude. Range: `[-180°, 180°]`. Type: [Float](../../../sql-reference/data-types/float.md).
 -   `latitude_min` — Minimum latitude. Range: `[-90°, 90°]`. Type: [Float](../../../sql-reference/data-types/float.md).
diff --git a/docs/en/sql-reference/functions/geo/h3.md b/docs/en/sql-reference/functions/geo/h3.md
index 4ed651e4e9e..9dda947b3a7 100644
--- a/docs/en/sql-reference/functions/geo/h3.md
+++ b/docs/en/sql-reference/functions/geo/h3.md
@@ -162,7 +162,7 @@ Returns [H3](#h3index) point index `(lon, lat)` with specified resolution.
 geoToH3(lon, lat, resolution)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `lon` — Longitude. Type: [Float64](../../../sql-reference/data-types/float.md).
 -   `lat` — Latitude. Type: [Float64](../../../sql-reference/data-types/float.md).
@@ -201,7 +201,7 @@ Result:
 h3kRing(h3index, k)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `h3index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
 -   `k` — Raduis. Type: [integer](../../../sql-reference/data-types/int-uint.md)
@@ -315,7 +315,7 @@ Returns whether or not the provided [H3](#h3index) indexes are neighbors.
 h3IndexesAreNeighbors(index1, index2)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `index1` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
 -   `index2` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
@@ -353,7 +353,7 @@ Returns an array of child indexes for the given [H3](#h3index) index.
 h3ToChildren(index, resolution)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
 -   `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
@@ -390,7 +390,7 @@ Returns the parent (coarser) index containing the given [H3](#h3index) index.
 h3ToParent(index, resolution)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
 -   `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md
index 9394426b20b..465ad01527f 100644
--- a/docs/en/sql-reference/functions/hash-functions.md
+++ b/docs/en/sql-reference/functions/hash-functions.md
@@ -18,9 +18,9 @@ halfMD5(par1, ...)
 The function is relatively slow (5 million short strings per second per processor core).
 Consider using the [sipHash64](#hash_functions-siphash64) function instead.
 
-**Parameters**
+**Arguments**
 
-The function takes a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md).
+The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md).
 
 **Returned Value**
 
@@ -61,9 +61,9 @@ Function [interprets](../../sql-reference/functions/type-conversion-functions.md
 3.  Then the function takes the hash value, calculated at the previous step, and the third element of the initial hash array, and calculates a hash for the array of them.
 4.  The previous step is repeated for all the remaining elements of the initial hash array.
 
-**Parameters**
+**Arguments**
 
-The function takes a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md).
+The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md).
 
 **Returned Value**
 
@@ -97,9 +97,9 @@ cityHash64(par1,...)
 
 This is a fast non-cryptographic hash function. It uses the CityHash algorithm for string parameters and implementation-specific fast non-cryptographic hash function for parameters with other data types. The function uses the CityHash combinator to get the final results.
 
-**Parameters**
+**Arguments**
 
-The function takes a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md).
+The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md).
 
 **Returned Value**
 
@@ -166,9 +166,9 @@ farmHash64(par1, ...)
 
 These functions use the `Fingerprint64` and `Hash64` methods respectively from all [available methods](https://github.com/google/farmhash/blob/master/src/farmhash.h).
 
-**Parameters**
+**Arguments**
 
-The function takes a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md).
+The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md).
 
 **Returned Value**
 
@@ -226,7 +226,7 @@ Calculates [JavaHash](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add97
 javaHashUTF16LE(stringUtf16le)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `stringUtf16le` — a string in UTF-16LE encoding.
 
@@ -292,9 +292,9 @@ Produces a 64-bit [MetroHash](http://www.jandrewrogers.com/2015/05/27/metrohash/
 metroHash64(par1, ...)
 ```
 
-**Parameters**
+**Arguments**
 
-The function takes a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md).
+The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md).
 
 **Returned Value**
 
@@ -327,9 +327,9 @@ murmurHash2_32(par1, ...)
 murmurHash2_64(par1, ...)
 ```
 
-**Parameters**
+**Arguments**
 
-Both functions take a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md).
+Both functions take a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md).
 
 **Returned Value**
 
@@ -358,7 +358,7 @@ Calculates a 64-bit [MurmurHash2](https://github.com/aappleby/smhasher) hash val
 gccMurmurHash(par1, ...);
 ```
 
-**Parameters**
+**Arguments**
 
 -   `par1, ...` — A variable number of parameters that can be any of the [supported data types](../../sql-reference/data-types/index.md#data_types).
 
@@ -395,9 +395,9 @@ murmurHash3_32(par1, ...)
 murmurHash3_64(par1, ...)
 ```
 
-**Parameters**
+**Arguments**
 
-Both functions take a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md).
+Both functions take a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md).
 
 **Returned Value**
 
@@ -424,7 +424,7 @@ Produces a 128-bit [MurmurHash3](https://github.com/aappleby/smhasher) hash valu
 murmurHash3_128( expr )
 ```
 
-**Parameters**
+**Arguments**
 
 -   `expr` — [Expressions](../../sql-reference/syntax.md#syntax-expressions) returning a [String](../../sql-reference/data-types/string.md)-type value.
 
diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md
index bfa1998d68a..964265a461b 100644
--- a/docs/en/sql-reference/functions/introspection.md
+++ b/docs/en/sql-reference/functions/introspection.md
@@ -32,7 +32,7 @@ If you use official ClickHouse packages, you need to install the `clickhouse-com
 addressToLine(address_of_binary_instruction)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process.
 
@@ -123,7 +123,7 @@ Converts virtual memory address inside ClickHouse server process to the symbol f
 addressToSymbol(address_of_binary_instruction)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process.
 
@@ -220,7 +220,7 @@ Converts a symbol that you can get using the [addressToSymbol](#addresstosymbol)
 demangle(symbol)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `symbol` ([String](../../sql-reference/data-types/string.md)) — Symbol from an object file.
 
@@ -345,7 +345,7 @@ Emits trace log message to server log for each [Block](https://clickhouse.tech/d
 logTrace('message')
 ```
 
-**Parameters**
+**Arguments**
 
 -   `message` — Message that is emitted to server log. [String](../../sql-reference/data-types/string.md#string).
 
diff --git a/docs/en/sql-reference/functions/ip-address-functions.md b/docs/en/sql-reference/functions/ip-address-functions.md
index 0c1f675304b..eaea5e250fb 100644
--- a/docs/en/sql-reference/functions/ip-address-functions.md
+++ b/docs/en/sql-reference/functions/ip-address-functions.md
@@ -275,7 +275,7 @@ Determines whether the input string is an IPv4 address or not. If `string` is IP
 isIPv4String(string)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `string` — IP address. [String](../../sql-reference/data-types/string.md).
 
@@ -313,7 +313,7 @@ Determines whether the input string is an IPv6 address or not. If `string` is IP
 isIPv6String(string)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `string` — IP address. [String](../../sql-reference/data-types/string.md).
 
diff --git a/docs/en/sql-reference/functions/json-functions.md b/docs/en/sql-reference/functions/json-functions.md
index 05e755eaddc..edee048eb77 100644
--- a/docs/en/sql-reference/functions/json-functions.md
+++ b/docs/en/sql-reference/functions/json-functions.md
@@ -236,7 +236,7 @@ Extracts raw data from a JSON object.
 JSONExtractKeysAndValuesRaw(json[, p, a, t, h])
 ```
 
-**Parameters**
+**Arguments**
 
 -   `json` — [String](../../sql-reference/data-types/string.md) with valid JSON.
 -   `p, a, t, h` — Comma-separated indices or keys that specify the path to the inner field in a nested JSON object. Each argument can be either a [string](../../sql-reference/data-types/string.md) to get the field by the key or an [integer](../../sql-reference/data-types/int-uint.md) to get the N-th field (indexed from 1, negative integers count from the end). If not set, the whole JSON is parsed as the top-level object. Optional parameter.
diff --git a/docs/en/sql-reference/functions/machine-learning-functions.md b/docs/en/sql-reference/functions/machine-learning-functions.md
index 8627fc26bad..f103a4ea421 100644
--- a/docs/en/sql-reference/functions/machine-learning-functions.md
+++ b/docs/en/sql-reference/functions/machine-learning-functions.md
@@ -27,7 +27,7 @@ Compares test groups (variants) and calculates for each group the probability to
 bayesAB(distribution_name, higher_is_better, variant_names, x, y)
 ```
 
-**Parameters** 
+**Arguments** 
 
 -   `distribution_name` — Name of the probability distribution. [String](../../sql-reference/data-types/string.md). Possible values:
 
diff --git a/docs/en/sql-reference/functions/math-functions.md b/docs/en/sql-reference/functions/math-functions.md
index 8dc287593c7..f56a721c0c0 100644
--- a/docs/en/sql-reference/functions/math-functions.md
+++ b/docs/en/sql-reference/functions/math-functions.md
@@ -121,7 +121,7 @@ Accepts a numeric argument and returns a UInt64 number close to 10 to the power
 cosh(x)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64).
 
@@ -157,7 +157,7 @@ Result:
 acosh(x)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `x` — Hyperbolic cosine of angle. Values from the interval: `1 <= x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64).
 
@@ -197,7 +197,7 @@ Result:
 sinh(x)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64).
 
@@ -233,7 +233,7 @@ Result:
 asinh(x)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `x` — Hyperbolic sine of angle. Values from the interval: `-∞ < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64).
 
@@ -273,7 +273,7 @@ Result:
 atanh(x)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `x` — Hyperbolic tangent of angle. Values from the interval: `–1 < x < 1`. [Float64](../../sql-reference/data-types/float.md#float32-float64).
 
@@ -309,7 +309,7 @@ The [function](https://en.wikipedia.org/wiki/Atan2) calculates the angle in the
 atan2(y, x)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `y` — y-coordinate of the point through which the ray passes. [Float64](../../sql-reference/data-types/float.md#float32-float64).
 -   `x` — x-coordinate of the point through which the ray passes. [Float64](../../sql-reference/data-types/float.md#float32-float64).
@@ -346,7 +346,7 @@ Calculates the length of the hypotenuse of a right-angle triangle. The [function
 hypot(x, y)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `x` — The first cathetus of a right-angle triangle. [Float64](../../sql-reference/data-types/float.md#float32-float64).
 -   `y` — The second cathetus of a right-angle triangle. [Float64](../../sql-reference/data-types/float.md#float32-float64).
@@ -383,7 +383,7 @@ Calculates `log(1+x)`. The [function](https://en.wikipedia.org/wiki/Natural_loga
 log1p(x)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `x` — Values from the interval: `-1 < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64).
 
@@ -423,7 +423,7 @@ The `sign` function can extract the sign of a real number.
 sign(x)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `x` — Values from  `-∞` to `+∞`. Support all numeric types in ClickHouse.
 
diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md
index 8f25ce023df..dcbb7d1ffeb 100644
--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@@ -19,7 +19,7 @@ Gets a named value from the [macros](../../operations/server-configuration-param
 getMacro(name);
 ```
 
-**Parameters**
+**Arguments**
 
 -   `name` — Name to retrieve from the `macros` section. [String](../../sql-reference/data-types/string.md#string).
 
@@ -108,7 +108,7 @@ Extracts the trailing part of a string after the last slash or backslash. This f
 basename( expr )
 ```
 
-**Parameters**
+**Arguments**
 
 -   `expr` — Expression resulting in a [String](../../sql-reference/data-types/string.md) type value. All the backslashes must be escaped in the resulting value.
 
@@ -192,7 +192,7 @@ Returns estimation of uncompressed byte size of its arguments in memory.
 byteSize(argument [, ...])
 ```
 
-**Parameters**
+**Arguments**
 
 -   `argument` — Value.
 
@@ -349,7 +349,7 @@ The function is intended for development, debugging and demonstration.
 isConstant(x)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `x` — Expression to check.
 
@@ -420,7 +420,7 @@ Checks whether floating point value is finite.
 
     ifNotFinite(x,y)
 
-**Parameters**
+**Arguments**
 
 -   `x` — Value to be checked for infinity. Type: [Float\*](../../sql-reference/data-types/float.md).
 -   `y` — Fallback value. Type: [Float\*](../../sql-reference/data-types/float.md).
@@ -460,7 +460,7 @@ Allows building a unicode-art diagram.
 
 `bar(x, min, max, width)` draws a band with a width proportional to `(x - min)` and equal to `width` characters when `x = max`.
 
-Parameters:
+Arguments:
 
 -   `x` — Size to display.
 -   `min, max` — Integer constants. The value must fit in `Int64`.
@@ -645,7 +645,7 @@ Accepts the time delta in seconds. Returns a time delta with (year, month, day,
 formatReadableTimeDelta(column[, maximum_unit])
 ```
 
-**Parameters**
+**Arguments**
 
 -   `column` — A column with numeric time delta.
 -   `maximum_unit` — Optional. Maximum unit to show. Acceptable values seconds, minutes, hours, days, months, years.
@@ -730,7 +730,7 @@ The result of the function depends on the affected data blocks and the order of
 The rows order used during the calculation of `neighbor` can differ from the order of rows returned to the user.
 To prevent that you can make a subquery with ORDER BY and call the function from outside the subquery.
 
-**Parameters**
+**Arguments**
 
 -   `column` — A column name or scalar expression.
 -   `offset` — The number of rows forwards or backwards from the current row of `column`. [Int64](../../sql-reference/data-types/int-uint.md).
@@ -924,7 +924,7 @@ The result of the function depends on the order of data in the block. It assumes
 runningConcurrency(begin, end)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `begin` — A column for the beginning time of events (inclusive). [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md).
 -   `end` — A column for the ending time of events (exclusive).  [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md).
@@ -989,7 +989,7 @@ Returns the number of fields in [Enum](../../sql-reference/data-types/enum.md).
 getSizeOfEnumType(value)
 ```
 
-**Parameters:**
+**Arguments:**
 
 -   `value` — Value of type `Enum`.
 
@@ -1018,7 +1018,7 @@ Returns size on disk (without taking into account compression).
 blockSerializedSize(value[, value[, ...]])
 ```
 
-**Parameters**
+**Arguments**
 
 -   `value` — Any value.
 
@@ -1050,7 +1050,7 @@ Returns the name of the class that represents the data type of the column in RAM
 toColumnTypeName(value)
 ```
 
-**Parameters:**
+**Arguments:**
 
 -   `value` — Any type of value.
 
@@ -1090,7 +1090,7 @@ Outputs a detailed description of data structures in RAM
 dumpColumnStructure(value)
 ```
 
-**Parameters:**
+**Arguments:**
 
 -   `value` — Any type of value.
 
@@ -1120,7 +1120,7 @@ Does not include default values for custom columns set by the user.
 defaultValueOfArgumentType(expression)
 ```
 
-**Parameters:**
+**Arguments:**
 
 -   `expression` — Arbitrary type of value or an expression that results in a value of an arbitrary type.
 
@@ -1162,7 +1162,7 @@ Does not include default values for custom columns set by the user.
 defaultValueOfTypeName(type)
 ```
 
-**Parameters:**
+**Arguments:**
 
 -   `type` — A string representing a type name.
 
@@ -1204,7 +1204,7 @@ Used for internal implementation of [arrayJoin](../../sql-reference/functions/ar
 SELECT replicate(x, arr);
 ```
 
-**Parameters:**
+**Arguments:**
 
 -   `arr` — Original array. ClickHouse creates a new array of the same length as the original and fills it with the value `x`.
 -   `x` — The value that the resulting array will be filled with.
@@ -1337,7 +1337,7 @@ Takes state of aggregate function. Returns result of aggregation (or finalized s
 finalizeAggregation(state)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `state` — State of aggregation. [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction).
 
@@ -1441,7 +1441,7 @@ Accumulates states of an aggregate function for each row of a data block.
 runningAccumulate(agg_state[, grouping]);
 ```
 
-**Parameters**
+**Arguments**
 
 -   `agg_state` — State of the aggregate function. [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction).
 -   `grouping` — Grouping key. Optional. The state of the function is reset if the `grouping` value is changed. It can be any of the [supported data types](../../sql-reference/data-types/index.md) for which the equality operator is defined.
@@ -1547,7 +1547,7 @@ Only supports tables created with the `ENGINE = Join(ANY, LEFT, <join_keys>)` st
 joinGet(join_storage_table_name, `value_column`, join_keys)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `join_storage_table_name` — an [identifier](../../sql-reference/syntax.md#syntax-identifiers) indicates where search is performed. The identifier is searched in the default database (see parameter `default_database` in the config file). To override the default database, use the `USE db_name` or specify the database and the table through the separator `db_name.db_table`, see the example.
 -   `value_column` — name of the column of the table that contains required data.
@@ -1651,7 +1651,7 @@ Generates a string with a random set of [ASCII](https://en.wikipedia.org/wiki/AS
 randomPrintableASCII(length)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `length` — Resulting string length. Positive integer.
 
@@ -1687,7 +1687,7 @@ Generates a binary string of the specified length filled with random bytes (incl
 randomString(length)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `length` — String length. Positive integer.
 
@@ -1735,7 +1735,7 @@ Generates a binary string of the specified length filled with random bytes (incl
 randomFixedString(length);
 ```
 
-**Parameters**
+**Arguments**
 
 -   `length` — String length in bytes. [UInt64](../../sql-reference/data-types/int-uint.md).
 
@@ -1773,7 +1773,7 @@ Generates a random string of a specified length. Result string contains valid UT
 randomStringUTF8(length);
 ```
 
-**Parameters**
+**Arguments**
 
 -   `length` — Required length of the resulting string in code points. [UInt64](../../sql-reference/data-types/int-uint.md).
 
@@ -1845,7 +1845,7 @@ Checks whether the [Decimal](../../sql-reference/data-types/decimal.md) value is
 isDecimalOverflow(d, [p])
 ```
 
-**Parameters**
+**Arguments**
 
 -   `d` — value. [Decimal](../../sql-reference/data-types/decimal.md).
 -   `p` — precision. Optional. If omitted, the initial precision of the first argument is used. Using of this paratemer could be helpful for data extraction to another DBMS or file. [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges).
@@ -1882,7 +1882,7 @@ Returns number of decimal digits you need to represent the value.
 countDigits(x)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `x` — [Int](../../sql-reference/data-types/int-uint.md) or [Decimal](../../sql-reference/data-types/decimal.md) value.
 
@@ -1941,7 +1941,7 @@ Returns [native interface](../../interfaces/tcp.md) TCP port number listened by
 tcpPort()
 ```
 
-**Parameters**
+**Arguments**
 
 -   None.
 
diff --git a/docs/en/sql-reference/functions/random-functions.md b/docs/en/sql-reference/functions/random-functions.md
index 68998928398..2b9846344e4 100644
--- a/docs/en/sql-reference/functions/random-functions.md
+++ b/docs/en/sql-reference/functions/random-functions.md
@@ -32,7 +32,7 @@ Produces a constant column with a random value.
 randConstant([x])
 ```
 
-**Parameters**
+**Arguments**
 
 -   `x` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in any of the [supported data types](../../sql-reference/data-types/index.md#data_types). The resulting value is discarded, but the expression itself if used for bypassing [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in one query. Optional parameter.
 
@@ -81,7 +81,7 @@ fuzzBits([s], [prob])
 
 Inverts bits of `s`, each with probability `prob`.
 
-**Parameters**
+**Arguments**
 - `s` - `String` or `FixedString`
 - `prob` - constant `Float32/64`
 
diff --git a/docs/en/sql-reference/functions/rounding-functions.md b/docs/en/sql-reference/functions/rounding-functions.md
index 922cf7374d7..83db1975366 100644
--- a/docs/en/sql-reference/functions/rounding-functions.md
+++ b/docs/en/sql-reference/functions/rounding-functions.md
@@ -35,7 +35,7 @@ The function returns the nearest number of the specified order. In case when giv
 round(expression [, decimal_places])
 ```
 
-**Parameters:**
+**Arguments:**
 
 -   `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../../sql-reference/data-types/index.md#data_types).
 -   `decimal-places` — An integer value.
@@ -114,7 +114,7 @@ For example, sum numbers 1.5, 2.5, 3.5, 4.5 with different rounding:
 roundBankers(expression [, decimal_places])
 ```
 
-**Parameters**
+**Arguments**
 
 -   `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../../sql-reference/data-types/index.md#data_types).
 -   `decimal-places` — Decimal places. An integer number.
diff --git a/docs/en/sql-reference/functions/splitting-merging-functions.md b/docs/en/sql-reference/functions/splitting-merging-functions.md
index 25f41211b47..c70ee20f076 100644
--- a/docs/en/sql-reference/functions/splitting-merging-functions.md
+++ b/docs/en/sql-reference/functions/splitting-merging-functions.md
@@ -16,7 +16,7 @@ Returns an array of selected substrings. Empty substrings may be selected if the
 splitByChar(<separator>, <s>)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `separator` — The separator which should contain exactly one character. [String](../../sql-reference/data-types/string.md).
 -   `s` — The string to split. [String](../../sql-reference/data-types/string.md).
@@ -53,7 +53,7 @@ Splits a string into substrings separated by a string. It uses a constant string
 splitByString(<separator>, <s>)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `separator` — The separator. [String](../../sql-reference/data-types/string.md).
 -   `s` — The string to split. [String](../../sql-reference/data-types/string.md).
@@ -121,7 +121,7 @@ Extracts all groups from non-overlapping substrings matched by a regular express
 extractAllGroups(text, regexp) 
 ```
 
-**Parameters** 
+**Arguments** 
 
 -   `text` — [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
 -   `regexp` — Regular expression. Constant. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md
index 2b93dd924a3..3f6ffeee654 100644
--- a/docs/en/sql-reference/functions/string-functions.md
+++ b/docs/en/sql-reference/functions/string-functions.md
@@ -76,7 +76,7 @@ Replaces invalid UTF-8 characters by the `�` (U+FFFD) character. All running i
 toValidUTF8( input_string )
 ```
 
-Parameters:
+Arguments:
 
 -   input_string — Any set of bytes represented as the [String](../../sql-reference/data-types/string.md) data type object.
 
@@ -104,7 +104,7 @@ Repeats a string as many times as specified and concatenates the replicated valu
 repeat(s, n)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `s` — The string to repeat. [String](../../sql-reference/data-types/string.md).
 -   `n` — The number of times to repeat the string. [UInt](../../sql-reference/data-types/int-uint.md).
@@ -173,7 +173,7 @@ Concatenates the strings listed in the arguments, without a separator.
 concat(s1, s2, ...)
 ```
 
-**Parameters**
+**Arguments**
 
 Values of type String or FixedString.
 
@@ -211,7 +211,7 @@ The function is named “injective” if it always returns different result for
 concatAssumeInjective(s1, s2, ...)
 ```
 
-**Parameters**
+**Arguments**
 
 Values of type String or FixedString.
 
@@ -328,7 +328,7 @@ By default removes all consecutive occurrences of common whitespace (ASCII chara
 trim([[LEADING|TRAILING|BOTH] trim_character FROM] input_string)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `trim_character` — specified characters for trim. [String](../../sql-reference/data-types/string.md).
 -   `input_string` — string for trim. [String](../../sql-reference/data-types/string.md).
@@ -367,7 +367,7 @@ trimLeft(input_string)
 
 Alias: `ltrim(input_string)`.
 
-**Parameters**
+**Arguments**
 
 -   `input_string` — string to trim. [String](../../sql-reference/data-types/string.md).
 
@@ -405,7 +405,7 @@ trimRight(input_string)
 
 Alias: `rtrim(input_string)`.
 
-**Parameters**
+**Arguments**
 
 -   `input_string` — string to trim. [String](../../sql-reference/data-types/string.md).
 
@@ -443,7 +443,7 @@ trimBoth(input_string)
 
 Alias: `trim(input_string)`.
 
-**Parameters**
+**Arguments**
 
 -   `input_string` — string to trim. [String](../../sql-reference/data-types/string.md).
 
@@ -496,7 +496,7 @@ Replaces literals, sequences of literals and complex aliases with placeholders.
 normalizeQuery(x)
 ```
 
-**Parameters** 
+**Arguments** 
 
 -   `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md).
 
@@ -532,7 +532,7 @@ Returns identical 64bit hash values without the values of literals for similar q
 normalizedQueryHash(x)
 ```
 
-**Parameters** 
+**Arguments** 
 
 -   `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md).
 
@@ -570,7 +570,7 @@ The following five XML predefined entities will be replaced: `<`, `&`, `>`, `"`,
 encodeXMLComponent(x)
 ```
 
-**Parameters** 
+**Arguments** 
 
 -   `x` — The sequence of characters. [String](../../sql-reference/data-types/string.md).
 
diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md
index 92591c89a37..83b0edea438 100644
--- a/docs/en/sql-reference/functions/string-search-functions.md
+++ b/docs/en/sql-reference/functions/string-search-functions.md
@@ -24,7 +24,7 @@ position(haystack, needle[, start_pos])
 
 Alias: `locate(haystack, needle[, start_pos])`.
 
-**Parameters**
+**Arguments**
 
 -   `haystack` — string, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
 -   `needle` — substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
@@ -95,7 +95,7 @@ Works under the assumption that the string contains a set of bytes representing
 positionCaseInsensitive(haystack, needle[, start_pos])
 ```
 
-**Parameters**
+**Arguments**
 
 -   `haystack` — string, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
 -   `needle` — substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
@@ -138,7 +138,7 @@ For a case-insensitive search, use the function [positionCaseInsensitiveUTF8](#p
 positionUTF8(haystack, needle[, start_pos])
 ```
 
-**Parameters**
+**Arguments**
 
 -   `haystack` — string, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
 -   `needle` — substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
@@ -211,7 +211,7 @@ Works under the assumption that the string contains a set of bytes representing
 positionCaseInsensitiveUTF8(haystack, needle[, start_pos])
 ```
 
-**Parameters**
+**Arguments**
 
 -   `haystack` — string, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
 -   `needle` — substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
@@ -256,7 +256,7 @@ The search is performed on sequences of bytes without respect to string encoding
 multiSearchAllPositions(haystack, [needle1, needle2, ..., needlen])
 ```
 
-**Parameters**
+**Arguments**
 
 -   `haystack` — string, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
 -   `needle` — substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
@@ -371,7 +371,7 @@ Matches all groups of the `haystack` string using the `pattern` regular expressi
 extractAllGroupsHorizontal(haystack, pattern)
 ```
 
-**Parameters** 
+**Arguments** 
 
 -   `haystack` — Input string. Type: [String](../../sql-reference/data-types/string.md).
 -   `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. Type: [String](../../sql-reference/data-types/string.md). 
@@ -412,7 +412,7 @@ Matches all groups of the `haystack` string using the `pattern` regular expressi
 extractAllGroupsVertical(haystack, pattern)
 ```
 
-**Parameters** 
+**Arguments** 
 
 -   `haystack` — Input string. Type: [String](../../sql-reference/data-types/string.md).
 -   `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. Type: [String](../../sql-reference/data-types/string.md).
@@ -471,7 +471,7 @@ Case insensitive variant of [like](https://clickhouse.tech/docs/en/sql-reference
 ilike(haystack, pattern)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `haystack` — Input string. [String](../../sql-reference/syntax.md#syntax-string-literal).
 -   `pattern` — If `pattern` doesn't contain percent signs or underscores, then the `pattern` only represents the string itself. An underscore (`_`) in `pattern` stands for (matches) any single character. A percent sign (`%`) matches any sequence of zero or more characters.
@@ -548,7 +548,7 @@ For a case-insensitive search, use [countSubstringsCaseInsensitive](../../sql-re
 countSubstrings(haystack, needle[, start_pos])
 ```
 
-**Parameters**
+**Arguments**
 
 -   `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal).
 -   `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal).
@@ -614,7 +614,7 @@ Returns the number of substring occurrences case-insensitive.
 countSubstringsCaseInsensitive(haystack, needle[, start_pos])
 ```
 
-**Parameters**
+**Arguments**
 
 -   `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal).
 -   `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal).
@@ -680,7 +680,7 @@ Returns the number of substring occurrences in `UTF-8` case-insensitive.
 SELECT countSubstringsCaseInsensitiveUTF8(haystack, needle[, start_pos])
 ```
 
-**Parameters**
+**Arguments**
 
 -   `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal).
 -   `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal).
@@ -732,7 +732,7 @@ Returns the number of regular expression matches for a `pattern` in a `haystack`
 countMatches(haystack, pattern)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal).
 -   `pattern` — The regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). [String](../../sql-reference/data-types/string.md).
diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md
index dcbcd3e374b..1006b68b8ee 100644
--- a/docs/en/sql-reference/functions/tuple-functions.md
+++ b/docs/en/sql-reference/functions/tuple-functions.md
@@ -45,7 +45,7 @@ untuple(x)
 
 You can use the `EXCEPT` expression to skip columns as a result of the query.
 
-**Parameters**
+**Arguments**
 
 -   `x` - A `tuple` function, column, or tuple of elements. [Tuple](../../sql-reference/data-types/tuple.md).
 
diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md
index 18d008f11f2..2b3a9d9103f 100644
--- a/docs/en/sql-reference/functions/tuple-map-functions.md
+++ b/docs/en/sql-reference/functions/tuple-map-functions.md
@@ -15,7 +15,7 @@ Arranges `key:value` pairs into [Map(key, value)](../../sql-reference/data-types
 map(key1, value1[, key2, value2, ...])
 ```
 
-**Parameters** 
+**Arguments** 
 
 -   `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md) or [Integer](../../sql-reference/data-types/int-uint.md).
 -   `value` — The value part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) or [Array](../../sql-reference/data-types/array.md).
@@ -77,7 +77,7 @@ Collect all the keys and sum corresponding values.
 mapAdd(Tuple(Array, Array), Tuple(Array, Array) [, ...])
 ```
 
-**Parameters** 
+**Arguments** 
 
 Arguments are [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for the each key. All key arrays should have same type, and all value arrays should contain items which are promote to the one type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as a type for the result array.
 
@@ -111,7 +111,7 @@ Collect all the keys and subtract corresponding values.
 mapSubtract(Tuple(Array, Array), Tuple(Array, Array) [, ...])
 ```
 
-**Parameters** 
+**Arguments** 
 
 Arguments are [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for the each key. All key arrays should have same type, and all value arrays should contain items which are promote to the one type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as a type for the result array.
 
@@ -149,7 +149,7 @@ Generates a map, where keys are a series of numbers, from minimum to maximum key
 
 The number of elements in `keys` and `values` must be the same for each row.
 
-**Parameters**
+**Arguments**
 
 -   `keys` — Array of keys. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)).
 -   `values` — Array of values. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)).
diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md
index 3ca36f41c78..450945a5ab9 100644
--- a/docs/en/sql-reference/functions/type-conversion-functions.md
+++ b/docs/en/sql-reference/functions/type-conversion-functions.md
@@ -22,7 +22,7 @@ Converts an input value to the [Int](../../sql-reference/data-types/int-uint.md)
 -   `toInt128(expr)` — Results in the `Int128` data type.
 -   `toInt256(expr)` — Results in the `Int256` data type.
 
-**Parameters**
+**Arguments**
 
 -   `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped.
 
@@ -88,7 +88,7 @@ Converts an input value to the [UInt](../../sql-reference/data-types/int-uint.md
 -   `toUInt64(expr)` — Results in the `UInt64` data type.
 -   `toUInt256(expr)` — Results in the `UInt256` data type.
 
-**Parameters**
+**Arguments**
 
 -   `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped.
 
@@ -154,7 +154,7 @@ Converts an input string to a [Nullable(Decimal(P,S))](../../sql-reference/data-
 
 These functions should be used instead of `toDecimal*()` functions, if you prefer to get a `NULL` value instead of an exception in the event of an input value parsing error.
 
-**Parameters**
+**Arguments**
 
 -   `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions), returns a value in the [String](../../sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`.
 -   `S` — Scale, the number of decimal places in the resulting value.
@@ -199,7 +199,7 @@ Converts an input value to the [Decimal(P,S)](../../sql-reference/data-types/dec
 
 These functions should be used instead of `toDecimal*()` functions, if you prefer to get a `0` value instead of an exception in the event of an input value parsing error.
 
-**Parameters**
+**Arguments**
 
 -   `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions), returns a value in the [String](../../sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`.
 -   `S` — Scale, the number of decimal places in the resulting value.
@@ -467,7 +467,7 @@ toIntervalQuarter(number)
 toIntervalYear(number)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `number` — Duration of interval. Positive integer number.
 
@@ -505,7 +505,7 @@ The function parses [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601), [RFC 112
 parseDateTimeBestEffort(time_string [, time_zone]);
 ```
 
-**Parameters**
+**Arguments**
 
 -   `time_string` — String containing a date and time to convert. [String](../../sql-reference/data-types/string.md).
 -   `time_zone` — Time zone. The function parses `time_string` according to the time zone. [String](../../sql-reference/data-types/string.md).
@@ -617,7 +617,7 @@ This function is similar to [‘parseDateTimeBestEffort’](#parsedatetimebestef
 parseDateTimeBestEffortUS(time_string [, time_zone]);
 ```
 
-**Parameters**
+**Arguments**
 
 -   `time_string` — String containing a date and time to convert. [String](../../sql-reference/data-types/string.md).
 -   `time_zone` — Time zone. The function parses `time_string` according to the time zone. [String](../../sql-reference/data-types/string.md).
@@ -701,7 +701,7 @@ To convert data from the `LowCardinality` data type use the [CAST](#type_convers
 toLowCardinality(expr)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in one of the [supported data types](../../sql-reference/data-types/index.md#data_types).
 
@@ -741,7 +741,7 @@ Converts a `DateTime64` to a `Int64` value with fixed sub-second precision. Inpu
 toUnixTimestamp64Milli(value)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `value` — DateTime64 value with any precision.
 
@@ -793,7 +793,7 @@ Converts an `Int64` to a `DateTime64` value with fixed sub-second precision and
 fromUnixTimestamp64Milli(value [, ti])
 ```
 
-**Parameters**
+**Arguments**
 
 -   `value` — `Int64` value with any precision.
 -   `timezone` — `String` (optional) timezone name of the result.
@@ -825,7 +825,7 @@ Converts arbitrary expressions into a string via given format.
 formatRow(format, x, y, ...)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `format` — Text format. For example, [CSV](../../interfaces/formats.md#csv), [TSV](../../interfaces/formats.md#tabseparated).
 -   `x`,`y`, ... — Expressions.
@@ -866,7 +866,7 @@ Converts arbitrary expressions into a string via given format. The function trim
 formatRowNoNewline(format, x, y, ...)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `format` — Text format. For example, [CSV](../../interfaces/formats.md#csv), [TSV](../../interfaces/formats.md#tabseparated).
 -   `x`,`y`, ... — Expressions.
diff --git a/docs/en/sql-reference/functions/url-functions.md b/docs/en/sql-reference/functions/url-functions.md
index 006542f494a..3eea69c552b 100644
--- a/docs/en/sql-reference/functions/url-functions.md
+++ b/docs/en/sql-reference/functions/url-functions.md
@@ -25,7 +25,7 @@ Extracts the hostname from a URL.
 domain(url)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `url` — URL. Type: [String](../../sql-reference/data-types/string.md).
 
@@ -76,7 +76,7 @@ Extracts the the top-level domain from a URL.
 topLevelDomain(url)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `url` — URL. Type: [String](../../sql-reference/data-types/string.md).
 
@@ -242,7 +242,7 @@ Extracts network locality (`username:password@host:port`) from a URL.
 netloc(URL)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `url` — URL. [String](../../sql-reference/data-types/string.md).
 
diff --git a/docs/en/sql-reference/functions/ym-dict-functions.md b/docs/en/sql-reference/functions/ym-dict-functions.md
index f70532252c7..56530b5e83b 100644
--- a/docs/en/sql-reference/functions/ym-dict-functions.md
+++ b/docs/en/sql-reference/functions/ym-dict-functions.md
@@ -115,7 +115,7 @@ Finds the highest continent in the hierarchy for the region.
 regionToTopContinent(id[, geobase]);
 ```
 
-**Parameters**
+**Arguments**
 
 -   `id` — Region ID from the Yandex geobase. [UInt32](../../sql-reference/data-types/int-uint.md).
 -   `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../../sql-reference/data-types/string.md). Optional.

From 97d7a53962a2279f9c0b1d5880e82f16a04b6ed0 Mon Sep 17 00:00:00 2001
From: Anna <annaconda@bk.ru>
Date: Tue, 16 Feb 2021 00:33:53 +0300
Subject: [PATCH 1098/1238] Replacement `Parameters` to `Arguments` for
 aggregate functions

---
 .../aggregate-functions/combinators.md        |  6 ++--
 .../parametric-functions.md                   | 32 ++++++++++++-------
 .../aggregate-functions/reference/argmax.md   |  2 +-
 .../aggregate-functions/reference/argmin.md   |  2 +-
 .../aggregate-functions/reference/avg.md      |  2 +-
 .../reference/avgweighted.md                  |  2 +-
 .../aggregate-functions/reference/count.md    |  2 +-
 .../reference/grouparrayinsertat.md           |  2 +-
 .../reference/grouparraymovingavg.md          |  2 +-
 .../reference/grouparraymovingsum.md          |  2 +-
 .../reference/grouparraysample.md             |  2 +-
 .../reference/groupbitand.md                  |  2 +-
 .../reference/groupbitmap.md                  |  2 +-
 .../reference/groupbitmapand.md               |  2 +-
 .../reference/groupbitmapor.md                |  2 +-
 .../reference/groupbitmapxor.md               |  2 +-
 .../reference/groupbitor.md                   |  2 +-
 .../reference/groupbitxor.md                  |  2 +-
 .../reference/initializeAggregation.md        |  2 +-
 .../aggregate-functions/reference/kurtpop.md  |  2 +-
 .../aggregate-functions/reference/kurtsamp.md |  2 +-
 .../reference/mannwhitneyutest.md             |  2 +-
 .../aggregate-functions/reference/quantile.md |  2 +-
 .../reference/quantiledeterministic.md        |  2 +-
 .../reference/quantileexact.md                |  6 ++--
 .../reference/quantileexactweighted.md        |  2 +-
 .../reference/quantiletdigest.md              |  2 +-
 .../reference/quantiletdigestweighted.md      |  2 +-
 .../reference/quantiletiming.md               |  2 +-
 .../reference/quantiletimingweighted.md       |  2 +-
 .../aggregate-functions/reference/rankCorr.md |  2 +-
 .../aggregate-functions/reference/skewpop.md  |  2 +-
 .../aggregate-functions/reference/skewsamp.md |  2 +-
 .../reference/studentttest.md                 |  2 +-
 .../aggregate-functions/reference/topk.md     |  2 +-
 .../reference/topkweighted.md                 |  2 +-
 .../aggregate-functions/reference/uniq.md     |  2 +-
 .../reference/uniqcombined.md                 |  2 +-
 .../reference/uniqexact.md                    |  2 +-
 .../reference/uniqhll12.md                    |  2 +-
 .../reference/welchttest.md                   |  2 +-
 41 files changed, 65 insertions(+), 55 deletions(-)

diff --git a/docs/en/sql-reference/aggregate-functions/combinators.md b/docs/en/sql-reference/aggregate-functions/combinators.md
index 431968bc629..015c90e90c7 100644
--- a/docs/en/sql-reference/aggregate-functions/combinators.md
+++ b/docs/en/sql-reference/aggregate-functions/combinators.md
@@ -72,7 +72,7 @@ If an aggregate function doesn’t have input values, with this combinator it re
 <aggFunction>OrDefault(x)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `x` — Aggregate function parameters.
 
@@ -132,7 +132,7 @@ This combinator converts a result of an aggregate function to the [Nullable](../
 <aggFunction>OrNull(x)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `x` — Aggregate function parameters.
 
@@ -189,7 +189,7 @@ Lets you divide data into groups, and then separately aggregates the data in tho
 <aggFunction>Resample(start, end, step)(<aggFunction_params>, resampling_key)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `start` — Starting value of the whole required interval for `resampling_key` values.
 -   `stop` — Ending value of the whole required interval for `resampling_key` values. The whole interval doesn’t include the `stop` value `[start, stop)`.
diff --git a/docs/en/sql-reference/aggregate-functions/parametric-functions.md b/docs/en/sql-reference/aggregate-functions/parametric-functions.md
index 4b3bf12aa8c..035bc91b9ed 100644
--- a/docs/en/sql-reference/aggregate-functions/parametric-functions.md
+++ b/docs/en/sql-reference/aggregate-functions/parametric-functions.md
@@ -17,10 +17,13 @@ histogram(number_of_bins)(values)
 
 The functions uses [A Streaming Parallel Decision Tree Algorithm](http://jmlr.org/papers/volume11/ben-haim10a/ben-haim10a.pdf). The borders of histogram bins are adjusted as new data enters a function. In common case, the widths of bins are not equal.
 
+**Arguments**
+
+`values` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in input values.
+
 **Parameters**
 
 `number_of_bins` — Upper limit for the number of bins in the histogram. The function automatically calculates the number of bins. It tries to reach the specified number of bins, but if it fails, it uses fewer bins.
-`values` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in input values.
 
 **Returned values**
 
@@ -89,14 +92,16 @@ sequenceMatch(pattern)(timestamp, cond1, cond2, ...)
 !!! warning "Warning"
     Events that occur at the same second may lay in the sequence in an undefined order affecting the result.
 
-**Parameters**
-
--   `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax).
+**Arguments**
 
 -   `timestamp` — Column considered to contain time data. Typical data types are `Date` and `DateTime`. You can also use any of the supported [UInt](../../sql-reference/data-types/int-uint.md) data types.
 
 -   `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function takes only the events described in these conditions into account. If the sequence contains data that isn’t described in a condition, the function skips them.
 
+**Parameters**
+
+-   `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax).
+
 **Returned values**
 
 -   1, if the pattern is matched.
@@ -176,14 +181,16 @@ Counts the number of event chains that matched the pattern. The function searche
 sequenceCount(pattern)(timestamp, cond1, cond2, ...)
 ```
 
-**Parameters**
-
--   `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax).
+**Arguments**
 
 -   `timestamp` — Column considered to contain time data. Typical data types are `Date` and `DateTime`. You can also use any of the supported [UInt](../../sql-reference/data-types/int-uint.md) data types.
 
 -   `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function takes only the events described in these conditions into account. If the sequence contains data that isn’t described in a condition, the function skips them.
 
+**Parameters**
+
+-   `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax).
+
 **Returned values**
 
 -   Number of non-overlapping event chains that are matched.
@@ -239,13 +246,16 @@ The function works according to the algorithm:
 windowFunnel(window, [mode])(timestamp, cond1, cond2, ..., condN)
 ```
 
+**Arguments**
+
+-   `timestamp` — Name of the column containing the timestamp. Data types supported: [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md#data_type-datetime) and other unsigned integer types (note that even though timestamp supports the `UInt64` type, it’s value can’t exceed the Int64 maximum, which is 2^63 - 1).
+-   `cond` — Conditions or data describing the chain of events. [UInt8](../../sql-reference/data-types/int-uint.md).
+
 **Parameters**
 
 -   `window` — Length of the sliding window. The unit of `window` depends on the timestamp itself and varies. Determined using the expression `timestamp of cond2 <= timestamp of cond1 + window`.
--   `mode` - It is an optional argument.
+-   `mode` - It is an optional parameter.
     -   `'strict'` - When the `'strict'` is set, the windowFunnel() applies conditions only for the unique values.
--   `timestamp` — Name of the column containing the timestamp. Data types supported: [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md#data_type-datetime) and other unsigned integer types (note that even though timestamp supports the `UInt64` type, it’s value can’t exceed the Int64 maximum, which is 2^63 - 1).
--   `cond` — Conditions or data describing the chain of events. [UInt8](../../sql-reference/data-types/int-uint.md).
 
 **Returned value**
 
@@ -324,7 +334,7 @@ The conditions, except the first, apply in pairs: the result of the second will
 retention(cond1, cond2, ..., cond32);
 ```
 
-**Parameters**
+**Arguments**
 
 -   `cond` — an expression that returns a `UInt8` result (1 or 0).
 
diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmax.md b/docs/en/sql-reference/aggregate-functions/reference/argmax.md
index 9899c731ce9..7639117042f 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/argmax.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/argmax.md
@@ -20,7 +20,7 @@ or
 argMax(tuple(arg, val))
 ```
 
-**Parameters**
+**Arguments**
 
 -   `arg` — Argument.
 -   `val` — Value.
diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmin.md b/docs/en/sql-reference/aggregate-functions/reference/argmin.md
index 2fe9a313260..7ddc38cd28a 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/argmin.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/argmin.md
@@ -20,7 +20,7 @@ or
 argMin(tuple(arg, val))
 ```
 
-**Parameters**
+**Arguments**
 
 -   `arg` — Argument.
 -   `val` — Value.
diff --git a/docs/en/sql-reference/aggregate-functions/reference/avg.md b/docs/en/sql-reference/aggregate-functions/reference/avg.md
index e2e6aace734..12dc4ac1e9d 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/avg.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/avg.md
@@ -12,7 +12,7 @@ Calculates the arithmetic mean.
 avgWeighted(x)
 ```
 
-**Parameter**
+**Arguments**
 
 -   `x` — Values.
 
diff --git a/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md b/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md
index 7b9c0de2755..2df09e560b4 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md
@@ -12,7 +12,7 @@ Calculates the [weighted arithmetic mean](https://en.wikipedia.org/wiki/Weighted
 avgWeighted(x, weight)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `x` — Values.
 -   `weight` — Weights of the values.
diff --git a/docs/en/sql-reference/aggregate-functions/reference/count.md b/docs/en/sql-reference/aggregate-functions/reference/count.md
index e5d31429e12..0a5aef2fe97 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/count.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/count.md
@@ -10,7 +10,7 @@ ClickHouse supports the following syntaxes for `count`:
 - `count(expr)` or `COUNT(DISTINCT expr)`.
 - `count()` or `COUNT(*)`. The `count()` syntax is ClickHouse-specific.
 
-**Parameters**
+**Arguments**
 
 The function can take:
 
diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md b/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md
index f4b8665a0a4..68456bf7844 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md
@@ -17,7 +17,7 @@ If in one query several values are inserted into the same position, the function
 -   If a query is executed in a single thread, the first one of the inserted values is used.
 -   If a query is executed in multiple threads, the resulting value is an undetermined one of the inserted values.
 
-**Parameters**
+**Arguments**
 
 -   `x` — Value to be inserted. [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in one of the [supported data types](../../../sql-reference/data-types/index.md).
 -   `pos` — Position at which the specified element `x` is to be inserted. Index numbering in the array starts from zero. [UInt32](../../../sql-reference/data-types/int-uint.md#uint-ranges).
diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md
index 1cd40c2002f..c732efecf58 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md
@@ -13,7 +13,7 @@ groupArrayMovingAvg(window_size)(numbers_for_summing)
 
 The function can take the window size as a parameter. If left unspecified, the function takes the window size equal to the number of rows in the column.
 
-**Parameters**
+**Arguments**
 
 -   `numbers_for_summing` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in a numeric data type value.
 -   `window_size` — Size of the calculation window.
diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md
index ef979cd5f6a..c3dfeda850e 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md
@@ -13,7 +13,7 @@ groupArrayMovingSum(window_size)(numbers_for_summing)
 
 The function can take the window size as a parameter. If left unspecified, the function takes the window size equal to the number of rows in the column.
 
-**Parameters**
+**Arguments**
 
 -   `numbers_for_summing` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in a numeric data type value.
 -   `window_size` — Size of the calculation window.
diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md
index 36fa6a9d661..df0b8120eef 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md
@@ -12,7 +12,7 @@ Creates an array of sample argument values. The size of the resulting array is l
 groupArraySample(max_size[, seed])(x)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `max_size` — Maximum size of the resulting array. [UInt64](../../data-types/int-uint.md).
 -   `seed` — Seed for the random number generator. Optional. [UInt64](../../data-types/int-uint.md). Default value: `123456`.
diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md
index 9be73fd54ec..1275ad7536c 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md
@@ -10,7 +10,7 @@ Applies bitwise `AND` for series of numbers.
 groupBitAnd(expr)
 ```
 
-**Parameters**
+**Arguments**
 
 `expr` – An expression that results in `UInt*` type.
 
diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md
index 9367652db38..9317ef98783 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md
@@ -10,7 +10,7 @@ Bitmap or Aggregate calculations from a unsigned integer column, return cardinal
 groupBitmap(expr)
 ```
 
-**Parameters**
+**Arguments**
 
 `expr` – An expression that results in `UInt*` type.
 
diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md
index 7c0c89040bb..f59bb541a42 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md
@@ -10,7 +10,7 @@ Calculations the AND of a bitmap column, return cardinality of type UInt64, if a
 groupBitmapAnd(expr)
 ```
 
-**Parameters**
+**Arguments**
 
 `expr` – An expression that results in `AggregateFunction(groupBitmap, UInt*)` type.
 
diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md
index 894c6c90aab..a4d99fd29e3 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md
@@ -10,7 +10,7 @@ Calculations the OR of a bitmap column, return cardinality of type UInt64, if ad
 groupBitmapOr(expr)
 ```
 
-**Parameters**
+**Arguments**
 
 `expr` – An expression that results in `AggregateFunction(groupBitmap, UInt*)` type.
 
diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md
index 5d0ec0fb097..834f088d02f 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md
@@ -10,7 +10,7 @@ Calculations the XOR of a bitmap column, return cardinality of type UInt64, if a
 groupBitmapOr(expr)
 ```
 
-**Parameters**
+**Arguments**
 
 `expr` – An expression that results in `AggregateFunction(groupBitmap, UInt*)` type.
 
diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md
index 7383e620060..e427a9ad970 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md
@@ -10,7 +10,7 @@ Applies bitwise `OR` for series of numbers.
 groupBitOr(expr)
 ```
 
-**Parameters**
+**Arguments**
 
 `expr` – An expression that results in `UInt*` type.
 
diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md
index 01026012b91..4b8323f92db 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md
@@ -10,7 +10,7 @@ Applies bitwise `XOR` for series of numbers.
 groupBitXor(expr)
 ```
 
-**Parameters**
+**Arguments**
 
 `expr` – An expression that results in `UInt*` type.
 
diff --git a/docs/en/sql-reference/aggregate-functions/reference/initializeAggregation.md b/docs/en/sql-reference/aggregate-functions/reference/initializeAggregation.md
index ea44d5f1ddd..313d6bf81f5 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/initializeAggregation.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/initializeAggregation.md
@@ -13,7 +13,7 @@ Use it for tests or to process columns of types `AggregateFunction` and `Aggrega
 initializeAggregation (aggregate_function, column_1, column_2);
 ```
 
-**Parameters**
+**Arguments**
 
 -   `aggregate_function` — Name of the aggregation function. The state of this function — the creating one. [String](../../../sql-reference/data-types/string.md#string).
 -   `column_n` — The column to translate it into the function as it's argument. [String](../../../sql-reference/data-types/string.md#string).
diff --git a/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md b/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md
index 65e7e31b9b4..db402c99663 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md
@@ -10,7 +10,7 @@ Computes the [kurtosis](https://en.wikipedia.org/wiki/Kurtosis) of a sequence.
 kurtPop(expr)
 ```
 
-**Parameters**
+**Arguments**
 
 `expr` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) returning a number.
 
diff --git a/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md b/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md
index 224bbbdb9e7..4bb9f76763b 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md
@@ -12,7 +12,7 @@ It represents an unbiased estimate of the kurtosis of a random variable if passe
 kurtSamp(expr)
 ```
 
-**Parameters**
+**Arguments**
 
 `expr` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) returning a number.
 
diff --git a/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md b/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md
index 012df7052aa..e6dd680c457 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md
@@ -16,7 +16,7 @@ mannWhitneyUTest[(alternative[, continuity_correction])](sample_data, sample_ind
 Values of both samples are in the `sample_data` column. If `sample_index` equals to 0 then the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population. 
 The null hypothesis is that two populations are stochastically equal. Also one-sided hypothesises can be tested. This test does not assume that data have normal distribution.
 
-**Parameters**
+**Arguments**
 
 -   `alternative` — alternative hypothesis. (Optional, default: `'two-sided'`.) [String](../../../sql-reference/data-types/string.md).
     -   `'two-sided'`;
diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantile.md b/docs/en/sql-reference/aggregate-functions/reference/quantile.md
index 77f858a1735..d625ef4cfd9 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/quantile.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/quantile.md
@@ -18,7 +18,7 @@ quantile(level)(expr)
 
 Alias: `median`.
 
-**Parameters**
+**Arguments**
 
 -   `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median).
 -   `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md b/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md
index 6046447dd10..a20ac26f599 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md
@@ -18,7 +18,7 @@ quantileDeterministic(level)(expr, determinator)
 
 Alias: `medianDeterministic`.
 
-**Parameters**
+**Arguments**
 
 -   `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median).
 -   `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md
index a39f724f368..06ef7ccfbd3 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md
@@ -18,7 +18,7 @@ quantileExact(level)(expr)
 
 Alias: `medianExact`.
 
-**Parameters**
+**Arguments**
 
 -   `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median).
 -   `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
@@ -77,7 +77,7 @@ quantileExact(level)(expr)
 
 Alias: `medianExactLow`.
 
-**Parameters**
+**Arguments**
 
 -   `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median).
 -   `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
@@ -128,7 +128,7 @@ quantileExactHigh(level)(expr)
 
 Alias: `medianExactHigh`.
 
-**Parameters**
+**Arguments**
 
 -   `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median).
 -   `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md
index 3251f8298a6..210f44e7587 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md
@@ -18,7 +18,7 @@ quantileExactWeighted(level)(expr, weight)
 
 Alias: `medianExactWeighted`.
 
-**Parameters**
+**Arguments**
 
 -   `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median).
 -   `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md
index bda98ea338d..dcc665a68af 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md
@@ -20,7 +20,7 @@ quantileTDigest(level)(expr)
 
 Alias: `medianTDigest`.
 
-**Parameters**
+**Arguments**
 
 -   `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median).
 -   `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md
index 309cbe95e95..56ef598f7e7 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md
@@ -20,7 +20,7 @@ quantileTDigest(level)(expr)
 
 Alias: `medianTDigest`.
 
-**Parameters**
+**Arguments**
 
 -   `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median).
 -   `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md
index 867e8b87e74..58ce6495a96 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md
@@ -18,7 +18,7 @@ quantileTiming(level)(expr)
 
 Alias: `medianTiming`.
 
-**Parameters**
+**Arguments**
 
 -   `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median).
 
diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md
index 817cd831d85..fb3b9dbf4d2 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md
@@ -18,7 +18,7 @@ quantileTimingWeighted(level)(expr, weight)
 
 Alias: `medianTimingWeighted`.
 
-**Parameters**
+**Arguments**
 
 -   `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median).
 
diff --git a/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md b/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md
index dc23029f239..55ee1b8289b 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md
@@ -8,7 +8,7 @@ Computes a rank correlation coefficient.
 rankCorr(x, y)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `x` — Arbitrary value. [Float32](../../../sql-reference/data-types/float.md#float32-float64) or [Float64](../../../sql-reference/data-types/float.md#float32-float64).
 -   `y` — Arbitrary value. [Float32](../../../sql-reference/data-types/float.md#float32-float64) or [Float64](../../../sql-reference/data-types/float.md#float32-float64).
diff --git a/docs/en/sql-reference/aggregate-functions/reference/skewpop.md b/docs/en/sql-reference/aggregate-functions/reference/skewpop.md
index d15a5ffdd47..b9dfc390f9d 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/skewpop.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/skewpop.md
@@ -10,7 +10,7 @@ Computes the [skewness](https://en.wikipedia.org/wiki/Skewness) of a sequence.
 skewPop(expr)
 ```
 
-**Parameters**
+**Arguments**
 
 `expr` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) returning a number.
 
diff --git a/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md b/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md
index cb323f4b142..f7a6df8f507 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md
@@ -12,7 +12,7 @@ It represents an unbiased estimate of the skewness of a random variable if passe
 skewSamp(expr)
 ```
 
-**Parameters**
+**Arguments**
 
 `expr` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) returning a number.
 
diff --git a/docs/en/sql-reference/aggregate-functions/reference/studentttest.md b/docs/en/sql-reference/aggregate-functions/reference/studentttest.md
index f868e976039..ba10c1d62d9 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/studentttest.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/studentttest.md
@@ -16,7 +16,7 @@ studentTTest(sample_data, sample_index)
 Values of both samples are in the `sample_data` column. If `sample_index` equals to 0 then the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population.
 The null hypothesis is that means of populations are equal. Normal distribution with equal variances is assumed.
 
-**Parameters**
+**Arguments**
 
 -   `sample_data` — sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
 -   `sample_index` — sample index. [Integer](../../../sql-reference/data-types/int-uint.md).
diff --git a/docs/en/sql-reference/aggregate-functions/reference/topk.md b/docs/en/sql-reference/aggregate-functions/reference/topk.md
index 004a67d33af..b3e79803ba1 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/topk.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/topk.md
@@ -16,7 +16,7 @@ This function doesn’t provide a guaranteed result. In certain situations, erro
 
 We recommend using the `N < 10` value; performance is reduced with large `N` values. Maximum value of `N = 65536`.
 
-**Parameters**
+**Arguments**
 
 -   ‘N’ is the number of elements to return.
 
diff --git a/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md b/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md
index b597317f44e..02b9f77ea6f 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md
@@ -12,7 +12,7 @@ Similar to `topK` but takes one additional argument of integer type - `weight`.
 topKWeighted(N)(x, weight)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `N` — The number of elements to return.
 
diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniq.md b/docs/en/sql-reference/aggregate-functions/reference/uniq.md
index 81d1ec6761e..7ba2cdc6cb8 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/uniq.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/uniq.md
@@ -10,7 +10,7 @@ Calculates the approximate number of different values of the argument.
 uniq(x[, ...])
 ```
 
-**Parameters**
+**Arguments**
 
 The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types.
 
diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md
index c52486bc38f..4434686ae61 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md
@@ -12,7 +12,7 @@ uniqCombined(HLL_precision)(x[, ...])
 
 The `uniqCombined` function is a good choice for calculating the number of different values.
 
-**Parameters**
+**Arguments**
 
 The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types.
 
diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md b/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md
index 9a6224533c8..eee675016ee 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md
@@ -14,7 +14,7 @@ Use the `uniqExact` function if you absolutely need an exact result. Otherwise u
 
 The `uniqExact` function uses more memory than `uniq`, because the size of the state has unbounded growth as the number of different values increases.
 
-**Parameters**
+**Arguments**
 
 The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types.
 
diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md b/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md
index fcddc22cc46..5b23ea81eae 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md
@@ -10,7 +10,7 @@ Calculates the approximate number of different argument values, using the [Hyper
 uniqHLL12(x[, ...])
 ```
 
-**Parameters**
+**Arguments**
 
 The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types.
 
diff --git a/docs/en/sql-reference/aggregate-functions/reference/welchttest.md b/docs/en/sql-reference/aggregate-functions/reference/welchttest.md
index 3fe1c9d58b9..18cff885867 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/welchttest.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/welchttest.md
@@ -16,7 +16,7 @@ welchTTest(sample_data, sample_index)
 Values of both samples are in the `sample_data` column. If `sample_index` equals to 0 then the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population.
 The null hypothesis is that means of populations are equal. Normal distribution is assumed. Populations may have unequal variance.
 
-**Parameters**
+**Arguments**
 
 -   `sample_data` — sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
 -   `sample_index` — sample index. [Integer](../../../sql-reference/data-types/int-uint.md).

From d7db44c9116a6b1f767d56a5cd1963a13b5a880d Mon Sep 17 00:00:00 2001
From: Anna <annaconda@bk.ru>
Date: Tue, 16 Feb 2021 00:38:32 +0300
Subject: [PATCH 1099/1238] Other replacement

---
 .../aggregate-functions/reference/mannwhitneyutest.md     | 8 +++++---
 docs/en/sql-reference/table-functions/generate.md         | 2 +-
 docs/en/sql-reference/table-functions/mysql.md            | 2 +-
 docs/en/sql-reference/table-functions/view.md             | 2 +-
 4 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md b/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md
index e6dd680c457..12982849513 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md
@@ -18,14 +18,16 @@ The null hypothesis is that two populations are stochastically equal. Also one-s
 
 **Arguments**
 
+-   `sample_data` — sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
+-   `sample_index` — sample index. [Integer](../../../sql-reference/data-types/int-uint.md).
+
+**Parameters**
+
 -   `alternative` — alternative hypothesis. (Optional, default: `'two-sided'`.) [String](../../../sql-reference/data-types/string.md).
     -   `'two-sided'`;
     -   `'greater'`;
     -   `'less'`.
 -   `continuity_correction` - if not 0 then continuity correction in the normal approximation for the p-value is applied. (Optional, default: 1.) [UInt64](../../../sql-reference/data-types/int-uint.md).
--   `sample_data` — sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
--   `sample_index` — sample index. [Integer](../../../sql-reference/data-types/int-uint.md).
-
 
 **Returned values**
 
diff --git a/docs/en/sql-reference/table-functions/generate.md b/docs/en/sql-reference/table-functions/generate.md
index 5bbd22dfe4e..be6ba2b8bc4 100644
--- a/docs/en/sql-reference/table-functions/generate.md
+++ b/docs/en/sql-reference/table-functions/generate.md
@@ -13,7 +13,7 @@ Supports all data types that can be stored in table except `LowCardinality` and
 generateRandom('name TypeName[, name TypeName]...', [, 'random_seed'[, 'max_string_length'[, 'max_array_length']]]);
 ```
 
-**Parameters**
+**Arguments**
 
 -   `name` — Name of corresponding column.
 -   `TypeName` — Type of corresponding column.
diff --git a/docs/en/sql-reference/table-functions/mysql.md b/docs/en/sql-reference/table-functions/mysql.md
index eec4a1d0c46..14cd4369285 100644
--- a/docs/en/sql-reference/table-functions/mysql.md
+++ b/docs/en/sql-reference/table-functions/mysql.md
@@ -13,7 +13,7 @@ Allows `SELECT` and `INSERT` queries to be performed on data that is stored on a
 mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause'])
 ```
 
-**Parameters**
+**Arguments**
 
 -   `host:port` — MySQL server address.
 
diff --git a/docs/en/sql-reference/table-functions/view.md b/docs/en/sql-reference/table-functions/view.md
index 9997971af65..08096c2b019 100644
--- a/docs/en/sql-reference/table-functions/view.md
+++ b/docs/en/sql-reference/table-functions/view.md
@@ -13,7 +13,7 @@ Turns a subquery into a table. The function implements views (see [CREATE VIEW](
 view(subquery)
 ```
 
-**Parameters**
+**Arguments**
 
 -   `subquery` — `SELECT` query.
 

From 2a887b9772180e6d0a731f966dc57572c73f25bd Mon Sep 17 00:00:00 2001
From: kssenii <sumarokovakseniia@mail.ru>
Date: Mon, 15 Feb 2021 21:56:51 +0000
Subject: [PATCH 1100/1238] Add missing format factory settings

---
 .../table-engines/integrations/rabbitmq.md    |  9 +++-
 .../table-engines/integrations/rabbitmq.md    |  9 +++-
 src/Storages/RabbitMQ/RabbitMQSettings.h      |  7 ++-
 src/Storages/RabbitMQ/StorageRabbitMQ.cpp     |  9 ++++
 .../integration/test_storage_rabbitmq/test.py | 53 +++++++++++++++++++
 5 files changed, 82 insertions(+), 5 deletions(-)

diff --git a/docs/en/engines/table-engines/integrations/rabbitmq.md b/docs/en/engines/table-engines/integrations/rabbitmq.md
index b0901ee6f6e..dbae6b62257 100644
--- a/docs/en/engines/table-engines/integrations/rabbitmq.md
+++ b/docs/en/engines/table-engines/integrations/rabbitmq.md
@@ -59,6 +59,8 @@ Optional parameters:
 -   `rabbitmq_max_block_size`
 -   `rabbitmq_flush_interval_ms`
 
+Also FormatFactory settings can be added along with rabbitmq-related settings.
+
 Required configuration:
 
 The RabbitMQ server configuration should be added using the ClickHouse config file.
@@ -75,11 +77,13 @@ Example:
 ``` sql
   CREATE TABLE queue (
     key UInt64,
-    value UInt64
+    value UInt64,
+    date DateTime
   ) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'localhost:5672',
                             rabbitmq_exchange_name = 'exchange1',
                             rabbitmq_format = 'JSONEachRow',
-                            rabbitmq_num_consumers = 5;
+                            rabbitmq_num_consumers = 5,
+                            date_time_input_format = 'best_effort';
 ```
 
 ## Description {#description}
@@ -105,6 +109,7 @@ Exchange type options:
 -   `consistent_hash` - Data is evenly distributed between all bound tables (where the exchange name is the same). Note that this exchange type must be enabled with RabbitMQ plugin: `rabbitmq-plugins enable rabbitmq_consistent_hash_exchange`.
 
 Setting `rabbitmq_queue_base` may be used for the following cases:
+
 -   to let different tables share queues, so that multiple consumers could be registered for the same queues, which makes a better performance. If using `rabbitmq_num_consumers` and/or `rabbitmq_num_queues` settings, the exact match of queues is achieved in case these parameters are the same.
 -   to be able to restore reading from certain durable queues when not all messages were successfully consumed. To resume consumption from one specific queue - set its name in `rabbitmq_queue_base` setting and do not specify `rabbitmq_num_consumers` and `rabbitmq_num_queues` (defaults to 1). To resume consumption from all queues, which were declared for a specific table - just specify the same settings: `rabbitmq_queue_base`, `rabbitmq_num_consumers`, `rabbitmq_num_queues`. By default, queue names will be unique to tables.
 -   to reuse queues as they are declared durable and not auto-deleted. (Can be deleted via any of RabbitMQ CLI tools.)
diff --git a/docs/ru/engines/table-engines/integrations/rabbitmq.md b/docs/ru/engines/table-engines/integrations/rabbitmq.md
index dedb5842d68..bc2eda746cf 100644
--- a/docs/ru/engines/table-engines/integrations/rabbitmq.md
+++ b/docs/ru/engines/table-engines/integrations/rabbitmq.md
@@ -52,6 +52,8 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
 -   `rabbitmq_max_block_size`
 -   `rabbitmq_flush_interval_ms`
 
+Настройки FormatFactory также могут быть добавлены в списке RabbitMQ настроек.
+
 Требуемая конфигурация:
 
 Конфигурация сервера RabbitMQ добавляется с помощью конфигурационного файла ClickHouse.
@@ -68,11 +70,13 @@ Example:
 ``` sql
   CREATE TABLE queue (
     key UInt64,
-    value UInt64
+    value UInt64,
+    date DateTime
   ) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'localhost:5672',
                             rabbitmq_exchange_name = 'exchange1',
                             rabbitmq_format = 'JSONEachRow',
-                            rabbitmq_num_consumers = 5;
+                            rabbitmq_num_consumers = 5,
+                            date_time_input_format = 'best_effort';
 ```
 
 ## Описание {#description}
@@ -98,6 +102,7 @@ Example:
 -   `consistent_hash` - данные равномерно распределяются между всеми связанными таблицами, где имя точки обмена совпадает. Обратите внимание, что этот тип обмена должен быть включен с помощью плагина RabbitMQ: `rabbitmq-plugins enable rabbitmq_consistent_hash_exchange`.
 
 Настройка `rabbitmq_queue_base` может быть использована в следующих случаях:
+
 1.   чтобы восстановить чтение из ранее созданных очередей, если оно прекратилось по какой-либо причине, но очереди остались непустыми. Для восстановления чтения из одной конкретной очереди, нужно написать ее имя в `rabbitmq_queue_base` настройку и не указывать настройки `rabbitmq_num_consumers` и `rabbitmq_num_queues`. Чтобы восстановить чтение из всех очередей, которые были созданы для конкретной таблицы, необходимо совпадение следующих настроек: `rabbitmq_queue_base`, `rabbitmq_num_consumers`, `rabbitmq_num_queues`. По умолчанию, если настройка `rabbitmq_queue_base` не указана, будут использованы уникальные для каждой таблицы имена очередей.
 2.   чтобы объявить одни и те же очереди для разных таблиц, что позволяет создавать несколько параллельных подписчиков на каждую из очередей. То есть обеспечивается лучшая производительность. В данном случае, для таких таблиц также необходимо совпадение настроек: `rabbitmq_num_consumers`, `rabbitmq_num_queues`.
 3.   чтобы повторно использовать созданные c `durable` настройкой очереди, так как они не удаляются автоматически (но могут быть удалены с помощью любого RabbitMQ CLI).
diff --git a/src/Storages/RabbitMQ/RabbitMQSettings.h b/src/Storages/RabbitMQ/RabbitMQSettings.h
index 2f8d6adfa16..66348d61424 100644
--- a/src/Storages/RabbitMQ/RabbitMQSettings.h
+++ b/src/Storages/RabbitMQ/RabbitMQSettings.h
@@ -1,13 +1,14 @@
 #pragma once
 
 #include <Core/BaseSettings.h>
+#include <Core/Settings.h>
 
 namespace DB
 {
     class ASTStorage;
 
 
-#define LIST_OF_RABBITMQ_SETTINGS(M) \
+#define RABBITMQ_RELATED_SETTINGS(M) \
     M(String, rabbitmq_host_port, "", "A host-port to connect to RabbitMQ server.", 0) \
     M(String, rabbitmq_exchange_name, "clickhouse-exchange", "The exchange name, to which messages are sent.", 0) \
     M(String, rabbitmq_format, "", "The message format.", 0) \
@@ -24,6 +25,10 @@ namespace DB
     M(UInt64, rabbitmq_max_block_size, 0, "Number of row collected before flushing data from RabbitMQ.", 0) \
     M(Milliseconds, rabbitmq_flush_interval_ms, 0, "Timeout for flushing data from RabbitMQ.", 0) \
 
+#define LIST_OF_RABBITMQ_SETTINGS(M) \
+    RABBITMQ_RELATED_SETTINGS(M) \
+    FORMAT_FACTORY_SETTINGS(M)
+
 DECLARE_SETTINGS_TRAITS(RabbitMQSettingsTraits, LIST_OF_RABBITMQ_SETTINGS)
 
 struct RabbitMQSettings : public BaseSettings<RabbitMQSettingsTraits>
diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp
index 3ee9dda2bf3..edce1a4b658 100644
--- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp
+++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp
@@ -199,6 +199,15 @@ std::shared_ptr<Context> StorageRabbitMQ::addSettings(const Context & context) c
     if (!schema_name.empty())
         modified_context->setSetting("format_schema", schema_name);
 
+    for (const auto & setting : *rabbitmq_settings)
+    {
+        const auto & setting_name = setting.getName();
+
+        /// check for non-rabbitmq-related settings
+        if (!setting_name.starts_with("rabbitmq_"))
+            modified_context->setSetting(setting_name, setting.getValue());
+    }
+
     return modified_context;
 }
 
diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py
index 911f6d144f9..ca89ebdea0a 100644
--- a/tests/integration/test_storage_rabbitmq/test.py
+++ b/tests/integration/test_storage_rabbitmq/test.py
@@ -1912,6 +1912,59 @@ def test_rabbitmq_no_connection_at_startup(rabbitmq_cluster):
     assert int(result) == messages_num, 'ClickHouse lost some messages: {}'.format(result)
 
 
+@pytest.mark.timeout(120)
+def test_rabbitmq_format_factory_settings(rabbitmq_cluster):
+    instance.query('''
+        CREATE TABLE test.format_settings (
+            id String, date DateTime
+        ) ENGINE = RabbitMQ
+            SETTINGS rabbitmq_host_port = 'rabbitmq1:5672',
+                     rabbitmq_exchange_name = 'format_settings',
+                     rabbitmq_format = 'JSONEachRow',
+                     date_time_input_format = 'best_effort';
+        ''')
+
+    credentials = pika.PlainCredentials('root', 'clickhouse')
+    parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials)
+    connection = pika.BlockingConnection(parameters)
+    channel = connection.channel()
+
+    message = json.dumps({"id":"format_settings_test","date":"2021-01-19T14:42:33.1829214Z"})
+    expected = instance.query('''SELECT parseDateTimeBestEffort(CAST('2021-01-19T14:42:33.1829214Z', 'String'))''')
+
+    channel.basic_publish(exchange='format_settings', routing_key='', body=message)
+    result = ''
+    while True:
+        result = instance.query('SELECT date FROM test.format_settings')
+        if result == expected:
+            break;
+
+    instance.query('''
+        DROP TABLE IF EXISTS test.view;
+        DROP TABLE IF EXISTS test.consumer;
+        CREATE TABLE test.view (
+            id String, date DateTime
+        ) ENGINE = MergeTree ORDER BY id;
+        CREATE MATERIALIZED VIEW test.consumer TO test.view AS
+            SELECT * FROM test.format_settings;
+        ''')
+
+    channel.basic_publish(exchange='format_settings', routing_key='', body=message)
+    result = ''
+    while True:
+        result = instance.query('SELECT date FROM test.view')
+        if result == expected:
+            break;
+
+    connection.close()
+    instance.query('''
+        DROP TABLE test.consumer;
+        DROP TABLE test.format_settings;
+    ''')
+
+    assert(result == expected)
+
+
 if __name__ == '__main__':
     cluster.start()
     input("Cluster created, press any key to destroy...")

From d9f66d8d30b35058b9d2fc0fa070ad4c3c1a5cd5 Mon Sep 17 00:00:00 2001
From: kssenii <sumarokovakseniia@mail.ru>
Date: Mon, 15 Feb 2021 23:25:19 +0000
Subject: [PATCH 1101/1238] Better doc

---
 docs/en/engines/table-engines/integrations/rabbitmq.md | 2 +-
 docs/ru/engines/table-engines/integrations/rabbitmq.md | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/en/engines/table-engines/integrations/rabbitmq.md b/docs/en/engines/table-engines/integrations/rabbitmq.md
index dbae6b62257..946f70f903d 100644
--- a/docs/en/engines/table-engines/integrations/rabbitmq.md
+++ b/docs/en/engines/table-engines/integrations/rabbitmq.md
@@ -59,7 +59,7 @@ Optional parameters:
 -   `rabbitmq_max_block_size`
 -   `rabbitmq_flush_interval_ms`
 
-Also FormatFactory settings can be added along with rabbitmq-related settings.
+Also format settings can be added along with rabbitmq-related settings.
 
 Required configuration:
 
diff --git a/docs/ru/engines/table-engines/integrations/rabbitmq.md b/docs/ru/engines/table-engines/integrations/rabbitmq.md
index bc2eda746cf..173beecb6e7 100644
--- a/docs/ru/engines/table-engines/integrations/rabbitmq.md
+++ b/docs/ru/engines/table-engines/integrations/rabbitmq.md
@@ -52,7 +52,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
 -   `rabbitmq_max_block_size`
 -   `rabbitmq_flush_interval_ms`
 
-Настройки FormatFactory также могут быть добавлены в списке RabbitMQ настроек.
+Настройки форматов данных также могут быть добавлены в списке RabbitMQ настроек.
 
 Требуемая конфигурация:
 

From e485d4cad8e21e721ad250f9117b5717a6d64fd7 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Tue, 16 Feb 2021 09:27:48 +0300
Subject: [PATCH 1102/1238] Fix SIGSEGV on Unknown packet for Distributed
 queries

On Unknown packet disconnect() will be called, which will reset the
input stream, so no need to call setAsyncCallback():

    [ 42015 ] {} <Fatal> BaseDaemon: (version 21.3.1.1, build id: 4F9644AF560F6BB6) (from thread 45051) (no query) Received signal Segmentation fault (11)
    [ 42015 ] {} <Fatal> BaseDaemon: Address: 0x90 Access: read. Address not mapped to object.
    [ 42015 ] {} <Fatal> BaseDaemon: Stack trace: 0xf82e0f4 0xf82de19 0xf83b9a5 0xf83c0e0 0xe9a6fa7 0xf95016c 0xf950939 0xf95020c 0xf950939 0xf95020c 0xf950939 0xf95020c 0xf9508f9 0xf953e40 0xf958376 0x88056af 0x8809143 0x7f4b3e1aaf27 0x7f4b3e0dc31f
    [ 42015 ] {} <Fatal> BaseDaemon: 2. ext::basic_scope_guard<DB::Connection::receivePacket(std::__1::function<void (Poco::Net::Socket&)>)::$_3>::~basic_scope_guard() @ 0xf82e0f4 in /usr/lib/debug/.build-id/4f/9644af560f6bb6.debug
    [ 42015 ] {} <Fatal> BaseDaemon: 3. DB::Connection::receivePacket(std::__1::function<void (Poco::Net::Socket&)>) @ 0xf82de19 in /usr/lib/debug/.build-id/4f/9644af560f6bb6.debug
    [ 42015 ] {} <Fatal> BaseDaemon: 4. DB::MultiplexedConnections::receivePacketUnlocked(std::__1::function<void (Poco::Net::Socket&)>) @ 0xf83b9a5 in /usr/lib/debug/.build-id/4f/9644af560f6bb6.debug
    [ 42015 ] {} <Fatal> BaseDaemon: 5. DB::MultiplexedConnections::drain() @ 0xf83c0e0 in /usr/lib/debug/.build-id/4f/9644af560f6bb6.debug
    [ 42015 ] {} <Fatal> BaseDaemon: 6. DB::RemoteQueryExecutor::finish(std::__1::unique_ptr<DB::RemoteQueryExecutorReadContext, std::__1::default_delete<DB::RemoteQueryExecutorReadContext> >*) @ 0xe9a6fa7 in /usr/lib/debug/.build-id/4f/9644af560f6bb6.debug
    [ 42015 ] {} <Fatal> BaseDaemon: 7. DB::PipelineExecutor::tryAddProcessorToStackIfUpdated() @ 0xf95016c in /usr/lib/debug/.build-id/4f/9644af560f6bb6.debug
    ...
---
 src/Client/Connection.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp
index e38a6b240a6..164b9565633 100644
--- a/src/Client/Connection.cpp
+++ b/src/Client/Connection.cpp
@@ -756,7 +756,11 @@ std::optional<UInt64> Connection::checkPacket(size_t timeout_microseconds)
 Packet Connection::receivePacket(std::function<void(Poco::Net::Socket &)> async_callback)
 {
     in->setAsyncCallback(std::move(async_callback));
-    SCOPE_EXIT(in->setAsyncCallback({}));
+    SCOPE_EXIT({
+        /// disconnect() will reset "in".
+        if (in)
+            in->setAsyncCallback({});
+    });
 
     try
     {

From e39215e38bb6c82fa863f1c117eded0389d7a381 Mon Sep 17 00:00:00 2001
From: vdimir <vdimir@yandex-team.ru>
Date: Tue, 16 Feb 2021 11:03:02 +0300
Subject: [PATCH 1103/1238] Fix has_some condition on CollectJoinOnKeysVisitor

---
 src/Interpreters/CollectJoinOnKeysVisitor.cpp | 3 ++-
 src/Interpreters/TreeRewriter.cpp             | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.cpp b/src/Interpreters/CollectJoinOnKeysVisitor.cpp
index 9033dd0f0f8..a0ea27e9905 100644
--- a/src/Interpreters/CollectJoinOnKeysVisitor.cpp
+++ b/src/Interpreters/CollectJoinOnKeysVisitor.cpp
@@ -49,7 +49,8 @@ void CollectJoinOnKeysMatcher::Data::addJoinKeys(const ASTPtr & left_ast, const
     else
         throw Exception("Cannot detect left and right JOIN keys. JOIN ON section is ambiguous.",
                         ErrorCodes::AMBIGUOUS_COLUMN_NAME);
-    has_some = true;
+    if (table_no.first != table_no.second && table_no.first > 0 && table_no.second > 0)
+        has_some = true;
 }
 
 void CollectJoinOnKeysMatcher::Data::addAsofJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast,
diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp
index 22356622f8d..cef4a0203bb 100644
--- a/src/Interpreters/TreeRewriter.cpp
+++ b/src/Interpreters/TreeRewriter.cpp
@@ -427,7 +427,7 @@ void collectJoinedColumns(TableJoin & analyzed_join, const ASTSelectQuery & sele
         {
             data.asofToJoinKeys();
         }
-        else if (data.new_where_conditions && data.new_on_expression)
+        else if (data.new_on_expression)
         {
             table_join.on_expression = data.new_on_expression;
             new_where_conditions = data.new_where_conditions;

From 3d19d0644ebbf292eebf1135aac059a08f2d6c82 Mon Sep 17 00:00:00 2001
From: vdimir <vdimir@yandex-team.ru>
Date: Tue, 16 Feb 2021 13:46:25 +0300
Subject: [PATCH 1104/1238] Update join on associativity in some tests

---
 tests/queries/0_stateless/00826_cross_to_inner_join.reference   | 2 +-
 tests/queries/0_stateless/00849_multiple_comma_join_2.reference | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/queries/0_stateless/00826_cross_to_inner_join.reference b/tests/queries/0_stateless/00826_cross_to_inner_join.reference
index e7c8d6b1ea9..84867de2849 100644
--- a/tests/queries/0_stateless/00826_cross_to_inner_join.reference
+++ b/tests/queries/0_stateless/00826_cross_to_inner_join.reference
@@ -95,7 +95,7 @@ SELECT
     t2_00826.a,
     t2_00826.b
 FROM t1_00826
-ALL INNER JOIN t2_00826 ON (a = t2_00826.a) AND (a = t2_00826.a) AND (a = t2_00826.a) AND (b = t2_00826.b)
+ALL INNER JOIN t2_00826 ON (((a = t2_00826.a) AND (a = t2_00826.a)) AND (a = t2_00826.a)) AND (b = t2_00826.b)
 WHERE (a = t2_00826.a) AND ((a = t2_00826.a) AND ((a = t2_00826.a) AND (b = t2_00826.b)))
 cross split conjunction
 SELECT
diff --git a/tests/queries/0_stateless/00849_multiple_comma_join_2.reference b/tests/queries/0_stateless/00849_multiple_comma_join_2.reference
index fc39ef13935..4db65b0b795 100644
--- a/tests/queries/0_stateless/00849_multiple_comma_join_2.reference
+++ b/tests/queries/0_stateless/00849_multiple_comma_join_2.reference
@@ -127,7 +127,7 @@ FROM
     ) AS `--.s`
     CROSS JOIN t3
 ) AS `--.s`
-ALL INNER JOIN t4 ON (a = `--t1.a`) AND (a = `--t2.a`) AND (a = `--t3.a`)
+ALL INNER JOIN t4 ON ((a = `--t1.a`) AND (a = `--t2.a`)) AND (a = `--t3.a`)
 WHERE (a = `--t1.a`) AND (a = `--t2.a`) AND (a = `--t3.a`)
 SELECT `--t1.a` AS `t1.a`
 FROM 

From 0b0b481825ba2e71074823d2d0bbce043e6e9b4f Mon Sep 17 00:00:00 2001
From: gyuton <40863448+gyuton@users.noreply.github.com>
Date: Tue, 16 Feb 2021 14:02:33 +0300
Subject: [PATCH 1105/1238] DOCSUP-5602: Edited and translated to russian
 (#20302)

* Edited and added translation

* Minor fixes

* Fix typo

Co-authored-by: George <gyuton@yandex-team.ru>
Co-authored-by: Ivan <5627721+abyss7@users.noreply.github.com>
---
 .../sql-reference/functions/url-functions.md  | 146 +++++++++++++++-
 .../sql-reference/functions/url-functions.md  | 162 ++++++++++++++++++
 2 files changed, 299 insertions(+), 9 deletions(-)

diff --git a/docs/en/sql-reference/functions/url-functions.md b/docs/en/sql-reference/functions/url-functions.md
index 006542f494a..975695f40b3 100644
--- a/docs/en/sql-reference/functions/url-functions.md
+++ b/docs/en/sql-reference/functions/url-functions.md
@@ -133,10 +133,9 @@ For example:
 
 ### cutToFirstSignificantSubdomainCustom {#cuttofirstsignificantsubdomaincustom}
 
-Same as `cutToFirstSignificantSubdomain` but accept custom TLD list name, useful if:
+Returns the part of the domain that includes top-level subdomains up to the first significant subdomain. Accepts custom [TLD list](https://en.wikipedia.org/wiki/List_of_Internet_top-level_domains) name.
 
-- you need fresh TLD list,
-- or you have custom.
+Can be useful if you need fresh TLD list or you have custom.
 
 Configuration example:
 
@@ -149,21 +148,150 @@ Configuration example:
 </top_level_domains_lists>
 ```
 
-Example:
+**Syntax**
 
--   `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/', 'public_suffix_list') = 'yandex.com.tr'`.
+``` sql
+cutToFirstSignificantSubdomain(URL, TLD)
+```
+
+**Parameters**
+
+-   `URL` — URL. [String](../../sql-reference/data-types/string.md).
+-   `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md).
+
+**Returned value**
+
+-   Part of the domain that includes top-level subdomains up to the first significant subdomain.
+
+Type: [String](../../sql-reference/data-types/string.md).
+
+**Example**
+
+Query:
+
+```sql
+SELECT cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list');
+```
+
+Result:
+
+```text
+┌─cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list')─┐
+│ foo.there-is-no-such-domain                                                                   │
+└───────────────────────────────────────────────────────────────────────────────────────────────┘
+```
+
+**See Also**
+
+-   [firstSignificantSubdomain](#firstsignificantsubdomain).
 
 ### cutToFirstSignificantSubdomainCustomWithWWW {#cuttofirstsignificantsubdomaincustomwithwww}
 
-Same as `cutToFirstSignificantSubdomainWithWWW` but accept custom TLD list name.
+Returns the part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`. Accepts custom TLD list name.
+
+Can be useful if you need fresh TLD list or you have custom.
+
+Configuration example:
+
+```xml
+<!-- <top_level_domains_path>/var/lib/clickhouse/top_level_domains/</top_level_domains_path> -->
+<top_level_domains_lists>
+    <!-- https://publicsuffix.org/list/public_suffix_list.dat -->
+    <public_suffix_list>public_suffix_list.dat</public_suffix_list>
+    <!-- NOTE: path is under top_level_domains_path -->
+</top_level_domains_lists>
+```
+
+**Syntax**
+
+```sql
+cutToFirstSignificantSubdomainCustomWithWWW(URL, TLD)
+```
+
+**Parameters**
+
+-   `URL` — URL. [String](../../sql-reference/data-types/string.md).
+-   `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md).
+
+**Returned value**
+
+-   Part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`.
+
+Type: [String](../../sql-reference/data-types/string.md).
+
+**Example**
+
+Query:
+
+```sql
+SELECT cutToFirstSignificantSubdomainCustomWithWWW('www.foo', 'public_suffix_list');
+```
+
+Result:
+
+```text
+┌─cutToFirstSignificantSubdomainCustomWithWWW('www.foo', 'public_suffix_list')─┐
+│ www.foo                                                                      │
+└──────────────────────────────────────────────────────────────────────────────┘
+```
+
+**See Also**
+
+-   [firstSignificantSubdomain](#firstsignificantsubdomain).
 
 ### firstSignificantSubdomainCustom {#firstsignificantsubdomaincustom}
 
-Same as `firstSignificantSubdomain` but accept custom TLD list name.
+Returns the first significant subdomain. Accepts customs TLD list name.
 
-### cutToFirstSignificantSubdomainCustomWithWWW {#cuttofirstsignificantsubdomaincustomwithwww}
+Can be useful if you need fresh TLD list or you have custom.
 
-Same as `cutToFirstSignificantSubdomainWithWWW` but accept custom TLD list name.
+Configuration example:
+
+```xml
+<!-- <top_level_domains_path>/var/lib/clickhouse/top_level_domains/</top_level_domains_path> -->
+<top_level_domains_lists>
+    <!-- https://publicsuffix.org/list/public_suffix_list.dat -->
+    <public_suffix_list>public_suffix_list.dat</public_suffix_list>
+    <!-- NOTE: path is under top_level_domains_path -->
+</top_level_domains_lists>
+```
+
+**Syntax**
+
+```sql
+firstSignificantSubdomainCustom(URL, TLD)
+```
+
+**Parameters**
+
+-   `URL` — URL. [String](../../sql-reference/data-types/string.md).
+-   `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md).
+
+**Returned value**
+
+-   First significant subdomain.
+
+Type: [String](../../sql-reference/data-types/string.md).
+
+**Example**
+
+Query:
+
+```sql
+SELECT firstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list');
+```
+
+Result:
+
+```text 
+┌─firstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list')─┐
+│ foo                                                                                      │
+└──────────────────────────────────────────────────────────────────────────────────────────┘
+```
+
+**See Also**
+
+-   [firstSignificantSubdomain](#firstsignificantsubdomain).
 
 ### port(URL\[, default_port = 0\]) {#port}
 
diff --git a/docs/ru/sql-reference/functions/url-functions.md b/docs/ru/sql-reference/functions/url-functions.md
index 1008e2a359c..7541e16bed4 100644
--- a/docs/ru/sql-reference/functions/url-functions.md
+++ b/docs/ru/sql-reference/functions/url-functions.md
@@ -115,6 +115,168 @@ SELECT topLevelDomain('svn+ssh://www.some.svn-hosting.com:80/repo/trunk')
 
 Например, `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/') = 'yandex.com.tr'`.
 
+### cutToFirstSignificantSubdomainCustom {#cuttofirstsignificantsubdomaincustom}
+
+Возвращает часть домена, включающую поддомены верхнего уровня до первого существенного поддомена. Принимает имя пользовательского [списка доменов верхнего уровня](https://ru.wikipedia.org/wiki/Список_доменов_верхнего_уровня).
+
+Полезно, если требуется актуальный список доменов верхнего уровня или если есть пользовательский.
+
+Пример конфигурации:
+
+```xml
+<!-- <top_level_domains_path>/var/lib/clickhouse/top_level_domains/</top_level_domains_path> -->
+<top_level_domains_lists>
+    <!-- https://publicsuffix.org/list/public_suffix_list.dat -->
+    <public_suffix_list>public_suffix_list.dat</public_suffix_list>
+    <!-- NOTE: path is under top_level_domains_path -->
+</top_level_domains_lists>
+```
+
+**Синтаксис**
+
+``` sql
+cutToFirstSignificantSubdomain(URL, TLD)
+```
+
+**Parameters**
+
+-   `URL` — URL. [String](../../sql-reference/data-types/string.md).
+-   `TLD` — имя пользовательского списка доменов верхнего уровня. [String](../../sql-reference/data-types/string.md).
+
+**Возвращаемое значение**
+
+-   Часть домена, включающая поддомены верхнего уровня до первого существенного поддомена.
+
+Тип: [String](../../sql-reference/data-types/string.md).
+
+**Пример**
+
+Запрос:
+
+```sql
+SELECT cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list');
+```
+
+Результат:
+
+```text
+┌─cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list')─┐
+│ foo.there-is-no-such-domain                                                                   │
+└───────────────────────────────────────────────────────────────────────────────────────────────┘
+```
+
+**Смотрите также**
+
+-   [firstSignificantSubdomain](#firstsignificantsubdomain).
+
+### cutToFirstSignificantSubdomainCustomWithWWW {#cuttofirstsignificantsubdomaincustomwithwww}
+
+Возвращает часть домена, включающую поддомены верхнего уровня до первого существенного поддомена, не опуская "www". Принимает имя пользовательского списка доменов верхнего уровня.
+
+Полезно, если требуется актуальный список доменов верхнего уровня или если есть пользовательский.
+
+Пример конфигурации:
+
+```xml
+<!-- <top_level_domains_path>/var/lib/clickhouse/top_level_domains/</top_level_domains_path> -->
+<top_level_domains_lists>
+    <!-- https://publicsuffix.org/list/public_suffix_list.dat -->
+    <public_suffix_list>public_suffix_list.dat</public_suffix_list>
+    <!-- NOTE: path is under top_level_domains_path -->
+</top_level_domains_lists>
+```
+
+**Синтаксис**
+
+```sql
+cutToFirstSignificantSubdomainCustomWithWWW(URL, TLD)
+```
+
+**Параметры**
+
+-   `URL` — URL. [String](../../sql-reference/data-types/string.md).
+-   `TLD` — имя пользовательского списка доменов верхнего уровня. [String](../../sql-reference/data-types/string.md).
+
+**Возвращаемое значение**
+
+-   Часть домена, включающая поддомены верхнего уровня до первого существенного поддомена, без удаления `www`.
+
+Тип: [String](../../sql-reference/data-types/string.md).
+
+**Пример**
+
+Запрос:
+
+```sql
+SELECT cutToFirstSignificantSubdomainCustomWithWWW('www.foo', 'public_suffix_list');
+```
+
+Результат:
+
+```text
+┌─cutToFirstSignificantSubdomainCustomWithWWW('www.foo', 'public_suffix_list')─┐
+│ www.foo                                                                      │
+└──────────────────────────────────────────────────────────────────────────────┘
+```
+
+**Смотрите также**
+
+-   [firstSignificantSubdomain](#firstsignificantsubdomain).
+
+### firstSignificantSubdomainCustom {#firstsignificantsubdomaincustom}
+
+Возвращает первый существенный поддомен. Принимает имя пользовательского списка доменов верхнего уровня.
+
+Полезно, если требуется актуальный список доменов верхнего уровня или если есть пользовательский.
+
+Пример конфигурации:
+
+```xml
+<!-- <top_level_domains_path>/var/lib/clickhouse/top_level_domains/</top_level_domains_path> -->
+<top_level_domains_lists>
+    <!-- https://publicsuffix.org/list/public_suffix_list.dat -->
+    <public_suffix_list>public_suffix_list.dat</public_suffix_list>
+    <!-- NOTE: path is under top_level_domains_path -->
+</top_level_domains_lists>
+```
+
+**Синтаксис**
+
+```sql
+firstSignificantSubdomainCustom(URL, TLD)
+```
+
+**Параметры**
+
+-   `URL` — URL. [String](../../sql-reference/data-types/string.md).
+-   `TLD` — имя пользовательского списка доменов верхнего уровня. [String](../../sql-reference/data-types/string.md).
+
+**Возвращаемое значение**
+
+-   Первый существенный поддомен.
+
+Тип: [String](../../sql-reference/data-types/string.md).
+
+**Пример**
+
+Запрос:
+
+```sql
+SELECT firstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list');
+```
+
+Результат:
+
+```text 
+┌─firstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list')─┐
+│ foo                                                                                      │
+└──────────────────────────────────────────────────────────────────────────────────────────┘
+```
+
+**Смотрите также**
+
+-   [firstSignificantSubdomain](#firstsignificantsubdomain).
+
 ### port(URL[, default_port = 0]) {#port}
 
 Возвращает порт или значение `default_port`, если в URL-адресе нет порта (или передан невалидный URL) 

From dc32d1fa4196d496d8433d97b7e8f199e3a8a7f2 Mon Sep 17 00:00:00 2001
From: Vladimir <vdimir@yandex-team.ru>
Date: Tue, 16 Feb 2021 14:21:23 +0300
Subject: [PATCH 1106/1238] Make `Arguments` bold in doc

---
 docs/en/sql-reference/functions/other-functions.md  | 2 +-
 docs/en/sql-reference/functions/string-functions.md | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md
index dcbb7d1ffeb..04e921b5c55 100644
--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@@ -460,7 +460,7 @@ Allows building a unicode-art diagram.
 
 `bar(x, min, max, width)` draws a band with a width proportional to `(x - min)` and equal to `width` characters when `x = max`.
 
-Arguments:
+**Arguments**
 
 -   `x` — Size to display.
 -   `min, max` — Integer constants. The value must fit in `Int64`.
diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md
index 3f6ffeee654..dc5304b39aa 100644
--- a/docs/en/sql-reference/functions/string-functions.md
+++ b/docs/en/sql-reference/functions/string-functions.md
@@ -76,7 +76,7 @@ Replaces invalid UTF-8 characters by the `�` (U+FFFD) character. All running i
 toValidUTF8( input_string )
 ```
 
-Arguments:
+**Arguments**
 
 -   input_string — Any set of bytes represented as the [String](../../sql-reference/data-types/string.md) data type object.
 

From 7c5d8458661d644aebb607fd344c82478143ea1f Mon Sep 17 00:00:00 2001
From: Nikita Mikhailov <jakalletti@jakalletti-build.sas.yp-c.yandex.net>
Date: Tue, 16 Feb 2021 15:37:49 +0300
Subject: [PATCH 1107/1238] refactor function

---
 src/Functions/FunctionFile.cpp                | 175 +++++++++++-------
 src/IO/ReadBufferFromFile.h                   |   4 +-
 .../01658_read_file_to_stringcolumn.reference |   3 +
 .../01658_read_file_to_stringcolumn.sh        |   6 +-
 4 files changed, 113 insertions(+), 75 deletions(-)

diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp
index e4327862982..f477f6123c3 100644
--- a/src/Functions/FunctionFile.cpp
+++ b/src/Functions/FunctionFile.cpp
@@ -11,93 +11,124 @@
 namespace DB
 {
 
-    namespace ErrorCodes
+namespace ErrorCodes
+{
+    extern const int ILLEGAL_COLUMN;
+    extern const int NOT_IMPLEMENTED;
+    extern const int INCORRECT_FILE_NAME;
+    extern const int DATABASE_ACCESS_DENIED;
+    extern const int FILE_DOESNT_EXIST;
+}
+
+/// A function to read file as a string.
+class FunctionFile : public IFunction
+{
+public:
+    static constexpr auto name = "file";
+    static FunctionPtr create(const Context &context) { return std::make_shared<FunctionFile>(context); }
+    explicit FunctionFile(const Context &context_) : context(context_) {}
+
+    String getName() const override { return name; }
+
+    size_t getNumberOfArguments() const override { return 1; }
+    bool isInjective(const ColumnsWithTypeAndName &) const override { return true; }
+
+    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
     {
-        extern const int ILLEGAL_COLUMN;
-        extern const int NOT_IMPLEMENTED;
-        extern const int INCORRECT_FILE_NAME;
-        extern const int DATABASE_ACCESS_DENIED;
+        if (!isString(arguments[0].type))
+            throw Exception(getName() + " is only implemented for types String", ErrorCodes::NOT_IMPLEMENTED);
+        return std::make_shared<DataTypeString>();
     }
 
-    /** A function to read file as a string.
-  */
-    class FunctionFile : public IFunction
+    bool useDefaultImplementationForConstants() const override { return true; }
+
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
     {
-    public:
-        static constexpr auto name = "file";
-        static FunctionPtr create(const Context &context) { return std::make_shared<FunctionFile>(context); }
-        explicit FunctionFile(const Context &context_) : context(context_) {}
+        const ColumnPtr column = arguments[0].column;
+        const ColumnString * expected = checkAndGetColumn<ColumnString>(column.get());
+        if (!expected)
+            throw Exception(
+                fmt::format("Illegal column {} of argument of function {}", arguments[0].column->getName(), getName()),
+                ErrorCodes::ILLEGAL_COLUMN);
 
-        String getName() const override { return name; }
+        const ColumnString::Chars & chars = expected->getChars();
+        const ColumnString::Offsets & offsets = expected->getOffsets();
 
-        size_t getNumberOfArguments() const override { return 1; }
-        bool isInjective(const ColumnsWithTypeAndName &) const override { return true; }
+        std::vector<String> checked_filenames(input_rows_count);
 
-        DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
+        auto result = ColumnString::create();
+        auto & res_chars = result->getChars();
+        auto & res_offsets = result->getOffsets();
+
+        res_offsets.resize(input_rows_count);
+
+        size_t source_offset = 0;
+        size_t result_offset = 0;
+        for (size_t row = 0; row < input_rows_count; ++row)
         {
-            if (!isString(arguments[0].type))
-                throw Exception(getName() + " is only implemented for types String", ErrorCodes::NOT_IMPLEMENTED);
-            return std::make_shared<DataTypeString>();
+            const char * filename = reinterpret_cast<const char *>(&chars[source_offset]);
+
+            const String user_files_path = context.getUserFilesPath();
+            String user_files_absolute_path = Poco::Path(user_files_path).makeAbsolute().makeDirectory().toString();
+            Poco::Path poco_filepath = Poco::Path(filename);
+            if (poco_filepath.isRelative())
+                poco_filepath = Poco::Path(user_files_absolute_path, poco_filepath);
+            const String file_absolute_path = poco_filepath.absolute().toString();
+            checkReadIsAllowedOrThrow(user_files_absolute_path, file_absolute_path);
+
+            checked_filenames[row] = file_absolute_path;
+            auto file = Poco::File(file_absolute_path);
+
+            if (!file.exists())
+                throw Exception(fmt::format("File {} doesn't exist.", file_absolute_path), ErrorCodes::FILE_DOESNT_EXIST);
+
+            const auto current_file_size = Poco::File(file_absolute_path).getSize();
+
+            result_offset += current_file_size + 1;
+            res_offsets[row] = result_offset;
+            source_offset = offsets[row];
         }
 
-        bool useDefaultImplementationForConstants() const override { return true; }
+        res_chars.resize(result_offset);
 
-        ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
+        size_t prev_offset = 0;
+
+        for (size_t row = 0; row < input_rows_count; ++row)
         {
-            const auto & column = arguments[0].column;
-            const char * filename = nullptr;
-            if (const auto * column_string = checkAndGetColumn<ColumnString>(column.get()))
-            {
-                const auto & filename_chars = column_string->getChars();
-                filename = reinterpret_cast<const char *>(&filename_chars[0]);
-                auto res = ColumnString::create();
-                auto & res_chars = res->getChars();
-                auto & res_offsets = res->getOffsets();
+            auto file_absolute_path = checked_filenames[row];
+            ReadBufferFromFile in(file_absolute_path);
+            char * res_buf = reinterpret_cast<char *>(&res_chars[prev_offset]);
 
-                const String user_files_path = context.getUserFilesPath();
-                String user_files_absolute_path = Poco::Path(user_files_path).makeAbsolute().makeDirectory().toString();
-                Poco::Path poco_filepath = Poco::Path(filename);
-                if (poco_filepath.isRelative())
-                    poco_filepath = Poco::Path(user_files_absolute_path, poco_filepath);
-                const String file_absolute_path = poco_filepath.absolute().toString();
-                checkReadIsAllowed(user_files_absolute_path, file_absolute_path);
-
-                ReadBufferFromFile in(file_absolute_path);
-                ssize_t file_len = Poco::File(file_absolute_path).getSize();
-                res_chars.resize_exact(file_len + 1);
-                char *res_buf = reinterpret_cast<char *>(&res_chars[0]);
-                in.readStrict(res_buf, file_len);
-                res_offsets.push_back(file_len + 1);
-                res_buf[file_len] = '\0';
-
-                return res;
-            }
-            else
-            {
-                throw Exception("Bad Function arguments for file() " + std::string(filename), ErrorCodes::ILLEGAL_COLUMN);
-            }
+            const size_t file_lenght = res_offsets[row] - prev_offset - 1;
+            prev_offset = res_offsets[row];
+            in.readStrict(res_buf, file_lenght);
+            res_buf[file_lenght] = '\0';
         }
 
-    private:
-        void checkReadIsAllowed(const std::string & user_files_absolute_path, const std::string & file_absolute_path) const
-        {
-            // If run in Local mode, no need for path checking.
-            if (context.getApplicationType() != Context::ApplicationType::LOCAL)
-                if (file_absolute_path.find(user_files_absolute_path) != 0)
-                    throw Exception("File is not inside " + user_files_absolute_path, ErrorCodes::DATABASE_ACCESS_DENIED);
-
-            Poco::File path_poco_file = Poco::File(file_absolute_path);
-            if (path_poco_file.exists() && path_poco_file.isDirectory())
-                throw Exception("File can't be a directory", ErrorCodes::INCORRECT_FILE_NAME);
-        }
-
-        const Context & context;
-    };
-
-
-    void registerFunctionFile(FunctionFactory & factory)
-    {
-        factory.registerFunction<FunctionFile>();
+        return result;
     }
 
+private:
+
+    void checkReadIsAllowedOrThrow(const std::string & user_files_absolute_path, const std::string & file_absolute_path) const
+    {
+        // If run in Local mode, no need for path checking.
+        if (context.getApplicationType() != Context::ApplicationType::LOCAL)
+            if (file_absolute_path.find(user_files_absolute_path) != 0)
+                throw Exception("File is not inside " + user_files_absolute_path, ErrorCodes::DATABASE_ACCESS_DENIED);
+
+        Poco::File path_poco_file = Poco::File(file_absolute_path);
+        if (path_poco_file.exists() && path_poco_file.isDirectory())
+            throw Exception("File can't be a directory", ErrorCodes::INCORRECT_FILE_NAME);
+    }
+
+    const Context & context;
+};
+
+
+void registerFunctionFile(FunctionFactory & factory)
+{
+    factory.registerFunction<FunctionFile>();
+}
+
 }
diff --git a/src/IO/ReadBufferFromFile.h b/src/IO/ReadBufferFromFile.h
index cebda605b21..33365bc7ceb 100644
--- a/src/IO/ReadBufferFromFile.h
+++ b/src/IO/ReadBufferFromFile.h
@@ -25,11 +25,11 @@ protected:
     CurrentMetrics::Increment metric_increment{CurrentMetrics::OpenFileForRead};
 
 public:
-    ReadBufferFromFile(const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, int flags = -1,
+    explicit ReadBufferFromFile(const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, int flags = -1,
         char * existing_memory = nullptr, size_t alignment = 0);
 
     /// Use pre-opened file descriptor.
-    ReadBufferFromFile(
+    explicit ReadBufferFromFile(
         int & fd, /// Will be set to -1 if constructor didn't throw and ownership of file descriptor is passed to the object.
         const std::string & original_file_name = {},
         size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference
index a22076de920..87659c32e39 100644
--- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference
+++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference
@@ -5,6 +5,9 @@ aaaaaaaaa	bbbbbbbbb
 ccccccccc	aaaaaaaaa	bbbbbbbbb
 ccccccccc	aaaaaaaaa	bbbbbbbbb
 :0
+aaaaaaaaa
+bbbbbbbbb
+ccccccccc
 :107
 :79
 :35
diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
index 43e1e11a193..0359d803a23 100755
--- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
+++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
@@ -28,7 +28,11 @@ ${CLICKHOUSE_CLIENT} --query "select file('${user_files_path}/a.txt'), file('${u
 ${CLICKHOUSE_CLIENT} --query "insert into data select file('${user_files_path}/a.txt'), file('${user_files_path}/b.txt');";echo ":"$?
 ${CLICKHOUSE_CLIENT} --query "insert into data select file('${user_files_path}/a.txt'), file('${user_files_path}/b.txt');";echo ":"$?
 ${CLICKHOUSE_CLIENT} --query "select file('${user_files_path}/c.txt'), * from data";echo ":"$?
-
+${CLICKHOUSE_CLIENT} --multiquery --query "
+	create table filenames(name String) engine=MergeTree() order by tuple();
+	insert into filenames values ('a.txt'), ('b.txt'), ('c.txt');
+	select file(name) from filenames format TSV;
+"
 
 # Invalid cases: (Here using sub-shell to catch exception avoiding the test quit)
 # Test non-exists file

From b404fea18d2175c27683938291901be2bfdb4728 Mon Sep 17 00:00:00 2001
From: Nikita Mikhailov <jakalletti@jakalletti-build.sas.yp-c.yandex.net>
Date: Tue, 16 Feb 2021 15:40:09 +0300
Subject: [PATCH 1108/1238] better

---
 tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
index 0359d803a23..593f0e59ea7 100755
--- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
+++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
@@ -32,6 +32,7 @@ ${CLICKHOUSE_CLIENT} --multiquery --query "
 	create table filenames(name String) engine=MergeTree() order by tuple();
 	insert into filenames values ('a.txt'), ('b.txt'), ('c.txt');
 	select file(name) from filenames format TSV;
+	drop table if exists filenames;
 "
 
 # Invalid cases: (Here using sub-shell to catch exception avoiding the test quit)

From e37e48b3245fb38b7f11e6b43e069c37a3ad34dc Mon Sep 17 00:00:00 2001
From: Sergi Almacellas Abellana <sergi@koolpi.com>
Date: Tue, 16 Feb 2021 14:31:04 +0100
Subject: [PATCH 1109/1238] Fix typo and ReplicatedMergeTree link on tutorial

I was reading your online documentation and I found that there was a typo on the sql command and there was some missing link.
Not quite familiar with the clickhouse contribution process, I just edited the files fix directly from github, let me know if there is something else missing from my side.

Hope this helps!
---
 docs/en/getting-started/tutorial.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/getting-started/tutorial.md b/docs/en/getting-started/tutorial.md
index 64363c963c5..fe697972dff 100644
--- a/docs/en/getting-started/tutorial.md
+++ b/docs/en/getting-started/tutorial.md
@@ -644,7 +644,7 @@ If there are no replicas at the moment on replicated table creation, a new first
 
 ``` sql
 CREATE TABLE tutorial.hits_replica (...)
-ENGINE = ReplcatedMergeTree(
+ENGINE = ReplicatedMergeTree(
     '/clickhouse_perftest/tables/{shard}/hits',
     '{replica}'
 )

From 7b54b892b5eed13edfb0963dd02287fbe0d8881f Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Tue, 16 Feb 2021 17:05:58 +0300
Subject: [PATCH 1110/1238] fix

---
 src/Databases/DatabaseOnDisk.cpp         |  2 +-
 src/Interpreters/Context.cpp             |  4 ++--
 src/Interpreters/Context.h               |  2 +-
 src/Interpreters/DDLWorker.cpp           |  9 +++++++--
 src/Interpreters/DDLWorker.h             |  2 +-
 src/Storages/StorageMaterializedView.cpp | 19 +++++++++++++++----
 tests/queries/skip_list.json             |  7 +++++++
 7 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp
index 24bab42cad2..e5d2b23ace0 100644
--- a/src/Databases/DatabaseOnDisk.cpp
+++ b/src/Databases/DatabaseOnDisk.cpp
@@ -460,7 +460,7 @@ void DatabaseOnDisk::renameTable(
 
     if (from_atomic_to_ordinary)
     {
-        auto & atomic_db = assert_cast<DatabaseAtomic &>(*this);
+        auto & atomic_db = dynamic_cast<DatabaseAtomic &>(*this);
         /// Special case: usually no actions with symlinks are required when detaching/attaching table,
         /// but not when moving from Atomic database to Ordinary
         if (table->storesDataOnDisk())
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index d0a1e4d37bf..766b14dea42 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -2553,10 +2553,10 @@ StorageID Context::resolveStorageIDImpl(StorageID storage_id, StorageNamespace w
     return StorageID::createEmpty();
 }
 
-void Context::initMetadataTransaction(MetadataTransactionPtr txn)
+void Context::initMetadataTransaction(MetadataTransactionPtr txn, [[maybe_unused]] bool attach_existing)
 {
     assert(!metadata_transaction);
-    assert(query_context == this);
+    assert(attach_existing || query_context == this);
     metadata_transaction = std::move(txn);
 }
 
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index f6ee28aca22..8b59b225480 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -746,7 +746,7 @@ public:
     IHostContextPtr & getHostContext();
     const IHostContextPtr & getHostContext() const;
 
-    void initMetadataTransaction(MetadataTransactionPtr txn);
+    void initMetadataTransaction(MetadataTransactionPtr txn, bool attach_to_context = false);
     MetadataTransactionPtr getMetadataTransaction() const;
 
     struct MySQLWireContext
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index f08f47b1c0e..c342a994395 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -328,6 +328,8 @@ void DDLWorker::scheduleTasks()
         LOG_TRACE(log, "No tasks to schedule");
         return;
     }
+    else if (max_tasks_in_queue < queue_nodes.size())
+        cleanup_event->set();
 
     bool server_startup = current_tasks.empty();
     auto begin_node = queue_nodes.begin();
@@ -489,9 +491,8 @@ void DDLWorker::processTask(DDLTaskBase & task)
 
         if (create_active_res == Coordination::Error::ZNODEEXISTS)
         {
-            /// Connection has been lost and now we are retrying to write query status,
+            /// Connection has been lost and now we are retrying,
             /// but our previous ephemeral node still exists.
-            assert(task.was_executed);
             zkutil::EventPtr eph_node_disappeared = std::make_shared<Poco::Event>();
             String dummy;
             if (zookeeper->tryGet(active_node_path, dummy, nullptr, eph_node_disappeared))
@@ -826,6 +827,7 @@ void DDLWorker::cleanupQueue(Int64, const ZooKeeperPtr & zookeeper)
             ops.emplace_back(zkutil::makeRemoveRequest(fs::path(node_path) / "finished", -1));
             ops.emplace_back(zkutil::makeRemoveRequest(node_path, -1));
             auto rm_entry_res = zookeeper->tryMulti(ops, res);
+
             if (rm_entry_res == Coordination::Error::ZNONODE)
             {
                 /// Most likely both node_path/finished and node_path were removed concurrently.
@@ -888,8 +890,11 @@ void DDLWorker::createStatusDirs(const std::string & node_path, const ZooKeeperP
         return;
 
     if (is_currently_deleting)
+    {
+        cleanup_event->set();
         throw Exception(ErrorCodes::UNFINISHED, "Cannot create status dirs for {}, "
                         "most likely because someone is deleting it concurrently", node_path);
+    }
 
     /// Connection lost or entry was removed
     assert(Coordination::isHardwareError(code) || code == Coordination::Error::ZNONODE);
diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h
index 0985884eef7..c39a832c098 100644
--- a/src/Interpreters/DDLWorker.h
+++ b/src/Interpreters/DDLWorker.h
@@ -102,7 +102,7 @@ protected:
     virtual bool canRemoveQueueEntry(const String & entry_name, const Coordination::Stat & stat);
 
     /// Init task node
-    static void createStatusDirs(const std::string & node_path, const ZooKeeperPtr & zookeeper);
+    void createStatusDirs(const std::string & node_path, const ZooKeeperPtr & zookeeper);
 
     virtual void initializeMainThread();
 
diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp
index fb75a933910..32317968fe5 100644
--- a/src/Storages/StorageMaterializedView.cpp
+++ b/src/Storages/StorageMaterializedView.cpp
@@ -194,9 +194,9 @@ BlockOutputStreamPtr StorageMaterializedView::write(const ASTPtr & query, const
 }
 
 
-static void executeDropQuery(ASTDropQuery::Kind kind, const Context & global_context, const StorageID & target_table_id, bool no_delay)
+static void executeDropQuery(ASTDropQuery::Kind kind, const Context & global_context, const Context & current_context, const StorageID & target_table_id, bool no_delay)
 {
-    if (DatabaseCatalog::instance().tryGetTable(target_table_id, global_context))
+    if (DatabaseCatalog::instance().tryGetTable(target_table_id, current_context))
     {
         /// We create and execute `drop` query for internal table.
         auto drop_query = std::make_shared<ASTDropQuery>();
@@ -206,7 +206,18 @@ static void executeDropQuery(ASTDropQuery::Kind kind, const Context & global_con
         drop_query->no_delay = no_delay;
         drop_query->if_exists = true;
         ASTPtr ast_drop_query = drop_query;
+        /// FIXME We have to use global context to execute DROP query for inner table
+        /// to avoid "Not enough privileges" error if current user has only DROP VIEW ON mat_view_name privilege
+        /// and not allowed to drop inner table explicitly. Allowing to drop inner table without explicit grant
+        /// looks like expected behaviour and we have tests for it.
         auto drop_context = Context(global_context);
+        drop_context.getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY;
+        if (auto txn = current_context.getMetadataTransaction())
+        {
+            /// For Replicated database
+            drop_context.setQueryContext(const_cast<Context &>(current_context));
+            drop_context.initMetadataTransaction(txn, true);
+        }
         InterpreterDropQuery drop_interpreter(ast_drop_query, drop_context);
         drop_interpreter.execute();
     }
@@ -226,13 +237,13 @@ void StorageMaterializedView::drop()
 void StorageMaterializedView::dropInnerTable(bool no_delay, const Context & context)
 {
     if (has_inner_table && tryGetTargetTable())
-        executeDropQuery(ASTDropQuery::Kind::Drop, context, target_table_id, no_delay);
+        executeDropQuery(ASTDropQuery::Kind::Drop, global_context, context, target_table_id, no_delay);
 }
 
 void StorageMaterializedView::truncate(const ASTPtr &, const StorageMetadataPtr &, const Context & context, TableExclusiveLockHolder &)
 {
     if (has_inner_table)
-        executeDropQuery(ASTDropQuery::Kind::Truncate, context, target_table_id, true);
+        executeDropQuery(ASTDropQuery::Kind::Truncate, global_context, context, target_table_id, true);
 }
 
 void StorageMaterializedView::checkStatementCanBeForwarded() const
diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json
index 5c75fc0300b..52cef210748 100644
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@@ -103,9 +103,16 @@
         "00738_lock_for_inner_table"
     ],
     "database-replicated": [
+        /// Tests with DETACH TABLE (it's not allowed)
+        /// and tests with SET (session and query settings are not supported)
         "memory_tracking",
         "memory_usage",
         "live_view",
+        "00152_insert_different_granularity",
+        "01715_background_checker_blather_zookeeper",
+        "01714_alter_drop_version",
+        "01114_materialize_clear_index_compact_parts",
+        "00814_replicated_minimalistic_part_header_zookeeper",
         "01188_attach_table_from_pat",
         "01415_sticking_mutations",
         "01130_in_memory_parts",

From 75117389eccf862b1a08b93a32d4f839846715f6 Mon Sep 17 00:00:00 2001
From: M0r64n <M0r64n1635@gmail.com>
Date: Tue, 16 Feb 2021 18:50:11 +0400
Subject: [PATCH 1111/1238] Add a couple of QOL file engine settings

---
 docs/en/operations/settings/settings.md       | 20 +++++++++++++++++++
 src/Core/Settings.h                           |  2 ++
 src/Storages/StorageFile.cpp                  | 12 ++++++++++-
 ..._engine_file_empty_if_not_exists.reference |  0
 .../01720_engine_file_empty_if_not_exists.sql | 15 ++++++++++++++
 ...1_engine_file_truncate_on_insert.reference | 13 ++++++++++++
 .../01721_engine_file_truncate_on_insert.sql  | 20 +++++++++++++++++++
 7 files changed, 81 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/01720_engine_file_empty_if_not_exists.reference
 create mode 100644 tests/queries/0_stateless/01720_engine_file_empty_if_not_exists.sql
 create mode 100644 tests/queries/0_stateless/01721_engine_file_truncate_on_insert.reference
 create mode 100644 tests/queries/0_stateless/01721_engine_file_truncate_on_insert.sql

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 43519bfc8dc..6440f09bb40 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -2659,3 +2659,23 @@ Result:
 Note that this setting influences [Materialized view](../../sql-reference/statements/create/view.md#materialized) and [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md) behaviour.
 
 [Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->
+
+## engine_file_empty_if_not_exists {#engine-file-empty_if-not-exists}
+
+Allows to select data from a file engine table without file.
+
+Possible values:
+- 0 — `SELECT` throws exception.
+- 1 — `SELECT` returns empty result.
+
+Default value: `0`.
+
+## engine_file_truncate_on_insert {#engine-file-truncate-on-insert}
+
+Enables or disables truncate before insert in file engine tables.
+
+Possible values:
+- 0 — Disabled.
+- 1 — Enabled.
+
+Default value: `0`.
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 9bb9ad30f15..98c3b9d1f85 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -421,6 +421,8 @@ class IColumn;
     M(Bool, optimize_rewrite_sum_if_to_count_if, true, "Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent", 0) \
     M(UInt64, insert_shard_id, 0, "If non zero, when insert into a distributed table, the data will be inserted into the shard `insert_shard_id` synchronously. Possible values range from 1 to `shards_number` of corresponding distributed table", 0) \
     M(Bool, allow_experimental_query_deduplication, false, "Allow sending parts' UUIDs for a query in order to deduplicate data parts if any", 0) \
+    M(Bool, engine_file_empty_if_not_exists, false, "Allows to select data from a file engine table without file", 0) \
+    M(Bool, engine_file_truncate_on_insert, false, "Enables or disables truncate before insert in file engine tables", 0) \
     \
     /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
     \
diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index a5935ba3bf4..856d03ea2ce 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -34,6 +34,7 @@
 #include <Storages/Distributed/DirectoryMonitor.h>
 #include <Processors/Sources/SourceWithProgress.h>
 #include <Processors/Formats/InputStreamFromInputFormat.h>
+#include <Processors/Sources/NullSource.h>
 #include <Processors/Pipe.h>
 
 namespace fs = std::filesystem;
@@ -427,7 +428,12 @@ Pipe StorageFile::read(
         paths = {""};   /// when use fd, paths are empty
     else
         if (paths.size() == 1 && !Poco::File(paths[0]).exists())
-            throw Exception("File " + paths[0] + " doesn't exist", ErrorCodes::FILE_DOESNT_EXIST);
+        {
+            if (context.getSettingsRef().engine_file_empty_if_not_exists)
+                return Pipe(std::make_shared<NullSource>(metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals(), getStorageID())));
+            else
+                throw Exception("File " + paths[0] + " doesn't exist", ErrorCodes::FILE_DOESNT_EXIST);
+        }
 
 
     auto files_info = std::make_shared<StorageFileSource::FilesInfo>();
@@ -547,6 +553,10 @@ BlockOutputStreamPtr StorageFile::write(
         throw Exception("Method write is not implemented for Distributed format", ErrorCodes::NOT_IMPLEMENTED);
 
     std::string path;
+    if (context.getSettingsRef().engine_file_truncate_on_insert)
+        if (0 != ::truncate(paths[0].c_str(), 0))
+            throwFromErrnoWithPath("Cannot truncate file " + paths[0], paths[0], ErrorCodes::CANNOT_TRUNCATE_FILE);
+
     if (!paths.empty())
     {
         path = paths[0];
diff --git a/tests/queries/0_stateless/01720_engine_file_empty_if_not_exists.reference b/tests/queries/0_stateless/01720_engine_file_empty_if_not_exists.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/01720_engine_file_empty_if_not_exists.sql b/tests/queries/0_stateless/01720_engine_file_empty_if_not_exists.sql
new file mode 100644
index 00000000000..c04e01ccc88
--- /dev/null
+++ b/tests/queries/0_stateless/01720_engine_file_empty_if_not_exists.sql
@@ -0,0 +1,15 @@
+DROP TABLE IF EXISTS file_engine_table;
+
+CREATE TABLE file_engine_table (id UInt32) ENGINE=File(TSV);
+
+SELECT * FROM file_engine_table; --{ serverError 107 }
+
+SET engine_file_empty_if_not_exists=0;
+
+SELECT * FROM file_engine_table; --{ serverError 107 }
+
+SET engine_file_empty_if_not_exists=1;
+
+SELECT * FROM file_engine_table;
+
+SET engine_file_empty_if_not_exists=0;
diff --git a/tests/queries/0_stateless/01721_engine_file_truncate_on_insert.reference b/tests/queries/0_stateless/01721_engine_file_truncate_on_insert.reference
new file mode 100644
index 00000000000..a25fb4f0e7e
--- /dev/null
+++ b/tests/queries/0_stateless/01721_engine_file_truncate_on_insert.reference
@@ -0,0 +1,13 @@
+1
+2
+3
+4
+1
+2
+3
+4
+5
+6
+0
+1
+2
\ No newline at end of file
diff --git a/tests/queries/0_stateless/01721_engine_file_truncate_on_insert.sql b/tests/queries/0_stateless/01721_engine_file_truncate_on_insert.sql
new file mode 100644
index 00000000000..65246db7963
--- /dev/null
+++ b/tests/queries/0_stateless/01721_engine_file_truncate_on_insert.sql
@@ -0,0 +1,20 @@
+INSERT INTO TABLE FUNCTION file('01718_file/test/data.TSV', 'TSV', 'id UInt32') VALUES ('file', 42);
+ATTACH TABLE test FROM '01718_file/test' (id UInt8) ENGINE=File(TSV);
+
+CREATE TABLE file_engine_table (id UInt32) ENGINE=File(TabSeparated);
+
+INSERT INTO file_engine_table VALUES (1), (2), (3);
+INSERT INTO file_engine_table VALUES (4);
+SELECT * FROM file_engine_table;
+
+SET engine_file_truncate_on_insert=0;
+
+INSERT INTO file_engine_table VALUES (5), (6);
+SELECT * FROM file_engine_table;
+
+SET engine_file_truncate_on_insert=1;
+
+INSERT INTO file_engine_table VALUES (0), (1), (2);
+SELECT * FROM file_engine_table;
+
+SET engine_file_truncate_on_insert=0;

From 94ba4942d76773df87fd02ed5cf0acb735ee10c6 Mon Sep 17 00:00:00 2001
From: Nikita Mikhailov <jakalletti@jakalletti-build.sas.yp-c.yandex.net>
Date: Tue, 16 Feb 2021 19:47:45 +0300
Subject: [PATCH 1112/1238] empty


From 6c9771484b25d8ef8340a7e5c612a95a9af05ef6 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Tue, 16 Feb 2021 22:39:25 +0300
Subject: [PATCH 1113/1238] add hung check to stress test

---
 docker/test/stress/run.sh |  2 +-
 docker/test/stress/stress | 15 +++++++++++++--
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh
index 9da2f3d3ada..323e0be4d4b 100755
--- a/docker/test/stress/run.sh
+++ b/docker/test/stress/run.sh
@@ -64,7 +64,7 @@ clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits"
 clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
 clickhouse-client --query "SHOW TABLES FROM test"
 
-./stress --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION"
+./stress --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" && echo "OK" > /test_output/script_exit_code.txt || echo "FAIL" > /test_output/script_exit_code.txt
 
 stop
 start
diff --git a/docker/test/stress/stress b/docker/test/stress/stress
index 458f78fcdb4..d2ec86b4421 100755
--- a/docker/test/stress/stress
+++ b/docker/test/stress/stress
@@ -1,8 +1,9 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 from multiprocessing import cpu_count
-from subprocess import Popen, check_call
+from subprocess import Popen, call, STDOUT
 import os
+import sys
 import shutil
 import argparse
 import logging
@@ -64,7 +65,8 @@ if __name__ == "__main__":
     parser.add_argument("--server-log-folder", default='/var/log/clickhouse-server')
     parser.add_argument("--output-folder")
     parser.add_argument("--global-time-limit", type=int, default=3600)
-    parser.add_argument("--num-parallel", default=cpu_count());
+    parser.add_argument("--num-parallel", default=cpu_count())
+    parser.add_argument('--hung-check', action='store_true', default=False)
 
     args = parser.parse_args()
     func_pipes = []
@@ -81,4 +83,13 @@ if __name__ == "__main__":
         logging.info("Finished %s from %s processes", len(retcodes), len(func_pipes))
         time.sleep(5)
 
+    logging.info("All processes finished")
+    if args.hung_check:
+        logging.info("Checking if some queries hung")
+        cmd = "{} {} {}".format(args.test_cmd, "--hung-check", "00001_select_1")
+        res = call(cmd, shell=True, stderr=STDOUT)
+        if res != 0:
+            logging.info("Hung check failed with exit code {}".format(res))
+            sys.exit(1)
+
     logging.info("Stress test finished")

From f83be158ba986b86df8c819b87a0b90d1009068e Mon Sep 17 00:00:00 2001
From: Vitaly Baranov <vitbar@yandex-team.ru>
Date: Thu, 4 Feb 2021 18:59:05 +0300
Subject: [PATCH 1114/1238] SHOW TABLES is now considered as one query in the
 quota calculations, not two queries.

---
 .../InterpreterShowProcesslistQuery.h             |  5 +++++
 src/Interpreters/InterpreterShowTablesQuery.h     |  5 +++++
 tests/integration/test_quota/test.py              | 15 +++++++++++----
 3 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/src/Interpreters/InterpreterShowProcesslistQuery.h b/src/Interpreters/InterpreterShowProcesslistQuery.h
index 6b87fd7edc3..fa0bbf075bd 100644
--- a/src/Interpreters/InterpreterShowProcesslistQuery.h
+++ b/src/Interpreters/InterpreterShowProcesslistQuery.h
@@ -20,6 +20,11 @@ public:
 
     BlockIO execute() override;
 
+    /// We ignore the quota and limits here because execute() will rewrite a show query as a SELECT query and then
+    /// the SELECT query will checks the quota and limits.
+    bool ignoreQuota() const override { return true; }
+    bool ignoreLimits() const override { return true; }
+
 private:
     ASTPtr query_ptr;
     Context & context;
diff --git a/src/Interpreters/InterpreterShowTablesQuery.h b/src/Interpreters/InterpreterShowTablesQuery.h
index fc5cb2b7505..4f720e68622 100644
--- a/src/Interpreters/InterpreterShowTablesQuery.h
+++ b/src/Interpreters/InterpreterShowTablesQuery.h
@@ -20,6 +20,11 @@ public:
 
     BlockIO execute() override;
 
+    /// We ignore the quota and limits here because execute() will rewrite a show query as a SELECT query and then
+    /// the SELECT query will checks the quota and limits.
+    bool ignoreQuota() const override { return true; }
+    bool ignoreLimits() const override { return true; }
+
 private:
     ASTPtr query_ptr;
     Context & context;
diff --git a/tests/integration/test_quota/test.py b/tests/integration/test_quota/test.py
index 84454159a58..9289ba47209 100644
--- a/tests/integration/test_quota/test.py
+++ b/tests/integration/test_quota/test.py
@@ -71,12 +71,12 @@ def started_cluster():
 @pytest.fixture(autouse=True)
 def reset_quotas_and_usage_info():
     try:
-        yield
-    finally:
-        copy_quota_xml('simpliest.xml')  # To reset usage info.
         instance.query("DROP QUOTA IF EXISTS qA, qB")
         copy_quota_xml('simpliest.xml')  # To reset usage info.
         copy_quota_xml('normal_limits.xml')
+        yield
+    finally:
+        pass
 
 
 def test_quota_from_users_xml():
@@ -379,4 +379,11 @@ def test_query_inserts():
 
     instance.query("INSERT INTO test_table values(1)")
     system_quota_usage(
-        [["myQuota", "default", 31556952, 1, 1000, 0, 500, 1, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
\ No newline at end of file
+        [["myQuota", "default", 31556952, 1, 1000, 0, 500, 1, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
+
+def test_consumption_show_tables_quota():
+    instance.query("SHOW TABLES")
+
+    assert re.match(
+        "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t500\\t0\\t500\\t0\\t\\\\N\\t1\\t\\\\N\\t19\\t\\\\N\\t1\\t1000\\t35\\t\\\\N\\t.*\\t\\\\N\n",
+        instance.query("SHOW QUOTA"))

From d8d2bd885c72ae06707f0a15001f2bfb7ba21054 Mon Sep 17 00:00:00 2001
From: Vitaly Baranov <vitbar@yandex-team.ru>
Date: Thu, 4 Feb 2021 22:14:44 +0300
Subject: [PATCH 1115/1238] Fix calculation of interval's end in quota
 consumption.

---
 src/Access/EnabledQuota.cpp | 43 ++++++++++++++++++++++++-------------
 1 file changed, 28 insertions(+), 15 deletions(-)

diff --git a/src/Access/EnabledQuota.cpp b/src/Access/EnabledQuota.cpp
index e9d586a692f..e865ffb9b25 100644
--- a/src/Access/EnabledQuota.cpp
+++ b/src/Access/EnabledQuota.cpp
@@ -39,35 +39,47 @@ struct EnabledQuota::Impl
     }
 
 
+    /// Returns the end of the current interval. If the passed `current_time` is greater than that end,
+    /// the function automatically recalculates the interval's end by adding the interval's duration
+    /// one or more times until the interval's end is greater than `current_time`.
+    /// If that recalculation occurs the function also resets amounts of resources used and sets the variable
+    /// `counters_were_reset`.
     static std::chrono::system_clock::time_point getEndOfInterval(
-        const Interval & interval, std::chrono::system_clock::time_point current_time, bool * counters_were_reset = nullptr)
+        const Interval & interval, std::chrono::system_clock::time_point current_time, bool & counters_were_reset)
     {
         auto & end_of_interval = interval.end_of_interval;
         auto end_loaded = end_of_interval.load();
         auto end = std::chrono::system_clock::time_point{end_loaded};
         if (current_time < end)
         {
-            if (counters_were_reset)
-                *counters_were_reset = false;
+            counters_were_reset = false;
             return end;
         }
 
-        const auto duration = interval.duration;
+        /// We reset counters only if the interval's end has been calculated before.
+        /// If it hasn't we just calculate the interval's end for the first time and don't reset counters yet.
+        bool need_reset_counters = (end_loaded.count() != 0);
 
         do
         {
-            end = end + (current_time - end + duration) / duration * duration;
+            /// Calculate the end of the next interval:
+            ///  |                     X                                 |
+            /// end               current_time                next_end = end + duration * n
+            /// where n is an integer number, n >= 1.
+            const auto duration = interval.duration;
+            UInt64 n = static_cast<UInt64>((current_time - end + duration) / duration);
+            end = end + duration * n;
             if (end_of_interval.compare_exchange_strong(end_loaded, end.time_since_epoch()))
-            {
-                boost::range::fill(interval.used, 0);
                 break;
-            }
             end = std::chrono::system_clock::time_point{end_loaded};
         }
         while (current_time >= end);
 
-        if (counters_were_reset)
-            *counters_were_reset = true;
+        if (need_reset_counters)
+        {
+            boost::range::fill(interval.used, 0);
+            counters_were_reset = true;
+        }
         return end;
     }
 
@@ -89,7 +101,7 @@ struct EnabledQuota::Impl
             if (used > max)
             {
                 bool counters_were_reset = false;
-                auto end_of_interval = getEndOfInterval(interval, current_time, &counters_were_reset);
+                auto end_of_interval = getEndOfInterval(interval, current_time, counters_were_reset);
                 if (counters_were_reset)
                 {
                     used = (interval.used[resource_type] += amount);
@@ -116,9 +128,9 @@ struct EnabledQuota::Impl
                 continue;
             if (used > max)
             {
-                bool used_counters_reset = false;
-                std::chrono::system_clock::time_point end_of_interval = getEndOfInterval(interval, current_time, &used_counters_reset);
-                if (!used_counters_reset)
+                bool counters_were_reset = false;
+                std::chrono::system_clock::time_point end_of_interval = getEndOfInterval(interval, current_time, counters_were_reset);
+                if (!counters_were_reset)
                     throwQuotaExceed(user_name, intervals.quota_name, resource_type, used, max, interval.duration, end_of_interval);
             }
         }
@@ -177,7 +189,8 @@ std::optional<QuotaUsage> EnabledQuota::Intervals::getUsage(std::chrono::system_
         auto & out = usage.intervals.back();
         out.duration = in.duration;
         out.randomize_interval = in.randomize_interval;
-        out.end_of_interval = Impl::getEndOfInterval(in, current_time);
+        bool counters_were_reset = false;
+        out.end_of_interval = Impl::getEndOfInterval(in, current_time, counters_were_reset);
         for (auto resource_type : ext::range(MAX_RESOURCE_TYPE))
         {
             if (in.max[resource_type])

From 298130402ebd2327af746ba2785a6c1cf1e684ea Mon Sep 17 00:00:00 2001
From: Vitaly Baranov <vitbar@yandex-team.ru>
Date: Fri, 5 Feb 2021 22:38:19 +0300
Subject: [PATCH 1116/1238] SYSTEM queries now consume quota.

---
 src/Interpreters/InterpreterSystemQuery.h             |  3 ---
 ...myquota.xml => assign_myquota_to_default_user.xml} |  0
 .../configs/users.d/{quota.xml => myquota.xml}        |  0
 .../test_quota/configs/users.d/user_with_no_quota.xml | 10 ++++++++++
 tests/integration/test_quota/test.py                  | 11 +++++++----
 5 files changed, 17 insertions(+), 7 deletions(-)
 rename tests/integration/test_quota/configs/users.d/{assign_myquota.xml => assign_myquota_to_default_user.xml} (100%)
 rename tests/integration/test_quota/configs/users.d/{quota.xml => myquota.xml} (100%)
 create mode 100644 tests/integration/test_quota/configs/users.d/user_with_no_quota.xml

diff --git a/src/Interpreters/InterpreterSystemQuery.h b/src/Interpreters/InterpreterSystemQuery.h
index 6fd96c15a2e..6fa0a432191 100644
--- a/src/Interpreters/InterpreterSystemQuery.h
+++ b/src/Interpreters/InterpreterSystemQuery.h
@@ -37,9 +37,6 @@ public:
 
     BlockIO execute() override;
 
-    bool ignoreQuota() const override { return true; }
-    bool ignoreLimits() const override { return true; }
-
 private:
     ASTPtr query_ptr;
     Context & context;
diff --git a/tests/integration/test_quota/configs/users.d/assign_myquota.xml b/tests/integration/test_quota/configs/users.d/assign_myquota_to_default_user.xml
similarity index 100%
rename from tests/integration/test_quota/configs/users.d/assign_myquota.xml
rename to tests/integration/test_quota/configs/users.d/assign_myquota_to_default_user.xml
diff --git a/tests/integration/test_quota/configs/users.d/quota.xml b/tests/integration/test_quota/configs/users.d/myquota.xml
similarity index 100%
rename from tests/integration/test_quota/configs/users.d/quota.xml
rename to tests/integration/test_quota/configs/users.d/myquota.xml
diff --git a/tests/integration/test_quota/configs/users.d/user_with_no_quota.xml b/tests/integration/test_quota/configs/users.d/user_with_no_quota.xml
new file mode 100644
index 00000000000..70f51cfff43
--- /dev/null
+++ b/tests/integration/test_quota/configs/users.d/user_with_no_quota.xml
@@ -0,0 +1,10 @@
+<yandex>
+    <users>
+        <user_with_no_quota>
+            <no_password/>
+            <networks>
+                <ip>::/0</ip>
+            </networks>
+        </user_with_no_quota>
+    </users>
+</yandex>
diff --git a/tests/integration/test_quota/test.py b/tests/integration/test_quota/test.py
index 9289ba47209..353d776c0f3 100644
--- a/tests/integration/test_quota/test.py
+++ b/tests/integration/test_quota/test.py
@@ -7,9 +7,10 @@ from helpers.cluster import ClickHouseCluster
 from helpers.test_tools import assert_eq_with_retry, TSV
 
 cluster = ClickHouseCluster(__file__)
-instance = cluster.add_instance('instance', user_configs=["configs/users.d/assign_myquota.xml",
+instance = cluster.add_instance('instance', user_configs=["configs/users.d/assign_myquota_to_default_user.xml",
                                                           "configs/users.d/drop_default_quota.xml",
-                                                          "configs/users.d/quota.xml"])
+                                                          "configs/users.d/myquota.xml",
+                                                          "configs/users.d/user_with_no_quota.xml"])
 
 
 def check_system_quotas(canonical):
@@ -49,9 +50,11 @@ def system_quotas_usage(canonical):
 def copy_quota_xml(local_file_name, reload_immediately=True):
     script_dir = os.path.dirname(os.path.realpath(__file__))
     instance.copy_file_to_container(os.path.join(script_dir, local_file_name),
-                                    '/etc/clickhouse-server/users.d/quota.xml')
+                                    '/etc/clickhouse-server/users.d/myquota.xml')
     if reload_immediately:
-        instance.query("SYSTEM RELOAD CONFIG")
+         # We use the special user 'user_with_no_quota' here because
+         # we don't want SYSTEM RELOAD CONFIG to mess our quota consuming checks.
+        instance.query("SYSTEM RELOAD CONFIG", user='user_with_no_quota')
 
 
 @pytest.fixture(scope="module", autouse=True)

From d357fb9129b09a1749e6055bd19ef57f4187ffb1 Mon Sep 17 00:00:00 2001
From: Vitaly Baranov <vitbar@yandex-team.ru>
Date: Fri, 5 Feb 2021 22:39:08 +0300
Subject: [PATCH 1117/1238] Fix reading from the table system.quota_usage.

---
 src/Storages/System/StorageSystemQuotaUsage.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Storages/System/StorageSystemQuotaUsage.cpp b/src/Storages/System/StorageSystemQuotaUsage.cpp
index 002ab081bcf..6d6e22e7be6 100644
--- a/src/Storages/System/StorageSystemQuotaUsage.cpp
+++ b/src/Storages/System/StorageSystemQuotaUsage.cpp
@@ -137,6 +137,9 @@ void StorageSystemQuotaUsage::fillDataImpl(
         column_quota_name.insertData(quota_name.data(), quota_name.length());
         column_quota_key.insertData(quota_key.data(), quota_key.length());
 
+        if (add_column_is_current)
+            column_is_current->push_back(quota_id == current_quota_id);
+
         if (!interval)
         {
             column_start_time.insertDefault();
@@ -171,9 +174,6 @@ void StorageSystemQuotaUsage::fillDataImpl(
             addValue(*column_max[resource_type], *column_max_null_map[resource_type], interval->max[resource_type], type_info);
             addValue(*column_usage[resource_type], *column_usage_null_map[resource_type], interval->used[resource_type], type_info);
         }
-
-        if (add_column_is_current)
-            column_is_current->push_back(quota_id == current_quota_id);
     };
 
     auto add_rows = [&](const String & quota_name, const UUID & quota_id, const String & quota_key, const std::vector<QuotaUsage::Interval> & intervals)

From 5f8a6ab9c109a82ab044b6ee573f86320175839a Mon Sep 17 00:00:00 2001
From: Nikita Mikhaylov <mikhaylovnikitka@gmail.com>
Date: Tue, 9 Feb 2021 12:29:33 +0300
Subject: [PATCH 1118/1238] remove probably useless code

---
 src/Access/EnabledQuota.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/Access/EnabledQuota.cpp b/src/Access/EnabledQuota.cpp
index e865ffb9b25..4a77426004d 100644
--- a/src/Access/EnabledQuota.cpp
+++ b/src/Access/EnabledQuota.cpp
@@ -26,10 +26,6 @@ struct EnabledQuota::Impl
         std::chrono::seconds duration,
         std::chrono::system_clock::time_point end_of_interval)
     {
-        std::function<String(UInt64)> amount_to_string = [](UInt64 amount) { return std::to_string(amount); };
-        if (resource_type == Quota::EXECUTION_TIME)
-            amount_to_string = [&](UInt64 amount) { return ext::to_string(std::chrono::nanoseconds(amount)); };
-
         const auto & type_info = Quota::ResourceTypeInfo::get(resource_type);
         throw Exception(
             "Quota for user " + backQuote(user_name) + " for " + ext::to_string(duration) + " has been exceeded: "

From 29362bb483a9f8390e9e2016a9ed6b6c4acf116a Mon Sep 17 00:00:00 2001
From: kssenii <sumarokovakseniia@mail.ru>
Date: Tue, 16 Feb 2021 21:48:26 +0000
Subject: [PATCH 1119/1238] Support vhost

---
 .../en/engines/table-engines/integrations/rabbitmq.md | 11 ++++++++++-
 .../ru/engines/table-engines/integrations/rabbitmq.md | 11 ++++++++++-
 src/Storages/RabbitMQ/StorageRabbitMQ.cpp             |  7 +++++--
 src/Storages/RabbitMQ/StorageRabbitMQ.h               |  1 +
 .../RabbitMQ/WriteBufferToRabbitMQProducer.cpp        |  6 +++++-
 src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h |  2 ++
 6 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/docs/en/engines/table-engines/integrations/rabbitmq.md b/docs/en/engines/table-engines/integrations/rabbitmq.md
index b0901ee6f6e..c73876fdebe 100644
--- a/docs/en/engines/table-engines/integrations/rabbitmq.md
+++ b/docs/en/engines/table-engines/integrations/rabbitmq.md
@@ -59,10 +59,11 @@ Optional parameters:
 -   `rabbitmq_max_block_size`
 -   `rabbitmq_flush_interval_ms`
 
-Required configuration:
 
 The RabbitMQ server configuration should be added using the ClickHouse config file.
 
+Required configuration:
+
 ``` xml
  <rabbitmq>
     <username>root</username>
@@ -70,6 +71,14 @@ The RabbitMQ server configuration should be added using the ClickHouse config fi
  </rabbitmq>
 ```
 
+Additional configuration:
+
+``` xml
+ <rabbitmq>
+    <vhost>clickhouse</vhost>
+ </rabbitmq>
+```
+
 Example:
 
 ``` sql
diff --git a/docs/ru/engines/table-engines/integrations/rabbitmq.md b/docs/ru/engines/table-engines/integrations/rabbitmq.md
index dedb5842d68..2a44e085ede 100644
--- a/docs/ru/engines/table-engines/integrations/rabbitmq.md
+++ b/docs/ru/engines/table-engines/integrations/rabbitmq.md
@@ -52,10 +52,11 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
 -   `rabbitmq_max_block_size`
 -   `rabbitmq_flush_interval_ms`
 
-Требуемая конфигурация:
 
 Конфигурация сервера RabbitMQ добавляется с помощью конфигурационного файла ClickHouse.
 
+Требуемая конфигурация:
+
 ``` xml
  <rabbitmq>
     <username>root</username>
@@ -63,6 +64,14 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
  </rabbitmq>
 ```
 
+Дополнительная конфигурация:
+
+``` xml
+ <rabbitmq>
+    <vhost>clickhouse</vhost>
+ </rabbitmq>
+```
+
 Example:
 
 ``` sql
diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp
index 3ee9dda2bf3..d14f11c4a29 100644
--- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp
+++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp
@@ -94,6 +94,7 @@ StorageRabbitMQ::StorageRabbitMQ(
         , login_password(std::make_pair(
                     global_context.getConfigRef().getString("rabbitmq.username"),
                     global_context.getConfigRef().getString("rabbitmq.password")))
+        , vhost(global_context.getConfigRef().getString("rabbitmq.vhost", "/"))
         , semaphore(0, num_consumers)
         , unique_strbase(getRandomName())
         , queue_size(std::max(QUEUE_SIZE, static_cast<uint32_t>(getMaxBlockSize())))
@@ -483,7 +484,9 @@ bool StorageRabbitMQ::restoreConnection(bool reconnecting)
     }
 
     connection = std::make_unique<AMQP::TcpConnection>(event_handler.get(),
-            AMQP::Address(parsed_address.first, parsed_address.second, AMQP::Login(login_password.first, login_password.second), "/"));
+            AMQP::Address(
+                parsed_address.first, parsed_address.second,
+                AMQP::Login(login_password.first, login_password.second), vhost));
 
     cnt_retries = 0;
     while (!connection->ready() && !stream_cancelled && ++cnt_retries != RETRIES_MAX)
@@ -702,7 +705,7 @@ ConsumerBufferPtr StorageRabbitMQ::createReadBuffer()
 ProducerBufferPtr StorageRabbitMQ::createWriteBuffer()
 {
     return std::make_shared<WriteBufferToRabbitMQProducer>(
-        parsed_address, global_context, login_password, routing_keys, exchange_name, exchange_type,
+        parsed_address, global_context, login_password, vhost, routing_keys, exchange_name, exchange_type,
         producer_id.fetch_add(1), persistent, wait_confirm, log,
         row_delimiter ? std::optional<char>{row_delimiter} : std::nullopt, 1, 1024);
 }
diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h
index 893c5167a97..aa316e7a842 100644
--- a/src/Storages/RabbitMQ/StorageRabbitMQ.h
+++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h
@@ -94,6 +94,7 @@ private:
     String address;
     std::pair<String, UInt16> parsed_address;
     std::pair<String, String> login_password;
+    String vhost;
 
     std::unique_ptr<uv_loop_t> loop;
     std::shared_ptr<RabbitMQHandler> event_handler;
diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp
index 08b95d46115..ac1b253b4bb 100644
--- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp
+++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp
@@ -29,6 +29,7 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer(
         std::pair<String, UInt16> & parsed_address_,
         const Context & global_context,
         const std::pair<String, String> & login_password_,
+        const String & vhost_,
         const Names & routing_keys_,
         const String & exchange_name_,
         const AMQP::ExchangeType exchange_type_,
@@ -42,6 +43,7 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer(
         : WriteBuffer(nullptr, 0)
         , parsed_address(parsed_address_)
         , login_password(login_password_)
+        , vhost(vhost_)
         , routing_keys(routing_keys_)
         , exchange_name(exchange_name_)
         , exchange_type(exchange_type_)
@@ -149,7 +151,9 @@ bool WriteBufferToRabbitMQProducer::setupConnection(bool reconnecting)
     }
 
     connection = std::make_unique<AMQP::TcpConnection>(event_handler.get(),
-            AMQP::Address(parsed_address.first, parsed_address.second, AMQP::Login(login_password.first, login_password.second), "/"));
+            AMQP::Address(
+                parsed_address.first, parsed_address.second,
+                AMQP::Login(login_password.first, login_password.second), vhost));
 
     cnt_retries = 0;
     while (!connection->ready() && ++cnt_retries != RETRIES_MAX)
diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h
index 2897e20b21d..e88f92239ca 100644
--- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h
+++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h
@@ -21,6 +21,7 @@ public:
             std::pair<String, UInt16> & parsed_address_,
             const Context & global_context,
             const std::pair<String, String> & login_password_,
+            const String & vhost_,
             const Names & routing_keys_,
             const String & exchange_name_,
             const AMQP::ExchangeType exchange_type_,
@@ -53,6 +54,7 @@ private:
 
     std::pair<String, UInt16> parsed_address;
     const std::pair<String, String> login_password;
+    const String vhost;
     const Names routing_keys;
     const String exchange_name;
     AMQP::ExchangeType exchange_type;

From c809af5dc251cd4087002534ffab9f08dbd63daa Mon Sep 17 00:00:00 2001
From: tison <wander4096@gmail.com>
Date: Wed, 17 Feb 2021 12:56:57 +0800
Subject: [PATCH 1120/1238] ignore data store files

---
 .gitignore | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/.gitignore b/.gitignore
index 1e9765dca9e..d33dbf0600d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -137,3 +137,9 @@ website/package-lock.json
 /prof
 
 *.iml
+
+# data store
+/programs/server/data
+/programs/server/metadata
+/programs/server/store
+

From fa200160915ee9c187e5e64a4a1e395d70430b7f Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Wed, 17 Feb 2021 09:53:18 +0300
Subject: [PATCH 1121/1238] Enable distributed_aggregation_memory_efficient by
 default

---
 src/Core/Settings.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 9bb9ad30f15..6c05d247037 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -100,7 +100,7 @@ class IColumn;
     M(UInt64, min_count_to_compile_expression, 3, "The number of identical expressions before they are JIT-compiled", 0) \
     M(UInt64, group_by_two_level_threshold, 100000, "From what number of keys, a two-level aggregation starts. 0 - the threshold is not set.", 0) \
     M(UInt64, group_by_two_level_threshold_bytes, 100000000, "From what size of the aggregation state in bytes, a two-level aggregation begins to be used. 0 - the threshold is not set. Two-level aggregation is used when at least one of the thresholds is triggered.", 0) \
-    M(Bool, distributed_aggregation_memory_efficient, false, "Is the memory-saving mode of distributed aggregation enabled.", 0) \
+    M(Bool, distributed_aggregation_memory_efficient, true, "Is the memory-saving mode of distributed aggregation enabled.", 0) \
     M(UInt64, aggregation_memory_efficient_merge_threads, 0, "Number of threads to use for merge intermediate aggregation results in memory efficient mode. When bigger, then more memory is consumed. 0 means - same as 'max_threads'.", 0) \
     \
     M(UInt64, max_parallel_replicas, 1, "The maximum number of replicas of each shard used when the query is executed. For consistency (to get different parts of the same partition), this option only works for the specified sampling key. The lag of the replicas is not controlled.", 0) \

From 5f88f5817f4a348051e7aeaa93b8bdb589b8805a Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Wed, 17 Feb 2021 11:23:24 +0300
Subject: [PATCH 1122/1238] Rename untyped function reinterpretAs into
 reinterpret

---
 src/Functions/reinterpretAs.cpp               | 50 +++++++++----------
 .../01676_reinterpret_as.reference            |  6 +--
 .../0_stateless/01676_reinterpret_as.sql      | 42 ++++++++--------
 3 files changed, 49 insertions(+), 49 deletions(-)

diff --git a/src/Functions/reinterpretAs.cpp b/src/Functions/reinterpretAs.cpp
index 363455cb38f..1d105f4ce38 100644
--- a/src/Functions/reinterpretAs.cpp
+++ b/src/Functions/reinterpretAs.cpp
@@ -39,12 +39,12 @@ namespace
  * 3. Types that can be interpreted as numeric (Integers, Float, Date, DateTime, UUID) into FixedString,
  * String, and types that can be interpreted as numeric (Integers, Float, Date, DateTime, UUID).
  */
-class FunctionReinterpretAs : public IFunction
+class FunctionReinterpret : public IFunction
 {
 public:
-    static constexpr auto name = "reinterpretAs";
+    static constexpr auto name = "reinterpret";
 
-    static FunctionPtr create(const Context &) { return std::make_shared<FunctionReinterpretAs>(); }
+    static FunctionPtr create(const Context &) { return std::make_shared<FunctionReinterpret>(); }
 
     String getName() const override { return name; }
 
@@ -308,11 +308,11 @@ private:
 };
 
 template <typename ToDataType, typename Name>
-class FunctionReinterpretAsTyped : public IFunction
+class FunctionReinterpretAs : public IFunction
 {
 public:
     static constexpr auto name = Name::name;
-    static FunctionPtr create(const Context &) { return std::make_shared<FunctionReinterpretAsTyped>(); }
+    static FunctionPtr create(const Context &) { return std::make_shared<FunctionReinterpretAs>(); }
 
     String getName() const override { return name; }
 
@@ -365,7 +365,7 @@ public:
         return impl.executeImpl(arguments_with_type, return_type, input_rows_count);
     }
 
-    FunctionReinterpretAs impl;
+    FunctionReinterpret impl;
 };
 
 struct NameReinterpretAsUInt8       { static constexpr auto name = "reinterpretAsUInt8"; };
@@ -387,26 +387,26 @@ struct NameReinterpretAsUUID        { static constexpr auto name = "reinterpretA
 struct NameReinterpretAsString      { static constexpr auto name = "reinterpretAsString"; };
 struct NameReinterpretAsFixedString { static constexpr auto name = "reinterpretAsFixedString"; };
 
-using FunctionReinterpretAsUInt8 = FunctionReinterpretAsTyped<DataTypeUInt8, NameReinterpretAsUInt8>;
-using FunctionReinterpretAsUInt16 = FunctionReinterpretAsTyped<DataTypeUInt16, NameReinterpretAsUInt16>;
-using FunctionReinterpretAsUInt32 = FunctionReinterpretAsTyped<DataTypeUInt32, NameReinterpretAsUInt32>;
-using FunctionReinterpretAsUInt64 = FunctionReinterpretAsTyped<DataTypeUInt64, NameReinterpretAsUInt64>;
-using FunctionReinterpretAsUInt256 = FunctionReinterpretAsTyped<DataTypeUInt256, NameReinterpretAsUInt256>;
-using FunctionReinterpretAsInt8 = FunctionReinterpretAsTyped<DataTypeInt8, NameReinterpretAsInt8>;
-using FunctionReinterpretAsInt16 = FunctionReinterpretAsTyped<DataTypeInt16, NameReinterpretAsInt16>;
-using FunctionReinterpretAsInt32 = FunctionReinterpretAsTyped<DataTypeInt32, NameReinterpretAsInt32>;
-using FunctionReinterpretAsInt64 = FunctionReinterpretAsTyped<DataTypeInt64, NameReinterpretAsInt64>;
-using FunctionReinterpretAsInt128 = FunctionReinterpretAsTyped<DataTypeInt128, NameReinterpretAsInt128>;
-using FunctionReinterpretAsInt256 = FunctionReinterpretAsTyped<DataTypeInt256, NameReinterpretAsInt256>;
-using FunctionReinterpretAsFloat32 = FunctionReinterpretAsTyped<DataTypeFloat32, NameReinterpretAsFloat32>;
-using FunctionReinterpretAsFloat64 = FunctionReinterpretAsTyped<DataTypeFloat64, NameReinterpretAsFloat64>;
-using FunctionReinterpretAsDate = FunctionReinterpretAsTyped<DataTypeDate, NameReinterpretAsDate>;
-using FunctionReinterpretAsDateTime = FunctionReinterpretAsTyped<DataTypeDateTime, NameReinterpretAsDateTime>;
-using FunctionReinterpretAsUUID = FunctionReinterpretAsTyped<DataTypeUUID, NameReinterpretAsUUID>;
+using FunctionReinterpretAsUInt8 = FunctionReinterpretAs<DataTypeUInt8, NameReinterpretAsUInt8>;
+using FunctionReinterpretAsUInt16 = FunctionReinterpretAs<DataTypeUInt16, NameReinterpretAsUInt16>;
+using FunctionReinterpretAsUInt32 = FunctionReinterpretAs<DataTypeUInt32, NameReinterpretAsUInt32>;
+using FunctionReinterpretAsUInt64 = FunctionReinterpretAs<DataTypeUInt64, NameReinterpretAsUInt64>;
+using FunctionReinterpretAsUInt256 = FunctionReinterpretAs<DataTypeUInt256, NameReinterpretAsUInt256>;
+using FunctionReinterpretAsInt8 = FunctionReinterpretAs<DataTypeInt8, NameReinterpretAsInt8>;
+using FunctionReinterpretAsInt16 = FunctionReinterpretAs<DataTypeInt16, NameReinterpretAsInt16>;
+using FunctionReinterpretAsInt32 = FunctionReinterpretAs<DataTypeInt32, NameReinterpretAsInt32>;
+using FunctionReinterpretAsInt64 = FunctionReinterpretAs<DataTypeInt64, NameReinterpretAsInt64>;
+using FunctionReinterpretAsInt128 = FunctionReinterpretAs<DataTypeInt128, NameReinterpretAsInt128>;
+using FunctionReinterpretAsInt256 = FunctionReinterpretAs<DataTypeInt256, NameReinterpretAsInt256>;
+using FunctionReinterpretAsFloat32 = FunctionReinterpretAs<DataTypeFloat32, NameReinterpretAsFloat32>;
+using FunctionReinterpretAsFloat64 = FunctionReinterpretAs<DataTypeFloat64, NameReinterpretAsFloat64>;
+using FunctionReinterpretAsDate = FunctionReinterpretAs<DataTypeDate, NameReinterpretAsDate>;
+using FunctionReinterpretAsDateTime = FunctionReinterpretAs<DataTypeDateTime, NameReinterpretAsDateTime>;
+using FunctionReinterpretAsUUID = FunctionReinterpretAs<DataTypeUUID, NameReinterpretAsUUID>;
 
-using FunctionReinterpretAsString = FunctionReinterpretAsTyped<DataTypeString, NameReinterpretAsString>;
+using FunctionReinterpretAsString = FunctionReinterpretAs<DataTypeString, NameReinterpretAsString>;
 
-using FunctionReinterpretAsFixedString = FunctionReinterpretAsTyped<DataTypeFixedString, NameReinterpretAsFixedString>;
+using FunctionReinterpretAsFixedString = FunctionReinterpretAs<DataTypeFixedString, NameReinterpretAsFixedString>;
 
 }
 
@@ -433,7 +433,7 @@ void registerFunctionsReinterpretAs(FunctionFactory & factory)
 
     factory.registerFunction<FunctionReinterpretAsFixedString>();
 
-    factory.registerFunction<FunctionReinterpretAs>();
+    factory.registerFunction<FunctionReinterpret>();
 }
 
 }
diff --git a/tests/queries/0_stateless/01676_reinterpret_as.reference b/tests/queries/0_stateless/01676_reinterpret_as.reference
index bbde2d5ed57..f7ca2bbedfa 100644
--- a/tests/queries/0_stateless/01676_reinterpret_as.reference
+++ b/tests/queries/0_stateless/01676_reinterpret_as.reference
@@ -25,6 +25,6 @@ Integer and Float types
 0.2	1045220557
 0.2	4596373779694328218
 Integer and String types
-1	49
-1	49
-11	12593
+1	1	49
+1	1	49
+11	11	12593
diff --git a/tests/queries/0_stateless/01676_reinterpret_as.sql b/tests/queries/0_stateless/01676_reinterpret_as.sql
index 88dc6437043..cc5dba1e110 100644
--- a/tests/queries/0_stateless/01676_reinterpret_as.sql
+++ b/tests/queries/0_stateless/01676_reinterpret_as.sql
@@ -1,30 +1,30 @@
 SELECT 'Into String';
-SELECT reinterpretAs(49, 'String');
+SELECT reinterpret(49, 'String');
 SELECT 'Into FixedString';
-SELECT reinterpretAs(49, 'FixedString(1)');
-SELECT reinterpretAs(49, 'FixedString(2)');
-SELECT reinterpretAs(49, 'FixedString(3)');
-SELECT reinterpretAs(49, 'FixedString(4)');
+SELECT reinterpret(49, 'FixedString(1)');
+SELECT reinterpret(49, 'FixedString(2)');
+SELECT reinterpret(49, 'FixedString(3)');
+SELECT reinterpret(49, 'FixedString(4)');
 SELECT reinterpretAsFixedString(49);
 SELECT 'Into Numeric Representable';
 SELECT 'Integer and Integer types';
-SELECT reinterpretAs(257, 'UInt8'), reinterpretAsUInt8(257);
-SELECT reinterpretAs(257, 'Int8'), reinterpretAsInt8(257);
-SELECT reinterpretAs(257, 'UInt16'), reinterpretAsUInt16(257);
-SELECT reinterpretAs(257, 'Int16'), reinterpretAsInt16(257);
-SELECT reinterpretAs(257, 'UInt32'), reinterpretAsUInt32(257);
-SELECT reinterpretAs(257, 'Int32'), reinterpretAsInt32(257);
-SELECT reinterpretAs(257, 'UInt64'), reinterpretAsUInt64(257);
-SELECT reinterpretAs(257, 'Int64'), reinterpretAsInt64(257);
-SELECT reinterpretAs(257, 'Int128'), reinterpretAsInt128(257);
-SELECT reinterpretAs(257, 'UInt256'), reinterpretAsUInt256(257);
-SELECT reinterpretAs(257, 'Int256'), reinterpretAsInt256(257);
+SELECT reinterpret(257, 'UInt8'), reinterpretAsUInt8(257);
+SELECT reinterpret(257, 'Int8'), reinterpretAsInt8(257);
+SELECT reinterpret(257, 'UInt16'), reinterpretAsUInt16(257);
+SELECT reinterpret(257, 'Int16'), reinterpretAsInt16(257);
+SELECT reinterpret(257, 'UInt32'), reinterpretAsUInt32(257);
+SELECT reinterpret(257, 'Int32'), reinterpretAsInt32(257);
+SELECT reinterpret(257, 'UInt64'), reinterpretAsUInt64(257);
+SELECT reinterpret(257, 'Int64'), reinterpretAsInt64(257);
+SELECT reinterpret(257, 'Int128'), reinterpretAsInt128(257);
+SELECT reinterpret(257, 'UInt256'), reinterpretAsUInt256(257);
+SELECT reinterpret(257, 'Int256'), reinterpretAsInt256(257);
 SELECT 'Integer and Float types';
-SELECT reinterpretAs(toFloat32(0.2), 'UInt32'), reinterpretAsUInt32(toFloat32(0.2));
-SELECT reinterpretAs(toFloat64(0.2), 'UInt64'), reinterpretAsUInt64(toFloat64(0.2));
+SELECT reinterpret(toFloat32(0.2), 'UInt32'), reinterpretAsUInt32(toFloat32(0.2));
+SELECT reinterpret(toFloat64(0.2), 'UInt64'), reinterpretAsUInt64(toFloat64(0.2));
 SELECT reinterpretAsFloat32(a), reinterpretAsUInt32(toFloat32(0.2)) as a;
 SELECT reinterpretAsFloat64(a), reinterpretAsUInt64(toFloat64(0.2)) as a;
 SELECT 'Integer and String types';
-SELECT reinterpretAsString(a), reinterpretAsUInt8('1') as a;
-SELECT reinterpretAsString(a), reinterpretAsUInt8('11') as a;
-SELECT reinterpretAsString(a), reinterpretAsUInt16('11') as a;
+SELECT reinterpret(a, 'String'), reinterpretAsString(a), reinterpretAsUInt8('1') as a;
+SELECT reinterpret(a, 'String'), reinterpretAsString(a), reinterpretAsUInt8('11') as a;
+SELECT reinterpret(a, 'String'), reinterpretAsString(a), reinterpretAsUInt16('11') as a;

From 3b40099578b474cc2ba26980148c666edb55c3c5 Mon Sep 17 00:00:00 2001
From: feng lv <fenglv15@mails.ucas.ac.cn>
Date: Wed, 17 Feb 2021 08:26:52 +0000
Subject: [PATCH 1123/1238] fix subquery with limit

---
 src/Interpreters/InterpreterSelectQuery.cpp     | 17 +++++++++++++++--
 .../01720_union_distinct_with_limit.reference   |  1 +
 .../01720_union_distinct_with_limit.sql         |  8 ++++++++
 3 files changed, 24 insertions(+), 2 deletions(-)
 create mode 100644 tests/queries/0_stateless/01720_union_distinct_with_limit.reference
 create mode 100644 tests/queries/0_stateless/01720_union_distinct_with_limit.sql

diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp
index 84de6fa4e6c..a325a8d3328 100644
--- a/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/src/Interpreters/InterpreterSelectQuery.cpp
@@ -784,9 +784,22 @@ static bool hasWithTotalsInAnySubqueryInFromClause(const ASTSelectQuery & query)
     {
         if (const auto * ast_union = query_table->as<ASTSelectWithUnionQuery>())
         {
+            ///NOTE: Child of subquery can be ASTSelectWithUnionQuery or ASTSelectQuery,
+            /// and after normalization, the height of the AST tree is at most 2
             for (const auto & elem : ast_union->list_of_selects->children)
-                if (hasWithTotalsInAnySubqueryInFromClause(elem->as<ASTSelectQuery &>()))
-                    return true;
+            {
+                if (const auto * child_union = elem->as<ASTSelectWithUnionQuery>())
+                {
+                    for (const auto & child_elem : child_union->list_of_selects->children)
+                        if (hasWithTotalsInAnySubqueryInFromClause(child_elem->as<ASTSelectQuery &>()))
+                            return true;
+                }
+                else
+                {
+                    if (hasWithTotalsInAnySubqueryInFromClause(elem->as<ASTSelectQuery &>()))
+                        return true;
+                }
+            }
         }
     }
 
diff --git a/tests/queries/0_stateless/01720_union_distinct_with_limit.reference b/tests/queries/0_stateless/01720_union_distinct_with_limit.reference
new file mode 100644
index 00000000000..d00491fd7e5
--- /dev/null
+++ b/tests/queries/0_stateless/01720_union_distinct_with_limit.reference
@@ -0,0 +1 @@
+1
diff --git a/tests/queries/0_stateless/01720_union_distinct_with_limit.sql b/tests/queries/0_stateless/01720_union_distinct_with_limit.sql
new file mode 100644
index 00000000000..9fc5b3eafd2
--- /dev/null
+++ b/tests/queries/0_stateless/01720_union_distinct_with_limit.sql
@@ -0,0 +1,8 @@
+SELECT x
+FROM
+(
+    SELECT 1 AS x
+    UNION DISTINCT
+    SELECT 1
+)
+LIMIT 1;

From e52cc1ac1fe7b3c937cc16d75dbcf623fca86c2c Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Wed, 17 Feb 2021 11:31:20 +0300
Subject: [PATCH 1124/1238] Updated documentation

---
 .../functions/type-conversion-functions.md    | 36 +++++++++----------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md
index 3ca36f41c78..6bc274eba73 100644
--- a/docs/en/sql-reference/functions/type-conversion-functions.md
+++ b/docs/en/sql-reference/functions/type-conversion-functions.md
@@ -303,7 +303,7 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut
 └────────────┴───────┘
 ```
 
-## reinterpretAs(x, T) {#type_conversion_function-cast}
+## reinterpret(x, T) {#type_conversion_function-reinterpret}
 
 Performs byte reinterpretation of ‘x’ as ‘t’ data type.
 
@@ -313,9 +313,9 @@ Following reinterpretations are allowed:
 3. FixedString, String, types that can be interpreted as numeric (Integers, Float, Date, DateTime, UUID) into types that can be interpreted as numeric (Integers, Float, Date, DateTime, UUID) into FixedString,
 
 ``` sql
-SELECT reinterpretAs(toInt8(-1), 'UInt8') as int_to_uint,
-    reinterpretAs(toInt8(1), 'Float32') as int_to_float,
-    reinterpretAs('1', 'UInt32') as string_to_int;
+SELECT reinterpret(toInt8(-1), 'UInt8') as int_to_uint,
+    reinterpret(toInt8(1), 'Float32') as int_to_float,
+    reinterpret('1', 'UInt32') as string_to_int;
 ```
 
 ``` text
@@ -324,23 +324,23 @@ SELECT reinterpretAs(toInt8(-1), 'UInt8') as int_to_uint,
 └─────────────┴──────────────┴───────────────┘
 ```
 
-## reinterpretAsUInt(8\|16\|32\|64\|256) {#reinterpretasuint8163264256}
+## reinterpretAsUInt(8\|16\|32\|64\|256) {#type_conversion_function-reinterpretAsUInt8163264256}
 
-## reinterpretAsInt(8\|16\|32\|64\|128\|256) {#reinterpretasint8163264128256}
+## reinterpretAsInt(8\|16\|32\|64\|128\|256) {#type_conversion_function-reinterpretAsInt8163264128256}
 
-## reinterpretAsFloat(32\|64) {#reinterpretasfloat3264}
+## reinterpretAsFloat(32\|64) {##type_conversion_function-reinterpretAsFloat}
 
-## reinterpretAsDate {#reinterpretasdate}
+## reinterpretAsDate {#type_conversion_function-reinterpretAsDate}
 
-## reinterpretAsDateTime {#reinterpretasdatetime}
+## reinterpretAsDateTime {#type_conversion_function-reinterpretAsDateTime}
 
-## reinterpretAsString {#type_conversion_functions-reinterpretAsString}
+## reinterpretAsString {#type_conversion_function-reinterpretAsString}
 
-## reinterpretAsFixedString {#reinterpretasfixedstring}
+## reinterpretAsFixedString {#type_conversion_function-reinterpretAsFixedString}
 
-## reinterpretAsUUID {#reinterpretasuuid}
+## reinterpretAsUUID {#type_conversion_function-reinterpretAsUUID}
 
-These functions are aliases for `reinterpretAs`function.
+These functions are aliases for `reinterpret` function.
 
 ## CAST(x, T) {#type_conversion_function-cast}
 
@@ -401,7 +401,7 @@ bounds of type T.
 
 Example
 ``` sql
-SELECT cast(-1, 'UInt8') as uint8; 
+SELECT cast(-1, 'UInt8') as uint8;
 ```
 
 
@@ -422,7 +422,7 @@ Code: 70. DB::Exception: Received from localhost:9000. DB::Exception: Value in c
 
 ## accurateCastOrNull(x, T) {#type_conversion_function-accurate-cast_or_null}
 
-Converts ‘x’ to the ‘t’ data type. Always returns nullable type and returns NULL 
+Converts ‘x’ to the ‘t’ data type. Always returns nullable type and returns NULL
 if the casted value is not representable in the target type.
 
 Example:
@@ -817,9 +817,9 @@ SELECT fromUnixTimestamp64Milli(i64, 'UTC')
 
 ## formatRow {#formatrow}
 
-Converts arbitrary expressions into a string via given format. 
+Converts arbitrary expressions into a string via given format.
 
-**Syntax** 
+**Syntax**
 
 ``` sql
 formatRow(format, x, y, ...)
@@ -860,7 +860,7 @@ Result:
 
 Converts arbitrary expressions into a string via given format. The function trims the last `\n` if any.
 
-**Syntax** 
+**Syntax**
 
 ``` sql
 formatRowNoNewline(format, x, y, ...)

From dd02106a08a5e02620cc9028cb04a2e8ad0b07a9 Mon Sep 17 00:00:00 2001
From: tavplubix <avtokmakov@yandex-team.ru>
Date: Wed, 17 Feb 2021 12:01:41 +0300
Subject: [PATCH 1125/1238] Update run.sh

---
 docker/test/stress/run.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh
index 323e0be4d4b..88a633ac488 100755
--- a/docker/test/stress/run.sh
+++ b/docker/test/stress/run.sh
@@ -64,7 +64,7 @@ clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits"
 clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
 clickhouse-client --query "SHOW TABLES FROM test"
 
-./stress --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" && echo "OK" > /test_output/script_exit_code.txt || echo "FAIL" > /test_output/script_exit_code.txt
+./stress --hung-check --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" && echo "OK" > /test_output/script_exit_code.txt || echo "FAIL" > /test_output/script_exit_code.txt
 
 stop
 start

From c608fa1e6a3539f74e8956e441e4f68b99367982 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Wed, 17 Feb 2021 12:53:12 +0300
Subject: [PATCH 1126/1238] Added error reinterpretation tests

---
 src/Functions/reinterpretAs.cpp                          | 4 ++++
 tests/queries/0_stateless/01676_reinterpret_as.reference | 1 +
 tests/queries/0_stateless/01676_reinterpret_as.sql       | 4 ++++
 3 files changed, 9 insertions(+)

diff --git a/src/Functions/reinterpretAs.cpp b/src/Functions/reinterpretAs.cpp
index 1d105f4ce38..c15ba969fdb 100644
--- a/src/Functions/reinterpretAs.cpp
+++ b/src/Functions/reinterpretAs.cpp
@@ -93,6 +93,10 @@ public:
                     + " because only Numeric, String or FixedString can be reinterpreted in Numeric",
                     ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
         }
+        else
+            throw Exception("Cannot reinterpret " + from_type->getName() + " as " + to_type->getName()
+                    + " because only reinterpretation in String, FixedString and Numeric types is supported",
+                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
 
         return to_type;
     }
diff --git a/tests/queries/0_stateless/01676_reinterpret_as.reference b/tests/queries/0_stateless/01676_reinterpret_as.reference
index f7ca2bbedfa..b39deb55a7f 100644
--- a/tests/queries/0_stateless/01676_reinterpret_as.reference
+++ b/tests/queries/0_stateless/01676_reinterpret_as.reference
@@ -28,3 +28,4 @@ Integer and String types
 1	1	49
 1	1	49
 11	11	12593
+ReinterpretErrors
diff --git a/tests/queries/0_stateless/01676_reinterpret_as.sql b/tests/queries/0_stateless/01676_reinterpret_as.sql
index cc5dba1e110..ff727f284bb 100644
--- a/tests/queries/0_stateless/01676_reinterpret_as.sql
+++ b/tests/queries/0_stateless/01676_reinterpret_as.sql
@@ -28,3 +28,7 @@ SELECT 'Integer and String types';
 SELECT reinterpret(a, 'String'), reinterpretAsString(a), reinterpretAsUInt8('1') as a;
 SELECT reinterpret(a, 'String'), reinterpretAsString(a), reinterpretAsUInt8('11') as a;
 SELECT reinterpret(a, 'String'), reinterpretAsString(a), reinterpretAsUInt16('11') as a;
+SELECT 'ReinterpretErrors';
+SELECT reinterpret(toDecimal64(1, 2), 'UInt8'); -- {serverError 43}
+SELECT reinterpret('123', 'FixedString(1)'); -- {serverError 43}
+SELECT reinterpret(toDateTime('9922337203.6854775808', 1), 'Decimal64(1)'); -- {serverError 43}

From b2c09f002f592a2bec866ff7e698aa0f0a89ff57 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Wed, 17 Feb 2021 15:26:00 +0300
Subject: [PATCH 1127/1238] Dictionary create source with functions crash fix

---
 .../getDictionaryConfigurationFromAST.cpp     |  6 +++-
 ...ary_create_source_with_functions.reference |  1 +
 ...ictionary_create_source_with_functions.sql | 28 +++++++++++++++++++
 3 files changed, 34 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/01720_dictionary_create_source_with_functions.reference
 create mode 100644 tests/queries/0_stateless/01720_dictionary_create_source_with_functions.sql

diff --git a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp
index 2d4f971ef58..acfb11787de 100644
--- a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp
+++ b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp
@@ -401,10 +401,14 @@ void buildConfigurationFromFunctionWithKeyValueArguments(
         {
             auto builder = FunctionFactory::instance().tryGet(func->name, context);
             auto function = builder->build({});
-            auto result = function->execute({}, {}, 0);
+            function->prepare({});
+
+            size_t input_rows_count = 1;
+            auto result = function->execute({}, function->getResultType(), input_rows_count);
 
             Field value;
             result->get(0, value);
+
             AutoPtr<Text> text_value(doc->createTextNode(getFieldAsString(value)));
             current_xml_element->appendChild(text_value);
         }
diff --git a/tests/queries/0_stateless/01720_dictionary_create_source_with_functions.reference b/tests/queries/0_stateless/01720_dictionary_create_source_with_functions.reference
new file mode 100644
index 00000000000..38abe3c9f52
--- /dev/null
+++ b/tests/queries/0_stateless/01720_dictionary_create_source_with_functions.reference
@@ -0,0 +1 @@
+1	First
diff --git a/tests/queries/0_stateless/01720_dictionary_create_source_with_functions.sql b/tests/queries/0_stateless/01720_dictionary_create_source_with_functions.sql
new file mode 100644
index 00000000000..a0a4fbbfab9
--- /dev/null
+++ b/tests/queries/0_stateless/01720_dictionary_create_source_with_functions.sql
@@ -0,0 +1,28 @@
+DROP DATABASE IF EXISTS 01720_dictionary_db;
+CREATE DATABASE 01720_dictionary_db;
+
+CREATE TABLE 01720_dictionary_db.dictionary_source_table
+(
+	key UInt8,
+    value String
+)
+ENGINE = TinyLog;
+
+INSERT INTO 01720_dictionary_db.dictionary_source_table VALUES (1, 'First');
+
+CREATE DICTIONARY 01720_dictionary_db.dictionary
+(
+    key UInt64,
+    value String
+)
+PRIMARY KEY key
+SOURCE(CLICKHOUSE(DB '01720_dictionary_db' TABLE 'dictionary_source_table' HOST hostName() PORT tcpPort()))
+LIFETIME(0)
+LAYOUT(FLAT());
+
+SELECT * FROM 01720_dictionary_db.dictionary;
+
+DROP DICTIONARY 01720_dictionary_db.dictionary;
+DROP TABLE 01720_dictionary_db.dictionary_source_table;
+
+DROP DATABASE 01720_dictionary_db;

From e0980fd0b73b5c819b6206292c0334f11e6d8e11 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Wed, 17 Feb 2021 17:41:21 +0300
Subject: [PATCH 1128/1238] Fix fasttest retry for failed tests

---
 docker/test/fasttest/run.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh
index e6294b5d74d..90663102f17 100755
--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@@ -344,7 +344,7 @@ function run_tests
         01666_blns
     )
 
-    time clickhouse-test --hung-check -j 8 --order=random --use-skip-list --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
+    (time clickhouse-test --hung-check -j 8 --order=random --use-skip-list --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 ||:) | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
 
     # substr is to remove semicolon after test name
     readarray -t FAILED_TESTS < <(awk '/\[ FAIL|TIMEOUT|ERROR \]/ { print substr($3, 1, length($3)-1) }' "$FASTTEST_OUTPUT/test_log.txt" | tee "$FASTTEST_OUTPUT/failed-parallel-tests.txt")

From 42c22475e31a1a94731825987d7ef6c77f22ecbc Mon Sep 17 00:00:00 2001
From: Ivan <5627721+abyss7@users.noreply.github.com>
Date: Wed, 17 Feb 2021 18:55:24 +0300
Subject: [PATCH 1129/1238] Don't backport base commit of branch in the same
 branch (#20628)

---
 utils/github/backport.py | 2 +-
 utils/github/local.py    | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/utils/github/backport.py b/utils/github/backport.py
index 576e3b069c2..7fddbbee241 100644
--- a/utils/github/backport.py
+++ b/utils/github/backport.py
@@ -62,7 +62,7 @@ class Backport:
         RE_NO_BACKPORT = re.compile(r'^v(\d+\.\d+)-no-backport$')
         RE_BACKPORTED = re.compile(r'^v(\d+\.\d+)-backported$')
 
-        # pull-requests are sorted by ancestry from the least recent.
+        # pull-requests are sorted by ancestry from the most recent.
         for pr in pull_requests:
             while repo.comparator(branches[-1][1]) >= repo.comparator(pr['mergeCommit']['oid']):
                 logging.info("PR #{} is already inside {}. Dropping this branch for further PRs".format(pr['number'], branches[-1][0]))
diff --git a/utils/github/local.py b/utils/github/local.py
index a997721bc76..2ad8d4b8b71 100644
--- a/utils/github/local.py
+++ b/utils/github/local.py
@@ -6,15 +6,15 @@ import os
 import re
 
 
-class RepositoryBase(object):
+class RepositoryBase:
     def __init__(self, repo_path):
         import git
 
         self._repo = git.Repo(repo_path, search_parent_directories=(not repo_path))
 
-        # commit comparator
+        # comparator of commits
         def cmp(x, y):
-            if x == y:
+            if str(x) == str(y):
                 return 0
             if self._repo.is_ancestor(x, y):
                 return -1

From 50e135db0f925b33d44be562af3cc71dabdf8daf Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Wed, 17 Feb 2021 19:24:04 +0300
Subject: [PATCH 1130/1238] Added comment

---
 src/Dictionaries/getDictionaryConfigurationFromAST.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp
index acfb11787de..04ba1db09fc 100644
--- a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp
+++ b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp
@@ -403,6 +403,8 @@ void buildConfigurationFromFunctionWithKeyValueArguments(
             auto function = builder->build({});
             function->prepare({});
 
+            /// We assume that function will not take arguments and will return constant value like tcpPort or hostName
+            /// Such functions will return column with size equal to input_rows_count.
             size_t input_rows_count = 1;
             auto result = function->execute({}, function->getResultType(), input_rows_count);
 

From c704a8cc45a298f363c9b5de2349ca8dcdd45d1f Mon Sep 17 00:00:00 2001
From: Vitaly Baranov <vitbar@yandex-team.ru>
Date: Wed, 17 Feb 2021 20:05:52 +0300
Subject: [PATCH 1131/1238] Log stdout and stderr when failed to start docker
 in integration tests.

---
 tests/integration/helpers/cluster.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py
index 14aa2f252c5..aaba3a34555 100644
--- a/tests/integration/helpers/cluster.py
+++ b/tests/integration/helpers/cluster.py
@@ -730,7 +730,7 @@ class ClickHouseCluster:
 
             clickhouse_start_cmd = self.base_cmd + ['up', '-d', '--no-recreate']
             print(("Trying to create ClickHouse instance by command %s", ' '.join(map(str, clickhouse_start_cmd))))
-            subprocess.check_output(clickhouse_start_cmd)
+            subprocess_check_call(clickhouse_start_cmd)
             print("ClickHouse instance created")
 
             start_deadline = time.time() + 20.0  # seconds

From 18e036d19b1402007c2e5806c89ce435ced96517 Mon Sep 17 00:00:00 2001
From: Vitaly Baranov <vitbar@yandex-team.ru>
Date: Mon, 11 Jan 2021 04:50:30 +0300
Subject: [PATCH 1132/1238] Improved serialization for data types combined of
 Arrays and Tuples. Improved matching enum data types to protobuf enum type.
 Fixed serialization of the Map data type. Omitted values are now set by
 default.

---
 docker/test/stateless/Dockerfile              |    1 +
 src/Columns/ColumnFixedString.cpp             |   14 +
 src/Columns/ColumnFixedString.h               |    3 +-
 src/Common/ErrorCodes.cpp                     |    6 +-
 src/DataTypes/DataTypeAggregateFunction.cpp   |   41 -
 src/DataTypes/DataTypeAggregateFunction.h     |    2 -
 src/DataTypes/DataTypeArray.cpp               |   50 -
 src/DataTypes/DataTypeArray.h                 |    9 -
 src/DataTypes/DataTypeDate.cpp                |   26 -
 src/DataTypes/DataTypeDate.h                  |    2 -
 src/DataTypes/DataTypeDateTime.cpp            |   28 -
 src/DataTypes/DataTypeDateTime.h              |    2 -
 src/DataTypes/DataTypeDateTime64.cpp          |   26 -
 src/DataTypes/DataTypeDateTime64.h            |    2 -
 src/DataTypes/DataTypeDecimalBase.cpp         |    2 -
 src/DataTypes/DataTypeEnum.cpp                |   30 -
 src/DataTypes/DataTypeEnum.h                  |    3 -
 src/DataTypes/DataTypeFixedString.cpp         |   61 +-
 src/DataTypes/DataTypeFixedString.h           |    3 -
 src/DataTypes/DataTypeLowCardinality.cpp      |   25 -
 src/DataTypes/DataTypeLowCardinality.h        |    2 -
 src/DataTypes/DataTypeMap.cpp                 |   10 -
 src/DataTypes/DataTypeMap.h                   |    5 +-
 src/DataTypes/DataTypeNullable.cpp            |   27 -
 src/DataTypes/DataTypeNullable.h              |    3 -
 src/DataTypes/DataTypeNumberBase.cpp          |   30 -
 src/DataTypes/DataTypeNumberBase.h            |    3 -
 src/DataTypes/DataTypeString.cpp              |   51 -
 src/DataTypes/DataTypeString.h                |    3 -
 src/DataTypes/DataTypeTuple.cpp               |   27 -
 src/DataTypes/DataTypeTuple.h                 |    3 -
 src/DataTypes/DataTypeUUID.cpp                |   26 -
 src/DataTypes/DataTypeUUID.h                  |    2 -
 src/DataTypes/DataTypesDecimal.cpp            |   29 -
 src/DataTypes/DataTypesDecimal.h              |    3 -
 src/DataTypes/IDataType.h                     |    7 -
 src/DataTypes/IDataTypeDummy.h                |    2 -
 src/Formats/FormatSettings.h                  |    3 +-
 src/Formats/ProtobufColumnMatcher.cpp         |   55 -
 src/Formats/ProtobufColumnMatcher.h           |  196 --
 src/Formats/ProtobufReader.cpp                |  945 +-----
 src/Formats/ProtobufReader.h                  |  294 +-
 src/Formats/ProtobufSerializer.cpp            | 2921 +++++++++++++++++
 src/Formats/ProtobufSerializer.h              |   52 +
 src/Formats/ProtobufWriter.cpp                |  843 +----
 src/Formats/ProtobufWriter.h                  |  322 +-
 src/Formats/ya.make                           |    2 +-
 .../Formats/Impl/ProtobufRowInputFormat.cpp   |   73 +-
 .../Formats/Impl/ProtobufRowInputFormat.h     |   13 +-
 .../Formats/Impl/ProtobufRowOutputFormat.cpp  |   71 +-
 .../Formats/Impl/ProtobufRowOutputFormat.h    |   29 +-
 src/Storages/Kafka/KafkaBlockOutputStream.cpp |    2 +-
 .../RabbitMQ/RabbitMQBlockOutputStream.cpp    |    2 +-
 .../00825_protobuf_format_array_3dim.proto    |   14 +
 ...00825_protobuf_format_array_3dim.reference |   52 +
 .../00825_protobuf_format_array_3dim.sh       |   35 +
 ...0825_protobuf_format_array_of_arrays.proto |    9 +
 ..._protobuf_format_array_of_arrays.reference |   41 +
 .../00825_protobuf_format_array_of_arrays.sh  |   38 +
 .../00825_protobuf_format_enum_mapping.proto  |   13 +
 ...825_protobuf_format_enum_mapping.reference |   31 +
 .../00825_protobuf_format_enum_mapping.sh     |   37 +
 .../00825_protobuf_format_map.proto           |    5 +
 .../00825_protobuf_format_map.reference       |   19 +
 .../0_stateless/00825_protobuf_format_map.sh  |   40 +
 ...0825_protobuf_format_nested_optional.proto |   10 +
 ..._protobuf_format_nested_optional.reference |   25 +
 .../00825_protobuf_format_nested_optional.sh  |   41 +
 .../00825_protobuf_format_table_default.proto |    6 +
 ...25_protobuf_format_table_default.reference |   37 +
 .../00825_protobuf_format_table_default.sh    |   38 +
 .../protobuf_length_delimited_encoder.py      |  180 +
 tests/queries/skip_list.json                  |    6 +
 73 files changed, 3990 insertions(+), 3079 deletions(-)
 delete mode 100644 src/Formats/ProtobufColumnMatcher.cpp
 delete mode 100644 src/Formats/ProtobufColumnMatcher.h
 create mode 100644 src/Formats/ProtobufSerializer.cpp
 create mode 100644 src/Formats/ProtobufSerializer.h
 create mode 100644 tests/queries/0_stateless/00825_protobuf_format_array_3dim.proto
 create mode 100644 tests/queries/0_stateless/00825_protobuf_format_array_3dim.reference
 create mode 100755 tests/queries/0_stateless/00825_protobuf_format_array_3dim.sh
 create mode 100644 tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.proto
 create mode 100644 tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.reference
 create mode 100755 tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.sh
 create mode 100644 tests/queries/0_stateless/00825_protobuf_format_enum_mapping.proto
 create mode 100644 tests/queries/0_stateless/00825_protobuf_format_enum_mapping.reference
 create mode 100755 tests/queries/0_stateless/00825_protobuf_format_enum_mapping.sh
 create mode 100644 tests/queries/0_stateless/00825_protobuf_format_map.proto
 create mode 100644 tests/queries/0_stateless/00825_protobuf_format_map.reference
 create mode 100755 tests/queries/0_stateless/00825_protobuf_format_map.sh
 create mode 100644 tests/queries/0_stateless/00825_protobuf_format_nested_optional.proto
 create mode 100644 tests/queries/0_stateless/00825_protobuf_format_nested_optional.reference
 create mode 100755 tests/queries/0_stateless/00825_protobuf_format_nested_optional.sh
 create mode 100644 tests/queries/0_stateless/00825_protobuf_format_table_default.proto
 create mode 100644 tests/queries/0_stateless/00825_protobuf_format_table_default.reference
 create mode 100755 tests/queries/0_stateless/00825_protobuf_format_table_default.sh
 create mode 100755 tests/queries/0_stateless/helpers/protobuf_length_delimited_encoder.py

diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile
index b063f8d81f6..10b213803c9 100644
--- a/docker/test/stateless/Dockerfile
+++ b/docker/test/stateless/Dockerfile
@@ -13,6 +13,7 @@ RUN apt-get update -y \
             ncdu \
             netcat-openbsd \
             openssl \
+            protobuf-compiler \
             python3 \
             python3-lxml \
             python3-requests \
diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp
index 55e387ff2ee..6cfec89a5dc 100644
--- a/src/Columns/ColumnFixedString.cpp
+++ b/src/Columns/ColumnFixedString.cpp
@@ -446,4 +446,18 @@ void ColumnFixedString::getExtremes(Field & min, Field & max) const
     get(max_idx, max);
 }
 
+void ColumnFixedString::alignStringLength(ColumnFixedString::Chars & data, size_t n, size_t old_size)
+{
+    size_t length = data.size() - old_size;
+    if (length < n)
+    {
+        data.resize_fill(old_size + n);
+    }
+    else if (length > n)
+    {
+        data.resize_assume_reserved(old_size);
+        throw Exception("Too large value for FixedString(" + std::to_string(n) + ")", ErrorCodes::TOO_LARGE_STRING_SIZE);
+    }
+}
+
 }
diff --git a/src/Columns/ColumnFixedString.h b/src/Columns/ColumnFixedString.h
index 286b3a752dc..24a99c27b13 100644
--- a/src/Columns/ColumnFixedString.h
+++ b/src/Columns/ColumnFixedString.h
@@ -182,7 +182,8 @@ public:
     const Chars & getChars() const { return chars; }
 
     size_t getN() const { return n; }
+
+    static void alignStringLength(ColumnFixedString::Chars & data, size_t n, size_t old_size);
 };
 
-
 }
diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp
index d0d83448b68..52c22c2e371 100644
--- a/src/Common/ErrorCodes.cpp
+++ b/src/Common/ErrorCodes.cpp
@@ -404,7 +404,7 @@
     M(432, UNKNOWN_CODEC) \
     M(433, ILLEGAL_CODEC_PARAMETER) \
     M(434, CANNOT_PARSE_PROTOBUF_SCHEMA) \
-    M(435, NO_DATA_FOR_REQUIRED_PROTOBUF_FIELD) \
+    M(435, NO_COLUMN_SERIALIZED_TO_REQUIRED_PROTOBUF_FIELD) \
     M(436, PROTOBUF_BAD_CAST) \
     M(437, PROTOBUF_FIELD_NOT_REPEATED) \
     M(438, DATA_TYPE_CANNOT_BE_PROMOTED) \
@@ -412,7 +412,7 @@
     M(440, INVALID_LIMIT_EXPRESSION) \
     M(441, CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING) \
     M(442, BAD_DATABASE_FOR_TEMPORARY_TABLE) \
-    M(443, NO_COMMON_COLUMNS_WITH_PROTOBUF_SCHEMA) \
+    M(443, NO_COLUMNS_SERIALIZED_TO_PROTOBUF_FIELDS) \
     M(444, UNKNOWN_PROTOBUF_FORMAT) \
     M(445, CANNOT_MPROTECT) \
     M(446, FUNCTION_NOT_ALLOWED) \
@@ -535,6 +535,8 @@
     M(566, CANNOT_RMDIR) \
     M(567, DUPLICATED_PART_UUIDS) \
     M(568, RAFT_ERROR) \
+    M(569, MULTIPLE_COLUMNS_SERIALIZED_TO_SAME_PROTOBUF_FIELD) \
+    M(570, DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD) \
     \
     M(999, KEEPER_EXCEPTION) \
     M(1000, POCO_EXCEPTION) \
diff --git a/src/DataTypes/DataTypeAggregateFunction.cpp b/src/DataTypes/DataTypeAggregateFunction.cpp
index 9104c12120f..e92994ae979 100644
--- a/src/DataTypes/DataTypeAggregateFunction.cpp
+++ b/src/DataTypes/DataTypeAggregateFunction.cpp
@@ -10,8 +10,6 @@
 #include <Common/AlignedBuffer.h>
 
 #include <Formats/FormatSettings.h>
-#include <Formats/ProtobufReader.h>
-#include <Formats/ProtobufWriter.h>
 #include <DataTypes/DataTypeAggregateFunction.h>
 #include <DataTypes/DataTypeFactory.h>
 #include <IO/WriteBufferFromString.h>
@@ -261,45 +259,6 @@ void DataTypeAggregateFunction::deserializeTextCSV(IColumn & column, ReadBuffer
 }
 
 
-void DataTypeAggregateFunction::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
-{
-    if (value_index)
-        return;
-    value_index = static_cast<bool>(
-        protobuf.writeAggregateFunction(function, assert_cast<const ColumnAggregateFunction &>(column).getData()[row_num]));
-}
-
-void DataTypeAggregateFunction::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
-{
-    row_added = false;
-    ColumnAggregateFunction & column_concrete = assert_cast<ColumnAggregateFunction &>(column);
-    Arena & arena = column_concrete.createOrGetArena();
-    size_t size_of_state = function->sizeOfData();
-    AggregateDataPtr place = arena.alignedAlloc(size_of_state, function->alignOfData());
-    function->create(place);
-    try
-    {
-        if (!protobuf.readAggregateFunction(function, place, arena))
-        {
-            function->destroy(place);
-            return;
-        }
-        auto & container = column_concrete.getData();
-        if (allow_add_row)
-        {
-            container.emplace_back(place);
-            row_added = true;
-        }
-        else
-            container.back() = place;
-    }
-    catch (...)
-    {
-        function->destroy(place);
-        throw;
-    }
-}
-
 MutableColumnPtr DataTypeAggregateFunction::createColumn() const
 {
     return ColumnAggregateFunction::create(function);
diff --git a/src/DataTypes/DataTypeAggregateFunction.h b/src/DataTypes/DataTypeAggregateFunction.h
index 9ae7c67a803..d07d46fd3ee 100644
--- a/src/DataTypes/DataTypeAggregateFunction.h
+++ b/src/DataTypes/DataTypeAggregateFunction.h
@@ -59,8 +59,6 @@ public:
     void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
     void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
     void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
-    void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
-    void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
 
     MutableColumnPtr createColumn() const override;
 
diff --git a/src/DataTypes/DataTypeArray.cpp b/src/DataTypes/DataTypeArray.cpp
index 3ad84a8fcd7..27088ab822c 100644
--- a/src/DataTypes/DataTypeArray.cpp
+++ b/src/DataTypes/DataTypeArray.cpp
@@ -6,7 +6,6 @@
 #include <IO/WriteBufferFromString.h>
 
 #include <Formats/FormatSettings.h>
-#include <Formats/ProtobufReader.h>
 #include <DataTypes/DataTypesNumber.h>
 #include <DataTypes/DataTypeArray.h>
 #include <DataTypes/DataTypeFactory.h>
@@ -522,55 +521,6 @@ void DataTypeArray::deserializeTextCSV(IColumn & column, ReadBuffer & istr, cons
 }
 
 
-void DataTypeArray::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
-{
-    const ColumnArray & column_array = assert_cast<const ColumnArray &>(column);
-    const ColumnArray::Offsets & offsets = column_array.getOffsets();
-    size_t offset = offsets[row_num - 1] + value_index;
-    size_t next_offset = offsets[row_num];
-    const IColumn & nested_column = column_array.getData();
-    size_t i;
-    for (i = offset; i < next_offset; ++i)
-    {
-        size_t element_stored = 0;
-        nested->serializeProtobuf(nested_column, i, protobuf, element_stored);
-        if (!element_stored)
-            break;
-    }
-    value_index += i - offset;
-}
-
-
-void DataTypeArray::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
-{
-    row_added = false;
-    ColumnArray & column_array = assert_cast<ColumnArray &>(column);
-    IColumn & nested_column = column_array.getData();
-    ColumnArray::Offsets & offsets = column_array.getOffsets();
-    size_t old_size = offsets.size();
-    try
-    {
-        bool nested_row_added;
-        do
-            nested->deserializeProtobuf(nested_column, protobuf, true, nested_row_added);
-        while (nested_row_added && protobuf.canReadMoreValues());
-        if (allow_add_row)
-        {
-            offsets.emplace_back(nested_column.size());
-            row_added = true;
-        }
-        else
-            offsets.back() = nested_column.size();
-    }
-    catch (...)
-    {
-        offsets.resize_assume_reserved(old_size);
-        nested_column.popBack(nested_column.size() - offsets.back());
-        throw;
-    }
-}
-
-
 MutableColumnPtr DataTypeArray::createColumn() const
 {
     return ColumnArray::create(nested->createColumn(), ColumnArray::ColumnOffsets::create());
diff --git a/src/DataTypes/DataTypeArray.h b/src/DataTypes/DataTypeArray.h
index ba19ad021be..4185163e2e7 100644
--- a/src/DataTypes/DataTypeArray.h
+++ b/src/DataTypes/DataTypeArray.h
@@ -85,15 +85,6 @@ public:
             DeserializeBinaryBulkStatePtr & state,
             SubstreamsCache * cache) const override;
 
-    void serializeProtobuf(const IColumn & column,
-                           size_t row_num,
-                           ProtobufWriter & protobuf,
-                           size_t & value_index) const override;
-    void deserializeProtobuf(IColumn & column,
-                             ProtobufReader & protobuf,
-                             bool allow_add_row,
-                             bool & row_added) const override;
-
     MutableColumnPtr createColumn() const override;
 
     Field getDefault() const override;
diff --git a/src/DataTypes/DataTypeDate.cpp b/src/DataTypes/DataTypeDate.cpp
index 2c1dfcbb0fe..192a89cc454 100644
--- a/src/DataTypes/DataTypeDate.cpp
+++ b/src/DataTypes/DataTypeDate.cpp
@@ -4,8 +4,6 @@
 #include <Columns/ColumnsNumber.h>
 #include <DataTypes/DataTypeDate.h>
 #include <DataTypes/DataTypeFactory.h>
-#include <Formats/ProtobufReader.h>
-#include <Formats/ProtobufWriter.h>
 
 #include <Common/assert_cast.h>
 
@@ -81,30 +79,6 @@ void DataTypeDate::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const
     assert_cast<ColumnUInt16 &>(column).getData().push_back(value.getDayNum());
 }
 
-void DataTypeDate::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
-{
-    if (value_index)
-        return;
-    value_index = static_cast<bool>(protobuf.writeDate(DayNum(assert_cast<const ColumnUInt16 &>(column).getData()[row_num])));
-}
-
-void DataTypeDate::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
-{
-    row_added = false;
-    DayNum d;
-    if (!protobuf.readDate(d))
-        return;
-
-    auto & container = assert_cast<ColumnUInt16 &>(column).getData();
-    if (allow_add_row)
-    {
-        container.emplace_back(d);
-        row_added = true;
-    }
-    else
-        container.back() = d;
-}
-
 bool DataTypeDate::equals(const IDataType & rhs) const
 {
     return typeid(rhs) == typeid(*this);
diff --git a/src/DataTypes/DataTypeDate.h b/src/DataTypes/DataTypeDate.h
index 00afba424e4..496d7fe0b22 100644
--- a/src/DataTypes/DataTypeDate.h
+++ b/src/DataTypes/DataTypeDate.h
@@ -24,8 +24,6 @@ public:
     void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
     void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
     void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
-    void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
-    void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
 
     bool canBeUsedAsVersion() const override { return true; }
     bool canBeInsideNullable() const override { return true; }
diff --git a/src/DataTypes/DataTypeDateTime.cpp b/src/DataTypes/DataTypeDateTime.cpp
index bfb4473e429..d2bbb4a1efa 100644
--- a/src/DataTypes/DataTypeDateTime.cpp
+++ b/src/DataTypes/DataTypeDateTime.cpp
@@ -5,8 +5,6 @@
 #include <common/DateLUT.h>
 #include <DataTypes/DataTypeFactory.h>
 #include <Formats/FormatSettings.h>
-#include <Formats/ProtobufReader.h>
-#include <Formats/ProtobufWriter.h>
 #include <IO/Operators.h>
 #include <IO/ReadHelpers.h>
 #include <IO/WriteBufferFromString.h>
@@ -164,32 +162,6 @@ void DataTypeDateTime::deserializeTextCSV(IColumn & column, ReadBuffer & istr, c
     assert_cast<ColumnType &>(column).getData().push_back(x);
 }
 
-void DataTypeDateTime::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
-{
-    if (value_index)
-        return;
-
-    // On some platforms `time_t` is `long` but not `unsigned int` (UInt32 that we store in column), hence static_cast.
-    value_index = static_cast<bool>(protobuf.writeDateTime(static_cast<time_t>(assert_cast<const ColumnType &>(column).getData()[row_num])));
-}
-
-void DataTypeDateTime::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
-{
-    row_added = false;
-    time_t t;
-    if (!protobuf.readDateTime(t))
-        return;
-
-    auto & container = assert_cast<ColumnType &>(column).getData();
-    if (allow_add_row)
-    {
-        container.emplace_back(t);
-        row_added = true;
-    }
-    else
-        container.back() = t;
-}
-
 bool DataTypeDateTime::equals(const IDataType & rhs) const
 {
     /// DateTime with different timezones are equal, because:
diff --git a/src/DataTypes/DataTypeDateTime.h b/src/DataTypes/DataTypeDateTime.h
index 47c7f361091..edec889309b 100644
--- a/src/DataTypes/DataTypeDateTime.h
+++ b/src/DataTypes/DataTypeDateTime.h
@@ -68,8 +68,6 @@ public:
     void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
     void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
     void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
-    void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
-    void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
 
     bool canBeUsedAsVersion() const override { return true; }
     bool canBeInsideNullable() const override { return true; }
diff --git a/src/DataTypes/DataTypeDateTime64.cpp b/src/DataTypes/DataTypeDateTime64.cpp
index ef1a971510a..09e39c2de1a 100644
--- a/src/DataTypes/DataTypeDateTime64.cpp
+++ b/src/DataTypes/DataTypeDateTime64.cpp
@@ -6,8 +6,6 @@
 #include <common/DateLUT.h>
 #include <DataTypes/DataTypeFactory.h>
 #include <Formats/FormatSettings.h>
-#include <Formats/ProtobufReader.h>
-#include <Formats/ProtobufWriter.h>
 #include <IO/Operators.h>
 #include <IO/ReadHelpers.h>
 #include <IO/WriteBufferFromString.h>
@@ -182,30 +180,6 @@ void DataTypeDateTime64::deserializeTextCSV(IColumn & column, ReadBuffer & istr,
     assert_cast<ColumnType &>(column).getData().push_back(x);
 }
 
-void DataTypeDateTime64::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
-{
-    if (value_index)
-        return;
-    value_index = static_cast<bool>(protobuf.writeDateTime64(assert_cast<const ColumnType &>(column).getData()[row_num], scale));
-}
-
-void DataTypeDateTime64::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
-{
-    row_added = false;
-    DateTime64 t = 0;
-    if (!protobuf.readDateTime64(t, scale))
-        return;
-
-    auto & container = assert_cast<ColumnType &>(column).getData();
-    if (allow_add_row)
-    {
-        container.emplace_back(t);
-        row_added = true;
-    }
-    else
-        container.back() = t;
-}
-
 bool DataTypeDateTime64::equals(const IDataType & rhs) const
 {
     if (const auto * ptype = typeid_cast<const DataTypeDateTime64 *>(&rhs))
diff --git a/src/DataTypes/DataTypeDateTime64.h b/src/DataTypes/DataTypeDateTime64.h
index 003e83b7195..198c3739f58 100644
--- a/src/DataTypes/DataTypeDateTime64.h
+++ b/src/DataTypes/DataTypeDateTime64.h
@@ -42,8 +42,6 @@ public:
     void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
     void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
     void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
-    void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
-    void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
 
     bool equals(const IDataType & rhs) const override;
 
diff --git a/src/DataTypes/DataTypeDecimalBase.cpp b/src/DataTypes/DataTypeDecimalBase.cpp
index 9fb445ab00d..ab17996167c 100644
--- a/src/DataTypes/DataTypeDecimalBase.cpp
+++ b/src/DataTypes/DataTypeDecimalBase.cpp
@@ -4,8 +4,6 @@
 #include <Common/typeid_cast.h>
 #include <Core/DecimalFunctions.h>
 #include <DataTypes/DataTypeFactory.h>
-#include <Formats/ProtobufReader.h>
-#include <Formats/ProtobufWriter.h>
 #include <IO/ReadHelpers.h>
 #include <IO/WriteHelpers.h>
 #include <Interpreters/Context.h>
diff --git a/src/DataTypes/DataTypeEnum.cpp b/src/DataTypes/DataTypeEnum.cpp
index 650a1da6407..043c971266c 100644
--- a/src/DataTypes/DataTypeEnum.cpp
+++ b/src/DataTypes/DataTypeEnum.cpp
@@ -1,7 +1,5 @@
 #include <IO/WriteBufferFromString.h>
 #include <Formats/FormatSettings.h>
-#include <Formats/ProtobufReader.h>
-#include <Formats/ProtobufWriter.h>
 #include <DataTypes/DataTypeEnum.h>
 #include <DataTypes/DataTypeFactory.h>
 #include <Parsers/IAST.h>
@@ -254,34 +252,6 @@ void DataTypeEnum<Type>::deserializeBinaryBulk(
     x.resize(initial_size + size / sizeof(FieldType));
 }
 
-template <typename Type>
-void DataTypeEnum<Type>::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
-{
-    if (value_index)
-        return;
-    protobuf.prepareEnumMapping(values);
-    value_index = static_cast<bool>(protobuf.writeEnum(assert_cast<const ColumnType &>(column).getData()[row_num]));
-}
-
-template<typename Type>
-void DataTypeEnum<Type>::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
-{
-    protobuf.prepareEnumMapping(values);
-    row_added = false;
-    Type value;
-    if (!protobuf.readEnum(value))
-        return;
-
-    auto & container = assert_cast<ColumnType &>(column).getData();
-    if (allow_add_row)
-    {
-        container.emplace_back(value);
-        row_added = true;
-    }
-    else
-        container.back() = value;
-}
-
 template <typename Type>
 Field DataTypeEnum<Type>::getDefault() const
 {
diff --git a/src/DataTypes/DataTypeEnum.h b/src/DataTypes/DataTypeEnum.h
index c75d348f15c..003613edb98 100644
--- a/src/DataTypes/DataTypeEnum.h
+++ b/src/DataTypes/DataTypeEnum.h
@@ -132,9 +132,6 @@ public:
     void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, const size_t offset, size_t limit) const override;
     void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, const size_t limit, const double avg_value_size_hint) const override;
 
-    void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
-    void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
-
     MutableColumnPtr createColumn() const override { return ColumnType::create(); }
 
     Field getDefault() const override;
diff --git a/src/DataTypes/DataTypeFixedString.cpp b/src/DataTypes/DataTypeFixedString.cpp
index 585c5709be7..21cfe855169 100644
--- a/src/DataTypes/DataTypeFixedString.cpp
+++ b/src/DataTypes/DataTypeFixedString.cpp
@@ -2,8 +2,6 @@
 #include <Columns/ColumnConst.h>
 
 #include <Formats/FormatSettings.h>
-#include <Formats/ProtobufReader.h>
-#include <Formats/ProtobufWriter.h>
 #include <DataTypes/DataTypeFixedString.h>
 #include <DataTypes/DataTypeFactory.h>
 
@@ -25,7 +23,6 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int CANNOT_READ_ALL_DATA;
-    extern const int TOO_LARGE_STRING_SIZE;
     extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
     extern const int UNEXPECTED_AST_STRUCTURE;
 }
@@ -127,16 +124,7 @@ static inline void alignStringLength(const DataTypeFixedString & type,
                                      ColumnFixedString::Chars & data,
                                      size_t string_start)
 {
-    size_t length = data.size() - string_start;
-    if (length < type.getN())
-    {
-        data.resize_fill(string_start + type.getN());
-    }
-    else if (length > type.getN())
-    {
-        data.resize_assume_reserved(string_start);
-        throw Exception("Too large value for " + type.getName(), ErrorCodes::TOO_LARGE_STRING_SIZE);
-    }
+    ColumnFixedString::alignStringLength(data, type.getN(), string_start);
 }
 
 template <typename Reader>
@@ -215,53 +203,6 @@ void DataTypeFixedString::deserializeTextCSV(IColumn & column, ReadBuffer & istr
 }
 
 
-void DataTypeFixedString::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
-{
-    if (value_index)
-        return;
-    const char * pos = reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]);
-    value_index = static_cast<bool>(protobuf.writeString(StringRef(pos, n)));
-}
-
-
-void DataTypeFixedString::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
-{
-    row_added = false;
-    auto & column_string = assert_cast<ColumnFixedString &>(column);
-    ColumnFixedString::Chars & data = column_string.getChars();
-    size_t old_size = data.size();
-    try
-    {
-        if (allow_add_row)
-        {
-            if (protobuf.readStringInto(data))
-            {
-                alignStringLength(*this, data, old_size);
-                row_added = true;
-            }
-            else
-                data.resize_assume_reserved(old_size);
-        }
-        else
-        {
-            ColumnFixedString::Chars temp_data;
-            if (protobuf.readStringInto(temp_data))
-            {
-                alignStringLength(*this, temp_data, 0);
-                column_string.popBack(1);
-                old_size = data.size();
-                data.insertSmallAllowReadWriteOverflow15(temp_data.begin(), temp_data.end());
-            }
-        }
-    }
-    catch (...)
-    {
-        data.resize_assume_reserved(old_size);
-        throw;
-    }
-}
-
-
 MutableColumnPtr DataTypeFixedString::createColumn() const
 {
     return ColumnFixedString::create(n);
diff --git a/src/DataTypes/DataTypeFixedString.h b/src/DataTypes/DataTypeFixedString.h
index e410d1b0596..af82e4b5d11 100644
--- a/src/DataTypes/DataTypeFixedString.h
+++ b/src/DataTypes/DataTypeFixedString.h
@@ -66,9 +66,6 @@ public:
     void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
     void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
 
-    void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
-    void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
-
     MutableColumnPtr createColumn() const override;
 
     Field getDefault() const override;
diff --git a/src/DataTypes/DataTypeLowCardinality.cpp b/src/DataTypes/DataTypeLowCardinality.cpp
index 9614c150c7d..1b21b7de4bc 100644
--- a/src/DataTypes/DataTypeLowCardinality.cpp
+++ b/src/DataTypes/DataTypeLowCardinality.cpp
@@ -808,31 +808,6 @@ void DataTypeLowCardinality::serializeTextXML(const IColumn & column, size_t row
     serializeImpl(column, row_num, &IDataType::serializeAsTextXML, ostr, settings);
 }
 
-void DataTypeLowCardinality::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
-{
-    serializeImpl(column, row_num, &IDataType::serializeProtobuf, protobuf, value_index);
-}
-
-void DataTypeLowCardinality::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
-{
-    if (allow_add_row)
-    {
-        deserializeImpl(column, &IDataType::deserializeProtobuf, protobuf, true, row_added);
-        return;
-    }
-
-    row_added = false;
-    auto & low_cardinality_column= getColumnLowCardinality(column);
-    auto  nested_column = low_cardinality_column.getDictionary().getNestedColumn();
-    auto temp_column = nested_column->cloneEmpty();
-    size_t unique_row_number = low_cardinality_column.getIndexes().getUInt(low_cardinality_column.size() - 1);
-    temp_column->insertFrom(*nested_column, unique_row_number);
-    bool dummy;
-    dictionary_type.get()->deserializeProtobuf(*temp_column, protobuf, false, dummy);
-    low_cardinality_column.popBack(1);
-    low_cardinality_column.insertFromFullColumn(*temp_column, 0);
-}
-
 template <typename... Params, typename... Args>
 void DataTypeLowCardinality::serializeImpl(
     const IColumn & column, size_t row_num, DataTypeLowCardinality::SerializeFunctionPtr<Params...> func, Args &&... args) const
diff --git a/src/DataTypes/DataTypeLowCardinality.h b/src/DataTypes/DataTypeLowCardinality.h
index 6ed2b792ce3..14beb423f1f 100644
--- a/src/DataTypes/DataTypeLowCardinality.h
+++ b/src/DataTypes/DataTypeLowCardinality.h
@@ -65,8 +65,6 @@ public:
     void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
     void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
     void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
-    void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
-    void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
 
     MutableColumnPtr createColumn() const override;
 
diff --git a/src/DataTypes/DataTypeMap.cpp b/src/DataTypes/DataTypeMap.cpp
index af2ed8805e8..9972452862f 100644
--- a/src/DataTypes/DataTypeMap.cpp
+++ b/src/DataTypes/DataTypeMap.cpp
@@ -336,16 +336,6 @@ void DataTypeMap::deserializeBinaryBulkWithMultipleStreamsImpl(
     nested->deserializeBinaryBulkWithMultipleStreams(column_map.getNestedColumnPtr(), limit, settings, state, cache);
 }
 
-void DataTypeMap::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
-{
-    nested->serializeProtobuf(extractNestedColumn(column), row_num, protobuf, value_index);
-}
-
-void DataTypeMap::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
-{
-    nested->deserializeProtobuf(extractNestedColumn(column), protobuf, allow_add_row, row_added);
-}
-
 MutableColumnPtr DataTypeMap::createColumn() const
 {
     return ColumnMap::create(nested->createColumn());
diff --git a/src/DataTypes/DataTypeMap.h b/src/DataTypes/DataTypeMap.h
index ea495f05548..88ea44a0d5a 100644
--- a/src/DataTypes/DataTypeMap.h
+++ b/src/DataTypes/DataTypeMap.h
@@ -76,9 +76,6 @@ public:
            DeserializeBinaryBulkStatePtr & state,
            SubstreamsCache * cache) const override;
 
-    void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
-    void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
-
     MutableColumnPtr createColumn() const override;
 
     Field getDefault() const override;
@@ -92,6 +89,8 @@ public:
     const DataTypePtr & getValueType() const { return value_type; }
     DataTypes getKeyValueTypes() const { return {key_type, value_type}; }
 
+    const DataTypePtr & getNestedType() const { return nested; }
+
 private:
     template <typename Writer>
     void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, Writer && writer) const;
diff --git a/src/DataTypes/DataTypeNullable.cpp b/src/DataTypes/DataTypeNullable.cpp
index c3b734686f8..903ebeb3ddc 100644
--- a/src/DataTypes/DataTypeNullable.cpp
+++ b/src/DataTypes/DataTypeNullable.cpp
@@ -486,33 +486,6 @@ void DataTypeNullable::serializeTextXML(const IColumn & column, size_t row_num,
         nested_data_type->serializeAsTextXML(col.getNestedColumn(), row_num, ostr, settings);
 }
 
-void DataTypeNullable::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
-{
-    const ColumnNullable & col = assert_cast<const ColumnNullable &>(column);
-    if (!col.isNullAt(row_num))
-        nested_data_type->serializeProtobuf(col.getNestedColumn(), row_num, protobuf, value_index);
-}
-
-void DataTypeNullable::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
-{
-    ColumnNullable & col = assert_cast<ColumnNullable &>(column);
-    IColumn & nested_column = col.getNestedColumn();
-    size_t old_size = nested_column.size();
-    try
-    {
-        nested_data_type->deserializeProtobuf(nested_column, protobuf, allow_add_row, row_added);
-        if (row_added)
-            col.getNullMapData().push_back(0);
-    }
-    catch (...)
-    {
-        nested_column.popBack(nested_column.size() - old_size);
-        col.getNullMapData().resize_assume_reserved(old_size);
-        row_added = false;
-        throw;
-    }
-}
-
 MutableColumnPtr DataTypeNullable::createColumn() const
 {
     return ColumnNullable::create(nested_data_type->createColumn(), ColumnUInt8::create());
diff --git a/src/DataTypes/DataTypeNullable.h b/src/DataTypes/DataTypeNullable.h
index db641faf0af..5e71a1bee4d 100644
--- a/src/DataTypes/DataTypeNullable.h
+++ b/src/DataTypes/DataTypeNullable.h
@@ -73,9 +73,6 @@ public:
     void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
     void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
 
-    void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
-    void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
-
     MutableColumnPtr createColumn() const override;
 
     Field getDefault() const override;
diff --git a/src/DataTypes/DataTypeNumberBase.cpp b/src/DataTypes/DataTypeNumberBase.cpp
index a9b9bbc8090..ae3e6762d27 100644
--- a/src/DataTypes/DataTypeNumberBase.cpp
+++ b/src/DataTypes/DataTypeNumberBase.cpp
@@ -8,8 +8,6 @@
 #include <Common/typeid_cast.h>
 #include <Common/assert_cast.h>
 #include <Formats/FormatSettings.h>
-#include <Formats/ProtobufReader.h>
-#include <Formats/ProtobufWriter.h>
 
 
 namespace DB
@@ -205,34 +203,6 @@ void DataTypeNumberBase<T>::deserializeBinaryBulk(IColumn & column, ReadBuffer &
 }
 
 
-template <typename T>
-void DataTypeNumberBase<T>::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
-{
-    if (value_index)
-        return;
-    value_index = static_cast<bool>(protobuf.writeNumber(assert_cast<const ColumnVector<T> &>(column).getData()[row_num]));
-}
-
-
-template <typename T>
-void DataTypeNumberBase<T>::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
-{
-    row_added = false;
-    T value;
-    if (!protobuf.readNumber(value))
-        return;
-
-    auto & container = typeid_cast<ColumnVector<T> &>(column).getData();
-    if (allow_add_row)
-    {
-        container.emplace_back(value);
-        row_added = true;
-    }
-    else
-        container.back() = value;
-}
-
-
 template <typename T>
 MutableColumnPtr DataTypeNumberBase<T>::createColumn() const
 {
diff --git a/src/DataTypes/DataTypeNumberBase.h b/src/DataTypes/DataTypeNumberBase.h
index 1491eabfbd5..22a70ac7277 100644
--- a/src/DataTypes/DataTypeNumberBase.h
+++ b/src/DataTypes/DataTypeNumberBase.h
@@ -45,9 +45,6 @@ public:
     void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override;
     void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override;
 
-    void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
-    void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
-
     MutableColumnPtr createColumn() const override;
 
     bool isParametric() const override { return false; }
diff --git a/src/DataTypes/DataTypeString.cpp b/src/DataTypes/DataTypeString.cpp
index c752d136642..d760df5075d 100644
--- a/src/DataTypes/DataTypeString.cpp
+++ b/src/DataTypes/DataTypeString.cpp
@@ -9,8 +9,6 @@
 #include <Core/Field.h>
 
 #include <Formats/FormatSettings.h>
-#include <Formats/ProtobufReader.h>
-#include <Formats/ProtobufWriter.h>
 #include <DataTypes/DataTypeString.h>
 #include <DataTypes/DataTypeFactory.h>
 
@@ -311,55 +309,6 @@ void DataTypeString::deserializeTextCSV(IColumn & column, ReadBuffer & istr, con
 }
 
 
-void DataTypeString::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
-{
-    if (value_index)
-        return;
-    value_index = static_cast<bool>(protobuf.writeString(assert_cast<const ColumnString &>(column).getDataAt(row_num)));
-}
-
-
-void DataTypeString::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
-{
-    row_added = false;
-    auto & column_string = assert_cast<ColumnString &>(column);
-    ColumnString::Chars & data = column_string.getChars();
-    ColumnString::Offsets & offsets = column_string.getOffsets();
-    size_t old_size = offsets.size();
-    try
-    {
-        if (allow_add_row)
-        {
-            if (protobuf.readStringInto(data))
-            {
-                data.emplace_back(0);
-                offsets.emplace_back(data.size());
-                row_added = true;
-            }
-            else
-                data.resize_assume_reserved(offsets.back());
-        }
-        else
-        {
-            ColumnString::Chars temp_data;
-            if (protobuf.readStringInto(temp_data))
-            {
-                temp_data.emplace_back(0);
-                column_string.popBack(1);
-                old_size = offsets.size();
-                data.insertSmallAllowReadWriteOverflow15(temp_data.begin(), temp_data.end());
-                offsets.emplace_back(data.size());
-            }
-        }
-    }
-    catch (...)
-    {
-        offsets.resize_assume_reserved(old_size);
-        data.resize_assume_reserved(offsets.back());
-        throw;
-    }
-}
-
 Field DataTypeString::getDefault() const
 {
     return String();
diff --git a/src/DataTypes/DataTypeString.h b/src/DataTypes/DataTypeString.h
index f6db8fe73d4..7f8aa1fd0cf 100644
--- a/src/DataTypes/DataTypeString.h
+++ b/src/DataTypes/DataTypeString.h
@@ -47,9 +47,6 @@ public:
     void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
     void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
 
-    void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
-    void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
-
     MutableColumnPtr createColumn() const override;
 
     Field getDefault() const override;
diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp
index c62aa1c1187..2261e776ea2 100644
--- a/src/DataTypes/DataTypeTuple.cpp
+++ b/src/DataTypes/DataTypeTuple.cpp
@@ -504,33 +504,6 @@ void DataTypeTuple::deserializeBinaryBulkWithMultipleStreamsImpl(
     settings.path.pop_back();
 }
 
-void DataTypeTuple::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
-{
-    for (; value_index < elems.size(); ++value_index)
-    {
-        size_t stored = 0;
-        elems[value_index]->serializeProtobuf(extractElementColumn(column, value_index), row_num, protobuf, stored);
-        if (!stored)
-            break;
-    }
-}
-
-void DataTypeTuple::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
-{
-    row_added = false;
-    bool all_elements_get_row = true;
-    addElementSafe(elems, column, [&]
-    {
-        for (const auto & i : ext::range(0, ext::size(elems)))
-        {
-            bool element_row_added;
-            elems[i]->deserializeProtobuf(extractElementColumn(column, i), protobuf, allow_add_row, element_row_added);
-            all_elements_get_row &= element_row_added;
-        }
-    });
-    row_added = all_elements_get_row;
-}
-
 MutableColumnPtr DataTypeTuple::createColumn() const
 {
     size_t size = elems.size();
diff --git a/src/DataTypes/DataTypeTuple.h b/src/DataTypes/DataTypeTuple.h
index 0b28ebe5a63..12ccf574c0e 100644
--- a/src/DataTypes/DataTypeTuple.h
+++ b/src/DataTypes/DataTypeTuple.h
@@ -81,9 +81,6 @@ public:
             DeserializeBinaryBulkStatePtr & state,
             SubstreamsCache * cache) const override;
 
-    void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
-    void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
-
     MutableColumnPtr createColumn() const override;
 
     Field getDefault() const override;
diff --git a/src/DataTypes/DataTypeUUID.cpp b/src/DataTypes/DataTypeUUID.cpp
index 94a043eb472..b66cbadaef0 100644
--- a/src/DataTypes/DataTypeUUID.cpp
+++ b/src/DataTypes/DataTypeUUID.cpp
@@ -1,8 +1,6 @@
 #include <DataTypes/DataTypeUUID.h>
 #include <DataTypes/DataTypeFactory.h>
 #include <Columns/ColumnsNumber.h>
-#include <Formats/ProtobufReader.h>
-#include <Formats/ProtobufWriter.h>
 #include <IO/WriteHelpers.h>
 #include <IO/ReadHelpers.h>
 #include <Common/assert_cast.h>
@@ -79,30 +77,6 @@ void DataTypeUUID::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const
     assert_cast<ColumnUInt128 &>(column).getData().push_back(value);
 }
 
-void DataTypeUUID::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
-{
-    if (value_index)
-        return;
-    value_index = static_cast<bool>(protobuf.writeUUID(UUID(assert_cast<const ColumnUInt128 &>(column).getData()[row_num])));
-}
-
-void DataTypeUUID::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
-{
-    row_added = false;
-    UUID uuid;
-    if (!protobuf.readUUID(uuid))
-        return;
-
-    auto & container = assert_cast<ColumnUInt128 &>(column).getData();
-    if (allow_add_row)
-    {
-        container.emplace_back(uuid);
-        row_added = true;
-    }
-    else
-        container.back() = uuid;
-}
-
 bool DataTypeUUID::equals(const IDataType & rhs) const
 {
     return typeid(rhs) == typeid(*this);
diff --git a/src/DataTypes/DataTypeUUID.h b/src/DataTypes/DataTypeUUID.h
index 6290d05cc3b..de0c7c7d8cf 100644
--- a/src/DataTypes/DataTypeUUID.h
+++ b/src/DataTypes/DataTypeUUID.h
@@ -26,8 +26,6 @@ public:
     void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
     void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
     void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
-    void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
-    void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
 
     bool canBeUsedInBitOperations() const override { return true; }
     bool canBeInsideNullable() const override { return true; }
diff --git a/src/DataTypes/DataTypesDecimal.cpp b/src/DataTypes/DataTypesDecimal.cpp
index 6c325c5d371..e174a242462 100644
--- a/src/DataTypes/DataTypesDecimal.cpp
+++ b/src/DataTypes/DataTypesDecimal.cpp
@@ -4,8 +4,6 @@
 #include <Common/typeid_cast.h>
 #include <Core/DecimalFunctions.h>
 #include <DataTypes/DataTypeFactory.h>
-#include <Formats/ProtobufReader.h>
-#include <Formats/ProtobufWriter.h>
 #include <IO/ReadHelpers.h>
 #include <IO/WriteHelpers.h>
 #include <IO/readDecimalText.h>
@@ -111,33 +109,6 @@ T DataTypeDecimal<T>::parseFromString(const String & str) const
     return x;
 }
 
-template <typename T>
-void DataTypeDecimal<T>::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
-{
-    if (value_index)
-        return;
-    value_index = static_cast<bool>(protobuf.writeDecimal(assert_cast<const ColumnType &>(column).getData()[row_num], this->scale));
-}
-
-
-template <typename T>
-void DataTypeDecimal<T>::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
-{
-    row_added = false;
-    T decimal;
-    if (!protobuf.readDecimal(decimal, this->precision, this->scale))
-        return;
-
-    auto & container = assert_cast<ColumnType &>(column).getData();
-    if (allow_add_row)
-    {
-        container.emplace_back(decimal);
-        row_added = true;
-    }
-    else
-        container.back() = decimal;
-}
-
 
 static DataTypePtr create(const ASTPtr & arguments)
 {
diff --git a/src/DataTypes/DataTypesDecimal.h b/src/DataTypes/DataTypesDecimal.h
index 3f7b4e2ac63..08f44c60c41 100644
--- a/src/DataTypes/DataTypesDecimal.h
+++ b/src/DataTypes/DataTypesDecimal.h
@@ -46,9 +46,6 @@ public:
     void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
     void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
 
-    void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
-    void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
-
     bool equals(const IDataType & rhs) const override;
 
     T parseFromString(const String & str) const;
diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h
index dba5bc3f5a9..c9c848a8037 100644
--- a/src/DataTypes/IDataType.h
+++ b/src/DataTypes/IDataType.h
@@ -26,9 +26,6 @@ class Field;
 using DataTypePtr = std::shared_ptr<const IDataType>;
 using DataTypes = std::vector<DataTypePtr>;
 
-class ProtobufReader;
-class ProtobufWriter;
-
 struct NameAndTypePair;
 
 
@@ -235,10 +232,6 @@ public:
     /// If method will throw an exception, then column will be in same state as before call to method.
     virtual void deserializeBinary(IColumn & column, ReadBuffer & istr) const = 0;
 
-    /** Serialize to a protobuf. */
-    virtual void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const = 0;
-    virtual void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const = 0;
-
     /** Text serialization with escaping but without quoting.
       */
     void serializeAsTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const;
diff --git a/src/DataTypes/IDataTypeDummy.h b/src/DataTypes/IDataTypeDummy.h
index f27359e5f74..08cc0778a6e 100644
--- a/src/DataTypes/IDataTypeDummy.h
+++ b/src/DataTypes/IDataTypeDummy.h
@@ -34,8 +34,6 @@ public:
     void deserializeBinaryBulk(IColumn &, ReadBuffer &, size_t, double) const override      { throwNoSerialization(); }
     void serializeText(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); }
     void deserializeText(IColumn &, ReadBuffer &, const FormatSettings &) const override    { throwNoSerialization(); }
-    void serializeProtobuf(const IColumn &, size_t, ProtobufWriter &, size_t &) const override { throwNoSerialization(); }
-    void deserializeProtobuf(IColumn &, ProtobufReader &, bool, bool &) const override      { throwNoSerialization(); }
 
     MutableColumnPtr createColumn() const override
     {
diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h
index 3f031fa2311..c1f02c65748 100644
--- a/src/Formats/FormatSettings.h
+++ b/src/Formats/FormatSettings.h
@@ -120,7 +120,6 @@ struct FormatSettings
 
     struct
     {
-        bool write_row_delimiters = true;
         /**
          * Some buffers (kafka / rabbit) split the rows internally using callback,
          * and always send one row per message, so we can push there formats
@@ -128,7 +127,7 @@ struct FormatSettings
          * we have to enforce exporting at most one row in the format output,
          * because Protobuf without delimiters is not generally useful.
          */
-        bool allow_many_rows_no_delimiters = false;
+        bool allow_multiple_rows_without_delimiter = false;
     } protobuf;
 
     struct
diff --git a/src/Formats/ProtobufColumnMatcher.cpp b/src/Formats/ProtobufColumnMatcher.cpp
deleted file mode 100644
index f4803d1af10..00000000000
--- a/src/Formats/ProtobufColumnMatcher.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
-#include "ProtobufColumnMatcher.h"
-#if USE_PROTOBUF
-#include <Common/Exception.h>
-#include <google/protobuf/descriptor.pb.h>
-#include <Poco/String.h>
-
-
-namespace DB
-{
-namespace ErrorCodes
-{
-    extern const int NO_COMMON_COLUMNS_WITH_PROTOBUF_SCHEMA;
-}
-
-
-namespace
-{
-    String columnNameToSearchableForm(const String & str)
-    {
-        return Poco::replace(Poco::toUpper(str), ".", "_");
-    }
-}
-
-namespace ProtobufColumnMatcher
-{
-    namespace details
-    {
-        ColumnNameMatcher::ColumnNameMatcher(const std::vector<String> & column_names) : column_usage(column_names.size())
-        {
-            column_usage.resize(column_names.size(), false);
-            for (size_t i = 0; i != column_names.size(); ++i)
-                column_name_to_index_map.emplace(columnNameToSearchableForm(column_names[i]), i);
-        }
-
-        size_t ColumnNameMatcher::findColumn(const String & field_name)
-        {
-            auto it = column_name_to_index_map.find(columnNameToSearchableForm(field_name));
-            if (it == column_name_to_index_map.end())
-                return -1;
-            size_t column_index = it->second;
-            if (column_usage[column_index])
-                return -1;
-            column_usage[column_index] = true;
-            return column_index;
-        }
-
-        void throwNoCommonColumns()
-        {
-            throw Exception("No common columns with provided protobuf schema", ErrorCodes::NO_COMMON_COLUMNS_WITH_PROTOBUF_SCHEMA);
-        }
-    }
-}
-
-}
-#endif
diff --git a/src/Formats/ProtobufColumnMatcher.h b/src/Formats/ProtobufColumnMatcher.h
deleted file mode 100644
index 35521be7a9b..00000000000
--- a/src/Formats/ProtobufColumnMatcher.h
+++ /dev/null
@@ -1,196 +0,0 @@
-#pragma once
-
-#if !defined(ARCADIA_BUILD)
-#    include "config_formats.h"
-#endif
-
-#if USE_PROTOBUF
-#    include <memory>
-#    include <unordered_map>
-#    include <vector>
-#    include <common/types.h>
-#    include <boost/blank.hpp>
-#    include <google/protobuf/descriptor.h>
-#    include <google/protobuf/descriptor.pb.h>
-
-namespace google
-{
-namespace protobuf
-{
-    class Descriptor;
-    class FieldDescriptor;
-}
-}
-
-
-namespace DB
-{
-namespace ProtobufColumnMatcher
-{
-    struct DefaultTraits
-    {
-        using MessageData = boost::blank;
-        using FieldData = boost::blank;
-    };
-
-    template <typename Traits = DefaultTraits>
-    struct Message;
-
-    /// Represents a field in a protobuf message.
-    template <typename Traits = DefaultTraits>
-    struct Field
-    {
-        const google::protobuf::FieldDescriptor * field_descriptor = nullptr;
-
-        /// Same as field_descriptor->number().
-        UInt32 field_number = 0;
-
-        /// Index of a column; either 'column_index' or 'nested_message' is set.
-        size_t column_index = -1;
-        std::unique_ptr<Message<Traits>> nested_message;
-
-        typename Traits::FieldData data;
-    };
-
-    /// Represents a protobuf message.
-    template <typename Traits>
-    struct Message
-    {
-        std::vector<Field<Traits>> fields;
-
-        /// Points to the parent message if this is a nested message.
-        Message * parent = nullptr;
-        size_t index_in_parent = -1;
-
-        typename Traits::MessageData data;
-    };
-
-    /// Utility function finding matching columns for each protobuf field.
-    template <typename Traits = DefaultTraits>
-    static std::unique_ptr<Message<Traits>> matchColumns(
-        const std::vector<String> & column_names,
-        const google::protobuf::Descriptor * message_type);
-
-    template <typename Traits = DefaultTraits>
-    static std::unique_ptr<Message<Traits>> matchColumns(
-        const std::vector<String> & column_names,
-        const google::protobuf::Descriptor * message_type,
-        std::vector<const google::protobuf::FieldDescriptor *> & field_descriptors_without_match);
-
-    namespace details
-    {
-        [[noreturn]] void throwNoCommonColumns();
-
-        class ColumnNameMatcher
-        {
-        public:
-            ColumnNameMatcher(const std::vector<String> & column_names);
-            size_t findColumn(const String & field_name);
-
-        private:
-            std::unordered_map<String, size_t> column_name_to_index_map;
-            std::vector<bool> column_usage;
-        };
-
-        template <typename Traits>
-        std::unique_ptr<Message<Traits>> matchColumnsRecursive(
-            ColumnNameMatcher & name_matcher,
-            const google::protobuf::Descriptor * message_type,
-            const String & field_name_prefix,
-            std::vector<const google::protobuf::FieldDescriptor *> * field_descriptors_without_match)
-        {
-            auto message = std::make_unique<Message<Traits>>();
-            for (int i = 0; i != message_type->field_count(); ++i)
-            {
-                const google::protobuf::FieldDescriptor * field_descriptor = message_type->field(i);
-                if ((field_descriptor->type() == google::protobuf::FieldDescriptor::TYPE_MESSAGE)
-                    || (field_descriptor->type() == google::protobuf::FieldDescriptor::TYPE_GROUP))
-                {
-                    auto nested_message = matchColumnsRecursive<Traits>(
-                        name_matcher,
-                        field_descriptor->message_type(),
-                        field_name_prefix + field_descriptor->name() + ".",
-                        field_descriptors_without_match);
-                    if (nested_message)
-                    {
-                        message->fields.emplace_back();
-                        auto & current_field = message->fields.back();
-                        current_field.field_number = field_descriptor->number();
-                        current_field.field_descriptor = field_descriptor;
-                        current_field.nested_message = std::move(nested_message);
-                        current_field.nested_message->parent = message.get();
-                    }
-                }
-                else
-                {
-                    size_t column_index = name_matcher.findColumn(field_name_prefix + field_descriptor->name());
-                    if (column_index == static_cast<size_t>(-1))
-                    {
-                        if (field_descriptors_without_match)
-                            field_descriptors_without_match->emplace_back(field_descriptor);
-                    }
-                    else
-                    {
-                        message->fields.emplace_back();
-                        auto & current_field = message->fields.back();
-                        current_field.field_number = field_descriptor->number();
-                        current_field.field_descriptor = field_descriptor;
-                        current_field.column_index = column_index;
-                    }
-                }
-            }
-
-            if (message->fields.empty())
-                return nullptr;
-
-            // Columns should be sorted by field_number, it's necessary for writing protobufs and useful reading protobufs.
-            std::sort(message->fields.begin(), message->fields.end(), [](const Field<Traits> & left, const Field<Traits> & right)
-            {
-                return left.field_number < right.field_number;
-            });
-
-            for (size_t i = 0; i != message->fields.size(); ++i)
-            {
-                auto & field = message->fields[i];
-                if (field.nested_message)
-                    field.nested_message->index_in_parent = i;
-            }
-
-            return message;
-        }
-    }
-
-    template <typename Data>
-    static std::unique_ptr<Message<Data>> matchColumnsImpl(
-        const std::vector<String> & column_names,
-        const google::protobuf::Descriptor * message_type,
-        std::vector<const google::protobuf::FieldDescriptor *> * field_descriptors_without_match)
-    {
-        details::ColumnNameMatcher name_matcher(column_names);
-        auto message = details::matchColumnsRecursive<Data>(name_matcher, message_type, "", field_descriptors_without_match);
-        if (!message)
-            details::throwNoCommonColumns();
-        return message;
-    }
-
-    template <typename Data>
-    static std::unique_ptr<Message<Data>> matchColumns(
-        const std::vector<String> & column_names,
-        const google::protobuf::Descriptor * message_type)
-    {
-        return matchColumnsImpl<Data>(column_names, message_type, nullptr);
-    }
-
-    template <typename Data>
-    static std::unique_ptr<Message<Data>> matchColumns(
-        const std::vector<String> & column_names,
-        const google::protobuf::Descriptor * message_type,
-        std::vector<const google::protobuf::FieldDescriptor *> & field_descriptors_without_match)
-    {
-        return matchColumnsImpl<Data>(column_names, message_type, &field_descriptors_without_match);
-    }
-}
-
-}
-
-#endif
diff --git a/src/Formats/ProtobufReader.cpp b/src/Formats/ProtobufReader.cpp
index 8f28d279c06..0e05b59badf 100644
--- a/src/Formats/ProtobufReader.cpp
+++ b/src/Formats/ProtobufReader.cpp
@@ -1,14 +1,7 @@
 #include "ProtobufReader.h"
 
 #if USE_PROTOBUF
-#    include <optional>
-#    include <AggregateFunctions/IAggregateFunction.h>
-#    include <DataTypes/DataTypesDecimal.h>
-#    include <IO/ReadBufferFromString.h>
-#    include <IO/ReadHelpers.h>
-#    include <IO/WriteBufferFromVector.h>
-#    include <IO/WriteHelpers.h>
-#    include <boost/numeric/conversion/cast.hpp>
+#   include <IO/ReadHelpers.h>
 
 
 namespace DB
@@ -16,7 +9,6 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int UNKNOWN_PROTOBUF_FORMAT;
-    extern const int PROTOBUF_BAD_CAST;
 }
 
 
@@ -41,36 +33,21 @@ namespace
     constexpr Int64 END_OF_FILE = -3;
 
     Int64 decodeZigZag(UInt64 n) { return static_cast<Int64>((n >> 1) ^ (~(n & 1) + 1)); }
-
 }
 
 
-// SimpleReader is an utility class to deserialize protobufs.
-// Knows nothing about protobuf schemas, just provides useful functions to deserialize data.
-ProtobufReader::SimpleReader::SimpleReader(ReadBuffer & in_, const bool use_length_delimiters_)
+ProtobufReader::ProtobufReader(ReadBuffer & in_)
     : in(in_)
-    , cursor(0)
-    , current_message_level(0)
-    , current_message_end(0)
-    , field_end(0)
-    , last_string_pos(-1)
-    , use_length_delimiters(use_length_delimiters_)
 {
 }
 
-[[noreturn]] void ProtobufReader::SimpleReader::throwUnknownFormat() const
-{
-    throw Exception(std::string("Protobuf messages are corrupted or don't match the provided schema.") + (use_length_delimiters ? " Please note that Protobuf stream is length-delimited: every message is prefixed by its length in varint." : ""), ErrorCodes::UNKNOWN_PROTOBUF_FORMAT);
-}
-
-bool ProtobufReader::SimpleReader::startMessage()
+void ProtobufReader::startMessage(bool with_length_delimiter_)
 {
     // Start reading a root message.
     assert(!current_message_level);
-    if (unlikely(in.eof()))
-        return false;
 
-    if (use_length_delimiters)
+    root_message_has_length_delimiter = with_length_delimiter_;
+    if (root_message_has_length_delimiter)
     {
         size_t size_of_message = readVarint();
         current_message_end = cursor + size_of_message;
@@ -80,11 +57,11 @@ bool ProtobufReader::SimpleReader::startMessage()
         current_message_end = END_OF_FILE;
     }
     ++current_message_level;
+    field_number = next_field_number = 0;
     field_end = cursor;
-    return true;
 }
 
-void ProtobufReader::SimpleReader::endMessage(bool ignore_errors)
+void ProtobufReader::endMessage(bool ignore_errors)
 {
     if (!current_message_level)
         return;
@@ -94,6 +71,8 @@ void ProtobufReader::SimpleReader::endMessage(bool ignore_errors)
     {
         if (cursor < root_message_end)
             ignore(root_message_end - cursor);
+        else if (root_message_end == END_OF_FILE)
+            ignoreAll();
         else if (ignore_errors)
             moveCursorBackward(cursor - root_message_end);
         else
@@ -104,7 +83,7 @@ void ProtobufReader::SimpleReader::endMessage(bool ignore_errors)
     parent_message_ends.clear();
 }
 
-void ProtobufReader::SimpleReader::startNestedMessage()
+void ProtobufReader::startNestedMessage()
 {
     assert(current_message_level >= 1);
     if ((cursor > field_end) && (field_end != END_OF_GROUP))
@@ -115,10 +94,11 @@ void ProtobufReader::SimpleReader::startNestedMessage()
     parent_message_ends.emplace_back(current_message_end);
     current_message_end = field_end;
     ++current_message_level;
+    field_number = next_field_number = 0;
     field_end = cursor;
 }
 
-void ProtobufReader::SimpleReader::endNestedMessage()
+void ProtobufReader::endNestedMessage()
 {
     assert(current_message_level >= 2);
     if (cursor != current_message_end)
@@ -137,12 +117,20 @@ void ProtobufReader::SimpleReader::endNestedMessage()
     --current_message_level;
     current_message_end = parent_message_ends.back();
     parent_message_ends.pop_back();
+    field_number = next_field_number = 0;
     field_end = cursor;
 }
 
-bool ProtobufReader::SimpleReader::readFieldNumber(UInt32 & field_number)
+bool ProtobufReader::readFieldNumber(int & field_number_)
 {
     assert(current_message_level);
+    if (next_field_number)
+    {
+        field_number_ = field_number = next_field_number;
+        next_field_number = 0;
+        return true;
+    }
+
     if (field_end != cursor)
     {
         if (field_end == END_OF_VARINT)
@@ -183,7 +171,8 @@ bool ProtobufReader::SimpleReader::readFieldNumber(UInt32 & field_number)
     if (unlikely(varint & (static_cast<UInt64>(0xFFFFFFFF) << 32)))
         throwUnknownFormat();
     UInt32 key = static_cast<UInt32>(varint);
-    field_number = (key >> 3);
+    field_number_ = field_number = (key >> 3);
+    next_field_number = 0;
     WireType wire_type = static_cast<WireType>(key & 0x07);
     switch (wire_type)
     {
@@ -224,77 +213,91 @@ bool ProtobufReader::SimpleReader::readFieldNumber(UInt32 & field_number)
     throwUnknownFormat();
 }
 
-bool ProtobufReader::SimpleReader::readUInt(UInt64 & value)
+UInt64 ProtobufReader::readUInt()
 {
+    UInt64 value;
     if (field_end == END_OF_VARINT)
     {
         value = readVarint();
         field_end = cursor;
-        return true;
     }
-
-    if (unlikely(cursor >= field_end))
-        return false;
-
-    value = readVarint();
-    return true;
+    else
+    {
+        value = readVarint();
+        if (cursor < field_end)
+            next_field_number = field_number;
+        else if (unlikely(cursor) > field_end)
+            throwUnknownFormat();
+    }
+    return value;
 }
 
-bool ProtobufReader::SimpleReader::readInt(Int64 & value)
+Int64 ProtobufReader::readInt()
 {
-    UInt64 varint;
-    if (!readUInt(varint))
-        return false;
-    value = static_cast<Int64>(varint);
-    return true;
+    return static_cast<Int64>(readUInt());
 }
 
-bool ProtobufReader::SimpleReader::readSInt(Int64 & value)
+Int64 ProtobufReader::readSInt()
 {
-    UInt64 varint;
-    if (!readUInt(varint))
-        return false;
-    value = decodeZigZag(varint);
-    return true;
+    return decodeZigZag(readUInt());
 }
 
 template<typename T>
-bool ProtobufReader::SimpleReader::readFixed(T & value)
+T ProtobufReader::readFixed()
 {
-    if (unlikely(cursor >= field_end))
-        return false;
-
+    if (unlikely(cursor + static_cast<Int64>(sizeof(T)) > field_end))
+        throwUnknownFormat();
+    T value;
     readBinary(&value, sizeof(T));
-    return true;
+    if (cursor < field_end)
+        next_field_number = field_number;
+    return value;
 }
 
-bool ProtobufReader::SimpleReader::readStringInto(PaddedPODArray<UInt8> & str)
+template Int32 ProtobufReader::readFixed<Int32>();
+template UInt32 ProtobufReader::readFixed<UInt32>();
+template Int64 ProtobufReader::readFixed<Int64>();
+template UInt64 ProtobufReader::readFixed<UInt64>();
+template Float32 ProtobufReader::readFixed<Float32>();
+template Float64 ProtobufReader::readFixed<Float64>();
+
+void ProtobufReader::readString(String & str)
+{
+    if (unlikely(cursor > field_end))
+        throwUnknownFormat();
+    size_t length = field_end - cursor;
+    str.resize(length);
+    readBinary(reinterpret_cast<char*>(str.data()), length);
+}
+
+void ProtobufReader::readStringAndAppend(PaddedPODArray<UInt8> & str)
 {
-    if (unlikely(cursor == last_string_pos))
-        return false; /// We don't want to read the same empty string again.
-    last_string_pos = cursor;
     if (unlikely(cursor > field_end))
         throwUnknownFormat();
     size_t length = field_end - cursor;
     size_t old_size = str.size();
     str.resize(old_size + length);
     readBinary(reinterpret_cast<char*>(str.data() + old_size), length);
-    return true;
 }
 
-void ProtobufReader::SimpleReader::readBinary(void* data, size_t size)
+void ProtobufReader::readBinary(void* data, size_t size)
 {
     in.readStrict(reinterpret_cast<char*>(data), size);
     cursor += size;
 }
 
-void ProtobufReader::SimpleReader::ignore(UInt64 num_bytes)
+void ProtobufReader::ignore(UInt64 num_bytes)
 {
     in.ignore(num_bytes);
     cursor += num_bytes;
 }
 
-void ProtobufReader::SimpleReader::moveCursorBackward(UInt64 num_bytes)
+void ProtobufReader::ignoreAll()
+{
+    cursor += in.tryIgnore(std::numeric_limits<size_t>::max());
+}
+
+void ProtobufReader::moveCursorBackward(UInt64 num_bytes)
 {
     if (in.offset() < num_bytes)
         throwUnknownFormat();
@@ -302,7 +305,7 @@ void ProtobufReader::SimpleReader::moveCursorBackward(UInt64 num_bytes)
     cursor -= num_bytes;
 }
 
-UInt64 ProtobufReader::SimpleReader::continueReadingVarint(UInt64 first_byte)
+UInt64 ProtobufReader::continueReadingVarint(UInt64 first_byte)
 {
     UInt64 result = (first_byte & ~static_cast<UInt64>(0x80));
     char c;
@@ -342,7 +345,7 @@ UInt64 ProtobufReader::SimpleReader::continueReadingVarint(UInt64 first_byte)
     throwUnknownFormat();
 }
 
-void ProtobufReader::SimpleReader::ignoreVarint()
+void ProtobufReader::ignoreVarint()
 {
     char c;
 
@@ -379,7 +382,7 @@ void ProtobufReader::SimpleReader::ignoreVarint()
     throwUnknownFormat();
 }
 
-void ProtobufReader::SimpleReader::ignoreGroup()
+void ProtobufReader::ignoreGroup()
 {
     size_t level = 1;
     while (true)
@@ -424,803 +427,15 @@ void ProtobufReader::SimpleReader::ignoreGroup()
     }
 }
 
-// Implementation for a converter from any protobuf field type to any DB data type.
-class ProtobufReader::ConverterBaseImpl : public ProtobufReader::IConverter
+[[noreturn]] void ProtobufReader::throwUnknownFormat() const
 {
-public:
-    ConverterBaseImpl(SimpleReader & simple_reader_, const google::protobuf::FieldDescriptor * field_)
-        : simple_reader(simple_reader_), field(field_) {}
-
-    bool readStringInto(PaddedPODArray<UInt8> &) override
-    {
-        cannotConvertType("String");
-    }
-
-    bool readInt8(Int8 &) override
-    {
-        cannotConvertType("Int8");
-    }
-
-    bool readUInt8(UInt8 &) override
-    {
-        cannotConvertType("UInt8");
-    }
-
-    bool readInt16(Int16 &) override
-    {
-        cannotConvertType("Int16");
-    }
-
-    bool readUInt16(UInt16 &) override
-    {
-        cannotConvertType("UInt16");
-    }
-
-    bool readInt32(Int32 &) override
-    {
-        cannotConvertType("Int32");
-    }
-
-    bool readUInt32(UInt32 &) override
-    {
-        cannotConvertType("UInt32");
-    }
-
-    bool readInt64(Int64 &) override
-    {
-        cannotConvertType("Int64");
-    }
-
-    bool readUInt64(UInt64 &) override
-    {
-        cannotConvertType("UInt64");
-    }
-
-    bool readUInt128(UInt128 &) override
-    {
-        cannotConvertType("UInt128");
-    }
-
-    bool readInt128(Int128 &) override { cannotConvertType("Int128"); }
-    bool readInt256(Int256 &) override { cannotConvertType("Int256"); }
-    bool readUInt256(UInt256 &) override { cannotConvertType("UInt256"); }
-
-    bool readFloat32(Float32 &) override
-    {
-        cannotConvertType("Float32");
-    }
-
-    bool readFloat64(Float64 &) override
-    {
-        cannotConvertType("Float64");
-    }
-
-    void prepareEnumMapping8(const std::vector<std::pair<std::string, Int8>> &) override {}
-    void prepareEnumMapping16(const std::vector<std::pair<std::string, Int16>> &) override {}
-
-    bool readEnum8(Int8 &) override
-    {
-        cannotConvertType("Enum");
-    }
-
-    bool readEnum16(Int16 &) override
-    {
-        cannotConvertType("Enum");
-    }
-
-    bool readUUID(UUID &) override
-    {
-        cannotConvertType("UUID");
-    }
-
-    bool readDate(DayNum &) override
-    {
-        cannotConvertType("Date");
-    }
-
-    bool readDateTime(time_t &) override
-    {
-        cannotConvertType("DateTime");
-    }
-
-    bool readDateTime64(DateTime64 &, UInt32) override
-    {
-        cannotConvertType("DateTime64");
-    }
-
-    bool readDecimal32(Decimal32 &, UInt32, UInt32) override
-    {
-        cannotConvertType("Decimal32");
-    }
-
-    bool readDecimal64(Decimal64 &, UInt32, UInt32) override
-    {
-        cannotConvertType("Decimal64");
-    }
-
-    bool readDecimal128(Decimal128 &, UInt32, UInt32) override
-    {
-        cannotConvertType("Decimal128");
-    }
-
-    bool readDecimal256(Decimal256 &, UInt32, UInt32) override
-    {
-        cannotConvertType("Decimal256");
-    }
-
-
-    bool readAggregateFunction(const AggregateFunctionPtr &, AggregateDataPtr, Arena &) override
-    {
-        cannotConvertType("AggregateFunction");
-    }
-
-protected:
-    [[noreturn]] void cannotConvertType(const String & type_name)
-    {
-        throw Exception(
-            String("Could not convert type '") + field->type_name() + "' from protobuf field '" + field->name() + "' to data type '"
-                + type_name + "'",
-            ErrorCodes::PROTOBUF_BAD_CAST);
-    }
-
-    [[noreturn]] void cannotConvertValue(const String & value, const String & type_name)
-    {
-        throw Exception(
-            "Could not convert value '" + value + "' from protobuf field '" + field->name() + "' to data type '" + type_name + "'",
-            ErrorCodes::PROTOBUF_BAD_CAST);
-    }
-
-    template <typename To, typename From>
-    To numericCast(From value)
-    {
-        if constexpr (std::is_same_v<To, From>)
-            return value;
-        To result;
-        try
-        {
-            result = boost::numeric_cast<To>(value);
-        }
-        catch (boost::numeric::bad_numeric_cast &)
-        {
-            cannotConvertValue(toString(value), TypeName<To>::get());
-        }
-        return result;
-    }
-
-    template <typename To>
-    To parseFromString(const PaddedPODArray<UInt8> & str)
-    {
-        try
-        {
-            To result;
-            ReadBufferFromString buf(str);
-            readText(result, buf);
-            return result;
-        }
-        catch (...)
-        {
-            cannotConvertValue(StringRef(str.data(), str.size()).toString(), TypeName<To>::get());
-        }
-    }
-
-    SimpleReader & simple_reader;
-    const google::protobuf::FieldDescriptor * field;
-};
-
-
-class ProtobufReader::ConverterFromString : public ConverterBaseImpl
-{
-public:
-    using ConverterBaseImpl::ConverterBaseImpl;
-
-    bool readStringInto(PaddedPODArray<UInt8> & str) override { return simple_reader.readStringInto(str); }
-
-    bool readInt8(Int8 & value) override { return readNumeric(value); }
-    bool readUInt8(UInt8 & value) override { return readNumeric(value); }
-    bool readInt16(Int16 & value) override { return readNumeric(value); }
-    bool readUInt16(UInt16 & value) override { return readNumeric(value); }
-    bool readInt32(Int32 & value) override { return readNumeric(value); }
-    bool readUInt32(UInt32 & value) override { return readNumeric(value); }
-    bool readInt64(Int64 & value) override { return readNumeric(value); }
-    bool readUInt64(UInt64 & value) override { return readNumeric(value); }
-    bool readFloat32(Float32 & value) override { return readNumeric(value); }
-    bool readFloat64(Float64 & value) override { return readNumeric(value); }
-
-    void prepareEnumMapping8(const std::vector<std::pair<String, Int8>> & name_value_pairs) override
-    {
-        prepareEnumNameToValueMap(name_value_pairs);
-    }
-    void prepareEnumMapping16(const std::vector<std::pair<String, Int16>> & name_value_pairs) override
-    {
-        prepareEnumNameToValueMap(name_value_pairs);
-    }
-
-    bool readEnum8(Int8 & value) override { return readEnum(value); }
-    bool readEnum16(Int16 & value) override { return readEnum(value); }
-
-    bool readUUID(UUID & uuid) override
-    {
-        if (!readTempString())
-            return false;
-        ReadBufferFromString buf(temp_string);
-        readUUIDText(uuid, buf);
-        return true;
-    }
-
-    bool readDate(DayNum & date) override
-    {
-        if (!readTempString())
-            return false;
-        ReadBufferFromString buf(temp_string);
-        readDateText(date, buf);
-        return true;
-    }
-
-    bool readDateTime(time_t & tm) override
-    {
-        if (!readTempString())
-            return false;
-        ReadBufferFromString buf(temp_string);
-        readDateTimeText(tm, buf);
-        return true;
-    }
-
-    bool readDateTime64(DateTime64 & date_time, UInt32 scale) override
-    {
-        if (!readTempString())
-            return false;
-        ReadBufferFromString buf(temp_string);
-        readDateTime64Text(date_time, scale, buf);
-        return true;
-    }
-
-    bool readDecimal32(Decimal32 & decimal, UInt32 precision, UInt32 scale) override { return readDecimal(decimal, precision, scale); }
-    bool readDecimal64(Decimal64 & decimal, UInt32 precision, UInt32 scale) override { return readDecimal(decimal, precision, scale); }
-    bool readDecimal128(Decimal128 & decimal, UInt32 precision, UInt32 scale) override { return readDecimal(decimal, precision, scale); }
-    bool readDecimal256(Decimal256 & decimal, UInt32 precision, UInt32 scale) override { return readDecimal(decimal, precision, scale); }
-
-    bool readAggregateFunction(const AggregateFunctionPtr & function, AggregateDataPtr place, Arena & arena) override
-    {
-        if (!readTempString())
-            return false;
-        ReadBufferFromString buf(temp_string);
-        function->deserialize(place, buf, &arena);
-        return true;
-    }
-
-private:
-    bool readTempString()
-    {
-        temp_string.clear();
-        return simple_reader.readStringInto(temp_string);
-    }
-
-    template <typename T>
-    bool readNumeric(T & value)
-    {
-        if (!readTempString())
-            return false;
-        value = parseFromString<T>(temp_string);
-        return true;
-    }
-
-    template<typename T>
-    bool readEnum(T & value)
-    {
-        if (!readTempString())
-            return false;
-        StringRef ref(temp_string.data(), temp_string.size());
-        auto it = enum_name_to_value_map->find(ref);
-        if (it == enum_name_to_value_map->end())
-            cannotConvertValue(ref.toString(), "Enum");
-        value = static_cast<T>(it->second);
-        return true;
-    }
-
-    template <typename T>
-    bool readDecimal(Decimal<T> & decimal, UInt32 precision, UInt32 scale)
-    {
-        if (!readTempString())
-            return false;
-        ReadBufferFromString buf(temp_string);
-        DataTypeDecimal<Decimal<T>>::readText(decimal, buf, precision, scale);
-        return true;
-    }
-
-    template <typename T>
-    void prepareEnumNameToValueMap(const std::vector<std::pair<String, T>> & name_value_pairs)
-    {
-        if (likely(enum_name_to_value_map.has_value()))
-            return;
-        enum_name_to_value_map.emplace();
-        for (const auto & name_value_pair : name_value_pairs)
-            enum_name_to_value_map->emplace(name_value_pair.first, name_value_pair.second);
-    }
-
-    PaddedPODArray<UInt8> temp_string;
-    std::optional<std::unordered_map<StringRef, Int16>> enum_name_to_value_map;
-};
-
-#    define PROTOBUF_READER_CREATE_CONVERTER_SPECIALIZATION_FOR_STRINGS(field_type_id) \
-        template <> \
-        std::unique_ptr<ProtobufReader::IConverter> ProtobufReader::createConverter<field_type_id>( \
-            const google::protobuf::FieldDescriptor * field) \
-        { \
-            return std::make_unique<ConverterFromString>(simple_reader, field); \
-        }
-PROTOBUF_READER_CREATE_CONVERTER_SPECIALIZATION_FOR_STRINGS(google::protobuf::FieldDescriptor::TYPE_STRING)
-PROTOBUF_READER_CREATE_CONVERTER_SPECIALIZATION_FOR_STRINGS(google::protobuf::FieldDescriptor::TYPE_BYTES)
-
-#    undef PROTOBUF_READER_CREATE_CONVERTER_SPECIALIZATION_FOR_STRINGS
-
-
-template <int field_type_id, typename FromType>
-class ProtobufReader::ConverterFromNumber : public ConverterBaseImpl
-{
-public:
-    using ConverterBaseImpl::ConverterBaseImpl;
-
-    bool readStringInto(PaddedPODArray<UInt8> & str) override
-    {
-        FromType number;
-        if (!readField(number))
-            return false;
-        WriteBufferFromVector<PaddedPODArray<UInt8>> buf(str);
-        writeText(number, buf);
-        return true;
-    }
-
-    bool readInt8(Int8 & value) override { return readNumeric(value); }
-    bool readUInt8(UInt8 & value) override { return readNumeric(value); }
-    bool readInt16(Int16 & value) override { return readNumeric(value); }
-    bool readUInt16(UInt16 & value) override { return readNumeric(value); }
-    bool readInt32(Int32 & value) override { return readNumeric(value); }
-    bool readUInt32(UInt32 & value) override { return readNumeric(value); }
-    bool readInt64(Int64 & value) override { return readNumeric(value); }
-    bool readUInt64(UInt64 & value) override { return readNumeric(value); }
-    bool readFloat32(Float32 & value) override { return readNumeric(value); }
-    bool readFloat64(Float64 & value) override { return readNumeric(value); }
-
-    bool readEnum8(Int8 & value) override { return readEnum(value); }
-    bool readEnum16(Int16 & value) override { return readEnum(value); }
-
-    void prepareEnumMapping8(const std::vector<std::pair<String, Int8>> & name_value_pairs) override
-    {
-        prepareSetOfEnumValues(name_value_pairs);
-    }
-    void prepareEnumMapping16(const std::vector<std::pair<String, Int16>> & name_value_pairs) override
-    {
-        prepareSetOfEnumValues(name_value_pairs);
-    }
-
-    bool readDate(DayNum & date) override
-    {
-        UInt16 number;
-        if (!readNumeric(number))
-            return false;
-        date = DayNum(number);
-        return true;
-    }
-
-    bool readDateTime(time_t & tm) override
-    {
-        UInt32 number;
-        if (!readNumeric(number))
-            return false;
-        tm = number;
-        return true;
-    }
-
-    bool readDateTime64(DateTime64 & date_time, UInt32 scale) override
-    {
-        return readDecimal(date_time, scale);
-    }
-
-    bool readDecimal32(Decimal32 & decimal, UInt32, UInt32 scale) override { return readDecimal(decimal, scale); }
-    bool readDecimal64(Decimal64 & decimal, UInt32, UInt32 scale) override { return readDecimal(decimal, scale); }
-    bool readDecimal128(Decimal128 & decimal, UInt32, UInt32 scale) override { return readDecimal(decimal, scale); }
-
-private:
-    template <typename To>
-    bool readNumeric(To & value)
-    {
-        FromType number;
-        if (!readField(number))
-            return false;
-        value = numericCast<To>(number);
-        return true;
-    }
-
-    template<typename EnumType>
-    bool readEnum(EnumType & value)
-    {
-        if constexpr (!is_integer_v<FromType>)
-            cannotConvertType("Enum"); // It's not correct to convert floating point to enum.
-        FromType number;
-        if (!readField(number))
-            return false;
-        value = numericCast<EnumType>(number);
-        if (set_of_enum_values->find(value) == set_of_enum_values->end())
-            cannotConvertValue(toString(value), "Enum");
-        return true;
-    }
-
-    template<typename EnumType>
-    void prepareSetOfEnumValues(const std::vector<std::pair<String, EnumType>> & name_value_pairs)
-    {
-        if (likely(set_of_enum_values.has_value()))
-            return;
-        set_of_enum_values.emplace();
-        for (const auto & name_value_pair : name_value_pairs)
-            set_of_enum_values->emplace(name_value_pair.second);
-    }
-
-    template <typename S>
-    bool readDecimal(Decimal<S> & decimal, UInt32 scale)
-    {
-        FromType number;
-        if (!readField(number))
-            return false;
-        decimal.value = convertToDecimal<DataTypeNumber<FromType>, DataTypeDecimal<Decimal<S>>>(number, scale);
-        return true;
-    }
-
-    bool readField(FromType & value)
-    {
-        if constexpr (((field_type_id == google::protobuf::FieldDescriptor::TYPE_INT32) && std::is_same_v<FromType, Int64>)
-                   || ((field_type_id == google::protobuf::FieldDescriptor::TYPE_INT64) && std::is_same_v<FromType, Int64>))
-        {
-            return simple_reader.readInt(value);
-        }
-        else if constexpr (((field_type_id == google::protobuf::FieldDescriptor::TYPE_UINT32) && std::is_same_v<FromType, UInt64>)
-                        || ((field_type_id == google::protobuf::FieldDescriptor::TYPE_UINT64) && std::is_same_v<FromType, UInt64>))
-        {
-            return simple_reader.readUInt(value);
-        }
-
-        else if constexpr (((field_type_id == google::protobuf::FieldDescriptor::TYPE_SINT32) && std::is_same_v<FromType, Int64>)
-                        || ((field_type_id == google::protobuf::FieldDescriptor::TYPE_SINT64) && std::is_same_v<FromType, Int64>))
-        {
-            return simple_reader.readSInt(value);
-        }
-        else
-        {
-            static_assert(((field_type_id == google::protobuf::FieldDescriptor::TYPE_FIXED32) && std::is_same_v<FromType, UInt32>)
-                       || ((field_type_id == google::protobuf::FieldDescriptor::TYPE_SFIXED32) && std::is_same_v<FromType, Int32>)
-                       || ((field_type_id == google::protobuf::FieldDescriptor::TYPE_FIXED64) && std::is_same_v<FromType, UInt64>)
-                       || ((field_type_id == google::protobuf::FieldDescriptor::TYPE_SFIXED64) && std::is_same_v<FromType, Int64>)
-                       || ((field_type_id == google::protobuf::FieldDescriptor::TYPE_FLOAT) && std::is_same_v<FromType, float>)
-                       || ((field_type_id == google::protobuf::FieldDescriptor::TYPE_DOUBLE) && std::is_same_v<FromType, double>));
-            return simple_reader.readFixed(value);
-        }
-    }
-
-    std::optional<std::unordered_set<Int16>> set_of_enum_values;
-};
-
-#    define PROTOBUF_READER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(field_type_id, field_type) \
-        template <> \
-        std::unique_ptr<ProtobufReader::IConverter> ProtobufReader::createConverter<field_type_id>( \
-            const google::protobuf::FieldDescriptor * field) \
-        { \
-            return std::make_unique<ConverterFromNumber<field_type_id, field_type>>(simple_reader, field); /* NOLINT */ \
-        }
-
-PROTOBUF_READER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_INT32, Int64);
-PROTOBUF_READER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_SINT32, Int64);
-PROTOBUF_READER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_UINT32, UInt64);
-PROTOBUF_READER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_INT64, Int64);
-PROTOBUF_READER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_SINT64, Int64);
-PROTOBUF_READER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_UINT64, UInt64);
-PROTOBUF_READER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_FIXED32, UInt32);
-PROTOBUF_READER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_SFIXED32, Int32);
-PROTOBUF_READER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_FIXED64, UInt64);
-PROTOBUF_READER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_SFIXED64, Int64);
-PROTOBUF_READER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_FLOAT, float);
-PROTOBUF_READER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_DOUBLE, double);
-
-#    undef PROTOBUF_READER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS
-
-
-class ProtobufReader::ConverterFromBool : public ConverterBaseImpl
-{
-public:
-    using ConverterBaseImpl::ConverterBaseImpl;
-
-    bool readStringInto(PaddedPODArray<UInt8> & str) override
-    {
-        bool b;
-        if (!readField(b))
-            return false;
-        StringRef ref(b ? "true" : "false");
-        str.insert(ref.data, ref.data + ref.size);
-        return true;
-    }
-
-    bool readInt8(Int8 & value) override { return readNumeric(value); }
-    bool readUInt8(UInt8 & value) override { return readNumeric(value); }
-    bool readInt16(Int16 & value) override { return readNumeric(value); }
-    bool readUInt16(UInt16 & value) override { return readNumeric(value); }
-    bool readInt32(Int32 & value) override { return readNumeric(value); }
-    bool readUInt32(UInt32 & value) override { return readNumeric(value); }
-    bool readInt64(Int64 & value) override { return readNumeric(value); }
-    bool readUInt64(UInt64 & value) override { return readNumeric(value); }
-    bool readFloat32(Float32 & value) override { return readNumeric(value); }
-    bool readFloat64(Float64 & value) override { return readNumeric(value); }
-    bool readDecimal32(Decimal32 & decimal, UInt32, UInt32) override { return readNumeric(decimal.value); }
-    bool readDecimal64(Decimal64 & decimal, UInt32, UInt32) override { return readNumeric(decimal.value); }
-    bool readDecimal128(Decimal128 & decimal, UInt32, UInt32) override { return readNumeric(decimal.value); }
-
-private:
-    template<typename T>
-    bool readNumeric(T & value)
-    {
-        bool b;
-        if (!readField(b))
-            return false;
-        value = b ? 1 : 0;
-        return true;
-    }
-
-    bool readField(bool & b)
-    {
-        UInt64 number;
-        if (!simple_reader.readUInt(number))
-            return false;
-        b = static_cast<bool>(number);
-        return true;
-    }
-};
-
-template <>
-std::unique_ptr<ProtobufReader::IConverter> ProtobufReader::createConverter<google::protobuf::FieldDescriptor::TYPE_BOOL>(
-    const google::protobuf::FieldDescriptor * field)
-{
-    return std::make_unique<ConverterFromBool>(simple_reader, field);
+    throw Exception(
+        std::string("Protobuf messages are corrupted or don't match the provided schema.")
+            + (root_message_has_length_delimiter
+                   ? " Please note that Protobuf stream is length-delimited: every message is prefixed by its length in varint."
+                   : ""),
+        ErrorCodes::UNKNOWN_PROTOBUF_FORMAT);
 }
-
-
-class ProtobufReader::ConverterFromEnum : public ConverterBaseImpl
-{
-public:
-    using ConverterBaseImpl::ConverterBaseImpl;
-
-    bool readStringInto(PaddedPODArray<UInt8> & str) override
-    {
-        prepareEnumPbNumberToNameMap();
-        Int64 pbnumber;
-        if (!readField(pbnumber))
-            return false;
-        auto it = enum_pbnumber_to_name_map->find(pbnumber);
-        if (it == enum_pbnumber_to_name_map->end())
-            cannotConvertValue(toString(pbnumber), "Enum");
-        const auto & ref = it->second;
-        str.insert(ref.data, ref.data + ref.size);
-        return true;
-    }
-
-    bool readInt8(Int8 & value) override { return readNumeric(value); }
-    bool readUInt8(UInt8 & value) override { return readNumeric(value); }
-    bool readInt16(Int16 & value) override { return readNumeric(value); }
-    bool readUInt16(UInt16 & value) override { return readNumeric(value); }
-    bool readInt32(Int32 & value) override { return readNumeric(value); }
-    bool readUInt32(UInt32 & value) override { return readNumeric(value); }
-    bool readInt64(Int64 & value) override { return readNumeric(value); }
-    bool readUInt64(UInt64 & value) override { return readNumeric(value); }
-
-    void prepareEnumMapping8(const std::vector<std::pair<String, Int8>> & name_value_pairs) override
-    {
-        prepareEnumPbNumberToValueMap(name_value_pairs);
-    }
-    void prepareEnumMapping16(const std::vector<std::pair<String, Int16>> & name_value_pairs) override
-    {
-        prepareEnumPbNumberToValueMap(name_value_pairs);
-    }
-
-    bool readEnum8(Int8 & value) override { return readEnum(value); }
-    bool readEnum16(Int16 & value) override { return readEnum(value); }
-
-private:
-    template <typename T>
-    bool readNumeric(T & value)
-    {
-        Int64 pbnumber;
-        if (!readField(pbnumber))
-            return false;
-        value = numericCast<T>(pbnumber);
-        return true;
-    }
-
-    template<typename T>
-    bool readEnum(T & value)
-    {
-        Int64 pbnumber;
-        if (!readField(pbnumber))
-            return false;
-        if (enum_pbnumber_always_equals_value)
-            value = static_cast<T>(pbnumber);
-        else
-        {
-            auto it = enum_pbnumber_to_value_map->find(pbnumber);
-            if (it == enum_pbnumber_to_value_map->end())
-                cannotConvertValue(toString(pbnumber), "Enum");
-            value = static_cast<T>(it->second);
-        }
-        return true;
-    }
-
-    void prepareEnumPbNumberToNameMap()
-    {
-        if (likely(enum_pbnumber_to_name_map.has_value()))
-            return;
-        enum_pbnumber_to_name_map.emplace();
-        const auto * enum_type = field->enum_type();
-        for (int i = 0; i != enum_type->value_count(); ++i)
-        {
-            const auto * enum_value = enum_type->value(i);
-            enum_pbnumber_to_name_map->emplace(enum_value->number(), enum_value->name());
-        }
-    }
-
-    template <typename T>
-    void prepareEnumPbNumberToValueMap(const std::vector<std::pair<String, T>> & name_value_pairs)
-    {
-        if (likely(enum_pbnumber_to_value_map.has_value()))
-            return;
-        enum_pbnumber_to_value_map.emplace();
-        enum_pbnumber_always_equals_value = true;
-        for (const auto & name_value_pair : name_value_pairs)
-        {
-            Int16 value = name_value_pair.second; // NOLINT
-            const auto * enum_descriptor = field->enum_type()->FindValueByName(name_value_pair.first);
-            if (enum_descriptor)
-            {
-                enum_pbnumber_to_value_map->emplace(enum_descriptor->number(), value);
-                if (enum_descriptor->number() != value)
-                    enum_pbnumber_always_equals_value = false;
-            }
-            else
-                enum_pbnumber_always_equals_value = false;
-        }
-    }
-
-    bool readField(Int64 & enum_pbnumber)
-    {
-        return simple_reader.readInt(enum_pbnumber);
-    }
-
-    std::optional<std::unordered_map<Int64, StringRef>> enum_pbnumber_to_name_map;
-    std::optional<std::unordered_map<Int64, Int16>> enum_pbnumber_to_value_map;
-    bool enum_pbnumber_always_equals_value;
-};
-
-template <>
-std::unique_ptr<ProtobufReader::IConverter> ProtobufReader::createConverter<google::protobuf::FieldDescriptor::TYPE_ENUM>(
-    const google::protobuf::FieldDescriptor * field)
-{
-    return std::make_unique<ConverterFromEnum>(simple_reader, field);
-}
-
-
-ProtobufReader::ProtobufReader(
-    ReadBuffer & in_, const google::protobuf::Descriptor * message_type, const std::vector<String> & column_names, const bool use_length_delimiters_)
-    : simple_reader(in_, use_length_delimiters_)
-{
-    root_message = ProtobufColumnMatcher::matchColumns<ColumnMatcherTraits>(column_names, message_type);
-    setTraitsDataAfterMatchingColumns(root_message.get());
-}
-
-ProtobufReader::~ProtobufReader() = default;
-
-void ProtobufReader::setTraitsDataAfterMatchingColumns(Message * message)
-{
-    for (Field & field : message->fields)
-    {
-        if (field.nested_message)
-        {
-            setTraitsDataAfterMatchingColumns(field.nested_message.get());
-            continue;
-        }
-        switch (field.field_descriptor->type())
-        {
-#    define PROTOBUF_READER_CONVERTER_CREATING_CASE(field_type_id) \
-        case field_type_id: \
-            field.data.converter = createConverter<field_type_id>(field.field_descriptor); \
-            break
-            PROTOBUF_READER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_STRING);
-            PROTOBUF_READER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_BYTES);
-            PROTOBUF_READER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_INT32);
-            PROTOBUF_READER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_SINT32);
-            PROTOBUF_READER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_UINT32);
-            PROTOBUF_READER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_FIXED32);
-            PROTOBUF_READER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_SFIXED32);
-            PROTOBUF_READER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_INT64);
-            PROTOBUF_READER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_SINT64);
-            PROTOBUF_READER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_UINT64);
-            PROTOBUF_READER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_FIXED64);
-            PROTOBUF_READER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_SFIXED64);
-            PROTOBUF_READER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_FLOAT);
-            PROTOBUF_READER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_DOUBLE);
-            PROTOBUF_READER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_BOOL);
-            PROTOBUF_READER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_ENUM);
-#    undef PROTOBUF_READER_CONVERTER_CREATING_CASE
-            default:
-                __builtin_unreachable();
-        }
-        message->data.field_number_to_field_map.emplace(field.field_number, &field);
-    }
-}
-
-bool ProtobufReader::startMessage()
-{
-    if (!simple_reader.startMessage())
-        return false;
-    current_message = root_message.get();
-    current_field_index = 0;
-    return true;
-}
-
-void ProtobufReader::endMessage(bool try_ignore_errors)
-{
-    simple_reader.endMessage(try_ignore_errors);
-    current_message = nullptr;
-    current_converter = nullptr;
-}
-
-bool ProtobufReader::readColumnIndex(size_t & column_index)
-{
-    while (true)
-    {
-        UInt32 field_number;
-        if (!simple_reader.readFieldNumber(field_number))
-        {
-            if (!current_message->parent)
-            {
-                current_converter = nullptr;
-                return false;
-            }
-            simple_reader.endNestedMessage();
-            current_field_index = current_message->index_in_parent;
-            current_message = current_message->parent;
-            continue;
-        }
-
-        const Field * field = nullptr;
-        for (; current_field_index < current_message->fields.size(); ++current_field_index)
-        {
-            const Field & f = current_message->fields[current_field_index];
-            if (f.field_number == field_number)
-            {
-                field = &f;
-                break;
-            }
-            if (f.field_number > field_number)
-                break;
-        }
-
-        if (!field)
-        {
-            const auto & field_number_to_field_map = current_message->data.field_number_to_field_map;
-            auto it = field_number_to_field_map.find(field_number);
-            if (it == field_number_to_field_map.end())
-                continue;
-            field = it->second;
-        }
-
-        if (field->nested_message)
-        {
-            simple_reader.startNestedMessage();
-            current_message = field->nested_message.get();
-            current_field_index = 0;
-            continue;
-        }
-
-        column_index = field->column_index;
-        current_converter = field->data.converter.get();
-        return true;
-    }
-}
-
 }
 
 #endif
diff --git a/src/Formats/ProtobufReader.h b/src/Formats/ProtobufReader.h
index b2a0714a57a..31d6f9a08e0 100644
--- a/src/Formats/ProtobufReader.h
+++ b/src/Formats/ProtobufReader.h
@@ -1,258 +1,72 @@
 #pragma once
 
-#include <common/DayNum.h>
-#include <Common/PODArray.h>
-#include <Common/UInt128.h>
-#include <Core/UUID.h>
-
 #if !defined(ARCADIA_BUILD)
-#    include "config_formats.h"
+#   include "config_formats.h"
 #endif
 
 #if USE_PROTOBUF
-#    include <memory>
-#    include <IO/ReadBuffer.h>
-#    include <boost/noncopyable.hpp>
-#    include "ProtobufColumnMatcher.h"
+#   include <Common/PODArray.h>
+#   include <IO/ReadBuffer.h>
 
-namespace google
-{
-namespace protobuf
-{
-    class Descriptor;
-}
-}
 
 namespace DB
 {
-class Arena;
-class IAggregateFunction;
 class ReadBuffer;
-using AggregateDataPtr = char *;
-using AggregateFunctionPtr = std::shared_ptr<IAggregateFunction>;
-
-
-/** Deserializes a protobuf, tries to cast data types if necessarily.
-  */
-class ProtobufReader : private boost::noncopyable
-{
-public:
-    ProtobufReader(ReadBuffer & in_, const google::protobuf::Descriptor * message_type, const std::vector<String> & column_names, const bool use_length_delimiters_);
-    ~ProtobufReader();
-
-    /// Should be called when we start reading a new message.
-    bool startMessage();
-
-    /// Ends reading a message.
-    void endMessage(bool ignore_errors = false);
-
-    /// Reads the column index.
-    /// The function returns false if there are no more columns to read (call endMessage() in this case).
-    bool readColumnIndex(size_t & column_index);
-
-    /// Reads a value which should be put to column at index received with readColumnIndex().
-    /// The function returns false if there are no more values to read now (call readColumnIndex() in this case).
-    bool readNumber(Int8 & value) { return current_converter->readInt8(value); }
-    bool readNumber(UInt8 & value) { return current_converter->readUInt8(value); }
-    bool readNumber(Int16 & value) { return current_converter->readInt16(value); }
-    bool readNumber(UInt16 & value) { return current_converter->readUInt16(value); }
-    bool readNumber(Int32 & value) { return current_converter->readInt32(value); }
-    bool readNumber(UInt32 & value) { return current_converter->readUInt32(value); }
-    bool readNumber(Int64 & value) { return current_converter->readInt64(value); }
-    bool readNumber(UInt64 & value) { return current_converter->readUInt64(value); }
-    bool readNumber(Int128 & value) { return current_converter->readInt128(value); }
-    bool readNumber(UInt128 & value) { return current_converter->readUInt128(value); }
-    bool readNumber(Int256 & value) { return current_converter->readInt256(value); }
-    bool readNumber(UInt256 & value) { return current_converter->readUInt256(value); }
-    bool readNumber(Float32 & value) { return current_converter->readFloat32(value); }
-    bool readNumber(Float64 & value) { return current_converter->readFloat64(value); }
-
-    bool readStringInto(PaddedPODArray<UInt8> & str) { return current_converter->readStringInto(str); }
-
-    void prepareEnumMapping(const std::vector<std::pair<std::string, Int8>> & name_value_pairs) { current_converter->prepareEnumMapping8(name_value_pairs); }
-    void prepareEnumMapping(const std::vector<std::pair<std::string, Int16>> & name_value_pairs) { current_converter->prepareEnumMapping16(name_value_pairs); }
-    bool readEnum(Int8 & value) { return current_converter->readEnum8(value); }
-    bool readEnum(Int16 & value) { return current_converter->readEnum16(value); }
-
-    bool readUUID(UUID & uuid) { return current_converter->readUUID(uuid); }
-    bool readDate(DayNum & date) { return current_converter->readDate(date); }
-    bool readDateTime(time_t & tm) { return current_converter->readDateTime(tm); }
-    bool readDateTime64(DateTime64 & tm, UInt32 scale) { return current_converter->readDateTime64(tm, scale); }
-
-    bool readDecimal(Decimal32 & decimal, UInt32 precision, UInt32 scale) { return current_converter->readDecimal32(decimal, precision, scale); }
-    bool readDecimal(Decimal64 & decimal, UInt32 precision, UInt32 scale) { return current_converter->readDecimal64(decimal, precision, scale); }
-    bool readDecimal(Decimal128 & decimal, UInt32 precision, UInt32 scale) { return current_converter->readDecimal128(decimal, precision, scale); }
-    bool readDecimal(Decimal256 & decimal, UInt32 precision, UInt32 scale) { return current_converter->readDecimal256(decimal, precision, scale); }
-
-    bool readAggregateFunction(const AggregateFunctionPtr & function, AggregateDataPtr place, Arena & arena) { return current_converter->readAggregateFunction(function, place, arena); }
-
-    /// Call it after calling one of the read*() function to determine if there are more values available for reading.
-    bool ALWAYS_INLINE canReadMoreValues() const { return simple_reader.canReadMoreValues(); }
-
-private:
-    class SimpleReader
-    {
-    public:
-        SimpleReader(ReadBuffer & in_, const bool use_length_delimiters_);
-        bool startMessage();
-        void endMessage(bool ignore_errors);
-        void startNestedMessage();
-        void endNestedMessage();
-        bool readFieldNumber(UInt32 & field_number);
-        bool readInt(Int64 & value);
-        bool readSInt(Int64 & value);
-        bool readUInt(UInt64 & value);
-        template<typename T> bool readFixed(T & value);
-        bool readStringInto(PaddedPODArray<UInt8> & str);
-
-        bool ALWAYS_INLINE canReadMoreValues() const { return cursor < field_end; }
-
-    private:
-        void readBinary(void * data, size_t size);
-        void ignore(UInt64 num_bytes);
-        void moveCursorBackward(UInt64 num_bytes);
-
-        UInt64 ALWAYS_INLINE readVarint()
-        {
-            char c;
-            in.readStrict(c);
-            UInt64 first_byte = static_cast<UInt8>(c);
-            ++cursor;
-            if (likely(!(c & 0x80)))
-                return first_byte;
-            return continueReadingVarint(first_byte);
-        }
-
-        UInt64 continueReadingVarint(UInt64 first_byte);
-        void ignoreVarint();
-        void ignoreGroup();
-        [[noreturn]] void throwUnknownFormat() const;
-
-        ReadBuffer & in;
-        Int64 cursor;
-        size_t current_message_level;
-        Int64 current_message_end;
-        std::vector<Int64> parent_message_ends;
-        Int64 field_end;
-        Int64 last_string_pos;
-        const bool use_length_delimiters;
-    };
-
-    class IConverter
-    {
-    public:
-       virtual ~IConverter() = default;
-       virtual bool readStringInto(PaddedPODArray<UInt8> &) = 0;
-       virtual bool readInt8(Int8&) = 0;
-       virtual bool readUInt8(UInt8 &) = 0;
-       virtual bool readInt16(Int16 &) = 0;
-       virtual bool readUInt16(UInt16 &) = 0;
-       virtual bool readInt32(Int32 &) = 0;
-       virtual bool readUInt32(UInt32 &) = 0;
-       virtual bool readInt64(Int64 &) = 0;
-       virtual bool readUInt64(UInt64 &) = 0;
-       virtual bool readInt128(Int128 &) = 0;
-       virtual bool readUInt128(UInt128 &) = 0;
-
-       virtual bool readInt256(Int256 &) = 0;
-       virtual bool readUInt256(UInt256 &) = 0;
-
-       virtual bool readFloat32(Float32 &) = 0;
-       virtual bool readFloat64(Float64 &) = 0;
-       virtual void prepareEnumMapping8(const std::vector<std::pair<std::string, Int8>> &) = 0;
-       virtual void prepareEnumMapping16(const std::vector<std::pair<std::string, Int16>> &) = 0;
-       virtual bool readEnum8(Int8 &) = 0;
-       virtual bool readEnum16(Int16 &) = 0;
-       virtual bool readUUID(UUID &) = 0;
-       virtual bool readDate(DayNum &) = 0;
-       virtual bool readDateTime(time_t &) = 0;
-       virtual bool readDateTime64(DateTime64 &, UInt32) = 0;
-       virtual bool readDecimal32(Decimal32 &, UInt32, UInt32) = 0;
-       virtual bool readDecimal64(Decimal64 &, UInt32, UInt32) = 0;
-       virtual bool readDecimal128(Decimal128 &, UInt32, UInt32) = 0;
-       virtual bool readDecimal256(Decimal256 &, UInt32, UInt32) = 0;
-       virtual bool readAggregateFunction(const AggregateFunctionPtr &, AggregateDataPtr, Arena &) = 0;
-    };
-
-    class ConverterBaseImpl;
-    class ConverterFromString;
-    template<int field_type_id, typename FromType> class ConverterFromNumber;
-    class ConverterFromBool;
-    class ConverterFromEnum;
-
-    struct ColumnMatcherTraits
-    {
-        struct FieldData
-        {
-            std::unique_ptr<IConverter> converter;
-        };
-        struct MessageData
-        {
-            std::unordered_map<UInt32, const ProtobufColumnMatcher::Field<ColumnMatcherTraits>*> field_number_to_field_map;
-        };
-    };
-    using Message = ProtobufColumnMatcher::Message<ColumnMatcherTraits>;
-    using Field = ProtobufColumnMatcher::Field<ColumnMatcherTraits>;
-
-    void setTraitsDataAfterMatchingColumns(Message * message);
-
-    template <int field_type_id>
-    std::unique_ptr<IConverter> createConverter(const google::protobuf::FieldDescriptor * field);
-
-    SimpleReader simple_reader;
-    std::unique_ptr<Message> root_message;
-    Message* current_message = nullptr;
-    size_t current_field_index = 0;
-    IConverter* current_converter = nullptr;
-};
-
-}
-
-#else
-
-namespace DB
-{
-class Arena;
-class IAggregateFunction;
-class ReadBuffer;
-using AggregateDataPtr = char *;
-using AggregateFunctionPtr = std::shared_ptr<IAggregateFunction>;
 
+/// Utility class for reading in the Protobuf format.
+/// Knows nothing about protobuf schemas, just provides useful functions to serialize data.
 class ProtobufReader
 {
 public:
-    bool startMessage() { return false; }
-    void endMessage() {}
-    bool readColumnIndex(size_t &) { return false; }
-    bool readNumber(Int8 &) { return false; }
-    bool readNumber(UInt8 &) { return false; }
-    bool readNumber(Int16 &) { return false; }
-    bool readNumber(UInt16 &) { return false; }
-    bool readNumber(Int32 &) { return false; }
-    bool readNumber(UInt32 &) { return false; }
-    bool readNumber(Int64 &) { return false; }
-    bool readNumber(UInt64 &) { return false; }
-    bool readNumber(Int128 &) { return false; }
-    bool readNumber(UInt128 &) { return false; }
-    bool readNumber(Int256 &) { return false; }
-    bool readNumber(UInt256 &) { return false; }
-    bool readNumber(Float32 &) { return false; }
-    bool readNumber(Float64 &) { return false; }
-    bool readStringInto(PaddedPODArray<UInt8> &) { return false; }
-    void prepareEnumMapping(const std::vector<std::pair<std::string, Int8>> &) {}
-    void prepareEnumMapping(const std::vector<std::pair<std::string, Int16>> &) {}
-    bool readEnum(Int8 &) { return false; }
-    bool readEnum(Int16 &) { return false; }
-    bool readUUID(UUID &) { return false; }
-    bool readDate(DayNum &) { return false; }
-    bool readDateTime(time_t &) { return false; }
-    bool readDateTime64(DateTime64 & /*tm*/, UInt32 /*scale*/) { return false; }
-    bool readDecimal(Decimal32 &, UInt32, UInt32) { return false; }
-    bool readDecimal(Decimal64 &, UInt32, UInt32) { return false; }
-    bool readDecimal(Decimal128 &, UInt32, UInt32) { return false; }
-    bool readDecimal(Decimal256 &, UInt32, UInt32) { return false; }
-    bool readAggregateFunction(const AggregateFunctionPtr &, AggregateDataPtr, Arena &) { return false; }
-    bool canReadMoreValues() const { return false; }
+    ProtobufReader(ReadBuffer & in_);
+
+    void startMessage(bool with_length_delimiter_);
+    void endMessage(bool ignore_errors);
+    void startNestedMessage();
+    void endNestedMessage();
+
+    bool readFieldNumber(int & field_number);
+    Int64 readInt();
+    Int64 readSInt();
+    UInt64 readUInt();
+    template<typename T> T readFixed();
+
+    void readString(String & str);
+    void readStringAndAppend(PaddedPODArray<UInt8> & str);
+
+    bool eof() const { return in.eof(); }
+
+private:
+    void readBinary(void * data, size_t size);
+    void ignore(UInt64 num_bytes);
+    void ignoreAll();
+    void moveCursorBackward(UInt64 num_bytes);
+
+    UInt64 ALWAYS_INLINE readVarint()
+    {
+        char c;
+        in.readStrict(c);
+        UInt64 first_byte = static_cast<UInt8>(c);
+        ++cursor;
+        if (likely(!(c & 0x80)))
+            return first_byte;
+        return continueReadingVarint(first_byte);
+    }
+
+    UInt64 continueReadingVarint(UInt64 first_byte);
+    void ignoreVarint();
+    void ignoreGroup();
+    [[noreturn]] void throwUnknownFormat() const;
+
+    ReadBuffer & in;
+    Int64 cursor = 0;
+    bool root_message_has_length_delimiter = false;
+    size_t current_message_level = 0;
+    Int64 current_message_end = 0;
+    std::vector<Int64> parent_message_ends;
+    int field_number = 0;
+    int next_field_number = 0;
+    Int64 field_end = 0;
 };
 
 }
diff --git a/src/Formats/ProtobufSerializer.cpp b/src/Formats/ProtobufSerializer.cpp
new file mode 100644
index 00000000000..82149460773
--- /dev/null
+++ b/src/Formats/ProtobufSerializer.cpp
@@ -0,0 +1,2921 @@
+#include <Formats/ProtobufSerializer.h>
+
+#if USE_PROTOBUF
+#   include <Columns/ColumnAggregateFunction.h>
+#   include <Columns/ColumnArray.h>
+#   include <Columns/ColumnDecimal.h>
+#   include <Columns/ColumnLowCardinality.h>
+#   include <Columns/ColumnMap.h>
+#   include <Columns/ColumnNullable.h>
+#   include <Columns/ColumnFixedString.h>
+#   include <Columns/ColumnString.h>
+#   include <Columns/ColumnTuple.h>
+#   include <Columns/ColumnVector.h>
+#   include <Common/PODArray.h>
+#   include <Common/quoteString.h>
+#   include <Core/DecimalComparison.h>
+#   include <DataTypes/DataTypeAggregateFunction.h>
+#   include <DataTypes/DataTypeArray.h>
+#   include <DataTypes/DataTypesDecimal.h>
+#   include <DataTypes/DataTypeDateTime64.h>
+#   include <DataTypes/DataTypeEnum.h>
+#   include <DataTypes/DataTypeFixedString.h>
+#   include <DataTypes/DataTypeLowCardinality.h>
+#   include <DataTypes/DataTypeMap.h>
+#   include <DataTypes/DataTypeNullable.h>
+#   include <DataTypes/DataTypeTuple.h>
+#   include <Formats/ProtobufReader.h>
+#   include <Formats/ProtobufWriter.h>
+#   include <IO/ReadBufferFromString.h>
+#   include <IO/ReadHelpers.h>
+#   include <IO/WriteBufferFromString.h>
+#   include <IO/WriteHelpers.h>
+#   include <ext/range.h>
+#   include <google/protobuf/descriptor.h>
+#   include <google/protobuf/descriptor.pb.h>
+#   include <boost/algorithm/string.hpp>
+#   include <boost/container/flat_map.hpp>
+#   include <boost/container/flat_set.hpp>
+#   include <boost/numeric/conversion/cast.hpp>
+#   include <boost/range/algorithm.hpp>
+
+#   include <common/logger_useful.h>
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int NO_COLUMNS_SERIALIZED_TO_PROTOBUF_FIELDS;
+    extern const int MULTIPLE_COLUMNS_SERIALIZED_TO_SAME_PROTOBUF_FIELD;
+    extern const int NO_COLUMN_SERIALIZED_TO_REQUIRED_PROTOBUF_FIELD;
+    extern const int DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD;
+    extern const int PROTOBUF_FIELD_NOT_REPEATED;
+    extern const int PROTOBUF_BAD_CAST;
+    extern const int LOGICAL_ERROR;
+}
+
+namespace
+{
+    using FieldDescriptor = google::protobuf::FieldDescriptor;
+    using MessageDescriptor = google::protobuf::Descriptor;
+    using FieldTypeId = google::protobuf::FieldDescriptor::Type;
+
+
+    /// Compares column's name with protobuf field's name.
+    /// This comparison is case-insensitive and ignores the difference between '.' and '_'
+    struct ColumnNameWithProtobufFieldNameComparator
+    {
+        static bool equals(char c1, char c2)
+        {
+            return convertChar(c1) == convertChar(c2);
+        }
+
+        static bool equals(const std::string_view & s1, const std::string_view & s2)
+        {
+            return (s1.length() == s2.length())
+                && std::equal(s1.begin(), s1.end(), s2.begin(), [](char c1, char c2) { return convertChar(c1) == convertChar(c2); });
+        }
+
+        static bool less(const std::string_view & s1, const std::string_view & s2)
+        {
+            return std::lexicographical_compare(s1.begin(), s1.end(), s2.begin(), s2.end(), [](char c1, char c2) { return convertChar(c1) < convertChar(c2); });
+        }
+
+        static bool startsWith(const std::string_view & s1, const std::string_view & s2)
+        {
+            return (s1.length() >= s2.length()) && equals(s1.substr(0, s2.length()), s2);
+        }
+
+        static char convertChar(char c)
+        {
+            c = tolower(c);
+            if (c == '.')
+                c = '_';
+            return c;
+        }
+    };
+
+
+    // Should we omit null values (zero for numbers / empty string for strings) while storing them.
+    bool shouldSkipZeroOrEmpty(const FieldDescriptor & field_descriptor)
+    {
+        if (!field_descriptor.is_optional())
+            return false;
+        if (field_descriptor.containing_type()->options().map_entry())
+            return false;
+        return field_descriptor.message_type() || (field_descriptor.file()->syntax() == google::protobuf::FileDescriptor::SYNTAX_PROTO3);
+    }
+
+    // Should we pack repeated values while storing them.
+    bool shouldPackRepeated(const FieldDescriptor & field_descriptor)
+    {
+        if (!field_descriptor.is_repeated())
+            return false;
+        switch (field_descriptor.type())
+        {
+            case FieldTypeId::TYPE_INT32:
+            case FieldTypeId::TYPE_UINT32:
+            case FieldTypeId::TYPE_SINT32:
+            case FieldTypeId::TYPE_INT64:
+            case FieldTypeId::TYPE_UINT64:
+            case FieldTypeId::TYPE_SINT64:
+            case FieldTypeId::TYPE_FIXED32:
+            case FieldTypeId::TYPE_SFIXED32:
+            case FieldTypeId::TYPE_FIXED64:
+            case FieldTypeId::TYPE_SFIXED64:
+            case FieldTypeId::TYPE_FLOAT:
+            case FieldTypeId::TYPE_DOUBLE:
+            case FieldTypeId::TYPE_BOOL:
+            case FieldTypeId::TYPE_ENUM:
+                break;
+            default:
+                return false;
+        }
+        if (field_descriptor.options().has_packed())
+            return field_descriptor.options().packed();
+        return field_descriptor.file()->syntax() == google::protobuf::FileDescriptor::SYNTAX_PROTO3;
+    }
+
+
+    struct ProtobufReaderOrWriter
+    {
+        ProtobufReaderOrWriter(ProtobufReader & reader_) : reader(&reader_) {} // NOLINT(google-explicit-constructor)
+        ProtobufReaderOrWriter(ProtobufWriter & writer_) : writer(&writer_) {} // NOLINT(google-explicit-constructor)
+        ProtobufReader * const reader = nullptr;
+        ProtobufWriter * const writer = nullptr;
+    };
+
+
+    /// Base class for all serializers which serialize a single value.
+    class ProtobufSerializerSingleValue : public ProtobufSerializer
+    {
+    protected:
+        ProtobufSerializerSingleValue(const FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_)
+            : field_descriptor(field_descriptor_)
+            , field_typeid(field_descriptor_.type())
+            , field_tag(field_descriptor.number())
+            , reader(reader_or_writer_.reader)
+            , writer(reader_or_writer_.writer)
+            , skip_zero_or_empty(shouldSkipZeroOrEmpty(field_descriptor))
+        {
+        }
+
+        void setColumns(const ColumnPtr * columns, [[maybe_unused]] size_t num_columns) override
+        {
+            assert(num_columns == 1);
+            column = columns[0];
+        }
+
+        void setColumns(const MutableColumnPtr * columns, [[maybe_unused]] size_t num_columns) override
+        {
+            assert(num_columns == 1);
+            column = columns[0]->getPtr();
+        }
+
+        template <typename NumberType>
+        void writeInt(NumberType value)
+        {
+            auto casted = castNumber<Int64>(value);
+            if (casted || !skip_zero_or_empty)
+                writer->writeInt(field_tag, casted);
+        }
+
+        template <typename NumberType>
+        void writeSInt(NumberType value)
+        {
+            auto casted = castNumber<Int64>(value);
+            if (casted || !skip_zero_or_empty)
+                writer->writeSInt(field_tag, casted);
+        }
+
+        template <typename NumberType>
+        void writeUInt(NumberType value)
+        {
+            auto casted = castNumber<UInt64>(value);
+            if (casted || !skip_zero_or_empty)
+                writer->writeUInt(field_tag, casted);
+        }
+
+        template <typename FieldType, typename NumberType>
+        void writeFixed(NumberType value)
+        {
+            auto casted = castNumber<FieldType>(value);
+            if (casted || !skip_zero_or_empty)
+                writer->writeFixed(field_tag, casted);
+        }
+
+        Int64 readInt() { return reader->readInt(); }
+        Int64 readSInt() { return reader->readSInt(); }
+        UInt64 readUInt() { return reader->readUInt(); }
+
+        template <typename FieldType>
+        FieldType readFixed()
+        {
+            return reader->readFixed<FieldType>();
+        }
+
+        void writeStr(const std::string_view & str)
+        {
+            if (!str.empty() || !skip_zero_or_empty)
+                writer->writeString(field_tag, str);
+        }
+
+        void readStr(String & str) { reader->readString(str); }
+        void readStrAndAppend(PaddedPODArray<UInt8> & str) { reader->readStringAndAppend(str); }
+
+        template <typename DestType>
+        DestType parseFromStr(const std::string_view & str) const
+        {
+            try
+            {
+                DestType result;
+                ReadBufferFromMemory buf(str.data(), str.length());
+                readText(result, buf);
+                return result;
+            }
+            catch (...)
+            {
+                cannotConvertValue(str, "String", TypeName<DestType>::get());
+            }
+        }
+
+        template <typename DestType, typename SrcType>
+        DestType castNumber(SrcType value) const
+        {
+            if constexpr (std::is_same_v<DestType, SrcType>)
+                return value;
+            DestType result;
+            try
+            {
+                /// TODO: use accurate::convertNumeric() maybe?
+                result = boost::numeric_cast<DestType>(value);
+            }
+            catch (boost::numeric::bad_numeric_cast &)
+            {
+                cannotConvertValue(toString(value), TypeName<SrcType>::get(), TypeName<DestType>::get());
+            }
+            return result;
+        }
+
+        [[noreturn]] void cannotConvertValue(const std::string_view & src_value, const std::string_view & src_type_name, const std::string_view & dest_type_name) const
+        {
+            throw Exception(
+                "Could not convert value '" + String{src_value} + "' from type " + String{src_type_name} + " to type " + String{dest_type_name} +
+                    " while " + (reader ? "reading" : "writing") + " field " + field_descriptor.name(),
+                ErrorCodes::PROTOBUF_BAD_CAST);
+        }
+
+        const FieldDescriptor & field_descriptor;
+        const FieldTypeId field_typeid;
+        const int field_tag;
+        ProtobufReader * const reader;
+        ProtobufWriter * const writer;
+        ColumnPtr column;
+
+    private:
+        const bool skip_zero_or_empty;
+    };
+
+
+    /// Serializes any ColumnVector<NumberType> to a field of any type except TYPE_MESSAGE, TYPE_GROUP.
+    /// NumberType must be one of the following types: Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64,
+    /// Int128, UInt128, Int256, UInt256, Float32, Float64.
+    /// And the field's type cannot be TYPE_ENUM if NumberType is Float32 or Float64.
+    template <typename NumberType>
+    class ProtobufSerializerNumber : public ProtobufSerializerSingleValue
+    {
+    public:
+        using ColumnType = ColumnVector<NumberType>;
+
+        ProtobufSerializerNumber(const FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_)
+            : ProtobufSerializerSingleValue(field_descriptor_, reader_or_writer_)
+        {
+            setFunctions();
+        }
+
+        void writeRow(size_t row_num) override
+        {
+            const auto & column_vector = assert_cast<const ColumnType &>(*column);
+            write_function(column_vector.getElement(row_num));
+        }
+
+        void readRow(size_t row_num) override
+        {
+            NumberType value = read_function();
+            auto & column_vector = assert_cast<ColumnType &>(column->assumeMutableRef());
+            if (row_num < column_vector.size())
+                column_vector.getElement(row_num) = value;
+            else
+                column_vector.insertValue(value);
+        }
+
+        void insertDefaults(size_t row_num) override
+        {
+            auto & column_vector = assert_cast<ColumnType &>(column->assumeMutableRef());
+            if (row_num < column_vector.size())
+                return;
+            column_vector.insertValue(getDefaultNumber());
+        }
+
+    private:
+        void setFunctions()
+        {
+            switch (field_typeid)
+            {
+                case FieldTypeId::TYPE_INT32:
+                {
+                    write_function = [this](NumberType value) { writeInt(value); };
+                    read_function = [this]() -> NumberType { return castNumber<NumberType>(readInt()); };
+                    default_function = [this]() -> NumberType { return castNumber<NumberType>(field_descriptor.default_value_int32()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_SINT32:
+                {
+                    write_function = [this](NumberType value) { writeSInt(value); };
+                    read_function = [this]() -> NumberType { return castNumber<NumberType>(readSInt()); };
+                    default_function = [this]() -> NumberType { return castNumber<NumberType>(field_descriptor.default_value_int32()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_UINT32:
+                {
+                    write_function = [this](NumberType value) { writeUInt(value); };
+                    read_function = [this]() -> NumberType { return castNumber<NumberType>(readUInt()); };
+                    default_function = [this]() -> NumberType { return castNumber<NumberType>(field_descriptor.default_value_uint32()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_INT64:
+                {
+                    write_function = [this](NumberType value) { writeInt(value); };
+                    read_function = [this]() -> NumberType { return castNumber<NumberType>(readInt()); };
+                    default_function = [this]() -> NumberType { return castNumber<NumberType>(field_descriptor.default_value_int64()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_SINT64:
+                {
+                    write_function = [this](NumberType value) { writeSInt(value); };
+                    read_function = [this]() -> NumberType { return castNumber<NumberType>(readSInt()); };
+                    default_function = [this]() -> NumberType { return castNumber<NumberType>(field_descriptor.default_value_int64()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_UINT64:
+                {
+                    write_function = [this](NumberType value) { writeUInt(value); };
+                    read_function = [this]() -> NumberType { return castNumber<NumberType>(readUInt()); };
+                    default_function = [this]() -> NumberType { return castNumber<NumberType>(field_descriptor.default_value_uint64()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_FIXED32:
+                {
+                    write_function = [this](NumberType value) { writeFixed<UInt32>(value); };
+                    read_function = [this]() -> NumberType { return castNumber<NumberType>(readFixed<UInt32>()); };
+                    default_function = [this]() -> NumberType { return castNumber<NumberType>(field_descriptor.default_value_uint32()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_SFIXED32:
+                {
+                    write_function = [this](NumberType value) { writeFixed<Int32>(value); };
+                    read_function = [this]() -> NumberType { return castNumber<NumberType>(readFixed<Int32>()); };
+                    default_function = [this]() -> NumberType { return castNumber<NumberType>(field_descriptor.default_value_int32()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_FIXED64:
+                {
+                    write_function = [this](NumberType value) { writeFixed<UInt64>(value); };
+                    read_function = [this]() -> NumberType { return castNumber<NumberType>(readFixed<UInt64>()); };
+                    default_function = [this]() -> NumberType { return castNumber<NumberType>(field_descriptor.default_value_uint64()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_SFIXED64:
+                {
+                    write_function = [this](NumberType value) { writeFixed<Int64>(value); };
+                    read_function = [this]() -> NumberType { return castNumber<NumberType>(readFixed<Int64>()); };
+                    default_function = [this]() -> NumberType { return castNumber<NumberType>(field_descriptor.default_value_int64()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_FLOAT:
+                {
+                    write_function = [this](NumberType value) { writeFixed<Float32>(value); };
+                    read_function = [this]() -> NumberType { return castNumber<NumberType>(readFixed<Float32>()); };
+                    default_function = [this]() -> NumberType { return castNumber<NumberType>(field_descriptor.default_value_float()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_DOUBLE:
+                {
+                    write_function = [this](NumberType value) { writeFixed<Float64>(value); };
+                    read_function = [this]() -> NumberType { return castNumber<NumberType>(readFixed<Float64>()); };
+                    default_function = [this]() -> NumberType { return castNumber<NumberType>(field_descriptor.default_value_double()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_BOOL:
+                {
+                    write_function = [this](NumberType value)
+                    {
+                        if (value == 0)
+                            writeUInt(0);
+                        else if (value == 1)
+                            writeUInt(1);
+                        else
+                            cannotConvertValue(toString(value), TypeName<NumberType>::get(), field_descriptor.type_name());
+                    };
+
+                    read_function = [this]() -> NumberType
+                    {
+                        UInt64 u64 = readUInt();
+                        if (u64 < 2)
+                            return static_cast<NumberType>(u64);
+                        else
+                            cannotConvertValue(toString(u64), field_descriptor.type_name(), TypeName<NumberType>::get());
+                    };
+
+                    default_function = [this]() -> NumberType { return static_cast<NumberType>(field_descriptor.default_value_bool()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_STRING:
+                case FieldTypeId::TYPE_BYTES:
+                {
+                    write_function = [this](NumberType value)
+                    {
+                        WriteBufferFromString buf{text_buffer};
+                        writeText(value, buf);
+                        buf.finalize();
+                        writeStr(text_buffer);
+                    };
+
+                    read_function = [this]() -> NumberType
+                    {
+                        readStr(text_buffer);
+                        return parseFromStr<NumberType>(text_buffer);
+                    };
+
+                    default_function = [this]() -> NumberType { return parseFromStr<NumberType>(field_descriptor.default_value_string()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_ENUM:
+                {
+                    if (std::is_floating_point_v<NumberType>)
+                        failedToSetFunctions();
+
+                    write_function = [this](NumberType value)
+                    {
+                        int number = castNumber<int>(value);
+                        checkProtobufEnumValue(number);
+                        writeInt(number);
+                    };
+
+                    read_function = [this]() -> NumberType { return castNumber<NumberType>(readInt()); };
+                    default_function = [this]() -> NumberType { return castNumber<NumberType>(field_descriptor.default_value_enum()->number()); };
+                    break;
+                }
+
+                default:
+                    failedToSetFunctions();
+            }
+        }
+
+        [[noreturn]] void failedToSetFunctions() const
+        {
+            throw Exception(
+                "The field " + quoteString(field_descriptor.full_name()) + " has an incompatible type " + field_descriptor.type_name()
+                    + " for serialization of the data type " + quoteString(TypeName<NumberType>::get()),
+                ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD);
+        }
+
+        NumberType getDefaultNumber()
+        {
+            if (!default_number)
+                default_number = default_function();
+            return *default_number;
+        }
+
+        void checkProtobufEnumValue(int value) const
+        {
+            const auto * enum_value_descriptor = field_descriptor.enum_type()->FindValueByNumber(value);
+            if (!enum_value_descriptor)
+                cannotConvertValue(toString(value), TypeName<NumberType>::get(), field_descriptor.type_name());
+        }
+
+    protected:
+        std::function<void(NumberType)> write_function;
+        std::function<NumberType()> read_function;
+        std::function<NumberType()> default_function;
+        String text_buffer;
+
+    private:
+        std::optional<NumberType> default_number;
+    };
+
+
+    /// Serializes ColumnString or ColumnFixedString to a field of any type except TYPE_MESSAGE, TYPE_GROUP.
+    template <bool is_fixed_string>
+    class ProtobufSerializerString : public ProtobufSerializerSingleValue
+    {
+    public:
+        using ColumnType = std::conditional_t<is_fixed_string, ColumnFixedString, ColumnString>;
+        using StringDataType = std::conditional_t<is_fixed_string, DataTypeFixedString, DataTypeString>;
+
+        ProtobufSerializerString(
+            const StringDataType & string_data_type_,
+            const google::protobuf::FieldDescriptor & field_descriptor_,
+            const ProtobufReaderOrWriter & reader_or_writer_)
+            : ProtobufSerializerSingleValue(field_descriptor_, reader_or_writer_)
+        {
+            static_assert(is_fixed_string, "This constructor for FixedString only");
+            n = string_data_type_.getN();
+            setFunctions();
+            prepareEnumMapping();
+        }
+
+        ProtobufSerializerString(
+            const google::protobuf::FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_)
+            : ProtobufSerializerSingleValue(field_descriptor_, reader_or_writer_)
+        {
+            static_assert(!is_fixed_string, "This constructor for String only");
+            setFunctions();
+            prepareEnumMapping();
+        }
+
+        void writeRow(size_t row_num) override
+        {
+            const auto & column_string = assert_cast<const ColumnType &>(*column);
+            write_function(std::string_view{column_string.getDataAt(row_num)});
+        }
+
+        void readRow(size_t row_num) override
+        {
+            auto & column_string = assert_cast<ColumnType &>(column->assumeMutableRef());
+            const size_t old_size = column_string.size();
+            typename ColumnType::Chars & data = column_string.getChars();
+            const size_t old_data_size = data.size();
+
+            if (row_num < old_size)
+            {
+                text_buffer.clear();
+                read_function(text_buffer);
+            }
+            else
+            {
+                try
+                {
+                    read_function(data);
+                }
+                catch (...)
+                {
+                    data.resize_assume_reserved(old_data_size);
+                    throw;
+                }
+            }
+
+            if constexpr (is_fixed_string)
+            {
+                if (row_num < old_size)
+                {
+                    ColumnFixedString::alignStringLength(text_buffer, n, 0);
+                    memcpy(data.data() + row_num * n, text_buffer.data(), n);
+                }
+                else
+                    ColumnFixedString::alignStringLength(data, n, old_data_size);
+            }
+            else
+            {
+                if (row_num < old_size)
+                {
+                    if (row_num != old_size - 1)
+                        throw Exception("Cannot replace a string in the middle of ColumnString", ErrorCodes::LOGICAL_ERROR);
+                    column_string.popBack(1);
+                }
+                try
+                {
+                    data.push_back(0 /* terminating zero */);
+                    column_string.getOffsets().push_back(data.size());
+                }
+                catch (...)
+                {
+                    data.resize_assume_reserved(old_data_size);
+                    column_string.getOffsets().resize_assume_reserved(old_size);
+                    throw;
+                }
+            }
+        }
+
+        void insertDefaults(size_t row_num) override
+        {
+            auto & column_string = assert_cast<ColumnType &>(column->assumeMutableRef());
+            const size_t old_size = column_string.size();
+            if (row_num < old_size)
+                return;
+
+            const auto & default_str = getDefaultString();
+            typename ColumnType::Chars & data = column_string.getChars();
+            const size_t old_data_size = data.size();
+            try
+            {
+                data.insert(default_str.data(), default_str.data() + default_str.size());
+            }
+            catch (...)
+            {
+                data.resize_assume_reserved(old_data_size);
+                throw;
+            }
+
+            if constexpr (!is_fixed_string)
+            {
+                try
+                {
+                    data.push_back(0 /* terminating zero */);
+                    column_string.getOffsets().push_back(data.size());
+                }
+                catch (...)
+                {
+                    data.resize_assume_reserved(old_data_size);
+                    column_string.getOffsets().resize_assume_reserved(old_size);
+                    throw;
+                }
+            }
+        }
+
+    private:
+        void setFunctions()
+        {
+            switch (field_typeid)
+            {
+                case FieldTypeId::TYPE_INT32:
+                {
+                    write_function = [this](const std::string_view & str) { writeInt(parseFromStr<Int32>(str)); };
+                    read_function = [this](PaddedPODArray<UInt8> & str) { toStringAppend(readInt(), str); };
+                    default_function = [this]() -> String { return toString(field_descriptor.default_value_int32()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_SINT32:
+                {
+                    write_function = [this](const std::string_view & str) { writeSInt(parseFromStr<Int32>(str)); };
+                    read_function = [this](PaddedPODArray<UInt8> & str) { toStringAppend(readSInt(), str); };
+                    default_function = [this]() -> String { return toString(field_descriptor.default_value_int32()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_UINT32:
+                {
+                    write_function = [this](const std::string_view & str) { writeUInt(parseFromStr<UInt32>(str)); };
+                    read_function = [this](PaddedPODArray<UInt8> & str) { toStringAppend(readUInt(), str); };
+                    default_function = [this]() -> String { return toString(field_descriptor.default_value_uint32()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_INT64:
+                {
+                    write_function = [this](const std::string_view & str) { writeInt(parseFromStr<Int64>(str)); };
+                    read_function = [this](PaddedPODArray<UInt8> & str) { toStringAppend(readInt(), str); };
+                    default_function = [this]() -> String { return toString(field_descriptor.default_value_int64()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_SINT64:
+                {
+                    write_function = [this](const std::string_view & str) { writeSInt(parseFromStr<Int64>(str)); };
+                    read_function = [this](PaddedPODArray<UInt8> & str) { toStringAppend(readSInt(), str); };
+                    default_function = [this]() -> String { return toString(field_descriptor.default_value_int64()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_UINT64:
+                {
+                    write_function = [this](const std::string_view & str) { writeUInt(parseFromStr<UInt64>(str)); };
+                    read_function = [this](PaddedPODArray<UInt8> & str) { toStringAppend(readUInt(), str); };
+                    default_function = [this]() -> String { return toString(field_descriptor.default_value_uint64()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_FIXED32:
+                {
+                    write_function = [this](const std::string_view & str) { writeFixed<UInt32>(parseFromStr<UInt32>(str)); };
+                    read_function = [this](PaddedPODArray<UInt8> & str) { toStringAppend(readFixed<UInt32>(), str); };
+                    default_function = [this]() -> String { return toString(field_descriptor.default_value_uint32()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_SFIXED32:
+                {
+                    write_function = [this](const std::string_view & str) { writeFixed<Int32>(parseFromStr<Int32>(str)); };
+                    read_function = [this](PaddedPODArray<UInt8> & str) { toStringAppend(readFixed<Int32>(), str); };
+                    default_function = [this]() -> String { return toString(field_descriptor.default_value_int32()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_FIXED64:
+                {
+                    write_function = [this](const std::string_view & str) { writeFixed<UInt64>(parseFromStr<UInt64>(str)); };
+                    read_function = [this](PaddedPODArray<UInt8> & str) { toStringAppend(readFixed<UInt64>(), str); };
+                    default_function = [this]() -> String { return toString(field_descriptor.default_value_uint64()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_SFIXED64:
+                {
+                    write_function = [this](const std::string_view & str) { writeFixed<Int64>(parseFromStr<Int64>(str)); };
+                    read_function = [this](PaddedPODArray<UInt8> & str) { toStringAppend(readFixed<Int64>(), str); };
+                    default_function = [this]() -> String { return toString(field_descriptor.default_value_int64()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_FLOAT:
+                {
+                    write_function = [this](const std::string_view & str) { writeFixed<Float32>(parseFromStr<Float32>(str)); };
+                    read_function = [this](PaddedPODArray<UInt8> & str) { toStringAppend(readFixed<Float32>(), str); };
+                    default_function = [this]() -> String { return toString(field_descriptor.default_value_float()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_DOUBLE:
+                {
+                    write_function = [this](const std::string_view & str) { writeFixed<Float64>(parseFromStr<Float64>(str)); };
+                    read_function = [this](PaddedPODArray<UInt8> & str) { toStringAppend(readFixed<Float64>(), str); };
+                    default_function = [this]() -> String { return toString(field_descriptor.default_value_double()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_BOOL:
+                {
+                    write_function = [this](const std::string_view & str)
+                    {
+                        if (str == "true")
+                            writeUInt(1);
+                        else if (str == "false")
+                            writeUInt(0);
+                        else
+                            cannotConvertValue(str, "String", field_descriptor.type_name());
+                    };
+
+                    read_function = [this](PaddedPODArray<UInt8> & str)
+                    {
+                        UInt64 u64 = readUInt();
+                        if (u64 < 2)
+                        {
+                            std::string_view ref(u64 ? "true" : "false");
+                            str.insert(ref.data(), ref.data() + ref.length());
+                        }
+                        else
+                            cannotConvertValue(toString(u64), field_descriptor.type_name(), "String");
+                    };
+
+                    default_function = [this]() -> String
+                    {
+                        return field_descriptor.default_value_bool() ? "true" : "false";
+                    };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_STRING:
+                case FieldTypeId::TYPE_BYTES:
+                {
+                    write_function = [this](const std::string_view & str) { writeStr(str); };
+                    read_function = [this](PaddedPODArray<UInt8> & str) { readStrAndAppend(str); };
+                    default_function = [this]() -> String { return field_descriptor.default_value_string(); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_ENUM:
+                {
+                    write_function = [this](const std::string_view & str) { writeInt(stringToProtobufEnumValue(str)); };
+                    read_function = [this](PaddedPODArray<UInt8> & str) { protobufEnumValueToStringAppend(readInt(), str); };
+                    default_function = [this]() -> String { return field_descriptor.default_value_enum()->name(); };
+                    break;
+                }
+
+                default:
+                    failedToSetFunctions();
+            }
+        }
+
+        [[noreturn]] void failedToSetFunctions()
+        {
+            throw Exception(
+                "The field " + quoteString(field_descriptor.full_name()) + " has an incompatible type " + field_descriptor.type_name()
+                    + " for serialization of the data type " + quoteString(is_fixed_string ? "FixedString" : "String"),
+                ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD);
+        }
+
+        const PaddedPODArray<UInt8> & getDefaultString()
+        {
+            if (!default_string)
+            {
+                PaddedPODArray<UInt8> arr;
+                auto str = default_function();
+                arr.insert(str.data(), str.data() + str.size());
+                if constexpr (is_fixed_string)
+                    ColumnFixedString::alignStringLength(arr, n, 0);
+                default_string = std::move(arr);
+            }
+            return *default_string;
+        }
+
+        template <typename NumberType>
+        void toStringAppend(NumberType value, PaddedPODArray<UInt8> & str)
+        {
+            WriteBufferFromVector buf{str, WriteBufferFromVector<PaddedPODArray<UInt8>>::AppendModeTag{}};
+            writeText(value, buf);
+        }
+
+        void prepareEnumMapping()
+        {
+            if ((field_typeid == google::protobuf::FieldDescriptor::TYPE_ENUM) && writer)
+            {
+                const auto & enum_descriptor = *field_descriptor.enum_type();
+                for (int i = 0; i != enum_descriptor.value_count(); ++i)
+                {
+                    const auto & enum_value_descriptor = *enum_descriptor.value(i);
+                    string_to_protobuf_enum_value_map.emplace(enum_value_descriptor.name(), enum_value_descriptor.number());
+                }
+            }
+        }
+
+        int stringToProtobufEnumValue(const std::string_view & str) const
+        {
+            auto it = string_to_protobuf_enum_value_map.find(str);
+            if (it == string_to_protobuf_enum_value_map.end())
+                cannotConvertValue(str, "String", field_descriptor.type_name());
+            return it->second;
+        }
+
+        std::string_view protobufEnumValueToString(int value) const
+        {
+            const auto * enum_value_descriptor = field_descriptor.enum_type()->FindValueByNumber(value);
+            if (!enum_value_descriptor)
+                cannotConvertValue(toString(value), field_descriptor.type_name(), "String");
+            return enum_value_descriptor->name();
+        }
+
+        void protobufEnumValueToStringAppend(int value, PaddedPODArray<UInt8> & str) const
+        {
+            auto name = protobufEnumValueToString(value);
+            str.insert(name.data(), name.data() + name.length());
+        }
+
+        size_t n = 0;
+        std::function<void(const std::string_view &)> write_function;
+        std::function<void(PaddedPODArray<UInt8> &)> read_function;
+        std::function<String()> default_function;
+        std::unordered_map<std::string_view, int> string_to_protobuf_enum_value_map;
+        PaddedPODArray<UInt8> text_buffer;
+        std::optional<PaddedPODArray<UInt8>> default_string;
+    };
+
+
+    /// Serializes ColumnVector<NumberType> containing enum values to a field of any type
+    /// except TYPE_MESSAGE, TYPE_GROUP, TYPE_FLOAT, TYPE_DOUBLE, TYPE_BOOL.
+    /// NumberType can be either Int8 or Int16.
+    template <typename NumberType>
+    class ProtobufSerializerEnum : public ProtobufSerializerNumber<NumberType>
+    {
+    public:
+        using ColumnType = ColumnVector<NumberType>;
+        using EnumDataType = DataTypeEnum<NumberType>;
+        using BaseClass = ProtobufSerializerNumber<NumberType>;
+
+        ProtobufSerializerEnum(
+            const std::shared_ptr<const EnumDataType> & enum_data_type_,
+            const FieldDescriptor & field_descriptor_,
+            const ProtobufReaderOrWriter & reader_or_writer_)
+            : BaseClass(field_descriptor_, reader_or_writer_), enum_data_type(enum_data_type_)
+        {
+            assert(enum_data_type);
+            setFunctions();
+            prepareEnumMapping();
+        }
+
+    private:
+        void setFunctions()
+        {
+            switch (this->field_typeid)
+            {
+                case FieldTypeId::TYPE_INT32:
+                case FieldTypeId::TYPE_SINT32:
+                case FieldTypeId::TYPE_UINT32:
+                case FieldTypeId::TYPE_INT64:
+                case FieldTypeId::TYPE_SINT64:
+                case FieldTypeId::TYPE_UINT64:
+                case FieldTypeId::TYPE_FIXED32:
+                case FieldTypeId::TYPE_SFIXED32:
+                case FieldTypeId::TYPE_FIXED64:
+                case FieldTypeId::TYPE_SFIXED64:
+                {
+                    auto base_read_function = this->read_function;
+                    this->read_function = [this, base_read_function]() -> NumberType
+                    {
+                        NumberType value = base_read_function();
+                        checkEnumDataTypeValue(value);
+                        return value;
+                    };
+
+                    auto base_default_function = this->default_function;
+                    this->default_function = [this, base_default_function]() -> NumberType
+                    {
+                        auto value = base_default_function();
+                        checkEnumDataTypeValue(value);
+                        return value;
+                    };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_STRING:
+                case FieldTypeId::TYPE_BYTES:
+                {
+                    this->write_function = [this](NumberType value)
+                    {
+                        writeStr(enumDataTypeValueToString(value));
+                    };
+
+                    this->read_function = [this]() -> NumberType
+                    {
+                        readStr(this->text_buffer);
+                        return stringToEnumDataTypeValue(this->text_buffer);
+                    };
+
+                    this->default_function = [this]() -> NumberType
+                    {
+                        return stringToEnumDataTypeValue(this->field_descriptor.default_value_string());
+                    };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_ENUM:
+                {
+                    this->write_function = [this](NumberType value) { writeInt(enumDataTypeValueToProtobufEnumValue(value)); };
+                    this->read_function = [this]() -> NumberType { return protobufEnumValueToEnumDataTypeValue(readInt()); };
+                    this->default_function = [this]() -> NumberType { return protobufEnumValueToEnumDataTypeValue(this->field_descriptor.default_value_enum()->number()); };
+                    break;
+                }
+
+                default:
+                    failedToSetFunctions();
+            }
+        }
+
+        [[noreturn]] void failedToSetFunctions()
+        {
+            throw Exception(
+                "The field " + quoteString(this->field_descriptor.full_name()) + " has an incompatible type " + this->field_descriptor.type_name()
+                    + " for serialization of the data type " + quoteString(enum_data_type->getName()),
+                ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD);
+        }
+
+        void checkEnumDataTypeValue(NumberType value)
+        {
+            enum_data_type->findByValue(value); /// Throws an exception if the value isn't defined in the DataTypeEnum.
+        }
+
+        std::string_view enumDataTypeValueToString(NumberType value) const { return std::string_view{enum_data_type->getNameForValue(value)}; }
+        NumberType stringToEnumDataTypeValue(const String & str) const { return enum_data_type->getValue(str); }
+
+        void prepareEnumMapping()
+        {
+            if (this->field_typeid != FieldTypeId::TYPE_ENUM)
+                return;
+
+            const auto & enum_descriptor = *this->field_descriptor.enum_type();
+
+            /// We have two mappings:
+            /// enum_data_type: "string->NumberType" and protobuf_enum: string->int".
+            /// And here we want to make from those two mapping a new mapping "NumberType->int" (if we're writing protobuf data),
+            /// or "int->NumberType" (if we're reading protobuf data).
+
+            auto add_to_mapping = [&](NumberType enum_data_type_value, int protobuf_enum_value)
+            {
+                if (this->writer)
+                    enum_data_type_value_to_protobuf_enum_value_map.emplace(enum_data_type_value, protobuf_enum_value);
+                else
+                    protobuf_enum_value_to_enum_data_type_value_map.emplace(protobuf_enum_value, enum_data_type_value);
+            };
+
+            auto iless = [](const std::string_view & s1, const std::string_view & s2) { return ColumnNameWithProtobufFieldNameComparator::less(s1, s2); };
+            boost::container::flat_map<std::string_view, int, decltype(iless)> string_to_protobuf_enum_value_map;
+            typename decltype(string_to_protobuf_enum_value_map)::sequence_type string_to_protobuf_enum_value_seq;
+            for (int i : ext::range(enum_descriptor.value_count()))
+                string_to_protobuf_enum_value_seq.emplace_back(enum_descriptor.value(i)->name(), enum_descriptor.value(i)->number());
+            string_to_protobuf_enum_value_map.adopt_sequence(std::move(string_to_protobuf_enum_value_seq));
+
+            std::vector<NumberType> not_found_by_name_values;
+            not_found_by_name_values.reserve(enum_data_type->getValues().size());
+
+            /// Find mapping between enum_data_type and protobuf_enum by name (case insensitively),
+            /// i.e. we add to the mapping
+            /// NumberType(enum_data_type) -> "NAME"(enum_data_type) ->
+            /// -> "NAME"(protobuf_enum, same name) -> int(protobuf_enum)
+            for (const auto & [name, value] : enum_data_type->getValues())
+            {
+                auto it = string_to_protobuf_enum_value_map.find(name);
+                if (it != string_to_protobuf_enum_value_map.end())
+                    add_to_mapping(value, it->second);
+                else
+                    not_found_by_name_values.push_back(value);
+            }
+
+            if (!not_found_by_name_values.empty())
+            {
+                /// Find mapping between two enum_data_type and protobuf_enum by value.
+                /// If the same value has different names in enum_data_type and protobuf_enum
+                /// we can still add it to our mapping, i.e. we add to the mapping
+                /// NumberType(enum_data_type) -> int(protobuf_enum, same value)
+                for (NumberType value : not_found_by_name_values)
+                {
+                    if (enum_descriptor.FindValueByNumber(value))
+                        add_to_mapping(value, value);
+                }
+            }
+
+            size_t num_mapped_values = this->writer ? enum_data_type_value_to_protobuf_enum_value_map.size()
+                                                    : protobuf_enum_value_to_enum_data_type_value_map.size();
+
+            if (!num_mapped_values && !enum_data_type->getValues().empty() && enum_descriptor.value_count())
+            {
+                throw Exception(
+                    "Couldn't find mapping between data type " + enum_data_type->getName() + " and the enum " + quoteString(enum_descriptor.full_name())
+                        + " in the protobuf schema",
+                    ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD);
+            }
+        }
+
+        int enumDataTypeValueToProtobufEnumValue(NumberType value) const
+        {
+            auto it = enum_data_type_value_to_protobuf_enum_value_map.find(value);
+            if (it == enum_data_type_value_to_protobuf_enum_value_map.end())
+                cannotConvertValue(toString(value), enum_data_type->getName(), this->field_descriptor.type_name());
+            return it->second;
+        }
+
+        NumberType protobufEnumValueToEnumDataTypeValue(int value) const
+        {
+            auto it = protobuf_enum_value_to_enum_data_type_value_map.find(value);
+            if (it == protobuf_enum_value_to_enum_data_type_value_map.end())
+               cannotConvertValue(toString(value), this->field_descriptor.type_name(), enum_data_type->getName());
+            return it->second;
+        }
+
+        Int64 readInt() { return ProtobufSerializerSingleValue::readInt(); }
+        void writeInt(Int64 value) { ProtobufSerializerSingleValue::writeInt(value); }
+        void writeStr(const std::string_view & str) { ProtobufSerializerSingleValue::writeStr(str); }
+        void readStr(String & str) { ProtobufSerializerSingleValue::readStr(str); }
+        [[noreturn]] void cannotConvertValue(const std::string_view & src_value, const std::string_view & src_type_name, const std::string_view & dest_type_name) const { ProtobufSerializerSingleValue::cannotConvertValue(src_value, src_type_name, dest_type_name); }
+
+        const std::shared_ptr<const EnumDataType> enum_data_type;
+        std::unordered_map<NumberType, int> enum_data_type_value_to_protobuf_enum_value_map;
+        std::unordered_map<int, NumberType> protobuf_enum_value_to_enum_data_type_value_map;
+    };
+
+
+    /// Serializes a ColumnDecimal<DecimalType> to any field except TYPE_MESSAGE, TYPE_GROUP, TYPE_ENUM.
+    /// DecimalType must be one of the following types: Decimal32, Decimal64, Decimal128, Decimal256, DateTime64.
+    template <typename DecimalType>
+    class ProtobufSerializerDecimal : public ProtobufSerializerSingleValue
+    {
+    public:
+        using ColumnType = ColumnDecimal<DecimalType>;
+
+        ProtobufSerializerDecimal(
+            const DataTypeDecimalBase<DecimalType> & decimal_data_type_,
+            const FieldDescriptor & field_descriptor_,
+            const ProtobufReaderOrWriter & reader_or_writer_)
+            : ProtobufSerializerSingleValue(field_descriptor_, reader_or_writer_)
+            , precision(decimal_data_type_.getPrecision())
+            , scale(decimal_data_type_.getScale())
+        {
+            setFunctions();
+        }
+
+        void writeRow(size_t row_num) override
+        {
+            const auto & column_decimal = assert_cast<const ColumnType &>(*column);
+            write_function(column_decimal.getElement(row_num));
+        }
+
+        void readRow(size_t row_num) override
+        {
+            DecimalType decimal = read_function();
+            auto & column_decimal = assert_cast<ColumnType &>(column->assumeMutableRef());
+            if (row_num < column_decimal.size())
+                column_decimal.getElement(row_num) = decimal;
+            else
+                column_decimal.insertValue(decimal);
+        }
+
+        void insertDefaults(size_t row_num) override
+        {
+            auto & column_decimal = assert_cast<ColumnType &>(column->assumeMutableRef());
+            if (row_num < column_decimal.size())
+                return;
+            column_decimal.insertValue(getDefaultDecimal());
+        }
+
+    private:
+        void setFunctions()
+        {
+            switch (field_typeid)
+            {
+                case FieldTypeId::TYPE_INT32:
+                {
+                    write_function = [this](const DecimalType & decimal) { writeInt(decimalToNumber<Int32>(decimal)); };
+                    read_function = [this]() -> DecimalType { return numberToDecimal(readInt()); };
+                    default_function = [this]() -> DecimalType { return numberToDecimal(field_descriptor.default_value_int32()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_SINT32:
+                {
+                    write_function = [this](const DecimalType & decimal) { writeSInt(decimalToNumber<Int32>(decimal)); };
+                    read_function = [this]() -> DecimalType { return numberToDecimal(readSInt()); };
+                    default_function = [this]() -> DecimalType { return numberToDecimal(field_descriptor.default_value_int32()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_UINT32:
+                {
+                    write_function = [this](const DecimalType & decimal) { writeUInt(decimalToNumber<UInt32>(decimal)); };
+                    read_function = [this]() -> DecimalType { return numberToDecimal(readUInt()); };
+                    default_function = [this]() -> DecimalType { return numberToDecimal(field_descriptor.default_value_uint32()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_INT64:
+                {
+                    write_function = [this](const DecimalType & decimal) { writeInt(decimalToNumber<Int64>(decimal)); };
+                    read_function = [this]() -> DecimalType { return numberToDecimal(readInt()); };
+                    default_function = [this]() -> DecimalType { return numberToDecimal(field_descriptor.default_value_int64()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_SINT64:
+                {
+                    write_function = [this](const DecimalType & decimal) { writeSInt(decimalToNumber<Int64>(decimal)); };
+                    read_function = [this]() -> DecimalType { return numberToDecimal(readSInt()); };
+                    default_function = [this]() -> DecimalType { return numberToDecimal(field_descriptor.default_value_int64()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_UINT64:
+                {
+                    write_function = [this](const DecimalType & decimal) { writeUInt(decimalToNumber<UInt64>(decimal)); };
+                    read_function = [this]() -> DecimalType { return numberToDecimal(readUInt()); };
+                    default_function = [this]() -> DecimalType { return numberToDecimal(field_descriptor.default_value_uint64()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_FIXED32:
+                {
+                    write_function = [this](const DecimalType & decimal) { writeFixed<UInt32>(decimalToNumber<UInt32>(decimal)); };
+                    read_function = [this]() -> DecimalType { return numberToDecimal(readFixed<UInt32>()); };
+                    default_function = [this]() -> DecimalType { return numberToDecimal(field_descriptor.default_value_uint32()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_SFIXED32:
+                {
+                    write_function = [this](const DecimalType & decimal) { writeFixed<Int32>(decimalToNumber<Int32>(decimal)); };
+                    read_function = [this]() -> DecimalType { return numberToDecimal(readFixed<Int32>()); };
+                    default_function = [this]() -> DecimalType { return numberToDecimal(field_descriptor.default_value_int32()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_FIXED64:
+                {
+                    write_function = [this](const DecimalType & decimal) { writeFixed<UInt64>(decimalToNumber<UInt64>(decimal)); };
+                    read_function = [this]() -> DecimalType { return numberToDecimal(readFixed<UInt64>()); };
+                    default_function = [this]() -> DecimalType { return numberToDecimal(field_descriptor.default_value_uint64()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_SFIXED64:
+                {
+                    write_function = [this](const DecimalType & decimal) { writeFixed<Int64>(decimalToNumber<Int64>(decimal)); };
+                    read_function = [this]() -> DecimalType { return numberToDecimal(readFixed<Int64>()); };
+                    default_function = [this]() -> DecimalType { return numberToDecimal(field_descriptor.default_value_int64()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_FLOAT:
+                {
+                    write_function = [this](const DecimalType & decimal) { writeFixed<Float32>(decimalToNumber<Float32>(decimal)); };
+                    read_function = [this]() -> DecimalType { return numberToDecimal(readFixed<Float32>()); };
+                    default_function = [this]() -> DecimalType { return numberToDecimal(field_descriptor.default_value_float()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_DOUBLE:
+                {
+                    write_function = [this](const DecimalType & decimal) { writeFixed<Float64>(decimalToNumber<Float64>(decimal)); };
+                    read_function = [this]() -> DecimalType { return numberToDecimal(readFixed<Float64>()); };
+                    default_function = [this]() -> DecimalType { return numberToDecimal(field_descriptor.default_value_double()); };
+                    break;
+                }
+
+                case FieldTypeId::TYPE_BOOL:
+                {
+                    if (std::is_same_v<DecimalType, DateTime64>)
+                        failedToSetFunctions();
+                    else
+                    {
+                        write_function = [this](const DecimalType & decimal)
+                        {
+                            if (decimal.value == 0)
+                                writeInt(0);
+                            else if (DecimalComparison<DecimalType, int, EqualsOp>::compare(decimal, 1, scale, 0))
+                                writeInt(1);
+                            else
+                            {
+                                WriteBufferFromOwnString buf;
+                                writeText(decimal, scale, buf);
+                                cannotConvertValue(buf.str(), TypeName<DecimalType>::get(), field_descriptor.type_name());
+                            }
+                        };
+
+                        read_function = [this]() -> DecimalType
+                        {
+                            UInt64 u64 = readUInt();
+                            if (u64 < 2)
+                                return numberToDecimal(static_cast<UInt64>(u64 != 0));
+                            else
+                                cannotConvertValue(toString(u64), field_descriptor.type_name(), TypeName<DecimalType>::get());
+                        };
+
+                        default_function = [this]() -> DecimalType
+                        {
+                            return numberToDecimal(static_cast<Int64>(field_descriptor.default_value_bool()));
+                        };
+                    }
+                    break;
+                }
+
+                case FieldTypeId::TYPE_STRING:
+                case FieldTypeId::TYPE_BYTES:
+                {
+                    write_function = [this](const DecimalType & decimal)
+                    {
+                        decimalToString(decimal, text_buffer);
+                        writeStr(text_buffer);
+                    };
+
+                    read_function = [this]() -> DecimalType
+                    {
+                        readStr(text_buffer);
+                        return stringToDecimal(text_buffer);
+                    };
+
+                    default_function = [this]() -> DecimalType { return stringToDecimal(field_descriptor.default_value_string()); };
+                    break;
+                }
+
+                default:
+                    failedToSetFunctions();
+            }
+        }
+
+        [[noreturn]] void failedToSetFunctions()
+        {
+            throw Exception(
+                "The field " + quoteString(field_descriptor.full_name()) + " has an incompatible type " + field_descriptor.type_name()
+                    + " for serialization of the data type " + quoteString(TypeName<DecimalType>::get()),
+                ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD);
+        }
+
+        DecimalType getDefaultDecimal()
+        {
+            if (!default_decimal)
+                default_decimal = default_function();
+            return *default_decimal;
+        }
+
+        template <typename NumberType>
+        DecimalType numberToDecimal(NumberType value) const
+        {
+            return convertToDecimal<DataTypeNumber<NumberType>, DataTypeDecimal<DecimalType>>(value, scale);
+        }
+
+        template <typename NumberType>
+        NumberType decimalToNumber(const DecimalType & decimal) const
+        {
+            return DecimalUtils::convertTo<NumberType>(decimal, scale);
+        }
+
+        void decimalToString(const DecimalType & decimal, String & str) const
+        {
+            WriteBufferFromString buf{str};
+            if constexpr (std::is_same_v<DecimalType, DateTime64>)
+               writeDateTimeText(decimal, scale, buf);
+            else
+                writeText(decimal, scale, buf);
+        }
+
+        DecimalType stringToDecimal(const String & str) const
+        {
+            ReadBufferFromString buf(str);
+            DecimalType decimal{0};
+            if constexpr (std::is_same_v<DecimalType, DateTime64>)
+                readDateTime64Text(decimal, scale, buf);
+            else
+                DataTypeDecimal<DecimalType>::readText(decimal, buf, precision, scale);
+            return decimal;
+        }
+
+        const UInt32 precision;
+        const UInt32 scale;
+        std::function<void(const DecimalType &)> write_function;
+        std::function<DecimalType()> read_function;
+        std::function<DecimalType()> default_function;
+        std::optional<DecimalType> default_decimal;
+        String text_buffer;
+    };
+
+    using ProtobufSerializerDateTime64 = ProtobufSerializerDecimal<DateTime64>;
+
+
+    /// Serializes a ColumnVector<UInt16> containing dates to a field of any type except TYPE_MESSAGE, TYPE_GROUP, TYPE_BOOL, TYPE_ENUM.
+    class ProtobufSerializerDate : public ProtobufSerializerNumber<UInt16>
+    {
+    public:
+        ProtobufSerializerDate(
+            const FieldDescriptor & field_descriptor_,
+            const ProtobufReaderOrWriter & reader_or_writer_)
+            : ProtobufSerializerNumber<UInt16>(field_descriptor_, reader_or_writer_)
+        {
+            setFunctions();
+        }
+
+    private:
+        void setFunctions()
+        {
+            switch (field_typeid)
+            {
+                case FieldTypeId::TYPE_INT32:
+                case FieldTypeId::TYPE_SINT32:
+                case FieldTypeId::TYPE_UINT32:
+                case FieldTypeId::TYPE_INT64:
+                case FieldTypeId::TYPE_SINT64:
+                case FieldTypeId::TYPE_UINT64:
+                case FieldTypeId::TYPE_FIXED32:
+                case FieldTypeId::TYPE_SFIXED32:
+                case FieldTypeId::TYPE_FIXED64:
+                case FieldTypeId::TYPE_SFIXED64:
+                case FieldTypeId::TYPE_FLOAT:
+                case FieldTypeId::TYPE_DOUBLE:
+                    break; /// already set in ProtobufSerializerNumber<UInt16>::setFunctions().
+
+                case FieldTypeId::TYPE_STRING:
+                case FieldTypeId::TYPE_BYTES:
+                {
+                    write_function = [this](UInt16 value)
+                    {
+                        dateToString(static_cast<DayNum>(value), text_buffer);
+                        writeStr(text_buffer);
+                    };
+
+                    read_function = [this]() -> UInt16
+                    {
+                        readStr(text_buffer);
+                        return stringToDate(text_buffer);
+                    };
+
+                    default_function = [this]() -> UInt16 { return stringToDate(field_descriptor.default_value_string()); };
+                    break;
+                }
+
+                default:
+                    failedToSetFunctions();
+            }
+        }
+
+        static void dateToString(DayNum date, String & str)
+        {
+            WriteBufferFromString buf{str};
+            writeText(date, buf);
+        }
+
+        static DayNum stringToDate(const String & str)
+        {
+            DayNum date;
+            ReadBufferFromString buf{str};
+            readDateText(date, buf);
+            return date;
+        }
+
+        [[noreturn]] void failedToSetFunctions()
+        {
+            throw Exception(
+                "The field " + quoteString(field_descriptor.full_name()) + " has an incompatible type " + field_descriptor.type_name()
+                    + " for serialization of the data type 'Date'",
+                ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD);
+        }
+    };
+
+
+    /// Serializes a ColumnVector<UInt32> containing dates to a field of any type except TYPE_MESSAGE, TYPE_GROUP, TYPE_BOOL, TYPE_ENUM.
+    class ProtobufSerializerDateTime : public ProtobufSerializerNumber<UInt32>
+    {
+    public:
+        ProtobufSerializerDateTime(
+            const FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_)
+            : ProtobufSerializerNumber<UInt32>(field_descriptor_, reader_or_writer_)
+        {
+            setFunctions();
+        }
+
+    protected:
+        void setFunctions()
+        {
+            switch (field_typeid)
+            {
+                case FieldTypeId::TYPE_INT32:
+                case FieldTypeId::TYPE_SINT32:
+                case FieldTypeId::TYPE_UINT32:
+                case FieldTypeId::TYPE_INT64:
+                case FieldTypeId::TYPE_SINT64:
+                case FieldTypeId::TYPE_UINT64:
+                case FieldTypeId::TYPE_FIXED32:
+                case FieldTypeId::TYPE_SFIXED32:
+                case FieldTypeId::TYPE_FIXED64:
+                case FieldTypeId::TYPE_SFIXED64:
+                case FieldTypeId::TYPE_FLOAT:
+                case FieldTypeId::TYPE_DOUBLE:
+                    break; /// already set in ProtobufSerializerNumber<UInt32>::setFunctions().
+
+                case FieldTypeId::TYPE_STRING:
+                case FieldTypeId::TYPE_BYTES:
+                {
+                    write_function = [this](UInt32 value)
+                    {
+                        dateTimeToString(value, text_buffer);
+                        writeStr(text_buffer);
+                    };
+
+                    read_function = [this]() -> UInt32
+                    {
+                        readStr(text_buffer);
+                        return stringToDateTime(text_buffer);
+                    };
+
+                    default_function = [this]() -> UInt32 { return stringToDateTime(field_descriptor.default_value_string()); };
+                    break;
+                }
+
+                default:
+                    failedToSetFunctions();
+            }
+        }
+
+        static void dateTimeToString(time_t tm, String & str)
+        {
+            WriteBufferFromString buf{str};
+            writeDateTimeText(tm, buf);
+        }
+
+        static time_t stringToDateTime(const String & str)
+        {
+            ReadBufferFromString buf{str};
+            time_t tm = 0;
+            readDateTimeText(tm, buf);
+            return tm;
+        }
+
+        [[noreturn]] void failedToSetFunctions()
+        {
+            throw Exception(
+                "The field " + quoteString(field_descriptor.full_name()) + " has an incompatible type " + field_descriptor.type_name()
+                    + " for serialization of the data type 'DateTime'",
+                ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD);
+        }
+    };
+
+
+    /// Serializes a ColumnVector<UInt128> containing UUIDs to a field of type TYPE_STRING or TYPE_BYTES.
+    class ProtobufSerializerUUID : public ProtobufSerializerNumber<UInt128>
+    {
+    public:
+        ProtobufSerializerUUID(
+            const google::protobuf::FieldDescriptor & field_descriptor_,
+            const ProtobufReaderOrWriter & reader_or_writer_)
+            : ProtobufSerializerNumber<UInt128>(field_descriptor_, reader_or_writer_)
+        {
+            setFunctions();
+        }
+
+    private:
+        void setFunctions()
+        {
+            if ((field_typeid != FieldTypeId::TYPE_STRING) && (field_typeid != FieldTypeId::TYPE_BYTES))
+            {
+                throw Exception(
+                    "The field " + quoteString(field_descriptor.full_name()) + " has an incompatible type " + field_descriptor.type_name()
+                        + " for serialization of the data type UUID",
+                    ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD);
+            }
+
+            write_function = [this](UInt128 value)
+            {
+                uuidToString(static_cast<UUID>(value), text_buffer);
+                writeStr(text_buffer);
+            };
+
+            read_function = [this]() -> UInt128
+            {
+                readStr(text_buffer);
+                return stringToUUID(text_buffer);
+            };
+
+            default_function = [this]() -> UInt128 { return stringToUUID(field_descriptor.default_value_string()); };
+        }
+
+        static void uuidToString(const UUID & uuid, String & str)
+        {
+            WriteBufferFromString buf{str};
+            writeText(uuid, buf);
+        }
+
+        static UUID stringToUUID(const String & str)
+        {
+            ReadBufferFromString buf{str};
+            UUID uuid;
+            readUUIDText(uuid, buf);
+            return uuid;
+        }
+    };
+
+
+    using ProtobufSerializerInterval = ProtobufSerializerNumber<Int64>;
+
+
+    /// Serializes a ColumnAggregateFunction to a field of type TYPE_STRING or TYPE_BYTES.
+    class ProtobufSerializerAggregateFunction : public ProtobufSerializerSingleValue
+    {
+    public:
+        ProtobufSerializerAggregateFunction(
+            const std::shared_ptr<const DataTypeAggregateFunction> & aggregate_function_data_type_,
+            const google::protobuf::FieldDescriptor & field_descriptor_,
+            const ProtobufReaderOrWriter & reader_or_writer_)
+            : ProtobufSerializerSingleValue(field_descriptor_, reader_or_writer_)
+            , aggregate_function_data_type(aggregate_function_data_type_)
+            , aggregate_function(aggregate_function_data_type->getFunction())
+        {
+            if ((field_typeid != FieldTypeId::TYPE_STRING) && (field_typeid != FieldTypeId::TYPE_BYTES))
+            {
+                throw Exception(
+                    "The field " + quoteString(field_descriptor.full_name()) + " has an incompatible type " + field_descriptor.type_name()
+                        + " for serialization of the data type " + quoteString(aggregate_function_data_type->getName()),
+                    ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD);
+            }
+        }
+
+        void writeRow(size_t row_num) override
+        {
+            const auto & column_af = assert_cast<const ColumnAggregateFunction &>(*column);
+            dataToString(column_af.getData()[row_num], text_buffer);
+            writeStr(text_buffer);
+        }
+
+        void readRow(size_t row_num) override
+        {
+            auto & column_af = assert_cast<ColumnAggregateFunction &>(column->assumeMutableRef());
+            Arena & arena = column_af.createOrGetArena();
+            AggregateDataPtr data;
+            readStr(text_buffer);
+            data = stringToData(text_buffer, arena);
+
+            if (row_num < column_af.size())
+            {
+                auto * old_data = std::exchange(column_af.getData()[row_num], data);
+                aggregate_function->destroy(old_data);
+            }
+            else
+                column_af.getData().push_back(data);
+        }
+
+        void insertDefaults(size_t row_num) override
+        {
+            auto & column_af = assert_cast<ColumnAggregateFunction &>(column->assumeMutableRef());
+            if (row_num < column_af.size())
+                return;
+
+            Arena & arena = column_af.createOrGetArena();
+            AggregateDataPtr data = stringToData(field_descriptor.default_value_string(), arena);
+            column_af.getData().push_back(data);
+        }
+
+    private:
+        void dataToString(ConstAggregateDataPtr data, String & str) const
+        {
+            WriteBufferFromString buf{str};
+            aggregate_function->serialize(data, buf);
+        }
+
+        AggregateDataPtr stringToData(const String & str, Arena & arena) const
+        {
+            size_t size_of_state = aggregate_function->sizeOfData();
+            AggregateDataPtr data = arena.alignedAlloc(size_of_state, aggregate_function->alignOfData());
+            try
+            {
+                aggregate_function->create(data);
+                ReadBufferFromMemory buf(str.data(), str.length());
+                aggregate_function->deserialize(data, buf, &arena);
+                return data;
+            }
+            catch (...)
+            {
+                aggregate_function->destroy(data);
+                throw;
+            }
+        }
+
+        const std::shared_ptr<const DataTypeAggregateFunction> aggregate_function_data_type;
+        const AggregateFunctionPtr aggregate_function;
+        String text_buffer;
+    };
+
+
+    /// Serializes a ColumnNullable.
+    class ProtobufSerializerNullable : public ProtobufSerializer
+    {
+    public:
+        explicit ProtobufSerializerNullable(std::unique_ptr<ProtobufSerializer> nested_serializer_)
+            : nested_serializer(std::move(nested_serializer_))
+        {
+        }
+
+        void setColumns(const ColumnPtr * columns, [[maybe_unused]] size_t num_columns) override
+        {
+            assert(num_columns == 1);
+            column = columns[0];
+            const auto & column_nullable = assert_cast<const ColumnNullable &>(*column);
+            ColumnPtr nested_column = column_nullable.getNestedColumnPtr();
+            nested_serializer->setColumns(&nested_column, 1);
+        }
+
+        void setColumns(const MutableColumnPtr * columns, [[maybe_unused]] size_t num_columns) override
+        {
+            assert(num_columns == 1);
+            ColumnPtr column0 = columns[0]->getPtr();
+            setColumns(&column0, 1);
+        }
+
+        void writeRow(size_t row_num) override
+        {
+            const auto & column_nullable = assert_cast<const ColumnNullable &>(*column);
+            const auto & null_map = column_nullable.getNullMapData();
+            if (!null_map[row_num])
+                nested_serializer->writeRow(row_num);
+        }
+
+        void readRow(size_t row_num) override
+        {
+            auto & column_nullable = assert_cast<ColumnNullable &>(column->assumeMutableRef());
+            auto & nested_column = column_nullable.getNestedColumn();
+            auto & null_map = column_nullable.getNullMapData();
+            size_t old_size = null_map.size();
+
+            nested_serializer->readRow(row_num);
+
+            if (row_num < old_size)
+            {
+                null_map[row_num] = false;
+            }
+            else
+            {
+                size_t new_size = nested_column.size();
+                if (new_size != old_size + 1)
+                    throw Exception("Size of ColumnNullable is unexpected", ErrorCodes::LOGICAL_ERROR);
+                try
+                {
+                    null_map.push_back(false);
+                }
+                catch (...)
+                {
+                    nested_column.popBack(1);
+                    throw;
+                }
+            }
+        }
+
+        void insertDefaults(size_t row_num) override
+        {
+            auto & column_nullable = assert_cast<ColumnNullable &>(column->assumeMutableRef());
+            if (row_num < column_nullable.size())
+                return;
+            column_nullable.insertDefault();
+        }
+
+    private:
+        const std::unique_ptr<ProtobufSerializer> nested_serializer;
+        ColumnPtr column;
+    };
+
+
+    /// Serializes a ColumnMap.
+    class ProtobufSerializerMap : public ProtobufSerializer
+    {
+    public:
+        explicit ProtobufSerializerMap(std::unique_ptr<ProtobufSerializer> nested_serializer_)
+            : nested_serializer(std::move(nested_serializer_))
+        {
+        }
+
+        void setColumns(const ColumnPtr * columns, [[maybe_unused]] size_t num_columns) override
+        {
+            assert(num_columns == 1);
+            const auto & column_map = assert_cast<const ColumnMap &>(*columns[0]);
+            ColumnPtr nested_column = column_map.getNestedColumnPtr();
+            nested_serializer->setColumns(&nested_column, 1);
+        }
+
+        void setColumns(const MutableColumnPtr * columns, [[maybe_unused]] size_t num_columns) override
+        {
+            assert(num_columns == 1);
+            ColumnPtr column0 = columns[0]->getPtr();
+            setColumns(&column0, 1);
+        }
+
+        void writeRow(size_t row_num) override { nested_serializer->writeRow(row_num); }
+        void readRow(size_t row_num) override { nested_serializer->readRow(row_num); }
+        void insertDefaults(size_t row_num) override { nested_serializer->insertDefaults(row_num); }
+
+    private:
+        const std::unique_ptr<ProtobufSerializer> nested_serializer;
+    };
+
+
+    /// Serializes a ColumnLowCardinality.
+    class ProtobufSerializerLowCardinality : public ProtobufSerializer
+    {
+    public:
+        explicit ProtobufSerializerLowCardinality(std::unique_ptr<ProtobufSerializer> nested_serializer_)
+            : nested_serializer(std::move(nested_serializer_))
+        {
+        }
+
+        void setColumns(const ColumnPtr * columns, [[maybe_unused]] size_t num_columns) override
+        {
+            assert(num_columns == 1);
+            column = columns[0];
+            const auto & column_lc = assert_cast<const ColumnLowCardinality &>(*column);
+            ColumnPtr nested_column = column_lc.getDictionary().getNestedColumn();
+            nested_serializer->setColumns(&nested_column, 1);
+            read_value_column_set = false;
+        }
+
+        void setColumns(const MutableColumnPtr * columns, [[maybe_unused]] size_t num_columns) override
+        {
+            assert(num_columns == 1);
+            ColumnPtr column0 = columns[0]->getPtr();
+            setColumns(&column0, 1);
+        }
+
+        void writeRow(size_t row_num) override
+        {
+            const auto & column_lc = assert_cast<const ColumnLowCardinality &>(*column);
+            size_t unique_row_number = column_lc.getIndexes().getUInt(row_num);
+            nested_serializer->writeRow(unique_row_number);
+        }
+
+        void readRow(size_t row_num) override
+        {
+            auto & column_lc = assert_cast<ColumnLowCardinality &>(column->assumeMutableRef());
+
+            if (!read_value_column_set)
+            {
+                if (!read_value_column)
+                {
+                    ColumnPtr nested_column = column_lc.getDictionary().getNestedColumn();
+                    read_value_column = nested_column->cloneEmpty();
+                }
+                nested_serializer->setColumns(&read_value_column, 1);
+                read_value_column_set = true;
+            }
+
+            read_value_column->popBack(read_value_column->size());
+            nested_serializer->readRow(0);
+
+            if (row_num < column_lc.size())
+            {
+                if (row_num != column_lc.size() - 1)
+                    throw Exception("Cannot replace an element in the middle of ColumnLowCardinality", ErrorCodes::LOGICAL_ERROR);
+                column_lc.popBack(1);
+            }
+
+            column_lc.insertFromFullColumn(*read_value_column, 0);
+        }
+
+        void insertDefaults(size_t row_num) override
+        {
+            auto & column_lc = assert_cast<ColumnLowCardinality &>(column->assumeMutableRef());
+            if (row_num < column_lc.size())
+                return;
+
+            if (!default_value_column)
+            {
+                ColumnPtr nested_column = column_lc.getDictionary().getNestedColumn();
+                default_value_column = nested_column->cloneEmpty();
+                nested_serializer->setColumns(&default_value_column, 1);
+                nested_serializer->insertDefaults(0);
+                read_value_column_set = false;
+            }
+
+            column_lc.insertFromFullColumn(*default_value_column, 0);
+        }
+
+    private:
+        const std::unique_ptr<ProtobufSerializer> nested_serializer;
+        ColumnPtr column;
+        MutableColumnPtr read_value_column;
+        bool read_value_column_set = false;
+        MutableColumnPtr default_value_column;
+    };
+
+
+    /// Serializes a ColumnArray to a repeated field.
+    class ProtobufSerializerArray : public ProtobufSerializer
+    {
+    public:
+        explicit ProtobufSerializerArray(std::unique_ptr<ProtobufSerializer> element_serializer_)
+            : element_serializer(std::move(element_serializer_))
+        {
+        }
+
+        void setColumns(const ColumnPtr * columns, [[maybe_unused]] size_t num_columns) override
+        {
+            assert(num_columns == 1);
+            column = columns[0];
+            const auto & column_array = assert_cast<const ColumnArray &>(*column);
+            ColumnPtr data_column = column_array.getDataPtr();
+            element_serializer->setColumns(&data_column, 1);
+        }
+
+        void setColumns(const MutableColumnPtr * columns, [[maybe_unused]] size_t num_columns) override
+        {
+            assert(num_columns == 1);
+            ColumnPtr column0 = columns[0]->getPtr();
+            setColumns(&column0, 1);
+        }
+
+        void writeRow(size_t row_num) override
+        {
+            const auto & column_array = assert_cast<const ColumnArray &>(*column);
+            const auto & offsets = column_array.getOffsets();
+            for (size_t i : ext::range(offsets[row_num - 1], offsets[row_num]))
+                element_serializer->writeRow(i);
+        }
+
+        void readRow(size_t row_num) override
+        {
+            auto & column_array = assert_cast<ColumnArray &>(column->assumeMutableRef());
+            auto & offsets = column_array.getOffsets();
+            size_t old_size = offsets.size();
+            if (row_num + 1 < old_size)
+                throw Exception("Cannot replace an element in the middle of ColumnArray", ErrorCodes::LOGICAL_ERROR);
+            auto data_column = column_array.getDataPtr();
+            size_t old_data_size = data_column->size();
+
+            try
+            {
+                element_serializer->readRow(old_data_size);
+                size_t data_size = data_column->size();
+                if (data_size != old_data_size + 1)
+                    throw Exception("Size of ColumnArray is unexpected", ErrorCodes::LOGICAL_ERROR);
+
+                if (row_num < old_size)
+                    offsets.back() = data_size;
+                else
+                    offsets.push_back(data_size);
+            }
+            catch (...)
+            {
+                if (data_column->size() > old_data_size)
+                    data_column->assumeMutableRef().popBack(data_column->size() - old_data_size);
+                if (offsets.size() > old_size)
+                    column_array.getOffsetsColumn().popBack(offsets.size() - old_size);
+                throw;
+            }
+        }
+
+        void insertDefaults(size_t row_num) override
+        {
+            auto & column_array = assert_cast<ColumnArray &>(column->assumeMutableRef());
+            if (row_num < column_array.size())
+                return;
+            column_array.insertDefault();
+        }
+
+    private:
+        const std::unique_ptr<ProtobufSerializer> element_serializer;
+        ColumnPtr column;
+    };
+
+
+    /// Serializes a ColumnTuple as a repeated field (just like we serialize arrays).
+    class ProtobufSerializerTupleAsArray : public ProtobufSerializer
+    {
+    public:
+        ProtobufSerializerTupleAsArray(
+            const std::shared_ptr<const DataTypeTuple> & tuple_data_type_,
+            const FieldDescriptor & field_descriptor_,
+            std::vector<std::unique_ptr<ProtobufSerializer>> element_serializers_)
+            : tuple_data_type(tuple_data_type_)
+            , tuple_size(tuple_data_type->getElements().size())
+            , field_descriptor(field_descriptor_)
+            , element_serializers(std::move(element_serializers_))
+        {
+            assert(tuple_size);
+            assert(tuple_size == element_serializers.size());
+        }
+
+        void setColumns(const ColumnPtr * columns, [[maybe_unused]] size_t num_columns) override
+        {
+            assert(num_columns == 1);
+            column = columns[0];
+            const auto & column_tuple = assert_cast<const ColumnTuple &>(*column);
+            for (size_t i : ext::range(tuple_size))
+            {
+                auto element_column = column_tuple.getColumnPtr(i);
+                element_serializers[i]->setColumns(&element_column, 1);
+            }
+            current_element_index = 0;
+        }
+
+        void setColumns(const MutableColumnPtr * columns, [[maybe_unused]] size_t num_columns) override
+        {
+            assert(num_columns == 1);
+            ColumnPtr column0 = columns[0]->getPtr();
+            setColumns(&column0, 1);
+        }
+
+        void writeRow(size_t row_num) override
+        {
+            for (size_t i : ext::range(tuple_size))
+                element_serializers[i]->writeRow(row_num);
+        }
+
+        void readRow(size_t row_num) override
+        {
+            auto & column_tuple = assert_cast<ColumnTuple &>(column->assumeMutableRef());
+
+            size_t old_size = column_tuple.size();
+            if (row_num >= old_size)
+                current_element_index = 0;
+
+            insertDefaults(row_num);
+
+            if (current_element_index >= tuple_size)
+            {
+                throw Exception(
+                    "Too many (" + std::to_string(current_element_index) + ") elements was read from the field "
+                        + field_descriptor.full_name() + " to fit in the data type " + tuple_data_type->getName(),
+                    ErrorCodes::PROTOBUF_BAD_CAST);
+            }
+
+            element_serializers[current_element_index]->readRow(row_num);
+            ++current_element_index;
+        }
+
+        void insertDefaults(size_t row_num) override
+        {
+            auto & column_tuple = assert_cast<ColumnTuple &>(column->assumeMutableRef());
+            size_t old_size = column_tuple.size();
+
+            if (row_num > old_size)
+                return;
+
+            try
+            {
+                for (size_t i : ext::range(tuple_size))
+                    element_serializers[i]->insertDefaults(row_num);
+            }
+            catch (...)
+            {
+                for (size_t i : ext::range(tuple_size))
+                {
+                    auto element_column = column_tuple.getColumnPtr(i)->assumeMutable();
+                    if (element_column->size() > old_size)
+                        element_column->popBack(element_column->size() - old_size);
+                }
+                throw;
+            }
+        }
+
+    private:
+        const std::shared_ptr<const DataTypeTuple> tuple_data_type;
+        const size_t tuple_size;
+        const FieldDescriptor & field_descriptor;
+        const std::vector<std::unique_ptr<ProtobufSerializer>> element_serializers;
+        ColumnPtr column;
+        size_t current_element_index = 0;
+    };
+
+
+    /// Serializes a message (root or nested) in the protobuf schema.
+    class ProtobufSerializerMessage : public ProtobufSerializer
+    {
+    public:
+        struct FieldDesc
+        {
+            size_t column_index;
+            size_t num_columns;
+            const FieldDescriptor * field_descriptor;
+            std::unique_ptr<ProtobufSerializer> field_serializer;
+        };
+
+        ProtobufSerializerMessage(
+            std::vector<FieldDesc> field_descs_,
+            const FieldDescriptor * parent_field_descriptor_,
+            bool with_length_delimiter_,
+            const ProtobufReaderOrWriter & reader_or_writer_)
+            : parent_field_descriptor(parent_field_descriptor_)
+            , with_length_delimiter(with_length_delimiter_)
+            , should_skip_if_empty(parent_field_descriptor ? shouldSkipZeroOrEmpty(*parent_field_descriptor) : false)
+            , reader(reader_or_writer_.reader)
+            , writer(reader_or_writer_.writer)
+        {
+            field_infos.reserve(field_descs_.size());
+            for (auto & desc : field_descs_)
+                field_infos.emplace_back(desc.column_index, desc.num_columns, *desc.field_descriptor, std::move(desc.field_serializer));
+
+            std::sort(field_infos.begin(), field_infos.end(),
+                      [](const FieldInfo & lhs, const FieldInfo & rhs) { return lhs.field_tag < rhs.field_tag; });
+
+            for (size_t i : ext::range(field_infos.size()))
+                field_index_by_field_tag.emplace(field_infos[i].field_tag, i);
+        }
+
+        void setColumns(const ColumnPtr * columns_, size_t num_columns_) override
+        {
+            columns.assign(columns_, columns_ + num_columns_);
+
+            for (const FieldInfo & info : field_infos)
+                info.field_serializer->setColumns(columns.data() + info.column_index, info.num_columns);
+
+            if (reader)
+            {
+                missing_column_indices.clear();
+                missing_column_indices.reserve(num_columns_);
+                size_t current_idx = 0;
+                for (const FieldInfo & info : field_infos)
+                {
+                    while (current_idx < info.column_index)
+                        missing_column_indices.push_back(current_idx++);
+                    current_idx = info.column_index + info.num_columns;
+                }
+                while (current_idx < num_columns_)
+                    missing_column_indices.push_back(current_idx++);
+            }
+        }
+
+        void setColumns(const MutableColumnPtr * columns_, size_t num_columns_) override
+        {
+            Columns cols;
+            cols.reserve(num_columns_);
+            for (size_t i : ext::range(num_columns_))
+                cols.push_back(columns_[i]->getPtr());
+            setColumns(cols.data(), cols.size());
+        }
+
+        void writeRow(size_t row_num) override
+        {
+            if (parent_field_descriptor)
+                writer->startNestedMessage();
+            else
+                writer->startMessage();
+
+            for (const FieldInfo & info : field_infos)
+            {
+                if (info.should_pack_repeated)
+                    writer->startRepeatedPack();
+                info.field_serializer->writeRow(row_num);
+                if (info.should_pack_repeated)
+                    writer->endRepeatedPack(info.field_tag, true);
+            }
+
+            if (parent_field_descriptor)
+            {
+                bool is_group = (parent_field_descriptor->type() == FieldTypeId::TYPE_GROUP);
+                writer->endNestedMessage(parent_field_descriptor->number(), is_group, should_skip_if_empty);
+            }
+            else
+                writer->endMessage(with_length_delimiter);
+        }
+
+        void readRow(size_t row_num) override
+        {
+            if (parent_field_descriptor)
+                reader->startNestedMessage();
+            else
+                reader->startMessage(with_length_delimiter);
+
+            if (!field_infos.empty())
+            {
+                last_field_index = 0;
+                last_field_tag = field_infos[0].field_tag;
+                size_t old_size = columns.empty() ? 0 : columns[0]->size();
+
+                try
+                {
+                    int field_tag;
+                    while (reader->readFieldNumber(field_tag))
+                    {
+                        size_t field_index = findFieldIndexByFieldTag(field_tag);
+                        if (field_index == static_cast<size_t>(-1))
+                            continue;
+                        auto * field_serializer = field_infos[field_index].field_serializer.get();
+                        field_serializer->readRow(row_num);
+                        field_infos[field_index].field_read = true;
+                    }
+
+                    for (auto & info : field_infos)
+                    {
+                        if (info.field_read)
+                            info.field_read = false;
+                        else
+                            info.field_serializer->insertDefaults(row_num);
+                    }
+                }
+                catch (...)
+                {
+                    for (auto & column : columns)
+                    {
+                        if (column->size() > old_size)
+                            column->assumeMutableRef().popBack(column->size() - old_size);
+                    }
+                    throw;
+                }
+            }
+
+            if (parent_field_descriptor)
+                reader->endNestedMessage();
+            else
+                reader->endMessage(false);
+            addDefaultsToMissingColumns(row_num);
+        }
+
+        void insertDefaults(size_t row_num) override
+        {
+            for (const FieldInfo & info : field_infos)
+                info.field_serializer->insertDefaults(row_num);
+            addDefaultsToMissingColumns(row_num);
+        }
+
+    private:
+        size_t findFieldIndexByFieldTag(int field_tag)
+        {
+            while (true)
+            {
+                if (field_tag == last_field_tag)
+                    return last_field_index;
+                if (field_tag < last_field_tag)
+                    break;
+                if (++last_field_index >= field_infos.size())
+                    break;
+                last_field_tag = field_infos[last_field_index].field_tag;
+            }
+            last_field_tag = field_tag;
+            auto it = field_index_by_field_tag.find(field_tag);
+            if (it == field_index_by_field_tag.end())
+                last_field_index = static_cast<size_t>(-1);
+            else
+                last_field_index = it->second;
+            return last_field_index;
+        }
+
+        void addDefaultsToMissingColumns(size_t row_num)
+        {
+            for (size_t column_idx : missing_column_indices)
+            {
+                auto & column = columns[column_idx];
+                size_t old_size = column->size();
+                if (row_num >= old_size)
+                    column->assumeMutableRef().insertDefault();
+            }
+        }
+
+        struct FieldInfo
+        {
+            FieldInfo(
+                size_t column_index_,
+                size_t num_columns_,
+                const FieldDescriptor & field_descriptor_,
+                std::unique_ptr<ProtobufSerializer> field_serializer_)
+                : column_index(column_index_)
+                , num_columns(num_columns_)
+                , field_descriptor(&field_descriptor_)
+                , field_tag(field_descriptor_.number())
+                , should_pack_repeated(shouldPackRepeated(field_descriptor_))
+                , field_serializer(std::move(field_serializer_))
+            {
+            }
+            size_t column_index;
+            size_t num_columns;
+            const FieldDescriptor * field_descriptor;
+            int field_tag;
+            bool should_pack_repeated;
+            std::unique_ptr<ProtobufSerializer> field_serializer;
+            bool field_read = false;
+        };
+
+        const FieldDescriptor * const parent_field_descriptor;
+        const bool with_length_delimiter;
+        const bool should_skip_if_empty;
+        ProtobufReader * const reader;
+        ProtobufWriter * const writer;
+        std::vector<FieldInfo> field_infos;
+        std::unordered_map<int, size_t> field_index_by_field_tag;
+        Columns columns;
+        std::vector<size_t> missing_column_indices;
+        int last_field_tag = 0;
+        size_t last_field_index = static_cast<size_t>(-1);
+    };
+
+
+    /// Serializes a tuple with explicit names as a nested message.
+    class ProtobufSerializerTupleAsNestedMessage : public ProtobufSerializer
+    {
+    public:
+        explicit ProtobufSerializerTupleAsNestedMessage(std::unique_ptr<ProtobufSerializerMessage> nested_message_serializer_)
+            : nested_message_serializer(std::move(nested_message_serializer_))
+        {
+        }
+
+        void setColumns(const ColumnPtr * columns, [[maybe_unused]] size_t num_columns) override
+        {
+            assert(num_columns == 1);
+            const auto & column_tuple = assert_cast<const ColumnTuple &>(*columns[0]);
+            size_t tuple_size = column_tuple.tupleSize();
+            assert(tuple_size);
+            Columns element_columns;
+            element_columns.reserve(tuple_size);
+            for (size_t i : ext::range(tuple_size))
+                element_columns.emplace_back(column_tuple.getColumnPtr(i));
+            nested_message_serializer->setColumns(element_columns.data(), element_columns.size());
+        }
+
+        void setColumns(const MutableColumnPtr * columns, [[maybe_unused]] size_t num_columns) override
+        {
+            assert(num_columns == 1);
+            ColumnPtr column0 = columns[0]->getPtr();
+            setColumns(&column0, 1);
+        }
+
+        void writeRow(size_t row_num) override { nested_message_serializer->writeRow(row_num); }
+        void readRow(size_t row_num) override { nested_message_serializer->readRow(row_num); }
+        void insertDefaults(size_t row_num) override { nested_message_serializer->insertDefaults(row_num); }
+
+    private:
+        const std::unique_ptr<ProtobufSerializerMessage> nested_message_serializer;
+    };
+
+
+    /// Serializes a flattened Nested data type (an array of tuples with explicit names)
+    /// as a repeated nested message.
+    class ProtobufSerializerFlattenedNestedAsArrayOfNestedMessages : public ProtobufSerializer
+    {
+    public:
+        explicit ProtobufSerializerFlattenedNestedAsArrayOfNestedMessages(
+            std::unique_ptr<ProtobufSerializerMessage> nested_message_serializer_)
+            : nested_message_serializer(std::move(nested_message_serializer_))
+        {
+        }
+
+        void setColumns(const ColumnPtr * columns, size_t num_columns) override
+        {
+            assert(num_columns);
+            data_columns.clear();
+            data_columns.reserve(num_columns);
+            offset_columns.clear();
+            offset_columns.reserve(num_columns);
+
+            for (size_t i : ext::range(num_columns))
+            {
+                const auto & column_array = assert_cast<const ColumnArray &>(*columns[i]);
+                data_columns.emplace_back(column_array.getDataPtr());
+                offset_columns.emplace_back(column_array.getOffsetsPtr());
+            }
+
+            std::sort(offset_columns.begin(), offset_columns.end());
+            offset_columns.erase(std::unique(offset_columns.begin(), offset_columns.end()), offset_columns.end());
+
+            nested_message_serializer->setColumns(data_columns.data(), data_columns.size());
+        }
+
+        void setColumns(const MutableColumnPtr * columns, size_t num_columns) override
+        {
+            Columns cols;
+            cols.reserve(num_columns);
+            for (size_t i : ext::range(num_columns))
+                cols.push_back(columns[i]->getPtr());
+            setColumns(cols.data(), cols.size());
+        }
+
+        void writeRow(size_t row_num) override
+        {
+            const auto & offset_column0 = assert_cast<const ColumnArray::ColumnOffsets &>(*offset_columns[0]);
+            size_t start_offset = offset_column0.getElement(row_num - 1);
+            size_t end_offset = offset_column0.getElement(row_num);
+            for (size_t i : ext::range(1, offset_columns.size()))
+            {
+                const auto & offset_column = assert_cast<const ColumnArray::ColumnOffsets &>(*offset_columns[i]);
+                if (offset_column.getElement(row_num) != end_offset)
+                    throw Exception("Components of FlattenedNested have different sizes", ErrorCodes::PROTOBUF_BAD_CAST);
+            }
+            for (size_t i : ext::range(start_offset, end_offset))
+                nested_message_serializer->writeRow(i);
+        }
+
+        void readRow(size_t row_num) override
+        {
+            size_t old_size = offset_columns[0]->size();
+            if (row_num + 1 < old_size)
+                throw Exception("Cannot replace an element in the middle of ColumnArray", ErrorCodes::LOGICAL_ERROR);
+
+            size_t old_data_size = data_columns[0]->size();
+
+            try
+            {
+                nested_message_serializer->readRow(old_data_size);
+                size_t data_size = data_columns[0]->size();
+                if (data_size != old_data_size + 1)
+                    throw Exception("Unexpected number of elements of ColumnArray has been read", ErrorCodes::LOGICAL_ERROR);
+
+                if (row_num < old_size)
+                {
+                    for (auto & offset_column : offset_columns)
+                        assert_cast<ColumnArray::ColumnOffsets &>(offset_column->assumeMutableRef()).getData().back() = data_size;
+                }
+                else
+                {
+                    for (auto & offset_column : offset_columns)
+                        assert_cast<ColumnArray::ColumnOffsets &>(offset_column->assumeMutableRef()).getData().push_back(data_size);
+                }
+            }
+            catch (...)
+            {
+                for (auto & data_column : data_columns)
+                {
+                    if (data_column->size() > old_data_size)
+                        data_column->assumeMutableRef().popBack(data_column->size() - old_data_size);
+                }
+                for (auto & offset_column : offset_columns)
+                {
+                    if (offset_column->size() > old_size)
+                        offset_column->assumeMutableRef().popBack(offset_column->size() - old_size);
+                }
+                throw;
+            }
+        }
+
+        void insertDefaults(size_t row_num) override
+        {
+            size_t old_size = offset_columns[0]->size();
+            if (row_num < old_size)
+                return;
+
+            try
+            {
+                size_t data_size = data_columns[0]->size();
+                for (auto & offset_column : offset_columns)
+                    assert_cast<ColumnArray::ColumnOffsets &>(offset_column->assumeMutableRef()).getData().push_back(data_size);
+            }
+            catch (...)
+            {
+                for (auto & offset_column : offset_columns)
+                {
+                    if (offset_column->size() > old_size)
+                        offset_column->assumeMutableRef().popBack(offset_column->size() - old_size);
+                }
+                throw;
+            }
+        }
+
+    private:
+        const std::unique_ptr<ProtobufSerializerMessage> nested_message_serializer;
+        Columns data_columns;
+        Columns offset_columns;
+    };
+
+
+    /// Produces a tree of ProtobufSerializers which serializes a row as a protobuf message.
+    class ProtobufSerializerBuilder
+    {
+    public:
+        explicit ProtobufSerializerBuilder(const ProtobufReaderOrWriter & reader_or_writer_) : reader_or_writer(reader_or_writer_) {}
+
+        std::unique_ptr<ProtobufSerializerMessage> buildMessageSerializer(
+            const Strings & column_names,
+            const DataTypes & data_types,
+            std::vector<size_t> & missing_column_indices,
+            const MessageDescriptor & message_descriptor,
+            bool with_length_delimiter)
+        {
+            std::vector<size_t> used_column_indices;
+            auto serializer = buildMessageSerializerImpl(
+                /* num_columns = */ column_names.size(),
+                column_names.data(),
+                data_types.data(),
+                used_column_indices,
+                message_descriptor,
+                with_length_delimiter,
+                /* parent_field_descriptor = */ nullptr);
+
+            if (!serializer)
+            {
+                throw Exception(
+                    "Not found matches between the names of the columns {" + boost::algorithm::join(column_names, ", ")
+                        + "} and the fields {" + boost::algorithm::join(getFieldNames(message_descriptor), ", ") + "} of the message "
+                        + quoteString(message_descriptor.full_name()) + " in the protobuf schema",
+                    ErrorCodes::NO_COLUMNS_SERIALIZED_TO_PROTOBUF_FIELDS);
+            }
+
+            missing_column_indices.clear();
+            missing_column_indices.reserve(column_names.size() - used_column_indices.size());
+            boost::range::set_difference(ext::range(column_names.size()), used_column_indices,
+                                         std::back_inserter(missing_column_indices));
+
+            return serializer;
+        }
+
+    private:
+        /// Collects all field names from the message (used only to format error messages).
+        static Strings getFieldNames(const MessageDescriptor & message_descriptor)
+        {
+            Strings field_names;
+            field_names.reserve(message_descriptor.field_count());
+            for (int i : ext::range(message_descriptor.field_count()))
+                field_names.emplace_back(message_descriptor.field(i)->name());
+            return field_names;
+        }
+
+        static bool columnNameEqualsToFieldName(const std::string_view & column_name, const FieldDescriptor & field_descriptor)
+        {
+            std::string_view suffix;
+            return columnNameStartsWithFieldName(column_name, field_descriptor, suffix) && suffix.empty();
+        }
+
+        /// Checks if a passed column's name starts with a specified field's name.
+        /// The function also assigns `suffix` to the rest part of the column's name
+        /// which doesn't match to the field's name.
+        /// The function requires that rest part of the column's name to be started with a dot '.' or underline '_',
+        /// but doesn't include those '.' or '_' characters into `suffix`.
+        static bool columnNameStartsWithFieldName(const std::string_view & column_name, const FieldDescriptor & field_descriptor, std::string_view & suffix)
+        {
+            size_t matching_length = 0;
+            const MessageDescriptor & containing_type = *field_descriptor.containing_type();
+            if (containing_type.options().map_entry())
+            {
+                /// Special case. Elements of the data type Map are named as "keys" and "values",
+                /// but they're internally named as "key" and "value" in protobuf schema.
+                if (field_descriptor.number() == 1)
+                {
+                    if (ColumnNameWithProtobufFieldNameComparator::startsWith(column_name, "keys"))
+                        matching_length = strlen("keys");
+                    else if (ColumnNameWithProtobufFieldNameComparator::startsWith(column_name, "key"))
+                        matching_length = strlen("key");
+                }
+                else if (field_descriptor.number() == 2)
+                {
+                    if (ColumnNameWithProtobufFieldNameComparator::startsWith(column_name, "values"))
+                        matching_length = strlen("values");
+                    else if (ColumnNameWithProtobufFieldNameComparator::startsWith(column_name, "value"))
+                        matching_length = strlen("value");
+                }
+            }
+            if (!matching_length && ColumnNameWithProtobufFieldNameComparator::startsWith(column_name, field_descriptor.name()))
+            {
+                matching_length = field_descriptor.name().length();
+            }
+            if (column_name.length() == matching_length)
+                return true;
+            if ((column_name.length() < matching_length + 2) || !field_descriptor.message_type())
+                return false;
+            char first_char_after_matching = column_name[matching_length];
+            if (!ColumnNameWithProtobufFieldNameComparator::equals(first_char_after_matching, '.'))
+                return false;
+            suffix = column_name.substr(matching_length + 1);
+            return true;
+        }
+
+        /// Finds fields in the protobuf message which can be considered as matching
+        /// for a specified column's name. The found fields can be nested messages,
+        /// for that case suffixes are also returned.
+        /// This is only the first filter, buildMessageSerializerImpl() does other checks after calling this function.
+        static bool findFieldsByColumnName(
+            const std::string_view & column_name,
+            const MessageDescriptor & message_descriptor,
+            std::vector<std::pair<const FieldDescriptor *, std::string_view /* suffix */>> & out_field_descriptors_with_suffixes)
+        {
+            out_field_descriptors_with_suffixes.clear();
+
+            /// Find all fields which have the same name as column's name (case-insensitively); i.e. we're checking
+            /// field_name == column_name.
+            for (int i : ext::range(message_descriptor.field_count()))
+            {
+                const auto & field_descriptor = *message_descriptor.field(i);
+                if (columnNameEqualsToFieldName(column_name, field_descriptor))
+                {
+                    out_field_descriptors_with_suffixes.emplace_back(&field_descriptor, std::string_view{});
+                    break;
+                }
+            }
+
+            if (!out_field_descriptors_with_suffixes.empty())
+                return true; /// We have an exact match, no need to compare prefixes.
+
+            /// Find all fields which name is used as prefix in column's name; i.e. we're checking
+            /// column_name == field_name + '.' + nested_message_field_name
+            for (int i : ext::range(message_descriptor.field_count()))
+            {
+                const auto & field_descriptor = *message_descriptor.field(i);
+                std::string_view suffix;
+                if (columnNameStartsWithFieldName(column_name, field_descriptor, suffix))
+                {
+                    out_field_descriptors_with_suffixes.emplace_back(&field_descriptor, suffix);
+                }
+            }
+
+            /// Shorter suffixes first.
+            std::sort(out_field_descriptors_with_suffixes.begin(), out_field_descriptors_with_suffixes.end(),
+                      [](const std::pair<const FieldDescriptor *, std::string_view /* suffix */> & f1,
+                         const std::pair<const FieldDescriptor *, std::string_view /* suffix */> & f2)
+            {
+                return f1.second.length() < f2.second.length();
+            });
+
+            return !out_field_descriptors_with_suffixes.empty();
+        }
+
+        /// Builds a serializer for a protobuf message (root or nested).
+        template <typename StringOrStringViewT>
+        std::unique_ptr<ProtobufSerializerMessage> buildMessageSerializerImpl(
+            size_t num_columns,
+            const StringOrStringViewT * column_names,
+            const DataTypePtr * data_types,
+            std::vector<size_t> & used_column_indices,
+            const MessageDescriptor & message_descriptor,
+            bool with_length_delimiter,
+            const FieldDescriptor * parent_field_descriptor)
+        {
+            std::vector<ProtobufSerializerMessage::FieldDesc> field_descs;
+            boost::container::flat_map<const FieldDescriptor *, std::string_view> field_descriptors_in_use;
+
+            used_column_indices.clear();
+            used_column_indices.reserve(num_columns);
+
+            auto add_field_serializer = [&](size_t column_index_,
+                                            const std::string_view & column_name_,
+                                            size_t num_columns_,
+                                            const FieldDescriptor & field_descriptor_,
+                                            std::unique_ptr<ProtobufSerializer> field_serializer_)
+            {
+                auto it = field_descriptors_in_use.find(&field_descriptor_);
+                if (it != field_descriptors_in_use.end())
+                {
+                    throw Exception(
+                        "Multiple columns (" + backQuote(StringRef{field_descriptors_in_use[&field_descriptor_]}) + ", "
+                            + backQuote(StringRef{column_name_}) + ") cannot be serialized to a single protobuf field "
+                            + quoteString(field_descriptor_.full_name()),
+                        ErrorCodes::MULTIPLE_COLUMNS_SERIALIZED_TO_SAME_PROTOBUF_FIELD);
+                }
+
+                field_descs.push_back({column_index_, num_columns_, &field_descriptor_, std::move(field_serializer_)});
+                field_descriptors_in_use.emplace(&field_descriptor_, column_name_);
+            };
+
+            std::vector<std::pair<const FieldDescriptor *, std::string_view>> field_descriptors_with_suffixes;
+
+            /// We're going through all the passed columns.
+            size_t column_idx = 0;
+            size_t next_column_idx = 1;
+            for (; column_idx != num_columns; column_idx = next_column_idx++)
+            {
+                auto column_name = column_names[column_idx];
+                const auto & data_type = data_types[column_idx];
+
+                if (!findFieldsByColumnName(column_name, message_descriptor, field_descriptors_with_suffixes))
+                    continue;
+
+                if ((field_descriptors_with_suffixes.size() == 1) && field_descriptors_with_suffixes[0].second.empty())
+                {
+                    /// Simple case: one column is serialized as one field.
+                    const auto & field_descriptor = *field_descriptors_with_suffixes[0].first;
+                    auto field_serializer = buildFieldSerializer(column_name, data_type, field_descriptor, field_descriptor.is_repeated());
+
+                    if (field_serializer)
+                    {
+                        add_field_serializer(column_idx, column_name, 1, field_descriptor, std::move(field_serializer));
+                        used_column_indices.push_back(column_idx);
+                        continue;
+                    }
+                }
+
+                for (const auto & [field_descriptor, suffix] : field_descriptors_with_suffixes)
+                {
+                    if (!suffix.empty())
+                    {
+                        /// Complex case: one or more columns are serialized as a nested message.
+                        std::vector<std::string_view> names_relative_to_nested_message;
+                        names_relative_to_nested_message.reserve(num_columns - column_idx);
+                        names_relative_to_nested_message.emplace_back(suffix);
+
+                        for (size_t j : ext::range(column_idx + 1, num_columns))
+                        {
+                            std::string_view next_suffix;
+                            if (!columnNameStartsWithFieldName(column_names[j], *field_descriptor, next_suffix))
+                                break;
+                            names_relative_to_nested_message.emplace_back(next_suffix);
+                        }
+
+                        /// Now we have up to `names_relative_to_nested_message.size()` sequential columns
+                        /// which can be serialized as a nested message.
+
+                        /// Calculate how many of those sequential columns are arrays.
+                        size_t num_arrays = 0;
+                        for (size_t j : ext::range(column_idx, column_idx + names_relative_to_nested_message.size()))
+                        {
+                            if (data_types[j]->getTypeId() != TypeIndex::Array)
+                                break;
+                            ++num_arrays;
+                        }
+
+                        /// We will try to serialize the sequential columns as one nested message,
+                        /// then, if failed, as an array of nested messages (on condition those columns are array).
+                        bool has_fallback_to_array_of_nested_messages = num_arrays && field_descriptor->is_repeated();
+
+                        /// Try to serialize the sequential columns as one nested message.
+                        try
+                        {
+                            std::vector<size_t> used_column_indices_in_nested;
+                            auto nested_message_serializer = buildMessageSerializerImpl(
+                                names_relative_to_nested_message.size(),
+                                names_relative_to_nested_message.data(),
+                                &data_types[column_idx],
+                                used_column_indices_in_nested,
+                                *field_descriptor->message_type(),
+                                false,
+                                field_descriptor);
+
+                            if (nested_message_serializer)
+                            {
+                                for (size_t & idx_in_nested : used_column_indices_in_nested)
+                                    used_column_indices.push_back(idx_in_nested + column_idx);
+
+                                next_column_idx = used_column_indices.back() + 1;
+                                add_field_serializer(column_idx, column_name, next_column_idx - column_idx, *field_descriptor, std::move(nested_message_serializer));
+                                break;
+                            }
+                        }
+                        catch (Exception & e)
+                        {
+                            if ((e.code() != ErrorCodes::PROTOBUF_FIELD_NOT_REPEATED) || !has_fallback_to_array_of_nested_messages)
+                                throw;
+                        }
+
+                        if (has_fallback_to_array_of_nested_messages)
+                        {
+                            /// Try to serialize the sequential columns as an array of nested messages.
+                            DataTypes array_nested_data_types;
+                            array_nested_data_types.reserve(num_arrays);
+                            for (size_t j : ext::range(column_idx, column_idx + num_arrays))
+                                array_nested_data_types.emplace_back(assert_cast<const DataTypeArray &>(*data_types[j]).getNestedType());
+
+                            std::vector<size_t> used_column_indices_in_nested;
+                            auto nested_message_serializer = buildMessageSerializerImpl(
+                                array_nested_data_types.size(),
+                                names_relative_to_nested_message.data(),
+                                array_nested_data_types.data(),
+                                used_column_indices_in_nested,
+                                *field_descriptor->message_type(),
+                                false,
+                                field_descriptor);
+
+                            if (nested_message_serializer)
+                            {
+                                auto field_serializer = std::make_unique<ProtobufSerializerFlattenedNestedAsArrayOfNestedMessages>(std::move(nested_message_serializer));
+
+                                for (size_t & idx_in_nested : used_column_indices_in_nested)
+                                    used_column_indices.push_back(idx_in_nested + column_idx);
+
+                                next_column_idx = used_column_indices.back() + 1;
+                                add_field_serializer(column_idx, column_name, next_column_idx - column_idx, *field_descriptor, std::move(field_serializer));
+                                break;
+                            }
+                        }
+                    }
+                }
+            }
+
+            /// Check that we've found matching columns for all the required fields.
+            if ((message_descriptor.file()->syntax() == google::protobuf::FileDescriptor::SYNTAX_PROTO2)
+                    && reader_or_writer.writer)
+            {
+                for (int i : ext::range(message_descriptor.field_count()))
+                {
+                    const auto & field_descriptor = *message_descriptor.field(i);
+                    if (field_descriptor.is_required() && !field_descriptors_in_use.count(&field_descriptor))
+                        throw Exception(
+                            "Field " + quoteString(field_descriptor.full_name()) + " is required to be set",
+                            ErrorCodes::NO_COLUMN_SERIALIZED_TO_REQUIRED_PROTOBUF_FIELD);
+                }
+            }
+
+            if (field_descs.empty())
+                return nullptr;
+
+            return std::make_unique<ProtobufSerializerMessage>(
+                std::move(field_descs), parent_field_descriptor, with_length_delimiter, reader_or_writer);
+        }
+
+        /// Builds a serializer for one-to-one match:
+        /// one column is serialized as one field in the protobuf message.
+        std::unique_ptr<ProtobufSerializer> buildFieldSerializer(
+            const std::string_view & column_name,
+            const DataTypePtr & data_type,
+            const FieldDescriptor & field_descriptor,
+            bool allow_repeat)
+        {
+            auto data_type_id = data_type->getTypeId();
+            switch (data_type_id)
+            {
+                case TypeIndex::UInt8: return std::make_unique<ProtobufSerializerNumber<UInt8>>(field_descriptor, reader_or_writer);
+                case TypeIndex::UInt16: return std::make_unique<ProtobufSerializerNumber<UInt16>>(field_descriptor, reader_or_writer);
+                case TypeIndex::UInt32: return std::make_unique<ProtobufSerializerNumber<UInt32>>(field_descriptor, reader_or_writer);
+                case TypeIndex::UInt64: return std::make_unique<ProtobufSerializerNumber<UInt64>>(field_descriptor, reader_or_writer);
+                case TypeIndex::UInt128: return std::make_unique<ProtobufSerializerNumber<UInt128>>(field_descriptor, reader_or_writer);
+                case TypeIndex::UInt256: return std::make_unique<ProtobufSerializerNumber<UInt256>>(field_descriptor, reader_or_writer);
+                case TypeIndex::Int8: return std::make_unique<ProtobufSerializerNumber<Int8>>(field_descriptor, reader_or_writer);
+                case TypeIndex::Int16: return std::make_unique<ProtobufSerializerNumber<Int16>>(field_descriptor, reader_or_writer);
+                case TypeIndex::Int32: return std::make_unique<ProtobufSerializerNumber<Int32>>(field_descriptor, reader_or_writer);
+                case TypeIndex::Int64: return std::make_unique<ProtobufSerializerNumber<Int64>>(field_descriptor, reader_or_writer);
+                case TypeIndex::Int128: return std::make_unique<ProtobufSerializerNumber<Int128>>(field_descriptor, reader_or_writer);
+                case TypeIndex::Int256: return std::make_unique<ProtobufSerializerNumber<Int256>>(field_descriptor, reader_or_writer);
+                case TypeIndex::Float32: return std::make_unique<ProtobufSerializerNumber<Float32>>(field_descriptor, reader_or_writer);
+                case TypeIndex::Float64: return std::make_unique<ProtobufSerializerNumber<Float64>>(field_descriptor, reader_or_writer);
+                case TypeIndex::Date: return std::make_unique<ProtobufSerializerDate>(field_descriptor, reader_or_writer);
+                case TypeIndex::DateTime: return std::make_unique<ProtobufSerializerDateTime>(field_descriptor, reader_or_writer);
+                case TypeIndex::DateTime64: return std::make_unique<ProtobufSerializerDateTime64>(assert_cast<const DataTypeDateTime64 &>(*data_type), field_descriptor, reader_or_writer);
+                case TypeIndex::String: return std::make_unique<ProtobufSerializerString<false>>(field_descriptor, reader_or_writer);
+                case TypeIndex::FixedString: return std::make_unique<ProtobufSerializerString<true>>(assert_cast<const DataTypeFixedString &>(*data_type), field_descriptor, reader_or_writer);
+                case TypeIndex::Enum8: return std::make_unique<ProtobufSerializerEnum<Int8>>(typeid_cast<std::shared_ptr<const DataTypeEnum8>>(data_type), field_descriptor, reader_or_writer);
+                case TypeIndex::Enum16: return std::make_unique<ProtobufSerializerEnum<Int16>>(typeid_cast<std::shared_ptr<const DataTypeEnum16>>(data_type), field_descriptor, reader_or_writer);
+                case TypeIndex::Decimal32: return std::make_unique<ProtobufSerializerDecimal<Decimal32>>(assert_cast<const DataTypeDecimal<Decimal32> &>(*data_type), field_descriptor, reader_or_writer);
+                case TypeIndex::Decimal64: return std::make_unique<ProtobufSerializerDecimal<Decimal64>>(assert_cast<const DataTypeDecimal<Decimal64> &>(*data_type), field_descriptor, reader_or_writer);
+                case TypeIndex::Decimal128: return std::make_unique<ProtobufSerializerDecimal<Decimal128>>(assert_cast<const DataTypeDecimal<Decimal128> &>(*data_type), field_descriptor, reader_or_writer);
+                case TypeIndex::Decimal256: return std::make_unique<ProtobufSerializerDecimal<Decimal256>>(assert_cast<const DataTypeDecimal<Decimal256> &>(*data_type), field_descriptor, reader_or_writer);
+                case TypeIndex::UUID: return std::make_unique<ProtobufSerializerUUID>(field_descriptor, reader_or_writer);
+                case TypeIndex::Interval: return std::make_unique<ProtobufSerializerInterval>(field_descriptor, reader_or_writer);
+                case TypeIndex::AggregateFunction: return std::make_unique<ProtobufSerializerAggregateFunction>(typeid_cast<std::shared_ptr<const DataTypeAggregateFunction>>(data_type), field_descriptor, reader_or_writer);
+
+                case TypeIndex::Nullable:
+                {
+                    const auto & nullable_data_type = assert_cast<const DataTypeNullable &>(*data_type);
+                    auto nested_serializer = buildFieldSerializer(column_name, nullable_data_type.getNestedType(), field_descriptor, allow_repeat);
+                    if (!nested_serializer)
+                        return nullptr;
+                    return std::make_unique<ProtobufSerializerNullable>(std::move(nested_serializer));
+                }
+
+                case TypeIndex::LowCardinality:
+                {
+                    const auto & low_cardinality_data_type = assert_cast<const DataTypeLowCardinality &>(*data_type);
+                    auto nested_serializer
+                        = buildFieldSerializer(column_name, low_cardinality_data_type.getDictionaryType(), field_descriptor, allow_repeat);
+                    if (!nested_serializer)
+                        return nullptr;
+                    return std::make_unique<ProtobufSerializerLowCardinality>(std::move(nested_serializer));
+                }
+
+                case TypeIndex::Map:
+                {
+                    const auto & map_data_type = assert_cast<const DataTypeMap &>(*data_type);
+                    auto nested_serializer = buildFieldSerializer(column_name, map_data_type.getNestedType(), field_descriptor, allow_repeat);
+                    if (!nested_serializer)
+                        return nullptr;
+                    return std::make_unique<ProtobufSerializerMap>(std::move(nested_serializer));
+                }
+
+                case TypeIndex::Array:
+                {
+                    /// Array is serialized as a repeated field.
+                    const auto & array_data_type = assert_cast<const DataTypeArray &>(*data_type);
+
+                    if (!allow_repeat)
+                    {
+                        throw Exception(
+                            "The field " + quoteString(field_descriptor.full_name())
+                                + " must be repeated in the protobuf schema to match the column " + backQuote(StringRef{column_name}),
+                            ErrorCodes::PROTOBUF_FIELD_NOT_REPEATED);
+                    }
+
+                    auto nested_serializer = buildFieldSerializer(column_name, array_data_type.getNestedType(), field_descriptor,
+                                                                  /* allow_repeat = */ false); // We do our repeating now, so for nested type we forget about the repeating.
+                    if (!nested_serializer)
+                        return nullptr;
+                    return std::make_unique<ProtobufSerializerArray>(std::move(nested_serializer));
+                }
+
+                case TypeIndex::Tuple:
+                {
+                    /// Tuple is serialized in one of two ways:
+                    /// 1) If the tuple has explicit names then it can be serialized as a nested message.
+                    /// 2) Any tuple can be serialized as a repeated field, just like Array.
+                    const auto & tuple_data_type = assert_cast<const DataTypeTuple &>(*data_type);
+                    size_t size_of_tuple = tuple_data_type.getElements().size();
+
+                    if (tuple_data_type.haveExplicitNames() && field_descriptor.message_type())
+                    {
+                        /// Try to serialize as a nested message.
+                        std::vector<size_t> used_column_indices;
+                        auto nested_message_serializer = buildMessageSerializerImpl(
+                            size_of_tuple,
+                            tuple_data_type.getElementNames().data(),
+                            tuple_data_type.getElements().data(),
+                            used_column_indices,
+                            *field_descriptor.message_type(),
+                            false,
+                            &field_descriptor);
+
+                        if (!nested_message_serializer)
+                        {
+                            throw Exception(
+                                "Not found matches between the names of the tuple's elements {"
+                                    + boost::algorithm::join(tuple_data_type.getElementNames(), ", ") + "} and the fields {"
+                                    + boost::algorithm::join(getFieldNames(*field_descriptor.message_type()), ", ") + "} of the message "
+                                    + quoteString(field_descriptor.message_type()->full_name()) + " in the protobuf schema",
+                                ErrorCodes::NO_COLUMNS_SERIALIZED_TO_PROTOBUF_FIELDS);
+                        }
+
+                        return std::make_unique<ProtobufSerializerTupleAsNestedMessage>(std::move(nested_message_serializer));
+                    }
+
+                    /// Serialize as a repeated field.
+                    if (!allow_repeat && (size_of_tuple > 1))
+                    {
+                        throw Exception(
+                            "The field " + quoteString(field_descriptor.full_name())
+                                + " must be repeated in the protobuf schema to match the column " + backQuote(StringRef{column_name}),
+                            ErrorCodes::PROTOBUF_FIELD_NOT_REPEATED);
+                    }
+
+                    std::vector<std::unique_ptr<ProtobufSerializer>> nested_serializers;
+                    for (const auto & nested_data_type : tuple_data_type.getElements())
+                    {
+                        auto nested_serializer = buildFieldSerializer(column_name, nested_data_type, field_descriptor,
+                                                                      /* allow_repeat = */ false); // We do our repeating now, so for nested type we forget about the repeating.
+                        if (!nested_serializer)
+                            break;
+                        nested_serializers.push_back(std::move(nested_serializer));
+                    }
+
+                    if (nested_serializers.size() != size_of_tuple)
+                        return nullptr;
+
+                    return std::make_unique<ProtobufSerializerTupleAsArray>(
+                        typeid_cast<std::shared_ptr<const DataTypeTuple>>(data_type),
+                        field_descriptor,
+                        std::move(nested_serializers));
+                }
+
+                default:
+                    throw Exception("Unknown data type: " + data_type->getName(), ErrorCodes::LOGICAL_ERROR);
+            }
+        }
+
+        const ProtobufReaderOrWriter reader_or_writer;
+    };
+}
+
+
+std::unique_ptr<ProtobufSerializer> ProtobufSerializer::create(
+    const Strings & column_names,
+    const DataTypes & data_types,
+    std::vector<size_t> & missing_column_indices,
+    const google::protobuf::Descriptor & message_descriptor,
+    bool with_length_delimiter,
+    ProtobufReader & reader)
+{
+    return ProtobufSerializerBuilder(reader).buildMessageSerializer(column_names, data_types, missing_column_indices, message_descriptor, with_length_delimiter);
+}
+
+std::unique_ptr<ProtobufSerializer> ProtobufSerializer::create(
+    const Strings & column_names,
+    const DataTypes & data_types,
+    const google::protobuf::Descriptor & message_descriptor,
+    bool with_length_delimiter,
+    ProtobufWriter & writer)
+{
+    std::vector<size_t> missing_column_indices;
+    return ProtobufSerializerBuilder(writer).buildMessageSerializer(column_names, data_types, missing_column_indices, message_descriptor, with_length_delimiter);
+}
+}
+#endif
diff --git a/src/Formats/ProtobufSerializer.h b/src/Formats/ProtobufSerializer.h
new file mode 100644
index 00000000000..86a2f2f36dd
--- /dev/null
+++ b/src/Formats/ProtobufSerializer.h
@@ -0,0 +1,52 @@
+#pragma once
+
+#if !defined(ARCADIA_BUILD)
+#    include "config_formats.h"
+#endif
+
+#if USE_PROTOBUF
+#   include <Columns/IColumn.h>
+
+
+namespace google::protobuf { class Descriptor; }
+
+namespace DB
+{
+class ProtobufReader;
+class ProtobufWriter;
+class IDataType;
+using DataTypePtr = std::shared_ptr<const IDataType>;
+using DataTypes = std::vector<DataTypePtr>;
+
+
+/// Utility class, does all the work for serialization in the Protobuf format.
+class ProtobufSerializer
+{
+public:
+    virtual ~ProtobufSerializer() = default;
+
+    virtual void setColumns(const ColumnPtr * columns, size_t num_columns) = 0;
+    virtual void writeRow(size_t row_num) = 0;
+
+    virtual void setColumns(const MutableColumnPtr * columns, size_t num_columns) = 0;
+    virtual void readRow(size_t row_num) = 0;
+    virtual void insertDefaults(size_t row_num) = 0;
+
+    static std::unique_ptr<ProtobufSerializer> create(
+        const Strings & column_names,
+        const DataTypes & data_types,
+        std::vector<size_t> & missing_column_indices,
+        const google::protobuf::Descriptor & message_descriptor,
+        bool with_length_delimiter,
+        ProtobufReader & reader);
+
+    static std::unique_ptr<ProtobufSerializer> create(
+        const Strings & column_names,
+        const DataTypes & data_types,
+        const google::protobuf::Descriptor & message_descriptor,
+        bool with_length_delimiter,
+        ProtobufWriter & writer);
+};
+
+}
+#endif
diff --git a/src/Formats/ProtobufWriter.cpp b/src/Formats/ProtobufWriter.cpp
index e62d8fc4a58..ece4f78b1c8 100644
--- a/src/Formats/ProtobufWriter.cpp
+++ b/src/Formats/ProtobufWriter.cpp
@@ -1,29 +1,11 @@
 #include "ProtobufWriter.h"
 
 #if USE_PROTOBUF
-#    include <cassert>
-#    include <optional>
-#    include <math.h>
-#    include <AggregateFunctions/IAggregateFunction.h>
-#    include <DataTypes/DataTypesDecimal.h>
-#    include <IO/ReadHelpers.h>
-#    include <IO/WriteHelpers.h>
-#    include <boost/numeric/conversion/cast.hpp>
-#    include <google/protobuf/descriptor.h>
-#    include <google/protobuf/descriptor.pb.h>
+#   include <IO/WriteHelpers.h>
 
 
 namespace DB
 {
-namespace ErrorCodes
-{
-    extern const int NOT_IMPLEMENTED;
-    extern const int NO_DATA_FOR_REQUIRED_PROTOBUF_FIELD;
-    extern const int PROTOBUF_BAD_CAST;
-    extern const int PROTOBUF_FIELD_NOT_REPEATED;
-}
-
-
 namespace
 {
     constexpr size_t MAX_VARINT_SIZE = 10;
@@ -81,66 +63,24 @@ namespace
     }
 
     void writeFieldNumber(UInt32 field_number, WireType wire_type, PODArray<UInt8> & buf) { writeVarint((field_number << 3) | wire_type, buf); }
-
-    // Should we pack repeated values while storing them.
-    // It depends on type of the field in the protobuf schema and the syntax of that schema.
-    bool shouldPackRepeated(const google::protobuf::FieldDescriptor * field)
-    {
-        if (!field->is_repeated())
-            return false;
-        switch (field->type())
-        {
-            case google::protobuf::FieldDescriptor::TYPE_INT32:
-            case google::protobuf::FieldDescriptor::TYPE_UINT32:
-            case google::protobuf::FieldDescriptor::TYPE_SINT32:
-            case google::protobuf::FieldDescriptor::TYPE_INT64:
-            case google::protobuf::FieldDescriptor::TYPE_UINT64:
-            case google::protobuf::FieldDescriptor::TYPE_SINT64:
-            case google::protobuf::FieldDescriptor::TYPE_FIXED32:
-            case google::protobuf::FieldDescriptor::TYPE_SFIXED32:
-            case google::protobuf::FieldDescriptor::TYPE_FIXED64:
-            case google::protobuf::FieldDescriptor::TYPE_SFIXED64:
-            case google::protobuf::FieldDescriptor::TYPE_FLOAT:
-            case google::protobuf::FieldDescriptor::TYPE_DOUBLE:
-            case google::protobuf::FieldDescriptor::TYPE_BOOL:
-            case google::protobuf::FieldDescriptor::TYPE_ENUM:
-                break;
-            default:
-                return false;
-        }
-        if (field->options().has_packed())
-            return field->options().packed();
-        return field->file()->syntax() == google::protobuf::FileDescriptor::SYNTAX_PROTO3;
-    }
-
-    // Should we omit null values (zero for numbers / empty string for strings) while storing them.
-    bool shouldSkipNullValue(const google::protobuf::FieldDescriptor * field)
-    {
-        return field->is_optional() && (field->file()->syntax() == google::protobuf::FileDescriptor::SYNTAX_PROTO3);
-    }
 }
 
 
-// SimpleWriter is an utility class to serialize protobufs.
-// Knows nothing about protobuf schemas, just provides useful functions to serialize data.
-ProtobufWriter::SimpleWriter::SimpleWriter(WriteBuffer & out_, const bool use_length_delimiters_)
+ProtobufWriter::ProtobufWriter(WriteBuffer & out_)
     : out(out_)
-    , current_piece_start(0)
-    , num_bytes_skipped(0)
-    , use_length_delimiters(use_length_delimiters_)
 {
 }
 
-ProtobufWriter::SimpleWriter::~SimpleWriter() = default;
+ProtobufWriter::~ProtobufWriter() = default;
 
-void ProtobufWriter::SimpleWriter::startMessage()
+void ProtobufWriter::startMessage()
 {
 }
 
-void ProtobufWriter::SimpleWriter::endMessage()
+void ProtobufWriter::endMessage(bool with_length_delimiter)
 {
     pieces.emplace_back(current_piece_start, buffer.size());
-    if (use_length_delimiters)
+    if (with_length_delimiter)
     {
         size_t size_of_message = buffer.size() - num_bytes_skipped;
         writeVarint(size_of_message, out);
@@ -154,7 +94,7 @@ void ProtobufWriter::SimpleWriter::endMessage()
     current_piece_start = 0;
 }
 
-void ProtobufWriter::SimpleWriter::startNestedMessage()
+void ProtobufWriter::startNestedMessage()
 {
     nested_infos.emplace_back(pieces.size(), num_bytes_skipped);
     pieces.emplace_back(current_piece_start, buffer.size());
@@ -167,7 +107,7 @@ void ProtobufWriter::SimpleWriter::startNestedMessage()
     num_bytes_skipped = NESTED_MESSAGE_PADDING;
 }
 
-void ProtobufWriter::SimpleWriter::endNestedMessage(UInt32 field_number, bool is_group, bool skip_if_empty)
+void ProtobufWriter::endNestedMessage(int field_number, bool is_group, bool skip_if_empty)
 {
     const auto & nested_info = nested_infos.back();
     size_t num_pieces_at_start = nested_info.num_pieces_at_start;
@@ -203,8 +143,13 @@ void ProtobufWriter::SimpleWriter::endNestedMessage(UInt32 field_number, bool is
     num_bytes_skipped += num_bytes_skipped_at_start - num_bytes_inserted;
 }
 
-void ProtobufWriter::SimpleWriter::writeUInt(UInt32 field_number, UInt64 value)
+void ProtobufWriter::writeUInt(int field_number, UInt64 value)
 {
+    if (in_repeated_pack)
+    {
+        writeVarint(value, buffer);
+        return;
+    }
     size_t old_size = buffer.size();
     buffer.reserve(old_size + 2 * MAX_VARINT_SIZE);
     UInt8 * ptr = buffer.data() + old_size;
@@ -213,20 +158,27 @@ void ProtobufWriter::SimpleWriter::writeUInt(UInt32 field_number, UInt64 value)
     buffer.resize_assume_reserved(ptr - buffer.data());
 }
 
-void ProtobufWriter::SimpleWriter::writeInt(UInt32 field_number, Int64 value)
+void ProtobufWriter::writeInt(int field_number, Int64 value)
 {
     writeUInt(field_number, static_cast<UInt64>(value));
 }
 
-void ProtobufWriter::SimpleWriter::writeSInt(UInt32 field_number, Int64 value)
+void ProtobufWriter::writeSInt(int field_number, Int64 value)
 {
     writeUInt(field_number, encodeZigZag(value));
 }
 
 template <typename T>
-void ProtobufWriter::SimpleWriter::writeFixed(UInt32 field_number, T value)
+void ProtobufWriter::writeFixed(int field_number, T value)
 {
     static_assert((sizeof(T) == 4) || (sizeof(T) == 8));
+    if (in_repeated_pack)
+    {
+        size_t old_size = buffer.size();
+        buffer.resize(old_size + sizeof(T));
+        memcpy(buffer.data() + old_size, &value, sizeof(T));
+        return;
+    }
     constexpr WireType wire_type = (sizeof(T) == 4) ? BITS32 : BITS64;
     size_t old_size = buffer.size();
     buffer.reserve(old_size + MAX_VARINT_SIZE + sizeof(T));
@@ -237,19 +189,27 @@ void ProtobufWriter::SimpleWriter::writeFixed(UInt32 field_number, T value)
     buffer.resize_assume_reserved(ptr - buffer.data());
 }
 
-void ProtobufWriter::SimpleWriter::writeString(UInt32 field_number, const StringRef & str)
+template void ProtobufWriter::writeFixed<Int32>(int field_number, Int32 value);
+template void ProtobufWriter::writeFixed<UInt32>(int field_number, UInt32 value);
+template void ProtobufWriter::writeFixed<Int64>(int field_number, Int64 value);
+template void ProtobufWriter::writeFixed<UInt64>(int field_number, UInt64 value);
+template void ProtobufWriter::writeFixed<Float32>(int field_number, Float32 value);
+template void ProtobufWriter::writeFixed<Float64>(int field_number, Float64 value);
+
+void ProtobufWriter::writeString(int field_number, const std::string_view & str)
 {
+    size_t length = str.length();
     size_t old_size = buffer.size();
-    buffer.reserve(old_size + 2 * MAX_VARINT_SIZE + str.size);
+    buffer.reserve(old_size + 2 * MAX_VARINT_SIZE + length);
     UInt8 * ptr = buffer.data() + old_size;
     ptr = writeFieldNumber(field_number, LENGTH_DELIMITED, ptr);
-    ptr = writeVarint(str.size, ptr);
-    memcpy(ptr, str.data, str.size);
-    ptr += str.size;
+    ptr = writeVarint(length, ptr);
+    memcpy(ptr, str.data(), length);
+    ptr += length;
     buffer.resize_assume_reserved(ptr - buffer.data());
 }
 
-void ProtobufWriter::SimpleWriter::startRepeatedPack()
+void ProtobufWriter::startRepeatedPack()
 {
     pieces.emplace_back(current_piece_start, buffer.size());
 
@@ -259,17 +219,19 @@ void ProtobufWriter::SimpleWriter::startRepeatedPack()
     current_piece_start = buffer.size() + REPEATED_PACK_PADDING;
     buffer.resize(current_piece_start);
     num_bytes_skipped += REPEATED_PACK_PADDING;
+    in_repeated_pack = true;
 }
 
-void ProtobufWriter::SimpleWriter::endRepeatedPack(UInt32 field_number)
+void ProtobufWriter::endRepeatedPack(int field_number, bool skip_if_empty)
 {
     size_t size = buffer.size() - current_piece_start;
-    if (!size)
+    if (!size && skip_if_empty)
     {
         current_piece_start = pieces.back().start;
         buffer.resize(pieces.back().end);
         pieces.pop_back();
         num_bytes_skipped -= REPEATED_PACK_PADDING;
+        in_repeated_pack = false;
         return;
     }
     UInt8 * ptr = &buffer[pieces.back().end];
@@ -278,726 +240,7 @@ void ProtobufWriter::SimpleWriter::endRepeatedPack(UInt32 field_number)
     size_t num_bytes_inserted = endptr - ptr;
     pieces.back().end += num_bytes_inserted;
     num_bytes_skipped -= num_bytes_inserted;
-}
-
-void ProtobufWriter::SimpleWriter::addUIntToRepeatedPack(UInt64 value)
-{
-    writeVarint(value, buffer);
-}
-
-void ProtobufWriter::SimpleWriter::addIntToRepeatedPack(Int64 value)
-{
-    writeVarint(static_cast<UInt64>(value), buffer);
-}
-
-void ProtobufWriter::SimpleWriter::addSIntToRepeatedPack(Int64 value)
-{
-    writeVarint(encodeZigZag(value), buffer);
-}
-
-template <typename T>
-void ProtobufWriter::SimpleWriter::addFixedToRepeatedPack(T value)
-{
-    static_assert((sizeof(T) == 4) || (sizeof(T) == 8));
-    size_t old_size = buffer.size();
-    buffer.resize(old_size + sizeof(T));
-    memcpy(buffer.data() + old_size, &value, sizeof(T));
-}
-
-
-// Implementation for a converter from any DB data type to any protobuf field type.
-class ProtobufWriter::ConverterBaseImpl : public IConverter
-{
-public:
-    ConverterBaseImpl(SimpleWriter & simple_writer_, const google::protobuf::FieldDescriptor * field_)
-        : simple_writer(simple_writer_), field(field_)
-    {
-        field_number = field->number();
-    }
-
-    virtual void writeString(const StringRef &) override { cannotConvertType("String"); }
-    virtual void writeInt8(Int8) override { cannotConvertType("Int8"); }
-    virtual void writeUInt8(UInt8) override { cannotConvertType("UInt8"); }
-    virtual void writeInt16(Int16) override { cannotConvertType("Int16"); }
-    virtual void writeUInt16(UInt16) override { cannotConvertType("UInt16"); }
-    virtual void writeInt32(Int32) override { cannotConvertType("Int32"); }
-    virtual void writeUInt32(UInt32) override { cannotConvertType("UInt32"); }
-    virtual void writeInt64(Int64) override { cannotConvertType("Int64"); }
-    virtual void writeUInt64(UInt64) override { cannotConvertType("UInt64"); }
-    virtual void writeInt128(Int128) override { cannotConvertType("Int128"); }
-    virtual void writeUInt128(const UInt128 &) override { cannotConvertType("UInt128"); }
-    virtual void writeInt256(const Int256 &) override { cannotConvertType("Int256"); }
-    virtual void writeUInt256(const UInt256 &) override { cannotConvertType("UInt256"); }
-    virtual void writeFloat32(Float32) override { cannotConvertType("Float32"); }
-    virtual void writeFloat64(Float64) override { cannotConvertType("Float64"); }
-    virtual void prepareEnumMapping8(const std::vector<std::pair<std::string, Int8>> &) override {}
-    virtual void prepareEnumMapping16(const std::vector<std::pair<std::string, Int16>> &) override {}
-    virtual void writeEnum8(Int8) override { cannotConvertType("Enum"); }
-    virtual void writeEnum16(Int16) override { cannotConvertType("Enum"); }
-    virtual void writeUUID(const UUID &) override { cannotConvertType("UUID"); }
-    virtual void writeDate(DayNum) override { cannotConvertType("Date"); }
-    virtual void writeDateTime(time_t) override { cannotConvertType("DateTime"); }
-    virtual void writeDateTime64(DateTime64, UInt32) override { cannotConvertType("DateTime64"); }
-    virtual void writeDecimal32(Decimal32, UInt32) override { cannotConvertType("Decimal32"); }
-    virtual void writeDecimal64(Decimal64, UInt32) override { cannotConvertType("Decimal64"); }
-    virtual void writeDecimal128(const Decimal128 &, UInt32) override { cannotConvertType("Decimal128"); }
-    virtual void writeDecimal256(const Decimal256 &, UInt32) override { cannotConvertType("Decimal256"); }
-
-    virtual void writeAggregateFunction(const AggregateFunctionPtr &, ConstAggregateDataPtr) override { cannotConvertType("AggregateFunction"); }
-
-protected:
-    [[noreturn]] void cannotConvertType(const String & type_name)
-    {
-        throw Exception(
-            "Could not convert data type '" + type_name + "' to protobuf type '" + field->type_name() + "' (field: " + field->name() + ")",
-            ErrorCodes::PROTOBUF_BAD_CAST);
-    }
-
-    [[noreturn]] void cannotConvertValue(const String & value)
-    {
-        throw Exception(
-            "Could not convert value '" + value + "' to protobuf type '" + field->type_name() + "' (field: " + field->name() + ")",
-            ErrorCodes::PROTOBUF_BAD_CAST);
-    }
-
-    template <typename To, typename From>
-    To numericCast(From value)
-    {
-        if constexpr (std::is_same_v<To, From>)
-            return value;
-        To result;
-        try
-        {
-            result = boost::numeric_cast<To>(value);
-        }
-        catch (boost::numeric::bad_numeric_cast &)
-        {
-            cannotConvertValue(toString(value));
-        }
-        return result;
-    }
-
-    template <typename To>
-    To parseFromString(const StringRef & str)
-    {
-        To result;
-        try
-        {
-            result = ::DB::parse<To>(str.data, str.size);
-        }
-        catch (...)
-        {
-            cannotConvertValue(str.toString());
-        }
-        return result;
-    }
-
-    SimpleWriter & simple_writer;
-    const google::protobuf::FieldDescriptor * field;
-    UInt32 field_number;
-};
-
-
-template <bool skip_null_value>
-class ProtobufWriter::ConverterToString : public ConverterBaseImpl
-{
-public:
-    using ConverterBaseImpl::ConverterBaseImpl;
-
-    void writeString(const StringRef & str) override { writeField(str); }
-
-    void writeInt8(Int8 value) override { convertToStringAndWriteField(value); }
-    void writeUInt8(UInt8 value) override { convertToStringAndWriteField(value); }
-    void writeInt16(Int16 value) override { convertToStringAndWriteField(value); }
-    void writeUInt16(UInt16 value) override { convertToStringAndWriteField(value); }
-    void writeInt32(Int32 value) override { convertToStringAndWriteField(value); }
-    void writeUInt32(UInt32 value) override { convertToStringAndWriteField(value); }
-    void writeInt64(Int64 value) override { convertToStringAndWriteField(value); }
-    void writeUInt64(UInt64 value) override { convertToStringAndWriteField(value); }
-    void writeFloat32(Float32 value) override { convertToStringAndWriteField(value); }
-    void writeFloat64(Float64 value) override { convertToStringAndWriteField(value); }
-
-    void prepareEnumMapping8(const std::vector<std::pair<String, Int8>> & name_value_pairs) override
-    {
-        prepareEnumValueToNameMap(name_value_pairs);
-    }
-    void prepareEnumMapping16(const std::vector<std::pair<String, Int16>> & name_value_pairs) override
-    {
-        prepareEnumValueToNameMap(name_value_pairs);
-    }
-
-    void writeEnum8(Int8 value) override { writeEnum16(value); }
-
-    void writeEnum16(Int16 value) override
-    {
-        auto it = enum_value_to_name_map->find(value);
-        if (it == enum_value_to_name_map->end())
-            cannotConvertValue(toString(value));
-        writeField(it->second);
-    }
-
-    void writeUUID(const UUID & uuid) override { convertToStringAndWriteField(uuid); }
-    void writeDate(DayNum date) override { convertToStringAndWriteField(date); }
-
-    void writeDateTime(time_t tm) override
-    {
-        writeDateTimeText(tm, text_buffer);
-        writeField(text_buffer.stringRef());
-        text_buffer.restart();
-    }
-
-    void writeDateTime64(DateTime64 date_time, UInt32 scale) override
-    {
-        writeDateTimeText(date_time, scale, text_buffer);
-        writeField(text_buffer.stringRef());
-        text_buffer.restart();
-    }
-
-    void writeDecimal32(Decimal32 decimal, UInt32 scale) override { writeDecimal(decimal, scale); }
-    void writeDecimal64(Decimal64 decimal, UInt32 scale) override { writeDecimal(decimal, scale); }
-    void writeDecimal128(const Decimal128 & decimal, UInt32 scale) override { writeDecimal(decimal, scale); }
-
-    void writeAggregateFunction(const AggregateFunctionPtr & function, ConstAggregateDataPtr place) override
-    {
-        function->serialize(place, text_buffer);
-        writeField(text_buffer.stringRef());
-        text_buffer.restart();
-    }
-
-private:
-    template <typename T>
-    void convertToStringAndWriteField(T value)
-    {
-        writeText(value, text_buffer);
-        writeField(text_buffer.stringRef());
-        text_buffer.restart();
-    }
-
-    template <typename T>
-    void writeDecimal(const Decimal<T> & decimal, UInt32 scale)
-    {
-        writeText(decimal, scale, text_buffer);
-        writeField(text_buffer.stringRef());
-        text_buffer.restart();
-    }
-
-    template <typename T>
-    void prepareEnumValueToNameMap(const std::vector<std::pair<String, T>> & name_value_pairs)
-    {
-        if (enum_value_to_name_map.has_value())
-            return;
-        enum_value_to_name_map.emplace();
-        for (const auto & name_value_pair : name_value_pairs)
-            enum_value_to_name_map->emplace(name_value_pair.second, name_value_pair.first);
-    }
-
-    void writeField(const StringRef & str)
-    {
-        if constexpr (skip_null_value)
-        {
-            if (!str.size)
-                return;
-        }
-        simple_writer.writeString(field_number, str);
-    }
-
-    WriteBufferFromOwnString text_buffer;
-    std::optional<std::unordered_map<Int16, String>> enum_value_to_name_map;
-};
-
-#    define PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_STRINGS(field_type_id) \
-        template <> \
-        std::unique_ptr<ProtobufWriter::IConverter> ProtobufWriter::createConverter<field_type_id>( \
-            const google::protobuf::FieldDescriptor * field) \
-        { \
-            if (shouldSkipNullValue(field)) \
-                return std::make_unique<ConverterToString<true>>(simple_writer, field); \
-            else \
-                return std::make_unique<ConverterToString<false>>(simple_writer, field); \
-        }
-PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_STRINGS(google::protobuf::FieldDescriptor::TYPE_STRING)
-PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_STRINGS(google::protobuf::FieldDescriptor::TYPE_BYTES)
-#    undef PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_STRINGS
-
-
-template <int field_type_id, typename ToType, bool skip_null_value, bool pack_repeated>
-class ProtobufWriter::ConverterToNumber : public ConverterBaseImpl
-{
-public:
-    using ConverterBaseImpl::ConverterBaseImpl;
-
-    void writeString(const StringRef & str) override { writeField(parseFromString<ToType>(str)); }
-
-    void writeInt8(Int8 value) override { castNumericAndWriteField(value); }
-    void writeUInt8(UInt8 value) override { castNumericAndWriteField(value); }
-    void writeInt16(Int16 value) override { castNumericAndWriteField(value); }
-    void writeUInt16(UInt16 value) override { castNumericAndWriteField(value); }
-    void writeInt32(Int32 value) override { castNumericAndWriteField(value); }
-    void writeUInt32(UInt32 value) override { castNumericAndWriteField(value); }
-    void writeInt64(Int64 value) override { castNumericAndWriteField(value); }
-    void writeUInt64(UInt64 value) override { castNumericAndWriteField(value); }
-    void writeFloat32(Float32 value) override { castNumericAndWriteField(value); }
-    void writeFloat64(Float64 value) override { castNumericAndWriteField(value); }
-
-    void writeEnum8(Int8 value) override { writeEnum16(value); }
-
-    void writeEnum16(Int16 value) override
-    {
-        if constexpr (!is_integer_v<ToType>)
-            cannotConvertType("Enum"); // It's not correct to convert enum to floating point.
-        castNumericAndWriteField(value);
-    }
-
-    void writeDate(DayNum date) override { castNumericAndWriteField(static_cast<UInt16>(date)); }
-    void writeDateTime(time_t tm) override { castNumericAndWriteField(tm); }
-    void writeDateTime64(DateTime64 date_time, UInt32 scale) override { writeDecimal(date_time, scale); }
-    void writeDecimal32(Decimal32 decimal, UInt32 scale) override { writeDecimal(decimal, scale); }
-    void writeDecimal64(Decimal64 decimal, UInt32 scale) override { writeDecimal(decimal, scale); }
-    void writeDecimal128(const Decimal128 & decimal, UInt32 scale) override { writeDecimal(decimal, scale); }
-
-private:
-    template <typename FromType>
-    void castNumericAndWriteField(FromType value)
-    {
-        writeField(numericCast<ToType>(value));
-    }
-
-    template <typename S>
-    void writeDecimal(const Decimal<S> & decimal, UInt32 scale)
-    {
-        castNumericAndWriteField(DecimalUtils::convertTo<ToType>(decimal, scale));
-    }
-
-    void writeField(ToType value)
-    {
-        if constexpr (skip_null_value)
-        {
-            if (value == 0)
-                return;
-        }
-        if constexpr (((field_type_id == google::protobuf::FieldDescriptor::TYPE_INT32) && std::is_same_v<ToType, Int32>)
-                   || ((field_type_id == google::protobuf::FieldDescriptor::TYPE_INT64) && std::is_same_v<ToType, Int64>))
-        {
-            if constexpr (pack_repeated)
-                simple_writer.addIntToRepeatedPack(value);
-            else
-                simple_writer.writeInt(field_number, value);
-        }
-        else if constexpr (((field_type_id == google::protobuf::FieldDescriptor::TYPE_SINT32) && std::is_same_v<ToType, Int32>)
-                        || ((field_type_id == google::protobuf::FieldDescriptor::TYPE_SINT64) && std::is_same_v<ToType, Int64>))
-        {
-            if constexpr (pack_repeated)
-                simple_writer.addSIntToRepeatedPack(value);
-            else
-                simple_writer.writeSInt(field_number, value);
-        }
-        else if constexpr (((field_type_id == google::protobuf::FieldDescriptor::TYPE_UINT32) && std::is_same_v<ToType, UInt32>)
-                        || ((field_type_id == google::protobuf::FieldDescriptor::TYPE_UINT64) && std::is_same_v<ToType, UInt64>))
-        {
-            if constexpr (pack_repeated)
-                simple_writer.addUIntToRepeatedPack(value);
-            else
-                simple_writer.writeUInt(field_number, value);
-        }
-        else
-        {
-            static_assert(((field_type_id == google::protobuf::FieldDescriptor::TYPE_FIXED32) && std::is_same_v<ToType, UInt32>)
-                       || ((field_type_id == google::protobuf::FieldDescriptor::TYPE_SFIXED32) && std::is_same_v<ToType, Int32>)
-                       || ((field_type_id == google::protobuf::FieldDescriptor::TYPE_FIXED64) && std::is_same_v<ToType, UInt64>)
-                       || ((field_type_id == google::protobuf::FieldDescriptor::TYPE_SFIXED64) && std::is_same_v<ToType, Int64>)
-                       || ((field_type_id == google::protobuf::FieldDescriptor::TYPE_FLOAT) && std::is_same_v<ToType, float>)
-                       || ((field_type_id == google::protobuf::FieldDescriptor::TYPE_DOUBLE) && std::is_same_v<ToType, double>));
-            if constexpr (pack_repeated)
-                simple_writer.addFixedToRepeatedPack(value);
-            else
-                simple_writer.writeFixed(field_number, value);
-        }
-    }
-};
-
-#    define PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(field_type_id, field_type) \
-        template <> \
-        std::unique_ptr<ProtobufWriter::IConverter> ProtobufWriter::createConverter<field_type_id>( \
-            const google::protobuf::FieldDescriptor * field) \
-        { \
-            if (shouldSkipNullValue(field)) \
-                return std::make_unique<ConverterToNumber<field_type_id, field_type, true, false>>(simple_writer, field); \
-            else if (shouldPackRepeated(field)) \
-                return std::make_unique<ConverterToNumber<field_type_id, field_type, false, true>>(simple_writer, field); \
-            else \
-                return std::make_unique<ConverterToNumber<field_type_id, field_type, false, false>>(simple_writer, field); \
-        }
-
-PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_INT32, Int32);
-PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_SINT32, Int32);
-PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_UINT32, UInt32);
-PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_INT64, Int64);
-PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_SINT64, Int64);
-PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_UINT64, UInt64);
-PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_FIXED32, UInt32);
-PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_SFIXED32, Int32);
-PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_FIXED64, UInt64);
-PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_SFIXED64, Int64);
-PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_FLOAT, float);
-PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS(google::protobuf::FieldDescriptor::TYPE_DOUBLE, double);
-#    undef PROTOBUF_WRITER_CREATE_CONVERTER_SPECIALIZATION_FOR_NUMBERS
-
-
-template <bool skip_null_value, bool pack_repeated>
-class ProtobufWriter::ConverterToBool : public ConverterBaseImpl
-{
-public:
-    using ConverterBaseImpl::ConverterBaseImpl;
-
-    void writeString(const StringRef & str) override
-    {
-        if (str == "true")
-            writeField(true);
-        else if (str == "false")
-            writeField(false);
-        else
-            cannotConvertValue(str.toString());
-    }
-
-    void writeInt8(Int8 value) override { convertToBoolAndWriteField(value); }
-    void writeUInt8(UInt8 value) override { convertToBoolAndWriteField(value); }
-    void writeInt16(Int16 value) override { convertToBoolAndWriteField(value); }
-    void writeUInt16(UInt16 value) override { convertToBoolAndWriteField(value); }
-    void writeInt32(Int32 value) override { convertToBoolAndWriteField(value); }
-    void writeUInt32(UInt32 value) override { convertToBoolAndWriteField(value); }
-    void writeInt64(Int64 value) override { convertToBoolAndWriteField(value); }
-    void writeUInt64(UInt64 value) override { convertToBoolAndWriteField(value); }
-    void writeFloat32(Float32 value) override { convertToBoolAndWriteField(value); }
-    void writeFloat64(Float64 value) override { convertToBoolAndWriteField(value); }
-    void writeDecimal32(Decimal32 decimal, UInt32) override { convertToBoolAndWriteField(decimal.value); }
-    void writeDecimal64(Decimal64 decimal, UInt32) override { convertToBoolAndWriteField(decimal.value); }
-    void writeDecimal128(const Decimal128 & decimal, UInt32) override { convertToBoolAndWriteField(decimal.value); }
-
-private:
-    template <typename T>
-    void convertToBoolAndWriteField(T value)
-    {
-        writeField(static_cast<bool>(value));
-    }
-
-    void writeField(bool b)
-    {
-        if constexpr (skip_null_value)
-        {
-            if (!b)
-                return;
-        }
-        if constexpr (pack_repeated)
-            simple_writer.addUIntToRepeatedPack(b);
-        else
-            simple_writer.writeUInt(field_number, b);
-    }
-};
-
-template <>
-std::unique_ptr<ProtobufWriter::IConverter> ProtobufWriter::createConverter<google::protobuf::FieldDescriptor::TYPE_BOOL>(
-    const google::protobuf::FieldDescriptor * field)
-{
-    if (shouldSkipNullValue(field))
-        return std::make_unique<ConverterToBool<true, false>>(simple_writer, field);
-    else if (shouldPackRepeated(field))
-        return std::make_unique<ConverterToBool<false, true>>(simple_writer, field);
-    else
-        return std::make_unique<ConverterToBool<false, false>>(simple_writer, field);
-}
-
-
-template <bool skip_null_value, bool pack_repeated>
-class ProtobufWriter::ConverterToEnum : public ConverterBaseImpl
-{
-public:
-    using ConverterBaseImpl::ConverterBaseImpl;
-
-    void writeString(const StringRef & str) override
-    {
-        prepareEnumNameToPbNumberMap();
-        auto it = enum_name_to_pbnumber_map->find(str);
-        if (it == enum_name_to_pbnumber_map->end())
-            cannotConvertValue(str.toString());
-        writeField(it->second);
-    }
-
-    void writeInt8(Int8 value) override { convertToEnumAndWriteField(value); }
-    void writeUInt8(UInt8 value) override { convertToEnumAndWriteField(value); }
-    void writeInt16(Int16 value) override { convertToEnumAndWriteField(value); }
-    void writeUInt16(UInt16 value) override { convertToEnumAndWriteField(value); }
-    void writeInt32(Int32 value) override { convertToEnumAndWriteField(value); }
-    void writeUInt32(UInt32 value) override { convertToEnumAndWriteField(value); }
-    void writeInt64(Int64 value) override { convertToEnumAndWriteField(value); }
-    void writeUInt64(UInt64 value) override { convertToEnumAndWriteField(value); }
-
-    void prepareEnumMapping8(const std::vector<std::pair<String, Int8>> & name_value_pairs) override
-    {
-        prepareEnumValueToPbNumberMap(name_value_pairs);
-    }
-    void prepareEnumMapping16(const std::vector<std::pair<String, Int16>> & name_value_pairs) override
-    {
-        prepareEnumValueToPbNumberMap(name_value_pairs);
-    }
-
-    void writeEnum8(Int8 value) override { writeEnum16(value); }
-
-    void writeEnum16(Int16 value) override
-    {
-        int pbnumber;
-        if (enum_value_always_equals_pbnumber)
-            pbnumber = value;
-        else
-        {
-            auto it = enum_value_to_pbnumber_map->find(value);
-            if (it == enum_value_to_pbnumber_map->end())
-                cannotConvertValue(toString(value));
-            pbnumber = it->second;
-        }
-        writeField(pbnumber);
-    }
-
-private:
-    template <typename T>
-    void convertToEnumAndWriteField(T value)
-    {
-        const auto * enum_descriptor = field->enum_type()->FindValueByNumber(numericCast<int>(value));
-        if (!enum_descriptor)
-            cannotConvertValue(toString(value));
-        writeField(enum_descriptor->number());
-    }
-
-    void prepareEnumNameToPbNumberMap()
-    {
-        if (enum_name_to_pbnumber_map.has_value())
-            return;
-        enum_name_to_pbnumber_map.emplace();
-        const auto * enum_type = field->enum_type();
-        for (int i = 0; i != enum_type->value_count(); ++i)
-        {
-            const auto * enum_value = enum_type->value(i);
-            enum_name_to_pbnumber_map->emplace(enum_value->name(), enum_value->number());
-        }
-    }
-
-    template <typename T>
-    void prepareEnumValueToPbNumberMap(const std::vector<std::pair<String, T>> & name_value_pairs)
-    {
-        if (enum_value_to_pbnumber_map.has_value())
-            return;
-        enum_value_to_pbnumber_map.emplace();
-        enum_value_always_equals_pbnumber = true;
-        for (const auto & name_value_pair : name_value_pairs)
-        {
-            Int16 value = name_value_pair.second; // NOLINT
-            const auto * enum_descriptor = field->enum_type()->FindValueByName(name_value_pair.first);
-            if (enum_descriptor)
-            {
-                enum_value_to_pbnumber_map->emplace(value, enum_descriptor->number());
-                if (value != enum_descriptor->number())
-                    enum_value_always_equals_pbnumber = false;
-            }
-            else
-                enum_value_always_equals_pbnumber = false;
-        }
-    }
-
-    void writeField(int enum_pbnumber)
-    {
-        if constexpr (skip_null_value)
-        {
-            if (!enum_pbnumber)
-                return;
-        }
-        if constexpr (pack_repeated)
-            simple_writer.addUIntToRepeatedPack(enum_pbnumber);
-        else
-            simple_writer.writeUInt(field_number, enum_pbnumber);
-    }
-
-    std::optional<std::unordered_map<StringRef, int>> enum_name_to_pbnumber_map;
-    std::optional<std::unordered_map<Int16, int>> enum_value_to_pbnumber_map;
-    bool enum_value_always_equals_pbnumber;
-};
-
-template <>
-std::unique_ptr<ProtobufWriter::IConverter> ProtobufWriter::createConverter<google::protobuf::FieldDescriptor::TYPE_ENUM>(
-    const google::protobuf::FieldDescriptor * field)
-{
-    if (shouldSkipNullValue(field))
-        return std::make_unique<ConverterToEnum<true, false>>(simple_writer, field);
-    else if (shouldPackRepeated(field))
-        return std::make_unique<ConverterToEnum<false, true>>(simple_writer, field);
-    else
-        return std::make_unique<ConverterToEnum<false, false>>(simple_writer, field);
-}
-
-
-ProtobufWriter::ProtobufWriter(
-    WriteBuffer & out, const google::protobuf::Descriptor * message_type, const std::vector<String> & column_names, const bool use_length_delimiters_)
-    : simple_writer(out, use_length_delimiters_)
-{
-    std::vector<const google::protobuf::FieldDescriptor *> field_descriptors_without_match;
-    root_message = ProtobufColumnMatcher::matchColumns<ColumnMatcherTraits>(column_names, message_type, field_descriptors_without_match);
-    for (const auto * field_descriptor_without_match : field_descriptors_without_match)
-    {
-        if (field_descriptor_without_match->is_required())
-            throw Exception(
-                "Output doesn't have a column named '" + field_descriptor_without_match->name()
-                    + "' which is required to write the output in the protobuf format.",
-                ErrorCodes::NO_DATA_FOR_REQUIRED_PROTOBUF_FIELD);
-    }
-    setTraitsDataAfterMatchingColumns(root_message.get());
-}
-
-ProtobufWriter::~ProtobufWriter() = default;
-
-void ProtobufWriter::setTraitsDataAfterMatchingColumns(Message * message)
-{
-    Field * parent_field = message->parent ? &message->parent->fields[message->index_in_parent] : nullptr;
-    message->data.parent_field_number = parent_field ? parent_field->field_number : 0;
-    message->data.is_required = parent_field && parent_field->data.is_required;
-
-    if (parent_field && parent_field->data.is_repeatable)
-        message->data.repeatable_container_message = message;
-    else if (message->parent)
-        message->data.repeatable_container_message = message->parent->data.repeatable_container_message;
-    else
-        message->data.repeatable_container_message = nullptr;
-
-    message->data.is_group = parent_field && (parent_field->field_descriptor->type() == google::protobuf::FieldDescriptor::TYPE_GROUP);
-
-    for (auto & field : message->fields)
-    {
-        field.data.is_repeatable = field.field_descriptor->is_repeated();
-        field.data.is_required = field.field_descriptor->is_required();
-        field.data.repeatable_container_message = message->data.repeatable_container_message;
-        field.data.should_pack_repeated = shouldPackRepeated(field.field_descriptor);
-
-        if (field.nested_message)
-        {
-            setTraitsDataAfterMatchingColumns(field.nested_message.get());
-            continue;
-        }
-        switch (field.field_descriptor->type())
-        {
-#    define PROTOBUF_WRITER_CONVERTER_CREATING_CASE(field_type_id) \
-        case field_type_id: \
-            field.data.converter = createConverter<field_type_id>(field.field_descriptor); \
-            break
-            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_STRING);
-            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_BYTES);
-            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_INT32);
-            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_SINT32);
-            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_UINT32);
-            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_FIXED32);
-            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_SFIXED32);
-            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_INT64);
-            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_SINT64);
-            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_UINT64);
-            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_FIXED64);
-            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_SFIXED64);
-            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_FLOAT);
-            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_DOUBLE);
-            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_BOOL);
-            PROTOBUF_WRITER_CONVERTER_CREATING_CASE(google::protobuf::FieldDescriptor::TYPE_ENUM);
-#    undef PROTOBUF_WRITER_CONVERTER_CREATING_CASE
-            default:
-                throw Exception(
-                    String("Protobuf type '") + field.field_descriptor->type_name() + "' isn't supported", ErrorCodes::NOT_IMPLEMENTED);
-        }
-    }
-}
-
-void ProtobufWriter::startMessage()
-{
-    current_message = root_message.get();
-    current_field_index = 0;
-    simple_writer.startMessage();
-}
-
-void ProtobufWriter::endMessage()
-{
-    if (!current_message)
-        return;
-    endWritingField();
-    while (current_message->parent)
-    {
-        simple_writer.endNestedMessage(
-            current_message->data.parent_field_number, current_message->data.is_group, !current_message->data.is_required);
-        current_message = current_message->parent;
-    }
-    simple_writer.endMessage();
-    current_message = nullptr;
-}
-
-bool ProtobufWriter::writeField(size_t & column_index)
-{
-    endWritingField();
-    while (true)
-    {
-        if (current_field_index < current_message->fields.size())
-        {
-            Field & field = current_message->fields[current_field_index];
-            if (!field.nested_message)
-            {
-                current_field = &current_message->fields[current_field_index];
-                current_converter = current_field->data.converter.get();
-                column_index = current_field->column_index;
-                if (current_field->data.should_pack_repeated)
-                    simple_writer.startRepeatedPack();
-                return true;
-            }
-            simple_writer.startNestedMessage();
-            current_message = field.nested_message.get();
-            current_message->data.need_repeat = false;
-            current_field_index = 0;
-            continue;
-        }
-        if (current_message->parent)
-        {
-            simple_writer.endNestedMessage(
-                current_message->data.parent_field_number, current_message->data.is_group, !current_message->data.is_required);
-            if (current_message->data.need_repeat)
-            {
-                simple_writer.startNestedMessage();
-                current_message->data.need_repeat = false;
-                current_field_index = 0;
-                continue;
-            }
-            current_field_index = current_message->index_in_parent + 1;
-            current_message = current_message->parent;
-            continue;
-        }
-        return false;
-    }
-}
-
-void ProtobufWriter::endWritingField()
-{
-    if (!current_field)
-        return;
-    if (current_field->data.should_pack_repeated)
-        simple_writer.endRepeatedPack(current_field->field_number);
-    else if ((num_values == 0) && current_field->data.is_required)
-        throw Exception(
-            "No data for the required field '" + current_field->field_descriptor->name() + "'",
-            ErrorCodes::NO_DATA_FOR_REQUIRED_PROTOBUF_FIELD);
-
-    current_field = nullptr;
-    current_converter = nullptr;
-    num_values = 0;
-    ++current_field_index;
-}
-
-void ProtobufWriter::setNestedMessageNeedsRepeat()
-{
-    if (current_field->data.repeatable_container_message)
-        current_field->data.repeatable_container_message->data.need_repeat = true;
-    else
-        throw Exception(
-            "Cannot write more than single value to the non-repeated field '" + current_field->field_descriptor->name() + "'",
-            ErrorCodes::PROTOBUF_FIELD_NOT_REPEATED);
+    in_repeated_pack = false;
 }
 
 }
diff --git a/src/Formats/ProtobufWriter.h b/src/Formats/ProtobufWriter.h
index 52bb453aa73..6af1a237fbd 100644
--- a/src/Formats/ProtobufWriter.h
+++ b/src/Formats/ProtobufWriter.h
@@ -1,290 +1,68 @@
 #pragma once
 
-#include <Core/UUID.h>
-#include <common/DayNum.h>
-#include <memory>
-
 #if !defined(ARCADIA_BUILD)
 #    include "config_formats.h"
 #endif
 
 #if USE_PROTOBUF
-#    include <IO/WriteBufferFromString.h>
-#    include <boost/noncopyable.hpp>
-#    include <Common/PODArray.h>
-#    include "ProtobufColumnMatcher.h"
-
-
-namespace google
-{
-namespace protobuf
-{
-    class Descriptor;
-    class FieldDescriptor;
-}
-}
-
-namespace DB
-{
-class IAggregateFunction;
-using AggregateFunctionPtr = std::shared_ptr<IAggregateFunction>;
-using ConstAggregateDataPtr = const char *;
-
-
-/** Serializes a protobuf, tries to cast types if necessarily.
-  */
-class ProtobufWriter : private boost::noncopyable
-{
-public:
-    ProtobufWriter(WriteBuffer & out, const google::protobuf::Descriptor * message_type, const std::vector<String> & column_names, const bool use_length_delimiters_);
-    ~ProtobufWriter();
-
-    /// Should be called at the beginning of writing a message.
-    void startMessage();
-
-    /// Should be called at the end of writing a message.
-    void endMessage();
-
-    /// Prepares for writing values of a field.
-    /// Returns true and sets 'column_index' to the corresponding column's index.
-    /// Returns false if there are no more fields to write in the message type (call endMessage() in this case).
-    bool writeField(size_t & column_index);
-
-    /// Writes a value. This function should be called one or multiple times after writeField().
-    /// Returns false if there are no more place for the values in the protobuf's field.
-    /// This can happen if the protobuf's field is not declared as repeated in the protobuf schema.
-    bool writeNumber(Int8 value) { return writeValueIfPossible(&IConverter::writeInt8, value); }
-    bool writeNumber(UInt8 value) { return writeValueIfPossible(&IConverter::writeUInt8, value); }
-    bool writeNumber(Int16 value) { return writeValueIfPossible(&IConverter::writeInt16, value); }
-    bool writeNumber(UInt16 value) { return writeValueIfPossible(&IConverter::writeUInt16, value); }
-    bool writeNumber(Int32 value) { return writeValueIfPossible(&IConverter::writeInt32, value); }
-    bool writeNumber(UInt32 value) { return writeValueIfPossible(&IConverter::writeUInt32, value); }
-    bool writeNumber(Int64 value) { return writeValueIfPossible(&IConverter::writeInt64, value); }
-    bool writeNumber(UInt64 value) { return writeValueIfPossible(&IConverter::writeUInt64, value); }
-    bool writeNumber(Int128 value) { return writeValueIfPossible(&IConverter::writeInt128, value); }
-    bool writeNumber(UInt128 value) { return writeValueIfPossible(&IConverter::writeUInt128, value); }
-
-    bool writeNumber(Int256 value) { return writeValueIfPossible(&IConverter::writeInt256, value); }
-    bool writeNumber(UInt256 value) { return writeValueIfPossible(&IConverter::writeUInt256, value); }
-
-    bool writeNumber(Float32 value) { return writeValueIfPossible(&IConverter::writeFloat32, value); }
-    bool writeNumber(Float64 value) { return writeValueIfPossible(&IConverter::writeFloat64, value); }
-    bool writeString(const StringRef & str) { return writeValueIfPossible(&IConverter::writeString, str); }
-    void prepareEnumMapping(const std::vector<std::pair<std::string, Int8>> & enum_values) { current_converter->prepareEnumMapping8(enum_values); }
-    void prepareEnumMapping(const std::vector<std::pair<std::string, Int16>> & enum_values) { current_converter->prepareEnumMapping16(enum_values); }
-    bool writeEnum(Int8 value) { return writeValueIfPossible(&IConverter::writeEnum8, value); }
-    bool writeEnum(Int16 value) { return writeValueIfPossible(&IConverter::writeEnum16, value); }
-    bool writeUUID(const UUID & uuid) { return writeValueIfPossible(&IConverter::writeUUID, uuid); }
-    bool writeDate(DayNum date) { return writeValueIfPossible(&IConverter::writeDate, date); }
-    bool writeDateTime(time_t tm) { return writeValueIfPossible(&IConverter::writeDateTime, tm); }
-    bool writeDateTime64(DateTime64 tm, UInt32 scale) { return writeValueIfPossible(&IConverter::writeDateTime64, tm, scale); }
-    bool writeDecimal(Decimal32 decimal, UInt32 scale) { return writeValueIfPossible(&IConverter::writeDecimal32, decimal, scale); }
-    bool writeDecimal(Decimal64 decimal, UInt32 scale) { return writeValueIfPossible(&IConverter::writeDecimal64, decimal, scale); }
-    bool writeDecimal(const Decimal128 & decimal, UInt32 scale) { return writeValueIfPossible(&IConverter::writeDecimal128, decimal, scale); }
-    bool writeDecimal(const Decimal256 & decimal, UInt32 scale) { return writeValueIfPossible(&IConverter::writeDecimal256, decimal, scale); }
-    bool writeAggregateFunction(const AggregateFunctionPtr & function, ConstAggregateDataPtr place) { return writeValueIfPossible(&IConverter::writeAggregateFunction, function, place); }
-
-private:
-    class SimpleWriter
-    {
-    public:
-        SimpleWriter(WriteBuffer & out_, const bool use_length_delimiters_);
-        ~SimpleWriter();
-
-        void startMessage();
-        void endMessage();
-
-        void startNestedMessage();
-        void endNestedMessage(UInt32 field_number, bool is_group, bool skip_if_empty);
-
-        void writeInt(UInt32 field_number, Int64 value);
-        void writeUInt(UInt32 field_number, UInt64 value);
-        void writeSInt(UInt32 field_number, Int64 value);
-        template <typename T>
-        void writeFixed(UInt32 field_number, T value);
-        void writeString(UInt32 field_number, const StringRef & str);
-
-        void startRepeatedPack();
-        void addIntToRepeatedPack(Int64 value);
-        void addUIntToRepeatedPack(UInt64 value);
-        void addSIntToRepeatedPack(Int64 value);
-        template <typename T>
-        void addFixedToRepeatedPack(T value);
-        void endRepeatedPack(UInt32 field_number);
-
-    private:
-        struct Piece
-        {
-            size_t start;
-            size_t end;
-            Piece(size_t start_, size_t end_) : start(start_), end(end_) {}
-            Piece() = default;
-        };
-
-        struct NestedInfo
-        {
-            size_t num_pieces_at_start;
-            size_t num_bytes_skipped_at_start;
-            NestedInfo(size_t num_pieces_at_start_, size_t num_bytes_skipped_at_start_)
-                : num_pieces_at_start(num_pieces_at_start_), num_bytes_skipped_at_start(num_bytes_skipped_at_start_)
-            {
-            }
-        };
-
-        WriteBuffer & out;
-        PODArray<UInt8> buffer;
-        std::vector<Piece> pieces;
-        size_t current_piece_start;
-        size_t num_bytes_skipped;
-        std::vector<NestedInfo> nested_infos;
-        const bool use_length_delimiters;
-    };
-
-    class IConverter
-    {
-    public:
-        virtual ~IConverter() = default;
-        virtual void writeString(const StringRef &) = 0;
-        virtual void writeInt8(Int8) = 0;
-        virtual void writeUInt8(UInt8) = 0;
-        virtual void writeInt16(Int16) = 0;
-        virtual void writeUInt16(UInt16) = 0;
-        virtual void writeInt32(Int32) = 0;
-        virtual void writeUInt32(UInt32) = 0;
-        virtual void writeInt64(Int64) = 0;
-        virtual void writeUInt64(UInt64) = 0;
-        virtual void writeInt128(Int128) = 0;
-        virtual void writeUInt128(const UInt128 &) = 0;
-
-        virtual void writeInt256(const Int256 &) = 0;
-        virtual void writeUInt256(const UInt256 &) = 0;
-
-        virtual void writeFloat32(Float32) = 0;
-        virtual void writeFloat64(Float64) = 0;
-        virtual void prepareEnumMapping8(const std::vector<std::pair<std::string, Int8>> &) = 0;
-        virtual void prepareEnumMapping16(const std::vector<std::pair<std::string, Int16>> &) = 0;
-        virtual void writeEnum8(Int8) = 0;
-        virtual void writeEnum16(Int16) = 0;
-        virtual void writeUUID(const UUID &) = 0;
-        virtual void writeDate(DayNum) = 0;
-        virtual void writeDateTime(time_t) = 0;
-        virtual void writeDateTime64(DateTime64, UInt32 scale) = 0;
-        virtual void writeDecimal32(Decimal32, UInt32) = 0;
-        virtual void writeDecimal64(Decimal64, UInt32) = 0;
-        virtual void writeDecimal128(const Decimal128 &, UInt32) = 0;
-        virtual void writeDecimal256(const Decimal256 &, UInt32) = 0;
-        virtual void writeAggregateFunction(const AggregateFunctionPtr &, ConstAggregateDataPtr) = 0;
-    };
-
-    class ConverterBaseImpl;
-    template <bool skip_null_value>
-    class ConverterToString;
-    template <int field_type_id, typename ToType, bool skip_null_value, bool pack_repeated>
-    class ConverterToNumber;
-    template <bool skip_null_value, bool pack_repeated>
-    class ConverterToBool;
-    template <bool skip_null_value, bool pack_repeated>
-    class ConverterToEnum;
-
-    struct ColumnMatcherTraits
-    {
-        struct FieldData
-        {
-            std::unique_ptr<IConverter> converter;
-            bool is_required;
-            bool is_repeatable;
-            bool should_pack_repeated;
-            ProtobufColumnMatcher::Message<ColumnMatcherTraits> * repeatable_container_message;
-        };
-        struct MessageData
-        {
-            UInt32 parent_field_number;
-            bool is_group;
-            bool is_required;
-            ProtobufColumnMatcher::Message<ColumnMatcherTraits> * repeatable_container_message;
-            bool need_repeat;
-        };
-    };
-    using Message = ProtobufColumnMatcher::Message<ColumnMatcherTraits>;
-    using Field = ProtobufColumnMatcher::Field<ColumnMatcherTraits>;
-
-    void setTraitsDataAfterMatchingColumns(Message * message);
-
-    template <int field_type_id>
-    std::unique_ptr<IConverter> createConverter(const google::protobuf::FieldDescriptor * field);
-
-    template <typename... Params>
-    using WriteValueFunctionPtr = void (IConverter::*)(Params...);
-
-    template <typename... Params, typename... Args>
-    bool writeValueIfPossible(WriteValueFunctionPtr<Params...> func, Args &&... args)
-    {
-        if (num_values && !current_field->data.is_repeatable)
-        {
-            setNestedMessageNeedsRepeat();
-            return false;
-        }
-        (current_converter->*func)(std::forward<Args>(args)...);
-        ++num_values;
-        return true;
-    }
-
-    void setNestedMessageNeedsRepeat();
-    void endWritingField();
-
-    SimpleWriter simple_writer;
-    std::unique_ptr<Message> root_message;
-
-    Message * current_message;
-    size_t current_field_index = 0;
-    const Field * current_field = nullptr;
-    IConverter * current_converter = nullptr;
-    size_t num_values = 0;
-};
-
-}
-
-#else
-#    include <common/StringRef.h>
+#   include <Core/Types.h>
+#   include <Common/PODArray.h>
 
 
 namespace DB
 {
-class IAggregateFunction;
-using AggregateFunctionPtr = std::shared_ptr<IAggregateFunction>;
-using ConstAggregateDataPtr = const char *;
+class WriteBuffer;
 
+/// Utility class for writing in the Protobuf format.
+/// Knows nothing about protobuf schemas, just provides useful functions to serialize data.
 class ProtobufWriter
 {
 public:
-    bool writeNumber(Int8 /* value */) { return false; }
-    bool writeNumber(UInt8 /* value */) { return false; }
-    bool writeNumber(Int16 /* value */) { return false; }
-    bool writeNumber(UInt16 /* value */) { return false; }
-    bool writeNumber(Int32 /* value */) { return false; }
-    bool writeNumber(UInt32 /* value */) { return false; }
-    bool writeNumber(Int64 /* value */) { return false; }
-    bool writeNumber(UInt64 /* value */) { return false; }
-    bool writeNumber(Int128 /* value */) { return false; }
-    bool writeNumber(UInt128 /* value */) { return false; }
-    bool writeNumber(Int256 /* value */) { return false; }
-    bool writeNumber(UInt256 /* value */) { return false; }
-    bool writeNumber(Float32 /* value */) { return false; }
-    bool writeNumber(Float64 /* value */) { return false; }
-    bool writeString(const StringRef & /* value */) { return false; }
-    void prepareEnumMapping(const std::vector<std::pair<std::string, Int8>> & /* name_value_pairs */) {}
-    void prepareEnumMapping(const std::vector<std::pair<std::string, Int16>> & /* name_value_pairs */) {}
-    bool writeEnum(Int8 /* value */) { return false; }
-    bool writeEnum(Int16 /* value */) { return false; }
-    bool writeUUID(const UUID & /* value */) { return false; }
-    bool writeDate(DayNum /* date */) { return false; }
-    bool writeDateTime(time_t /* tm */) { return false; }
-    bool writeDateTime64(DateTime64 /*tm*/, UInt32 /*scale*/) { return false; }
-    bool writeDecimal(Decimal32 /* decimal */, UInt32 /* scale */) { return false; }
-    bool writeDecimal(Decimal64 /* decimal */, UInt32 /* scale */) { return false; }
-    bool writeDecimal(const Decimal128 & /* decimal */, UInt32 /* scale */) { return false; }
-    bool writeDecimal(const Decimal256 & /* decimal */, UInt32 /* scale */) { return false; }
-    bool writeAggregateFunction(const AggregateFunctionPtr & /* function */, ConstAggregateDataPtr /* place */) { return false; }
+    ProtobufWriter(WriteBuffer & out_);
+    ~ProtobufWriter();
+
+    void startMessage();
+    void endMessage(bool with_length_delimiter);
+
+    void startNestedMessage();
+    void endNestedMessage(int field_number, bool is_group, bool skip_if_empty);
+
+    void writeInt(int field_number, Int64 value);
+    void writeUInt(int field_number, UInt64 value);
+    void writeSInt(int field_number, Int64 value);
+    template <typename T>
+    void writeFixed(int field_number, T value);
+    void writeString(int field_number, const std::string_view & str);
+
+    void startRepeatedPack();
+    void endRepeatedPack(int field_number, bool skip_if_empty);
+
+private:
+    struct Piece
+    {
+        size_t start;
+        size_t end;
+        Piece(size_t start_, size_t end_) : start(start_), end(end_) {}
+        Piece() = default;
+    };
+
+    struct NestedInfo
+    {
+        size_t num_pieces_at_start;
+        size_t num_bytes_skipped_at_start;
+        NestedInfo(size_t num_pieces_at_start_, size_t num_bytes_skipped_at_start_)
+            : num_pieces_at_start(num_pieces_at_start_), num_bytes_skipped_at_start(num_bytes_skipped_at_start_)
+        {
+        }
+    };
+
+    WriteBuffer & out;
+    PODArray<UInt8> buffer;
+    std::vector<Piece> pieces;
+    size_t current_piece_start = 0;
+    size_t num_bytes_skipped = 0;
+    std::vector<NestedInfo> nested_infos;
+    bool in_repeated_pack = false;
 };
 
 }
diff --git a/src/Formats/ya.make b/src/Formats/ya.make
index 6b72ec397d5..8fe938be125 100644
--- a/src/Formats/ya.make
+++ b/src/Formats/ya.make
@@ -20,9 +20,9 @@ SRCS(
     NativeFormat.cpp
     NullFormat.cpp
     ParsedTemplateFormatString.cpp
-    ProtobufColumnMatcher.cpp
     ProtobufReader.cpp
     ProtobufSchemas.cpp
+    ProtobufSerializer.cpp
     ProtobufWriter.cpp
     registerFormats.cpp
     verbosePrintString.cpp
diff --git a/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp b/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp
index d1420d0d38e..22a758b80f6 100644
--- a/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp
@@ -1,57 +1,48 @@
 #include "ProtobufRowInputFormat.h"
 
 #if USE_PROTOBUF
-#include <Core/Block.h>
-#include <Formats/FormatFactory.h>
-#include <Formats/FormatSchemaInfo.h>
-#include <Formats/ProtobufSchemas.h>
-#include <Interpreters/Context.h>
+#   include <Core/Block.h>
+#   include <Formats/FormatFactory.h>
+#   include <Formats/FormatSchemaInfo.h>
+#   include <Formats/ProtobufReader.h>
+#   include <Formats/ProtobufSchemas.h>
+#   include <Formats/ProtobufSerializer.h>
+#   include <Interpreters/Context.h>
+#   include <ext/range.h>
 
 
 namespace DB
 {
-
-ProtobufRowInputFormat::ProtobufRowInputFormat(ReadBuffer & in_, const Block & header_, Params params_, const FormatSchemaInfo & info_, const bool use_length_delimiters_)
+ProtobufRowInputFormat::ProtobufRowInputFormat(ReadBuffer & in_, const Block & header_, const Params & params_, const FormatSchemaInfo & schema_info_, bool with_length_delimiter_)
     : IRowInputFormat(header_, in_, params_)
-    , data_types(header_.getDataTypes())
-    , reader(in, ProtobufSchemas::instance().getMessageTypeForFormatSchema(info_), header_.getNames(), use_length_delimiters_)
+    , reader(std::make_unique<ProtobufReader>(in_))
+    , serializer(ProtobufSerializer::create(
+          header_.getNames(),
+          header_.getDataTypes(),
+          missing_column_indices,
+          *ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_),
+          with_length_delimiter_,
+         *reader))
 {
 }
 
 ProtobufRowInputFormat::~ProtobufRowInputFormat() = default;
 
-bool ProtobufRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & extra)
+bool ProtobufRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & row_read_extension)
 {
-    if (!reader.startMessage())
-        return false; // EOF reached, no more messages.
+    if (reader->eof())
+        return false;
 
-    // Set of columns for which the values were read. The rest will be filled with default values.
-    auto & read_columns = extra.read_columns;
-    read_columns.assign(columns.size(), false);
+    size_t row_num = columns.empty() ? 0 : columns[0]->size();
+    if (!row_num)
+        serializer->setColumns(columns.data(), columns.size());
 
-    // Read values from this message and put them to the columns while it's possible.
-    size_t column_index;
-    while (reader.readColumnIndex(column_index))
-    {
-        bool allow_add_row = !static_cast<bool>(read_columns[column_index]);
-        do
-        {
-            bool row_added;
-            data_types[column_index]->deserializeProtobuf(*columns[column_index], reader, allow_add_row, row_added);
-            if (row_added)
-            {
-                read_columns[column_index] = true;
-                allow_add_row = false;
-            }
-        } while (reader.canReadMoreValues());
-    }
+    serializer->readRow(row_num);
 
-    // Fill non-visited columns with the default values.
-    for (column_index = 0; column_index < read_columns.size(); ++column_index)
-        if (!read_columns[column_index])
-            data_types[column_index]->insertDefaultInto(*columns[column_index]);
-
-    reader.endMessage();
+    row_read_extension.read_columns.clear();
+    row_read_extension.read_columns.resize(columns.size(), true);
+    for (size_t column_idx : missing_column_indices)
+        row_read_extension.read_columns[column_idx] = false;
     return true;
 }
 
@@ -62,14 +53,14 @@ bool ProtobufRowInputFormat::allowSyncAfterError() const
 
 void ProtobufRowInputFormat::syncAfterError()
 {
-    reader.endMessage(true);
+    reader->endMessage(true);
 }
 
 void registerInputFormatProcessorProtobuf(FormatFactory & factory)
 {
-    for (bool use_length_delimiters : {false, true})
+    for (bool with_length_delimiter : {false, true})
     {
-        factory.registerInputFormatProcessor(use_length_delimiters ? "Protobuf" : "ProtobufSingle", [use_length_delimiters](
+        factory.registerInputFormatProcessor(with_length_delimiter ? "Protobuf" : "ProtobufSingle", [with_length_delimiter](
             ReadBuffer & buf,
             const Block & sample,
             IRowInputFormat::Params params,
@@ -78,7 +69,7 @@ void registerInputFormatProcessorProtobuf(FormatFactory & factory)
             return std::make_shared<ProtobufRowInputFormat>(buf, sample, std::move(params),
                 FormatSchemaInfo(settings.schema.format_schema, "Protobuf", true,
                                 settings.schema.is_server, settings.schema.format_schema_path),
-                use_length_delimiters);
+                with_length_delimiter);
         });
     }
 }
diff --git a/src/Processors/Formats/Impl/ProtobufRowInputFormat.h b/src/Processors/Formats/Impl/ProtobufRowInputFormat.h
index c6bc350e893..b2eabd4f37c 100644
--- a/src/Processors/Formats/Impl/ProtobufRowInputFormat.h
+++ b/src/Processors/Formats/Impl/ProtobufRowInputFormat.h
@@ -5,14 +5,14 @@
 #endif
 
 #if USE_PROTOBUF
-#    include <DataTypes/IDataType.h>
-#    include <Formats/ProtobufReader.h>
 #    include <Processors/Formats/IRowInputFormat.h>
 
 namespace DB
 {
 class Block;
 class FormatSchemaInfo;
+class ProtobufReader;
+class ProtobufSerializer;
 
 
 /** Stream designed to deserialize data from the google protobuf format.
@@ -29,18 +29,19 @@ class FormatSchemaInfo;
 class ProtobufRowInputFormat : public IRowInputFormat
 {
 public:
-    ProtobufRowInputFormat(ReadBuffer & in_, const Block & header_, Params params_, const FormatSchemaInfo & info_, const bool use_length_delimiters_);
+    ProtobufRowInputFormat(ReadBuffer & in_, const Block & header_, const Params & params_, const FormatSchemaInfo & schema_info_, bool with_length_delimiter_);
     ~ProtobufRowInputFormat() override;
 
     String getName() const override { return "ProtobufRowInputFormat"; }
 
-    bool readRow(MutableColumns & columns, RowReadExtension & extra) override;
+    bool readRow(MutableColumns & columns, RowReadExtension &) override;
     bool allowSyncAfterError() const override;
     void syncAfterError() override;
 
 private:
-    DataTypes data_types;
-    ProtobufReader reader;
+    std::unique_ptr<ProtobufReader> reader;
+    std::vector<size_t> missing_column_indices;
+    std::unique_ptr<ProtobufSerializer> serializer;
 };
 
 }
diff --git a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp
index 3c885e80e31..d3b9a0124c1 100644
--- a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp
+++ b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp
@@ -1,13 +1,13 @@
-#include <Formats/FormatFactory.h>
 #include "ProtobufRowOutputFormat.h"
 
 #if USE_PROTOBUF
-
-#include <Core/Block.h>
-#include <Formats/FormatSchemaInfo.h>
-#include <Formats/ProtobufSchemas.h>
-#include <Interpreters/Context.h>
-#include <google/protobuf/descriptor.h>
+#   include <Formats/FormatFactory.h>
+#   include <Core/Block.h>
+#   include <Formats/FormatSchemaInfo.h>
+#   include <Formats/ProtobufSchemas.h>
+#   include <Formats/ProtobufSerializer.h>
+#   include <Formats/ProtobufWriter.h>
+#   include <google/protobuf/descriptor.h>
 
 
 namespace DB
@@ -20,58 +20,55 @@ namespace ErrorCodes
 
 ProtobufRowOutputFormat::ProtobufRowOutputFormat(
     WriteBuffer & out_,
-    const Block & header,
+    const Block & header_,
     const RowOutputFormatParams & params_,
-    const FormatSchemaInfo & format_schema,
-    const FormatSettings & settings)
-    : IRowOutputFormat(header, out_, params_)
-    , data_types(header.getDataTypes())
-    , writer(out,
-        ProtobufSchemas::instance().getMessageTypeForFormatSchema(format_schema),
-        header.getNames(), settings.protobuf.write_row_delimiters)
-    , allow_only_one_row(
-        !settings.protobuf.write_row_delimiters
-            && !settings.protobuf.allow_many_rows_no_delimiters)
+    const FormatSchemaInfo & schema_info_,
+    const FormatSettings & settings_,
+    bool with_length_delimiter_)
+    : IRowOutputFormat(header_, out_, params_)
+    , writer(std::make_unique<ProtobufWriter>(out))
+    , serializer(ProtobufSerializer::create(
+          header_.getNames(),
+          header_.getDataTypes(),
+          *ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_),
+          with_length_delimiter_,
+          *writer))
+    , allow_multiple_rows(with_length_delimiter_ || settings_.protobuf.allow_multiple_rows_without_delimiter)
 {
-    value_indices.resize(header.columns());
 }
 
 void ProtobufRowOutputFormat::write(const Columns & columns, size_t row_num)
 {
-    if (allow_only_one_row && !first_row)
-    {
-        throw Exception("The ProtobufSingle format can't be used to write multiple rows because this format doesn't have any row delimiter.", ErrorCodes::NO_ROW_DELIMITER);
-    }
+    if (!allow_multiple_rows && !first_row)
+        throw Exception(
+            "The ProtobufSingle format can't be used to write multiple rows because this format doesn't have any row delimiter.",
+            ErrorCodes::NO_ROW_DELIMITER);
 
-    writer.startMessage();
-    std::fill(value_indices.begin(), value_indices.end(), 0);
-    size_t column_index;
-    while (writer.writeField(column_index))
-        data_types[column_index]->serializeProtobuf(
-                *columns[column_index], row_num, writer, value_indices[column_index]);
-    writer.endMessage();
+    if (!row_num)
+        serializer->setColumns(columns.data(), columns.size());
+
+    serializer->writeRow(row_num);
 }
 
 
 void registerOutputFormatProcessorProtobuf(FormatFactory & factory)
 {
-    for (bool write_row_delimiters : {false, true})
+    for (bool with_length_delimiter : {false, true})
     {
         factory.registerOutputFormatProcessor(
-            write_row_delimiters ? "Protobuf" : "ProtobufSingle",
-            [write_row_delimiters](WriteBuffer & buf,
+            with_length_delimiter ? "Protobuf" : "ProtobufSingle",
+            [with_length_delimiter](WriteBuffer & buf,
                const Block & header,
                const RowOutputFormatParams & params,
-               const FormatSettings & _settings)
+               const FormatSettings & settings)
             {
-                FormatSettings settings = _settings;
-                settings.protobuf.write_row_delimiters = write_row_delimiters;
                 return std::make_shared<ProtobufRowOutputFormat>(
                     buf, header, params,
                     FormatSchemaInfo(settings.schema.format_schema, "Protobuf",
                         true, settings.schema.is_server,
                         settings.schema.format_schema_path),
-                    settings);
+                    settings,
+                    with_length_delimiter);
             });
     }
 }
diff --git a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.h b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.h
index 847f7607ff5..5f82950e891 100644
--- a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.h
+++ b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.h
@@ -8,21 +8,16 @@
 #    include <Core/Block.h>
 #    include <Formats/FormatSchemaInfo.h>
 #    include <Formats/FormatSettings.h>
-#    include <Formats/ProtobufWriter.h>
 #    include <Processors/Formats/IRowOutputFormat.h>
 
 
-namespace google
-{
-namespace protobuf
-{
-    class Message;
-}
-}
-
-
 namespace DB
 {
+class ProtobufWriter;
+class ProtobufSerializer;
+class FormatSchemaInfo;
+struct FormatSettings;
+
 /** Stream designed to serialize data in the google protobuf format.
   * Each row is written as a separated message.
   *
@@ -38,10 +33,11 @@ class ProtobufRowOutputFormat : public IRowOutputFormat
 public:
     ProtobufRowOutputFormat(
         WriteBuffer & out_,
-        const Block & header,
+        const Block & header_,
         const RowOutputFormatParams & params_,
-        const FormatSchemaInfo & format_schema,
-        const FormatSettings & settings);
+        const FormatSchemaInfo & schema_info_,
+        const FormatSettings & settings_,
+        bool with_length_delimiter_);
 
     String getName() const override { return "ProtobufRowOutputFormat"; }
 
@@ -50,10 +46,9 @@ public:
     std::string getContentType() const override { return "application/octet-stream"; }
 
 private:
-    DataTypes data_types;
-    ProtobufWriter writer;
-    std::vector<size_t> value_indices;
-    const bool allow_only_one_row;
+    std::unique_ptr<ProtobufWriter> writer;
+    std::unique_ptr<ProtobufSerializer> serializer;
+    const bool allow_multiple_rows;
 };
 
 }
diff --git a/src/Storages/Kafka/KafkaBlockOutputStream.cpp b/src/Storages/Kafka/KafkaBlockOutputStream.cpp
index cfbb7ad2523..2cb0fd98c71 100644
--- a/src/Storages/Kafka/KafkaBlockOutputStream.cpp
+++ b/src/Storages/Kafka/KafkaBlockOutputStream.cpp
@@ -26,7 +26,7 @@ void KafkaBlockOutputStream::writePrefix()
     buffer = storage.createWriteBuffer(getHeader());
 
     auto format_settings = getFormatSettings(*context);
-    format_settings.protobuf.allow_many_rows_no_delimiters = true;
+    format_settings.protobuf.allow_multiple_rows_without_delimiter = true;
 
     child = FormatFactory::instance().getOutputStream(storage.getFormatName(), *buffer,
         getHeader(), *context,
diff --git a/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp
index d239586bb65..a987fff3c64 100644
--- a/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp
+++ b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp
@@ -34,7 +34,7 @@ void RabbitMQBlockOutputStream::writePrefix()
     buffer->activateWriting();
 
     auto format_settings = getFormatSettings(context);
-    format_settings.protobuf.allow_many_rows_no_delimiters = true;
+    format_settings.protobuf.allow_multiple_rows_without_delimiter = true;
 
     child = FormatFactory::instance().getOutputStream(storage.getFormatName(), *buffer,
         getHeader(), context,
diff --git a/tests/queries/0_stateless/00825_protobuf_format_array_3dim.proto b/tests/queries/0_stateless/00825_protobuf_format_array_3dim.proto
new file mode 100644
index 00000000000..8673924c929
--- /dev/null
+++ b/tests/queries/0_stateless/00825_protobuf_format_array_3dim.proto
@@ -0,0 +1,14 @@
+syntax = "proto3";
+
+message ABC
+{
+    message nested
+    {
+        message nested
+        {
+            repeated int32 c = 1;
+        }
+        repeated nested b = 1;
+    }
+    repeated nested a = 1;
+}
\ No newline at end of file
diff --git a/tests/queries/0_stateless/00825_protobuf_format_array_3dim.reference b/tests/queries/0_stateless/00825_protobuf_format_array_3dim.reference
new file mode 100644
index 00000000000..69e7d5e1da8
--- /dev/null
+++ b/tests/queries/0_stateless/00825_protobuf_format_array_3dim.reference
@@ -0,0 +1,52 @@
+[[],[[]],[[1]],[[2,3],[4]]]
+[[[5,6,7]],[[8,9,10]]]
+
+Binary representation:
+00000000  1a 0a 00 0a 02 0a 00 0a  05 0a 03 0a 01 01 0a 0b  |................|
+00000010  0a 04 0a 02 02 03 0a 03  0a 01 04 12 0a 07 0a 05  |................|
+00000020  0a 03 05 06 07 0a 07 0a  05 0a 03 08 09 0a        |..............|
+0000002e
+
+MESSAGE #1 AT 0x00000001
+a {
+}
+a {
+  b {
+  }
+}
+a {
+  b {
+    c: 1
+  }
+}
+a {
+  b {
+    c: 2
+    c: 3
+  }
+  b {
+    c: 4
+  }
+}
+MESSAGE #2 AT 0x0000001C
+a {
+  b {
+    c: 5
+    c: 6
+    c: 7
+  }
+}
+a {
+  b {
+    c: 8
+    c: 9
+    c: 10
+  }
+}
+
+Binary representation is as expected
+
+[[],[[]],[[1]],[[2,3],[4]]]
+[[[5,6,7]],[[8,9,10]]]
+[[],[[]],[[1]],[[2,3],[4]]]
+[[[5,6,7]],[[8,9,10]]]
diff --git a/tests/queries/0_stateless/00825_protobuf_format_array_3dim.sh b/tests/queries/0_stateless/00825_protobuf_format_array_3dim.sh
new file mode 100755
index 00000000000..903217ca939
--- /dev/null
+++ b/tests/queries/0_stateless/00825_protobuf_format_array_3dim.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+set -eo pipefail
+
+# Run the client.
+$CLICKHOUSE_CLIENT --multiquery <<'EOF'
+DROP TABLE IF EXISTS array_3dim_protobuf_00825;
+
+CREATE TABLE array_3dim_protobuf_00825
+(
+    `a_b_c` Array(Array(Array(Int32)))
+) ENGINE = MergeTree ORDER BY tuple();
+
+INSERT INTO array_3dim_protobuf_00825 VALUES ([[], [[]], [[1]], [[2,3],[4]]]), ([[[5, 6, 7]], [[8, 9, 10]]]);
+
+SELECT * FROM array_3dim_protobuf_00825;
+EOF
+
+BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_array_3dim.XXXXXX.binary")
+$CLICKHOUSE_CLIENT --query "SELECT * FROM array_3dim_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_array_3dim:ABC'" > "$BINARY_FILE_PATH"
+
+# Check the output in the protobuf format
+echo
+$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_array_3dim:ABC" --input "$BINARY_FILE_PATH"
+
+# Check the input in the protobuf format (now the table contains the same data twice).
+echo
+$CLICKHOUSE_CLIENT --query "INSERT INTO array_3dim_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_array_3dim:ABC'" < "$BINARY_FILE_PATH"
+$CLICKHOUSE_CLIENT --query "SELECT * FROM array_3dim_protobuf_00825"
+
+rm "$BINARY_FILE_PATH"
diff --git a/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.proto b/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.proto
new file mode 100644
index 00000000000..8f84164da2a
--- /dev/null
+++ b/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.proto
@@ -0,0 +1,9 @@
+syntax = "proto3";
+
+message AA {
+    message nested_array {
+	    repeated double c = 2;
+    }
+    string a = 1;
+    repeated nested_array b = 2;
+}
\ No newline at end of file
diff --git a/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.reference b/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.reference
new file mode 100644
index 00000000000..5ea6780a3ba
--- /dev/null
+++ b/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.reference
@@ -0,0 +1,41 @@
+one	[[1,2,3],[0.5,0.25],[],[4,5],[0.125,0.0625],[6]]
+
+Binary representation:
+00000000  6b 0a 03 6f 6e 65 12 1a  12 18 00 00 00 00 00 00  |k..one..........|
+00000010  f0 3f 00 00 00 00 00 00  00 40 00 00 00 00 00 00  |.?.......@......|
+00000020  08 40 12 12 12 10 00 00  00 00 00 00 e0 3f 00 00  |.@...........?..|
+00000030  00 00 00 00 d0 3f 12 00  12 12 12 10 00 00 00 00  |.....?..........|
+00000040  00 00 10 40 00 00 00 00  00 00 14 40 12 12 12 10  |...@.......@....|
+00000050  00 00 00 00 00 00 c0 3f  00 00 00 00 00 00 b0 3f  |.......?.......?|
+00000060  12 0a 12 08 00 00 00 00  00 00 18 40              |...........@|
+0000006c
+
+MESSAGE #1 AT 0x00000001
+a: "one"
+b {
+  c: 1
+  c: 2
+  c: 3
+}
+b {
+  c: 0.5
+  c: 0.25
+}
+b {
+}
+b {
+  c: 4
+  c: 5
+}
+b {
+  c: 0.125
+  c: 0.0625
+}
+b {
+  c: 6
+}
+
+Binary representation is as expected
+
+one	[[1,2,3],[0.5,0.25],[],[4,5],[0.125,0.0625],[6]]
+one	[[1,2,3],[0.5,0.25],[],[4,5],[0.125,0.0625],[6]]
diff --git a/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.sh b/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.sh
new file mode 100755
index 00000000000..0b386723091
--- /dev/null
+++ b/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+
+# https://github.com/ClickHouse/ClickHouse/issues/9069
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+set -eo pipefail
+
+# Run the client.
+$CLICKHOUSE_CLIENT --multiquery <<'EOF'
+CREATE TABLE array_of_arrays_protobuf_00825
+(
+    `a` String,
+    `b` Nested (
+        `c` Array(Float64)
+    )
+) ENGINE = MergeTree ORDER BY tuple();
+
+INSERT INTO array_of_arrays_protobuf_00825 VALUES ('one', [[1,2,3],[0.5,0.25],[],[4,5],[0.125,0.0625],[6]]);
+
+SELECT * FROM array_of_arrays_protobuf_00825;
+EOF
+
+BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_array_of_arrays.XXXXXX.binary")
+$CLICKHOUSE_CLIENT --query "SELECT * FROM array_of_arrays_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_array_of_arrays:AA'" > "$BINARY_FILE_PATH"
+
+# Check the output in the protobuf format
+echo
+$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_array_of_arrays:AA" --input "$BINARY_FILE_PATH"
+
+# Check the input in the protobuf format (now the table contains the same data twice).
+echo
+$CLICKHOUSE_CLIENT --query "INSERT INTO array_of_arrays_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_array_of_arrays:AA'" < "$BINARY_FILE_PATH"
+$CLICKHOUSE_CLIENT --query "SELECT * FROM array_of_arrays_protobuf_00825"
+
+rm "$BINARY_FILE_PATH"
diff --git a/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.proto b/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.proto
new file mode 100644
index 00000000000..ba558dbbadb
--- /dev/null
+++ b/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.proto
@@ -0,0 +1,13 @@
+syntax = "proto3";
+
+message Message
+{
+  enum Enum
+  {
+    FIRST = 0;
+    SECOND = 1;
+    TEN = 10;
+    HUNDRED = 100;
+  };
+  Enum x = 1;
+};
\ No newline at end of file
diff --git a/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.reference b/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.reference
new file mode 100644
index 00000000000..ef8059bac28
--- /dev/null
+++ b/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.reference
@@ -0,0 +1,31 @@
+Second
+Third
+First
+First
+Second
+
+Binary representation:
+00000000  02 08 01 02 08 64 00 00  02 08 01                 |.....d.....|
+0000000b
+
+MESSAGE #1 AT 0x00000001
+x: SECOND
+MESSAGE #2 AT 0x00000004
+x: HUNDRED
+MESSAGE #3 AT 0x00000007
+MESSAGE #4 AT 0x00000008
+MESSAGE #5 AT 0x00000009
+x: SECOND
+
+Binary representation is as expected
+
+Second
+Third
+First
+First
+Second
+Second
+Third
+First
+First
+Second
diff --git a/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.sh b/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.sh
new file mode 100755
index 00000000000..cbb387a62a5
--- /dev/null
+++ b/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+
+# https://github.com/ClickHouse/ClickHouse/issues/7438
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+set -eo pipefail
+
+# Run the client.
+$CLICKHOUSE_CLIENT --multiquery <<'EOF'
+DROP TABLE IF EXISTS enum_mapping_protobuf_00825;
+
+CREATE TABLE enum_mapping_protobuf_00825
+(
+  x Enum16('First'=-100, 'Second'=0, 'Third'=100)
+) ENGINE = MergeTree ORDER BY tuple();
+
+INSERT INTO enum_mapping_protobuf_00825 VALUES ('Second'), ('Third'), ('First'), ('First'), ('Second');
+
+SELECT * FROM enum_mapping_protobuf_00825;
+EOF
+
+BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_enum_mapping.XXXXXX.binary")
+$CLICKHOUSE_CLIENT --query "SELECT * FROM enum_mapping_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_enum_mapping:Message'" > "$BINARY_FILE_PATH"
+
+# Check the output in the protobuf format
+echo
+$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_enum_mapping:Message" --input "$BINARY_FILE_PATH"
+
+# Check the input in the protobuf format (now the table contains the same data twice).
+echo
+$CLICKHOUSE_CLIENT --query "INSERT INTO enum_mapping_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_enum_mapping:Message'" < "$BINARY_FILE_PATH"
+$CLICKHOUSE_CLIENT --query "SELECT * FROM enum_mapping_protobuf_00825"
+
+rm "$BINARY_FILE_PATH"
diff --git a/tests/queries/0_stateless/00825_protobuf_format_map.proto b/tests/queries/0_stateless/00825_protobuf_format_map.proto
new file mode 100644
index 00000000000..561b409b733
--- /dev/null
+++ b/tests/queries/0_stateless/00825_protobuf_format_map.proto
@@ -0,0 +1,5 @@
+syntax = "proto3";
+
+message Message {
+  map<string, uint32> a = 1;
+};
diff --git a/tests/queries/0_stateless/00825_protobuf_format_map.reference b/tests/queries/0_stateless/00825_protobuf_format_map.reference
new file mode 100644
index 00000000000..e3f17cb1095
--- /dev/null
+++ b/tests/queries/0_stateless/00825_protobuf_format_map.reference
@@ -0,0 +1,19 @@
+{'x':5,'y':7}
+{'z':11}
+{'temp':0}
+{'':0}
+
+Binary representation:
+00000000  0e 0a 05 0a 01 78 10 05  0a 05 0a 01 79 10 07 07  |.....x......y...|
+00000010  0a 05 0a 01 7a 10 0b 0a  0a 08 0a 04 74 65 6d 70  |....z.......temp|
+00000020  10 00 06 0a 04 0a 00 10  00                       |.........|
+00000029
+
+{'x':5,'y':7}
+{'z':11}
+{'temp':0}
+{'':0}
+{'x':5,'y':7}
+{'z':11}
+{'temp':0}
+{'':0}
diff --git a/tests/queries/0_stateless/00825_protobuf_format_map.sh b/tests/queries/0_stateless/00825_protobuf_format_map.sh
new file mode 100755
index 00000000000..5df25c41750
--- /dev/null
+++ b/tests/queries/0_stateless/00825_protobuf_format_map.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+
+# https://github.com/ClickHouse/ClickHouse/issues/6497
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+set -eo pipefail
+
+# Run the client.
+$CLICKHOUSE_CLIENT --multiquery <<'EOF'
+SET allow_experimental_map_type = 1;
+
+DROP TABLE IF EXISTS map_00825;
+
+CREATE TABLE map_00825
+(
+  a Map(String, UInt32)
+) ENGINE = MergeTree ORDER BY tuple();
+
+INSERT INTO map_00825 VALUES ({'x':5, 'y':7}), ({'z':11}), ({'temp':0}), ({'':0});
+
+SELECT * FROM map_00825;
+EOF
+
+BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_map.XXXXXX.binary")
+$CLICKHOUSE_CLIENT --query "SELECT * FROM map_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_map:Message'" > "$BINARY_FILE_PATH"
+
+# Check the output in the protobuf format
+echo
+echo "Binary representation:"
+hexdump -C $BINARY_FILE_PATH
+
+# Check the input in the protobuf format (now the table contains the same data twice).
+echo
+$CLICKHOUSE_CLIENT --query "INSERT INTO map_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_map:Message'" < "$BINARY_FILE_PATH"
+$CLICKHOUSE_CLIENT --query "SELECT * FROM map_00825"
+
+rm "$BINARY_FILE_PATH"
diff --git a/tests/queries/0_stateless/00825_protobuf_format_nested_optional.proto b/tests/queries/0_stateless/00825_protobuf_format_nested_optional.proto
new file mode 100644
index 00000000000..052741f504b
--- /dev/null
+++ b/tests/queries/0_stateless/00825_protobuf_format_nested_optional.proto
@@ -0,0 +1,10 @@
+syntax = "proto3";
+
+message Repeated {
+  string foo = 1;
+  int64 bar = 2;
+}
+
+message Message {
+  repeated Repeated messages = 1;
+};
\ No newline at end of file
diff --git a/tests/queries/0_stateless/00825_protobuf_format_nested_optional.reference b/tests/queries/0_stateless/00825_protobuf_format_nested_optional.reference
new file mode 100644
index 00000000000..6cdd56a5b7f
--- /dev/null
+++ b/tests/queries/0_stateless/00825_protobuf_format_nested_optional.reference
@@ -0,0 +1,25 @@
+['1']	[0]
+['1','']	[0,1]
+
+Binary representation:
+00000000  05 0a 03 0a 01 31 09 0a  03 0a 01 31 0a 02 10 01  |.....1.....1....|
+00000010
+
+MESSAGE #1 AT 0x00000001
+messages {
+  foo: "1"
+}
+MESSAGE #2 AT 0x00000007
+messages {
+  foo: "1"
+}
+messages {
+  bar: 1
+}
+
+Binary representation is as expected
+
+['1']	[0]
+['1','']	[0,1]
+['1']	[0]
+['1','']	[0,1]
diff --git a/tests/queries/0_stateless/00825_protobuf_format_nested_optional.sh b/tests/queries/0_stateless/00825_protobuf_format_nested_optional.sh
new file mode 100755
index 00000000000..58ded92f2c1
--- /dev/null
+++ b/tests/queries/0_stateless/00825_protobuf_format_nested_optional.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+
+# https://github.com/ClickHouse/ClickHouse/issues/6497
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+set -eo pipefail
+
+# Run the client.
+$CLICKHOUSE_CLIENT --multiquery <<'EOF'
+DROP TABLE IF EXISTS nested_optional_protobuf_00825;
+
+CREATE TABLE nested_optional_protobuf_00825
+(
+  messages Nested
+  (
+    foo String,
+    bar Int64
+  )
+) ENGINE = MergeTree ORDER BY tuple();
+
+INSERT INTO nested_optional_protobuf_00825 VALUES (['1'], [0]), (['1', ''], [0, 1]);
+
+SELECT * FROM nested_optional_protobuf_00825;
+EOF
+
+BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_nested_optional.XXXXXX.binary")
+$CLICKHOUSE_CLIENT --query "SELECT * FROM nested_optional_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_nested_optional:Message'" > "$BINARY_FILE_PATH"
+
+# Check the output in the protobuf format
+echo
+$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_nested_optional:Message" --input "$BINARY_FILE_PATH"
+
+# Check the input in the protobuf format (now the table contains the same data twice).
+echo
+$CLICKHOUSE_CLIENT --query "INSERT INTO nested_optional_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_nested_optional:Message'" < "$BINARY_FILE_PATH"
+$CLICKHOUSE_CLIENT --query "SELECT * FROM nested_optional_protobuf_00825"
+
+rm "$BINARY_FILE_PATH"
diff --git a/tests/queries/0_stateless/00825_protobuf_format_table_default.proto b/tests/queries/0_stateless/00825_protobuf_format_table_default.proto
new file mode 100644
index 00000000000..08e6049ffe0
--- /dev/null
+++ b/tests/queries/0_stateless/00825_protobuf_format_table_default.proto
@@ -0,0 +1,6 @@
+syntax = "proto3";
+
+message Message {
+  sint32 x = 1;
+  sint32 z = 2;
+};
\ No newline at end of file
diff --git a/tests/queries/0_stateless/00825_protobuf_format_table_default.reference b/tests/queries/0_stateless/00825_protobuf_format_table_default.reference
new file mode 100644
index 00000000000..5472f3bfa14
--- /dev/null
+++ b/tests/queries/0_stateless/00825_protobuf_format_table_default.reference
@@ -0,0 +1,37 @@
+0	0	0
+2	4	8
+3	9	27
+5	25	125
+101	102	103
+
+Binary representation:
+00000000  00 04 08 04 10 10 04 08  06 10 36 05 08 0a 10 fa  |..........6.....|
+00000010  01 06 08 ca 01 10 ce 01                           |........|
+00000018
+
+MESSAGE #1 AT 0x00000001
+MESSAGE #2 AT 0x00000002
+x: 2
+z: 8
+MESSAGE #3 AT 0x00000007
+x: 3
+z: 27
+MESSAGE #4 AT 0x0000000C
+x: 5
+z: 125
+MESSAGE #5 AT 0x00000012
+x: 101
+z: 103
+
+Binary representation is as expected
+
+0	0	0
+0	0	0
+2	4	8
+2	4	8
+3	9	27
+3	9	27
+5	25	125
+5	25	125
+101	102	103
+101	10201	103
diff --git a/tests/queries/0_stateless/00825_protobuf_format_table_default.sh b/tests/queries/0_stateless/00825_protobuf_format_table_default.sh
new file mode 100755
index 00000000000..97f7769269a
--- /dev/null
+++ b/tests/queries/0_stateless/00825_protobuf_format_table_default.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+set -eo pipefail
+
+# Run the client.
+$CLICKHOUSE_CLIENT --multiquery <<'EOF'
+DROP TABLE IF EXISTS table_default_protobuf_00825;
+
+CREATE TABLE table_default_protobuf_00825
+(
+  x Int64,
+  y Int64 DEFAULT x * x,
+  z Int64 DEFAULT x * x * x
+) ENGINE = MergeTree ORDER BY tuple();
+
+INSERT INTO table_default_protobuf_00825 (x) VALUES (0), (2), (3), (5);
+INSERT INTO table_default_protobuf_00825 VALUES (101, 102, 103);
+
+SELECT * FROM table_default_protobuf_00825 ORDER BY x,y,z;
+EOF
+
+BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_table_default.XXXXXX.binary")
+$CLICKHOUSE_CLIENT --query "SELECT * FROM table_default_protobuf_00825 ORDER BY x,y,z FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_table_default:Message'" > "$BINARY_FILE_PATH"
+
+# Check the output in the protobuf format
+echo
+$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_table_default:Message" --input "$BINARY_FILE_PATH"
+
+# Check the input in the protobuf format (now the table contains the same data twice).
+echo
+$CLICKHOUSE_CLIENT --query "INSERT INTO table_default_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_table_default:Message'" < "$BINARY_FILE_PATH"
+$CLICKHOUSE_CLIENT --query "SELECT * FROM table_default_protobuf_00825 ORDER BY x,y,z"
+
+rm "$BINARY_FILE_PATH"
diff --git a/tests/queries/0_stateless/helpers/protobuf_length_delimited_encoder.py b/tests/queries/0_stateless/helpers/protobuf_length_delimited_encoder.py
new file mode 100755
index 00000000000..3ed42f1c820
--- /dev/null
+++ b/tests/queries/0_stateless/helpers/protobuf_length_delimited_encoder.py
@@ -0,0 +1,180 @@
+#!/usr/bin/env python3
+
+# The protobuf compiler protoc doesn't support encoding or decoding length-delimited protobuf message.
+# To do that this script has been written. 
+
+import argparse
+import os.path
+import struct
+import subprocess
+import sys
+import tempfile
+
+def read_varint(input):
+    res = 0
+    shift = 0
+    while True:
+        c = input.read(1)
+        if len(c) == 0:
+            return None
+        b = c[0]
+        if b < 0x80:
+            res += b << shift
+            break
+        b -= 0x80
+        res += b << shift
+        shift = shift << 7
+    return res
+
+def write_varint(output, value):
+    while True:
+        if value < 0x80:
+            b = value
+            output.write(b.to_bytes(1, byteorder='little'))
+            break
+        b = (value & 0x7F) + 0x80
+        output.write(b.to_bytes(1, byteorder='little'))
+        value = value >> 7
+
+def write_hexdump(output, data):
+    with subprocess.Popen(["hexdump", "-C"], stdin=subprocess.PIPE, stdout=output, shell=False) as proc:
+        proc.communicate(data)
+        if proc.returncode != 0:
+            raise RuntimeError("hexdump returned code " + str(proc.returncode))
+    output.flush()
+
+class FormatSchemaSplitted:
+    def __init__(self, format_schema):
+        self.format_schema = format_schema
+        splitted = self.format_schema.split(':')
+        if len(splitted) < 2:
+            raise RuntimeError('The format schema must have the format "schemafile:MessageType"')
+        path = splitted[0]
+        self.schemadir = os.path.dirname(path)
+        self.schemaname = os.path.basename(path)
+        if not self.schemaname.endswith(".proto"):
+            self.schemaname = self.schemaname + ".proto"
+        self.message_type = splitted[1]
+
+def decode(input, output, format_schema):
+    if not type(format_schema) is FormatSchemaSplitted:
+        format_schema = FormatSchemaSplitted(format_schema)
+    msgindex = 1
+    while True:
+        sz = read_varint(input)
+        if sz is None:
+            break
+        output.write("MESSAGE #{msgindex} AT 0x{msgoffset:08X}\n".format(msgindex=msgindex, msgoffset=input.tell()).encode())
+        output.flush()
+        msg = input.read(sz)
+        if len(msg) < sz:
+            raise EOFError('Unexpected end of file')
+        with subprocess.Popen(["protoc",
+                                "--decode", format_schema.message_type, format_schema.schemaname],
+                              cwd=format_schema.schemadir,
+                              stdin=subprocess.PIPE,
+                              stdout=output,
+                              shell=False) as proc:
+            proc.communicate(msg)
+            if proc.returncode != 0:
+                raise RuntimeError("protoc returned code " + str(proc.returncode))
+        output.flush()
+        msgindex = msgindex + 1
+
+def encode(input, output, format_schema):
+    if not type(format_schema) is FormatSchemaSplitted:
+        format_schema = FormatSchemaSplitted(format_schema)
+    line_offset = input.tell()
+    line = input.readline()
+    while True:
+        if len(line) == 0:
+            break
+        if not line.startswith(b"MESSAGE #"):
+            raise RuntimeError("The line at 0x{line_offset:08X} must start with the text 'MESSAGE #'".format(line_offset=line_offset))
+        msg = b""
+        while True:
+            line_offset = input.tell()
+            line = input.readline()
+            if line.startswith(b"MESSAGE #") or len(line) == 0:
+                break
+            msg += line
+        with subprocess.Popen(["protoc",
+                                "--encode", format_schema.message_type, format_schema.schemaname],
+                              cwd=format_schema.schemadir,
+                              stdin=subprocess.PIPE,
+                              stdout=subprocess.PIPE,
+                              shell=False) as proc:
+            msgbin = proc.communicate(msg)[0]
+            if proc.returncode != 0:
+                raise RuntimeError("protoc returned code " + str(proc.returncode))
+        write_varint(output, len(msgbin))
+        output.write(msgbin)
+        output.flush()
+
+def decode_and_check(input, output, format_schema):
+    input_data = input.read()
+    output.write(b"Binary representation:\n")
+    output.flush()
+    write_hexdump(output, input_data)
+    output.write(b"\n")
+    output.flush()
+
+    with tempfile.TemporaryFile() as tmp_input, tempfile.TemporaryFile() as tmp_decoded, tempfile.TemporaryFile() as tmp_encoded:
+        tmp_input.write(input_data)
+        tmp_input.flush()
+        tmp_input.seek(0)
+        decode(tmp_input, tmp_decoded, format_schema)
+        tmp_decoded.seek(0)
+        decoded_text = tmp_decoded.read()
+        output.write(decoded_text)
+        output.flush()
+        tmp_decoded.seek(0)
+        encode(tmp_decoded, tmp_encoded, format_schema)
+        tmp_encoded.seek(0)
+        encoded_data = tmp_encoded.read()
+
+    if encoded_data == input_data:
+        output.write(b"\nBinary representation is as expected\n")
+        output.flush()
+    else:
+        output.write(b"\nBinary representation differs from the expected one (listed below):\n")
+        output.flush()
+        write_hexdump(output, encoded_data)
+        sys.exit(1)
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Encodes or decodes length-delimited protobuf messages.')
+    parser.add_argument('--input', help='The input file, the standard input will be used if not specified.')
+    parser.add_argument('--output', help='The output file, the standard output will be used if not specified')
+    parser.add_argument('--format_schema', required=True, help='Format schema in the format "schemafile:MessageType"')
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument('--encode', action='store_true', help='Specify to encode length-delimited messages.'
+                       'The utility will read text-format messages of the given type from the input and write it in binary to the output.')
+    group.add_argument('--decode', action='store_true', help='Specify to decode length-delimited messages.'
+                       'The utility will read messages in binary from the input and write text-format messages to the output.')
+    group.add_argument('--decode_and_check', action='store_true', help='The same as --decode, and the utility will then encode '
+                       ' the decoded data back to the binary form to check that the result of that encoding is the same as the input was.')
+    args = parser.parse_args()
+    
+    custom_input_file = None
+    custom_output_file = None
+    try:
+        if args.input:
+            custom_input_file = open(args.input, "rb")
+        if args.output:
+            custom_output_file = open(args.output, "wb")
+        input = custom_input_file if custom_input_file else sys.stdin.buffer
+        output = custom_output_file if custom_output_file else sys.stdout.buffer
+
+        if args.encode:
+            encode(input, output, args.format_schema)
+        elif args.decode:
+            decode(input, output, args.format_schema)
+        elif args.decode_and_check:
+            decode_and_check(input, output, args.format_schema)
+
+    finally:
+        if custom_input_file:
+            custom_input_file.close()
+        if custom_output_file:
+            custom_output_file.close()
diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json
index ee25bee6a0a..0e470e14916 100644
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@@ -131,6 +131,12 @@
         "00763_create_query_as_table_engine_bug",
         "00765_sql_compatibility_aliases",
         "00825_protobuf_format_input",
+        "00825_protobuf_format_nested_optional",
+        "00825_protobuf_format_array_3dim",
+        "00825_protobuf_format_map",
+        "00825_protobuf_format_array_of_arrays",
+        "00825_protobuf_format_table_default",
+        "00825_protobuf_format_enum_mapping",
         "00826_cross_to_inner_join",
         "00834_not_between",
         "00909_kill_not_initialized_query",

From 3891dd62842b1b3d6fa8483cbc26537d2d0923ba Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Wed, 17 Feb 2021 21:23:27 +0300
Subject: [PATCH 1133/1238] Update InterpreterSelectQuery.cpp

---
 src/Interpreters/InterpreterSelectQuery.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp
index a325a8d3328..9f97160f77f 100644
--- a/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/src/Interpreters/InterpreterSelectQuery.cpp
@@ -784,7 +784,7 @@ static bool hasWithTotalsInAnySubqueryInFromClause(const ASTSelectQuery & query)
     {
         if (const auto * ast_union = query_table->as<ASTSelectWithUnionQuery>())
         {
-            ///NOTE: Child of subquery can be ASTSelectWithUnionQuery or ASTSelectQuery,
+            /// NOTE: Child of subquery can be ASTSelectWithUnionQuery or ASTSelectQuery,
             /// and after normalization, the height of the AST tree is at most 2
             for (const auto & elem : ast_union->list_of_selects->children)
             {

From 62486d6e06eb0eb23ab3a0c3b640bb1895a76181 Mon Sep 17 00:00:00 2001
From: kssenii <sumarokovakseniia@mail.ru>
Date: Wed, 17 Feb 2021 18:40:25 +0000
Subject: [PATCH 1134/1238] Add test

---
 .../integration/test_odbc_interaction/test.py | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/tests/integration/test_odbc_interaction/test.py b/tests/integration/test_odbc_interaction/test.py
index 084fc407f39..6bb6a6ee777 100644
--- a/tests/integration/test_odbc_interaction/test.py
+++ b/tests/integration/test_odbc_interaction/test.py
@@ -342,3 +342,25 @@ def test_bridge_dies_with_parent(started_cluster):
 
     assert clickhouse_pid is None
     assert bridge_pid is None
+
+
+def test_odbc_postgres_date_data_type(started_cluster):
+    conn = get_postgres_conn();
+    cursor = conn.cursor()
+    cursor.execute("CREATE TABLE IF NOT EXISTS clickhouse.test_date (column1 integer, column2 date)")
+
+    cursor.execute("INSERT INTO clickhouse.test_date VALUES (1, '2020-12-01')")
+    cursor.execute("INSERT INTO clickhouse.test_date VALUES (2, '2020-12-02')")
+    cursor.execute("INSERT INTO clickhouse.test_date VALUES (3, '2020-12-03')")
+    conn.commit()
+
+    node1.query(
+        '''
+        CREATE TABLE test_date (column1 UInt64, column2 Date)
+        ENGINE=ODBC('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_date')''')
+
+    expected = '1\t2020-12-01\n2\t2020-12-02\n3\t2020-12-03\n'
+    result = node1.query('SELECT * FROM test_date');
+    assert(result == expected)
+
+

From f483cd091a5dbc71c7e507ab87d0d6fad307eb39 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sun, 14 Feb 2021 23:31:58 +0300
Subject: [PATCH 1135/1238] test/stress: use clickhouse builtin start/stop to
 run server from the same user

This will allow to attach with gdb for better diagnosis.
---
 docker/test/stress/run.sh | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh
index 88a633ac488..44612a83504 100755
--- a/docker/test/stress/run.sh
+++ b/docker/test/stress/run.sh
@@ -10,14 +10,7 @@ dpkg -i package_folder/clickhouse-test_*.deb
 
 function stop()
 {
-    timeout 120 service clickhouse-server stop
-
-    # Wait for process to disappear from processlist and also try to kill zombies.
-    while kill -9 "$(pidof clickhouse-server)"
-    do
-        echo "Killed clickhouse-server"
-        sleep 0.5
-    done
+    clickhouse stop
 }
 
 function start()
@@ -33,7 +26,8 @@ function start()
             tail -n1000 /var/log/clickhouse-server/clickhouse-server.log
             break
         fi
-        timeout 120 service clickhouse-server start
+        # use root to match with current uid
+        clickhouse start --user root >/var/log/clickhouse-server/stdout.log 2>/var/log/clickhouse-server/stderr.log
         sleep 0.5
         counter=$((counter + 1))
     done

From 63eff6e8c812a8770fc54fa987c68e7fb681abe0 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sat, 13 Feb 2021 11:41:00 +0300
Subject: [PATCH 1136/1238] test/stress: improve backtrace catching on server
 failures

Otherwise sometimes stracktraces may be lost [1]:

  [1]: https://clickhouse-test-reports.s3.yandex.net/19580/6aecb62416ece880cbb8ee3a803e14d841388dde/stress_test_(thread).html#fail1
---
 docker/test/stress/run.sh | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh
index 44612a83504..60e9ffd265c 100755
--- a/docker/test/stress/run.sh
+++ b/docker/test/stress/run.sh
@@ -31,6 +31,18 @@ function start()
         sleep 0.5
         counter=$((counter + 1))
     done
+
+    echo "
+handle all noprint
+handle SIGSEGV stop print
+handle SIGBUS stop print
+handle SIGABRT stop print
+continue
+thread apply all backtrace
+continue
+" > script.gdb
+
+    gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" &
 }
 
 # install test configs

From 770c3406df6d55541dcb59b9146206b2558cbe86 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Mon, 15 Feb 2021 21:02:21 +0300
Subject: [PATCH 1137/1238] test/stress: fix permissions for clickhouse
 directories

---
 docker/test/stress/run.sh | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh
index 60e9ffd265c..dc1e4db4477 100755
--- a/docker/test/stress/run.sh
+++ b/docker/test/stress/run.sh
@@ -8,6 +8,20 @@ dpkg -i package_folder/clickhouse-server_*.deb
 dpkg -i package_folder/clickhouse-client_*.deb
 dpkg -i package_folder/clickhouse-test_*.deb
 
+function configure()
+{
+    # install test configs
+    /usr/share/clickhouse-test/config/install.sh
+
+    # for clickhouse-server (via service)
+    echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment
+    # for clickhouse-client
+    export ASAN_OPTIONS='malloc_context_size=10 allocator_release_to_os_interval_ms=10000'
+
+    # since we run clickhouse from root
+    sudo chown root: /var/lib/clickhouse
+}
+
 function stop()
 {
     clickhouse stop
@@ -45,13 +59,7 @@ continue
     gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" &
 }
 
-# install test configs
-/usr/share/clickhouse-test/config/install.sh
-
-# for clickhouse-server (via service)
-echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment
-# for clickhouse-client
-export ASAN_OPTIONS='malloc_context_size=10 allocator_release_to_os_interval_ms=10000'
+configure
 
 start
 

From 65f2b6a0449f19e0488c5c66e013e9002b4949d3 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Mon, 15 Feb 2021 10:18:37 +0300
Subject: [PATCH 1138/1238] test/fasttest: add gdb into docker image

---
 docker/test/fasttest/Dockerfile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile
index 03b7b2fc53a..64be52d8e30 100644
--- a/docker/test/fasttest/Dockerfile
+++ b/docker/test/fasttest/Dockerfile
@@ -47,6 +47,7 @@ RUN apt-get update \
         expect \
         fakeroot \
         git \
+        gdb \
         gperf \
         lld-${LLVM_VERSION} \
         llvm-${LLVM_VERSION} \

From ee18f6a7ec23304c7ebc5128882d163d510525e0 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sun, 14 Feb 2021 23:34:14 +0300
Subject: [PATCH 1139/1238] test/fasttest: collect diagnosis by attaching with
 gdb in background

Otherwise sometimes stacktraces may be lost [1]:

  [1]: https://clickhouse-test-reports.s3.yandex.net/20477/8ad20fcee5aaa642c2a2dd873d02103692d554f4/fast_test.html#fail1
---
 docker/test/fasttest/run.sh | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh
index e6294b5d74d..fbdad93a553 100755
--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@@ -107,6 +107,18 @@ function start_server
     fi
 
     echo "ClickHouse server pid '$server_pid' started and responded"
+
+    echo "
+handle all noprint
+handle SIGSEGV stop print
+handle SIGBUS stop print
+handle SIGABRT stop print
+continue
+thread apply all backtrace
+continue
+" > script.gdb
+
+    gdb -batch -command script.gdb -p "$server_pid" &
 }
 
 function clone_root

From 9b72255ca4fd4d1ec7fd090dd9b39ab16ec6965e Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 18 Feb 2021 02:09:46 +0300
Subject: [PATCH 1140/1238] Implement compression for all columns except
 LowCardinality

---
 src/Columns/ColumnArray.cpp       | 25 +++++++++++++-
 src/Columns/ColumnArray.h         |  5 ++-
 src/Columns/ColumnDecimal.cpp     | 25 ++++++++++++++
 src/Columns/ColumnDecimal.h       |  2 ++
 src/Columns/ColumnFixedString.cpp | 30 ++++++++++++++++-
 src/Columns/ColumnFixedString.h   |  2 ++
 src/Columns/ColumnMap.h           |  2 ++
 src/Columns/ColumnNullable.cpp    | 15 +++++++++
 src/Columns/ColumnNullable.h      |  2 ++
 src/Columns/ColumnString.cpp      | 54 +++++++++++++++++++++++++++++++
 src/Columns/ColumnString.h        |  2 ++
 src/Columns/ColumnTuple.cpp       | 24 +++++++++++++-
 src/Columns/ColumnTuple.h         |  1 +
 src/Columns/ColumnUnique.h        |  5 +++
 14 files changed, 188 insertions(+), 6 deletions(-)

diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp
index 8c0e06424e7..e8a48672435 100644
--- a/src/Columns/ColumnArray.cpp
+++ b/src/Columns/ColumnArray.cpp
@@ -7,6 +7,7 @@
 #include <Columns/ColumnNullable.h>
 #include <Columns/ColumnConst.h>
 #include <Columns/ColumnsCommon.h>
+#include <Columns/ColumnCompressed.h>
 
 #include <common/unaligned.h>
 #include <common/sort.h>
@@ -369,8 +370,12 @@ void ColumnArray::compareColumn(const IColumn & rhs, size_t rhs_row_num,
                                         compare_results, direction, nan_direction_hint);
 }
 
+
+namespace
+{
+
 template <bool positive>
-struct ColumnArray::Cmp
+struct Cmp
 {
     const ColumnArray & parent;
     int nan_direction_hint;
@@ -390,6 +395,9 @@ struct ColumnArray::Cmp
     }
 };
 
+}
+
+
 void ColumnArray::reserve(size_t n)
 {
     getOffsets().reserve(n);
@@ -912,6 +920,21 @@ void ColumnArray::updatePermutationWithCollation(const Collator & collator, bool
         updatePermutationImpl(limit, res, equal_range, Cmp<true>(*this, nan_direction_hint, &collator));
 }
 
+ColumnPtr ColumnArray::compress() const
+{
+    ColumnPtr data_compressed = data->compress();
+    ColumnPtr offsets_compressed = offsets->compress();
+
+    size_t byte_size = data_compressed->byteSize() + offsets_compressed->byteSize();
+
+    return ColumnCompressed::create(size(), byte_size,
+        [data_compressed = std::move(data_compressed), offsets_compressed = std::move(offsets_compressed)]
+        {
+            return ColumnArray::create(data_compressed->decompress(), offsets_compressed->decompress());
+        });
+}
+
+
 ColumnPtr ColumnArray::replicate(const Offsets & replicate_offsets) const
 {
     if (replicate_offsets.empty())
diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h
index e81ecbc1ca0..1caaf672d49 100644
--- a/src/Columns/ColumnArray.h
+++ b/src/Columns/ColumnArray.h
@@ -123,6 +123,8 @@ public:
 
     void gather(ColumnGathererStream & gatherer_stream) override;
 
+    ColumnPtr compress() const override;
+
     void forEachSubcolumn(ColumnCallback callback) override
     {
         callback(offsets);
@@ -183,9 +185,6 @@ private:
 
     template <typename Comparator>
     void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_range, Comparator cmp) const;
-
-    template <bool positive>
-    struct Cmp;
 };
 
 
diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp
index ddc971032b6..bb61f60706e 100644
--- a/src/Columns/ColumnDecimal.cpp
+++ b/src/Columns/ColumnDecimal.cpp
@@ -14,6 +14,7 @@
 
 #include <Columns/ColumnsCommon.h>
 #include <Columns/ColumnDecimal.h>
+#include <Columns/ColumnCompressed.h>
 #include <DataStreams/ColumnGathererStream.h>
 
 
@@ -346,6 +347,30 @@ void ColumnDecimal<T>::gather(ColumnGathererStream & gatherer)
     gatherer.gather(*this);
 }
 
+template <typename T>
+ColumnPtr ColumnDecimal<T>::compress() const
+{
+    size_t source_size = data.size() * sizeof(T);
+
+    /// Don't compress small blocks.
+    if (source_size < 4096) /// A wild guess.
+        return ColumnCompressed::wrap(this->getPtr());
+
+    auto compressed = ColumnCompressed::compressBuffer(data.data(), source_size);
+
+    if (!compressed)
+        return ColumnCompressed::wrap(this->getPtr());
+
+    return ColumnCompressed::create(data.size(), compressed->size(),
+        [compressed = std::move(compressed), column_size = data.size(), scale = this->scale]
+        {
+            auto res = ColumnDecimal<T>::create(column_size, scale);
+            ColumnCompressed::decompressBuffer(
+                compressed->data(), res->getData().data(), compressed->size(), column_size * sizeof(T));
+            return res;
+        });
+}
+
 template <typename T>
 void ColumnDecimal<T>::getExtremes(Field & min, Field & max) const
 {
diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h
index ef841292a7d..5016ddca791 100644
--- a/src/Columns/ColumnDecimal.h
+++ b/src/Columns/ColumnDecimal.h
@@ -172,6 +172,8 @@ public:
         return false;
     }
 
+    ColumnPtr compress() const override;
+
 
     void insertValue(const T value) { data.push_back(value); }
     Container & getData() { return data; }
diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp
index 55e387ff2ee..278c2fef5f8 100644
--- a/src/Columns/ColumnFixedString.cpp
+++ b/src/Columns/ColumnFixedString.cpp
@@ -1,6 +1,7 @@
 #include <Columns/ColumnFixedString.h>
-
 #include <Columns/ColumnsCommon.h>
+#include <Columns/ColumnCompressed.h>
+
 #include <DataStreams/ColumnGathererStream.h>
 #include <IO/WriteHelpers.h>
 #include <Common/Arena.h>
@@ -446,4 +447,31 @@ void ColumnFixedString::getExtremes(Field & min, Field & max) const
     get(max_idx, max);
 }
 
+ColumnPtr ColumnFixedString::compress() const
+{
+    size_t source_size = chars.size() * n;
+
+    /// Don't compress small blocks.
+    if (source_size < 4096) /// A wild guess.
+        return ColumnCompressed::wrap(this->getPtr());
+
+    auto compressed = ColumnCompressed::compressBuffer(chars.data(), source_size);
+
+    if (!compressed)
+        return ColumnCompressed::wrap(this->getPtr());
+
+    size_t column_size = size();
+
+    return ColumnCompressed::create(column_size, compressed->size(),
+        [compressed = std::move(compressed), column_size, n = n]
+        {
+            size_t chars_size = n * column_size;
+            auto res = ColumnFixedString::create(n);
+            res->getChars().resize(chars_size);
+            ColumnCompressed::decompressBuffer(
+                compressed->data(), res->getChars().data(), compressed->size(), chars_size);
+            return res;
+        });
+}
+
 }
diff --git a/src/Columns/ColumnFixedString.h b/src/Columns/ColumnFixedString.h
index 286b3a752dc..1bb7f922f3e 100644
--- a/src/Columns/ColumnFixedString.h
+++ b/src/Columns/ColumnFixedString.h
@@ -156,6 +156,8 @@ public:
 
     void gather(ColumnGathererStream & gatherer_stream) override;
 
+    ColumnPtr compress() const override;
+
     void reserve(size_t size) override
     {
         chars.reserve(n * size);
diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h
index c1948491db5..a970f67bd46 100644
--- a/src/Columns/ColumnMap.h
+++ b/src/Columns/ColumnMap.h
@@ -91,6 +91,8 @@ public:
 
     const ColumnTuple & getNestedData() const { return assert_cast<const ColumnTuple &>(getNestedColumn().getData()); }
     ColumnTuple & getNestedData() { return assert_cast<ColumnTuple &>(getNestedColumn().getData()); }
+
+    ColumnPtr compress() const override { return nested->compress(); }
 };
 
 }
diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp
index 35ce005073a..4e5cc2b4cf7 100644
--- a/src/Columns/ColumnNullable.cpp
+++ b/src/Columns/ColumnNullable.cpp
@@ -7,6 +7,7 @@
 #include <Columns/ColumnNullable.h>
 #include <Columns/ColumnConst.h>
 #include <Columns/ColumnString.h>
+#include <Columns/ColumnCompressed.h>
 #include <DataStreams/ColumnGathererStream.h>
 
 
@@ -511,6 +512,20 @@ void ColumnNullable::protect()
     getNullMapColumn().protect();
 }
 
+ColumnPtr ColumnNullable::compress() const
+{
+    ColumnPtr nested_compressed = nested_column->compress();
+    ColumnPtr null_map_compressed = null_map->compress();
+
+    size_t byte_size = nested_column->byteSize() + null_map->byteSize();
+
+    return ColumnCompressed::create(size(), byte_size,
+        [nested_column = std::move(nested_column), null_map = std::move(null_map)]
+        {
+            return ColumnNullable::create(nested_column->decompress(), null_map->decompress());
+        });
+}
+
 
 namespace
 {
diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h
index ade2c106627..8d267de8644 100644
--- a/src/Columns/ColumnNullable.h
+++ b/src/Columns/ColumnNullable.h
@@ -117,6 +117,8 @@ public:
 
     void gather(ColumnGathererStream & gatherer_stream) override;
 
+    ColumnPtr compress() const override;
+
     void forEachSubcolumn(ColumnCallback callback) override
     {
         callback(nested_column);
diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp
index 00d6349408f..190517bfeb9 100644
--- a/src/Columns/ColumnString.cpp
+++ b/src/Columns/ColumnString.cpp
@@ -2,6 +2,7 @@
 
 #include <Columns/Collator.h>
 #include <Columns/ColumnsCommon.h>
+#include <Columns/ColumnCompressed.h>
 #include <DataStreams/ColumnGathererStream.h>
 #include <Common/Arena.h>
 #include <Common/HashTable/Hash.h>
@@ -525,6 +526,59 @@ void ColumnString::getExtremes(Field & min, Field & max) const
 }
 
 
+ColumnPtr ColumnString::compress() const
+{
+    size_t source_chars_size = chars.size();
+    size_t source_offsets_size = offsets.size() * sizeof(Offset);
+
+    /// Don't compress small blocks.
+    if (source_chars_size < 4096) /// A wild guess.
+        return ColumnCompressed::wrap(this->getPtr());
+
+    auto chars_compressed = ColumnCompressed::compressBuffer(chars.data(), source_chars_size);
+    auto offsets_compressed = ColumnCompressed::compressBuffer(offsets.data(), source_offsets_size);
+
+    /// Return original column if not compressable.
+    if (!chars_compressed && !offsets_compressed)
+        return ColumnCompressed::wrap(this->getPtr());
+
+    if (!chars_compressed)
+    {
+        chars_compressed = std::make_shared<Memory<>>(source_chars_size);
+        memcpy(chars_compressed->data(), chars.data(), source_chars_size);
+    }
+
+    if (!offsets_compressed)
+    {
+        offsets_compressed = std::make_shared<Memory<>>(source_offsets_size);
+        memcpy(offsets_compressed->data(), offsets.data(), source_offsets_size);
+    }
+
+    return ColumnCompressed::create(offsets.size(), chars_compressed->size() + offsets_compressed->size(),
+        [
+            chars_compressed = std::move(chars_compressed),
+            offsets_compressed = std::move(offsets_compressed),
+            source_chars_size,
+            source_offsets_elements = offsets.size()
+        ]
+        {
+            auto res = ColumnString::create();
+
+            res->getChars().resize(source_chars_size);
+            res->getOffsets().resize(source_offsets_elements);
+
+            ColumnCompressed::decompressBuffer(
+                chars_compressed->data(), res->getChars().data(), chars_compressed->size(), source_chars_size);
+
+            ColumnCompressed::decompressBuffer(
+                offsets_compressed->data(), res->getOffsets().data(), offsets_compressed->size(), source_offsets_elements * sizeof(Offset));
+
+            return res;
+        });
+
+}
+
+
 int ColumnString::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int, const Collator & collator) const
 {
     const ColumnString & rhs = assert_cast<const ColumnString &>(rhs_);
diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h
index c1e76c5e28e..843e445d1a0 100644
--- a/src/Columns/ColumnString.h
+++ b/src/Columns/ColumnString.h
@@ -261,6 +261,8 @@ public:
 
     void gather(ColumnGathererStream & gatherer_stream) override;
 
+    ColumnPtr compress() const override;
+
     void reserve(size_t n) override;
 
     void getExtremes(Field & min, Field & max) const override;
diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp
index fa5a15d0351..1d85c67e7c6 100644
--- a/src/Columns/ColumnTuple.cpp
+++ b/src/Columns/ColumnTuple.cpp
@@ -1,6 +1,7 @@
 #include <Columns/ColumnTuple.h>
 
 #include <Columns/IColumnImpl.h>
+#include <Columns/ColumnCompressed.h>
 #include <Core/Field.h>
 #include <DataStreams/ColumnGathererStream.h>
 #include <IO/Operators.h>
@@ -486,7 +487,7 @@ bool ColumnTuple::structureEquals(const IColumn & rhs) const
 
 bool ColumnTuple::isCollationSupported() const
 {
-    for (const auto& column : columns)
+    for (const auto & column : columns)
     {
         if (column->isCollationSupported())
             return true;
@@ -495,4 +496,25 @@ bool ColumnTuple::isCollationSupported() const
 }
 
 
+ColumnPtr ColumnTuple::compress() const
+{
+    size_t byte_size = 0;
+    Columns compressed;
+    compressed.reserve(columns.size());
+    for (const auto & column : columns)
+    {
+        auto compressed_column = column->compress();
+        byte_size += compressed_column->byteSize();
+        compressed.emplace_back(std::move(compressed_column));
+    }
+
+    return ColumnCompressed::create(size(), byte_size,
+        [compressed = std::move(compressed)]
+        {
+            for (auto & column : compressed)
+                column = column->decompress();
+            return ColumnTuple::create(compressed);
+        });
+}
+
 }
diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h
index f763ca3fcba..818b29937bd 100644
--- a/src/Columns/ColumnTuple.h
+++ b/src/Columns/ColumnTuple.h
@@ -89,6 +89,7 @@ public:
     void forEachSubcolumn(ColumnCallback callback) override;
     bool structureEquals(const IColumn & rhs) const override;
     bool isCollationSupported() const override;
+    ColumnPtr compress() const override;
 
     size_t tupleSize() const { return columns.size(); }
 
diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h
index 5d58b2484e0..d1c4a4e1183 100644
--- a/src/Columns/ColumnUnique.h
+++ b/src/Columns/ColumnUnique.h
@@ -28,6 +28,11 @@ namespace ErrorCodes
     extern const int ILLEGAL_COLUMN;
 }
 
+/** Stores another column with unique values
+  * and also an index that allows to find position by value.
+  *
+  * This column is not used on it's own but only as implementation detail of ColumnLowCardinality.
+  */
 template <typename ColumnType>
 class ColumnUnique final : public COWHelper<IColumnUnique, ColumnUnique<ColumnType>>
 {

From 1781a64370c86c93be915db8673644cffe0e58df Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 18 Feb 2021 02:11:41 +0300
Subject: [PATCH 1141/1238] Whitespaces

---
 src/Columns/ColumnUnique.h |  2 +-
 src/Columns/ReverseIndex.h | 10 ++++++----
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h
index d1c4a4e1183..fbd3c3641b5 100644
--- a/src/Columns/ColumnUnique.h
+++ b/src/Columns/ColumnUnique.h
@@ -39,7 +39,7 @@ class ColumnUnique final : public COWHelper<IColumnUnique, ColumnUnique<ColumnTy
     friend class COWHelper<IColumnUnique, ColumnUnique<ColumnType>>;
 
 private:
-    explicit ColumnUnique(MutableColumnPtr && holder, bool is_nullable);
+    ColumnUnique(MutableColumnPtr && holder, bool is_nullable);
     explicit ColumnUnique(const IDataType & type);
     ColumnUnique(const ColumnUnique & other);
 
diff --git a/src/Columns/ReverseIndex.h b/src/Columns/ReverseIndex.h
index 154293acf99..35b0029fc7b 100644
--- a/src/Columns/ReverseIndex.h
+++ b/src/Columns/ReverseIndex.h
@@ -316,8 +316,8 @@ template <typename IndexType, typename ColumnType>
 class ReverseIndex
 {
 public:
-    explicit ReverseIndex(UInt64 num_prefix_rows_to_skip_, UInt64 base_index_)
-            : num_prefix_rows_to_skip(num_prefix_rows_to_skip_), base_index(base_index_), saved_hash_ptr(nullptr) {}
+    ReverseIndex(UInt64 num_prefix_rows_to_skip_, UInt64 base_index_)
+        : num_prefix_rows_to_skip(num_prefix_rows_to_skip_), base_index(base_index_), saved_hash_ptr(nullptr) {}
 
     void setColumn(ColumnType * column_);
 
@@ -329,14 +329,16 @@ public:
     /// Returns the found data's index in the dictionary. If index is not built, builds it.
     UInt64 getInsertionPoint(StringRef data)
     {
-        if (!index) buildIndex();
+        if (!index)
+            buildIndex();
         return getIndexImpl(data);
     }
 
     /// Returns the found data's index in the dictionary if the #index is built, otherwise, returns a std::nullopt.
     std::optional<UInt64> getIndex(StringRef data) const
     {
-        if (!index) return {};
+        if (!index)
+            return {};
         return getIndexImpl(data);
     }
 

From dcba99f4b1d3c1ed8b4838d00458271cfb2be8d4 Mon Sep 17 00:00:00 2001
From: Anton Popov <pad11rus@gmail.com>
Date: Thu, 18 Feb 2021 02:19:58 +0300
Subject: [PATCH 1142/1238] fix usage of 'distinct' combinator with 'state'
 combinator

---
 src/AggregateFunctions/AggregateFunctionDistinct.h       | 5 +++++
 .../01259_combinator_distinct_distributed.reference      | 4 ++++
 .../01259_combinator_distinct_distributed.sql            | 9 +++++++++
 3 files changed, 18 insertions(+)

diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.h b/src/AggregateFunctions/AggregateFunctionDistinct.h
index b481e2a28e7..b587bbebf6e 100644
--- a/src/AggregateFunctions/AggregateFunctionDistinct.h
+++ b/src/AggregateFunctions/AggregateFunctionDistinct.h
@@ -236,6 +236,11 @@ public:
         return true;
     }
 
+    bool isState() const override
+    {
+        return nested_func->isState();
+    }
+
     AggregateFunctionPtr getNestedFunction() const override { return nested_func; }
 };
 
diff --git a/tests/queries/0_stateless/01259_combinator_distinct_distributed.reference b/tests/queries/0_stateless/01259_combinator_distinct_distributed.reference
index 096d5703292..72a41ac1d84 100644
--- a/tests/queries/0_stateless/01259_combinator_distinct_distributed.reference
+++ b/tests/queries/0_stateless/01259_combinator_distinct_distributed.reference
@@ -2,3 +2,7 @@
 [0,1,2,3,4,5,6,7,8,9,10,11,12]
 20
 0.49237
+78
+[0,1,2,3,4,5,6,7,8,9,10,11,12]
+20
+0.49237
diff --git a/tests/queries/0_stateless/01259_combinator_distinct_distributed.sql b/tests/queries/0_stateless/01259_combinator_distinct_distributed.sql
index f851e64dbcb..f95d2d87b8e 100644
--- a/tests/queries/0_stateless/01259_combinator_distinct_distributed.sql
+++ b/tests/queries/0_stateless/01259_combinator_distinct_distributed.sql
@@ -1,3 +1,12 @@
+SET distributed_aggregation_memory_efficient = 1;
+
+SELECT sum(DISTINCT number % 13) FROM remote('127.0.0.{1,2}', numbers_mt(100000));
+SELECT arraySort(groupArray(DISTINCT number % 13)) FROM remote('127.0.0.{1,2}', numbers_mt(100000));
+SELECT finalizeAggregation(countState(DISTINCT toString(number % 20))) FROM remote('127.0.0.{1,2}', numbers_mt(100000));
+SELECT round(corrStable(DISTINCT x, y), 5) FROM (SELECT number % 10 AS x, number % 5 AS y FROM remote('127.0.0.{1,2}', numbers(1000)));
+
+SET distributed_aggregation_memory_efficient = 0;
+
 SELECT sum(DISTINCT number % 13) FROM remote('127.0.0.{1,2}', numbers_mt(100000));
 SELECT arraySort(groupArray(DISTINCT number % 13)) FROM remote('127.0.0.{1,2}', numbers_mt(100000));
 SELECT finalizeAggregation(countState(DISTINCT toString(number % 20))) FROM remote('127.0.0.{1,2}', numbers_mt(100000));

From b7011f4f9c2a6df4144e9dec4a45c12e7fa62ec8 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 18 Feb 2021 02:52:07 +0300
Subject: [PATCH 1143/1238] Fix build

---
 src/Columns/ColumnTuple.cpp            | 2 +-
 src/DataTypes/DataTypeLowCardinality.h | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp
index 1d85c67e7c6..c7c5f7b97c6 100644
--- a/src/Columns/ColumnTuple.cpp
+++ b/src/Columns/ColumnTuple.cpp
@@ -509,7 +509,7 @@ ColumnPtr ColumnTuple::compress() const
     }
 
     return ColumnCompressed::create(size(), byte_size,
-        [compressed = std::move(compressed)]
+        [compressed = std::move(compressed)]() mutable
         {
             for (auto & column : compressed)
                 column = column->decompress();
diff --git a/src/DataTypes/DataTypeLowCardinality.h b/src/DataTypes/DataTypeLowCardinality.h
index 6ed2b792ce3..fc28ce0a59d 100644
--- a/src/DataTypes/DataTypeLowCardinality.h
+++ b/src/DataTypes/DataTypeLowCardinality.h
@@ -1,7 +1,9 @@
 #pragma once
+
 #include <DataTypes/IDataType.h>
 #include <Columns/IColumnUnique.h>
 
+
 namespace DB
 {
 

From 634be2b933d87926fe79ce54bc037b4740dcf7de Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 18 Feb 2021 03:52:09 +0300
Subject: [PATCH 1144/1238] Fix error

---
 src/Columns/ColumnCompressed.cpp  |  4 ++--
 src/Columns/ColumnCompressed.h    |  5 +++--
 src/Columns/ColumnDecimal.cpp     |  2 +-
 src/Columns/ColumnFixedString.cpp |  4 ++--
 src/Columns/ColumnString.cpp      | 18 +++---------------
 src/Columns/ColumnVector.cpp      |  2 +-
 6 files changed, 12 insertions(+), 23 deletions(-)

diff --git a/src/Columns/ColumnCompressed.cpp b/src/Columns/ColumnCompressed.cpp
index d7d30745868..292c6968b86 100644
--- a/src/Columns/ColumnCompressed.cpp
+++ b/src/Columns/ColumnCompressed.cpp
@@ -15,7 +15,7 @@ namespace ErrorCodes
 }
 
 
-std::shared_ptr<Memory<>> ColumnCompressed::compressBuffer(const void * data, size_t data_size)
+std::shared_ptr<Memory<>> ColumnCompressed::compressBuffer(const void * data, size_t data_size, bool always_compress)
 {
     size_t max_dest_size = LZ4_COMPRESSBOUND(data_size);
 
@@ -34,7 +34,7 @@ std::shared_ptr<Memory<>> ColumnCompressed::compressBuffer(const void * data, si
         throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress column");
 
     /// If compression is inefficient.
-    if (static_cast<size_t>(compressed_size) * 2 > data_size)
+    if (!always_compress && static_cast<size_t>(compressed_size) * 2 > data_size)
         return {};
 
     /// Shrink to fit.
diff --git a/src/Columns/ColumnCompressed.h b/src/Columns/ColumnCompressed.h
index bd70005ac5d..f6b6bf22177 100644
--- a/src/Columns/ColumnCompressed.h
+++ b/src/Columns/ColumnCompressed.h
@@ -65,8 +65,9 @@ public:
 
     /// Helper methods for compression.
 
-    /// If data is not worth to be compressed - returns nullptr. Note: shared_ptr is to allow to be captured by std::function.
-    static std::shared_ptr<Memory<>> compressBuffer(const void * data, size_t data_size);
+    /// If data is not worth to be compressed and not 'always_compress' - returns nullptr.
+    /// Note: shared_ptr is to allow to be captured by std::function.
+    static std::shared_ptr<Memory<>> compressBuffer(const void * data, size_t data_size, bool always_compress);
 
     static void decompressBuffer(
         const void * compressed_data, void * decompressed_data, size_t compressed_size, size_t decompressed_size);
diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp
index bb61f60706e..bad3a4c3402 100644
--- a/src/Columns/ColumnDecimal.cpp
+++ b/src/Columns/ColumnDecimal.cpp
@@ -356,7 +356,7 @@ ColumnPtr ColumnDecimal<T>::compress() const
     if (source_size < 4096) /// A wild guess.
         return ColumnCompressed::wrap(this->getPtr());
 
-    auto compressed = ColumnCompressed::compressBuffer(data.data(), source_size);
+    auto compressed = ColumnCompressed::compressBuffer(data.data(), source_size, false);
 
     if (!compressed)
         return ColumnCompressed::wrap(this->getPtr());
diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp
index 278c2fef5f8..84bd0561f01 100644
--- a/src/Columns/ColumnFixedString.cpp
+++ b/src/Columns/ColumnFixedString.cpp
@@ -449,13 +449,13 @@ void ColumnFixedString::getExtremes(Field & min, Field & max) const
 
 ColumnPtr ColumnFixedString::compress() const
 {
-    size_t source_size = chars.size() * n;
+    size_t source_size = chars.size();
 
     /// Don't compress small blocks.
     if (source_size < 4096) /// A wild guess.
         return ColumnCompressed::wrap(this->getPtr());
 
-    auto compressed = ColumnCompressed::compressBuffer(chars.data(), source_size);
+    auto compressed = ColumnCompressed::compressBuffer(chars.data(), source_size, false);
 
     if (!compressed)
         return ColumnCompressed::wrap(this->getPtr());
diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp
index 190517bfeb9..f46c96caf8c 100644
--- a/src/Columns/ColumnString.cpp
+++ b/src/Columns/ColumnString.cpp
@@ -535,24 +535,13 @@ ColumnPtr ColumnString::compress() const
     if (source_chars_size < 4096) /// A wild guess.
         return ColumnCompressed::wrap(this->getPtr());
 
-    auto chars_compressed = ColumnCompressed::compressBuffer(chars.data(), source_chars_size);
-    auto offsets_compressed = ColumnCompressed::compressBuffer(offsets.data(), source_offsets_size);
+    auto chars_compressed = ColumnCompressed::compressBuffer(chars.data(), source_chars_size, false);
 
     /// Return original column if not compressable.
-    if (!chars_compressed && !offsets_compressed)
+    if (!chars_compressed)
         return ColumnCompressed::wrap(this->getPtr());
 
-    if (!chars_compressed)
-    {
-        chars_compressed = std::make_shared<Memory<>>(source_chars_size);
-        memcpy(chars_compressed->data(), chars.data(), source_chars_size);
-    }
-
-    if (!offsets_compressed)
-    {
-        offsets_compressed = std::make_shared<Memory<>>(source_offsets_size);
-        memcpy(offsets_compressed->data(), offsets.data(), source_offsets_size);
-    }
+    auto offsets_compressed = ColumnCompressed::compressBuffer(offsets.data(), source_offsets_size, true);
 
     return ColumnCompressed::create(offsets.size(), chars_compressed->size() + offsets_compressed->size(),
         [
@@ -575,7 +564,6 @@ ColumnPtr ColumnString::compress() const
 
             return res;
         });
-
 }
 
 
diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp
index b8bfef7258e..19ba86c5120 100644
--- a/src/Columns/ColumnVector.cpp
+++ b/src/Columns/ColumnVector.cpp
@@ -533,7 +533,7 @@ ColumnPtr ColumnVector<T>::compress() const
     if (source_size < 4096) /// A wild guess.
         return ColumnCompressed::wrap(this->getPtr());
 
-    auto compressed = ColumnCompressed::compressBuffer(data.data(), source_size);
+    auto compressed = ColumnCompressed::compressBuffer(data.data(), source_size, false);
 
     if (!compressed)
         return ColumnCompressed::wrap(this->getPtr());

From 5007f7f0183f3cc6ce2b3580b99748ff7a3649ae Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 18 Feb 2021 05:57:15 +0300
Subject: [PATCH 1145/1238] Fix typo

---
 src/Columns/ColumnString.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp
index f46c96caf8c..8fd22e85e10 100644
--- a/src/Columns/ColumnString.cpp
+++ b/src/Columns/ColumnString.cpp
@@ -537,7 +537,7 @@ ColumnPtr ColumnString::compress() const
 
     auto chars_compressed = ColumnCompressed::compressBuffer(chars.data(), source_chars_size, false);
 
-    /// Return original column if not compressable.
+    /// Return original column if not compressible.
     if (!chars_compressed)
         return ColumnCompressed::wrap(this->getPtr());
 

From 04cb91a0fd1e3dc0f3a1b00d752d93b19a116e97 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Thu, 18 Feb 2021 06:02:41 +0300
Subject: [PATCH 1146/1238] Fix error

---
 src/Columns/ColumnMap.cpp | 10 ++++++++++
 src/Columns/ColumnMap.h   |  2 +-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp
index 1cfd7e6c4ef..cc2640a9cf6 100644
--- a/src/Columns/ColumnMap.cpp
+++ b/src/Columns/ColumnMap.cpp
@@ -1,4 +1,5 @@
 #include <Columns/ColumnMap.h>
+#include <Columns/ColumnCompressed.h>
 #include <Columns/IColumnImpl.h>
 #include <DataStreams/ColumnGathererStream.h>
 #include <IO/WriteBufferFromString.h>
@@ -243,4 +244,13 @@ bool ColumnMap::structureEquals(const IColumn & rhs) const
     return false;
 }
 
+ColumnPtr ColumnMap::compress() const
+{
+    auto compressed = nested->compress();
+    return ColumnCompressed::create(size(), compressed->byteSize(), [compressed = std::move(compressed)]
+    {
+        return ColumnMap::create(compressed->decompress());
+    });
+}
+
 }
diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h
index a970f67bd46..acae1574f4c 100644
--- a/src/Columns/ColumnMap.h
+++ b/src/Columns/ColumnMap.h
@@ -92,7 +92,7 @@ public:
     const ColumnTuple & getNestedData() const { return assert_cast<const ColumnTuple &>(getNestedColumn().getData()); }
     ColumnTuple & getNestedData() { return assert_cast<ColumnTuple &>(getNestedColumn().getData()); }
 
-    ColumnPtr compress() const override { return nested->compress(); }
+    ColumnPtr compress() const override;
 };
 
 }

From adf5d24177b6d23d4788e531fa2267378c07aae6 Mon Sep 17 00:00:00 2001
From: M0r64n <M0r64n1635@gmail.com>
Date: Thu, 18 Feb 2021 11:36:17 +0400
Subject: [PATCH 1147/1238] Correct file engine settings tests

---
 .../01720_engine_file_empty_if_not_exists.sql |  1 +
 .../01721_engine_file_truncate_on_insert.sql  | 21 ++++++++++---------
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/tests/queries/0_stateless/01720_engine_file_empty_if_not_exists.sql b/tests/queries/0_stateless/01720_engine_file_empty_if_not_exists.sql
index c04e01ccc88..d665dbc722f 100644
--- a/tests/queries/0_stateless/01720_engine_file_empty_if_not_exists.sql
+++ b/tests/queries/0_stateless/01720_engine_file_empty_if_not_exists.sql
@@ -13,3 +13,4 @@ SET engine_file_empty_if_not_exists=1;
 SELECT * FROM file_engine_table;
 
 SET engine_file_empty_if_not_exists=0;
+DROP TABLE file_engine_table;
diff --git a/tests/queries/0_stateless/01721_engine_file_truncate_on_insert.sql b/tests/queries/0_stateless/01721_engine_file_truncate_on_insert.sql
index 65246db7963..42d935cc0dd 100644
--- a/tests/queries/0_stateless/01721_engine_file_truncate_on_insert.sql
+++ b/tests/queries/0_stateless/01721_engine_file_truncate_on_insert.sql
@@ -1,20 +1,21 @@
-INSERT INTO TABLE FUNCTION file('01718_file/test/data.TSV', 'TSV', 'id UInt32') VALUES ('file', 42);
+DROP TABLE IF EXISTS test;
+
+INSERT INTO TABLE FUNCTION file('01718_file/test/data.TSV', 'TSV', 'id UInt32') VALUES (1);
 ATTACH TABLE test FROM '01718_file/test' (id UInt8) ENGINE=File(TSV);
 
-CREATE TABLE file_engine_table (id UInt32) ENGINE=File(TabSeparated);
-
-INSERT INTO file_engine_table VALUES (1), (2), (3);
-INSERT INTO file_engine_table VALUES (4);
-SELECT * FROM file_engine_table;
+INSERT INTO test VALUES (2), (3);
+INSERT INTO test VALUES (4);
+SELECT * FROM test;
 
 SET engine_file_truncate_on_insert=0;
 
-INSERT INTO file_engine_table VALUES (5), (6);
-SELECT * FROM file_engine_table;
+INSERT INTO test VALUES (5), (6);
+SELECT * FROM test;
 
 SET engine_file_truncate_on_insert=1;
 
-INSERT INTO file_engine_table VALUES (0), (1), (2);
-SELECT * FROM file_engine_table;
+INSERT INTO test VALUES (0), (1), (2);
+SELECT * FROM test;
 
 SET engine_file_truncate_on_insert=0;
+DROP TABLE test;

From 1ce9570fcb4919880c19b05986dd9f7691fefb6f Mon Sep 17 00:00:00 2001
From: M0r64n <m0r64n1635@gmail.com>
Date: Thu, 18 Feb 2021 07:50:15 +0000
Subject: [PATCH 1148/1238] Fix 01721_engine_file_truncate_on_insert.reference

---
 .../0_stateless/01721_engine_file_truncate_on_insert.reference  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/01721_engine_file_truncate_on_insert.reference b/tests/queries/0_stateless/01721_engine_file_truncate_on_insert.reference
index a25fb4f0e7e..578661c9194 100644
--- a/tests/queries/0_stateless/01721_engine_file_truncate_on_insert.reference
+++ b/tests/queries/0_stateless/01721_engine_file_truncate_on_insert.reference
@@ -10,4 +10,4 @@
 6
 0
 1
-2
\ No newline at end of file
+2

From 4278098f9a243c740961248ad2232e425bd567d9 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Thu, 18 Feb 2021 13:09:01 +0300
Subject: [PATCH 1149/1238] Reinterpret function added Decimal, DateTim64
 support

---
 .../functions/type-conversion-functions.md    | 10 ++-
 src/Functions/reinterpretAs.cpp               | 65 ++++++++++++++-----
 .../01676_reinterpret_as.reference            | 10 +++
 .../0_stateless/01676_reinterpret_as.sql      | 12 +++-
 4 files changed, 76 insertions(+), 21 deletions(-)

diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md
index 6bc274eba73..0cfeb282bb3 100644
--- a/docs/en/sql-reference/functions/type-conversion-functions.md
+++ b/docs/en/sql-reference/functions/type-conversion-functions.md
@@ -324,16 +324,20 @@ SELECT reinterpret(toInt8(-1), 'UInt8') as int_to_uint,
 └─────────────┴──────────────┴───────────────┘
 ```
 
-## reinterpretAsUInt(8\|16\|32\|64\|256) {#type_conversion_function-reinterpretAsUInt8163264256}
+## reinterpretAsUInt(8\|16\|32\|64\|256) {#reinterpretAsUInt8163264256}
 
-## reinterpretAsInt(8\|16\|32\|64\|128\|256) {#type_conversion_function-reinterpretAsInt8163264128256}
+## reinterpretAsInt(8\|16\|32\|64\|128\|256) {#reinterpretAsInt8163264128256}
 
-## reinterpretAsFloat(32\|64) {##type_conversion_function-reinterpretAsFloat}
+## reinterpretAsDecimal(32\|64\|128\|256) {#reinterpretAsDecimal3264128256}
+
+## reinterpretAsFloat(32\|64) {#type_conversion_function-reinterpretAsFloat}
 
 ## reinterpretAsDate {#type_conversion_function-reinterpretAsDate}
 
 ## reinterpretAsDateTime {#type_conversion_function-reinterpretAsDateTime}
 
+## reinterpretAsDateTime64 {#type_conversion_function-reinterpretAsDateTime64}
+
 ## reinterpretAsString {#type_conversion_function-reinterpretAsString}
 
 ## reinterpretAsFixedString {#type_conversion_function-reinterpretAsFixedString}
diff --git a/src/Functions/reinterpretAs.cpp b/src/Functions/reinterpretAs.cpp
index c15ba969fdb..3f4ba3d23e1 100644
--- a/src/Functions/reinterpretAs.cpp
+++ b/src/Functions/reinterpretAs.cpp
@@ -11,10 +11,13 @@
 #include <DataTypes/DataTypeDateTime.h>
 #include <DataTypes/DataTypeUUID.h>
 #include <DataTypes/DataTypeFactory.h>
+#include <DataTypes/DataTypesDecimal.h>
+#include <DataTypes/DataTypeDateTime64.h>
 #include <Columns/ColumnString.h>
 #include <Columns/ColumnFixedString.h>
 #include <Columns/ColumnConst.h>
 #include <Columns/ColumnVector.h>
+#include <Columns/ColumnDecimal.h>
 
 #include <Common/typeid_cast.h>
 #include <Common/memcpySmall.h>
@@ -158,7 +161,7 @@ public:
                 {
                     const auto * col_from = assert_cast<const ColumnString *>(arguments[0].column.get());
 
-                    auto col_res = ToColumnType::create();
+                    auto col_res = numericColumnCreateHelper<ToType>(static_cast<const ToType&>(*result_type.get()));
 
                     const ColumnString::Chars & data_from = col_from->getChars();
                     const ColumnString::Offsets & offsets_from = col_from->getOffsets();
@@ -185,7 +188,7 @@ public:
                 {
                     const auto * col_from_fixed = assert_cast<const ColumnFixedString *>(arguments[0].column.get());
 
-                    auto col_res = ToColumnType::create();
+                    auto col_res = numericColumnCreateHelper<ToType>(static_cast<const ToType&>(*result_type.get()));
 
                     const ColumnString::Chars & data_from = col_from_fixed->getChars();
                     size_t step = col_from_fixed->getN();
@@ -209,12 +212,27 @@ public:
                 }
                 else if constexpr (CanBeReinterpretedAsNumeric<FromType>)
                 {
-                    using FromTypeFieldType = typename FromType::FieldType;
-                    const auto * col = assert_cast<const ColumnVector<FromTypeFieldType>*>(arguments[0].column.get());
+                    using From = typename FromType::FieldType;
+                    using To = typename ToType::FieldType;
 
-                    auto col_res = ToColumnType::create();
-                    reinterpretImpl(col->getData(), col_res->getData());
-                    result = std::move(col_res);
+                    using FromColumnType = std::conditional_t<IsDecimalNumber<From>, ColumnDecimal<From>, ColumnVector<From>>;
+
+                    const auto * column_from = assert_cast<const FromColumnType*>(arguments[0].column.get());
+
+                    auto column_to = numericColumnCreateHelper<ToType>(static_cast<const ToType&>(*result_type.get()));
+
+                    auto & from = column_from->getData();
+                    auto & to = column_to->getData();
+
+                    size_t size = from.size();
+                    to.resize_fill(size);
+
+                    static constexpr size_t copy_size = std::min(sizeof(From), sizeof(To));
+
+                    for (size_t i = 0; i < size; ++i)
+                        memcpy(static_cast<void*>(&to[i]), static_cast<const void*>(&from[i]), copy_size);
+
+                    result = std::move(column_to);
 
                     return true;
                 }
@@ -232,7 +250,7 @@ public:
 private:
     template <typename T>
     static constexpr auto CanBeReinterpretedAsNumeric =
-        IsDataTypeNumber<T> ||
+        IsDataTypeDecimalOrNumber<T> ||
         std::is_same_v<T, DataTypeDate> ||
         std::is_same_v<T, DataTypeDateTime> ||
         std::is_same_v<T, DataTypeUUID>;
@@ -243,7 +261,8 @@ private:
             type.isInt() ||
             type.isDateOrDateTime() ||
             type.isFloat() ||
-            type.isUUID();
+            type.isUUID() ||
+            type.isDecimal();
     }
 
     static void NO_INLINE executeToFixedString(const IColumn & src, ColumnFixedString & dst, size_t n)
@@ -296,18 +315,32 @@ private:
         }
     }
 
-    template <typename From, typename To>
-    static void reinterpretImpl(const PaddedPODArray<From> & from, PaddedPODArray<To> & to)
+    template <typename Type>
+    static typename Type::ColumnType::MutablePtr numericColumnCreateHelper(const Type & type)
     {
+        size_t column_size = 0;
+
+        using ColumnType = typename Type::ColumnType;
+
+        if constexpr (IsDataTypeDecimal<Type>)
+            return ColumnType::create(column_size, type.getScale());
+        else
+            return ColumnType::create(column_size);
+    }
+
+    template <typename FromContainer, typename ToContainer>
+    static void reinterpretImpl(const FromContainer & from, ToContainer & to)
+    {
+        using From = typename FromContainer::value_type;
+        using To = typename ToContainer::value_type;
+
         size_t size = from.size();
         to.resize_fill(size);
 
+        static constexpr size_t copy_size = std::min(sizeof(From), sizeof(To));
+
         for (size_t i = 0; i < size; ++i)
-        {
-            memcpy(static_cast<void*>(&to[i]),
-                static_cast<const void*>(&from[i]),
-                std::min(sizeof(From), sizeof(To)));
-        }
+            memcpy(static_cast<void*>(&to[i]), static_cast<const void*>(&from[i]), copy_size);
     }
 };
 
diff --git a/tests/queries/0_stateless/01676_reinterpret_as.reference b/tests/queries/0_stateless/01676_reinterpret_as.reference
index b39deb55a7f..459ca166dc1 100644
--- a/tests/queries/0_stateless/01676_reinterpret_as.reference
+++ b/tests/queries/0_stateless/01676_reinterpret_as.reference
@@ -28,4 +28,14 @@ Integer and String types
 1	1	49
 1	1	49
 11	11	12593
+Dates
+1970-01-01	1970-01-01
+1970-01-01 03:00:00	1970-01-01 03:00:00
+1970-01-01 03:00:00.000	1970-01-01 03:00:00.000
+Decimals
+5.00	0.49
+5.00	0.49
+5.00	0.49
+5.00	0.49
+0.00
 ReinterpretErrors
diff --git a/tests/queries/0_stateless/01676_reinterpret_as.sql b/tests/queries/0_stateless/01676_reinterpret_as.sql
index ff727f284bb..5eb94ed0a13 100644
--- a/tests/queries/0_stateless/01676_reinterpret_as.sql
+++ b/tests/queries/0_stateless/01676_reinterpret_as.sql
@@ -28,7 +28,15 @@ SELECT 'Integer and String types';
 SELECT reinterpret(a, 'String'), reinterpretAsString(a), reinterpretAsUInt8('1') as a;
 SELECT reinterpret(a, 'String'), reinterpretAsString(a), reinterpretAsUInt8('11') as a;
 SELECT reinterpret(a, 'String'), reinterpretAsString(a), reinterpretAsUInt16('11') as a;
+SELECT 'Dates';
+SELECT reinterpret(0, 'Date'), reinterpret('', 'Date');
+SELECT reinterpret(0, 'DateTime'), reinterpret('', 'DateTime');
+SELECT reinterpret(0, 'DateTime64'), reinterpret('', 'DateTime64');
+SELECT 'Decimals';
+SELECT reinterpret(toDecimal32(5, 2), 'Decimal32(2)'), reinterpret('1', 'Decimal32(2)');
+SELECT reinterpret(toDecimal64(5, 2), 'Decimal64(2)'), reinterpret('1', 'Decimal64(2)');;
+SELECT reinterpret(toDecimal128(5, 2), 'Decimal128(2)'), reinterpret('1', 'Decimal128(2)');
+SELECT reinterpret(toDecimal256(5, 2), 'Decimal256(2)'), reinterpret('1', 'Decimal256(2)');
+SELECT reinterpret(toDateTime64(0, 0), 'Decimal64(2)');
 SELECT 'ReinterpretErrors';
-SELECT reinterpret(toDecimal64(1, 2), 'UInt8'); -- {serverError 43}
 SELECT reinterpret('123', 'FixedString(1)'); -- {serverError 43}
-SELECT reinterpret(toDateTime('9922337203.6854775808', 1), 'Decimal64(1)'); -- {serverError 43}

From c92e613b82545c8ed13641b69a9e5ab9c2665b74 Mon Sep 17 00:00:00 2001
From: zlx19950903 <76729556+zlx19950903@users.noreply.github.com>
Date: Thu, 18 Feb 2021 20:05:55 +0800
Subject: [PATCH 1150/1238] Add a function `htmlOrXmlCoarseParse` to extract
 content from html or xml format string. (#19600)

* add html and xml coarse parse

* add test file

* add conditional check: hyperscan

* fix style error

* add conditional check

* bug fix

* delete unit

* typos check fix

* add unit test

* style check fix

* fix build error: case style

* acradis_skip test fix

* LINT error fix

* Remove comments

Co-authored-by: guojiantao <guojiantao15@mails.ucas.ac.cn>
Co-authored-by: Ivan <5627721+abyss7@users.noreply.github.com>
Co-authored-by: Ivan Lezhankin <ilezhankin@yandex-team.ru>
---
 docker/test/fasttest/run.sh                   |   1 +
 src/Functions/htmlOrXmlCoarseParse.cpp        | 582 ++++++++++++++++++
 src/Functions/registerFunctionsString.cpp     |   7 +-
 src/Functions/ya.make                         |   1 +
 .../01674_htm_xml_coarse_parse.reference      |   9 +
 .../01674_htm_xml_coarse_parse.sql            |  15 +
 .../queries/0_stateless/arcadia_skip_list.txt |   1 +
 7 files changed, 615 insertions(+), 1 deletion(-)
 create mode 100644 src/Functions/htmlOrXmlCoarseParse.cpp
 create mode 100644 tests/queries/0_stateless/01674_htm_xml_coarse_parse.reference
 create mode 100644 tests/queries/0_stateless/01674_htm_xml_coarse_parse.sql

diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh
index 90663102f17..1c5f62a9e46 100755
--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@@ -342,6 +342,7 @@ function run_tests
 
         # JSON functions
         01666_blns
+        01674_htm_xml_coarse_parse
     )
 
     (time clickhouse-test --hung-check -j 8 --order=random --use-skip-list --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 ||:) | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
diff --git a/src/Functions/htmlOrXmlCoarseParse.cpp b/src/Functions/htmlOrXmlCoarseParse.cpp
new file mode 100644
index 00000000000..442de3d36b0
--- /dev/null
+++ b/src/Functions/htmlOrXmlCoarseParse.cpp
@@ -0,0 +1,582 @@
+#include <Columns/ColumnString.h>
+#include <Functions/FunctionFactory.h>
+#include <Functions/FunctionHelpers.h>
+#include <Functions/IFunctionImpl.h>
+
+#include <utility>
+#include <vector>
+#include <algorithm>
+
+#if USE_HYPERSCAN
+#   include <hs.h>
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int ILLEGAL_COLUMN;
+    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+    extern const int CANNOT_ALLOCATE_MEMORY;
+    extern const int NOT_IMPLEMENTED;
+}
+
+namespace
+{
+struct HxCoarseParseImpl
+{
+private:
+    struct SpanInfo
+    {
+        SpanInfo(): id(0), match_space(std::pair<unsigned long long, unsigned long long>(0, 0)) {}  // NOLINT
+        SpanInfo(unsigned int matchId, std::pair<unsigned long long, unsigned long long> matchSpan): id(matchId), match_space(matchSpan){} // NOLINT
+        SpanInfo(const SpanInfo& obj)
+        {
+            id = obj.id;
+            match_space = obj.match_space;
+        }
+        SpanInfo& operator=(const SpanInfo& obj) = default;
+
+        unsigned int id;
+        std::pair<unsigned long long, unsigned long long> match_space;  // NOLINT
+    };
+    using SpanElement = std::vector<SpanInfo>;
+    struct Span
+    {
+        Span(): set_script(false), set_style(false), set_semi(false), is_finding_cdata(false) {}
+
+        SpanElement copy_stack;         // copy area
+        SpanElement tag_stack;          // regexp area
+        SpanInfo script_ptr;            // script pointer
+        bool set_script;                // whether set script
+        SpanInfo style_ptr;             // style pointer
+        bool set_style;                 // whether set style
+        SpanInfo semi_ptr;              // tag ptr
+        bool set_semi;                  // whether set semi
+
+        bool is_finding_cdata;
+    };
+
+    static inline void copyZone(
+        ColumnString::Offset& current_dst_string_offset,
+        ColumnString::Offset& current_copy_loc,
+        ColumnString::Chars& dst_chars,
+        const ColumnString::Chars& src_chars,
+        size_t bytes_to_copy,
+        unsigned is_space
+    )
+    {
+        bool is_last_space = false;
+        if (current_dst_string_offset == 0 || dst_chars[current_dst_string_offset - 1] == 0 || dst_chars[current_dst_string_offset - 1] == ' ')
+        {
+            is_last_space = true;
+        }
+        if (bytes_to_copy == 0)
+        {
+            if (is_space && !is_last_space)
+            {
+                dst_chars[current_dst_string_offset++] = ' ';
+            }
+        }
+        else
+        {
+            if (is_last_space && src_chars[current_copy_loc] == ' ')
+            {
+                --bytes_to_copy;
+                ++current_copy_loc;
+            }
+            if (bytes_to_copy > 0)
+            {
+                memcpySmallAllowReadWriteOverflow15(
+                    &dst_chars[current_dst_string_offset], &src_chars[current_copy_loc], bytes_to_copy);
+                current_dst_string_offset += bytes_to_copy;
+            }
+
+            // separator is space and last character is not space.
+            if (is_space && !(current_dst_string_offset == 0 || dst_chars[current_dst_string_offset - 1] == 0 || dst_chars[current_dst_string_offset - 1] == ' '))
+            {
+                dst_chars[current_dst_string_offset++] = ' ';
+            }
+        }
+        // return;
+    }
+    static inline void popArea(SpanElement& stack, unsigned long long from, unsigned long long to)  //NOLINT
+    {
+        while (!stack.empty())
+        {
+            if (to > stack.back().match_space.second && from < stack.back().match_space.second)
+            {
+                stack.pop_back();
+            }
+            else
+            {
+                break;
+            }
+        }
+        // return;
+    }
+
+    static void dealCommonTag(Span* matches)
+    {
+        while (!matches->copy_stack.empty() && matches->copy_stack.back().id != 10)
+        {
+            matches->copy_stack.pop_back();
+        }
+        if (!matches->copy_stack.empty())
+        {
+            matches->copy_stack.pop_back();
+        }
+        unsigned long long from;    // NOLINT
+        unsigned long long to;      // NOLINT
+        unsigned id;
+        for (auto begin = matches->tag_stack.begin(); begin != matches->tag_stack.end(); ++begin)
+        {
+            from = begin->match_space.first;
+            to = begin->match_space.second;
+            id = begin->id;
+            switch (id)
+            {
+                case 12:
+                case 13:
+                {
+                    popArea(matches->copy_stack, from, to);
+                    if (matches->copy_stack.empty() || from >= matches->copy_stack.back().match_space.second)
+                        matches->copy_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
+                    break;
+                }
+                case 0:
+                case 2:
+                case 3:
+                case 4:
+                case 5:
+                case 6:
+                case 7:
+                case 8:
+                case 9:
+                case 10:
+                {
+                    if (!matches->set_semi || (matches->set_semi && from == matches->semi_ptr.match_space.first))
+                    {
+                        matches->set_semi = true;
+                        matches->semi_ptr = SpanInfo(id, std::make_pair(from, to));
+                    }
+                    break;
+                }
+                case 1:
+                {
+                    if (matches->set_semi)
+                    {
+                        switch (matches->semi_ptr.id)
+                        {
+                            case 0:
+                            case 2:
+                            case 3:
+                            case 6:
+                            case 7:
+                            case 10:
+                            {
+                                if (matches->semi_ptr.id == 2 || (matches->semi_ptr.id == 3 && matches->semi_ptr.match_space.second == from))
+                                {
+                                    if (!matches->set_script)
+                                    {
+                                        matches->set_script = true;
+                                        matches->script_ptr = SpanInfo(matches->semi_ptr.id, std::make_pair(matches->semi_ptr.match_space.first, to));
+                                    }
+                                }
+                                else if (matches->semi_ptr.id == 6 || (matches->semi_ptr.id == 7 && matches->semi_ptr.match_space.second == from))
+                                {
+                                    if (!matches->set_style)
+                                    {
+                                        matches->set_style = true;
+                                        matches->style_ptr = SpanInfo(matches->semi_ptr.id, std::make_pair(matches->semi_ptr.match_space.first, to));
+                                    }
+                                }
+                                popArea(matches->copy_stack, matches->semi_ptr.match_space.first, to);
+                                matches->copy_stack.push_back(SpanInfo(0, std::make_pair(matches->semi_ptr.match_space.first, to)));
+                                matches->set_semi = false;
+                                break;
+                            }
+                            case 4:
+                            case 5:
+                            case 8:
+                            case 9:
+                            {
+                                SpanInfo complete_zone;
+
+                                complete_zone.match_space.second = to;
+                                if (matches->set_script && (matches->semi_ptr.id == 4 || (matches->semi_ptr.id == 5 && matches->semi_ptr.match_space.second == from)))
+                                {
+                                    complete_zone.id = matches->script_ptr.id;
+                                    complete_zone.match_space.first = matches->script_ptr.match_space.first;
+                                    matches->set_script = false;
+                                }
+                                else if (matches->set_style && (matches->semi_ptr.id == 8 || (matches->semi_ptr.id == 9 && matches->semi_ptr.match_space.second == from)))
+                                {
+                                    complete_zone.id = matches->style_ptr.id;
+                                    complete_zone.match_space.first = matches->style_ptr.match_space.first;
+                                    matches->set_style = false;
+                                }
+                                else
+                                {
+                                    complete_zone.id = matches->semi_ptr.id;
+                                    complete_zone.match_space.first = matches->semi_ptr.match_space.first;
+                                }
+                                popArea(matches->copy_stack, complete_zone.match_space.first, complete_zone.match_space.second);
+                                matches->copy_stack.push_back(complete_zone);
+                                matches->set_semi = false;
+                                break;
+                            }
+                        }
+                    }
+                    break;
+                }
+                default:
+                {
+                    break;
+                }
+            }
+        }
+        // return;
+    }
+    static int spanCollect(unsigned int id,
+                          unsigned long long from,  // NOLINT
+                          unsigned long long to,    // NOLINT
+                          unsigned int , void * ctx)
+    {
+        Span* matches = static_cast<Span*>(ctx);
+        from = id == 12 ? from : to - patterns_length[id];
+
+        if (matches->is_finding_cdata)
+        {
+            if (id == 11)
+            {
+                matches->copy_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
+                matches->is_finding_cdata = false;
+                matches->tag_stack.clear();
+                if (matches->semi_ptr.id == 10)
+                {
+                    matches->set_semi = false;
+                }
+            }
+            else if (id == 12 || id == 13)
+            {
+                popArea(matches->copy_stack, from, to);
+                if (matches->copy_stack.empty() || from >= matches->copy_stack.back().match_space.second)
+                    matches->copy_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
+
+                popArea(matches->tag_stack, from, to);
+                if (matches->tag_stack.empty() || from >= matches->tag_stack.back().match_space.second)
+                    matches->tag_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
+            }
+            else
+            {
+                popArea(matches->tag_stack, from, to);
+                matches->tag_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
+            }
+        }
+        else
+        {
+            switch (id)
+            {
+                case 12:
+                case 13:
+                {
+                    popArea(matches->copy_stack, from, to);
+                    if (matches->copy_stack.empty() || from >= matches->copy_stack.back().match_space.second)
+                        matches->copy_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
+                    break;
+                }
+                case 0:
+                case 2:
+                case 3:
+                case 4:
+                case 5:
+                case 6:
+                case 7:
+                case 8:
+                case 9:
+                {
+                    if (!matches->set_semi || (matches->set_semi && from == matches->semi_ptr.match_space.first))
+                    {
+                        matches->set_semi = true;
+                        matches->semi_ptr = SpanInfo(id, std::make_pair(from, to));
+                    }
+                    break;
+                }
+                case 10:
+                {
+                    if (!matches->set_semi || (matches->set_semi && from == matches->semi_ptr.match_space.first))
+                    {
+                        matches->set_semi = true;
+                        matches->semi_ptr = SpanInfo(id, std::make_pair(from, to));
+                    }
+                    matches->is_finding_cdata = true;
+                    matches->copy_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
+                    matches->tag_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
+                    break;
+                }
+                case 1:
+                {
+                    if (matches->set_semi)
+                    {
+                        switch (matches->semi_ptr.id)
+                        {
+                            case 0:
+                            case 2:
+                            case 3:
+                            case 6:
+                            case 7:
+                            case 10:
+                            {
+                                if (matches->semi_ptr.id == 2 || (matches->semi_ptr.id == 3 && matches->semi_ptr.match_space.second == from))
+                                {
+                                    if (!matches->set_script)
+                                    {
+                                        matches->set_script = true;
+                                        matches->script_ptr = SpanInfo(matches->semi_ptr.id, std::make_pair(matches->semi_ptr.match_space.first, to));
+                                    }
+                                }
+                                else if (matches->semi_ptr.id == 6 || (matches->semi_ptr.id == 7 && matches->semi_ptr.match_space.second == from))
+                                {
+                                    if (!matches->set_style)
+                                    {
+                                        matches->set_style = true;
+                                        matches->style_ptr = SpanInfo(matches->semi_ptr.id, std::make_pair(matches->semi_ptr.match_space.first, to));
+                                    }
+                                }
+                                popArea(matches->copy_stack, matches->semi_ptr.match_space.first, to);
+                                matches->copy_stack.push_back(SpanInfo(matches->semi_ptr.id, std::make_pair(matches->semi_ptr.match_space.first, to)));
+                                matches->set_semi = false;
+                                break;
+                            }
+                            case 4:
+                            case 5:
+                            case 8:
+                            case 9:
+                            {
+                                SpanInfo complete_zone;
+                                complete_zone.match_space.second = to;
+                                if (matches->set_script && (matches->semi_ptr.id == 4 || (matches->semi_ptr.id == 5 && matches->semi_ptr.match_space.second == from)))
+                                {
+                                    complete_zone.id = matches->script_ptr.id;
+                                    complete_zone.match_space.first = matches->script_ptr.match_space.first;
+                                    matches->set_script = false;
+                                }
+                                else if (matches->set_style && (matches->semi_ptr.id == 8 || (matches->semi_ptr.id == 9 && matches->semi_ptr.match_space.second == from)))
+                                {
+                                    complete_zone.id = matches->style_ptr.id;
+                                    complete_zone.match_space.first = matches->style_ptr.match_space.first;
+                                    matches->set_style = false;
+                                }
+                                else
+                                {
+                                    complete_zone.id = matches->semi_ptr.id;
+                                    complete_zone.match_space.first = matches->semi_ptr.match_space.first;
+                                }
+                                popArea(matches->copy_stack, complete_zone.match_space.first, complete_zone.match_space.second);
+                                matches->copy_stack.push_back(complete_zone);
+                                matches->set_semi = false;
+                                break;
+                            }
+                        }
+                    }
+                    break;
+                }
+                default:
+                {
+                    break;
+                }
+            }
+        }
+        return 0;
+    }
+    #if USE_HYPERSCAN
+    static hs_database_t* buildDatabase(const std::vector<const char* > &expressions,
+                                        const std::vector<unsigned> &flags,
+                                        const std::vector<unsigned> &id,
+                                        unsigned int mode)
+    {
+        hs_database_t *db;
+        hs_compile_error_t *compile_err;
+        hs_error_t err;
+        err = hs_compile_multi(expressions.data(), flags.data(), id.data(),
+                            expressions.size(), mode, nullptr, &db, &compile_err);
+
+        if (err != HS_SUCCESS)
+        {
+            hs_free_compile_error(compile_err);
+            throw Exception("Hyper scan database cannot be compiled.", ErrorCodes::CANNOT_ALLOCATE_MEMORY);
+        }
+        return db;
+    }
+    #endif
+    static std::vector<const char*> patterns;
+    static std::vector<std::size_t> patterns_length;
+    static std::vector<unsigned> patterns_flag;
+    static std::vector<unsigned> ids;
+
+public:
+    static void executeInternal(
+        const ColumnString::Chars & src_chars,
+        const ColumnString::Offsets & src_offsets,
+        ColumnString::Chars & dst_chars,
+        ColumnString::Offsets & dst_offsets)
+    {
+    #if USE_HYPERSCAN
+        hs_database_t * db = buildDatabase(patterns, patterns_flag, ids, HS_MODE_BLOCK);
+        hs_scratch_t* scratch = nullptr;
+        if (hs_alloc_scratch(db, &scratch) != HS_SUCCESS)
+        {
+            hs_free_database(db);
+            throw Exception("Unable to allocate scratch space.", ErrorCodes::CANNOT_ALLOCATE_MEMORY);
+        }
+        dst_chars.resize(src_chars.size());
+        dst_offsets.resize(src_offsets.size());
+
+        ColumnString::Offset current_src_string_offset = 0;
+        ColumnString::Offset current_dst_string_offset = 0;
+        ColumnString::Offset current_copy_loc;
+        ColumnString::Offset current_copy_end;
+        unsigned is_space;
+        size_t bytes_to_copy;
+        Span match_zoneall;
+
+        for (size_t off = 0; off < src_offsets.size(); ++off)
+        {
+            hs_scan(db, reinterpret_cast<const char *>(&src_chars[current_src_string_offset]), src_offsets[off] - current_src_string_offset, 0, scratch, spanCollect, &match_zoneall);
+            if (match_zoneall.is_finding_cdata)
+            {
+                dealCommonTag(&match_zoneall);
+            }
+            SpanElement& match_zone = match_zoneall.copy_stack;
+            current_copy_loc = current_src_string_offset;
+            if (match_zone.empty())
+            {
+                current_copy_end = src_offsets[off];
+                is_space = 0;
+            }
+            else
+            {
+                current_copy_end = current_src_string_offset + match_zone.begin()->match_space.first;
+                is_space = (match_zone.begin()->id == 12 || match_zone.begin()->id == 13)?1:0;
+            }
+
+            bytes_to_copy = current_copy_end - current_copy_loc;
+            copyZone(current_dst_string_offset, current_copy_loc, dst_chars, src_chars, bytes_to_copy, is_space);
+            for (auto begin = match_zone.begin(); begin != match_zone.end(); ++begin)
+            {
+                current_copy_loc = current_src_string_offset + begin->match_space.second;
+                if (begin + 1 >= match_zone.end())
+                {
+                    current_copy_end = src_offsets[off];
+                    is_space = 0;
+                }
+                else
+                {
+                    current_copy_end = current_src_string_offset + (begin+1)->match_space.first;
+                    is_space = ((begin+1)->id == 12 || (begin+1)->id == 13)?1:0;
+                }
+                bytes_to_copy = current_copy_end - current_copy_loc;
+                copyZone(current_dst_string_offset, current_copy_loc, dst_chars, src_chars, bytes_to_copy, is_space);
+            }
+            if (current_dst_string_offset > 1 && dst_chars[current_dst_string_offset - 2] == ' ')
+            {
+                dst_chars[current_dst_string_offset - 2] = 0;
+                --current_dst_string_offset;
+            }
+            dst_offsets[off] = current_dst_string_offset;
+            current_src_string_offset = src_offsets[off];
+            match_zoneall.copy_stack.clear();
+            match_zoneall.tag_stack.clear();
+        }
+            dst_chars.resize(dst_chars.size());
+            hs_free_scratch(scratch);
+            hs_free_database(db);
+    #else
+        (void)src_chars;
+        (void)src_offsets;
+        (void)dst_chars;
+        (void)dst_offsets;
+        throw Exception(
+            "htmlOrXmlCoarseParse is not implemented when hyperscan is off (is it x86 processor?)",
+            ErrorCodes::NOT_IMPLEMENTED);
+    #endif
+    }
+};
+
+std::vector<const char*> HxCoarseParseImpl::patterns =
+    {
+        "<[^\\s<>]",       // 0  "<", except "< ", "<<", "<>"
+        ">",               // 1  ">"
+        "<script\\s",      // 2  <script xxxxx>
+        "<script",         // 3  <script>
+        "</script\\s",     // 4  </script xxxx>
+        "</script",        // 5  </script>
+        "<style\\s",       // 6  <style xxxxxx>
+        "<style",          // 7  <style>
+        "</style\\s",      // 8  </style xxxxx>
+        "</style",         // 9  </style>
+        "<!\\[CDATA\\[",   // 10 <![CDATA[xxxxxx]]>
+        "\\]\\]>",         // 11 ]]>
+        "\\s{2,}",         // 12 "   ", continuous blanks
+        "[^\\S ]"          // 13 "\n", "\t" and other white space, it does not include single ' '.
+    };
+std::vector<std::size_t> HxCoarseParseImpl::patterns_length =
+    {
+        2, 1, 8, 7, 9, 8, 7, 6, 8, 7, 9, 3, 0, 1
+    };
+#if USE_HYPERSCAN
+std::vector<unsigned> HxCoarseParseImpl::patterns_flag =
+    {
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, HS_FLAG_SOM_LEFTMOST, 0
+    };
+#endif
+std::vector<unsigned> HxCoarseParseImpl::ids =
+    {
+        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13
+    };
+
+class FunctionHtmlOrXmlCoarseParse : public IFunction
+{
+public:
+    static constexpr auto name = "htmlOrXmlCoarseParse";
+
+    static FunctionPtr create(const Context &) {return std::make_shared<FunctionHtmlOrXmlCoarseParse>(); }
+
+    String getName() const override {return name;}
+
+    size_t getNumberOfArguments() const override {return 1;}
+
+    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
+    {
+        if (!isString(arguments[0]))
+            throw Exception(
+                "Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+        return arguments[0];
+    }
+
+    bool useDefaultImplementationForConstants() const override {return true;}
+
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & , size_t) const override
+    {
+        const auto & strcolumn = arguments[0].column;
+        if (const ColumnString* html_sentence = checkAndGetColumn<ColumnString>(strcolumn.get()))
+        {
+            auto col_res = ColumnString::create();
+            HxCoarseParseImpl::executeInternal(html_sentence->getChars(), html_sentence->getOffsets(), col_res->getChars(), col_res->getOffsets());
+            return col_res;
+        }
+        else
+        {
+            throw Exception("First argument for function " + getName() + " must be string.", ErrorCodes::ILLEGAL_COLUMN);
+        }
+    }
+};
+}
+
+void registerFunctionHtmlOrXmlCoarseParse(FunctionFactory & factory)
+{
+    factory.registerFunction<FunctionHtmlOrXmlCoarseParse>();
+}
+
+}
+#endif
diff --git a/src/Functions/registerFunctionsString.cpp b/src/Functions/registerFunctionsString.cpp
index 5cf30dd83a6..b6327dfb92f 100644
--- a/src/Functions/registerFunctionsString.cpp
+++ b/src/Functions/registerFunctionsString.cpp
@@ -6,7 +6,9 @@ namespace DB
 {
 
 class FunctionFactory;
-
+#if USE_HYPERSCAN
+void registerFunctionHtmlOrXmlCoarseParse(FunctionFactory &);
+#endif
 void registerFunctionRepeat(FunctionFactory &);
 void registerFunctionEmpty(FunctionFactory &);
 void registerFunctionNotEmpty(FunctionFactory &);
@@ -45,6 +47,9 @@ void registerFunctionTryBase64Decode(FunctionFactory &);
 
 void registerFunctionsString(FunctionFactory & factory)
 {
+#if USE_HYPERSCAN
+    registerFunctionHtmlOrXmlCoarseParse(factory);
+#endif
     registerFunctionRepeat(factory);
     registerFunctionEmpty(factory);
     registerFunctionNotEmpty(factory);
diff --git a/src/Functions/ya.make b/src/Functions/ya.make
index ea975901077..20ba5f846a3 100644
--- a/src/Functions/ya.make
+++ b/src/Functions/ya.make
@@ -291,6 +291,7 @@ SRCS(
     hasToken.cpp
     hasTokenCaseInsensitive.cpp
     hostName.cpp
+    htmlOrXmlCoarseParse.cpp
     hypot.cpp
     identity.cpp
     if.cpp
diff --git a/tests/queries/0_stateless/01674_htm_xml_coarse_parse.reference b/tests/queries/0_stateless/01674_htm_xml_coarse_parse.reference
new file mode 100644
index 00000000000..63b3707b9b4
--- /dev/null
+++ b/tests/queries/0_stateless/01674_htm_xml_coarse_parse.reference
@@ -0,0 +1,9 @@
+
+
+Here is CDTATA.
+This is a white space test.
+This is a complex test. <script type="text/javascript">Hello, world</script> world <style> hello
+hello, world
+
+hello, world
+white space collapse
diff --git a/tests/queries/0_stateless/01674_htm_xml_coarse_parse.sql b/tests/queries/0_stateless/01674_htm_xml_coarse_parse.sql
new file mode 100644
index 00000000000..65c243687c1
--- /dev/null
+++ b/tests/queries/0_stateless/01674_htm_xml_coarse_parse.sql
@@ -0,0 +1,15 @@
+SELECT htmlOrXmlCoarseParse('<script>Here is script.</script>');
+SELECT htmlOrXmlCoarseParse('<style>Here is style.</style>');
+SELECT htmlOrXmlCoarseParse('<![CDATA[Here is CDTATA.]]>');
+SELECT htmlOrXmlCoarseParse('This is a     white   space test.');
+SELECT htmlOrXmlCoarseParse('This is a complex test. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"\n "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><![CDATA[<script type="text/javascript">Hello, world</script> ]]><hello />world<![CDATA[ <style> ]]> hello</style>\n<script><![CDATA[</script>]]>hello</script>\n</html>');
+DROP TABLE IF EXISTS defaults;
+CREATE TABLE defaults
+(
+    stringColumn String
+) ENGINE = Memory();
+
+INSERT INTO defaults values ('<common tag>hello, world<tag>'), ('<script desc=content> some content </script>'), ('<![CDATA[hello, world]]>'), ('white space    collapse');
+
+SELECT htmlOrXmlCoarseParse(stringColumn) FROM defaults;
+DROP table defaults;
diff --git a/tests/queries/0_stateless/arcadia_skip_list.txt b/tests/queries/0_stateless/arcadia_skip_list.txt
index b141443a979..5466fb4bfb8 100644
--- a/tests/queries/0_stateless/arcadia_skip_list.txt
+++ b/tests/queries/0_stateless/arcadia_skip_list.txt
@@ -197,6 +197,7 @@
 01181_db_atomic_drop_on_cluster
 01658_test_base64Encode_mysql_compatibility
 01659_test_base64Decode_mysql_compatibility
+01674_htm_xml_coarse_parse
 01675_data_type_coroutine
 01676_clickhouse_client_autocomplete
 01671_aggregate_function_group_bitmap_data

From 77fd060665751fc6528dd9f77e0fdea41cbc23bc Mon Sep 17 00:00:00 2001
From: Amos Bird <amosbird@gmail.com>
Date: Sun, 14 Feb 2021 19:09:36 +0800
Subject: [PATCH 1151/1238] Normalize function names

---
 .../AggregateFunctionFactory.cpp              | 16 +++--
 src/Common/IFactoryWithAliases.h              | 14 ++++
 src/Functions/FunctionFactory.cpp             | 15 +++--
 src/Functions/FunctionsRound.cpp              |  2 +-
 src/Functions/extractAllGroupsVertical.cpp    |  2 +-
 src/Interpreters/FunctionNameNormalizer.cpp   | 18 +++++
 src/Interpreters/FunctionNameNormalizer.h     | 14 ++++
 src/Interpreters/MutationsInterpreter.cpp     |  4 +-
 src/Interpreters/TreeRewriter.cpp             |  4 ++
 src/Interpreters/addTypeConversionToAST.cpp   |  2 +-
 src/Interpreters/inplaceBlockConversions.cpp  |  2 +-
 .../Impl/ConstantExpressionTemplate.cpp       |  2 +-
 tests/integration/test_mysql_protocol/test.py |  2 +-
 .../00597_push_down_predicate.reference       |  2 +-
 .../01029_early_constant_folding.reference    |  2 +-
 ...1611_constant_folding_subqueries.reference |  2 +-
 ..._case_insensitive_function_names.reference | 66 +++++++++++++++++++
 ...malize_case_insensitive_function_names.sql |  1 +
 18 files changed, 151 insertions(+), 19 deletions(-)
 create mode 100644 src/Interpreters/FunctionNameNormalizer.cpp
 create mode 100644 src/Interpreters/FunctionNameNormalizer.h
 create mode 100644 tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.reference
 create mode 100644 tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.sql

diff --git a/src/AggregateFunctions/AggregateFunctionFactory.cpp b/src/AggregateFunctions/AggregateFunctionFactory.cpp
index 5fc690d59f2..061077dd8fa 100644
--- a/src/AggregateFunctions/AggregateFunctionFactory.cpp
+++ b/src/AggregateFunctions/AggregateFunctionFactory.cpp
@@ -30,6 +30,10 @@ namespace ErrorCodes
     extern const int LOGICAL_ERROR;
 }
 
+const String & getAggregateFunctionCanonicalNameIfAny(const String & name)
+{
+    return AggregateFunctionFactory::instance().getCanonicalNameIfAny(name);
+}
 
 void AggregateFunctionFactory::registerFunction(const String & name, Value creator_with_properties, CaseSensitiveness case_sensitiveness)
 {
@@ -41,10 +45,14 @@ void AggregateFunctionFactory::registerFunction(const String & name, Value creat
         throw Exception("AggregateFunctionFactory: the aggregate function name '" + name + "' is not unique",
             ErrorCodes::LOGICAL_ERROR);
 
-    if (case_sensitiveness == CaseInsensitive
-        && !case_insensitive_aggregate_functions.emplace(Poco::toLower(name), creator_with_properties).second)
-        throw Exception("AggregateFunctionFactory: the case insensitive aggregate function name '" + name + "' is not unique",
-            ErrorCodes::LOGICAL_ERROR);
+    if (case_sensitiveness == CaseInsensitive)
+    {
+        auto key = Poco::toLower(name);
+        if (!case_insensitive_aggregate_functions.emplace(key, creator_with_properties).second)
+            throw Exception("AggregateFunctionFactory: the case insensitive aggregate function name '" + name + "' is not unique",
+                ErrorCodes::LOGICAL_ERROR);
+        case_insensitive_name_mapping[key] = name;
+    }
 }
 
 static DataTypes convertLowCardinalityTypesToNested(const DataTypes & types)
diff --git a/src/Common/IFactoryWithAliases.h b/src/Common/IFactoryWithAliases.h
index 49c03049b92..5ef795c92d0 100644
--- a/src/Common/IFactoryWithAliases.h
+++ b/src/Common/IFactoryWithAliases.h
@@ -35,6 +35,8 @@ protected:
             return name;
     }
 
+    std::unordered_map<String, String> case_insensitive_name_mapping;
+
 public:
     /// For compatibility with SQL, it's possible to specify that certain function name is case insensitive.
     enum CaseSensitiveness
@@ -68,9 +70,12 @@ public:
                 factory_name + ": the alias name '" + alias_name + "' is already registered as real name", ErrorCodes::LOGICAL_ERROR);
 
         if (case_sensitiveness == CaseInsensitive)
+        {
             if (!case_insensitive_aliases.emplace(alias_name_lowercase, real_dict_name).second)
                 throw Exception(
                     factory_name + ": case insensitive alias name '" + alias_name + "' is not unique", ErrorCodes::LOGICAL_ERROR);
+            case_insensitive_name_mapping[alias_name_lowercase] = real_name;
+        }
 
         if (!aliases.emplace(alias_name, real_dict_name).second)
             throw Exception(factory_name + ": alias name '" + alias_name + "' is not unique", ErrorCodes::LOGICAL_ERROR);
@@ -111,6 +116,15 @@ public:
         return getMap().count(name) || getCaseInsensitiveMap().count(name) || isAlias(name);
     }
 
+    /// Return the canonical name (the name used in registration) if it's different from `name`.
+    const String & getCanonicalNameIfAny(const String & name) const
+    {
+        auto it = case_insensitive_name_mapping.find(Poco::toLower(name));
+        if (it != case_insensitive_name_mapping.end())
+            return it->second;
+        return name;
+    }
+
     virtual ~IFactoryWithAliases() override {}
 
 private:
diff --git a/src/Functions/FunctionFactory.cpp b/src/Functions/FunctionFactory.cpp
index 768f1cfe487..09fd360a925 100644
--- a/src/Functions/FunctionFactory.cpp
+++ b/src/Functions/FunctionFactory.cpp
@@ -21,6 +21,10 @@ namespace ErrorCodes
     extern const int LOGICAL_ERROR;
 }
 
+const String & getFunctionCanonicalNameIfAny(const String & name)
+{
+    return FunctionFactory::instance().getCanonicalNameIfAny(name);
+}
 
 void FunctionFactory::registerFunction(const
     std::string & name,
@@ -36,10 +40,13 @@ void FunctionFactory::registerFunction(const
         throw Exception("FunctionFactory: the function name '" + name + "' is already registered as alias",
                         ErrorCodes::LOGICAL_ERROR);
 
-    if (case_sensitiveness == CaseInsensitive
-        && !case_insensitive_functions.emplace(function_name_lowercase, creator).second)
-        throw Exception("FunctionFactory: the case insensitive function name '" + name + "' is not unique",
-                        ErrorCodes::LOGICAL_ERROR);
+    if (case_sensitiveness == CaseInsensitive)
+    {
+        if (!case_insensitive_functions.emplace(function_name_lowercase, creator).second)
+            throw Exception("FunctionFactory: the case insensitive function name '" + name + "' is not unique",
+                ErrorCodes::LOGICAL_ERROR);
+        case_insensitive_name_mapping[function_name_lowercase] = name;
+    }
 }
 
 
diff --git a/src/Functions/FunctionsRound.cpp b/src/Functions/FunctionsRound.cpp
index b1349bd2164..c5ad27a0b90 100644
--- a/src/Functions/FunctionsRound.cpp
+++ b/src/Functions/FunctionsRound.cpp
@@ -8,7 +8,7 @@ namespace DB
 void registerFunctionsRound(FunctionFactory & factory)
 {
     factory.registerFunction<FunctionRound>("round", FunctionFactory::CaseInsensitive);
-    factory.registerFunction<FunctionRoundBankers>("roundBankers", FunctionFactory::CaseInsensitive);
+    factory.registerFunction<FunctionRoundBankers>("roundBankers", FunctionFactory::CaseSensitive);
     factory.registerFunction<FunctionFloor>("floor", FunctionFactory::CaseInsensitive);
     factory.registerFunction<FunctionCeil>("ceil", FunctionFactory::CaseInsensitive);
     factory.registerFunction<FunctionTrunc>("trunc", FunctionFactory::CaseInsensitive);
diff --git a/src/Functions/extractAllGroupsVertical.cpp b/src/Functions/extractAllGroupsVertical.cpp
index 9cbd148b016..bf33eef70f3 100644
--- a/src/Functions/extractAllGroupsVertical.cpp
+++ b/src/Functions/extractAllGroupsVertical.cpp
@@ -18,7 +18,7 @@ namespace DB
 void registerFunctionExtractAllGroupsVertical(FunctionFactory & factory)
 {
     factory.registerFunction<FunctionExtractAllGroups<VerticalImpl>>();
-    factory.registerAlias("extractAllGroups", VerticalImpl::Name, FunctionFactory::CaseInsensitive);
+    factory.registerAlias("extractAllGroups", VerticalImpl::Name, FunctionFactory::CaseSensitive);
 }
 
 }
diff --git a/src/Interpreters/FunctionNameNormalizer.cpp b/src/Interpreters/FunctionNameNormalizer.cpp
new file mode 100644
index 00000000000..f22f72b5e03
--- /dev/null
+++ b/src/Interpreters/FunctionNameNormalizer.cpp
@@ -0,0 +1,18 @@
+#include <Interpreters/FunctionNameNormalizer.h>
+
+namespace DB
+{
+
+const String & getFunctionCanonicalNameIfAny(const String & name);
+const String & getAggregateFunctionCanonicalNameIfAny(const String & name);
+
+void FunctionNameNormalizer::visit(ASTPtr & ast)
+{
+    if (auto * node_func = ast->as<ASTFunction>())
+        node_func->name = getAggregateFunctionCanonicalNameIfAny(getFunctionCanonicalNameIfAny(node_func->name));
+
+    for (auto & child : ast->children)
+        visit(child);
+}
+
+}
diff --git a/src/Interpreters/FunctionNameNormalizer.h b/src/Interpreters/FunctionNameNormalizer.h
new file mode 100644
index 00000000000..2b20c28bce0
--- /dev/null
+++ b/src/Interpreters/FunctionNameNormalizer.h
@@ -0,0 +1,14 @@
+#pragma once
+
+#include <Parsers/IAST.h>
+#include <Parsers/ASTFunction.h>
+
+namespace DB
+{
+
+struct FunctionNameNormalizer
+{
+    static void visit(ASTPtr &);
+};
+
+}
diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp
index 528b5ec6d8e..c393b214ee8 100644
--- a/src/Interpreters/MutationsInterpreter.cpp
+++ b/src/Interpreters/MutationsInterpreter.cpp
@@ -442,10 +442,10 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run)
                 auto type_literal = std::make_shared<ASTLiteral>(columns_desc.getPhysical(column).type->getName());
 
                 const auto & update_expr = kv.second;
-                auto updated_column = makeASTFunction("cast",
+                auto updated_column = makeASTFunction("CAST",
                     makeASTFunction("if",
                         getPartitionAndPredicateExpressionForMutationCommand(command),
-                        makeASTFunction("cast",
+                        makeASTFunction("CAST",
                             update_expr->clone(),
                             type_literal),
                         std::make_shared<ASTIdentifier>(column)),
diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp
index fd87d86bf97..cf4db8f174e 100644
--- a/src/Interpreters/TreeRewriter.cpp
+++ b/src/Interpreters/TreeRewriter.cpp
@@ -8,6 +8,7 @@
 #include <Interpreters/ArrayJoinedColumnsVisitor.h>
 #include <Interpreters/TranslateQualifiedNamesVisitor.h>
 #include <Interpreters/Context.h>
+#include <Interpreters/FunctionNameNormalizer.h>
 #include <Interpreters/MarkTableIdentifiersVisitor.h>
 #include <Interpreters/QueryNormalizer.h>
 #include <Interpreters/ExecuteScalarSubqueriesVisitor.h>
@@ -934,6 +935,9 @@ void TreeRewriter::normalize(ASTPtr & query, Aliases & aliases, const Settings &
     MarkTableIdentifiersVisitor::Data identifiers_data{aliases};
     MarkTableIdentifiersVisitor(identifiers_data).visit(query);
 
+    /// Rewrite function names to their canonical ones.
+    FunctionNameNormalizer().visit(query);
+
     /// Common subexpression elimination. Rewrite rules.
     QueryNormalizer::Data normalizer_data(aliases, settings);
     QueryNormalizer(normalizer_data).visit(query);
diff --git a/src/Interpreters/addTypeConversionToAST.cpp b/src/Interpreters/addTypeConversionToAST.cpp
index bb42ad79daa..18591fd732c 100644
--- a/src/Interpreters/addTypeConversionToAST.cpp
+++ b/src/Interpreters/addTypeConversionToAST.cpp
@@ -20,7 +20,7 @@ namespace ErrorCodes
 
 ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name)
 {
-    auto func = makeASTFunction("cast", ast, std::make_shared<ASTLiteral>(type_name));
+    auto func = makeASTFunction("CAST", ast, std::make_shared<ASTLiteral>(type_name));
 
     if (ASTWithAlias * ast_with_alias = dynamic_cast<ASTWithAlias *>(ast.get()))
     {
diff --git a/src/Interpreters/inplaceBlockConversions.cpp b/src/Interpreters/inplaceBlockConversions.cpp
index eba03d7aa61..c9a96a81b48 100644
--- a/src/Interpreters/inplaceBlockConversions.cpp
+++ b/src/Interpreters/inplaceBlockConversions.cpp
@@ -43,7 +43,7 @@ void addDefaultRequiredExpressionsRecursively(const Block & block, const String
         RequiredSourceColumnsVisitor(columns_context).visit(column_default_expr);
         NameSet required_columns_names = columns_context.requiredColumns();
 
-        auto cast_func = makeASTFunction("cast", column_default_expr, std::make_shared<ASTLiteral>(columns.get(required_column).type->getName()));
+        auto cast_func = makeASTFunction("CAST", column_default_expr, std::make_shared<ASTLiteral>(columns.get(required_column).type->getName()));
         default_expr_list_accum->children.emplace_back(setAlias(cast_func, required_column));
         added_columns.emplace(required_column);
 
diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp
index d7a65c2f15d..1685688f02d 100644
--- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp
+++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp
@@ -626,7 +626,7 @@ void ConstantExpressionTemplate::TemplateStructure::addNodesToCastResult(const I
         expr = makeASTFunction("assumeNotNull", std::move(expr));
     }
 
-    expr = makeASTFunction("cast", std::move(expr), std::make_shared<ASTLiteral>(result_column_type.getName()));
+    expr = makeASTFunction("CAST", std::move(expr), std::make_shared<ASTLiteral>(result_column_type.getName()));
 
     if (null_as_default)
     {
diff --git a/tests/integration/test_mysql_protocol/test.py b/tests/integration/test_mysql_protocol/test.py
index 9532d4b8ba2..7f7d59674bc 100644
--- a/tests/integration/test_mysql_protocol/test.py
+++ b/tests/integration/test_mysql_protocol/test.py
@@ -217,7 +217,7 @@ def test_mysql_replacement_query(mysql_client, server_address):
         --password=123 -e "select database();"
     '''.format(host=server_address, port=server_port), demux=True)
     assert code == 0
-    assert stdout.decode() == 'database()\ndefault\n'
+    assert stdout.decode() == 'DATABASE()\ndefault\n'
 
     code, (stdout, stderr) = mysql_client.exec_run('''
         mysql --protocol tcp -h {host} -P {port} default -u default
diff --git a/tests/queries/0_stateless/00597_push_down_predicate.reference b/tests/queries/0_stateless/00597_push_down_predicate.reference
index 794d9e7af5f..bd1c4791df4 100644
--- a/tests/queries/0_stateless/00597_push_down_predicate.reference
+++ b/tests/queries/0_stateless/00597_push_down_predicate.reference
@@ -114,7 +114,7 @@ FROM
 (
     SELECT
         1 AS id,
-        identity(cast(1, \'UInt8\')) AS subquery
+        identity(CAST(1, \'UInt8\')) AS subquery
     WHERE subquery = 1
 )
 WHERE subquery = 1
diff --git a/tests/queries/0_stateless/01029_early_constant_folding.reference b/tests/queries/0_stateless/01029_early_constant_folding.reference
index 8a1d4cec388..8a2d7e6c61a 100644
--- a/tests/queries/0_stateless/01029_early_constant_folding.reference
+++ b/tests/queries/0_stateless/01029_early_constant_folding.reference
@@ -2,7 +2,7 @@ SELECT 1
 WHERE 0
 SELECT 1
 SELECT 1
-WHERE (1 IN (0, 2)) AND (2 = (identity(cast(2, \'UInt8\')) AS subquery))
+WHERE (1 IN (0, 2)) AND (2 = (identity(CAST(2, \'UInt8\')) AS subquery))
 SELECT 1
 WHERE 1 IN (
 (
diff --git a/tests/queries/0_stateless/01611_constant_folding_subqueries.reference b/tests/queries/0_stateless/01611_constant_folding_subqueries.reference
index d10502c5860..e46fd479413 100644
--- a/tests/queries/0_stateless/01611_constant_folding_subqueries.reference
+++ b/tests/queries/0_stateless/01611_constant_folding_subqueries.reference
@@ -5,7 +5,7 @@ SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n) FO
 1,10
 EXPLAIN SYNTAX SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n);
 SELECT
-    identity(cast(0, \'UInt64\')) AS n,
+    identity(CAST(0, \'UInt64\')) AS n,
     toUInt64(10 / n)
 SELECT * FROM (WITH (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n) as q SELECT * FROM system.one WHERE q > 0);
 0
diff --git a/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.reference b/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.reference
new file mode 100644
index 00000000000..5b0f7bdeb2d
--- /dev/null
+++ b/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.reference
@@ -0,0 +1,66 @@
+SELECT
+    CAST(1, 'INT'),
+    ceil(1),
+    ceil(1),
+    char(49),
+    CHAR_LENGTH('1'),
+    CHARACTER_LENGTH('1'),
+    coalesce(1),
+    concat('1', '1'),
+    corr(1, 1),
+    cos(1),
+    count(),
+    covarPop(1, 1),
+    covarSamp(1, 1),
+    DATABASE(),
+    dateDiff('DAY', toDate('2020-10-24'), toDate('2019-10-24')),
+    exp(1),
+    arrayFlatten([[1]]),
+    floor(1),
+    FQDN(),
+    greatest(1),
+    1,
+    ifNull(1, 1),
+    lower('A'),
+    least(1),
+    length('1'),
+    log(1),
+    position('1', '1'),
+    log(1),
+    log10(1),
+    log2(1),
+    lower('A'),
+    max(1),
+    substring('123', 1, 1),
+    min(1),
+    1 % 1,
+    NOT 1,
+    now(),
+    now64(),
+    nullIf(1, 1),
+    pi(),
+    position('123', '2'),
+    pow(1, 1),
+    pow(1, 1),
+    rand(),
+    replaceAll('1', '1', '2'),
+    reverse('123'),
+    round(1),
+    sin(1),
+    sqrt(1),
+    stddevPop(1),
+    stddevSamp(1),
+    substring('123', 2),
+    substring('123', 2),
+    count(),
+    tan(1),
+    tanh(1),
+    trunc(1),
+    trunc(1),
+    upper('A'),
+    upper('A'),
+    currentUser(),
+    varPop(1),
+    varSamp(1),
+    toWeek(toDate('2020-10-24')),
+    toYearWeek(toDate('2020-10-24'))
diff --git a/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.sql b/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.sql
new file mode 100644
index 00000000000..9b35087182c
--- /dev/null
+++ b/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.sql
@@ -0,0 +1 @@
+EXPLAIN SYNTAX SELECT CAST(1 AS INT), CEIL(1), CEILING(1), CHAR(49), CHAR_LENGTH('1'), CHARACTER_LENGTH('1'), COALESCE(1), CONCAT('1', '1'), CORR(1, 1), COS(1), COUNT(1), COVAR_POP(1, 1), COVAR_SAMP(1, 1), DATABASE(), DATEDIFF('DAY', toDate('2020-10-24'), toDate('2019-10-24')), EXP(1), FLATTEN([[1]]), FLOOR(1), FQDN(), GREATEST(1), IF(1, 1, 1), IFNULL(1, 1), LCASE('A'), LEAST(1), LENGTH('1'), LN(1), LOCATE('1', '1'), LOG(1), LOG10(1), LOG2(1), LOWER('A'), MAX(1), MID('123', 1, 1), MIN(1), MOD(1, 1), NOT(1), NOW(), NOW64(), NULLIF(1, 1), PI(), POSITION('123', '2'), POW(1, 1), POWER(1, 1), RAND(), REPLACE('1', '1', '2'), REVERSE('123'), ROUND(1), SIN(1), SQRT(1), STDDEV_POP(1), STDDEV_SAMP(1), SUBSTR('123', 2), SUBSTRING('123', 2), SUM(1), TAN(1), TANH(1), TRUNC(1), TRUNCATE(1), UCASE('A'), UPPER('A'), USER(), VAR_POP(1), VAR_SAMP(1), WEEK(toDate('2020-10-24')), YEARWEEK(toDate('2020-10-24')) format TSVRaw;

From 2dc7ba160a3bdc61765b12336edf753a0100f923 Mon Sep 17 00:00:00 2001
From: Amos Bird <amosbird@gmail.com>
Date: Sun, 14 Feb 2021 20:53:50 +0800
Subject: [PATCH 1152/1238] Better

---
 src/Interpreters/FunctionNameNormalizer.cpp   | 27 +++++++++++++++++--
 src/Interpreters/FunctionNameNormalizer.h     |  2 +-
 src/Interpreters/InterpreterCreateQuery.cpp   |  2 ++
 ...OptimizeIfWithConstantConditionVisitor.cpp |  2 +-
 src/Interpreters/TreeRewriter.cpp             |  2 +-
 src/Interpreters/inplaceBlockConversions.cpp  |  2 +-
 src/Parsers/ExpressionElementParsers.cpp      |  2 +-
 ...56_test_query_log_factories_info.reference |  2 +-
 8 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/src/Interpreters/FunctionNameNormalizer.cpp b/src/Interpreters/FunctionNameNormalizer.cpp
index f22f72b5e03..36ccc9340ea 100644
--- a/src/Interpreters/FunctionNameNormalizer.cpp
+++ b/src/Interpreters/FunctionNameNormalizer.cpp
@@ -1,18 +1,41 @@
 #include <Interpreters/FunctionNameNormalizer.h>
 
+#include <Parsers/ASTColumnDeclaration.h>
+#include <Parsers/ASTCreateQuery.h>
+
 namespace DB
 {
 
 const String & getFunctionCanonicalNameIfAny(const String & name);
 const String & getAggregateFunctionCanonicalNameIfAny(const String & name);
 
-void FunctionNameNormalizer::visit(ASTPtr & ast)
+void FunctionNameNormalizer::visit(IAST * ast)
 {
+    if (!ast)
+        return;
+
+    if (auto * node_storage = ast->as<ASTStorage>())
+    {
+        visit(node_storage->partition_by);
+        visit(node_storage->primary_key);
+        visit(node_storage->order_by);
+        visit(node_storage->sample_by);
+        visit(node_storage->ttl_table);
+        return;
+    }
+
+    if (auto * node_decl = ast->as<ASTColumnDeclaration>())
+    {
+        visit(node_decl->default_expression.get());
+        visit(node_decl->ttl.get());
+        return;
+    }
+
     if (auto * node_func = ast->as<ASTFunction>())
         node_func->name = getAggregateFunctionCanonicalNameIfAny(getFunctionCanonicalNameIfAny(node_func->name));
 
     for (auto & child : ast->children)
-        visit(child);
+        visit(child.get());
 }
 
 }
diff --git a/src/Interpreters/FunctionNameNormalizer.h b/src/Interpreters/FunctionNameNormalizer.h
index 2b20c28bce0..3f22bb2f627 100644
--- a/src/Interpreters/FunctionNameNormalizer.h
+++ b/src/Interpreters/FunctionNameNormalizer.h
@@ -8,7 +8,7 @@ namespace DB
 
 struct FunctionNameNormalizer
 {
-    static void visit(ASTPtr &);
+    static void visit(IAST *);
 };
 
 }
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index e9a11b9eb0d..bc38d4e3821 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -56,6 +56,7 @@
 #include <Interpreters/InterpreterDropQuery.h>
 #include <Interpreters/QueryLog.h>
 #include <Interpreters/addTypeConversionToAST.h>
+#include <Interpreters/FunctionNameNormalizer.h>
 
 #include <TableFunctions/TableFunctionFactory.h>
 #include <common/logger_useful.h>
@@ -1118,6 +1119,7 @@ void InterpreterCreateQuery::prepareOnClusterQuery(ASTCreateQuery & create, cons
 
 BlockIO InterpreterCreateQuery::execute()
 {
+    FunctionNameNormalizer().visit(query_ptr.get());
     auto & create = query_ptr->as<ASTCreateQuery &>();
     if (!create.cluster.empty())
     {
diff --git a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp
index dee4c69118b..cdcf6f7dddd 100644
--- a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp
+++ b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp
@@ -29,7 +29,7 @@ static bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & v
     /// cast of numeric constant in condition to UInt8
     if (const auto * function = condition->as<ASTFunction>())
     {
-        if (function->name == "cast")
+        if (function->name == "CAST")
         {
             if (const auto * expr_list = function->arguments->as<ASTExpressionList>())
             {
diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp
index cf4db8f174e..7b1a960d435 100644
--- a/src/Interpreters/TreeRewriter.cpp
+++ b/src/Interpreters/TreeRewriter.cpp
@@ -936,7 +936,7 @@ void TreeRewriter::normalize(ASTPtr & query, Aliases & aliases, const Settings &
     MarkTableIdentifiersVisitor(identifiers_data).visit(query);
 
     /// Rewrite function names to their canonical ones.
-    FunctionNameNormalizer().visit(query);
+    FunctionNameNormalizer().visit(query.get());
 
     /// Common subexpression elimination. Rewrite rules.
     QueryNormalizer::Data normalizer_data(aliases, settings);
diff --git a/src/Interpreters/inplaceBlockConversions.cpp b/src/Interpreters/inplaceBlockConversions.cpp
index c9a96a81b48..d06cde99425 100644
--- a/src/Interpreters/inplaceBlockConversions.cpp
+++ b/src/Interpreters/inplaceBlockConversions.cpp
@@ -79,7 +79,7 @@ ASTPtr convertRequiredExpressions(Block & block, const NamesAndTypesList & requi
             continue;
 
         auto cast_func = makeASTFunction(
-            "cast", std::make_shared<ASTIdentifier>(required_column.name), std::make_shared<ASTLiteral>(required_column.type->getName()));
+            "CAST", std::make_shared<ASTIdentifier>(required_column.name), std::make_shared<ASTLiteral>(required_column.type->getName()));
 
         conversion_expr_list->children.emplace_back(setAlias(cast_func, required_column.name));
 
diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp
index 3d868812304..7a426e7774d 100644
--- a/src/Parsers/ExpressionElementParsers.cpp
+++ b/src/Parsers/ExpressionElementParsers.cpp
@@ -864,7 +864,7 @@ bool ParserCastExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expect
         expr_list_args->children.push_back(std::move(type_literal));
 
         auto func_node = std::make_shared<ASTFunction>();
-        func_node->name = "cast";
+        func_node->name = "CAST";
         func_node->arguments = std::move(expr_list_args);
         func_node->children.push_back(func_node->arguments);
 
diff --git a/tests/queries/0_stateless/01656_test_query_log_factories_info.reference b/tests/queries/0_stateless/01656_test_query_log_factories_info.reference
index 3c93cd9ec26..324890c0a5a 100644
--- a/tests/queries/0_stateless/01656_test_query_log_factories_info.reference
+++ b/tests/queries/0_stateless/01656_test_query_log_factories_info.reference
@@ -11,7 +11,7 @@ arraySort(used_table_functions)
 ['numbers']
 
 arraySort(used_functions)
-['addDays','array','arrayFlatten','cast','modulo','plus','substring','toDate','toDayOfYear','toTypeName','toWeek']
+['CAST','addDays','array','arrayFlatten','modulo','plus','substring','toDate','toDayOfYear','toTypeName','toWeek']
 
 arraySort(used_data_type_families)
 ['Array','Int32','Nullable','String']

From cac9c7fc079835b4e26cf2b5ff8ad776b1369c5d Mon Sep 17 00:00:00 2001
From: Amos Bird <amosbird@gmail.com>
Date: Mon, 15 Feb 2021 00:00:47 +0800
Subject: [PATCH 1153/1238] Fix tests

---
 tests/queries/0_stateless/00642_cast.reference           | 4 ++--
 tests/queries/0_stateless/00643_cast_zookeeper.reference | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/queries/0_stateless/00642_cast.reference b/tests/queries/0_stateless/00642_cast.reference
index 3d5572932fb..7f5333f590e 100644
--- a/tests/queries/0_stateless/00642_cast.reference
+++ b/tests/queries/0_stateless/00642_cast.reference
@@ -10,11 +10,11 @@ hello
 CREATE TABLE default.cast
 (
     `x` UInt8,
-    `e` Enum8('hello' = 1, 'world' = 2) DEFAULT cast(x, 'Enum8(\'hello\' = 1, \'world\' = 2)')
+    `e` Enum8('hello' = 1, 'world' = 2) DEFAULT CAST(x, 'Enum8(\'hello\' = 1, \'world\' = 2)')
 )
 ENGINE = MergeTree
 ORDER BY e
 SETTINGS index_granularity = 8192
 x	UInt8					
-e	Enum8(\'hello\' = 1, \'world\' = 2)	DEFAULT	cast(x, \'Enum8(\\\'hello\\\' = 1, \\\'world\\\' = 2)\')			
+e	Enum8(\'hello\' = 1, \'world\' = 2)	DEFAULT	CAST(x, \'Enum8(\\\'hello\\\' = 1, \\\'world\\\' = 2)\')			
 1	hello
diff --git a/tests/queries/0_stateless/00643_cast_zookeeper.reference b/tests/queries/0_stateless/00643_cast_zookeeper.reference
index 658233be742..9123463de1a 100644
--- a/tests/queries/0_stateless/00643_cast_zookeeper.reference
+++ b/tests/queries/0_stateless/00643_cast_zookeeper.reference
@@ -1,12 +1,12 @@
 CREATE TABLE default.cast1
 (
     `x` UInt8,
-    `e` Enum8('hello' = 1, 'world' = 2) DEFAULT cast(x, 'Enum8(\'hello\' = 1, \'world\' = 2)')
+    `e` Enum8('hello' = 1, 'world' = 2) DEFAULT CAST(x, 'Enum8(\'hello\' = 1, \'world\' = 2)')
 )
 ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_00643/cast', 'r1')
 ORDER BY e
 SETTINGS index_granularity = 8192
 x	UInt8					
-e	Enum8(\'hello\' = 1, \'world\' = 2)	DEFAULT	cast(x, \'Enum8(\\\'hello\\\' = 1, \\\'world\\\' = 2)\')			
+e	Enum8(\'hello\' = 1, \'world\' = 2)	DEFAULT	CAST(x, \'Enum8(\\\'hello\\\' = 1, \\\'world\\\' = 2)\')			
 1	hello
 1	hello

From f402aa4057814078b7b7ef2e0175ab2753d2bced Mon Sep 17 00:00:00 2001
From: Amos Bird <amosbird@gmail.com>
Date: Wed, 17 Feb 2021 23:36:37 +0800
Subject: [PATCH 1154/1238] Normalize constant expression

---
 src/Interpreters/evaluateConstantExpression.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/Interpreters/evaluateConstantExpression.cpp b/src/Interpreters/evaluateConstantExpression.cpp
index 02ef3426483..70b9baa544f 100644
--- a/src/Interpreters/evaluateConstantExpression.cpp
+++ b/src/Interpreters/evaluateConstantExpression.cpp
@@ -15,6 +15,7 @@
 #include <Parsers/ExpressionElementParsers.h>
 #include <TableFunctions/TableFunctionFactory.h>
 #include <Common/typeid_cast.h>
+#include <Interpreters/FunctionNameNormalizer.h>
 #include <Interpreters/ReplaceQueryParameterVisitor.h>
 #include <Poco/Util/AbstractConfiguration.h>
 
@@ -35,6 +36,7 @@ std::pair<Field, std::shared_ptr<const IDataType>> evaluateConstantExpression(co
     auto ast = node->clone();
     ReplaceQueryParameterVisitor param_visitor(context.getQueryParameters());
     param_visitor.visit(ast);
+    FunctionNameNormalizer().visit(ast.get());
     String name = ast->getColumnName();
     auto syntax_result = TreeRewriter(context).analyze(ast, source_columns);
     ExpressionActionsPtr expr_for_constant_folding = ExpressionAnalyzer(ast, syntax_result, context).getConstActions();

From 2c4bc43014c510292340954647fbebf0f72620e9 Mon Sep 17 00:00:00 2001
From: Amos Bird <amosbird@gmail.com>
Date: Thu, 18 Feb 2021 11:27:24 +0800
Subject: [PATCH 1155/1238] Backward compatible

---
 src/Core/Settings.h                             | 1 +
 src/Interpreters/TreeRewriter.cpp               | 3 ++-
 src/Interpreters/evaluateConstantExpression.cpp | 5 ++++-
 src/Server/TCPHandler.cpp                       | 6 ++++++
 4 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 9bb9ad30f15..4c5fe93bb03 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -383,6 +383,7 @@ class IColumn;
     M(Bool, optimize_if_chain_to_multiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \
     M(Bool, optimize_if_transform_strings_to_enum, false, "Replaces string-type arguments in If and Transform to enum. Disabled by default cause it could make inconsistent change in distributed query that would lead to its fail.", 0) \
     M(Bool, optimize_monotonous_functions_in_order_by, true, "Replace monotonous function with its argument in ORDER BY", 0) \
+    M(Bool, normalize_function_names, true, "Normalize function names to their canonical names", 0) \
     M(Bool, allow_experimental_alter_materialized_view_structure, false, "Allow atomic alter on Materialized views. Work in progress.", 0) \
     M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there're constants there", 0) \
     M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \
diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp
index 7b1a960d435..37f49874e0a 100644
--- a/src/Interpreters/TreeRewriter.cpp
+++ b/src/Interpreters/TreeRewriter.cpp
@@ -936,7 +936,8 @@ void TreeRewriter::normalize(ASTPtr & query, Aliases & aliases, const Settings &
     MarkTableIdentifiersVisitor(identifiers_data).visit(query);
 
     /// Rewrite function names to their canonical ones.
-    FunctionNameNormalizer().visit(query.get());
+    if (settings.normalize_function_names)
+        FunctionNameNormalizer().visit(query.get());
 
     /// Common subexpression elimination. Rewrite rules.
     QueryNormalizer::Data normalizer_data(aliases, settings);
diff --git a/src/Interpreters/evaluateConstantExpression.cpp b/src/Interpreters/evaluateConstantExpression.cpp
index 70b9baa544f..42e96bae07b 100644
--- a/src/Interpreters/evaluateConstantExpression.cpp
+++ b/src/Interpreters/evaluateConstantExpression.cpp
@@ -36,7 +36,10 @@ std::pair<Field, std::shared_ptr<const IDataType>> evaluateConstantExpression(co
     auto ast = node->clone();
     ReplaceQueryParameterVisitor param_visitor(context.getQueryParameters());
     param_visitor.visit(ast);
-    FunctionNameNormalizer().visit(ast.get());
+
+    if (context.getSettingsRef().normalize_function_names)
+        FunctionNameNormalizer().visit(ast.get());
+
     String name = ast->getColumnName();
     auto syntax_result = TreeRewriter(context).analyze(ast, source_columns);
     ExpressionActionsPtr expr_for_constant_folding = ExpressionAnalyzer(ast, syntax_result, context).getConstActions();
diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp
index c207d188a85..430a01bb97a 100644
--- a/src/Server/TCPHandler.cpp
+++ b/src/Server/TCPHandler.cpp
@@ -1133,6 +1133,12 @@ void TCPHandler::receiveQuery()
     }
     query_context->applySettingsChanges(settings_changes);
 
+    /// Disable function name normalization it's not an initial query.
+    if (client_info.query_kind != ClientInfo::QueryKind::INITIAL_QUERY)
+    {
+        query_context->setSetting("normalize_function_names", Field(0));
+    }
+
     // Use the received query id, or generate a random default. It is convenient
     // to also generate the default OpenTelemetry trace id at the same time, and
     // set the trace parent.

From 97f4c457ec979fc489892472dfb50a93062b4ce5 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Thu, 18 Feb 2021 16:27:51 +0300
Subject: [PATCH 1156/1238] fix MySQL COMM_FIELD_LIST response

---
 docker/test/fasttest/run.sh                   |  1 +
 docker/test/stateless/Dockerfile              |  3 ++-
 src/Core/MySQL/PacketsProtocolText.cpp        | 22 +++++++++++++---
 src/Core/MySQL/PacketsProtocolText.h          |  5 +++-
 src/Server/MySQLHandler.cpp                   |  2 +-
 .../01176_mysql_client_interactive.expect     | 26 +++++++++++++++++++
 .../01176_mysql_client_interactive.reference  |  0
 tests/queries/shell_config.sh                 | 13 ++++++++++
 8 files changed, 65 insertions(+), 7 deletions(-)
 create mode 100755 tests/queries/0_stateless/01176_mysql_client_interactive.expect
 create mode 100644 tests/queries/0_stateless/01176_mysql_client_interactive.reference

diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh
index e6294b5d74d..7e7c8116901 100755
--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@@ -259,6 +259,7 @@ function run_tests
         00929_multi_match_edit_distance
         01681_hyperscan_debug_assertion
 
+        01176_mysql_client_interactive          # requires mysql client
         01031_mutations_interpreter_and_context
         01053_ssd_dictionary # this test mistakenly requires acces to /var/lib/clickhouse -- can't run this locally, disabled
         01083_expressions_in_engine_arguments
diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile
index b063f8d81f6..f2e3016692f 100644
--- a/docker/test/stateless/Dockerfile
+++ b/docker/test/stateless/Dockerfile
@@ -23,7 +23,8 @@ RUN apt-get update -y \
             telnet \
             tree \
             unixodbc \
-            wget
+            wget \
+            mysql-client-5.7
 
 RUN pip3 install numpy scipy pandas
 
diff --git a/src/Core/MySQL/PacketsProtocolText.cpp b/src/Core/MySQL/PacketsProtocolText.cpp
index ad34cd8c28d..62efe549b33 100644
--- a/src/Core/MySQL/PacketsProtocolText.cpp
+++ b/src/Core/MySQL/PacketsProtocolText.cpp
@@ -62,10 +62,10 @@ ColumnDefinition::ColumnDefinition()
 
 ColumnDefinition::ColumnDefinition(
     String schema_, String table_, String org_table_, String name_, String org_name_, uint16_t character_set_, uint32_t column_length_,
-    ColumnType column_type_, uint16_t flags_, uint8_t decimals_)
+    ColumnType column_type_, uint16_t flags_, uint8_t decimals_, bool with_defaults_)
     : schema(std::move(schema_)), table(std::move(table_)), org_table(std::move(org_table_)), name(std::move(name_)),
       org_name(std::move(org_name_)), character_set(character_set_), column_length(column_length_), column_type(column_type_),
-      flags(flags_), decimals(decimals_)
+      flags(flags_), decimals(decimals_), is_comm_field_list_response(with_defaults_)
 {
 }
 
@@ -77,8 +77,15 @@ ColumnDefinition::ColumnDefinition(
 
 size_t ColumnDefinition::getPayloadSize() const
 {
-    return 12 + getLengthEncodedStringSize("def") + getLengthEncodedStringSize(schema) + getLengthEncodedStringSize(table) + getLengthEncodedStringSize(org_table) + \
-            getLengthEncodedStringSize(name) + getLengthEncodedStringSize(org_name) + getLengthEncodedNumberSize(next_length);
+    return 12 +
+           getLengthEncodedStringSize("def") +
+           getLengthEncodedStringSize(schema) +
+           getLengthEncodedStringSize(table) +
+           getLengthEncodedStringSize(org_table) +
+           getLengthEncodedStringSize(name) +
+           getLengthEncodedStringSize(org_name) +
+           getLengthEncodedNumberSize(next_length) +
+           is_comm_field_list_response;
 }
 
 void ColumnDefinition::readPayloadImpl(ReadBuffer & payload)
@@ -115,6 +122,13 @@ void ColumnDefinition::writePayloadImpl(WriteBuffer & buffer) const
     buffer.write(reinterpret_cast<const char *>(&flags), 2);
     buffer.write(reinterpret_cast<const char *>(&decimals), 1);
     writeChar(0x0, 2, buffer);
+    if (is_comm_field_list_response)
+    {
+        /// We should write length encoded int with string size
+        /// followed by string with some "default values" (possibly it's column defaults).
+        /// But we just send NULL for simplicity.
+        writeChar(0xfb, buffer);
+    }
 }
 
 ColumnDefinition getColumnDefinition(const String & column_name, const TypeIndex type_index)
diff --git a/src/Core/MySQL/PacketsProtocolText.h b/src/Core/MySQL/PacketsProtocolText.h
index d449e94cff1..b54b1c5ca19 100644
--- a/src/Core/MySQL/PacketsProtocolText.h
+++ b/src/Core/MySQL/PacketsProtocolText.h
@@ -101,6 +101,9 @@ public:
     ColumnType column_type;
     uint16_t flags;
     uint8_t decimals = 0x00;
+    /// https://dev.mysql.com/doc/internals/en/com-query-response.html#column-definition
+    /// There are extra fields in the packet for column defaults
+    bool is_comm_field_list_response = false;
 
 protected:
     size_t getPayloadSize() const override;
@@ -114,7 +117,7 @@ public:
 
     ColumnDefinition(
         String schema_, String table_, String org_table_, String name_, String org_name_, uint16_t character_set_, uint32_t column_length_,
-        ColumnType column_type_, uint16_t flags_, uint8_t decimals_);
+        ColumnType column_type_, uint16_t flags_, uint8_t decimals_, bool with_defaults_ = false);
 
     /// Should be used when column metadata (original name, table, original table, database) is unknown.
     ColumnDefinition(
diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp
index 3cbe285615e..ea2813cf639 100644
--- a/src/Server/MySQLHandler.cpp
+++ b/src/Server/MySQLHandler.cpp
@@ -289,7 +289,7 @@ void MySQLHandler::comFieldList(ReadBuffer & payload)
     for (const NameAndTypePair & column : metadata_snapshot->getColumns().getAll())
     {
         ColumnDefinition column_definition(
-            database, packet.table, packet.table, column.name, column.name, CharacterSet::binary, 100, ColumnType::MYSQL_TYPE_STRING, 0, 0
+            database, packet.table, packet.table, column.name, column.name, CharacterSet::binary, 100, ColumnType::MYSQL_TYPE_STRING, 0, 0, true
         );
         packet_endpoint->sendPacket(column_definition);
     }
diff --git a/tests/queries/0_stateless/01176_mysql_client_interactive.expect b/tests/queries/0_stateless/01176_mysql_client_interactive.expect
new file mode 100755
index 00000000000..d592bbe1ce2
--- /dev/null
+++ b/tests/queries/0_stateless/01176_mysql_client_interactive.expect
@@ -0,0 +1,26 @@
+#!/usr/bin/expect -f
+
+log_user 0
+set timeout 5
+match_max 100000
+# A default timeout action is to do nothing, change it to fail
+expect_after {
+    timeout {
+        exit 1
+    }
+}
+
+set basedir [file dirname $argv0]
+spawn bash -c "source $basedir/../shell_config.sh ; \$MYSQL_CLIENT_BINARY \$MYSQL_CLIENT_OPT"
+expect "mysql> "
+
+send -- "USE system;\r"
+expect "Database changed"
+
+send -- "SELECT * FROM one;\r"
+expect "| dummy |"
+expect "|     0 |"
+expect "1 row in set"
+
+send -- "quit;\r"
+expect eof
diff --git a/tests/queries/0_stateless/01176_mysql_client_interactive.reference b/tests/queries/0_stateless/01176_mysql_client_interactive.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/shell_config.sh b/tests/queries/shell_config.sh
index eed77fb107d..d20b5669cc5 100644
--- a/tests/queries/shell_config.sh
+++ b/tests/queries/shell_config.sh
@@ -54,6 +54,8 @@ export CLICKHOUSE_PORT_HTTP=${CLICKHOUSE_PORT_HTTP:="8123"}
 export CLICKHOUSE_PORT_HTTPS=${CLICKHOUSE_PORT_HTTPS:=$(${CLICKHOUSE_EXTRACT_CONFIG} --try --key=https_port 2>/dev/null)} 2>/dev/null
 export CLICKHOUSE_PORT_HTTPS=${CLICKHOUSE_PORT_HTTPS:="8443"}
 export CLICKHOUSE_PORT_HTTP_PROTO=${CLICKHOUSE_PORT_HTTP_PROTO:="http"}
+export CLICKHOUSE_PORT_MYSQL=${CLICKHOUSE_PORT_MYSQL:=$(${CLICKHOUSE_EXTRACT_CONFIG} --try --key=mysql_port 2>/dev/null)} 2>/dev/null
+export CLICKHOUSE_PORT_MYSQL=${CLICKHOUSE_PORT_MYSQL:="9004"}
 
 # Add database and log comment to url params
 if [ -v CLICKHOUSE_URL_PARAMS ]
@@ -87,6 +89,17 @@ export CLICKHOUSE_CURL=${CLICKHOUSE_CURL:="${CLICKHOUSE_CURL_COMMAND} -q -s --ma
 export CLICKHOUSE_TMP=${CLICKHOUSE_TMP:="."}
 mkdir -p ${CLICKHOUSE_TMP}
 
+export MYSQL_CLIENT_BINARY=${MYSQL_CLIENT_BINARY:="mysql"}
+export MYSQL_CLIENT_CLICKHOUSE_USER=${MYSQL_CLIENT_CLICKHOUSE_USER:="default"}
+# Avoids "Can't connect to local MySQL server through socket '/var/run/mysqld/mysqld.sock'" when connecting to localhost
+[ -v CLICKHOUSE_HOST ] && MYSQL_CLIENT_OPT0+=" --protocol tcp "
+[ -v CLICKHOUSE_HOST ] && MYSQL_CLIENT_OPT0+=" --host ${CLICKHOUSE_HOST} "
+[ -v CLICKHOUSE_PORT_MYSQL ] && MYSQL_CLIENT_OPT0+=" --port ${CLICKHOUSE_PORT_MYSQL} "
+[ -v CLICKHOUSE_DATABASE ] && MYSQL_CLIENT_OPT0+=" --database ${CLICKHOUSE_DATABASE} "
+MYSQL_CLIENT_OPT0+=" --user ${MYSQL_CLIENT_CLICKHOUSE_USER} "
+export MYSQL_CLIENT_OPT="${MYSQL_CLIENT_OPT0:-} ${MYSQL_CLIENT_OPT:-}"
+export MYSQL_CLIENT=${MYSQL_CLIENT:="$MYSQL_CLIENT_BINARY ${MYSQL_CLIENT_OPT:-}"}
+
 function clickhouse_client_removed_host_parameter()
 {
     # removing only `--host=value` and `--host value` (removing '-hvalue' feels to dangerous) with python regex.

From 1bad1e3a7ca49af3c990999ae414bc1bcc4fc3ea Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Thu, 18 Feb 2021 17:37:51 +0300
Subject: [PATCH 1157/1238] fix dockerfile

---
 docker/test/stateless/Dockerfile | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile
index f2e3016692f..ba3355db89b 100644
--- a/docker/test/stateless/Dockerfile
+++ b/docker/test/stateless/Dockerfile
@@ -3,6 +3,9 @@ FROM yandex/clickhouse-test-base
 
 ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.4.20200302/clickhouse-odbc-1.1.4-Linux.tar.gz"
 
+RUN echo "deb [trusted=yes] http://repo.mysql.com/apt/ubuntu/ bionic mysql-5.7" >> /etc/apt/sources.list \
+    && apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 8C718D3B5072E1F5
+
 RUN apt-get update -y \
     && env DEBIAN_FRONTEND=noninteractive \
         apt-get install --yes --no-install-recommends \
@@ -24,7 +27,7 @@ RUN apt-get update -y \
             tree \
             unixodbc \
             wget \
-            mysql-client-5.7
+            mysql-client=5.7*
 
 RUN pip3 install numpy scipy pandas
 

From b854a7b7f8e80b9701b02e5218e37965631541f7 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 18 Feb 2021 21:41:50 +0300
Subject: [PATCH 1158/1238] Add some details into comment for
 first_stage/second_stage

Regardless
distributed_group_by_no_merge=2/optimize_distributed_group_by_sharding_key
---
 src/Interpreters/InterpreterSelectQuery.cpp | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp
index 9f97160f77f..9f48a9a193b 100644
--- a/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/src/Interpreters/InterpreterSelectQuery.cpp
@@ -561,10 +561,20 @@ Block InterpreterSelectQuery::getSampleBlockImpl()
     if (storage && !options.only_analyze)
         from_stage = storage->getQueryProcessingStage(*context, options.to_stage, query_info);
 
-    /// Do I need to perform the first part of the pipeline - running on remote servers during distributed processing.
+    /// Do I need to perform the first part of the pipeline?
+    /// Running on remote servers during distributed processing or if query is not distributed.
+    ///
+    /// Also note that with distributed_group_by_no_merge=1 or when there is
+    /// only one remote server, it is equal to local query in terms of query
+    /// stages (or when due to optimize_distributed_group_by_sharding_key the query was processed up to Complete stage).
     bool first_stage = from_stage < QueryProcessingStage::WithMergeableState
         && options.to_stage >= QueryProcessingStage::WithMergeableState;
-    /// Do I need to execute the second part of the pipeline - running on the initiating server during distributed processing.
+    /// Do I need to execute the second part of the pipeline?
+    /// Running on the initiating server during distributed processing or if query is not distributed.
+    ///
+    /// Also note that with distributed_group_by_no_merge=2 (i.e. when optimize_distributed_group_by_sharding_key takes place)
+    /// the query on the remote server will be processed up to WithMergeableStateAfterAggregation,
+    /// So it will do partial second stage (second_stage=true), and initiator will do the final part.
     bool second_stage = from_stage <= QueryProcessingStage::WithMergeableState
         && options.to_stage > QueryProcessingStage::WithMergeableState;
 

From af660140c320ca45bca0edfd89000b3c6da8ee6a Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 18 Feb 2021 21:41:50 +0300
Subject: [PATCH 1159/1238] Do only merging of sorted blocks on initiator with
 distributed_group_by_no_merge=2

When distributed_group_by_no_merge=2 is used (or when
optimize_distributed_group_by_sharding_key takes place), remote servers
will do full ORDER BY, so initiator can skip this step and do only merge
of ordered blocks.
---
 src/Interpreters/InterpreterSelectQuery.cpp   |  8 +++++++-
 ...buted_group_by_no_merge_order_by.reference | 20 +++++++++++++++++++
 ...distributed_group_by_no_merge_order_by.sql | 20 +++++++++++++++++++
 3 files changed, 47 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by.reference
 create mode 100644 tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by.sql

diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp
index 9f48a9a193b..3008c55973d 100644
--- a/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/src/Interpreters/InterpreterSelectQuery.cpp
@@ -1103,9 +1103,15 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
                 /** If there is an ORDER BY for distributed query processing,
                   *  but there is no aggregation, then on the remote servers ORDER BY was made
                   *  - therefore, we merge the sorted streams from remote servers.
+                  *
+                  * Also in case of remote servers was process the query up to WithMergeableStateAfterAggregation
+                  * (distributed_group_by_no_merge=2 or optimize_distributed_group_by_sharding_key=1 takes place),
+                  * then merge the sorted streams is enough, since remote servers already did full ORDER BY.
                   */
 
-                if (!expressions.first_stage && !expressions.need_aggregate && !(query.group_by_with_totals && !aggregate_final))
+                if (from_aggregation_stage)
+                    executeMergeSorted(query_plan, "for ORDER BY");
+                else if (!expressions.first_stage && !expressions.need_aggregate && !(query.group_by_with_totals && !aggregate_final))
                     executeMergeSorted(query_plan, "for ORDER BY");
                 else    /// Otherwise, just sort.
                     executeOrder(query_plan, query_info.input_order_info);
diff --git a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by.reference b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by.reference
new file mode 100644
index 00000000000..02ae8a37e52
--- /dev/null
+++ b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by.reference
@@ -0,0 +1,20 @@
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
diff --git a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by.sql b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by.sql
new file mode 100644
index 00000000000..e43b81dca48
--- /dev/null
+++ b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by.sql
@@ -0,0 +1,20 @@
+drop table if exists data_01730;
+
+-- does not use 127.1 due to prefer_localhost_replica
+
+select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by number order by number limit 20 settings distributed_group_by_no_merge=0, max_memory_usage='100Mi'; -- { serverError 241 }
+-- no memory limit error, because with distributed_group_by_no_merge=2 remote servers will do ORDER BY and will cut to the LIMIT
+select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by number order by number limit 20 settings distributed_group_by_no_merge=2, max_memory_usage='100Mi';
+
+-- since the MergingSortedTransform will start processing only when all ports (remotes) will have some data,
+-- and the query with GROUP BY on remote servers will first do GROUP BY and then send the block,
+-- so the initiator will first receive all blocks from remotes and only after start merging,
+-- and will hit the memory limit.
+select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by number order by number limit 1e6 settings distributed_group_by_no_merge=2, max_memory_usage='100Mi'; -- { serverError 241 }
+
+-- with optimize_aggregation_in_order=1 remote servers will produce blocks more frequently,
+-- since they don't need to wait until the aggregation will be finished,
+-- and so the query will not hit the memory limit error.
+create table data_01730 engine=MergeTree() order by key as select number key from numbers(1e6);
+select * from remote('127.{2..11}', currentDatabase(), data_01730) group by key order by key limit 1e6 settings distributed_group_by_no_merge=2, max_memory_usage='100Mi', optimize_aggregation_in_order=1 format Null;
+drop table data_01730;

From 9c01869090e873603b3bb7ec1cd17fbcf264bc4f Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 18 Feb 2021 21:28:42 +0300
Subject: [PATCH 1160/1238] Fix 'Empty task was returned from async task queue'
 on query cancellation

---
 src/Processors/Executors/PipelineExecutor.cpp        |  5 +++++
 .../01731_async_task_queue_wait.reference            |  0
 .../0_stateless/01731_async_task_queue_wait.sh       | 12 ++++++++++++
 3 files changed, 17 insertions(+)
 create mode 100644 tests/queries/0_stateless/01731_async_task_queue_wait.reference
 create mode 100755 tests/queries/0_stateless/01731_async_task_queue_wait.sh

diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp
index 6192828784f..a724f22ed31 100644
--- a/src/Processors/Executors/PipelineExecutor.cpp
+++ b/src/Processors/Executors/PipelineExecutor.cpp
@@ -540,7 +540,12 @@ void PipelineExecutor::executeStepImpl(size_t thread_num, size_t num_threads, st
                     /// If we execute in single thread, wait for async tasks here.
                     auto res = async_task_queue.wait(lock);
                     if (!res)
+                    {
+                        /// The query had been cancelled (finished is also set)
+                        if (finished)
+                            break;
                         throw Exception("Empty task was returned from async task queue", ErrorCodes::LOGICAL_ERROR);
+                    }
 
                     node = static_cast<ExecutingGraph::Node *>(res.data);
                     break;
diff --git a/tests/queries/0_stateless/01731_async_task_queue_wait.reference b/tests/queries/0_stateless/01731_async_task_queue_wait.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/01731_async_task_queue_wait.sh b/tests/queries/0_stateless/01731_async_task_queue_wait.sh
new file mode 100755
index 00000000000..eddbfdf5322
--- /dev/null
+++ b/tests/queries/0_stateless/01731_async_task_queue_wait.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+# regression for 'Empty task was returned from async task queue' during query
+# cancellation with async_socket_for_remote=1 (that ignores
+# max_distributed_connections)
+timeout 5s ${CLICKHOUSE_CLIENT} --max_distributed_connections=1 --format Null -q "select * from remote('127.{2..11}', view(select * from numbers(1e9))) group by number format Null"
+# timedout
+test $? -eq 124

From 865dca0b0d7c2327e56b609a56f0693d6b43c6d7 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 18 Feb 2021 22:38:21 +0300
Subject: [PATCH 1161/1238] ccache 4.2+ does not requires any quirks for
 SOURCE_DATE_EPOCH

And besides "ccache <some ccache options> compiler" does not work, since
it interpret everything as ccache options.

Refs: https://github.com/ccache/ccache/commit/cad2416291c042443cf0c045047c34a2e07e103a
---
 cmake/find/ccache.cmake | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/cmake/find/ccache.cmake b/cmake/find/ccache.cmake
index d8e9cf9588d..d9ccd1a9ac6 100644
--- a/cmake/find/ccache.cmake
+++ b/cmake/find/ccache.cmake
@@ -37,15 +37,13 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE)
       #
       # - 4.0+ ccache always includes this environment variable into the hash
       #   of the manifest, which do not allow to use previous cache,
-      # - 4.2+ ccache ignores SOURCE_DATE_EPOCH under time_macros sloppiness.
+      # - 4.2+ ccache ignores SOURCE_DATE_EPOCH for every file w/o __DATE__/__TIME__
       #
       # So for:
-      # - 4.2+ time_macros sloppiness is used,
+      # - 4.2+ does not require any sloppiness
       # - 4.0+ will ignore SOURCE_DATE_EPOCH environment variable.
       if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.2")
-         message(STATUS "Use time_macros sloppiness for ccache")
-         set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_FOUND} --set-config=sloppiness=time_macros")
-         set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK "${CCACHE_FOUND} --set-config=sloppiness=time_macros")
+         message(STATUS "ccache is 4.2+ no quirks for SOURCE_DATE_EPOCH required")
       elseif (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0")
          message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache")
          set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}")

From 7bcfe92cd7ba75f7d2ee2d58be3ec51f627a807f Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 18 Feb 2021 23:29:38 +0300
Subject: [PATCH 1162/1238] Mark 01730_distributed_group_by_no_merge_order_by
 as long

https://clickhouse-test-reports.s3.yandex.net/20882/af660140c320ca45bca0edfd89000b3c6da8ee6a/functional_stateless_tests_flaky_check_(address).html#fail1
---
 ...> 01730_distributed_group_by_no_merge_order_by_long.reference} | 0
 ....sql => 01730_distributed_group_by_no_merge_order_by_long.sql} | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename tests/queries/0_stateless/{01730_distributed_group_by_no_merge_order_by.reference => 01730_distributed_group_by_no_merge_order_by_long.reference} (100%)
 rename tests/queries/0_stateless/{01730_distributed_group_by_no_merge_order_by.sql => 01730_distributed_group_by_no_merge_order_by_long.sql} (100%)

diff --git a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by.reference b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.reference
similarity index 100%
rename from tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by.reference
rename to tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.reference
diff --git a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by.sql b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql
similarity index 100%
rename from tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by.sql
rename to tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql

From fc185e5fb73dc0ac82ab8b0b7a79518832401379 Mon Sep 17 00:00:00 2001
From: Amos Bird <amosbird@gmail.com>
Date: Fri, 19 Feb 2021 11:56:24 +0800
Subject: [PATCH 1163/1238] Another try

---
 src/Server/TCPHandler.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp
index 430a01bb97a..9794a86d3e3 100644
--- a/src/Server/TCPHandler.cpp
+++ b/src/Server/TCPHandler.cpp
@@ -1133,8 +1133,8 @@ void TCPHandler::receiveQuery()
     }
     query_context->applySettingsChanges(settings_changes);
 
-    /// Disable function name normalization it's not an initial query.
-    if (client_info.query_kind != ClientInfo::QueryKind::INITIAL_QUERY)
+    /// Disable function name normalization it's a secondary query.
+    if (client_info.query_kind == ClientInfo::QueryKind::SECONDARY_QUERY)
     {
         query_context->setSetting("normalize_function_names", Field(0));
     }

From b9d6df9618c6a1b0efcd17c66cfa22aaa023d97a Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Fri, 19 Feb 2021 11:49:41 +0300
Subject: [PATCH 1164/1238] Check for eintr in epoll_wait

---
 src/Client/PacketReceiver.h                   | 145 ++++++++++++++++++
 .../RemoteQueryExecutorReadContext.cpp        |  10 +-
 src/Processors/Executors/PollingQueue.cpp     |   7 +-
 3 files changed, 156 insertions(+), 6 deletions(-)
 create mode 100644 src/Client/PacketReceiver.h

diff --git a/src/Client/PacketReceiver.h b/src/Client/PacketReceiver.h
new file mode 100644
index 00000000000..c9475bafa71
--- /dev/null
+++ b/src/Client/PacketReceiver.h
@@ -0,0 +1,145 @@
+#pragma once
+
+#if defined(OS_LINUX)
+
+#include <Client/IConnections.h>
+#include <Common/FiberStack.h>
+#include <Common/Fiber.h>
+#include <Common/Epoll.h>
+#include <Common/TimerDescriptor.h>
+
+namespace DB
+{
+
+/// Class for nonblocking packet receiving. It runs connection->receivePacket
+/// in fiber and sets special read callback which is called when
+/// reading from socket blocks. When read callback is called,
+/// socket and receive timeout are added in epoll and execution returns to the main program.
+/// So, you can poll this epoll file descriptor to determine when to resume
+/// packet receiving (beside polling epoll descriptor, you also need to check connection->hasPendingData(),
+/// because small packet can be read in buffer with the previous one, so new packet will be ready in buffer,
+/// but there is no data socket to poll).
+class PacketReceiver
+{
+public:
+    PacketReceiver(Connection * connection_) : connection(connection_)
+    {
+        epoll.add(receive_timeout.getDescriptor());
+        epoll.add(connection->getSocket()->impl()->sockfd());
+        fiber = boost::context::fiber(std::allocator_arg_t(), fiber_stack, Routine{*this});
+    }
+
+    /// Resume packet receiving.
+    void resume()
+    {
+        /// If there is no pending data, check receive timeout.
+        if (!connection->hasReadPendingData() && !checkReceiveTimeout())
+            return;
+
+        fiber = std::move(fiber).resume();
+        if (exception)
+            std::rethrow_exception(std::move(exception));
+    }
+
+    void cancel()
+    {
+        Fiber to_destroy = std::move(fiber);
+        connection = nullptr;
+    }
+
+    Packet getPacket() { return std::move(packet); }
+
+    int getFileDescriptor() const { return epoll.getFileDescriptor(); }
+
+    bool isPacketReady() const { return !is_read_in_process; }
+
+    bool isReceiveTimeoutExpired() const { return is_receive_timeout_expired; }
+
+private:
+    /// When epoll file descriptor is ready, check if it's an expired timeout
+    bool checkReceiveTimeout()
+    {
+        bool is_socket_ready = false;
+        is_receive_timeout_expired = false;
+
+        epoll_event events[2];
+        events[0].data.fd = events[1].data.fd = -1;
+        size_t ready_count = epoll.getManyReady(2, events, true);
+
+        for (size_t i = 0; i != ready_count; ++i)
+        {
+            if (events[i].data.fd == connection->getSocket()->impl()->sockfd())
+                is_socket_ready = true;
+            if (events[i].data.fd == receive_timeout.getDescriptor())
+                is_receive_timeout_expired = true;
+        }
+
+        if (is_receive_timeout_expired && !is_socket_ready)
+        {
+            receive_timeout.reset();
+            return false;
+        }
+
+        return true;
+    }
+
+    struct Routine
+    {
+        PacketReceiver & receiver;
+
+        struct ReadCallback
+        {
+            PacketReceiver & receiver;
+            Fiber & sink;
+
+            void operator()(int, const Poco::Timespan & timeout, const std::string &)
+            {
+                receiver.receive_timeout.setRelative(timeout);
+                receiver.is_read_in_process = true;
+                sink = std::move(sink).resume();
+                receiver.is_read_in_process = false;
+                receiver.receive_timeout.reset();
+            }
+        };
+
+        Fiber operator()(Fiber && sink)
+        {
+            try
+            {
+                AsyncCallbackSetter async_setter(receiver.connection, ReadCallback{receiver, sink});
+                while (true)
+                {
+                    receiver.packet = receiver.connection->receivePacket();
+                    sink = std::move(sink).resume();
+                }
+
+            }
+            catch (const boost::context::detail::forced_unwind &)
+            {
+                /// This exception is thrown by fiber implementation in case if fiber is being deleted but hasn't exited
+                /// It should not be caught or it will segfault.
+                /// Other exceptions must be caught
+                throw;
+            }
+            catch (...)
+            {
+                receiver.exception = std::current_exception();
+            }
+
+            return std::move(sink);
+        }
+    };
+
+    Connection * connection;
+    TimerDescriptor receive_timeout;
+    Epoll epoll;
+    Fiber fiber;
+    FiberStack fiber_stack;
+    Packet packet;
+    bool is_read_in_process = false;
+    bool is_receive_timeout_expired = false;
+    std::exception_ptr exception;
+};
+
+}
+#endif
diff --git a/src/DataStreams/RemoteQueryExecutorReadContext.cpp b/src/DataStreams/RemoteQueryExecutorReadContext.cpp
index bc47b049407..c79fffafcb1 100644
--- a/src/DataStreams/RemoteQueryExecutorReadContext.cpp
+++ b/src/DataStreams/RemoteQueryExecutorReadContext.cpp
@@ -146,9 +146,13 @@ bool RemoteQueryExecutorReadContext::checkTimeoutImpl() const
     events[0].data.fd = events[1].data.fd = events[2].data.fd = -1;
 
     /// Wait for epoll_fd will not block if it was polled externally.
-    int num_events = epoll_wait(epoll_fd, events, 3, 0);
-    if (num_events == -1)
-        throwFromErrno("Failed to epoll_wait", ErrorCodes::CANNOT_READ_FROM_SOCKET);
+    int num_events = 0;
+    while (num_events <= 0)
+    {
+        num_events = epoll_wait(epoll_fd, events, 3, 0);
+        if (num_events == -1 && errno != EINTR)
+            throwFromErrno("Failed to epoll_wait", ErrorCodes::CANNOT_READ_FROM_SOCKET);
+    }
 
     bool is_socket_ready = false;
     bool is_pipe_alarmed = false;
diff --git a/src/Processors/Executors/PollingQueue.cpp b/src/Processors/Executors/PollingQueue.cpp
index 93edfe53987..b9c7bdade2d 100644
--- a/src/Processors/Executors/PollingQueue.cpp
+++ b/src/Processors/Executors/PollingQueue.cpp
@@ -88,11 +88,12 @@ PollingQueue::TaskData PollingQueue::wait(std::unique_lock<std::mutex> & lock)
     event.data.ptr = nullptr;
     int num_events = 0;
 
-    while (num_events == 0)
+    while (num_events <= 0)
     {
         num_events = epoll_wait(epoll_fd, &event, 1, 0);
-        if (num_events == -1)
-            throwFromErrno("Failed to epoll_wait", ErrorCodes::CANNOT_READ_FROM_SOCKET);
+
+        if (num_events == -1 && errno != EINTR)
+                throwFromErrno("Failed to epoll_wait", ErrorCodes::CANNOT_READ_FROM_SOCKET);
     }
 
     lock.lock();

From 7d1119680e7881af7f5934773721cb48f40b35e7 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Fri, 19 Feb 2021 11:52:33 +0300
Subject: [PATCH 1165/1238] Remove not needed file.

---
 src/Client/PacketReceiver.h | 145 ------------------------------------
 1 file changed, 145 deletions(-)
 delete mode 100644 src/Client/PacketReceiver.h

diff --git a/src/Client/PacketReceiver.h b/src/Client/PacketReceiver.h
deleted file mode 100644
index c9475bafa71..00000000000
--- a/src/Client/PacketReceiver.h
+++ /dev/null
@@ -1,145 +0,0 @@
-#pragma once
-
-#if defined(OS_LINUX)
-
-#include <Client/IConnections.h>
-#include <Common/FiberStack.h>
-#include <Common/Fiber.h>
-#include <Common/Epoll.h>
-#include <Common/TimerDescriptor.h>
-
-namespace DB
-{
-
-/// Class for nonblocking packet receiving. It runs connection->receivePacket
-/// in fiber and sets special read callback which is called when
-/// reading from socket blocks. When read callback is called,
-/// socket and receive timeout are added in epoll and execution returns to the main program.
-/// So, you can poll this epoll file descriptor to determine when to resume
-/// packet receiving (beside polling epoll descriptor, you also need to check connection->hasPendingData(),
-/// because small packet can be read in buffer with the previous one, so new packet will be ready in buffer,
-/// but there is no data socket to poll).
-class PacketReceiver
-{
-public:
-    PacketReceiver(Connection * connection_) : connection(connection_)
-    {
-        epoll.add(receive_timeout.getDescriptor());
-        epoll.add(connection->getSocket()->impl()->sockfd());
-        fiber = boost::context::fiber(std::allocator_arg_t(), fiber_stack, Routine{*this});
-    }
-
-    /// Resume packet receiving.
-    void resume()
-    {
-        /// If there is no pending data, check receive timeout.
-        if (!connection->hasReadPendingData() && !checkReceiveTimeout())
-            return;
-
-        fiber = std::move(fiber).resume();
-        if (exception)
-            std::rethrow_exception(std::move(exception));
-    }
-
-    void cancel()
-    {
-        Fiber to_destroy = std::move(fiber);
-        connection = nullptr;
-    }
-
-    Packet getPacket() { return std::move(packet); }
-
-    int getFileDescriptor() const { return epoll.getFileDescriptor(); }
-
-    bool isPacketReady() const { return !is_read_in_process; }
-
-    bool isReceiveTimeoutExpired() const { return is_receive_timeout_expired; }
-
-private:
-    /// When epoll file descriptor is ready, check if it's an expired timeout
-    bool checkReceiveTimeout()
-    {
-        bool is_socket_ready = false;
-        is_receive_timeout_expired = false;
-
-        epoll_event events[2];
-        events[0].data.fd = events[1].data.fd = -1;
-        size_t ready_count = epoll.getManyReady(2, events, true);
-
-        for (size_t i = 0; i != ready_count; ++i)
-        {
-            if (events[i].data.fd == connection->getSocket()->impl()->sockfd())
-                is_socket_ready = true;
-            if (events[i].data.fd == receive_timeout.getDescriptor())
-                is_receive_timeout_expired = true;
-        }
-
-        if (is_receive_timeout_expired && !is_socket_ready)
-        {
-            receive_timeout.reset();
-            return false;
-        }
-
-        return true;
-    }
-
-    struct Routine
-    {
-        PacketReceiver & receiver;
-
-        struct ReadCallback
-        {
-            PacketReceiver & receiver;
-            Fiber & sink;
-
-            void operator()(int, const Poco::Timespan & timeout, const std::string &)
-            {
-                receiver.receive_timeout.setRelative(timeout);
-                receiver.is_read_in_process = true;
-                sink = std::move(sink).resume();
-                receiver.is_read_in_process = false;
-                receiver.receive_timeout.reset();
-            }
-        };
-
-        Fiber operator()(Fiber && sink)
-        {
-            try
-            {
-                AsyncCallbackSetter async_setter(receiver.connection, ReadCallback{receiver, sink});
-                while (true)
-                {
-                    receiver.packet = receiver.connection->receivePacket();
-                    sink = std::move(sink).resume();
-                }
-
-            }
-            catch (const boost::context::detail::forced_unwind &)
-            {
-                /// This exception is thrown by fiber implementation in case if fiber is being deleted but hasn't exited
-                /// It should not be caught or it will segfault.
-                /// Other exceptions must be caught
-                throw;
-            }
-            catch (...)
-            {
-                receiver.exception = std::current_exception();
-            }
-
-            return std::move(sink);
-        }
-    };
-
-    Connection * connection;
-    TimerDescriptor receive_timeout;
-    Epoll epoll;
-    Fiber fiber;
-    FiberStack fiber_stack;
-    Packet packet;
-    bool is_read_in_process = false;
-    bool is_receive_timeout_expired = false;
-    std::exception_ptr exception;
-};
-
-}
-#endif

From 39f07d62a42288b83f8c5e46e026ebf9d051601d Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Fri, 19 Feb 2021 12:02:18 +0300
Subject: [PATCH 1166/1238] Disable in-memory compression by default

---
 src/Storages/MemorySettings.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storages/MemorySettings.h b/src/Storages/MemorySettings.h
index 4a1ba57475f..5e3b5f81ba5 100644
--- a/src/Storages/MemorySettings.h
+++ b/src/Storages/MemorySettings.h
@@ -9,7 +9,7 @@ class ASTStorage;
 
 
 #define MEMORY_SETTINGS(M) \
-    M(Bool, compress, true, "Compress data in memory", 0) \
+    M(Bool, compress, false, "Compress data in memory", 0) \
 
 DECLARE_SETTINGS_TRAITS(memorySettingsTraits, MEMORY_SETTINGS)
 

From d438d7e390648d6be1c9718b58a18389d4d68650 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Fri, 19 Feb 2021 12:07:34 +0300
Subject: [PATCH 1167/1238] Fix timeout in epoll_wait for PollingQueue

---
 src/Processors/Executors/PollingQueue.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Processors/Executors/PollingQueue.cpp b/src/Processors/Executors/PollingQueue.cpp
index b9c7bdade2d..3636fa82f73 100644
--- a/src/Processors/Executors/PollingQueue.cpp
+++ b/src/Processors/Executors/PollingQueue.cpp
@@ -90,7 +90,7 @@ PollingQueue::TaskData PollingQueue::wait(std::unique_lock<std::mutex> & lock)
 
     while (num_events <= 0)
     {
-        num_events = epoll_wait(epoll_fd, &event, 1, 0);
+        num_events = epoll_wait(epoll_fd, &event, 1, -1);
 
         if (num_events == -1 && errno != EINTR)
                 throwFromErrno("Failed to epoll_wait", ErrorCodes::CANNOT_READ_FROM_SOCKET);

From ed4697cffc83c3b4c34d11189e9e300c969da618 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Date: Fri, 19 Feb 2021 12:20:24 +0300
Subject: [PATCH 1168/1238] Fix timeout in epoll_wait for
 RemoteQueryExecutorReadContext

---
 src/DataStreams/RemoteQueryExecutorReadContext.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/DataStreams/RemoteQueryExecutorReadContext.cpp b/src/DataStreams/RemoteQueryExecutorReadContext.cpp
index c79fffafcb1..3cc24ad5056 100644
--- a/src/DataStreams/RemoteQueryExecutorReadContext.cpp
+++ b/src/DataStreams/RemoteQueryExecutorReadContext.cpp
@@ -149,7 +149,7 @@ bool RemoteQueryExecutorReadContext::checkTimeoutImpl() const
     int num_events = 0;
     while (num_events <= 0)
     {
-        num_events = epoll_wait(epoll_fd, events, 3, 0);
+        num_events = epoll_wait(epoll_fd, events, 3, -1);
         if (num_events == -1 && errno != EINTR)
             throwFromErrno("Failed to epoll_wait", ErrorCodes::CANNOT_READ_FROM_SOCKET);
     }

From fc1885ea9b01714290fba8ee8fbbe1a78894e573 Mon Sep 17 00:00:00 2001
From: Amos Bird <amosbird@gmail.com>
Date: Fri, 19 Feb 2021 17:28:01 +0800
Subject: [PATCH 1169/1238] Try fixing flaky tests

---
 tests/queries/0_stateless/00643_cast_zookeeper.sql              | 2 ++
 .../queries/0_stateless/01656_test_query_log_factories_info.sql | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/tests/queries/0_stateless/00643_cast_zookeeper.sql b/tests/queries/0_stateless/00643_cast_zookeeper.sql
index c52d44bd88b..c9760f00ca7 100644
--- a/tests/queries/0_stateless/00643_cast_zookeeper.sql
+++ b/tests/queries/0_stateless/00643_cast_zookeeper.sql
@@ -1,3 +1,5 @@
+SET database_atomic_wait_for_drop_and_detach_synchronously=1;
+
 DROP TABLE IF EXISTS cast1;
 DROP TABLE IF EXISTS cast2;
 
diff --git a/tests/queries/0_stateless/01656_test_query_log_factories_info.sql b/tests/queries/0_stateless/01656_test_query_log_factories_info.sql
index 9f374def8b5..17657cf60f5 100644
--- a/tests/queries/0_stateless/01656_test_query_log_factories_info.sql
+++ b/tests/queries/0_stateless/01656_test_query_log_factories_info.sql
@@ -1,3 +1,5 @@
+SET database_atomic_wait_for_drop_and_detach_synchronously=1;
+
 SELECT uniqArray([1, 1, 2]),
        SUBSTRING('Hello, world', 7, 5),
        flatten([[[BIT_AND(123)]], [[mod(3, 2)], [CAST('1' AS INTEGER)]]]),

From 5bbd6f7480281a7acdf5c16ac1efc4626ba51175 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Fri, 19 Feb 2021 12:37:00 +0300
Subject: [PATCH 1170/1238] Fixed documentation

---
 docs/en/sql-reference/functions/hash-functions.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md
index 9394426b20b..14ac288339b 100644
--- a/docs/en/sql-reference/functions/hash-functions.md
+++ b/docs/en/sql-reference/functions/hash-functions.md
@@ -9,7 +9,7 @@ Hash functions can be used for the deterministic pseudo-random shuffling of elem
 
 ## halfMD5 {#hash-functions-halfmd5}
 
-[Interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the [MD5](https://en.wikipedia.org/wiki/MD5) hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the resulting string, and interprets them as `UInt64` in big-endian byte order.
+[Interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-reinterpretAsString) all the input parameters as strings and calculates the [MD5](https://en.wikipedia.org/wiki/MD5) hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the resulting string, and interprets them as `UInt64` in big-endian byte order.
 
 ``` sql
 halfMD5(par1, ...)
@@ -54,7 +54,7 @@ sipHash64(par1,...)
 
 This is a cryptographic hash function. It works at least three times faster than the [MD5](#hash_functions-md5) function.
 
-Function [interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. Then combines hashes by the following algorithm:
+Function [interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. Then combines hashes by the following algorithm:
 
 1.  After hashing all the input parameters, the function gets the array of hashes.
 2.  Function takes the first and the second elements and calculates a hash for the array of them.

From 1c5b10de41a8266b623f5bcc7f3b8d3b72c6982d Mon Sep 17 00:00:00 2001
From: kssenii <sumarokovakseniia@mail.ru>
Date: Fri, 19 Feb 2021 09:23:51 +0000
Subject: [PATCH 1171/1238] Use fixed version for aerospike

---
 docker/test/integration/runner/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile
index 502dc3736b2..e0e5e36a3d6 100644
--- a/docker/test/integration/runner/Dockerfile
+++ b/docker/test/integration/runner/Dockerfile
@@ -58,7 +58,7 @@ RUN dockerd --version; docker --version
 
 RUN python3 -m pip install \
     PyMySQL \
-    aerospike \
+    aerospike==4.0.0 \
     avro \
     cassandra-driver \
     confluent-kafka==1.5.0 \

From 8f8a4f64235e6df11717fb9cb91be55c0673b3f5 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <KochetovNicolai@users.noreply.github.com>
Date: Fri, 19 Feb 2021 13:59:38 +0300
Subject: [PATCH 1172/1238] Update 01731_async_task_queue_wait.sh

---
 tests/queries/0_stateless/01731_async_task_queue_wait.sh | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/queries/0_stateless/01731_async_task_queue_wait.sh b/tests/queries/0_stateless/01731_async_task_queue_wait.sh
index eddbfdf5322..7545ad1e81a 100755
--- a/tests/queries/0_stateless/01731_async_task_queue_wait.sh
+++ b/tests/queries/0_stateless/01731_async_task_queue_wait.sh
@@ -7,6 +7,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # regression for 'Empty task was returned from async task queue' during query
 # cancellation with async_socket_for_remote=1 (that ignores
 # max_distributed_connections)
-timeout 5s ${CLICKHOUSE_CLIENT} --max_distributed_connections=1 --format Null -q "select * from remote('127.{2..11}', view(select * from numbers(1e9))) group by number format Null"
-# timedout
-test $? -eq 124
+$(timeout --signal=SIGINT 1 clickhouse client --max_distributed_connections=1 --max_block_size=2  --interactive_delay=900000 -q "select x  from remote('127.{2,3}', view(select number + sleep(0.3) as x from numbers(16))) settings max_block_size = 2") 2>&1 | grep "Empty task was returned from async task queue"

From df1cf481cf118283c4d9b6afc6eaa419c5834d71 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <KochetovNicolai@users.noreply.github.com>
Date: Fri, 19 Feb 2021 14:14:31 +0300
Subject: [PATCH 1173/1238] Update 01731_async_task_queue_wait.sh

---
 tests/queries/0_stateless/01731_async_task_queue_wait.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/01731_async_task_queue_wait.sh b/tests/queries/0_stateless/01731_async_task_queue_wait.sh
index 7545ad1e81a..936f850791d 100755
--- a/tests/queries/0_stateless/01731_async_task_queue_wait.sh
+++ b/tests/queries/0_stateless/01731_async_task_queue_wait.sh
@@ -7,4 +7,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # regression for 'Empty task was returned from async task queue' during query
 # cancellation with async_socket_for_remote=1 (that ignores
 # max_distributed_connections)
-$(timeout --signal=SIGINT 1 clickhouse client --max_distributed_connections=1 --max_block_size=2  --interactive_delay=900000 -q "select x  from remote('127.{2,3}', view(select number + sleep(0.3) as x from numbers(16))) settings max_block_size = 2") 2>&1 | grep "Empty task was returned from async task queue"
+$(timeout --signal=SIGINT 1 ${CLICKHOUSE_CLIENT} --max_distributed_connections=1 --max_block_size=2  --interactive_delay=900000 -q "select x  from remote('127.{2,3}', view(select number + sleep(0.3) as x from numbers(16))) settings max_block_size = 2") 2>&1 | grep "Empty task was returned from async task queue"

From 866dfaec793f764dc9ba167d3ac9f6521b9b3381 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <KochetovNicolai@users.noreply.github.com>
Date: Fri, 19 Feb 2021 15:25:22 +0300
Subject: [PATCH 1174/1238] Update 01731_async_task_queue_wait.sh

---
 tests/queries/0_stateless/01731_async_task_queue_wait.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/01731_async_task_queue_wait.sh b/tests/queries/0_stateless/01731_async_task_queue_wait.sh
index 936f850791d..89d8b63d745 100755
--- a/tests/queries/0_stateless/01731_async_task_queue_wait.sh
+++ b/tests/queries/0_stateless/01731_async_task_queue_wait.sh
@@ -7,4 +7,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # regression for 'Empty task was returned from async task queue' during query
 # cancellation with async_socket_for_remote=1 (that ignores
 # max_distributed_connections)
-$(timeout --signal=SIGINT 1 ${CLICKHOUSE_CLIENT} --max_distributed_connections=1 --max_block_size=2  --interactive_delay=900000 -q "select x  from remote('127.{2,3}', view(select number + sleep(0.3) as x from numbers(16))) settings max_block_size = 2") 2>&1 | grep "Empty task was returned from async task queue"
+$(timeout --signal=SIGINT 1 ${CLICKHOUSE_CLIENT} --max_distributed_connections=1 --max_block_size=2  --interactive_delay=900000 -q "select x  from remote('127.{2,3}', view(select number + sleep(0.3) as x from numbers(16))) settings max_block_size = 2") 2>&1 | grep "Empty task was returned from async task queue" || true

From 414f470c79eb22b0ca47b82f11625cf80b0231aa Mon Sep 17 00:00:00 2001
From: Ivan <5627721+abyss7@users.noreply.github.com>
Date: Fri, 19 Feb 2021 15:51:26 +0300
Subject: [PATCH 1175/1238] Make Poco HTTP Server zero-copy again (#19516)

* Refactoring: part 1

* Refactoring: part 2

* Handle request using ReadBuffer interface

* Struggles with ReadBuffer's

* Fix URI parsing

* Implement parsing of multipart/form-data

* Check HTTP_LENGTH_REQUIRED before eof() or will hang

* Fix HTTPChunkedReadBuffer

* Fix build and style

* Fix test

* Resist double-eof

* Fix arcadian build
---
 base/daemon/BaseDaemon.h                      |   6 +-
 programs/odbc-bridge/ColumnInfoHandler.cpp    |  12 +-
 programs/odbc-bridge/ColumnInfoHandler.h      |   9 +-
 programs/odbc-bridge/HandlerFactory.cpp       |  15 +-
 programs/odbc-bridge/HandlerFactory.h         |  15 +-
 .../odbc-bridge/IdentifierQuoteHandler.cpp    |  12 +-
 programs/odbc-bridge/IdentifierQuoteHandler.h |   7 +-
 programs/odbc-bridge/MainHandler.cpp          |  22 +-
 programs/odbc-bridge/MainHandler.h            |  11 +-
 programs/odbc-bridge/ODBCBridge.cpp           |  10 +-
 programs/odbc-bridge/PingHandler.cpp          |   2 +-
 programs/odbc-bridge/PingHandler.h            |  14 +-
 programs/odbc-bridge/SchemaAllowedHandler.cpp |  12 +-
 programs/odbc-bridge/SchemaAllowedHandler.h   |  11 +-
 programs/server/Server.cpp                    |  43 +-
 programs/server/Server.h                      |   3 +-
 src/CMakeLists.txt                            |   1 +
 src/Common/HTMLForm.h                         |  42 --
 src/Common/StringUtils/StringUtils.h          |   6 +
 src/Common/formatIPv6.h                       |  12 +-
 src/Common/hex.h                              |   4 +-
 src/Core/ExternalTable.cpp                    |   9 +-
 src/Core/ExternalTable.h                      |  24 +-
 src/IO/EmptyReadBuffer.h                      |  18 +
 src/IO/HTTPChunkedReadBuffer.cpp              |  92 +++++
 src/IO/HTTPChunkedReadBuffer.h                |  25 ++
 src/IO/HTTPCommon.cpp                         |   4 +-
 src/IO/HTTPCommon.h                           |  17 +-
 src/IO/LimitReadBuffer.cpp                    |  42 +-
 src/IO/LimitReadBuffer.h                      |  15 +-
 src/IO/PeekableReadBuffer.cpp                 |  17 +-
 src/IO/PeekableReadBuffer.h                   |   2 +-
 src/IO/ReadBuffer.h                           |  52 ++-
 src/IO/ReadBufferFromPocoSocket.cpp           |   2 +-
 src/IO/ReadBufferFromPocoSocket.h             |  13 +-
 src/IO/ReadHelpers.cpp                        |  19 +
 src/IO/ReadHelpers.h                          |  15 +-
 src/IO/ya.make                                |   2 +-
 src/Interpreters/InterserverIOHandler.h       |  15 +-
 src/Server/HTTP/HTMLForm.cpp                  | 381 ++++++++++++++++++
 src/Server/HTTP/HTMLForm.h                    | 175 ++++++++
 src/Server/HTTP/HTTPRequest.h                 |  10 +
 src/Server/HTTP/HTTPRequestHandler.h          |  19 +
 src/Server/HTTP/HTTPRequestHandlerFactory.h   |  20 +
 src/Server/HTTP/HTTPResponse.h                |  10 +
 src/Server/HTTP/HTTPServer.cpp                |  48 +++
 src/Server/HTTP/HTTPServer.h                  |  46 +++
 src/Server/HTTP/HTTPServerConnection.cpp      | 128 ++++++
 src/Server/HTTP/HTTPServerConnection.h        |  36 ++
 .../HTTP/HTTPServerConnectionFactory.cpp      |  19 +
 src/Server/HTTP/HTTPServerConnectionFactory.h |  25 ++
 src/Server/HTTP/HTTPServerRequest.cpp         | 123 ++++++
 src/Server/HTTP/HTTPServerRequest.h           |  59 +++
 src/Server/HTTP/HTTPServerResponse.cpp        | 163 ++++++++
 src/Server/HTTP/HTTPServerResponse.h          |  91 +++++
 src/Server/HTTP/ReadHeaders.cpp               |  88 ++++
 src/Server/HTTP/ReadHeaders.h                 |  17 +
 .../WriteBufferFromHTTPServerResponse.cpp     |  44 +-
 .../HTTP}/WriteBufferFromHTTPServerResponse.h |  41 +-
 src/Server/HTTPHandler.cpp                    | 194 ++++-----
 src/Server/HTTPHandler.h                      |  36 +-
 src/Server/HTTPHandlerFactory.cpp             | 101 +++--
 src/Server/HTTPHandlerFactory.h               | 112 ++---
 src/Server/HTTPHandlerRequestFilter.h         |  48 +--
 src/Server/InterserverIOHTTPHandler.cpp       |  37 +-
 src/Server/InterserverIOHTTPHandler.h         |  16 +-
 src/Server/NotFoundHandler.cpp                |  31 +-
 src/Server/NotFoundHandler.h                  |   9 +-
 src/Server/PrometheusRequestHandler.cpp       |  34 +-
 src/Server/PrometheusRequestHandler.h         |  16 +-
 src/Server/ReplicasStatusHandler.cpp          |  27 +-
 src/Server/ReplicasStatusHandler.h            |  10 +-
 src/Server/StaticRequestHandler.cpp           |  31 +-
 src/Server/StaticRequestHandler.h             |   6 +-
 src/Server/WebUIRequestHandler.cpp            |   6 +-
 src/Server/WebUIRequestHandler.h              |   6 +-
 src/Server/ya.make                            |   8 +
 src/Storages/MergeTree/DataPartsExchange.cpp  |  17 +-
 src/Storages/MergeTree/DataPartsExchange.h    |  15 +-
 tests/queries/query_test.py                   |   2 +-
 80 files changed, 2303 insertions(+), 654 deletions(-)
 delete mode 100644 src/Common/HTMLForm.h
 create mode 100644 src/IO/EmptyReadBuffer.h
 create mode 100644 src/IO/HTTPChunkedReadBuffer.cpp
 create mode 100644 src/IO/HTTPChunkedReadBuffer.h
 create mode 100644 src/Server/HTTP/HTMLForm.cpp
 create mode 100644 src/Server/HTTP/HTMLForm.h
 create mode 100644 src/Server/HTTP/HTTPRequest.h
 create mode 100644 src/Server/HTTP/HTTPRequestHandler.h
 create mode 100644 src/Server/HTTP/HTTPRequestHandlerFactory.h
 create mode 100644 src/Server/HTTP/HTTPResponse.h
 create mode 100644 src/Server/HTTP/HTTPServer.cpp
 create mode 100644 src/Server/HTTP/HTTPServer.h
 create mode 100644 src/Server/HTTP/HTTPServerConnection.cpp
 create mode 100644 src/Server/HTTP/HTTPServerConnection.h
 create mode 100644 src/Server/HTTP/HTTPServerConnectionFactory.cpp
 create mode 100644 src/Server/HTTP/HTTPServerConnectionFactory.h
 create mode 100644 src/Server/HTTP/HTTPServerRequest.cpp
 create mode 100644 src/Server/HTTP/HTTPServerRequest.h
 create mode 100644 src/Server/HTTP/HTTPServerResponse.cpp
 create mode 100644 src/Server/HTTP/HTTPServerResponse.h
 create mode 100644 src/Server/HTTP/ReadHeaders.cpp
 create mode 100644 src/Server/HTTP/ReadHeaders.h
 rename src/{IO => Server/HTTP}/WriteBufferFromHTTPServerResponse.cpp (81%)
 rename src/{IO => Server/HTTP}/WriteBufferFromHTTPServerResponse.h (86%)

diff --git a/base/daemon/BaseDaemon.h b/base/daemon/BaseDaemon.h
index 42d94629ae9..8b9d765cf2e 100644
--- a/base/daemon/BaseDaemon.h
+++ b/base/daemon/BaseDaemon.h
@@ -83,7 +83,7 @@ public:
     template <class T>
     void writeToGraphite(const std::string & key, const T & value, const std::string & config_name = DEFAULT_GRAPHITE_CONFIG_NAME, time_t timestamp = 0, const std::string & custom_root_path = "")
     {
-        auto writer = getGraphiteWriter(config_name);
+        auto *writer = getGraphiteWriter(config_name);
         if (writer)
             writer->write(key, value, timestamp, custom_root_path);
     }
@@ -91,7 +91,7 @@ public:
     template <class T>
     void writeToGraphite(const GraphiteWriter::KeyValueVector<T> & key_vals, const std::string & config_name = DEFAULT_GRAPHITE_CONFIG_NAME, time_t timestamp = 0, const std::string & custom_root_path = "")
     {
-        auto writer = getGraphiteWriter(config_name);
+        auto *writer = getGraphiteWriter(config_name);
         if (writer)
             writer->write(key_vals, timestamp, custom_root_path);
     }
@@ -99,7 +99,7 @@ public:
     template <class T>
     void writeToGraphite(const GraphiteWriter::KeyValueVector<T> & key_vals, const std::chrono::system_clock::time_point & current_time, const std::string & custom_root_path)
     {
-        auto writer = getGraphiteWriter();
+        auto *writer = getGraphiteWriter();
         if (writer)
             writer->write(key_vals, std::chrono::system_clock::to_time_t(current_time), custom_root_path);
     }
diff --git a/programs/odbc-bridge/ColumnInfoHandler.cpp b/programs/odbc-bridge/ColumnInfoHandler.cpp
index ee4daa3e16d..5aef7f1ac38 100644
--- a/programs/odbc-bridge/ColumnInfoHandler.cpp
+++ b/programs/odbc-bridge/ColumnInfoHandler.cpp
@@ -4,14 +4,14 @@
 
 #    include <DataTypes/DataTypeFactory.h>
 #    include <DataTypes/DataTypeNullable.h>
-#    include <IO/WriteBufferFromHTTPServerResponse.h>
+#    include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
 #    include <IO/WriteHelpers.h>
 #    include <Parsers/ParserQueryWithOutput.h>
 #    include <Parsers/parseQuery.h>
 #    include <Poco/Data/ODBC/ODBCException.h>
 #    include <Poco/Data/ODBC/SessionImpl.h>
 #    include <Poco/Data/ODBC/Utility.h>
-#    include <Poco/Net/HTMLForm.h>
+#    include <Server/HTTP/HTMLForm.h>
 #    include <Poco/Net/HTTPServerRequest.h>
 #    include <Poco/Net/HTTPServerResponse.h>
 #    include <Poco/NumberParser.h>
@@ -59,16 +59,16 @@ namespace
     }
 }
 
-void ODBCColumnsInfoHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response)
+void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
 {
-    Poco::Net::HTMLForm params(request, request.stream());
+    HTMLForm params(request, request.getStream());
     LOG_TRACE(log, "Request URI: {}", request.getURI());
 
     auto process_error = [&response, this](const std::string & message)
     {
         response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR);
         if (!response.sent())
-            response.send() << message << std::endl;
+            *response.send() << message << std::endl;
         LOG_WARNING(log, message);
     };
 
@@ -159,7 +159,7 @@ void ODBCColumnsInfoHandler::handleRequest(Poco::Net::HTTPServerRequest & reques
             columns.emplace_back(reinterpret_cast<char *>(column_name), std::move(column_type));
         }
 
-        WriteBufferFromHTTPServerResponse out(request, response, keep_alive_timeout);
+        WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
         writeStringBinary(columns.toString(), out);
     }
     catch (...)
diff --git a/programs/odbc-bridge/ColumnInfoHandler.h b/programs/odbc-bridge/ColumnInfoHandler.h
index 04b4c06693b..9b5b470b31d 100644
--- a/programs/odbc-bridge/ColumnInfoHandler.h
+++ b/programs/odbc-bridge/ColumnInfoHandler.h
@@ -3,10 +3,11 @@
 #if USE_ODBC
 
 #    include <Interpreters/Context.h>
-#    include <Poco/Logger.h>
-#    include <Poco/Net/HTTPRequestHandler.h>
+#    include <Server/HTTP/HTTPRequestHandler.h>
 #    include <Common/config.h>
 
+#    include <Poco/Logger.h>
+
 /** The structure of the table is taken from the query "SELECT * FROM table WHERE 1=0".
   * TODO: It would be much better to utilize ODBC methods dedicated for columns description.
   * If there is no such table, an exception is thrown.
@@ -14,7 +15,7 @@
 namespace DB
 {
 
-class ODBCColumnsInfoHandler : public Poco::Net::HTTPRequestHandler
+class ODBCColumnsInfoHandler : public HTTPRequestHandler
 {
 public:
     ODBCColumnsInfoHandler(size_t keep_alive_timeout_, Context & context_)
@@ -22,7 +23,7 @@ public:
     {
     }
 
-    void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override;
+    void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
 
 private:
     Poco::Logger * log;
diff --git a/programs/odbc-bridge/HandlerFactory.cpp b/programs/odbc-bridge/HandlerFactory.cpp
index 0cc40480b87..9ac48af4ace 100644
--- a/programs/odbc-bridge/HandlerFactory.cpp
+++ b/programs/odbc-bridge/HandlerFactory.cpp
@@ -7,39 +7,40 @@
 
 namespace DB
 {
-Poco::Net::HTTPRequestHandler * HandlerFactory::createRequestHandler(const Poco::Net::HTTPServerRequest & request)
+
+std::unique_ptr<HTTPRequestHandler> HandlerFactory::createRequestHandler(const HTTPServerRequest & request)
 {
     Poco::URI uri{request.getURI()};
     LOG_TRACE(log, "Request URI: {}", uri.toString());
 
     if (uri.getPath() == "/ping" && request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET)
-        return new PingHandler(keep_alive_timeout);
+        return std::make_unique<PingHandler>(keep_alive_timeout);
 
     if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST)
     {
 
         if (uri.getPath() == "/columns_info")
 #if USE_ODBC
-            return new ODBCColumnsInfoHandler(keep_alive_timeout, context);
+            return std::make_unique<ODBCColumnsInfoHandler>(keep_alive_timeout, context);
 #else
             return nullptr;
 #endif
         else if (uri.getPath() == "/identifier_quote")
 #if USE_ODBC
-            return new IdentifierQuoteHandler(keep_alive_timeout, context);
+            return std::make_unique<IdentifierQuoteHandler>(keep_alive_timeout, context);
 #else
             return nullptr;
 #endif
         else if (uri.getPath() == "/schema_allowed")
 #if USE_ODBC
-            return new SchemaAllowedHandler(keep_alive_timeout, context);
+            return std::make_unique<SchemaAllowedHandler>(keep_alive_timeout, context);
 #else
             return nullptr;
 #endif
         else if (uri.getPath() == "/write")
-            return new ODBCHandler(pool_map, keep_alive_timeout, context, "write");
+            return std::make_unique<ODBCHandler>(pool_map, keep_alive_timeout, context, "write");
         else
-            return new ODBCHandler(pool_map, keep_alive_timeout, context, "read");
+            return std::make_unique<ODBCHandler>(pool_map, keep_alive_timeout, context, "read");
     }
     return nullptr;
 }
diff --git a/programs/odbc-bridge/HandlerFactory.h b/programs/odbc-bridge/HandlerFactory.h
index 1d4edfc9dd1..5dce6f02ecd 100644
--- a/programs/odbc-bridge/HandlerFactory.h
+++ b/programs/odbc-bridge/HandlerFactory.h
@@ -1,16 +1,17 @@
 #pragma once
+
 #include <Interpreters/Context.h>
-#include <Poco/Logger.h>
-#include <Poco/Net/HTTPRequestHandler.h>
-#include <Poco/Net/HTTPRequestHandlerFactory.h>
-#include "MainHandler.h"
+#include <Server/HTTP/HTTPRequestHandlerFactory.h>
 #include "ColumnInfoHandler.h"
 #include "IdentifierQuoteHandler.h"
+#include "MainHandler.h"
 #include "SchemaAllowedHandler.h"
 
+#include <Poco/Logger.h>
+
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-parameter"
-    #include <Poco/Data/SessionPool.h>
+#include <Poco/Data/SessionPool.h>
 #pragma GCC diagnostic pop
 
 
@@ -19,7 +20,7 @@ namespace DB
 /** Factory for '/ping', '/', '/columns_info', '/identifier_quote', '/schema_allowed' handlers.
   * Also stores Session pools for ODBC connections
   */
-class HandlerFactory : public Poco::Net::HTTPRequestHandlerFactory
+class HandlerFactory : public HTTPRequestHandlerFactory
 {
 public:
     HandlerFactory(const std::string & name_, size_t keep_alive_timeout_, Context & context_)
@@ -28,7 +29,7 @@ public:
         pool_map = std::make_shared<ODBCHandler::PoolMap>();
     }
 
-    Poco::Net::HTTPRequestHandler * createRequestHandler(const Poco::Net::HTTPServerRequest & request) override;
+    std::unique_ptr<HTTPRequestHandler> createRequestHandler(const HTTPServerRequest & request) override;
 
 private:
     Poco::Logger * log;
diff --git a/programs/odbc-bridge/IdentifierQuoteHandler.cpp b/programs/odbc-bridge/IdentifierQuoteHandler.cpp
index 2c3701cfff9..ec4e4493d61 100644
--- a/programs/odbc-bridge/IdentifierQuoteHandler.cpp
+++ b/programs/odbc-bridge/IdentifierQuoteHandler.cpp
@@ -3,14 +3,14 @@
 #if USE_ODBC
 
 #    include <DataTypes/DataTypeFactory.h>
-#    include <IO/WriteBufferFromHTTPServerResponse.h>
+#    include <Server/HTTP/HTMLForm.h>
+#    include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
 #    include <IO/WriteHelpers.h>
 #    include <Parsers/ParserQueryWithOutput.h>
 #    include <Parsers/parseQuery.h>
 #    include <Poco/Data/ODBC/ODBCException.h>
 #    include <Poco/Data/ODBC/SessionImpl.h>
 #    include <Poco/Data/ODBC/Utility.h>
-#    include <Poco/Net/HTMLForm.h>
 #    include <Poco/Net/HTTPServerRequest.h>
 #    include <Poco/Net/HTTPServerResponse.h>
 #    include <common/logger_useful.h>
@@ -22,16 +22,16 @@
 
 namespace DB
 {
-void IdentifierQuoteHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response)
+void IdentifierQuoteHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
 {
-    Poco::Net::HTMLForm params(request, request.stream());
+    HTMLForm params(request, request.getStream());
     LOG_TRACE(log, "Request URI: {}", request.getURI());
 
     auto process_error = [&response, this](const std::string & message)
     {
         response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR);
         if (!response.sent())
-            response.send() << message << std::endl;
+            *response.send() << message << std::endl;
         LOG_WARNING(log, message);
     };
 
@@ -49,7 +49,7 @@ void IdentifierQuoteHandler::handleRequest(Poco::Net::HTTPServerRequest & reques
 
         auto identifier = getIdentifierQuote(hdbc);
 
-        WriteBufferFromHTTPServerResponse out(request, response, keep_alive_timeout);
+        WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
         writeStringBinary(identifier, out);
     }
     catch (...)
diff --git a/programs/odbc-bridge/IdentifierQuoteHandler.h b/programs/odbc-bridge/IdentifierQuoteHandler.h
index fd357e32786..dad88c72ad8 100644
--- a/programs/odbc-bridge/IdentifierQuoteHandler.h
+++ b/programs/odbc-bridge/IdentifierQuoteHandler.h
@@ -1,8 +1,9 @@
 #pragma once
 
 #include <Interpreters/Context.h>
+#include <Server/HTTP/HTTPRequestHandler.h>
+
 #include <Poco/Logger.h>
-#include <Poco/Net/HTTPRequestHandler.h>
 
 #if USE_ODBC
 
@@ -10,7 +11,7 @@
 namespace DB
 {
 
-class IdentifierQuoteHandler : public Poco::Net::HTTPRequestHandler
+class IdentifierQuoteHandler : public HTTPRequestHandler
 {
 public:
     IdentifierQuoteHandler(size_t keep_alive_timeout_, Context &)
@@ -18,7 +19,7 @@ public:
     {
     }
 
-    void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override;
+    void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
 
 private:
     Poco::Logger * log;
diff --git a/programs/odbc-bridge/MainHandler.cpp b/programs/odbc-bridge/MainHandler.cpp
index 64cb7bc0b46..b9670397878 100644
--- a/programs/odbc-bridge/MainHandler.cpp
+++ b/programs/odbc-bridge/MainHandler.cpp
@@ -7,7 +7,7 @@
 #include <DataStreams/copyData.h>
 #include <DataTypes/DataTypeFactory.h>
 #include <Formats/FormatFactory.h>
-#include <IO/WriteBufferFromHTTPServerResponse.h>
+#include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
 #include <IO/WriteHelpers.h>
 #include <IO/ReadHelpers.h>
 #include <IO/ReadBufferFromIStream.h>
@@ -17,6 +17,7 @@
 #include <Poco/ThreadPool.h>
 #include <Processors/Formats/InputStreamFromInputFormat.h>
 #include <common/logger_useful.h>
+#include <Server/HTTP/HTMLForm.h>
 
 #include <mutex>
 #include <memory>
@@ -73,19 +74,19 @@ ODBCHandler::PoolPtr ODBCHandler::getPool(const std::string & connection_str)
     return pool_map->at(connection_str);
 }
 
-void ODBCHandler::processError(Poco::Net::HTTPServerResponse & response, const std::string & message)
+void ODBCHandler::processError(HTTPServerResponse & response, const std::string & message)
 {
-    response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR);
+    response.setStatusAndReason(HTTPResponse::HTTP_INTERNAL_SERVER_ERROR);
     if (!response.sent())
-        response.send() << message << std::endl;
+        *response.send() << message << std::endl;
     LOG_WARNING(log, message);
 }
 
-void ODBCHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response)
+void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
 {
-    Poco::Net::HTMLForm params(request);
+    HTMLForm params(request);
     if (mode == "read")
-        params.read(request.stream());
+        params.read(request.getStream());
     LOG_TRACE(log, "Request URI: {}", request.getURI());
 
     if (mode == "read" && !params.has("query"))
@@ -136,7 +137,7 @@ void ODBCHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Ne
     std::string connection_string = params.get("connection_string");
     LOG_TRACE(log, "Connection string: '{}'", connection_string);
 
-    WriteBufferFromHTTPServerResponse out(request, response, keep_alive_timeout);
+    WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
 
     try
     {
@@ -163,9 +164,8 @@ void ODBCHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Ne
 #endif
 
             auto pool = getPool(connection_string);
-            ReadBufferFromIStream read_buf(request.stream());
-            auto input_format = FormatFactory::instance().getInput(format, read_buf, *sample_block,
-                                                                   context, max_block_size);
+            auto & read_buf = request.getStream();
+            auto input_format = FormatFactory::instance().getInput(format, read_buf, *sample_block, context, max_block_size);
             auto input_stream = std::make_shared<InputStreamFromInputFormat>(input_format);
             ODBCBlockOutputStream output_stream(pool->get(), db_name, table_name, *sample_block, quoting_style);
             copyData(*input_stream, output_stream);
diff --git a/programs/odbc-bridge/MainHandler.h b/programs/odbc-bridge/MainHandler.h
index ec5e6693a60..e237ede5814 100644
--- a/programs/odbc-bridge/MainHandler.h
+++ b/programs/odbc-bridge/MainHandler.h
@@ -1,12 +1,13 @@
 #pragma once
 
 #include <Interpreters/Context.h>
+#include <Server/HTTP/HTTPRequestHandler.h>
+
 #include <Poco/Logger.h>
-#include <Poco/Net/HTTPRequestHandler.h>
 
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-parameter"
-    #include <Poco/Data/SessionPool.h>
+#include <Poco/Data/SessionPool.h>
 #pragma GCC diagnostic pop
 
 namespace DB
@@ -16,7 +17,7 @@ namespace DB
   * and also query in request body
   * response in RowBinary format
   */
-class ODBCHandler : public Poco::Net::HTTPRequestHandler
+class ODBCHandler : public HTTPRequestHandler
 {
 public:
     using PoolPtr = std::shared_ptr<Poco::Data::SessionPool>;
@@ -34,7 +35,7 @@ public:
     {
     }
 
-    void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override;
+    void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
 
 private:
     Poco::Logger * log;
@@ -47,7 +48,7 @@ private:
     static inline std::mutex mutex;
 
     PoolPtr getPool(const std::string & connection_str);
-    void processError(Poco::Net::HTTPServerResponse & response, const std::string & message);
+    void processError(HTTPServerResponse & response, const std::string & message);
 };
 
 }
diff --git a/programs/odbc-bridge/ODBCBridge.cpp b/programs/odbc-bridge/ODBCBridge.cpp
index 9deefaf7895..8869a2639c1 100644
--- a/programs/odbc-bridge/ODBCBridge.cpp
+++ b/programs/odbc-bridge/ODBCBridge.cpp
@@ -11,7 +11,6 @@
 #    include <Poco/Data/ODBC/Connector.h>
 #endif
 
-#include <Poco/Net/HTTPServer.h>
 #include <Poco/Net/NetException.h>
 #include <Poco/String.h>
 #include <Poco/Util/HelpFormatter.h>
@@ -23,6 +22,7 @@
 #include <ext/scope_guard.h>
 #include <ext/range.h>
 #include <Common/SensitiveDataMasker.h>
+#include <Server/HTTP/HTTPServer.h>
 
 
 namespace DB
@@ -212,8 +212,12 @@ int ODBCBridge::main(const std::vector<std::string> & /*args*/)
         SensitiveDataMasker::setInstance(std::make_unique<SensitiveDataMasker>(config(), "query_masking_rules"));
     }
 
-    auto server = Poco::Net::HTTPServer(
-        new HandlerFactory("ODBCRequestHandlerFactory-factory", keep_alive_timeout, context), server_pool, socket, http_params);
+    auto server = HTTPServer(
+        context,
+        std::make_shared<HandlerFactory>("ODBCRequestHandlerFactory-factory", keep_alive_timeout, context),
+        server_pool,
+        socket,
+        http_params);
     server.start();
 
     LOG_INFO(log, "Listening http://{}", address.toString());
diff --git a/programs/odbc-bridge/PingHandler.cpp b/programs/odbc-bridge/PingHandler.cpp
index b0313e46bf3..e3ab5e5cd00 100644
--- a/programs/odbc-bridge/PingHandler.cpp
+++ b/programs/odbc-bridge/PingHandler.cpp
@@ -6,7 +6,7 @@
 
 namespace DB
 {
-void PingHandler::handleRequest(Poco::Net::HTTPServerRequest & /*request*/, Poco::Net::HTTPServerResponse & response)
+void PingHandler::handleRequest(HTTPServerRequest & /* request */, HTTPServerResponse & response)
 {
     try
     {
diff --git a/programs/odbc-bridge/PingHandler.h b/programs/odbc-bridge/PingHandler.h
index d8109a50bb6..c969ec55af7 100644
--- a/programs/odbc-bridge/PingHandler.h
+++ b/programs/odbc-bridge/PingHandler.h
@@ -1,17 +1,19 @@
 #pragma once
-#include <Poco/Net/HTTPRequestHandler.h>
+
+#include <Server/HTTP/HTTPRequestHandler.h>
 
 namespace DB
 {
-/** Simple ping handler, answers "Ok." to GET request
- */
-class PingHandler : public Poco::Net::HTTPRequestHandler
+
+/// Simple ping handler, answers "Ok." to GET request
+class PingHandler : public HTTPRequestHandler
 {
 public:
-    PingHandler(size_t keep_alive_timeout_) : keep_alive_timeout(keep_alive_timeout_) {}
-    void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override;
+    explicit PingHandler(size_t keep_alive_timeout_) : keep_alive_timeout(keep_alive_timeout_) {}
+    void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
 
 private:
     size_t keep_alive_timeout;
 };
+
 }
diff --git a/programs/odbc-bridge/SchemaAllowedHandler.cpp b/programs/odbc-bridge/SchemaAllowedHandler.cpp
index fa08a27da59..48744b6d2ca 100644
--- a/programs/odbc-bridge/SchemaAllowedHandler.cpp
+++ b/programs/odbc-bridge/SchemaAllowedHandler.cpp
@@ -2,12 +2,12 @@
 
 #if USE_ODBC
 
-#    include <IO/WriteBufferFromHTTPServerResponse.h>
+#    include <Server/HTTP/HTMLForm.h>
+#    include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
 #    include <IO/WriteHelpers.h>
 #    include <Poco/Data/ODBC/ODBCException.h>
 #    include <Poco/Data/ODBC/SessionImpl.h>
 #    include <Poco/Data/ODBC/Utility.h>
-#    include <Poco/Net/HTMLForm.h>
 #    include <Poco/Net/HTTPServerRequest.h>
 #    include <Poco/Net/HTTPServerResponse.h>
 #    include <common/logger_useful.h>
@@ -33,16 +33,16 @@ namespace
 }
 
 
-void SchemaAllowedHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response)
+void SchemaAllowedHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
 {
-    Poco::Net::HTMLForm params(request, request.stream());
+    HTMLForm params(request, request.getStream());
     LOG_TRACE(log, "Request URI: {}", request.getURI());
 
     auto process_error = [&response, this](const std::string & message)
     {
         response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR);
         if (!response.sent())
-            response.send() << message << std::endl;
+            *response.send() << message << std::endl;
         LOG_WARNING(log, message);
     };
 
@@ -60,7 +60,7 @@ void SchemaAllowedHandler::handleRequest(Poco::Net::HTTPServerRequest & request,
 
         bool result = isSchemaAllowed(hdbc);
 
-        WriteBufferFromHTTPServerResponse out(request, response, keep_alive_timeout);
+        WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
         writeBoolText(result, out);
     }
     catch (...)
diff --git a/programs/odbc-bridge/SchemaAllowedHandler.h b/programs/odbc-bridge/SchemaAllowedHandler.h
index 76aa23b903c..91eddf67803 100644
--- a/programs/odbc-bridge/SchemaAllowedHandler.h
+++ b/programs/odbc-bridge/SchemaAllowedHandler.h
@@ -1,17 +1,18 @@
 #pragma once
 
+#include <Server/HTTP/HTTPRequestHandler.h>
+
 #include <Poco/Logger.h>
-#include <Poco/Net/HTTPRequestHandler.h>
 
 #if USE_ODBC
 
 namespace DB
 {
+
 class Context;
 
-
-/// This handler establishes connection to database, and retrieve whether schema is allowed.
-class SchemaAllowedHandler : public Poco::Net::HTTPRequestHandler
+/// This handler establishes connection to database, and retrieves whether schema is allowed.
+class SchemaAllowedHandler : public HTTPRequestHandler
 {
 public:
     SchemaAllowedHandler(size_t keep_alive_timeout_, Context &)
@@ -19,7 +20,7 @@ public:
     {
     }
 
-    void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override;
+    void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
 
 private:
     Poco::Logger * log;
diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index a96cb2b8973..4194bb4a06b 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -69,6 +69,7 @@
 #include <Server/MySQLHandlerFactory.h>
 #include <Server/PostgreSQLHandlerFactory.h>
 #include <Server/ProtocolServerAdapter.h>
+#include <Server/HTTP/HTTPServer.h>
 
 
 #if !defined(ARCADIA_BUILD)
@@ -1070,8 +1071,10 @@ int Server::main(const std::vector<std::string> & /*args*/)
                 socket.setReceiveTimeout(settings.http_receive_timeout);
                 socket.setSendTimeout(settings.http_send_timeout);
 
-                servers->emplace_back(port_name, std::make_unique<Poco::Net::HTTPServer>(
-                    createHandlerFactory(*this, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params));
+                servers->emplace_back(
+                    port_name,
+                    std::make_unique<HTTPServer>(
+                        context(), createHandlerFactory(*this, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params));
 
                 LOG_INFO(log, "Listening for http://{}", address.toString());
             });
@@ -1085,8 +1088,10 @@ int Server::main(const std::vector<std::string> & /*args*/)
                 auto address = socketBindListen(socket, listen_host, port, /* secure = */ true);
                 socket.setReceiveTimeout(settings.http_receive_timeout);
                 socket.setSendTimeout(settings.http_send_timeout);
-                servers->emplace_back(port_name, std::make_unique<Poco::Net::HTTPServer>(
-                    createHandlerFactory(*this, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params));
+                servers->emplace_back(
+                    port_name,
+                    std::make_unique<HTTPServer>(
+                        context(), createHandlerFactory(*this, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params));
 
                 LOG_INFO(log, "Listening for https://{}", address.toString());
 #else
@@ -1160,8 +1165,14 @@ int Server::main(const std::vector<std::string> & /*args*/)
                 auto address = socketBindListen(socket, listen_host, port);
                 socket.setReceiveTimeout(settings.http_receive_timeout);
                 socket.setSendTimeout(settings.http_send_timeout);
-                servers->emplace_back(port_name, std::make_unique<Poco::Net::HTTPServer>(
-                    createHandlerFactory(*this, async_metrics, "InterserverIOHTTPHandler-factory"), server_pool, socket, http_params));
+                servers->emplace_back(
+                    port_name,
+                    std::make_unique<HTTPServer>(
+                        context(),
+                        createHandlerFactory(*this, async_metrics, "InterserverIOHTTPHandler-factory"),
+                        server_pool,
+                        socket,
+                        http_params));
 
                 LOG_INFO(log, "Listening for replica communication (interserver): http://{}", address.toString());
             });
@@ -1174,8 +1185,14 @@ int Server::main(const std::vector<std::string> & /*args*/)
                 auto address = socketBindListen(socket, listen_host, port, /* secure = */ true);
                 socket.setReceiveTimeout(settings.http_receive_timeout);
                 socket.setSendTimeout(settings.http_send_timeout);
-                servers->emplace_back(port_name, std::make_unique<Poco::Net::HTTPServer>(
-                    createHandlerFactory(*this, async_metrics, "InterserverIOHTTPSHandler-factory"), server_pool, socket, http_params));
+                servers->emplace_back(
+                    port_name,
+                    std::make_unique<HTTPServer>(
+                        context(),
+                        createHandlerFactory(*this, async_metrics, "InterserverIOHTTPSHandler-factory"),
+                        server_pool,
+                        socket,
+                        http_params));
 
                 LOG_INFO(log, "Listening for secure replica communication (interserver): https://{}", address.toString());
 #else
@@ -1235,8 +1252,14 @@ int Server::main(const std::vector<std::string> & /*args*/)
                 auto address = socketBindListen(socket, listen_host, port);
                 socket.setReceiveTimeout(settings.http_receive_timeout);
                 socket.setSendTimeout(settings.http_send_timeout);
-                servers->emplace_back(port_name, std::make_unique<Poco::Net::HTTPServer>(
-                    createHandlerFactory(*this, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params));
+                servers->emplace_back(
+                    port_name,
+                    std::make_unique<HTTPServer>(
+                        context(),
+                        createHandlerFactory(*this, async_metrics, "PrometheusHandler-factory"),
+                        server_pool,
+                        socket,
+                        http_params));
 
                 LOG_INFO(log, "Listening for Prometheus: http://{}", address.toString());
             });
diff --git a/programs/server/Server.h b/programs/server/Server.h
index c582e475308..fbfc26f6ee5 100644
--- a/programs/server/Server.h
+++ b/programs/server/Server.h
@@ -51,6 +51,7 @@ public:
     }
 
     void defineOptions(Poco::Util::OptionSet & _options) override;
+
 protected:
     int run() override;
 
@@ -65,8 +66,6 @@ protected:
 private:
     Context * global_context_ptr = nullptr;
 
-private:
-
     Poco::Net::SocketAddress socketBindListen(Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port, [[maybe_unused]] bool secure = false) const;
 
     using CreateServerFunc = std::function<void(UInt16)>;
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index d370016da00..215a13cce1a 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -181,6 +181,7 @@ add_object_library(clickhouse_storages_mergetree Storages/MergeTree)
 add_object_library(clickhouse_storages_liveview Storages/LiveView)
 add_object_library(clickhouse_client Client)
 add_object_library(clickhouse_server Server)
+add_object_library(clickhouse_server_http Server/HTTP)
 add_object_library(clickhouse_formats Formats)
 add_object_library(clickhouse_processors Processors)
 add_object_library(clickhouse_processors_executors Processors/Executors)
diff --git a/src/Common/HTMLForm.h b/src/Common/HTMLForm.h
deleted file mode 100644
index 2b62167dce7..00000000000
--- a/src/Common/HTMLForm.h
+++ /dev/null
@@ -1,42 +0,0 @@
-#pragma once
-
-#include <sstream>
-#include <Poco/Net/HTMLForm.h>
-#include <Poco/Net/HTTPRequest.h>
-#include <Poco/URI.h>
-
-#include <IO/ReadHelpers.h>
-
-
-/** Somehow, in case of POST, Poco::Net::HTMLForm doesn't read parameters from URL, only from body.
-  * This helper allows to read parameters just from URL.
-  */
-struct HTMLForm : public Poco::Net::HTMLForm
-{
-    HTMLForm(const Poco::Net::HTTPRequest & request)
-    {
-        Poco::URI uri(request.getURI());
-        std::istringstream istr(uri.getRawQuery());     // STYLE_CHECK_ALLOW_STD_STRING_STREAM
-        readUrl(istr);
-    }
-
-    HTMLForm(const Poco::URI & uri)
-    {
-        std::istringstream istr(uri.getRawQuery());     // STYLE_CHECK_ALLOW_STD_STRING_STREAM
-        readUrl(istr);
-    }
-
-
-    template <typename T>
-    T getParsed(const std::string & key, T default_value)
-    {
-        auto it = find(key);
-        return (it != end()) ? DB::parse<T>(it->second) : default_value;
-    }
-
-    template <typename T>
-    T getParsed(const std::string & key)
-    {
-        return DB::parse<T>(get(key));
-    }
-};
diff --git a/src/Common/StringUtils/StringUtils.h b/src/Common/StringUtils/StringUtils.h
index 904e3035dd8..cb2227f01a8 100644
--- a/src/Common/StringUtils/StringUtils.h
+++ b/src/Common/StringUtils/StringUtils.h
@@ -120,6 +120,12 @@ inline bool isWhitespaceASCII(char c)
     return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v';
 }
 
+/// Since |isWhiteSpaceASCII()| is used inside algorithms it's easier to implement another function than add extra argument.
+inline bool isWhitespaceASCIIOneLine(char c)
+{
+    return c == ' ' || c == '\t' || c == '\r' || c == '\f' || c == '\v';
+}
+
 inline bool isControlASCII(char c)
 {
     return static_cast<unsigned char>(c) <= 31;
diff --git a/src/Common/formatIPv6.h b/src/Common/formatIPv6.h
index 63c064b21f8..bd0c68d70f9 100644
--- a/src/Common/formatIPv6.h
+++ b/src/Common/formatIPv6.h
@@ -85,9 +85,9 @@ inline bool parseIPv6(const char * src, unsigned char * dst)
             return clear_dst();
 
     unsigned char tmp[IPV6_BINARY_LENGTH]{};
-    auto tp = tmp;
-    auto endp = tp + IPV6_BINARY_LENGTH;
-    auto curtok = src;
+    auto * tp = tmp;
+    auto * endp = tp + IPV6_BINARY_LENGTH;
+    const auto * curtok = src;
     auto saw_xdigit = false;
     UInt32 val{};
     unsigned char * colonp = nullptr;
@@ -97,14 +97,14 @@ inline bool parseIPv6(const char * src, unsigned char * dst)
     {
         const auto num = unhex(ch);
 
-        if (num != '\xff')
+        if (num != u8'\xff')
         {
             val <<= 4;
             val |= num;
             if (val > 0xffffu)
                 return clear_dst();
 
-            saw_xdigit = 1;
+            saw_xdigit = true;
             continue;
         }
 
@@ -204,7 +204,7 @@ inline void formatIPv4(const unsigned char * src, char *& dst, uint8_t mask_tail
     for (size_t octet = 0; octet < limit; ++octet)
     {
         const uint8_t value = static_cast<uint8_t>(src[IPV4_BINARY_LENGTH - octet - 1]);
-        auto rep = one_byte_to_string_lookup_table[value];
+        const auto * rep = one_byte_to_string_lookup_table[value];
         const uint8_t len = rep[0];
         const char* str = rep + 1;
 
diff --git a/src/Common/hex.h b/src/Common/hex.h
index db094e1dfd1..a1fa7b32465 100644
--- a/src/Common/hex.h
+++ b/src/Common/hex.h
@@ -90,12 +90,12 @@ std::string getHexUIntLowercase(TUInt uint_)
 
 extern const char * const hex_char_to_digit_table;
 
-inline char unhex(char c)
+inline UInt8 unhex(char c)
 {
     return hex_char_to_digit_table[static_cast<UInt8>(c)];
 }
 
-inline char unhex2(const char * data)
+inline UInt8 unhex2(const char * data)
 {
     return
           static_cast<UInt8>(unhex(data[0])) * 0x10
diff --git a/src/Core/ExternalTable.cpp b/src/Core/ExternalTable.cpp
index 767ed959950..afc9fe00ef5 100644
--- a/src/Core/ExternalTable.cpp
+++ b/src/Core/ExternalTable.cpp
@@ -125,19 +125,16 @@ ExternalTable::ExternalTable(const boost::program_options::variables_map & exter
 }
 
 
-void ExternalTablesHandler::handlePart(const Poco::Net::MessageHeader & header, std::istream & stream)
+void ExternalTablesHandler::handlePart(const Poco::Net::MessageHeader & header, ReadBuffer & stream)
 {
     const Settings & settings = context.getSettingsRef();
 
-    /// The buffer is initialized here, not in the virtual function initReadBuffer
-    read_buffer_impl = std::make_unique<ReadBufferFromIStream>(stream);
-
     if (settings.http_max_multipart_form_data_size)
         read_buffer = std::make_unique<LimitReadBuffer>(
-            *read_buffer_impl, settings.http_max_multipart_form_data_size,
+            stream, settings.http_max_multipart_form_data_size,
             true, "the maximum size of multipart/form-data. This limit can be tuned by 'http_max_multipart_form_data_size' setting");
     else
-        read_buffer = std::move(read_buffer_impl);
+        read_buffer = wrapReadBufferReference(stream);
 
     /// Retrieve a collection of parameters from MessageHeader
     Poco::Net::NameValueCollection content;
diff --git a/src/Core/ExternalTable.h b/src/Core/ExternalTable.h
index 0d8e0aaf8ac..aa15846d48a 100644
--- a/src/Core/ExternalTable.h
+++ b/src/Core/ExternalTable.h
@@ -1,15 +1,14 @@
 #pragma once
 
+#include <Client/Connection.h>
+#include <Core/Block.h>
+#include <IO/ReadBuffer.h>
+#include <Server/HTTP/HTMLForm.h>
+
+#include <iosfwd>
+#include <memory>
 #include <string>
 #include <vector>
-#include <memory>
-#include <iosfwd>
-
-#include <Poco/Net/PartHandler.h>
-
-#include <Core/Block.h>
-#include <Client/Connection.h>
-#include <IO/ReadBuffer.h>
 
 
 namespace Poco
@@ -51,7 +50,7 @@ public:
     std::unique_ptr<ReadBuffer> read_buffer;
     Block sample_block;
 
-    virtual ~BaseExternalTable() {}
+    virtual ~BaseExternalTable() = default;
 
     /// Initialize read_buffer, depending on the data source. By default, does nothing.
     virtual void initReadBuffer() {}
@@ -82,24 +81,23 @@ public:
     void initReadBuffer() override;
 
     /// Extract parameters from variables_map, which is built on the client command line
-    ExternalTable(const boost::program_options::variables_map & external_options);
+    explicit ExternalTable(const boost::program_options::variables_map & external_options);
 };
 
 
 /// Parsing of external table used when sending tables via http
 /// The `handlePart` function will be called for each table passed,
 /// so it's also necessary to call `clean` at the end of the `handlePart`.
-class ExternalTablesHandler : public Poco::Net::PartHandler, BaseExternalTable
+class ExternalTablesHandler : public HTMLForm::PartHandler, BaseExternalTable
 {
 public:
     ExternalTablesHandler(Context & context_, const Poco::Net::NameValueCollection & params_) : context(context_), params(params_) {}
 
-    void handlePart(const Poco::Net::MessageHeader & header, std::istream & stream) override;
+    void handlePart(const Poco::Net::MessageHeader & header, ReadBuffer & stream) override;
 
 private:
     Context & context;
     const Poco::Net::NameValueCollection & params;
-    std::unique_ptr<ReadBuffer> read_buffer_impl;
 };
 
 
diff --git a/src/IO/EmptyReadBuffer.h b/src/IO/EmptyReadBuffer.h
new file mode 100644
index 00000000000..e2189b9943f
--- /dev/null
+++ b/src/IO/EmptyReadBuffer.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include <IO/ReadBuffer.h>
+
+namespace DB
+{
+
+/// Just a stub - reads nothing from nowhere.
+class EmptyReadBuffer : public ReadBuffer
+{
+public:
+    EmptyReadBuffer() : ReadBuffer(nullptr, 0) {}
+
+private:
+    bool nextImpl() override { return false; }
+};
+
+}
diff --git a/src/IO/HTTPChunkedReadBuffer.cpp b/src/IO/HTTPChunkedReadBuffer.cpp
new file mode 100644
index 00000000000..bd9bbba4c6c
--- /dev/null
+++ b/src/IO/HTTPChunkedReadBuffer.cpp
@@ -0,0 +1,92 @@
+#include <IO/HTTPChunkedReadBuffer.h>
+
+#include <IO/ReadHelpers.h>
+#include <Common/StringUtils/StringUtils.h>
+#include <Common/hex.h>
+#include <common/arithmeticOverflow.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int ARGUMENT_OUT_OF_BOUND;
+    extern const int UNEXPECTED_END_OF_FILE;
+    extern const int CORRUPTED_DATA;
+    extern const int TOO_MANY_BYTES;
+}
+
+size_t HTTPChunkedReadBuffer::readChunkHeader()
+{
+    if (in->eof())
+        throw Exception("Unexpected end of file while reading chunk header of HTTP chunked data", ErrorCodes::UNEXPECTED_END_OF_FILE);
+
+    if (!isHexDigit(*in->position()))
+        throw Exception("Unexpected data instead of HTTP chunk header", ErrorCodes::CORRUPTED_DATA);
+
+    size_t res = 0;
+    do
+    {
+        if (common::mulOverflow(res, 16ul, res) || common::addOverflow<size_t>(res, unhex(*in->position()), res))
+            throw Exception("Chunk size is out of bounds", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
+        ++in->position();
+    } while (!in->eof() && isHexDigit(*in->position()));
+
+    /// NOTE: If we want to read any chunk extensions, it should be done here.
+
+    skipToCarriageReturnOrEOF(*in);
+
+    if (in->eof())
+        throw Exception("Unexpected end of file while reading chunk header of HTTP chunked data", ErrorCodes::UNEXPECTED_END_OF_FILE);
+
+    if (res > max_size)
+        throw Exception("Chunk size is too large", ErrorCodes::TOO_MANY_BYTES);
+
+    assertString("\n", *in);
+    return res;
+}
+
+void HTTPChunkedReadBuffer::readChunkFooter()
+{
+    assertString("\r\n", *in);
+}
+
+bool HTTPChunkedReadBuffer::nextImpl()
+{
+    if (!in)
+        return false;
+
+    /// The footer of previous chunk.
+    if (count())
+        readChunkFooter();
+
+    size_t chunk_size = readChunkHeader();
+    if (0 == chunk_size)
+    {
+        readChunkFooter();
+        in.reset();  // prevent double-eof situation.
+        return false;
+    }
+
+    if (in->available() >= chunk_size)
+    {
+        /// Zero-copy read from input.
+        working_buffer = Buffer(in->position(), in->position() + chunk_size);
+        in->position() += chunk_size;
+    }
+    else
+    {
+        /// Chunk is not completely in buffer, copy it to scratch space.
+        memory.resize(chunk_size);
+        in->readStrict(memory.data(), chunk_size);
+        working_buffer = Buffer(memory.data(), memory.data() + chunk_size);
+    }
+
+    /// NOTE: We postpone reading the footer to the next iteration, because it may not be completely in buffer,
+    ///       but we need to keep the current data in buffer available.
+
+    return true;
+}
+
+}
diff --git a/src/IO/HTTPChunkedReadBuffer.h b/src/IO/HTTPChunkedReadBuffer.h
new file mode 100644
index 00000000000..0ccebc69d08
--- /dev/null
+++ b/src/IO/HTTPChunkedReadBuffer.h
@@ -0,0 +1,25 @@
+#pragma once
+
+#include <IO/BufferWithOwnMemory.h>
+#include <IO/ReadBuffer.h>
+
+namespace DB
+{
+
+/// Reads data with HTTP Chunked Transfer Encoding.
+class HTTPChunkedReadBuffer : public BufferWithOwnMemory<ReadBuffer>
+{
+public:
+    HTTPChunkedReadBuffer(std::unique_ptr<ReadBuffer> in_, size_t max_chunk_size) : in(std::move(in_)), max_size(max_chunk_size) {}
+
+private:
+    std::unique_ptr<ReadBuffer> in;
+    const size_t max_size;
+
+    size_t readChunkHeader();
+    void readChunkFooter();
+
+    bool nextImpl() override;
+};
+
+}
diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp
index d12aa10fe6a..346bbf0427e 100644
--- a/src/IO/HTTPCommon.cpp
+++ b/src/IO/HTTPCommon.cpp
@@ -1,5 +1,6 @@
 #include <IO/HTTPCommon.h>
 
+#include <Server/HTTP/HTTPServerResponse.h>
 #include <Common/DNSResolver.h>
 #include <Common/Exception.h>
 #include <Common/PoolBase.h>
@@ -23,7 +24,6 @@
 #    include <Poco/Net/SecureStreamSocket.h>
 #endif
 
-#include <Poco/Net/HTTPServerResponse.h>
 #include <Poco/Util/Application.h>
 
 #include <tuple>
@@ -266,7 +266,7 @@ namespace
     };
 }
 
-void setResponseDefaultHeaders(Poco::Net::HTTPServerResponse & response, unsigned keep_alive_timeout)
+void setResponseDefaultHeaders(HTTPServerResponse & response, unsigned keep_alive_timeout)
 {
     if (!response.getKeepAlive())
         return;
diff --git a/src/IO/HTTPCommon.h b/src/IO/HTTPCommon.h
index 4a81d23a8a3..18e83abb83b 100644
--- a/src/IO/HTTPCommon.h
+++ b/src/IO/HTTPCommon.h
@@ -14,20 +14,13 @@
 #include <IO/ConnectionTimeouts.h>
 
 
-namespace Poco
-{
-namespace Net
-{
-    class HTTPServerResponse;
-}
-}
-
-
 namespace DB
 {
 
 constexpr int HTTP_TOO_MANY_REQUESTS = 429;
 
+class HTTPServerResponse;
+
 class SingleEndpointHTTPSessionPool : public PoolBase<Poco::Net::HTTPClientSession>
 {
 private:
@@ -45,7 +38,7 @@ public:
 using PooledHTTPSessionPtr = SingleEndpointHTTPSessionPool::Entry;
 using HTTPSessionPtr = std::shared_ptr<Poco::Net::HTTPClientSession>;
 
-void setResponseDefaultHeaders(Poco::Net::HTTPServerResponse & response, unsigned keep_alive_timeout);
+void setResponseDefaultHeaders(HTTPServerResponse & response, unsigned keep_alive_timeout);
 
 /// Create session object to perform requests and set required parameters.
 HTTPSessionPtr makeHTTPSession(const Poco::URI & uri, const ConnectionTimeouts & timeouts, bool resolve_host = true);
@@ -54,7 +47,7 @@ HTTPSessionPtr makeHTTPSession(const Poco::URI & uri, const ConnectionTimeouts &
 PooledHTTPSessionPtr makePooledHTTPSession(const Poco::URI & uri, const ConnectionTimeouts & timeouts, size_t per_endpoint_pool_size, bool resolve_host = true);
 PooledHTTPSessionPtr makePooledHTTPSession(const Poco::URI & uri, const Poco::URI & proxy_uri, const ConnectionTimeouts & timeouts, size_t per_endpoint_pool_size, bool resolve_host = true);
 
-bool isRedirect(const Poco::Net::HTTPResponse::HTTPStatus status);
+bool isRedirect(Poco::Net::HTTPResponse::HTTPStatus status);
 
 /** Used to receive response (response headers and possibly body)
   *  after sending data (request headers and possibly body).
@@ -65,5 +58,5 @@ std::istream * receiveResponse(
     Poco::Net::HTTPClientSession & session, const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, bool allow_redirects);
 
 void assertResponseIsOk(
-    const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, std::istream & istr, const bool allow_redirects = false);
+    const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, std::istream & istr, bool allow_redirects = false);
 }
diff --git a/src/IO/LimitReadBuffer.cpp b/src/IO/LimitReadBuffer.cpp
index baa9e487688..9daffa3a1d3 100644
--- a/src/IO/LimitReadBuffer.cpp
+++ b/src/IO/LimitReadBuffer.cpp
@@ -14,10 +14,10 @@ namespace ErrorCodes
 
 bool LimitReadBuffer::nextImpl()
 {
-    assert(position() >= in.position());
+    assert(position() >= in->position());
 
     /// Let underlying buffer calculate read bytes in `next()` call.
-    in.position() = position();
+    in->position() = position();
 
     if (bytes >= limit)
     {
@@ -27,13 +27,13 @@ bool LimitReadBuffer::nextImpl()
             return false;
     }
 
-    if (!in.next())
+    if (!in->next())
     {
-        working_buffer = in.buffer();
+        working_buffer = in->buffer();
         return false;
     }
 
-    working_buffer = in.buffer();
+    working_buffer = in->buffer();
 
     if (limit - bytes < working_buffer.size())
         working_buffer.resize(limit - bytes);
@@ -42,14 +42,33 @@ bool LimitReadBuffer::nextImpl()
 }
 
 
-LimitReadBuffer::LimitReadBuffer(ReadBuffer & in_, UInt64 limit_, bool throw_exception_, std::string exception_message_)
-    : ReadBuffer(in_.position(), 0), in(in_), limit(limit_), throw_exception(throw_exception_), exception_message(std::move(exception_message_))
+LimitReadBuffer::LimitReadBuffer(ReadBuffer * in_, bool owns, UInt64 limit_, bool throw_exception_, std::string exception_message_)
+    : ReadBuffer(in_ ? in_->position() : nullptr, 0)
+    , in(in_)
+    , owns_in(owns)
+    , limit(limit_)
+    , throw_exception(throw_exception_)
+    , exception_message(std::move(exception_message_))
 {
-    size_t remaining_bytes_in_buffer = in.buffer().end() - in.position();
+    assert(in);
+
+    size_t remaining_bytes_in_buffer = in->buffer().end() - in->position();
     if (remaining_bytes_in_buffer > limit)
         remaining_bytes_in_buffer = limit;
 
-    working_buffer = Buffer(in.position(), in.position() + remaining_bytes_in_buffer);
+    working_buffer = Buffer(in->position(), in->position() + remaining_bytes_in_buffer);
+}
+
+
+LimitReadBuffer::LimitReadBuffer(ReadBuffer & in_, UInt64 limit_, bool throw_exception_, std::string exception_message_)
+    : LimitReadBuffer(&in_, false, limit_, throw_exception_, exception_message_)
+{
+}
+
+
+LimitReadBuffer::LimitReadBuffer(std::unique_ptr<ReadBuffer> in_, UInt64 limit_, bool throw_exception_, std::string exception_message_)
+    : LimitReadBuffer(in_.release(), true, limit_, throw_exception_, exception_message_)
+{
 }
 
 
@@ -57,7 +76,10 @@ LimitReadBuffer::~LimitReadBuffer()
 {
     /// Update underlying buffer's position in case when limit wasn't reached.
     if (!working_buffer.empty())
-        in.position() = position();
+        in->position() = position();
+
+    if (owns_in)
+        delete in;
 }
 
 }
diff --git a/src/IO/LimitReadBuffer.h b/src/IO/LimitReadBuffer.h
index db3d2684ef7..a5fa0f0d5cc 100644
--- a/src/IO/LimitReadBuffer.h
+++ b/src/IO/LimitReadBuffer.h
@@ -12,17 +12,22 @@ namespace DB
   */
 class LimitReadBuffer : public ReadBuffer
 {
+public:
+    LimitReadBuffer(ReadBuffer & in_, UInt64 limit_, bool throw_exception_, std::string exception_message_ = {});
+    LimitReadBuffer(std::unique_ptr<ReadBuffer> in_, UInt64 limit_, bool throw_exception_, std::string exception_message_ = {});
+    ~LimitReadBuffer() override;
+
 private:
-    ReadBuffer & in;
+    ReadBuffer * in;
+    bool owns_in;
+
     UInt64 limit;
     bool throw_exception;
     std::string exception_message;
 
-    bool nextImpl() override;
+    LimitReadBuffer(ReadBuffer * in_, bool owns, UInt64 limit_, bool throw_exception_, std::string exception_message_);
 
-public:
-    LimitReadBuffer(ReadBuffer & in_, UInt64 limit_, bool throw_exception_, std::string exception_message_ = {});
-    ~LimitReadBuffer() override;
+    bool nextImpl() override;
 };
 
 }
diff --git a/src/IO/PeekableReadBuffer.cpp b/src/IO/PeekableReadBuffer.cpp
index e0e99afbfec..1d999d586b2 100644
--- a/src/IO/PeekableReadBuffer.cpp
+++ b/src/IO/PeekableReadBuffer.cpp
@@ -1,7 +1,9 @@
 #include <IO/PeekableReadBuffer.h>
 
+
 namespace DB
 {
+
 namespace ErrorCodes
 {
     extern const int LOGICAL_ERROR;
@@ -107,22 +109,29 @@ bool PeekableReadBuffer::peekNext()
     return sub_buf.next();
 }
 
-void PeekableReadBuffer::rollbackToCheckpoint()
+void PeekableReadBuffer::rollbackToCheckpoint(bool drop)
 {
     checkStateCorrect();
+
     if (!checkpoint)
         throw DB::Exception("There is no checkpoint", ErrorCodes::LOGICAL_ERROR);
     else if (checkpointInOwnMemory() == currentlyReadFromOwnMemory())
         pos = *checkpoint;
     else /// Checkpoint is in own memory and pos is not. Switch to reading from own memory
         BufferBase::set(memory.data(), peeked_size, *checkpoint - memory.data());
+
+    if (drop)
+        dropCheckpoint();
+
     checkStateCorrect();
 }
 
 bool PeekableReadBuffer::nextImpl()
 {
-    /// FIXME wrong bytes count because it can read the same data again after rollbackToCheckpoint()
-    /// However, changing bytes count on every call of next() (even after rollback) allows to determine if some pointers were invalidated.
+    /// FIXME: wrong bytes count because it can read the same data again after rollbackToCheckpoint()
+    ///        however, changing bytes count on every call of next() (even after rollback) allows to determine
+    ///        if some pointers were invalidated.
+
     checkStateCorrect();
     bool res;
 
@@ -138,7 +147,7 @@ bool PeekableReadBuffer::nextImpl()
         if (useSubbufferOnly())
         {
             /// Load next data to sub_buf
-            sub_buf.position() = pos;
+            sub_buf.position() = position();
             res = sub_buf.next();
         }
         else
diff --git a/src/IO/PeekableReadBuffer.h b/src/IO/PeekableReadBuffer.h
index e425f9bc953..4f6e669b31d 100644
--- a/src/IO/PeekableReadBuffer.h
+++ b/src/IO/PeekableReadBuffer.h
@@ -58,7 +58,7 @@ public:
 
     /// Sets position at checkpoint.
     /// All pointers (such as this->buffer().end()) may be invalidated
-    void rollbackToCheckpoint();
+    void rollbackToCheckpoint(bool drop = false);
 
     /// If checkpoint and current position are in different buffers, appends data from sub-buffer to own memory,
     /// so data between checkpoint and position will be in continuous memory.
diff --git a/src/IO/ReadBuffer.h b/src/IO/ReadBuffer.h
index 5cbe04f8348..e3166ba8180 100644
--- a/src/IO/ReadBuffer.h
+++ b/src/IO/ReadBuffer.h
@@ -134,15 +134,27 @@ public:
         tryIgnore(std::numeric_limits<size_t>::max());
     }
 
-    /** Reads a single byte. */
-    bool ALWAYS_INLINE read(char & c)
+    /// Peeks a single byte.
+    bool ALWAYS_INLINE peek(char & c)
     {
         if (eof())
             return false;
-        c = *pos++;
+        c = *pos;
         return true;
     }
 
+    /// Reads a single byte.
+    bool ALWAYS_INLINE read(char & c)
+    {
+        if (peek(c))
+        {
+            ++pos;
+            return true;
+        }
+
+        return false;
+    }
+
     void ALWAYS_INLINE readStrict(char & c)
     {
         if (read(c))
@@ -207,5 +219,39 @@ private:
 
 using ReadBufferPtr = std::shared_ptr<ReadBuffer>;
 
+/// Due to inconsistencies in ReadBuffer-family interfaces:
+///  - some require to fully wrap underlying buffer and own it,
+///  - some just wrap the reference without ownership,
+/// we need to be able to wrap reference-only buffers with movable transparent proxy-buffer.
+/// The uniqueness of such wraps is responsibility of the code author.
+inline std::unique_ptr<ReadBuffer> wrapReadBufferReference(ReadBuffer & buf)
+{
+    class ReadBufferWrapper : public ReadBuffer
+    {
+        public:
+            explicit ReadBufferWrapper(ReadBuffer & buf_) : ReadBuffer(buf_.position(), 0), buf(buf_)
+            {
+                working_buffer = Buffer(buf.position(), buf.buffer().end());
+            }
+
+        private:
+            ReadBuffer & buf;
+
+            bool nextImpl() override
+            {
+                buf.position() = position();
+
+                if (!buf.next())
+                    return false;
+
+                working_buffer = buf.buffer();
+
+                return true;
+            }
+    };
+
+    return std::make_unique<ReadBufferWrapper>(buf);
+}
+
 
 }
diff --git a/src/IO/ReadBufferFromPocoSocket.cpp b/src/IO/ReadBufferFromPocoSocket.cpp
index 2c13446e693..59f0dc25667 100644
--- a/src/IO/ReadBufferFromPocoSocket.cpp
+++ b/src/IO/ReadBufferFromPocoSocket.cpp
@@ -78,7 +78,7 @@ ReadBufferFromPocoSocket::ReadBufferFromPocoSocket(Poco::Net::Socket & socket_,
 {
 }
 
-bool ReadBufferFromPocoSocket::poll(size_t timeout_microseconds)
+bool ReadBufferFromPocoSocket::poll(size_t timeout_microseconds) const
 {
     return available() || socket.poll(timeout_microseconds, Poco::Net::Socket::SELECT_READ | Poco::Net::Socket::SELECT_ERROR);
 }
diff --git a/src/IO/ReadBufferFromPocoSocket.h b/src/IO/ReadBufferFromPocoSocket.h
index 8064cd39246..d182d48d1f8 100644
--- a/src/IO/ReadBufferFromPocoSocket.h
+++ b/src/IO/ReadBufferFromPocoSocket.h
@@ -1,15 +1,14 @@
 #pragma once
 
-#include <Poco/Net/Socket.h>
-
-#include <IO/ReadBuffer.h>
 #include <IO/BufferWithOwnMemory.h>
+#include <IO/ReadBuffer.h>
+
+#include <Poco/Net/Socket.h>
 
 namespace DB
 {
 
-/** Works with the ready Poco::Net::Socket. Blocking operations.
-  */
+/// Works with the ready Poco::Net::Socket. Blocking operations.
 class ReadBufferFromPocoSocket : public BufferWithOwnMemory<ReadBuffer>
 {
 protected:
@@ -24,9 +23,9 @@ protected:
     bool nextImpl() override;
 
 public:
-    ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE);
+    explicit ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE);
 
-    bool poll(size_t timeout_microseconds);
+    bool poll(size_t timeout_microseconds) const;
 
     void setAsyncCallback(std::function<void(Poco::Net::Socket &)> async_callback_) { async_callback = std::move(async_callback_); }
 
diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp
index baa12297718..fe563021d2e 100644
--- a/src/IO/ReadHelpers.cpp
+++ b/src/IO/ReadHelpers.cpp
@@ -1050,6 +1050,25 @@ void readAndThrowException(ReadBuffer & buf, const String & additional_message)
 }
 
 
+void skipToCarriageReturnOrEOF(ReadBuffer & buf)
+{
+    while (!buf.eof())
+    {
+        char * next_pos = find_first_symbols<'\r'>(buf.position(), buf.buffer().end());
+        buf.position() = next_pos;
+
+        if (!buf.hasPendingData())
+            continue;
+
+        if (*buf.position() == '\r')
+        {
+            ++buf.position();
+            return;
+        }
+    }
+}
+
+
 void skipToNextLineOrEOF(ReadBuffer & buf)
 {
     while (!buf.eof())
diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h
index 4482667f447..d203bd7bbee 100644
--- a/src/IO/ReadHelpers.h
+++ b/src/IO/ReadHelpers.h
@@ -536,7 +536,7 @@ void parseUUID(const UInt8 * src36, std::reverse_iterator<UInt8 *> dst16);
 void parseUUIDWithoutSeparator(const UInt8 * src36, std::reverse_iterator<UInt8 *> dst16);
 
 template <typename IteratorSrc, typename IteratorDst>
-void formatHex(IteratorSrc src, IteratorDst dst, const size_t num_bytes);
+void formatHex(IteratorSrc src, IteratorDst dst, size_t num_bytes);
 
 
 template <typename ReturnType>
@@ -1046,10 +1046,14 @@ void readText(std::vector<T> & x, ReadBuffer & buf)
 
 
 /// Skip whitespace characters.
-inline void skipWhitespaceIfAny(ReadBuffer & buf)
+inline void skipWhitespaceIfAny(ReadBuffer & buf, bool one_line = false)
 {
-    while (!buf.eof() && isWhitespaceASCII(*buf.position()))
-        ++buf.position();
+    if (!one_line)
+        while (!buf.eof() && isWhitespaceASCII(*buf.position()))
+            ++buf.position();
+    else
+        while (!buf.eof() && isWhitespaceASCIIOneLine(*buf.position()))
+            ++buf.position();
 }
 
 /// Skips json value.
@@ -1212,6 +1216,9 @@ inline void skipBOMIfExists(ReadBuffer & buf)
 /// Skip to next character after next \n. If no \n in stream, skip to end.
 void skipToNextLineOrEOF(ReadBuffer & buf);
 
+/// Skip to next character after next \r. If no \r in stream, skip to end.
+void skipToCarriageReturnOrEOF(ReadBuffer & buf);
+
 /// Skip to next character after next unescaped \n. If no \n in stream, skip to end. Does not throw on invalid escape sequences.
 void skipToUnescapedNextLineOrEOF(ReadBuffer & buf);
 
diff --git a/src/IO/ya.make b/src/IO/ya.make
index 2ef8bd0a986..980719aa74f 100644
--- a/src/IO/ya.make
+++ b/src/IO/ya.make
@@ -26,6 +26,7 @@ SRCS(
     CascadeWriteBuffer.cpp
     CompressionMethod.cpp
     DoubleConverter.cpp
+    HTTPChunkedReadBuffer.cpp
     HTTPCommon.cpp
     HashingWriteBuffer.cpp
     HexWriteBuffer.cpp
@@ -56,7 +57,6 @@ SRCS(
     WriteBufferFromFileDescriptor.cpp
     WriteBufferFromFileDescriptorDiscardOnFailure.cpp
     WriteBufferFromHTTP.cpp
-    WriteBufferFromHTTPServerResponse.cpp
     WriteBufferFromOStream.cpp
     WriteBufferFromPocoSocket.cpp
     WriteBufferFromTemporaryFile.cpp
diff --git a/src/Interpreters/InterserverIOHandler.h b/src/Interpreters/InterserverIOHandler.h
index 6d62c9651ca..db95a00d0f7 100644
--- a/src/Interpreters/InterserverIOHandler.h
+++ b/src/Interpreters/InterserverIOHandler.h
@@ -8,13 +8,13 @@
 #include <IO/WriteHelpers.h>
 #include <Common/ActionBlocker.h>
 #include <common/types.h>
-#include <map>
-#include <atomic>
-#include <utility>
-#include <shared_mutex>
+
 #include <Poco/Net/HTMLForm.h>
 
-namespace Poco { namespace Net { class HTTPServerResponse; } }
+#include <atomic>
+#include <map>
+#include <shared_mutex>
+#include <utility>
 
 namespace DB
 {
@@ -25,13 +25,16 @@ namespace ErrorCodes
     extern const int NO_SUCH_INTERSERVER_IO_ENDPOINT;
 }
 
+class HTMLForm;
+class HTTPServerResponse;
+
 /** Query processor from other servers.
   */
 class InterserverIOEndpoint
 {
 public:
     virtual std::string getId(const std::string & path) const = 0;
-    virtual void processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & body, WriteBuffer & out, Poco::Net::HTTPServerResponse & response) = 0;
+    virtual void processQuery(const HTMLForm & params, ReadBuffer & body, WriteBuffer & out, HTTPServerResponse & response) = 0;
     virtual ~InterserverIOEndpoint() = default;
 
     /// You need to stop the data transfer if blocker is activated.
diff --git a/src/Server/HTTP/HTMLForm.cpp b/src/Server/HTTP/HTMLForm.cpp
new file mode 100644
index 00000000000..ca407858c33
--- /dev/null
+++ b/src/Server/HTTP/HTMLForm.cpp
@@ -0,0 +1,381 @@
+#include <Server/HTTP/HTMLForm.h>
+
+#include <IO/EmptyReadBuffer.h>
+#include <IO/ReadBufferFromString.h>
+#include <Server/HTTP/ReadHeaders.h>
+
+#include <Poco/CountingStream.h>
+#include <Poco/Net/MultipartReader.h>
+#include <Poco/Net/MultipartWriter.h>
+#include <Poco/Net/NetException.h>
+#include <Poco/Net/NullPartHandler.h>
+#include <Poco/NullStream.h>
+#include <Poco/StreamCopier.h>
+#include <Poco/UTF8String.h>
+
+#include <sstream>
+
+
+namespace DB
+{
+
+namespace
+{
+
+class NullPartHandler : public HTMLForm::PartHandler
+{
+public:
+    void handlePart(const Poco::Net::MessageHeader &, ReadBuffer &) override {}
+};
+
+}
+
+const std::string HTMLForm::ENCODING_URL = "application/x-www-form-urlencoded";
+const std::string HTMLForm::ENCODING_MULTIPART = "multipart/form-data";
+const int HTMLForm::UNKNOWN_CONTENT_LENGTH = -1;
+
+
+HTMLForm::HTMLForm() : field_limit(DFL_FIELD_LIMIT), value_length_limit(DFL_MAX_VALUE_LENGTH), encoding(ENCODING_URL)
+{
+}
+
+
+HTMLForm::HTMLForm(const std::string & encoding_)
+    : field_limit(DFL_FIELD_LIMIT), value_length_limit(DFL_MAX_VALUE_LENGTH), encoding(encoding_)
+{
+}
+
+
+HTMLForm::HTMLForm(const Poco::Net::HTTPRequest & request, ReadBuffer & requestBody, PartHandler & handler)
+    : field_limit(DFL_FIELD_LIMIT), value_length_limit(DFL_MAX_VALUE_LENGTH)
+{
+    load(request, requestBody, handler);
+}
+
+
+HTMLForm::HTMLForm(const Poco::Net::HTTPRequest & request, ReadBuffer & requestBody)
+    : field_limit(DFL_FIELD_LIMIT), value_length_limit(DFL_MAX_VALUE_LENGTH)
+{
+    load(request, requestBody);
+}
+
+
+HTMLForm::HTMLForm(const Poco::Net::HTTPRequest & request) : HTMLForm(Poco::URI(request.getURI()))
+{
+}
+
+HTMLForm::HTMLForm(const Poco::URI & uri) : field_limit(DFL_FIELD_LIMIT), value_length_limit(DFL_MAX_VALUE_LENGTH)
+{
+    ReadBufferFromString istr(uri.getRawQuery()); // STYLE_CHECK_ALLOW_STD_STRING_STREAM
+    readQuery(istr);
+}
+
+
+void HTMLForm::setEncoding(const std::string & encoding_)
+{
+    encoding = encoding_;
+}
+
+
+void HTMLForm::addPart(const std::string & name, Poco::Net::PartSource * source)
+{
+    poco_check_ptr(source);
+
+    Part part;
+    part.name = name;
+    part.source = std::unique_ptr<Poco::Net::PartSource>(source);
+    parts.push_back(std::move(part));
+}
+
+
+void HTMLForm::load(const Poco::Net::HTTPRequest & request, ReadBuffer & requestBody, PartHandler & handler)
+{
+    clear();
+
+    Poco::URI uri(request.getURI());
+    const std::string & query = uri.getRawQuery();
+    if (!query.empty())
+    {
+        ReadBufferFromString istr(query);
+        readQuery(istr);
+    }
+
+    if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST || request.getMethod() == Poco::Net::HTTPRequest::HTTP_PUT)
+    {
+        std::string media_type;
+        NameValueCollection params;
+        Poco::Net::MessageHeader::splitParameters(request.getContentType(), media_type, params);
+        encoding = media_type;
+        if (encoding == ENCODING_MULTIPART)
+        {
+            boundary = params["boundary"];
+            readMultipart(requestBody, handler);
+        }
+        else
+        {
+            readQuery(requestBody);
+        }
+    }
+}
+
+
+void HTMLForm::load(const Poco::Net::HTTPRequest & request, ReadBuffer & requestBody)
+{
+    NullPartHandler nah;
+    load(request, requestBody, nah);
+}
+
+
+void HTMLForm::load(const Poco::Net::HTTPRequest & request)
+{
+    NullPartHandler nah;
+    EmptyReadBuffer nis;
+    load(request, nis, nah);
+}
+
+
+void HTMLForm::read(ReadBuffer & in, PartHandler & handler)
+{
+    if (encoding == ENCODING_URL)
+        readQuery(in);
+    else
+        readMultipart(in, handler);
+}
+
+
+void HTMLForm::read(ReadBuffer & in)
+{
+    readQuery(in);
+}
+
+
+void HTMLForm::read(const std::string & queryString)
+{
+    ReadBufferFromString istr(queryString);
+    readQuery(istr);
+}
+
+
+void HTMLForm::readQuery(ReadBuffer & in)
+{
+    size_t fields = 0;
+    char ch = 0;  // silence "uninitialized" warning from gcc-*
+    bool is_first = true;
+
+    while (true)
+    {
+        if (field_limit > 0 && fields == field_limit)
+            throw Poco::Net::HTMLFormException("Too many form fields");
+
+        std::string name;
+        std::string value;
+
+        while (in.read(ch) && ch != '=' && ch != '&')
+        {
+            if (ch == '+')
+                ch = ' ';
+            if (name.size() < MAX_NAME_LENGTH)
+                name += ch;
+            else
+                throw Poco::Net::HTMLFormException("Field name too long");
+        }
+
+        if (ch == '=')
+        {
+            while (in.read(ch) && ch != '&')
+            {
+                if (ch == '+')
+                    ch = ' ';
+                if (value.size() < value_length_limit)
+                    value += ch;
+                else
+                    throw Poco::Net::HTMLFormException("Field value too long");
+            }
+        }
+
+        // Remove UTF-8 BOM from first name, if present
+        if (is_first)
+            Poco::UTF8::removeBOM(name);
+
+        std::string decoded_name;
+        std::string decoded_value;
+        Poco::URI::decode(name, decoded_name);
+        Poco::URI::decode(value, decoded_value);
+        add(decoded_name, decoded_value);
+        ++fields;
+
+        is_first = false;
+
+        if (in.eof())
+            break;
+    }
+}
+
+
+void HTMLForm::readMultipart(ReadBuffer & in_, PartHandler & handler)
+{
+    /// Assume there is always a boundary provided.
+    assert(!boundary.empty());
+
+    size_t fields = 0;
+    MultipartReadBuffer in(in_, boundary);
+
+    /// Assume there is at least one part
+    in.skipToNextBoundary();
+
+    /// Read each part until next boundary (or last boundary)
+    while (!in.eof())
+    {
+        if (field_limit && fields > field_limit)
+            throw Poco::Net::HTMLFormException("Too many form fields");
+
+        Poco::Net::MessageHeader header;
+        readHeaders(header, in);
+        skipToNextLineOrEOF(in);
+
+        NameValueCollection params;
+        if (header.has("Content-Disposition"))
+        {
+            std::string unused;
+            Poco::Net::MessageHeader::splitParameters(header.get("Content-Disposition"), unused, params);
+        }
+
+        if (params.has("filename"))
+            handler.handlePart(header, in);
+        else
+        {
+            std::string name = params["name"];
+            std::string value;
+            char ch;
+
+            while (in.read(ch))
+            {
+                if (value.size() > value_length_limit)
+                    throw Poco::Net::HTMLFormException("Field value too long");
+                value += ch;
+            }
+
+            add(name, value);
+        }
+
+        ++fields;
+
+        /// If we already encountered EOF for the buffer |in|, it's possible that the next symbol is a start of boundary line.
+        /// In this case reading the boundary line will reset the EOF state, potentially breaking invariant of EOF idempotency -
+        /// if there is such invariant in the first place.
+        if (!in.skipToNextBoundary())
+            break;
+    }
+}
+
+
+void HTMLForm::setFieldLimit(int limit)
+{
+    poco_assert(limit >= 0);
+
+    field_limit = limit;
+}
+
+
+void HTMLForm::setValueLengthLimit(int limit)
+{
+    poco_assert(limit >= 0);
+
+    value_length_limit = limit;
+}
+
+
+HTMLForm::MultipartReadBuffer::MultipartReadBuffer(ReadBuffer & in_, const std::string & boundary_)
+    : ReadBuffer(nullptr, 0), in(in_), boundary("--" + boundary_)
+{
+    /// For consistency with |nextImpl()|
+    position() = in.position();
+}
+
+bool HTMLForm::MultipartReadBuffer::skipToNextBoundary()
+{
+    assert(working_buffer.empty() || eof());
+    assert(boundary_hit);
+
+    boundary_hit = false;
+
+    while (!in.eof())
+    {
+        auto line = readLine();
+        if (startsWith(line, boundary))
+        {
+            set(in.position(), 0);
+            next();  /// We need to restrict our buffer to size of next available line.
+            return !startsWith(line, boundary + "--");
+        }
+    }
+
+    throw Poco::Net::HTMLFormException("No boundary line found");
+}
+
+std::string HTMLForm::MultipartReadBuffer::readLine(bool strict)
+{
+    std::string line;
+    char ch = 0;  // silence "uninitialized" warning from gcc-*
+
+    while (in.read(ch) && ch != '\r' && ch != '\n')
+        line += ch;
+
+    if (in.eof())
+    {
+        if (strict)
+            throw Poco::Net::HTMLFormException("Unexpected end of message");
+        return line;
+    }
+
+    line += ch;
+
+    if (ch == '\r')
+    {
+        if (!in.read(ch) || ch != '\n')
+            throw Poco::Net::HTMLFormException("No CRLF found");
+        else
+            line += ch;
+    }
+
+    return line;
+}
+
+bool HTMLForm::MultipartReadBuffer::nextImpl()
+{
+    if (boundary_hit)
+        return false;
+
+    assert(position() >= in.position());
+
+    in.position() = position();
+
+    /// We expect to start from the first symbol after EOL, so we can put checkpoint
+    /// and safely try to read til the next EOL and check for boundary.
+    in.setCheckpoint();
+
+    /// FIXME: there is an extra copy because we cannot traverse PeekableBuffer from checkpoint to position()
+    ///        since it may store different data parts in different sub-buffers,
+    ///        anyway calling makeContinuousMemoryFromCheckpointToPos() will also make an extra copy.
+    std::string line = readLine(false);
+
+    /// According to RFC2046 the preceding CRLF is a part of boundary line.
+    if (line == "\r\n")
+    {
+        line = readLine(false);
+        boundary_hit = startsWith(line, boundary);
+        if (!boundary_hit) line = "\r\n";
+    }
+    else
+        boundary_hit = startsWith(line, boundary);
+
+    in.rollbackToCheckpoint(true);
+
+    /// Rolling back to checkpoint may change underlying buffers.
+    /// Limit readable data to a single line.
+    BufferBase::set(in.position(), line.size(), 0);
+
+    return !boundary_hit && !line.empty();
+}
+
+}
diff --git a/src/Server/HTTP/HTMLForm.h b/src/Server/HTTP/HTMLForm.h
new file mode 100644
index 00000000000..27be712e1d5
--- /dev/null
+++ b/src/Server/HTTP/HTMLForm.h
@@ -0,0 +1,175 @@
+#pragma once
+
+#include <IO/PeekableReadBuffer.h>
+#include <IO/ReadHelpers.h>
+
+#include <boost/noncopyable.hpp>
+#include <Poco/Net/HTTPRequest.h>
+#include <Poco/Net/NameValueCollection.h>
+#include <Poco/Net/PartSource.h>
+#include <Poco/URI.h>
+
+namespace DB
+{
+
+class HTMLForm : public Poco::Net::NameValueCollection, private boost::noncopyable
+{
+public:
+    class PartHandler;
+
+    enum Options
+    {
+        OPT_USE_CONTENT_LENGTH = 0x01 // don't use Chunked Transfer-Encoding for multipart requests.
+    };
+
+    /// Creates an empty HTMLForm and sets the
+    /// encoding to "application/x-www-form-urlencoded".
+    HTMLForm();
+
+    /// Creates an empty HTMLForm that uses the given encoding.
+    /// Encoding must be either "application/x-www-form-urlencoded" (which is the default) or "multipart/form-data".
+    explicit HTMLForm(const std::string & encoding);
+
+    /// Creates a HTMLForm from the given HTTP request.
+    /// Uploaded files are passed to the given PartHandler.
+    HTMLForm(const Poco::Net::HTTPRequest & request, ReadBuffer & requestBody, PartHandler & handler);
+
+    /// Creates a HTMLForm from the given HTTP request.
+    /// Uploaded files are silently discarded.
+    HTMLForm(const Poco::Net::HTTPRequest & request, ReadBuffer & requestBody);
+
+    /// Creates a HTMLForm from the given HTTP request.
+    /// The request must be a GET request and the form data must be in the query string (URL encoded).
+    /// For POST requests, you must use one of the constructors taking an additional input stream for the request body.
+    explicit HTMLForm(const Poco::Net::HTTPRequest & request);
+
+    explicit HTMLForm(const Poco::URI & uri);
+
+    template <typename T>
+    T getParsed(const std::string & key, T default_value)
+    {
+        auto it = find(key);
+        return (it != end()) ? DB::parse<T>(it->second) : default_value;
+    }
+
+    template <typename T>
+    T getParsed(const std::string & key)
+    {
+        return DB::parse<T>(get(key));
+    }
+
+    /// Sets the encoding used for posting the form.
+    /// Encoding must be either "application/x-www-form-urlencoded" (which is the default) or "multipart/form-data".
+    void setEncoding(const std::string & encoding);
+
+    /// Returns the encoding used for posting the form.
+    const std::string & getEncoding() const { return encoding; }
+
+    /// Adds an part/attachment (file upload) to the form.
+    /// The form takes ownership of the PartSource and deletes it when it is no longer needed.
+    /// The part will only be sent if the encoding set for the form is "multipart/form-data"
+    void addPart(const std::string & name, Poco::Net::PartSource * pSource);
+
+    /// Reads the form data from the given HTTP request.
+    /// Uploaded files are passed to the given PartHandler.
+    void load(const Poco::Net::HTTPRequest & request, ReadBuffer & requestBody, PartHandler & handler);
+
+    /// Reads the form data from the given HTTP request.
+    /// Uploaded files are silently discarded.
+    void load(const Poco::Net::HTTPRequest & request, ReadBuffer & requestBody);
+
+    /// Reads the form data from the given HTTP request.
+    /// The request must be a GET request and the form data must be in the query string (URL encoded).
+    /// For POST requests, you must use one of the overloads taking an additional input stream for the request body.
+    void load(const Poco::Net::HTTPRequest & request);
+
+    /// Reads the form data from the given input stream.
+    /// The form data read from the stream must be in the encoding specified for the form.
+    /// Note that read() does not clear the form before reading the new values.
+    void read(ReadBuffer & in, PartHandler & handler);
+
+    /// Reads the URL-encoded form data from the given input stream.
+    /// Note that read() does not clear the form before reading the new values.
+    void read(ReadBuffer & in);
+
+    /// Reads the form data from the given HTTP query string.
+    /// Note that read() does not clear the form before reading the new values.
+    void read(const std::string & queryString);
+
+    /// Returns the MIME boundary used for writing multipart form data.
+    const std::string & getBoundary() const { return boundary; }
+
+    /// Returns the maximum number of header fields allowed.
+    /// See setFieldLimit() for more information.
+    int getFieldLimit() const { return field_limit; }
+
+    /// Sets the maximum number of header fields allowed. This limit is used to defend certain kinds of denial-of-service attacks.
+    /// Specify 0 for unlimited (not recommended). The default limit is 100.
+    void setFieldLimit(int limit);
+
+    /// Sets the maximum size for form field values stored as strings.
+    void setValueLengthLimit(int limit);
+
+    /// Returns the maximum size for form field values stored as strings.
+    int getValueLengthLimit() const { return value_length_limit; }
+
+    static const std::string ENCODING_URL; /// "application/x-www-form-urlencoded"
+    static const std::string ENCODING_MULTIPART; /// "multipart/form-data"
+    static const int UNKNOWN_CONTENT_LENGTH;
+
+protected:
+    void readQuery(ReadBuffer & in);
+    void readMultipart(ReadBuffer & in, PartHandler & handler);
+
+private:
+    /// This buffer provides data line by line to check for boundary line in a convenient way.
+    class MultipartReadBuffer;
+
+    enum Limits
+    {
+        DFL_FIELD_LIMIT = 100,
+        MAX_NAME_LENGTH = 1024,
+        DFL_MAX_VALUE_LENGTH = 256 * 1024
+    };
+
+    struct Part
+    {
+        std::string name;
+        std::unique_ptr<Poco::Net::PartSource> source;
+    };
+
+    using PartVec = std::vector<Part>;
+
+    size_t field_limit;
+    size_t value_length_limit;
+    std::string encoding;
+    std::string boundary;
+    PartVec parts;
+};
+
+class HTMLForm::PartHandler
+{
+public:
+    virtual ~PartHandler() = default;
+    virtual void handlePart(const Poco::Net::MessageHeader &, ReadBuffer &) = 0;
+};
+
+class HTMLForm::MultipartReadBuffer : public ReadBuffer
+{
+public:
+    MultipartReadBuffer(ReadBuffer & in, const std::string & boundary);
+
+    /// Returns false if last boundary found.
+    bool skipToNextBoundary();
+
+private:
+    PeekableReadBuffer in;
+    const std::string boundary;
+    bool boundary_hit = true;
+
+    std::string readLine(bool strict = true);
+
+    bool nextImpl() override;
+};
+
+}
diff --git a/src/Server/HTTP/HTTPRequest.h b/src/Server/HTTP/HTTPRequest.h
new file mode 100644
index 00000000000..40839cbcdd2
--- /dev/null
+++ b/src/Server/HTTP/HTTPRequest.h
@@ -0,0 +1,10 @@
+#pragma once
+
+#include <Poco/Net/HTTPRequest.h>
+
+namespace DB
+{
+
+using HTTPRequest = Poco::Net::HTTPRequest;
+
+}
diff --git a/src/Server/HTTP/HTTPRequestHandler.h b/src/Server/HTTP/HTTPRequestHandler.h
new file mode 100644
index 00000000000..19340866bb7
--- /dev/null
+++ b/src/Server/HTTP/HTTPRequestHandler.h
@@ -0,0 +1,19 @@
+#pragma once
+
+#include <Server/HTTP/HTTPServerRequest.h>
+#include <Server/HTTP/HTTPServerResponse.h>
+
+#include <boost/noncopyable.hpp>
+
+namespace DB
+{
+
+class HTTPRequestHandler : private boost::noncopyable
+{
+public:
+    virtual ~HTTPRequestHandler() = default;
+
+    virtual void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) = 0;
+};
+
+}
diff --git a/src/Server/HTTP/HTTPRequestHandlerFactory.h b/src/Server/HTTP/HTTPRequestHandlerFactory.h
new file mode 100644
index 00000000000..3d50bf0a2ed
--- /dev/null
+++ b/src/Server/HTTP/HTTPRequestHandlerFactory.h
@@ -0,0 +1,20 @@
+#pragma once
+
+#include <Server/HTTP/HTTPRequestHandler.h>
+
+#include <boost/noncopyable.hpp>
+
+namespace DB
+{
+
+class HTTPRequestHandlerFactory : private boost::noncopyable
+{
+public:
+    virtual ~HTTPRequestHandlerFactory() = default;
+
+    virtual std::unique_ptr<HTTPRequestHandler> createRequestHandler(const HTTPServerRequest & request) = 0;
+};
+
+using HTTPRequestHandlerFactoryPtr = std::shared_ptr<HTTPRequestHandlerFactory>;
+
+}
diff --git a/src/Server/HTTP/HTTPResponse.h b/src/Server/HTTP/HTTPResponse.h
new file mode 100644
index 00000000000..c73bcec6c39
--- /dev/null
+++ b/src/Server/HTTP/HTTPResponse.h
@@ -0,0 +1,10 @@
+#pragma once
+
+#include <Poco/Net/HTTPResponse.h>
+
+namespace DB
+{
+
+using HTTPResponse = Poco::Net::HTTPResponse;
+
+}
diff --git a/src/Server/HTTP/HTTPServer.cpp b/src/Server/HTTP/HTTPServer.cpp
new file mode 100644
index 00000000000..3e050080bdd
--- /dev/null
+++ b/src/Server/HTTP/HTTPServer.cpp
@@ -0,0 +1,48 @@
+#include <Server/HTTP/HTTPServer.h>
+
+#include <Server/HTTP/HTTPServerConnectionFactory.h>
+
+
+namespace DB
+{
+HTTPServer::HTTPServer(
+    const Context & context,
+    HTTPRequestHandlerFactoryPtr factory_,
+    UInt16 portNumber,
+    Poco::Net::HTTPServerParams::Ptr params)
+    : TCPServer(new HTTPServerConnectionFactory(context, params, factory_), portNumber, params), factory(factory_)
+{
+}
+
+HTTPServer::HTTPServer(
+    const Context & context,
+    HTTPRequestHandlerFactoryPtr factory_,
+    const Poco::Net::ServerSocket & socket,
+    Poco::Net::HTTPServerParams::Ptr params)
+    : TCPServer(new HTTPServerConnectionFactory(context, params, factory_), socket, params), factory(factory_)
+{
+}
+
+HTTPServer::HTTPServer(
+    const Context & context,
+    HTTPRequestHandlerFactoryPtr factory_,
+    Poco::ThreadPool & threadPool,
+    const Poco::Net::ServerSocket & socket,
+    Poco::Net::HTTPServerParams::Ptr params)
+    : TCPServer(new HTTPServerConnectionFactory(context, params, factory_), threadPool, socket, params), factory(factory_)
+{
+}
+
+HTTPServer::~HTTPServer()
+{
+    /// We should call stop and join thread here instead of destructor of parent TCPHandler,
+    /// because there's possible race on 'vptr' between this virtual destructor and 'run' method.
+    stop();
+}
+
+void HTTPServer::stopAll(bool /* abortCurrent */)
+{
+    stop();
+}
+
+}
diff --git a/src/Server/HTTP/HTTPServer.h b/src/Server/HTTP/HTTPServer.h
new file mode 100644
index 00000000000..1ce62c65ca2
--- /dev/null
+++ b/src/Server/HTTP/HTTPServer.h
@@ -0,0 +1,46 @@
+#pragma once
+
+#include <Server/HTTP/HTTPRequestHandlerFactory.h>
+
+#include <Poco/Net/HTTPServerParams.h>
+#include <Poco/Net/TCPServer.h>
+
+#include <common/types.h>
+
+
+namespace DB
+{
+
+class Context;
+
+class HTTPServer : public Poco::Net::TCPServer
+{
+public:
+    explicit HTTPServer(
+        const Context & context,
+        HTTPRequestHandlerFactoryPtr factory,
+        UInt16 portNumber = 80,
+        Poco::Net::HTTPServerParams::Ptr params = new Poco::Net::HTTPServerParams);
+
+    HTTPServer(
+        const Context & context,
+        HTTPRequestHandlerFactoryPtr factory,
+        const Poco::Net::ServerSocket & socket,
+        Poco::Net::HTTPServerParams::Ptr params);
+
+    HTTPServer(
+        const Context & context,
+        HTTPRequestHandlerFactoryPtr factory,
+        Poco::ThreadPool & threadPool,
+        const Poco::Net::ServerSocket & socket,
+        Poco::Net::HTTPServerParams::Ptr params);
+
+    ~HTTPServer() override;
+
+    void stopAll(bool abortCurrent = false);
+
+private:
+    HTTPRequestHandlerFactoryPtr factory;
+};
+
+}
diff --git a/src/Server/HTTP/HTTPServerConnection.cpp b/src/Server/HTTP/HTTPServerConnection.cpp
new file mode 100644
index 00000000000..e2ee4c8882b
--- /dev/null
+++ b/src/Server/HTTP/HTTPServerConnection.cpp
@@ -0,0 +1,128 @@
+#include <Server/HTTP/HTTPServerConnection.h>
+
+#include <Poco/Net/NetException.h>
+
+namespace DB
+{
+
+HTTPServerConnection::HTTPServerConnection(
+    const Context & context_,
+    const Poco::Net::StreamSocket & socket,
+    Poco::Net::HTTPServerParams::Ptr params_,
+    HTTPRequestHandlerFactoryPtr factory_)
+    : TCPServerConnection(socket), context(context_), params(params_), factory(factory_), stopped(false)
+{
+    poco_check_ptr(factory);
+}
+
+void HTTPServerConnection::run()
+{
+    std::string server = params->getSoftwareVersion();
+    Poco::Net::HTTPServerSession session(socket(), params);
+
+    while (!stopped && session.hasMoreRequests())
+    {
+        try
+        {
+            std::unique_lock<std::mutex> lock(mutex);
+            if (!stopped)
+            {
+                HTTPServerResponse response(session);
+                HTTPServerRequest request(context, response, session);
+
+                Poco::Timestamp now;
+                response.setDate(now);
+                response.setVersion(request.getVersion());
+                response.setKeepAlive(params->getKeepAlive() && request.getKeepAlive() && session.canKeepAlive());
+                if (!server.empty())
+                    response.set("Server", server);
+                try
+                {
+                    std::unique_ptr<HTTPRequestHandler> handler(factory->createRequestHandler(request));
+
+                    if (handler)
+                    {
+                        if (request.getExpectContinue() && response.getStatus() == Poco::Net::HTTPResponse::HTTP_OK)
+                            response.sendContinue();
+
+                        handler->handleRequest(request, response);
+                        session.setKeepAlive(params->getKeepAlive() && response.getKeepAlive() && session.canKeepAlive());
+                    }
+                    else
+                        sendErrorResponse(session, Poco::Net::HTTPResponse::HTTP_NOT_IMPLEMENTED);
+                }
+                catch (Poco::Exception &)
+                {
+                    if (!response.sent())
+                    {
+                        try
+                        {
+                            sendErrorResponse(session, Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR);
+                        }
+                        catch (...)
+                        {
+                        }
+                    }
+                    throw;
+                }
+            }
+        }
+        catch (Poco::Net::NoMessageException &)
+        {
+            break;
+        }
+        catch (Poco::Net::MessageException &)
+        {
+            sendErrorResponse(session, Poco::Net::HTTPResponse::HTTP_BAD_REQUEST);
+        }
+        catch (Poco::Exception &)
+        {
+            if (session.networkException())
+            {
+                session.networkException()->rethrow();
+            }
+            else
+                throw;
+        }
+    }
+}
+
+// static
+void HTTPServerConnection::sendErrorResponse(Poco::Net::HTTPServerSession & session, Poco::Net::HTTPResponse::HTTPStatus status)
+{
+    HTTPServerResponse response(session);
+    response.setVersion(Poco::Net::HTTPMessage::HTTP_1_1);
+    response.setStatusAndReason(status);
+    response.setKeepAlive(false);
+    response.send();
+    session.setKeepAlive(false);
+}
+
+void HTTPServerConnection::onServerStopped(const bool & abortCurrent)
+{
+    stopped = true;
+    if (abortCurrent)
+    {
+        try
+        {
+            socket().shutdown();
+        }
+        catch (...)
+        {
+        }
+    }
+    else
+    {
+        std::unique_lock<std::mutex> lock(mutex);
+
+        try
+        {
+            socket().shutdown();
+        }
+        catch (...)
+        {
+        }
+    }
+}
+
+}
diff --git a/src/Server/HTTP/HTTPServerConnection.h b/src/Server/HTTP/HTTPServerConnection.h
new file mode 100644
index 00000000000..589c33025bf
--- /dev/null
+++ b/src/Server/HTTP/HTTPServerConnection.h
@@ -0,0 +1,36 @@
+#pragma once
+
+#include <Interpreters/Context.h>
+#include <Server/HTTP/HTTPRequestHandlerFactory.h>
+
+#include <Poco/Net/HTTPServerParams.h>
+#include <Poco/Net/HTTPServerSession.h>
+#include <Poco/Net/TCPServerConnection.h>
+
+namespace DB
+{
+
+class HTTPServerConnection : public Poco::Net::TCPServerConnection
+{
+public:
+    HTTPServerConnection(
+        const Context & context,
+        const Poco::Net::StreamSocket & socket,
+        Poco::Net::HTTPServerParams::Ptr params,
+        HTTPRequestHandlerFactoryPtr factory);
+
+    void run() override;
+
+protected:
+    static void sendErrorResponse(Poco::Net::HTTPServerSession & session, Poco::Net::HTTPResponse::HTTPStatus status);
+    void onServerStopped(const bool & abortCurrent);
+
+private:
+    Context context;
+    Poco::Net::HTTPServerParams::Ptr params;
+    HTTPRequestHandlerFactoryPtr factory;
+    bool stopped;
+    std::mutex mutex;  // guards the |factory| with assumption that creating handlers is not thread-safe.
+};
+
+}
diff --git a/src/Server/HTTP/HTTPServerConnectionFactory.cpp b/src/Server/HTTP/HTTPServerConnectionFactory.cpp
new file mode 100644
index 00000000000..876ccb9096b
--- /dev/null
+++ b/src/Server/HTTP/HTTPServerConnectionFactory.cpp
@@ -0,0 +1,19 @@
+#include <Server/HTTP/HTTPServerConnectionFactory.h>
+
+#include <Server/HTTP/HTTPServerConnection.h>
+
+namespace DB
+{
+HTTPServerConnectionFactory::HTTPServerConnectionFactory(
+    const Context & context_, Poco::Net::HTTPServerParams::Ptr params_, HTTPRequestHandlerFactoryPtr factory_)
+    : context(context_), params(params_), factory(factory_)
+{
+    poco_check_ptr(factory);
+}
+
+Poco::Net::TCPServerConnection * HTTPServerConnectionFactory::createConnection(const Poco::Net::StreamSocket & socket)
+{
+    return new HTTPServerConnection(context, socket, params, factory);
+}
+
+}
diff --git a/src/Server/HTTP/HTTPServerConnectionFactory.h b/src/Server/HTTP/HTTPServerConnectionFactory.h
new file mode 100644
index 00000000000..4f8ca43cbfb
--- /dev/null
+++ b/src/Server/HTTP/HTTPServerConnectionFactory.h
@@ -0,0 +1,25 @@
+#pragma once
+
+#include <Interpreters/Context.h>
+#include <Server/HTTP/HTTPRequestHandlerFactory.h>
+
+#include <Poco/Net/HTTPServerParams.h>
+#include <Poco/Net/TCPServerConnectionFactory.h>
+
+namespace DB
+{
+
+class HTTPServerConnectionFactory : public Poco::Net::TCPServerConnectionFactory
+{
+public:
+    HTTPServerConnectionFactory(const Context & context, Poco::Net::HTTPServerParams::Ptr params, HTTPRequestHandlerFactoryPtr factory);
+
+    Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket) override;
+
+private:
+    Context context;
+    Poco::Net::HTTPServerParams::Ptr params;
+    HTTPRequestHandlerFactoryPtr factory;
+};
+
+}
diff --git a/src/Server/HTTP/HTTPServerRequest.cpp b/src/Server/HTTP/HTTPServerRequest.cpp
new file mode 100644
index 00000000000..bdba6a51d91
--- /dev/null
+++ b/src/Server/HTTP/HTTPServerRequest.cpp
@@ -0,0 +1,123 @@
+#include <Server/HTTP/HTTPServerRequest.h>
+
+#include <Interpreters/Context.h>
+#include <IO/EmptyReadBuffer.h>
+#include <IO/HTTPChunkedReadBuffer.h>
+#include <IO/LimitReadBuffer.h>
+#include <IO/ReadBufferFromPocoSocket.h>
+#include <IO/ReadHelpers.h>
+#include <Server/HTTP/HTTPServerResponse.h>
+#include <Server/HTTP/ReadHeaders.h>
+
+#include <Poco/Net/HTTPHeaderStream.h>
+#include <Poco/Net/HTTPStream.h>
+#include <Poco/Net/NetException.h>
+
+namespace DB
+{
+
+HTTPServerRequest::HTTPServerRequest(const Context & context, HTTPServerResponse & response, Poco::Net::HTTPServerSession & session)
+{
+    response.attachRequest(this);
+
+    /// Now that we know socket is still connected, obtain addresses
+    client_address = session.clientAddress();
+    server_address = session.serverAddress();
+
+    auto receive_timeout = context.getSettingsRef().http_receive_timeout;
+    auto send_timeout = context.getSettingsRef().http_send_timeout;
+    auto max_query_size = context.getSettingsRef().max_query_size;
+
+    session.socket().setReceiveTimeout(receive_timeout);
+    session.socket().setSendTimeout(send_timeout);
+
+    auto in = std::make_unique<ReadBufferFromPocoSocket>(session.socket());
+    socket = session.socket().impl();
+
+    readRequest(*in);  /// Try parse according to RFC7230
+
+    if (getChunkedTransferEncoding())
+        stream = std::make_unique<HTTPChunkedReadBuffer>(std::move(in), max_query_size);
+    else if (hasContentLength())
+        stream = std::make_unique<LimitReadBuffer>(std::move(in), getContentLength(), false);
+    else if (getMethod() != HTTPRequest::HTTP_GET && getMethod() != HTTPRequest::HTTP_HEAD && getMethod() != HTTPRequest::HTTP_DELETE)
+        stream = std::move(in);
+    else
+        /// We have to distinguish empty buffer and nullptr.
+        stream = std::make_unique<EmptyReadBuffer>();
+}
+
+bool HTTPServerRequest::checkPeerConnected() const
+{
+    try
+    {
+        char b;
+        if (!socket->receiveBytes(&b, 1, MSG_DONTWAIT | MSG_PEEK))
+            return false;
+    }
+    catch (Poco::TimeoutException &)
+    {
+    }
+    catch (...)
+    {
+        return false;
+    }
+
+    return true;
+}
+
+void HTTPServerRequest::readRequest(ReadBuffer & in)
+{
+    char ch;
+    std::string method;
+    std::string uri;
+    std::string version;
+
+    method.reserve(16);
+    uri.reserve(64);
+    version.reserve(16);
+
+    if (in.eof())
+        throw Poco::Net::NoMessageException();
+
+    skipWhitespaceIfAny(in);
+
+    if (in.eof())
+        throw Poco::Net::MessageException("No HTTP request header");
+
+    while (in.read(ch) && !Poco::Ascii::isSpace(ch) && method.size() <= MAX_METHOD_LENGTH)
+        method += ch;
+
+    if (method.size() > MAX_METHOD_LENGTH)
+        throw Poco::Net::MessageException("HTTP request method invalid or too long");
+
+    skipWhitespaceIfAny(in);
+
+    while (in.read(ch) && !Poco::Ascii::isSpace(ch) && uri.size() <= MAX_URI_LENGTH)
+        uri += ch;
+
+    if (uri.size() > MAX_URI_LENGTH)
+        throw Poco::Net::MessageException("HTTP request URI invalid or too long");
+
+    skipWhitespaceIfAny(in);
+
+    while (in.read(ch) && !Poco::Ascii::isSpace(ch) && version.size() <= MAX_VERSION_LENGTH)
+        version += ch;
+
+    if (version.size() > MAX_VERSION_LENGTH)
+        throw Poco::Net::MessageException("Invalid HTTP version string");
+
+    // since HTTP always use Windows-style EOL '\r\n' we always can safely skip to '\n'
+
+    skipToNextLineOrEOF(in);
+
+    readHeaders(*this, in);
+
+    skipToNextLineOrEOF(in);
+
+    setMethod(method);
+    setURI(uri);
+    setVersion(version);
+}
+
+}
diff --git a/src/Server/HTTP/HTTPServerRequest.h b/src/Server/HTTP/HTTPServerRequest.h
new file mode 100644
index 00000000000..7fd54850212
--- /dev/null
+++ b/src/Server/HTTP/HTTPServerRequest.h
@@ -0,0 +1,59 @@
+#pragma once
+
+#include <IO/ReadBuffer.h>
+#include <Server/HTTP/HTTPRequest.h>
+
+#include <Poco/Net/HTTPServerSession.h>
+
+namespace DB
+{
+
+class Context;
+class HTTPServerResponse;
+class ReadBufferFromPocoSocket;
+
+class HTTPServerRequest : public HTTPRequest
+{
+public:
+    HTTPServerRequest(const Context & context, HTTPServerResponse & response, Poco::Net::HTTPServerSession & session);
+
+    /// FIXME: it's a little bit inconvenient interface. The rationale is that all other ReadBuffer's wrap each other
+    ///        via unique_ptr - but we can't inherit HTTPServerRequest from ReadBuffer and pass it around,
+    ///        since we also need it in other places.
+
+    /// Returns the input stream for reading the request body.
+    ReadBuffer & getStream()
+    {
+        poco_check_ptr(stream);
+        return *stream;
+    }
+
+    bool checkPeerConnected() const;
+
+    /// Returns the client's address.
+    const Poco::Net::SocketAddress & clientAddress() const { return client_address; }
+
+    /// Returns the server's address.
+    const Poco::Net::SocketAddress & serverAddress() const { return server_address; }
+
+private:
+    /// Limits for basic sanity checks when reading a header
+    enum Limits
+    {
+        MAX_NAME_LENGTH = 256,
+        MAX_VALUE_LENGTH = 8192,
+        MAX_METHOD_LENGTH = 32,
+        MAX_URI_LENGTH = 16384,
+        MAX_VERSION_LENGTH = 8,
+        MAX_FIELDS_NUMBER = 100,
+    };
+
+    std::unique_ptr<ReadBuffer> stream;
+    Poco::Net::SocketImpl * socket;
+    Poco::Net::SocketAddress client_address;
+    Poco::Net::SocketAddress server_address;
+
+    void readRequest(ReadBuffer & in);
+};
+
+}
diff --git a/src/Server/HTTP/HTTPServerResponse.cpp b/src/Server/HTTP/HTTPServerResponse.cpp
new file mode 100644
index 00000000000..e3d52fffa80
--- /dev/null
+++ b/src/Server/HTTP/HTTPServerResponse.cpp
@@ -0,0 +1,163 @@
+#include <Server/HTTP/HTTPServerResponse.h>
+
+#include <Server/HTTP/HTTPServerRequest.h>
+
+#include <Poco/CountingStream.h>
+#include <Poco/DateTimeFormat.h>
+#include <Poco/DateTimeFormatter.h>
+#include <Poco/File.h>
+#include <Poco/FileStream.h>
+#include <Poco/Net/HTTPChunkedStream.h>
+#include <Poco/Net/HTTPFixedLengthStream.h>
+#include <Poco/Net/HTTPHeaderStream.h>
+#include <Poco/Net/HTTPStream.h>
+#include <Poco/StreamCopier.h>
+
+namespace DB
+{
+
+HTTPServerResponse::HTTPServerResponse(Poco::Net::HTTPServerSession & session_) : session(session_)
+{
+}
+
+void HTTPServerResponse::sendContinue()
+{
+    Poco::Net::HTTPHeaderOutputStream hs(session);
+    hs << getVersion() << " 100 Continue\r\n\r\n";
+}
+
+std::shared_ptr<std::ostream> HTTPServerResponse::send()
+{
+    poco_assert(!stream);
+
+    if ((request && request->getMethod() == HTTPRequest::HTTP_HEAD) || getStatus() < 200 || getStatus() == HTTPResponse::HTTP_NO_CONTENT
+        || getStatus() == HTTPResponse::HTTP_NOT_MODIFIED)
+    {
+        Poco::CountingOutputStream cs;
+        write(cs);
+        stream = std::make_shared<Poco::Net::HTTPFixedLengthOutputStream>(session, cs.chars());
+        write(*stream);
+    }
+    else if (getChunkedTransferEncoding())
+    {
+        Poco::Net::HTTPHeaderOutputStream hs(session);
+        write(hs);
+        stream = std::make_shared<Poco::Net::HTTPChunkedOutputStream>(session);
+    }
+    else if (hasContentLength())
+    {
+        Poco::CountingOutputStream cs;
+        write(cs);
+        stream = std::make_shared<Poco::Net::HTTPFixedLengthOutputStream>(session, getContentLength64() + cs.chars());
+        write(*stream);
+    }
+    else
+    {
+        stream = std::make_shared<Poco::Net::HTTPOutputStream>(session);
+        setKeepAlive(false);
+        write(*stream);
+    }
+
+    return stream;
+}
+
+std::pair<std::shared_ptr<std::ostream>, std::shared_ptr<std::ostream>> HTTPServerResponse::beginSend()
+{
+    poco_assert(!stream);
+    poco_assert(!header_stream);
+
+    /// NOTE: Code is not exception safe.
+
+    if ((request && request->getMethod() == HTTPRequest::HTTP_HEAD) || getStatus() < 200 || getStatus() == HTTPResponse::HTTP_NO_CONTENT
+        || getStatus() == HTTPResponse::HTTP_NOT_MODIFIED)
+    {
+        throw Poco::Exception("HTTPServerResponse::beginSend is invalid for HEAD request");
+    }
+    else if (getChunkedTransferEncoding())
+    {
+        header_stream = std::make_shared<Poco::Net::HTTPHeaderOutputStream>(session);
+        beginWrite(*header_stream);
+        stream = std::make_shared<Poco::Net::HTTPChunkedOutputStream>(session);
+    }
+    else if (hasContentLength())
+    {
+        throw Poco::Exception("HTTPServerResponse::beginSend is invalid for response with Content-Length header");
+    }
+    else
+    {
+        stream = std::make_shared<Poco::Net::HTTPOutputStream>(session);
+        header_stream = stream;
+        setKeepAlive(false);
+        beginWrite(*stream);
+    }
+
+    return std::make_pair(header_stream, stream);
+}
+
+void HTTPServerResponse::sendFile(const std::string & path, const std::string & mediaType)
+{
+    poco_assert(!stream);
+
+    Poco::File f(path);
+    Poco::Timestamp date_time = f.getLastModified();
+    Poco::File::FileSize length = f.getSize();
+    set("Last-Modified", Poco::DateTimeFormatter::format(date_time, Poco::DateTimeFormat::HTTP_FORMAT));
+    setContentLength64(length);
+    setContentType(mediaType);
+    setChunkedTransferEncoding(false);
+
+    Poco::FileInputStream istr(path);
+    if (istr.good())
+    {
+        stream = std::make_shared<Poco::Net::HTTPHeaderOutputStream>(session);
+        write(*stream);
+        if (request && request->getMethod() != HTTPRequest::HTTP_HEAD)
+        {
+            Poco::StreamCopier::copyStream(istr, *stream);
+        }
+    }
+    else
+        throw Poco::OpenFileException(path);
+}
+
+void HTTPServerResponse::sendBuffer(const void * buffer, std::size_t length)
+{
+    poco_assert(!stream);
+
+    setContentLength(static_cast<int>(length));
+    setChunkedTransferEncoding(false);
+
+    stream = std::make_shared<Poco::Net::HTTPHeaderOutputStream>(session);
+    write(*stream);
+    if (request && request->getMethod() != HTTPRequest::HTTP_HEAD)
+    {
+        stream->write(static_cast<const char *>(buffer), static_cast<std::streamsize>(length));
+    }
+}
+
+void HTTPServerResponse::redirect(const std::string & uri, HTTPStatus status)
+{
+    poco_assert(!stream);
+
+    setContentLength(0);
+    setChunkedTransferEncoding(false);
+
+    setStatusAndReason(status);
+    set("Location", uri);
+
+    stream = std::make_shared<Poco::Net::HTTPHeaderOutputStream>(session);
+    write(*stream);
+}
+
+void HTTPServerResponse::requireAuthentication(const std::string & realm)
+{
+    poco_assert(!stream);
+
+    setStatusAndReason(HTTPResponse::HTTP_UNAUTHORIZED);
+    std::string auth("Basic realm=\"");
+    auth.append(realm);
+    auth.append("\"");
+    set("WWW-Authenticate", auth);
+}
+
+}
diff --git a/src/Server/HTTP/HTTPServerResponse.h b/src/Server/HTTP/HTTPServerResponse.h
new file mode 100644
index 00000000000..82221ce3a83
--- /dev/null
+++ b/src/Server/HTTP/HTTPServerResponse.h
@@ -0,0 +1,91 @@
+#pragma once
+
+#include <Server/HTTP/HTTPResponse.h>
+
+#include <Poco/Net/HTTPServerSession.h>
+#include <Poco/Net/HTTPResponse.h>
+
+#include <iostream>
+#include <memory>
+
+namespace DB
+{
+
+class HTTPServerRequest;
+
+class HTTPServerResponse : public HTTPResponse
+{
+public:
+    explicit HTTPServerResponse(Poco::Net::HTTPServerSession & session);
+
+    void sendContinue(); /// Sends a 100 Continue response to the client.
+
+    /// Sends the response header to the client and
+    /// returns an output stream for sending the
+    /// response body.
+    ///
+    /// Must not be called after beginSend(), sendFile(), sendBuffer()
+    /// or redirect() has been called.
+    std::shared_ptr<std::ostream> send(); /// TODO: use some WriteBuffer implementation here.
+
+    /// Sends the response headers to the client
+    /// but do not finish headers with \r\n,
+    /// allowing to continue sending additional header fields.
+    ///
+    /// Must not be called after send(), sendFile(), sendBuffer()
+    /// or redirect() has been called.
+    std::pair<std::shared_ptr<std::ostream>, std::shared_ptr<std::ostream>> beginSend(); /// TODO: use some WriteBuffer implementation here.
+
+    /// Sends the response header to the client, followed
+    /// by the content of the given file.
+    ///
+    /// Must not be called after send(), sendBuffer()
+    /// or redirect() has been called.
+    ///
+    /// Throws a FileNotFoundException if the file
+    /// cannot be found, or an OpenFileException if
+    /// the file cannot be opened.
+    void sendFile(const std::string & path, const std::string & mediaType);
+
+    /// Sends the response header to the client, followed
+    /// by the contents of the given buffer.
+    ///
+    /// The Content-Length header of the response is set
+    /// to length and chunked transfer encoding is disabled.
+    ///
+    /// If both the HTTP message header and body (from the
+    /// given buffer) fit into one single network packet, the
+    /// complete response can be sent in one network packet.
+    ///
+    /// Must not be called after send(), sendFile()
+    /// or redirect() has been called.
+    void sendBuffer(const void * pBuffer, std::size_t length); /// FIXME: do we need this one?
+
+    /// Sets the status code, which must be one of
+    /// HTTP_MOVED_PERMANENTLY (301), HTTP_FOUND (302),
+    /// or HTTP_SEE_OTHER (303),
+    /// and sets the "Location" header field
+    /// to the given URI, which according to
+    /// the HTTP specification, must be absolute.
+    ///
+    /// Must not be called after send() has been called.
+    void redirect(const std::string & uri, Poco::Net::HTTPResponse::HTTPStatus status = Poco::Net::HTTPResponse::HTTP_FOUND);
+
+    void requireAuthentication(const std::string & realm);
+    /// Sets the status code to 401 (Unauthorized)
+    /// and sets the "WWW-Authenticate" header field
+    /// according to the given realm.
+
+    /// Returns true if the response (header) has been sent.
+    bool sent() const { return !!stream; }
+
+    void attachRequest(HTTPServerRequest * request_) { request = request_; }
+
+private:
+    Poco::Net::HTTPServerSession & session;
+    HTTPServerRequest * request;
+    std::shared_ptr<std::ostream> stream;
+    std::shared_ptr<std::ostream> header_stream;
+};
+
+}
diff --git a/src/Server/HTTP/ReadHeaders.cpp b/src/Server/HTTP/ReadHeaders.cpp
new file mode 100644
index 00000000000..77ec48c11b1
--- /dev/null
+++ b/src/Server/HTTP/ReadHeaders.cpp
@@ -0,0 +1,88 @@
+#include <Server/HTTP/ReadHeaders.h>
+
+#include <IO/ReadBuffer.h>
+#include <IO/ReadHelpers.h>
+
+#include <Poco/Net/NetException.h>
+
+namespace DB
+{
+
+void readHeaders(
+    Poco::Net::MessageHeader & headers, ReadBuffer & in, size_t max_fields_number, size_t max_name_length, size_t max_value_length)
+{
+    char ch = 0;  // silence uninitialized warning from gcc-*
+    std::string name;
+    std::string value;
+
+    name.reserve(32);
+    value.reserve(64);
+
+    size_t fields = 0;
+
+    while (true)
+    {
+        if (fields > max_fields_number)
+            throw Poco::Net::MessageException("Too many header fields");
+
+        name.clear();
+        value.clear();
+
+        /// Field name
+        while (in.peek(ch) && ch != ':' && !Poco::Ascii::isSpace(ch) && name.size() <= max_name_length)
+        {
+            name += ch;
+            in.ignore();
+        }
+
+        if (in.eof())
+            throw Poco::Net::MessageException("Field is invalid");
+
+        if (name.empty())
+        {
+            if (ch == '\r')
+                /// Start of the empty-line delimiter
+                break;
+            if (ch == ':')
+                throw Poco::Net::MessageException("Field name is empty");
+        }
+        else
+        {
+            if (name.size() > max_name_length)
+                throw Poco::Net::MessageException("Field name is too long");
+            if (ch != ':')
+                throw Poco::Net::MessageException("Field name is invalid or no colon found");
+        }
+
+        in.ignore();
+
+        skipWhitespaceIfAny(in, true);
+
+        if (in.eof())
+            throw Poco::Net::MessageException("Field is invalid");
+
+        /// Field value - folded values not supported.
+        while (in.read(ch) && ch != '\r' && ch != '\n' && value.size() <= max_value_length)
+            value += ch;
+
+        if (in.eof())
+            throw Poco::Net::MessageException("Field is invalid");
+
+        if (value.empty())
+            throw Poco::Net::MessageException("Field value is empty");
+
+        if (ch == '\n')
+            throw Poco::Net::MessageException("No CRLF found");
+
+        if (value.size() > max_value_length)
+            throw Poco::Net::MessageException("Field value is too long");
+
+        skipToNextLineOrEOF(in);
+
+        Poco::trimRightInPlace(value);
+        headers.add(name, headers.decodeWord(value));
+        ++fields;
+    }
+}
+
+}
diff --git a/src/Server/HTTP/ReadHeaders.h b/src/Server/HTTP/ReadHeaders.h
new file mode 100644
index 00000000000..e94cddcf489
--- /dev/null
+++ b/src/Server/HTTP/ReadHeaders.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include <Poco/Net/MessageHeader.h>
+
+namespace DB
+{
+
+class ReadBuffer;
+
+void readHeaders(
+    Poco::Net::MessageHeader & headers,
+    ReadBuffer & in,
+    size_t max_fields_number = 100,
+    size_t max_name_length = 256,
+    size_t max_value_length = 8192);
+
+}
diff --git a/src/IO/WriteBufferFromHTTPServerResponse.cpp b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp
similarity index 81%
rename from src/IO/WriteBufferFromHTTPServerResponse.cpp
rename to src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp
index ac2eeac1652..86133fc2ffe 100644
--- a/src/IO/WriteBufferFromHTTPServerResponse.cpp
+++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp
@@ -1,9 +1,8 @@
-#include <Poco/Version.h>
-#include <Poco/Net/HTTPServerResponse.h>
-#include <IO/WriteBufferFromHTTPServerResponse.h>
-#include <IO/WriteBufferFromString.h>
+#include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
+
 #include <IO/HTTPCommon.h>
 #include <IO/Progress.h>
+#include <IO/WriteBufferFromString.h>
 #include <Common/Exception.h>
 #include <Common/NetException.h>
 #include <Common/Stopwatch.h>
@@ -13,6 +12,8 @@
 #    include <Common/config.h>
 #endif
 
+#include <Poco/Version.h>
+
 
 namespace DB
 {
@@ -33,16 +34,13 @@ void WriteBufferFromHTTPServerResponse::startSendHeaders()
 
         setResponseDefaultHeaders(response, keep_alive_timeout);
 
-#if defined(POCO_CLICKHOUSE_PATCH)
-        if (request.getMethod() != Poco::Net::HTTPRequest::HTTP_HEAD)
+        if (!is_http_method_head)
             std::tie(response_header_ostr, response_body_ostr) = response.beginSend();
-#endif
     }
 }
 
 void WriteBufferFromHTTPServerResponse::writeHeaderSummary()
 {
-#if defined(POCO_CLICKHOUSE_PATCH)
     if (headers_finished_sending)
         return;
 
@@ -51,12 +49,10 @@ void WriteBufferFromHTTPServerResponse::writeHeaderSummary()
 
     if (response_header_ostr)
         *response_header_ostr << "X-ClickHouse-Summary: " << progress_string_writer.str() << "\r\n" << std::flush;
-#endif
 }
 
 void WriteBufferFromHTTPServerResponse::writeHeaderProgress()
 {
-#if defined(POCO_CLICKHOUSE_PATCH)
     if (headers_finished_sending)
         return;
 
@@ -65,7 +61,6 @@ void WriteBufferFromHTTPServerResponse::writeHeaderProgress()
 
     if (response_header_ostr)
         *response_header_ostr << "X-ClickHouse-Progress: " << progress_string_writer.str() << "\r\n" << std::flush;
-#endif
 }
 
 void WriteBufferFromHTTPServerResponse::finishSendHeaders()
@@ -75,23 +70,16 @@ void WriteBufferFromHTTPServerResponse::finishSendHeaders()
         writeHeaderSummary();
         headers_finished_sending = true;
 
-        if (request.getMethod() != Poco::Net::HTTPRequest::HTTP_HEAD)
+        if (!is_http_method_head)
         {
-#if defined(POCO_CLICKHOUSE_PATCH)
             /// Send end of headers delimiter.
             if (response_header_ostr)
                 *response_header_ostr << "\r\n" << std::flush;
-#else
-            /// Newline autosent by response.send()
-            /// if nothing to send in body:
-            if (!response_body_ostr)
-                response_body_ostr = &(response.send());
-#endif
         }
         else
         {
             if (!response_body_ostr)
-                response_body_ostr = &(response.send());
+                response_body_ostr = response.send();
         }
     }
 }
@@ -104,23 +92,15 @@ void WriteBufferFromHTTPServerResponse::nextImpl()
 
         startSendHeaders();
 
-        if (!out && request.getMethod() != Poco::Net::HTTPRequest::HTTP_HEAD)
+        if (!out && !is_http_method_head)
         {
             if (compress)
             {
                 auto content_encoding_name = toContentEncodingName(compression_method);
 
-#if defined(POCO_CLICKHOUSE_PATCH)
                 *response_header_ostr << "Content-Encoding: " << content_encoding_name << "\r\n";
-#else
-                response.set("Content-Encoding", content_encoding_name);
-#endif
             }
 
-#if !defined(POCO_CLICKHOUSE_PATCH)
-            response_body_ostr = &(response.send());
-#endif
-
             /// We reuse our buffer in "out" to avoid extra allocations and copies.
 
             if (compress)
@@ -150,14 +130,14 @@ void WriteBufferFromHTTPServerResponse::nextImpl()
 
 
 WriteBufferFromHTTPServerResponse::WriteBufferFromHTTPServerResponse(
-    Poco::Net::HTTPServerRequest & request_,
-    Poco::Net::HTTPServerResponse & response_,
+    HTTPServerResponse & response_,
+    bool is_http_method_head_,
     unsigned keep_alive_timeout_,
     bool compress_,
     CompressionMethod compression_method_)
     : BufferWithOwnMemory<WriteBuffer>(DBMS_DEFAULT_BUFFER_SIZE)
-    , request(request_)
     , response(response_)
+    , is_http_method_head(is_http_method_head_)
     , keep_alive_timeout(keep_alive_timeout_)
     , compress(compress_)
     , compression_method(compression_method_)
diff --git a/src/IO/WriteBufferFromHTTPServerResponse.h b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h
similarity index 86%
rename from src/IO/WriteBufferFromHTTPServerResponse.h
rename to src/Server/HTTP/WriteBufferFromHTTPServerResponse.h
index 85a81c3dda7..b4ff454195f 100644
--- a/src/IO/WriteBufferFromHTTPServerResponse.h
+++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h
@@ -1,31 +1,17 @@
 #pragma once
 
-#include <optional>
-#include <mutex>
-#include <Poco/Net/HTTPServerRequest.h>
-#include <Poco/Net/HTTPServerResponse.h>
-#include <Poco/Version.h>
-#include <IO/CompressionMethod.h>
-#include <IO/WriteBuffer.h>
 #include <IO/BufferWithOwnMemory.h>
-#include <IO/WriteBufferFromOStream.h>
+#include <IO/CompressionMethod.h>
 #include <IO/HTTPCommon.h>
 #include <IO/Progress.h>
+#include <IO/WriteBuffer.h>
+#include <IO/WriteBufferFromOStream.h>
+#include <Server/HTTP/HTTPServerResponse.h>
 #include <Common/NetException.h>
 #include <Common/Stopwatch.h>
 
-#if !defined(ARCADIA_BUILD)
-#    include <Common/config.h>
-#endif
-
-
-namespace Poco
-{
-    namespace Net
-    {
-        class HTTPServerResponse;
-    }
-}
+#include <mutex>
+#include <optional>
 
 
 namespace DB
@@ -47,20 +33,17 @@ namespace DB
 class WriteBufferFromHTTPServerResponse final : public BufferWithOwnMemory<WriteBuffer>
 {
 private:
-    Poco::Net::HTTPServerRequest & request;
-    Poco::Net::HTTPServerResponse & response;
+    HTTPServerResponse & response;
 
+    bool is_http_method_head;
     bool add_cors_header = false;
     unsigned keep_alive_timeout = 0;
     bool compress = false;
     CompressionMethod compression_method;
     int compression_level = 1;
 
-    std::ostream * response_body_ostr = nullptr;
-
-#if defined(POCO_CLICKHOUSE_PATCH)
-    std::ostream * response_header_ostr = nullptr;
-#endif
+    std::shared_ptr<std::ostream> response_body_ostr;
+    std::shared_ptr<std::ostream> response_header_ostr;
 
     std::unique_ptr<WriteBuffer> out;
 
@@ -91,8 +74,8 @@ private:
 
 public:
     WriteBufferFromHTTPServerResponse(
-        Poco::Net::HTTPServerRequest & request_,
-        Poco::Net::HTTPServerResponse & response_,
+        HTTPServerResponse & response_,
+        bool is_http_method_head_,
         unsigned keep_alive_timeout_,
         bool compress_ = false,        /// If true - set Content-Encoding header and compress the result.
         CompressionMethod compression_method_ = CompressionMethod::None);
diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp
index e9a77c3b433..d200ee7421f 100644
--- a/src/Server/HTTPHandler.cpp
+++ b/src/Server/HTTPHandler.cpp
@@ -1,49 +1,47 @@
-#include "HTTPHandler.h"
+#include <Server/HTTPHandler.h>
 
-#include "HTTPHandlerFactory.h"
-#include "HTTPHandlerRequestFilter.h"
-
-#include <chrono>
-#include <iomanip>
-#include <Poco/File.h>
-#include <Poco/Net/HTTPBasicCredentials.h>
-#include <Poco/Net/HTTPServerRequest.h>
-#include <Poco/Net/HTTPServerRequestImpl.h>
-#include <Poco/Net/HTTPServerResponse.h>
-#include <Poco/Net/HTTPRequestHandlerFactory.h>
-#include <Poco/Net/NetException.h>
-#include <ext/scope_guard.h>
-#include <Core/ExternalTable.h>
-#include <Common/StringUtils/StringUtils.h>
-#include <Common/escapeForFileName.h>
-#include <common/getFQDNOrHostName.h>
-#include <Common/setThreadName.h>
-#include <Common/SettingsChanges.h>
-#include <Disks/IVolume.h>
 #include <Compression/CompressedReadBuffer.h>
 #include <Compression/CompressedWriteBuffer.h>
+#include <Core/ExternalTable.h>
+#include <DataStreams/IBlockInputStream.h>
+#include <Disks/StoragePolicy.h>
+#include <IO/CascadeWriteBuffer.h>
+#include <IO/ConcatReadBuffer.h>
+#include <IO/MemoryReadWriteBuffer.h>
 #include <IO/ReadBufferFromIStream.h>
 #include <IO/ReadBufferFromString.h>
-#include <IO/WriteBufferFromString.h>
-#include <IO/WriteBufferFromHTTPServerResponse.h>
 #include <IO/WriteBufferFromFile.h>
+#include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
+#include <IO/WriteBufferFromString.h>
+#include <IO/WriteBufferFromTemporaryFile.h>
 #include <IO/WriteHelpers.h>
 #include <IO/copyData.h>
-#include <IO/ConcatReadBuffer.h>
-#include <IO/CascadeWriteBuffer.h>
-#include <IO/MemoryReadWriteBuffer.h>
-#include <IO/WriteBufferFromTemporaryFile.h>
-#include <DataStreams/IBlockInputStream.h>
-#include <Interpreters/executeQuery.h>
-#include <Interpreters/QueryParameterVisitor.h>
 #include <Interpreters/Context.h>
+#include <Interpreters/QueryParameterVisitor.h>
+#include <Interpreters/executeQuery.h>
+#include <Server/HTTPHandlerFactory.h>
+#include <Server/HTTPHandlerRequestFilter.h>
+#include <Server/IServer.h>
+#include <Common/SettingsChanges.h>
+#include <Common/StringUtils/StringUtils.h>
+#include <Common/escapeForFileName.h>
+#include <Common/setThreadName.h>
 #include <Common/typeid_cast.h>
-#include <Poco/Net/HTTPStream.h>
+#include <common/getFQDNOrHostName.h>
+#include <ext/scope_guard.h>
 
 #if !defined(ARCADIA_BUILD)
 #    include <Common/config.h>
 #endif
 
+#include <Poco/File.h>
+#include <Poco/Net/HTTPBasicCredentials.h>
+#include <Poco/Net/HTTPStream.h>
+#include <Poco/Net/NetException.h>
+
+#include <chrono>
+#include <iomanip>
+
 
 namespace DB
 {
@@ -237,16 +235,14 @@ HTTPHandler::HTTPHandler(IServer & server_, const std::string & name)
 
 void HTTPHandler::processQuery(
     Context & context,
-    Poco::Net::HTTPServerRequest & request,
+    HTTPServerRequest & request,
     HTMLForm & params,
-    Poco::Net::HTTPServerResponse & response,
+    HTTPServerResponse & response,
     Output & used_output,
     std::optional<CurrentThread::QueryScope> & query_scope)
 {
     LOG_TRACE(log, "Request URI: {}", request.getURI());
 
-    std::istream & istr = request.stream();
-
     /// The user and password can be passed by headers (similar to X-Auth-*),
     /// which is used by load balancers to pass authentication information.
     std::string user = request.get("X-ClickHouse-User", "");
@@ -291,9 +287,9 @@ void HTTPHandler::processQuery(
     client_info.interface = ClientInfo::Interface::HTTP;
 
     ClientInfo::HTTPMethod http_method = ClientInfo::HTTPMethod::UNKNOWN;
-    if (request.getMethod() == Poco::Net::HTTPServerRequest::HTTP_GET)
+    if (request.getMethod() == HTTPServerRequest::HTTP_GET)
         http_method = ClientInfo::HTTPMethod::GET;
-    else if (request.getMethod() == Poco::Net::HTTPServerRequest::HTTP_POST)
+    else if (request.getMethod() == HTTPServerRequest::HTTP_POST)
         http_method = ClientInfo::HTTPMethod::POST;
 
     client_info.http_method = http_method;
@@ -356,10 +352,8 @@ void HTTPHandler::processQuery(
     }
 #endif
 
-    // Set the query id supplied by the user, if any, and also update the
-    // OpenTelemetry fields.
-    context.setCurrentQueryId(params.get("query_id",
-        request.get("X-ClickHouse-Query-Id", "")));
+    // Set the query id supplied by the user, if any, and also update the OpenTelemetry fields.
+    context.setCurrentQueryId(params.get("query_id", request.get("X-ClickHouse-Query-Id", "")));
 
     client_info.initial_query_id = client_info.current_query_id;
 
@@ -405,7 +399,11 @@ void HTTPHandler::processQuery(
     unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", 10);
 
     used_output.out = std::make_shared<WriteBufferFromHTTPServerResponse>(
-        request, response, keep_alive_timeout, client_supports_http_compression, http_response_compression_method);
+        response,
+        request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD,
+        keep_alive_timeout,
+        client_supports_http_compression,
+        http_response_compression_method);
 
     if (internal_compression)
         used_output.out_maybe_compressed = std::make_shared<CompressedWriteBuffer>(*used_output.out);
@@ -459,8 +457,8 @@ void HTTPHandler::processQuery(
 
     /// Request body can be compressed using algorithm specified in the Content-Encoding header.
     String http_request_compression_method_str = request.get("Content-Encoding", "");
-    std::unique_ptr<ReadBuffer> in_post = wrapReadBufferWithCompressionMethod(
-        std::make_unique<ReadBufferFromIStream>(istr), chooseCompressionMethod({}, http_request_compression_method_str));
+    auto in_post = wrapReadBufferWithCompressionMethod(
+        wrapReadBufferReference(request.getStream()), chooseCompressionMethod({}, http_request_compression_method_str));
 
     /// The data can also be compressed using incompatible internal algorithm. This is indicated by
     /// 'decompress' query parameter.
@@ -513,7 +511,7 @@ void HTTPHandler::processQuery(
     const auto & settings = context.getSettingsRef();
 
     /// Only readonly queries are allowed for HTTP GET requests.
-    if (request.getMethod() == Poco::Net::HTTPServerRequest::HTTP_GET)
+    if (request.getMethod() == HTTPServerRequest::HTTP_GET)
     {
         if (settings.readonly == 0)
             context.setSetting("readonly", 2);
@@ -608,26 +606,12 @@ void HTTPHandler::processQuery(
 
     if (settings.readonly > 0 && settings.cancel_http_readonly_queries_on_client_close)
     {
-        Poco::Net::StreamSocket & socket = dynamic_cast<Poco::Net::HTTPServerRequestImpl &>(request).socket();
-
-        append_callback([&context, &socket](const Progress &)
+        append_callback([&context, &request](const Progress &)
         {
-            /// Assume that at the point this method is called no one is reading data from the socket any more.
-            /// True for read-only queries.
-            try
-            {
-                char b;
-                int status = socket.receiveBytes(&b, 1, MSG_DONTWAIT | MSG_PEEK);
-                if (status == 0)
-                    context.killCurrentQuery();
-            }
-            catch (Poco::TimeoutException &)
-            {
-            }
-            catch (...)
-            {
+            /// Assume that at the point this method is called no one is reading data from the socket any more:
+            /// should be true for read-only queries.
+            if (!request.checkPeerConnected())
                 context.killCurrentQuery();
-            }
         });
     }
 
@@ -656,22 +640,23 @@ void HTTPHandler::processQuery(
     used_output.out->finalize();
 }
 
-void HTTPHandler::trySendExceptionToClient(const std::string & s, int exception_code,
-    Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response,
-    Output & used_output)
+void HTTPHandler::trySendExceptionToClient(
+    const std::string & s, int exception_code, HTTPServerRequest & request, HTTPServerResponse & response, Output & used_output)
 {
     try
     {
         response.set("X-ClickHouse-Exception-Code", toString<int>(exception_code));
 
+        /// FIXME: make sure that no one else is reading from the same stream at the moment.
+
         /// If HTTP method is POST and Keep-Alive is turned on, we should read the whole request body
         /// to avoid reading part of the current request body in the next request.
         if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST
             && response.getKeepAlive()
-            && !request.stream().eof()
-            && exception_code != ErrorCodes::HTTP_LENGTH_REQUIRED)
+            && exception_code != ErrorCodes::HTTP_LENGTH_REQUIRED
+            && !request.getStream().eof())
         {
-            request.stream().ignore(std::numeric_limits<std::streamsize>::max());
+            request.getStream().ignoreAll();
         }
 
         bool auth_fail = exception_code == ErrorCodes::UNKNOWN_USER ||
@@ -690,7 +675,7 @@ void HTTPHandler::trySendExceptionToClient(const std::string & s, int exception_
         if (!response.sent() && !used_output.out_maybe_compressed)
         {
             /// If nothing was sent yet and we don't even know if we must compress the response.
-            response.send() << s << std::endl;
+            *response.send() << s << std::endl;
         }
         else if (used_output.out_maybe_compressed)
         {
@@ -717,6 +702,11 @@ void HTTPHandler::trySendExceptionToClient(const std::string & s, int exception_
             used_output.out_maybe_compressed->next();
             used_output.out->finalize();
         }
+        else
+        {
+            assert(false);
+            __builtin_unreachable();
+        }
     }
     catch (...)
     {
@@ -725,7 +715,7 @@ void HTTPHandler::trySendExceptionToClient(const std::string & s, int exception_
 }
 
 
-void HTTPHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response)
+void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
 {
     setThreadName("HTTPHandler");
     ThreadStatus thread_status;
@@ -746,17 +736,18 @@ void HTTPHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Ne
         response.setContentType("text/plain; charset=UTF-8");
         response.set("X-ClickHouse-Server-Display-Name", server_display_name);
         /// For keep-alive to work.
-        if (request.getVersion() == Poco::Net::HTTPServerRequest::HTTP_1_1)
+        if (request.getVersion() == HTTPServerRequest::HTTP_1_1)
             response.setChunkedTransferEncoding(true);
 
         HTMLForm params(request);
         with_stacktrace = params.getParsed<bool>("stacktrace", false);
 
         /// Workaround. Poco does not detect 411 Length Required case.
-        if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST && !request.getChunkedTransferEncoding() &&
-            !request.hasContentLength())
+        if (request.getMethod() == HTTPRequest::HTTP_POST && !request.getChunkedTransferEncoding() && !request.hasContentLength())
         {
-            throw Exception("The Transfer-Encoding is not chunked and there is no Content-Length header for POST request", ErrorCodes::HTTP_LENGTH_REQUIRED);
+            throw Exception(
+                "The Transfer-Encoding is not chunked and there is no Content-Length header for POST request",
+                ErrorCodes::HTTP_LENGTH_REQUIRED);
         }
 
         processQuery(context, request, params, response, used_output, query_scope);
@@ -800,7 +791,7 @@ bool DynamicQueryHandler::customizeQueryParam(Context & context, const std::stri
     return false;
 }
 
-std::string DynamicQueryHandler::getQuery(Poco::Net::HTTPServerRequest & request, HTMLForm & params, Context & context)
+std::string DynamicQueryHandler::getQuery(HTTPServerRequest & request, HTMLForm & params, Context & context)
 {
     if (likely(!startsWith(request.getContentType(), "multipart/form-data")))
     {
@@ -814,7 +805,7 @@ std::string DynamicQueryHandler::getQuery(Poco::Net::HTTPServerRequest & request
     /// Support for "external data for query processing".
     /// Used in case of POST request with form-data, but it isn't expected to be deleted after that scope.
     ExternalTablesHandler handler(context, params);
-    params.load(request, request.stream(), handler);
+    params.load(request, request.getStream(), handler);
 
     std::string full_query;
     /// Params are of both form params POST and uri (GET params)
@@ -844,7 +835,7 @@ bool PredefinedQueryHandler::customizeQueryParam(Context & context, const std::s
     return false;
 }
 
-void PredefinedQueryHandler::customizeContext(Poco::Net::HTTPServerRequest & request, DB::Context & context)
+void PredefinedQueryHandler::customizeContext(HTTPServerRequest & request, DB::Context & context)
 {
     /// If in the configuration file, the handler's header is regex and contains named capture group
     /// We will extract regex named capture groups as query parameters
@@ -880,22 +871,26 @@ void PredefinedQueryHandler::customizeContext(Poco::Net::HTTPServerRequest & req
     }
 }
 
-std::string PredefinedQueryHandler::getQuery(Poco::Net::HTTPServerRequest & request, HTMLForm & params, Context & context)
+std::string PredefinedQueryHandler::getQuery(HTTPServerRequest & request, HTMLForm & params, Context & context)
 {
     if (unlikely(startsWith(request.getContentType(), "multipart/form-data")))
     {
         /// Support for "external data for query processing".
         ExternalTablesHandler handler(context, params);
-        params.load(request, request.stream(), handler);
+        params.load(request, request.getStream(), handler);
     }
 
     return predefined_query;
 }
 
-Poco::Net::HTTPRequestHandlerFactory * createDynamicHandlerFactory(IServer & server, const std::string & config_prefix)
+HTTPRequestHandlerFactoryPtr createDynamicHandlerFactory(IServer & server, const std::string & config_prefix)
 {
-    std::string query_param_name = server.config().getString(config_prefix + ".handler.query_param_name", "query");
-    return addFiltersFromConfig(new HandlingRuleHTTPHandlerFactory<DynamicQueryHandler>(server, std::move(query_param_name)), server.config(), config_prefix);
+    const auto & query_param_name = server.config().getString(config_prefix + ".handler.query_param_name", "query");
+    auto factory = std::make_shared<HandlingRuleHTTPHandlerFactory<DynamicQueryHandler>>(server, std::move(query_param_name));
+
+    factory->addFiltersFromConfig(server.config(), config_prefix);
+
+    return factory;
 }
 
 static inline bool capturingNamedQueryParam(NameSet receive_params, const CompiledRegexPtr & compiled_regex)
@@ -913,18 +908,20 @@ static inline CompiledRegexPtr getCompiledRegex(const std::string & expression)
     auto compiled_regex = std::make_shared<const re2::RE2>(expression);
 
     if (!compiled_regex->ok())
-        throw Exception("Cannot compile re2: " + expression + " for http handling rule, error: " +
-                        compiled_regex->error() + ". Look at https://github.com/google/re2/wiki/Syntax for reference.", ErrorCodes::CANNOT_COMPILE_REGEXP);
+        throw Exception(
+            "Cannot compile re2: " + expression + " for http handling rule, error: " + compiled_regex->error()
+                + ". Look at https://github.com/google/re2/wiki/Syntax for reference.",
+            ErrorCodes::CANNOT_COMPILE_REGEXP);
 
     return compiled_regex;
 }
 
-Poco::Net::HTTPRequestHandlerFactory * createPredefinedHandlerFactory(IServer & server, const std::string & config_prefix)
+HTTPRequestHandlerFactoryPtr createPredefinedHandlerFactory(IServer & server, const std::string & config_prefix)
 {
     Poco::Util::AbstractConfiguration & configuration = server.config();
 
     if (!configuration.has(config_prefix + ".handler.query"))
-        throw Exception("There is no path '" + config_prefix + ".handler.query" + "' in configuration file.", ErrorCodes::NO_ELEMENTS_IN_CONFIG);
+        throw Exception("There is no path '" + config_prefix + ".handler.query' in configuration file.", ErrorCodes::NO_ELEMENTS_IN_CONFIG);
 
     std::string predefined_query = configuration.getString(config_prefix + ".handler.query");
     NameSet analyze_receive_params = analyzeReceiveQueryParams(predefined_query);
@@ -946,6 +943,8 @@ Poco::Net::HTTPRequestHandlerFactory * createPredefinedHandlerFactory(IServer &
             headers_name_with_regex.emplace(std::make_pair(header_name, regex));
     }
 
+    std::shared_ptr<HandlingRuleHTTPHandlerFactory<PredefinedQueryHandler>> factory;
+
     if (configuration.has(config_prefix + ".url"))
     {
         auto url_expression = configuration.getString(config_prefix + ".url");
@@ -955,14 +954,23 @@ Poco::Net::HTTPRequestHandlerFactory * createPredefinedHandlerFactory(IServer &
 
         auto regex = getCompiledRegex(url_expression);
         if (capturingNamedQueryParam(analyze_receive_params, regex))
-            return addFiltersFromConfig(new HandlingRuleHTTPHandlerFactory<PredefinedQueryHandler>(
-                server, std::move(analyze_receive_params), std::move(predefined_query), std::move(regex),
-                std::move(headers_name_with_regex)), configuration, config_prefix);
+        {
+            factory = std::make_shared<HandlingRuleHTTPHandlerFactory<PredefinedQueryHandler>>(
+                server,
+                std::move(analyze_receive_params),
+                std::move(predefined_query),
+                std::move(regex),
+                std::move(headers_name_with_regex));
+            factory->addFiltersFromConfig(configuration, config_prefix);
+            return factory;
+        }
     }
 
-    return addFiltersFromConfig(new HandlingRuleHTTPHandlerFactory<PredefinedQueryHandler>(
-        server, std::move(analyze_receive_params), std::move(predefined_query), CompiledRegexPtr{} ,std::move(headers_name_with_regex)),
-        configuration, config_prefix);
+    factory = std::make_shared<HandlingRuleHTTPHandlerFactory<PredefinedQueryHandler>>(
+        server, std::move(analyze_receive_params), std::move(predefined_query), CompiledRegexPtr{}, std::move(headers_name_with_regex));
+    factory->addFiltersFromConfig(configuration, config_prefix);
+
+    return factory;
 }
 
 }
diff --git a/src/Server/HTTPHandler.h b/src/Server/HTTPHandler.h
index 96727df5404..e903fbfbff7 100644
--- a/src/Server/HTTPHandler.h
+++ b/src/Server/HTTPHandler.h
@@ -1,13 +1,10 @@
 #pragma once
 
-#include "IServer.h"
-
-#include <Poco/Net/HTTPRequestHandler.h>
-
-#include <Common/CurrentThread.h>
-#include <Common/CurrentMetrics.h>
-#include <Common/HTMLForm.h>
 #include <Core/Names.h>
+#include <Server/HTTP/HTMLForm.h>
+#include <Server/HTTP/HTTPRequestHandler.h>
+#include <Common/CurrentMetrics.h>
+#include <Common/CurrentThread.h>
 
 #include <re2/re2.h>
 
@@ -21,23 +18,24 @@ namespace Poco { class Logger; }
 namespace DB
 {
 
+class IServer;
 class WriteBufferFromHTTPServerResponse;
 
 using CompiledRegexPtr = std::shared_ptr<const re2::RE2>;
 
-class HTTPHandler : public Poco::Net::HTTPRequestHandler
+class HTTPHandler : public HTTPRequestHandler
 {
 public:
-    explicit HTTPHandler(IServer & server_, const std::string & name);
+    HTTPHandler(IServer & server_, const std::string & name);
 
-    void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override;
+    void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
 
     /// This method is called right before the query execution.
-    virtual void customizeContext(Poco::Net::HTTPServerRequest & /*request*/, Context & /* context */) {}
+    virtual void customizeContext(HTTPServerRequest & /* request */, Context & /* context */) {}
 
     virtual bool customizeQueryParam(Context & context, const std::string & key, const std::string & value) = 0;
 
-    virtual std::string getQuery(Poco::Net::HTTPServerRequest & request, HTMLForm & params, Context & context) = 0;
+    virtual std::string getQuery(HTTPServerRequest & request, HTMLForm & params, Context & context) = 0;
 
 private:
     struct Output
@@ -74,17 +72,17 @@ private:
     /// Also initializes 'used_output'.
     void processQuery(
         Context & context,
-        Poco::Net::HTTPServerRequest & request,
+        HTTPServerRequest & request,
         HTMLForm & params,
-        Poco::Net::HTTPServerResponse & response,
+        HTTPServerResponse & response,
         Output & used_output,
         std::optional<CurrentThread::QueryScope> & query_scope);
 
     void trySendExceptionToClient(
         const std::string & s,
         int exception_code,
-        Poco::Net::HTTPServerRequest & request,
-        Poco::Net::HTTPServerResponse & response,
+        HTTPServerRequest & request,
+        HTTPServerResponse & response,
         Output & used_output);
 
     static void pushDelayedResults(Output & used_output);
@@ -97,7 +95,7 @@ private:
 public:
     explicit DynamicQueryHandler(IServer & server_, const std::string & param_name_ = "query");
 
-    std::string getQuery(Poco::Net::HTTPServerRequest & request, HTMLForm & params, Context & context) override;
+    std::string getQuery(HTTPServerRequest & request, HTMLForm & params, Context & context) override;
 
     bool customizeQueryParam(Context &context, const std::string &key, const std::string &value) override;
 };
@@ -114,9 +112,9 @@ public:
         IServer & server_, const NameSet & receive_params_, const std::string & predefined_query_
         , const CompiledRegexPtr & url_regex_, const std::unordered_map<String, CompiledRegexPtr> & header_name_with_regex_);
 
-    virtual void customizeContext(Poco::Net::HTTPServerRequest & request, Context & context) override;
+    virtual void customizeContext(HTTPServerRequest & request, Context & context) override;
 
-    std::string getQuery(Poco::Net::HTTPServerRequest & request, HTMLForm & params, Context & context) override;
+    std::string getQuery(HTTPServerRequest & request, HTMLForm & params, Context & context) override;
 
     bool customizeQueryParam(Context & context, const std::string & key, const std::string & value) override;
 };
diff --git a/src/Server/HTTPHandlerFactory.cpp b/src/Server/HTTPHandlerFactory.cpp
index 9eac60355d2..db80750beb8 100644
--- a/src/Server/HTTPHandlerFactory.cpp
+++ b/src/Server/HTTPHandlerFactory.cpp
@@ -1,4 +1,7 @@
-#include "HTTPHandlerFactory.h"
+#include <Server/HTTPHandlerFactory.h>
+
+#include <Server/HTTP/HTTPRequestHandler.h>
+#include <Server/IServer.h>
 
 #include <Poco/Util/LayeredConfiguration.h>
 
@@ -29,7 +32,7 @@ HTTPRequestHandlerFactoryMain::HTTPRequestHandlerFactoryMain(const std::string &
 {
 }
 
-Poco::Net::HTTPRequestHandler * HTTPRequestHandlerFactoryMain::createRequestHandler(const Poco::Net::HTTPServerRequest & request)
+std::unique_ptr<HTTPRequestHandler> HTTPRequestHandlerFactoryMain::createRequestHandler(const HTTPServerRequest & request)
 {
     LOG_TRACE(log, "HTTP Request for {}. Method: {}, Address: {}, User-Agent: {}{}, Content Type: {}, Transfer Encoding: {}, X-Forwarded-For: {}",
         name, request.getMethod(), request.clientAddress().toString(), request.get("User-Agent", "(none)"),
@@ -38,8 +41,8 @@ Poco::Net::HTTPRequestHandler * HTTPRequestHandlerFactoryMain::createRequestHand
 
     for (auto & handler_factory : child_factories)
     {
-        auto * handler = handler_factory->createRequestHandler(request);
-        if (handler != nullptr)
+        auto handler = handler_factory->createRequestHandler(request);
+        if (handler)
             return handler;
     }
 
@@ -47,31 +50,16 @@ Poco::Net::HTTPRequestHandler * HTTPRequestHandlerFactoryMain::createRequestHand
         || request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD
         || request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST)
     {
-        return new NotFoundHandler;
+        return std::unique_ptr<HTTPRequestHandler>(new NotFoundHandler);
     }
 
     return nullptr;
 }
 
-HTTPRequestHandlerFactoryMain::~HTTPRequestHandlerFactoryMain()
-{
-    while (!child_factories.empty())
-    {
-        delete child_factories.back();
-        child_factories.pop_back();
-    }
-}
-
-HTTPRequestHandlerFactoryMain::TThis * HTTPRequestHandlerFactoryMain::addHandler(Poco::Net::HTTPRequestHandlerFactory * child_factory)
-{
-    child_factories.emplace_back(child_factory);
-    return this;
-}
-
 static inline auto createHandlersFactoryFromConfig(
     IServer & server, const std::string & name, const String & prefix, AsynchronousMetrics & async_metrics)
 {
-    auto main_handler_factory = std::make_unique<HTTPRequestHandlerFactoryMain>(name);
+    auto main_handler_factory = std::make_shared<HTTPRequestHandlerFactoryMain>(name);
 
     Poco::Util::AbstractConfiguration::Keys keys;
     server.config().keys(prefix, keys);
@@ -109,10 +97,11 @@ static inline auto createHandlersFactoryFromConfig(
                 ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
     }
 
-    return main_handler_factory.release();
+    return main_handler_factory;
 }
 
-static inline Poco::Net::HTTPRequestHandlerFactory * createHTTPHandlerFactory(IServer & server, const std::string & name, AsynchronousMetrics & async_metrics)
+static inline HTTPRequestHandlerFactoryPtr
+createHTTPHandlerFactory(IServer & server, const std::string & name, AsynchronousMetrics & async_metrics)
 {
     if (server.config().has("http_handlers"))
     {
@@ -120,25 +109,25 @@ static inline Poco::Net::HTTPRequestHandlerFactory * createHTTPHandlerFactory(IS
     }
     else
     {
-        auto factory = std::make_unique<HTTPRequestHandlerFactoryMain>(name);
+        auto factory = std::make_shared<HTTPRequestHandlerFactoryMain>(name);
         addDefaultHandlersFactory(*factory, server, async_metrics);
-        return factory.release();
+        return factory;
     }
 }
 
-static inline Poco::Net::HTTPRequestHandlerFactory * createInterserverHTTPHandlerFactory(IServer & server, const std::string & name)
+static inline HTTPRequestHandlerFactoryPtr createInterserverHTTPHandlerFactory(IServer & server, const std::string & name)
 {
-    auto factory = std::make_unique<HTTPRequestHandlerFactoryMain>(name);
+    auto factory = std::make_shared<HTTPRequestHandlerFactoryMain>(name);
     addCommonDefaultHandlersFactory(*factory, server);
 
-    auto main_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<InterserverIOHTTPHandler>>(server);
+    auto main_handler = std::make_shared<HandlingRuleHTTPHandlerFactory<InterserverIOHTTPHandler>>(server);
     main_handler->allowPostAndGetParamsRequest();
-    factory->addHandler(main_handler.release());
+    factory->addHandler(main_handler);
 
-    return factory.release();
+    return factory;
 }
 
-Poco::Net::HTTPRequestHandlerFactory * createHandlerFactory(IServer & server, AsynchronousMetrics & async_metrics, const std::string & name)
+HTTPRequestHandlerFactoryPtr createHandlerFactory(IServer & server, AsynchronousMetrics & async_metrics, const std::string & name)
 {
     if (name == "HTTPHandler-factory" || name == "HTTPSHandler-factory")
         return createHTTPHandlerFactory(server, name, async_metrics);
@@ -146,12 +135,13 @@ Poco::Net::HTTPRequestHandlerFactory * createHandlerFactory(IServer & server, As
         return createInterserverHTTPHandlerFactory(server, name);
     else if (name == "PrometheusHandler-factory")
     {
-        auto factory = std::make_unique<HTTPRequestHandlerFactoryMain>(name);
-        auto handler = std::make_unique<HandlingRuleHTTPHandlerFactory<PrometheusRequestHandler>>(
+        auto factory = std::make_shared<HTTPRequestHandlerFactoryMain>(name);
+        auto handler = std::make_shared<HandlingRuleHTTPHandlerFactory<PrometheusRequestHandler>>(
             server, PrometheusMetricsWriter(server.config(), "prometheus", async_metrics));
-        handler->attachStrictPath(server.config().getString("prometheus.endpoint", "/metrics"))->allowGetAndHeadRequest();
-        factory->addHandler(handler.release());
-        return factory.release();
+        handler->attachStrictPath(server.config().getString("prometheus.endpoint", "/metrics"));
+        handler->allowGetAndHeadRequest();
+        factory->addHandler(handler);
+        return factory;
     }
 
     throw Exception("LOGICAL ERROR: Unknown HTTP handler factory name.", ErrorCodes::LOGICAL_ERROR);
@@ -162,39 +152,44 @@ static const auto root_response_expression = "config://http_server_default_respo
 
 void addCommonDefaultHandlersFactory(HTTPRequestHandlerFactoryMain & factory, IServer & server)
 {
-    auto root_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<StaticRequestHandler>>(server, root_response_expression);
-    root_handler->attachStrictPath("/")->allowGetAndHeadRequest();
-    factory.addHandler(root_handler.release());
+    auto root_handler = std::make_shared<HandlingRuleHTTPHandlerFactory<StaticRequestHandler>>(server, root_response_expression);
+    root_handler->attachStrictPath("/");
+    root_handler->allowGetAndHeadRequest();
+    factory.addHandler(root_handler);
 
-    auto ping_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<StaticRequestHandler>>(server, ping_response_expression);
-    ping_handler->attachStrictPath("/ping")->allowGetAndHeadRequest();
-    factory.addHandler(ping_handler.release());
+    auto ping_handler = std::make_shared<HandlingRuleHTTPHandlerFactory<StaticRequestHandler>>(server, ping_response_expression);
+    ping_handler->attachStrictPath("/ping");
+    ping_handler->allowGetAndHeadRequest();
+    factory.addHandler(ping_handler);
 
-    auto replicas_status_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<ReplicasStatusHandler>>(server);
-    replicas_status_handler->attachNonStrictPath("/replicas_status")->allowGetAndHeadRequest();
-    factory.addHandler(replicas_status_handler.release());
+    auto replicas_status_handler = std::make_shared<HandlingRuleHTTPHandlerFactory<ReplicasStatusHandler>>(server);
+    replicas_status_handler->attachNonStrictPath("/replicas_status");
+    replicas_status_handler->allowGetAndHeadRequest();
+    factory.addHandler(replicas_status_handler);
 
-    auto web_ui_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<WebUIRequestHandler>>(server, "play.html");
-    web_ui_handler->attachNonStrictPath("/play")->allowGetAndHeadRequest();
-    factory.addHandler(web_ui_handler.release());
+    auto web_ui_handler = std::make_shared<HandlingRuleHTTPHandlerFactory<WebUIRequestHandler>>(server, "play.html");
+    web_ui_handler->attachNonStrictPath("/play");
+    web_ui_handler->allowGetAndHeadRequest();
+    factory.addHandler(web_ui_handler);
 }
 
 void addDefaultHandlersFactory(HTTPRequestHandlerFactoryMain & factory, IServer & server, AsynchronousMetrics & async_metrics)
 {
     addCommonDefaultHandlersFactory(factory, server);
 
-    auto query_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<DynamicQueryHandler>>(server, "query");
+    auto query_handler = std::make_shared<HandlingRuleHTTPHandlerFactory<DynamicQueryHandler>>(server, "query");
     query_handler->allowPostAndGetParamsRequest();
-    factory.addHandler(query_handler.release());
+    factory.addHandler(query_handler);
 
     /// We check that prometheus handler will be served on current (default) port.
     /// Otherwise it will be created separately, see createHandlerFactory(...).
     if (server.config().has("prometheus") && server.config().getInt("prometheus.port", 0) == 0)
     {
-        auto prometheus_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<PrometheusRequestHandler>>(
+        auto prometheus_handler = std::make_shared<HandlingRuleHTTPHandlerFactory<PrometheusRequestHandler>>(
             server, PrometheusMetricsWriter(server.config(), "prometheus", async_metrics));
-        prometheus_handler->attachStrictPath(server.config().getString("prometheus.endpoint", "/metrics"))->allowGetAndHeadRequest();
-        factory.addHandler(prometheus_handler.release());
+        prometheus_handler->attachStrictPath(server.config().getString("prometheus.endpoint", "/metrics"));
+        prometheus_handler->allowGetAndHeadRequest();
+        factory.addHandler(prometheus_handler);
     }
 }
 
diff --git a/src/Server/HTTPHandlerFactory.h b/src/Server/HTTPHandlerFactory.h
index 3e8313172eb..6297f988eaa 100644
--- a/src/Server/HTTPHandlerFactory.h
+++ b/src/Server/HTTPHandlerFactory.h
@@ -1,82 +1,102 @@
 #pragma once
 
-#include "IServer.h"
-#include <common/logger_useful.h>
-#include <Common/HTMLForm.h>
-#include <Common/StringUtils/StringUtils.h>
-#include <Poco/Net/HTTPServerRequest.h>
-#include <Poco/Net/HTTPServerResponse.h>
-#include <Poco/Net/HTTPRequestHandlerFactory.h>
 #include <Interpreters/AsynchronousMetrics.h>
+#include <Server/HTTP/HTMLForm.h>
+#include <Server/HTTP/HTTPRequestHandlerFactory.h>
+#include <Server/HTTPHandlerRequestFilter.h>
+#include <Common/StringUtils/StringUtils.h>
+#include <common/logger_useful.h>
+
+#include <Poco/Util/LayeredConfiguration.h>
 
 namespace DB
 {
 
-/// Handle request using child handlers
-class HTTPRequestHandlerFactoryMain : public Poco::Net::HTTPRequestHandlerFactory, boost::noncopyable
+namespace ErrorCodes
 {
-private:
-    using TThis = HTTPRequestHandlerFactoryMain;
+    extern const int UNKNOWN_ELEMENT_IN_CONFIG;
+}
 
+class IServer;
+
+/// Handle request using child handlers
+class HTTPRequestHandlerFactoryMain : public HTTPRequestHandlerFactory
+{
+public:
+    explicit HTTPRequestHandlerFactoryMain(const std::string & name_);
+
+    void addHandler(HTTPRequestHandlerFactoryPtr child_factory) { child_factories.emplace_back(child_factory); }
+
+    std::unique_ptr<HTTPRequestHandler> createRequestHandler(const HTTPServerRequest & request) override;
+
+private:
     Poco::Logger * log;
     std::string name;
 
-    std::vector<Poco::Net::HTTPRequestHandlerFactory *> child_factories;
-public:
-
-    ~HTTPRequestHandlerFactoryMain() override;
-
-    HTTPRequestHandlerFactoryMain(const std::string & name_);
-
-    TThis * addHandler(Poco::Net::HTTPRequestHandlerFactory * child_factory);
-
-    Poco::Net::HTTPRequestHandler * createRequestHandler(const Poco::Net::HTTPServerRequest & request) override;
+    std::vector<HTTPRequestHandlerFactoryPtr> child_factories;
 };
 
 template <typename TEndpoint>
-class HandlingRuleHTTPHandlerFactory : public Poco::Net::HTTPRequestHandlerFactory
+class HandlingRuleHTTPHandlerFactory : public HTTPRequestHandlerFactory
 {
 public:
-    using TThis = HandlingRuleHTTPHandlerFactory<TEndpoint>;
-    using Filter = std::function<bool(const Poco::Net::HTTPServerRequest &)>;
+    using Filter = std::function<bool(const HTTPServerRequest &)>;
 
     template <typename... TArgs>
-    HandlingRuleHTTPHandlerFactory(TArgs &&... args)
+    explicit HandlingRuleHTTPHandlerFactory(TArgs &&... args)
     {
         creator = [args = std::tuple<TArgs...>(std::forward<TArgs>(args) ...)]()
         {
             return std::apply([&](auto && ... endpoint_args)
             {
-                return new TEndpoint(std::forward<decltype(endpoint_args)>(endpoint_args)...);
+                return std::make_unique<TEndpoint>(std::forward<decltype(endpoint_args)>(endpoint_args)...);
             }, std::move(args));
         };
     }
 
-    TThis * addFilter(Filter cur_filter)
+    void addFilter(Filter cur_filter)
     {
         Filter prev_filter = filter;
         filter = [prev_filter, cur_filter](const auto & request)
         {
             return prev_filter ? prev_filter(request) && cur_filter(request) : cur_filter(request);
         };
-
-        return this;
     }
 
-    TThis * attachStrictPath(const String & strict_path)
+    void addFiltersFromConfig(Poco::Util::AbstractConfiguration & config, const std::string & prefix)
     {
-        return addFilter([strict_path](const auto & request) { return request.getURI() == strict_path; });
+        Poco::Util::AbstractConfiguration::Keys filters_type;
+        config.keys(prefix, filters_type);
+
+        for (const auto & filter_type : filters_type)
+        {
+            if (filter_type == "handler")
+                continue;
+            else if (filter_type == "url")
+                addFilter(urlFilter(config, prefix + ".url"));
+            else if (filter_type == "headers")
+                addFilter(headersFilter(config, prefix + ".headers"));
+            else if (filter_type == "methods")
+                addFilter(methodsFilter(config, prefix + ".methods"));
+            else
+                throw Exception("Unknown element in config: " + prefix + "." + filter_type, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
+        }
     }
 
-    TThis * attachNonStrictPath(const String & non_strict_path)
+    void attachStrictPath(const String & strict_path)
     {
-        return addFilter([non_strict_path](const auto & request) { return startsWith(request.getURI(), non_strict_path); });
+        addFilter([strict_path](const auto & request) { return request.getURI() == strict_path; });
+    }
+
+    void attachNonStrictPath(const String & non_strict_path)
+    {
+        addFilter([non_strict_path](const auto & request) { return startsWith(request.getURI(), non_strict_path); });
     }
 
     /// Handle GET or HEAD endpoint on specified path
-    TThis * allowGetAndHeadRequest()
+    void allowGetAndHeadRequest()
     {
-        return addFilter([](const auto & request)
+        addFilter([](const auto & request)
         {
             return request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET
                 || request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD;
@@ -84,35 +104,35 @@ public:
     }
 
     /// Handle POST or GET with params
-    TThis * allowPostAndGetParamsRequest()
+    void allowPostAndGetParamsRequest()
     {
-        return addFilter([](const auto & request)
+        addFilter([](const auto & request)
         {
             return request.getURI().find('?') != std::string::npos
                 || request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST;
         });
     }
 
-    Poco::Net::HTTPRequestHandler * createRequestHandler(const Poco::Net::HTTPServerRequest & request) override
+    std::unique_ptr<HTTPRequestHandler> createRequestHandler(const HTTPServerRequest & request) override
     {
         return filter(request) ? creator() : nullptr;
     }
 
 private:
     Filter filter;
-    std::function<Poco::Net::HTTPRequestHandler * ()> creator;
+    std::function<std::unique_ptr<HTTPRequestHandler> ()> creator;
 };
 
-Poco::Net::HTTPRequestHandlerFactory * createStaticHandlerFactory(IServer & server, const std::string & config_prefix);
+HTTPRequestHandlerFactoryPtr createStaticHandlerFactory(IServer & server, const std::string & config_prefix);
 
-Poco::Net::HTTPRequestHandlerFactory * createDynamicHandlerFactory(IServer & server, const std::string & config_prefix);
+HTTPRequestHandlerFactoryPtr createDynamicHandlerFactory(IServer & server, const std::string & config_prefix);
 
-Poco::Net::HTTPRequestHandlerFactory * createPredefinedHandlerFactory(IServer & server, const std::string & config_prefix);
+HTTPRequestHandlerFactoryPtr createPredefinedHandlerFactory(IServer & server, const std::string & config_prefix);
 
-Poco::Net::HTTPRequestHandlerFactory * createReplicasStatusHandlerFactory(IServer & server, const std::string & config_prefix);
+HTTPRequestHandlerFactoryPtr createReplicasStatusHandlerFactory(IServer & server, const std::string & config_prefix);
 
-Poco::Net::HTTPRequestHandlerFactory * createPrometheusHandlerFactory(IServer & server, AsynchronousMetrics & async_metrics, const std::string & config_prefix);
-
-Poco::Net::HTTPRequestHandlerFactory * createHandlerFactory(IServer & server, AsynchronousMetrics & async_metrics, const std::string & name);
+HTTPRequestHandlerFactoryPtr
+createPrometheusHandlerFactory(IServer & server, AsynchronousMetrics & async_metrics, const std::string & config_prefix);
 
+HTTPRequestHandlerFactoryPtr createHandlerFactory(IServer & server, AsynchronousMetrics & async_metrics, const std::string & name);
 }
diff --git a/src/Server/HTTPHandlerRequestFilter.h b/src/Server/HTTPHandlerRequestFilter.h
index f952efd7653..f0474e8b953 100644
--- a/src/Server/HTTPHandlerRequestFilter.h
+++ b/src/Server/HTTPHandlerRequestFilter.h
@@ -1,15 +1,17 @@
 #pragma once
 
-#include "HTTPHandlerFactory.h"
+#include <Server/HTTP/HTTPServerRequest.h>
+#include <Common/Exception.h>
+#include <Common/StringUtils/StringUtils.h>
+#include <common/StringRef.h>
+#include <common/find_symbols.h>
 
 #include <re2/re2.h>
 #include <re2/stringpiece.h>
 #include <Poco/StringTokenizer.h>
-#include <Poco/Net/HTTPServerRequest.h>
 #include <Poco/Util/LayeredConfiguration.h>
 
-#include <common/find_symbols.h>
-
+#include <unordered_map>
 
 namespace DB
 {
@@ -17,11 +19,9 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int CANNOT_COMPILE_REGEXP;
-    extern const int UNKNOWN_ELEMENT_IN_CONFIG;
 }
 
-
-typedef std::shared_ptr<const re2::RE2> CompiledRegexPtr;
+using CompiledRegexPtr = std::shared_ptr<const re2::RE2>;
 
 static inline bool checkRegexExpression(const StringRef & match_str, const CompiledRegexPtr & compiled_regex)
 {
@@ -45,10 +45,10 @@ static inline auto methodsFilter(Poco::Util::AbstractConfiguration & config, con
     std::vector<String> methods;
     Poco::StringTokenizer tokenizer(config.getString(config_path), ",");
 
-    for (auto iterator = tokenizer.begin(); iterator != tokenizer.end(); ++iterator)
-        methods.emplace_back(Poco::toUpper(Poco::trim(*iterator)));
+    for (const auto & iterator : tokenizer)
+        methods.emplace_back(Poco::toUpper(Poco::trim(iterator)));
 
-    return [methods](const Poco::Net::HTTPServerRequest & request) { return std::count(methods.begin(), methods.end(), request.getMethod()); };
+    return [methods](const HTTPServerRequest & request) { return std::count(methods.begin(), methods.end(), request.getMethod()); };
 }
 
 static inline auto getExpression(const std::string & expression)
@@ -66,7 +66,7 @@ static inline auto getExpression(const std::string & expression)
 
 static inline auto urlFilter(Poco::Util::AbstractConfiguration & config, const std::string & config_path)
 {
-    return [expression = getExpression(config.getString(config_path))](const Poco::Net::HTTPServerRequest & request)
+    return [expression = getExpression(config.getString(config_path))](const HTTPServerRequest & request)
     {
         const auto & uri = request.getURI();
         const auto & end = find_first_symbols<'?'>(uri.data(), uri.data() + uri.size());
@@ -88,7 +88,7 @@ static inline auto headersFilter(Poco::Util::AbstractConfiguration & config, con
         headers_expression.emplace(std::make_pair(header_name, expression));
     }
 
-    return [headers_expression](const Poco::Net::HTTPServerRequest & request)
+    return [headers_expression](const HTTPServerRequest & request)
     {
         for (const auto & [header_name, header_expression] : headers_expression)
         {
@@ -101,28 +101,4 @@ static inline auto headersFilter(Poco::Util::AbstractConfiguration & config, con
     };
 }
 
-template <typename TEndpoint>
-static inline Poco::Net::HTTPRequestHandlerFactory * addFiltersFromConfig(
-    HandlingRuleHTTPHandlerFactory <TEndpoint> * factory, Poco::Util::AbstractConfiguration & config, const std::string & prefix)
-{
-    Poco::Util::AbstractConfiguration::Keys filters_type;
-    config.keys(prefix, filters_type);
-
-    for (const auto & filter_type : filters_type)
-    {
-        if (filter_type == "handler")
-            continue;
-        else if (filter_type == "url")
-            factory->addFilter(urlFilter(config, prefix + ".url"));
-        else if (filter_type == "headers")
-            factory->addFilter(headersFilter(config, prefix + ".headers"));
-        else if (filter_type == "methods")
-            factory->addFilter(methodsFilter(config, prefix + ".methods"));
-        else
-            throw Exception("Unknown element in config: " + prefix + "." + filter_type, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
-    }
-
-    return factory;
-}
-
 }
diff --git a/src/Server/InterserverIOHTTPHandler.cpp b/src/Server/InterserverIOHTTPHandler.cpp
index 973759bedd1..3296da94578 100644
--- a/src/Server/InterserverIOHTTPHandler.cpp
+++ b/src/Server/InterserverIOHTTPHandler.cpp
@@ -1,18 +1,18 @@
-#include "InterserverIOHTTPHandler.h"
+#include <Server/InterserverIOHTTPHandler.h>
+
+#include <Server/IServer.h>
 
-#include <Poco/Net/HTTPBasicCredentials.h>
-#include <Poco/Net/HTTPServerRequest.h>
-#include <Poco/Net/HTTPServerResponse.h>
-#include <Poco/Util/LayeredConfiguration.h>
-#include <common/logger_useful.h>
-#include <Common/HTMLForm.h>
-#include <Common/setThreadName.h>
 #include <Compression/CompressedWriteBuffer.h>
 #include <IO/ReadBufferFromIStream.h>
-#include <IO/WriteBufferFromHTTPServerResponse.h>
-#include <Interpreters/InterserverIOHandler.h>
 #include <Interpreters/Context.h>
-#include "IServer.h"
+#include <Interpreters/InterserverIOHandler.h>
+#include <Server/HTTP/HTMLForm.h>
+#include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
+#include <Common/setThreadName.h>
+#include <common/logger_useful.h>
+
+#include <Poco/Net/HTTPBasicCredentials.h>
+#include <Poco/Util/LayeredConfiguration.h>
 
 namespace DB
 {
@@ -23,7 +23,7 @@ namespace ErrorCodes
     extern const int TOO_MANY_SIMULTANEOUS_QUERIES;
 }
 
-std::pair<String, bool> InterserverIOHTTPHandler::checkAuthentication(Poco::Net::HTTPServerRequest & request) const
+std::pair<String, bool> InterserverIOHTTPHandler::checkAuthentication(HTTPServerRequest & request) const
 {
     const auto & config = server.config();
 
@@ -51,7 +51,7 @@ std::pair<String, bool> InterserverIOHTTPHandler::checkAuthentication(Poco::Net:
     return {"", true};
 }
 
-void InterserverIOHTTPHandler::processQuery(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response, Output & used_output)
+void InterserverIOHTTPHandler::processQuery(HTTPServerRequest & request, HTTPServerResponse & response, Output & used_output)
 {
     HTMLForm params(request);
 
@@ -60,7 +60,7 @@ void InterserverIOHTTPHandler::processQuery(Poco::Net::HTTPServerRequest & reque
     String endpoint_name = params.get("endpoint");
     bool compress = params.get("compress") == "true";
 
-    ReadBufferFromIStream body(request.stream());
+    auto & body = request.getStream();
 
     auto endpoint = server.context().getInterserverIOHandler().getEndpoint(endpoint_name);
     /// Locked for read while query processing
@@ -80,18 +80,19 @@ void InterserverIOHTTPHandler::processQuery(Poco::Net::HTTPServerRequest & reque
 }
 
 
-void InterserverIOHTTPHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response)
+void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
 {
     setThreadName("IntersrvHandler");
 
     /// In order to work keep-alive.
-    if (request.getVersion() == Poco::Net::HTTPServerRequest::HTTP_1_1)
+    if (request.getVersion() == HTTPServerRequest::HTTP_1_1)
         response.setChunkedTransferEncoding(true);
 
     Output used_output;
     const auto & config = server.config();
     unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", 10);
-    used_output.out = std::make_shared<WriteBufferFromHTTPServerResponse>(request, response, keep_alive_timeout);
+    used_output.out = std::make_shared<WriteBufferFromHTTPServerResponse>(
+        response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
 
     try
     {
@@ -102,7 +103,7 @@ void InterserverIOHTTPHandler::handleRequest(Poco::Net::HTTPServerRequest & requ
         }
         else
         {
-            response.setStatusAndReason(Poco::Net::HTTPServerResponse::HTTP_UNAUTHORIZED);
+            response.setStatusAndReason(HTTPServerResponse::HTTP_UNAUTHORIZED);
             if (!response.sent())
                 writeString(message, *used_output.out);
             LOG_WARNING(log, "Query processing failed request: '{}' authentication failed", request.getURI());
diff --git a/src/Server/InterserverIOHTTPHandler.h b/src/Server/InterserverIOHTTPHandler.h
index 8dc1962664c..47892aa678f 100644
--- a/src/Server/InterserverIOHTTPHandler.h
+++ b/src/Server/InterserverIOHTTPHandler.h
@@ -1,10 +1,12 @@
 #pragma once
 
-#include <memory>
-#include <Poco/Logger.h>
-#include <Poco/Net/HTTPRequestHandler.h>
+#include <Server/HTTP/HTTPRequestHandler.h>
 #include <Common/CurrentMetrics.h>
 
+#include <Poco/Logger.h>
+
+#include <memory>
+
 
 namespace CurrentMetrics
 {
@@ -17,7 +19,7 @@ namespace DB
 class IServer;
 class WriteBufferFromHTTPServerResponse;
 
-class InterserverIOHTTPHandler : public Poco::Net::HTTPRequestHandler
+class InterserverIOHTTPHandler : public HTTPRequestHandler
 {
 public:
     explicit InterserverIOHTTPHandler(IServer & server_)
@@ -26,7 +28,7 @@ public:
     {
     }
 
-    void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override;
+    void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
 
 private:
     struct Output
@@ -39,9 +41,9 @@ private:
 
     CurrentMetrics::Increment metric_increment{CurrentMetrics::InterserverConnection};
 
-    void processQuery(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response, Output & used_output);
+    void processQuery(HTTPServerRequest & request, HTTPServerResponse & response, Output & used_output);
 
-    std::pair<String, bool> checkAuthentication(Poco::Net::HTTPServerRequest & request) const;
+    std::pair<String, bool> checkAuthentication(HTTPServerRequest & request) const;
 };
 
 }
diff --git a/src/Server/NotFoundHandler.cpp b/src/Server/NotFoundHandler.cpp
index 766e8895784..3181708b9b7 100644
--- a/src/Server/NotFoundHandler.cpp
+++ b/src/Server/NotFoundHandler.cpp
@@ -1,32 +1,25 @@
-#include "NotFoundHandler.h"
+#include <Server/NotFoundHandler.h>
 
 #include <IO/HTTPCommon.h>
-
 #include <Common/Exception.h>
 
-#include <Poco/Net/HTTPServerRequest.h>
-#include <Poco/Net/HTTPServerResponse.h>
-
 namespace DB
 {
-
-void NotFoundHandler::handleRequest(
-    Poco::Net::HTTPServerRequest & request,
-    Poco::Net::HTTPServerResponse & response)
+void NotFoundHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
 {
     try
     {
         response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_NOT_FOUND);
-        response.send() << "There is no handle " << request.getURI() << "\n\n"
-                        << "Use / or /ping for health checks.\n"
-                        << "Or /replicas_status for more sophisticated health checks.\n\n"
-                        << "Send queries from your program with POST method or GET /?query=...\n\n"
-                        << "Use clickhouse-client:\n\n"
-                        << "For interactive data analysis:\n"
-                        << "    clickhouse-client\n\n"
-                        << "For batch query processing:\n"
-                        << "    clickhouse-client --query='SELECT 1' > result\n"
-                        << "    clickhouse-client < query > result\n";
+        *response.send() << "There is no handle " << request.getURI() << "\n\n"
+                         << "Use / or /ping for health checks.\n"
+                         << "Or /replicas_status for more sophisticated health checks.\n\n"
+                         << "Send queries from your program with POST method or GET /?query=...\n\n"
+                         << "Use clickhouse-client:\n\n"
+                         << "For interactive data analysis:\n"
+                         << "    clickhouse-client\n\n"
+                         << "For batch query processing:\n"
+                         << "    clickhouse-client --query='SELECT 1' > result\n"
+                         << "    clickhouse-client < query > result\n";
     }
     catch (...)
     {
diff --git a/src/Server/NotFoundHandler.h b/src/Server/NotFoundHandler.h
index 7f758e49d0d..749ac388c4d 100644
--- a/src/Server/NotFoundHandler.h
+++ b/src/Server/NotFoundHandler.h
@@ -1,18 +1,15 @@
 #pragma once
 
-#include <Poco/Net/HTTPRequestHandler.h>
-
+#include <Server/HTTP/HTTPRequestHandler.h>
 
 namespace DB
 {
 
 /// Response with 404 and verbose description.
-class NotFoundHandler : public Poco::Net::HTTPRequestHandler
+class NotFoundHandler : public HTTPRequestHandler
 {
 public:
-    void handleRequest(
-        Poco::Net::HTTPServerRequest & request,
-        Poco::Net::HTTPServerResponse & response) override;
+    void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
 };
 
 }
diff --git a/src/Server/PrometheusRequestHandler.cpp b/src/Server/PrometheusRequestHandler.cpp
index 60deec9b289..83cb8e85a9e 100644
--- a/src/Server/PrometheusRequestHandler.cpp
+++ b/src/Server/PrometheusRequestHandler.cpp
@@ -1,26 +1,19 @@
-#include "PrometheusRequestHandler.h"
+#include <Server/PrometheusRequestHandler.h>
 
 #include <IO/HTTPCommon.h>
-
-#include <Common/Exception.h>
-
-#include <Poco/Net/HTTPServerRequest.h>
-#include <Poco/Net/HTTPServerResponse.h>
-#include <Poco/Util/LayeredConfiguration.h>
-
-#include <Common/ProfileEvents.h>
+#include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
+#include <Server/HTTPHandlerFactory.h>
+#include <Server/IServer.h>
 #include <Common/CurrentMetrics.h>
+#include <Common/Exception.h>
+#include <Common/ProfileEvents.h>
 
-#include <IO/WriteBufferFromHTTPServerResponse.h>
-#include <Server/HTTPHandlerRequestFilter.h>
+#include <Poco/Util/LayeredConfiguration.h>
 
 
 namespace DB
 {
-
-void PrometheusRequestHandler::handleRequest(
-    Poco::Net::HTTPServerRequest & request,
-    Poco::Net::HTTPServerResponse & response)
+void PrometheusRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
 {
     try
     {
@@ -31,7 +24,7 @@ void PrometheusRequestHandler::handleRequest(
 
         response.setContentType("text/plain; version=0.0.4; charset=UTF-8");
 
-        auto wb = WriteBufferFromHTTPServerResponse(request, response, keep_alive_timeout);
+        auto wb = WriteBufferFromHTTPServerResponse(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
         metrics_writer.write(wb);
         wb.finalize();
     }
@@ -41,10 +34,13 @@ void PrometheusRequestHandler::handleRequest(
     }
 }
 
-Poco::Net::HTTPRequestHandlerFactory * createPrometheusHandlerFactory(IServer & server, AsynchronousMetrics & async_metrics, const std::string & config_prefix)
+HTTPRequestHandlerFactoryPtr
+createPrometheusHandlerFactory(IServer & server, AsynchronousMetrics & async_metrics, const std::string & config_prefix)
 {
-    return addFiltersFromConfig(new HandlingRuleHTTPHandlerFactory<PrometheusRequestHandler>(
-        server, PrometheusMetricsWriter(server.config(), config_prefix + ".handler", async_metrics)), server.config(), config_prefix);
+    auto factory = std::make_shared<HandlingRuleHTTPHandlerFactory<PrometheusRequestHandler>>(
+        server, PrometheusMetricsWriter(server.config(), config_prefix + ".handler", async_metrics));
+    factory->addFiltersFromConfig(server.config(), config_prefix);
+    return factory;
 }
 
 }
diff --git a/src/Server/PrometheusRequestHandler.h b/src/Server/PrometheusRequestHandler.h
index 47c8adf4774..1fb3d9f0f59 100644
--- a/src/Server/PrometheusRequestHandler.h
+++ b/src/Server/PrometheusRequestHandler.h
@@ -1,17 +1,15 @@
 #pragma once
 
-#include "IServer.h"
-#include "PrometheusMetricsWriter.h"
+#include <Server/HTTP/HTTPRequestHandler.h>
 
-#include <Poco/Net/HTTPServerRequest.h>
-#include <Poco/Net/HTTPServerResponse.h>
-#include <Poco/Net/HTTPRequestHandler.h>
-#include <Poco/Net/HTTPRequestHandlerFactory.h>
+#include "PrometheusMetricsWriter.h"
 
 namespace DB
 {
 
-class PrometheusRequestHandler : public Poco::Net::HTTPRequestHandler
+class IServer;
+
+class PrometheusRequestHandler : public HTTPRequestHandler
 {
 private:
     IServer & server;
@@ -24,9 +22,7 @@ public:
     {
     }
 
-    void handleRequest(
-        Poco::Net::HTTPServerRequest & request,
-        Poco::Net::HTTPServerResponse & response) override;
+    void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
 };
 
 }
diff --git a/src/Server/ReplicasStatusHandler.cpp b/src/Server/ReplicasStatusHandler.cpp
index fc79ad9d134..778f9827131 100644
--- a/src/Server/ReplicasStatusHandler.cpp
+++ b/src/Server/ReplicasStatusHandler.cpp
@@ -1,17 +1,18 @@
-#include "ReplicasStatusHandler.h"
+#include <Server/ReplicasStatusHandler.h>
 
-#include <Interpreters/Context.h>
-#include <Storages/StorageReplicatedMergeTree.h>
-#include <Common/HTMLForm.h>
-#include <Common/typeid_cast.h>
 #include <Databases/IDatabase.h>
 #include <IO/HTTPCommon.h>
+#include <Interpreters/Context.h>
+#include <Server/HTTP/HTMLForm.h>
+#include <Server/HTTPHandlerFactory.h>
+#include <Server/HTTPHandlerRequestFilter.h>
+#include <Server/IServer.h>
+#include <Storages/StorageReplicatedMergeTree.h>
+#include <Common/typeid_cast.h>
 
 #include <Poco/Net/HTTPRequestHandlerFactory.h>
 #include <Poco/Net/HTTPServerRequest.h>
 #include <Poco/Net/HTTPServerResponse.h>
-#include <Server/HTTPHandlerFactory.h>
-#include <Server/HTTPHandlerRequestFilter.h>
 
 
 namespace DB
@@ -24,7 +25,7 @@ ReplicasStatusHandler::ReplicasStatusHandler(IServer & server)
 }
 
 
-void ReplicasStatusHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response)
+void ReplicasStatusHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
 {
     try
     {
@@ -82,7 +83,7 @@ void ReplicasStatusHandler::handleRequest(Poco::Net::HTTPServerRequest & request
         }
 
         if (verbose)
-            response.send() << message.str();
+            *response.send() << message.str();
         else
         {
             const char * data = "Ok.\n";
@@ -100,7 +101,7 @@ void ReplicasStatusHandler::handleRequest(Poco::Net::HTTPServerRequest & request
             if (!response.sent())
             {
                 /// We have not sent anything yet and we don't even know if we need to compress response.
-                response.send() << getCurrentExceptionMessage(false) << std::endl;
+                *response.send() << getCurrentExceptionMessage(false) << std::endl;
             }
         }
         catch (...)
@@ -110,9 +111,11 @@ void ReplicasStatusHandler::handleRequest(Poco::Net::HTTPServerRequest & request
     }
 }
 
-Poco::Net::HTTPRequestHandlerFactory * createReplicasStatusHandlerFactory(IServer & server, const std::string & config_prefix)
+HTTPRequestHandlerFactoryPtr createReplicasStatusHandlerFactory(IServer & server, const std::string & config_prefix)
 {
-    return addFiltersFromConfig(new HandlingRuleHTTPHandlerFactory<ReplicasStatusHandler>(server), server.config(), config_prefix);
+    auto factory = std::make_shared<HandlingRuleHTTPHandlerFactory<ReplicasStatusHandler>>(server);
+    factory->addFiltersFromConfig(server.config(), config_prefix);
+    return factory;
 }
 
 }
diff --git a/src/Server/ReplicasStatusHandler.h b/src/Server/ReplicasStatusHandler.h
index a32f1ba905f..8a790b13ad6 100644
--- a/src/Server/ReplicasStatusHandler.h
+++ b/src/Server/ReplicasStatusHandler.h
@@ -1,17 +1,15 @@
 #pragma once
 
-#include "IServer.h"
-
-#include <Poco/Net/HTTPRequestHandler.h>
-
+#include <Server/HTTP/HTTPRequestHandler.h>
 
 namespace DB
 {
 
 class Context;
+class IServer;
 
 /// Replies "Ok.\n" if all replicas on this server don't lag too much. Otherwise output lag information.
-class ReplicasStatusHandler : public Poco::Net::HTTPRequestHandler
+class ReplicasStatusHandler : public HTTPRequestHandler
 {
 private:
     Context & context;
@@ -19,7 +17,7 @@ private:
 public:
     explicit ReplicasStatusHandler(IServer & server_);
 
-    void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override;
+    void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
 };
 
 
diff --git a/src/Server/StaticRequestHandler.cpp b/src/Server/StaticRequestHandler.cpp
index ad2c07ab0aa..f3f564c1cf8 100644
--- a/src/Server/StaticRequestHandler.cpp
+++ b/src/Server/StaticRequestHandler.cpp
@@ -9,7 +9,7 @@
 #include <IO/WriteBufferFromString.h>
 #include <IO/copyData.h>
 #include <IO/WriteHelpers.h>
-#include <IO/WriteBufferFromHTTPServerResponse.h>
+#include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
 #include <Interpreters/Context.h>
 
 #include <Common/Exception.h>
@@ -32,7 +32,8 @@ namespace ErrorCodes
     extern const int INVALID_CONFIG_PARAMETER;
 }
 
-static inline WriteBufferPtr responseWriteBuffer(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response, unsigned int keep_alive_timeout)
+static inline WriteBufferPtr
+responseWriteBuffer(HTTPServerRequest & request, HTTPServerResponse & response, unsigned int keep_alive_timeout)
 {
     /// The client can pass a HTTP header indicating supported compression method (gzip or deflate).
     String http_response_compression_methods = request.get("Accept-Encoding", "");
@@ -55,12 +56,15 @@ static inline WriteBufferPtr responseWriteBuffer(Poco::Net::HTTPServerRequest &
     bool client_supports_http_compression = http_response_compression_method != CompressionMethod::None;
 
     return std::make_shared<WriteBufferFromHTTPServerResponse>(
-        request, response, keep_alive_timeout, client_supports_http_compression, http_response_compression_method);
+        response,
+        request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD,
+        keep_alive_timeout,
+        client_supports_http_compression,
+        http_response_compression_method);
 }
 
 static inline void trySendExceptionToClient(
-    const std::string & s, int exception_code,
-    Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response , WriteBuffer & out)
+    const std::string & s, int exception_code, HTTPServerRequest & request, HTTPServerResponse & response, WriteBuffer & out)
 {
     try
     {
@@ -69,13 +73,13 @@ static inline void trySendExceptionToClient(
         /// If HTTP method is POST and Keep-Alive is turned on, we should read the whole request body
         /// to avoid reading part of the current request body in the next request.
         if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST
-            && response.getKeepAlive() && !request.stream().eof() && exception_code != ErrorCodes::HTTP_LENGTH_REQUIRED)
-            request.stream().ignore(std::numeric_limits<std::streamsize>::max());
+            && response.getKeepAlive() && !request.getStream().eof() && exception_code != ErrorCodes::HTTP_LENGTH_REQUIRED)
+            request.getStream().ignore(std::numeric_limits<std::streamsize>::max());
 
         response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR);
 
         if (!response.sent())
-            response.send() << s << std::endl;
+            *response.send() << s << std::endl;
         else
         {
             if (out.count() != out.offset())
@@ -94,7 +98,7 @@ static inline void trySendExceptionToClient(
     }
 }
 
-void StaticRequestHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response)
+void StaticRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
 {
     auto keep_alive_timeout = server.config().getUInt("keep_alive_timeout", 10);
     const auto & out = responseWriteBuffer(request, response, keep_alive_timeout);
@@ -159,14 +163,17 @@ StaticRequestHandler::StaticRequestHandler(IServer & server_, const String & exp
 {
 }
 
-Poco::Net::HTTPRequestHandlerFactory * createStaticHandlerFactory(IServer & server, const std::string & config_prefix)
+HTTPRequestHandlerFactoryPtr createStaticHandlerFactory(IServer & server, const std::string & config_prefix)
 {
     int status = server.config().getInt(config_prefix + ".handler.status", 200);
     std::string response_content = server.config().getRawString(config_prefix + ".handler.response_content", "Ok.\n");
     std::string response_content_type = server.config().getString(config_prefix + ".handler.content_type", "text/plain; charset=UTF-8");
+    auto factory = std::make_shared<HandlingRuleHTTPHandlerFactory<StaticRequestHandler>>(
+        server, std::move(response_content), std::move(status), std::move(response_content_type));
 
-    return addFiltersFromConfig(new HandlingRuleHTTPHandlerFactory<StaticRequestHandler>(
-        server, std::move(response_content), std::move(status), std::move(response_content_type)), server.config(), config_prefix);
+    factory->addFiltersFromConfig(server.config(), config_prefix);
+
+    return factory;
 }
 
 }
diff --git a/src/Server/StaticRequestHandler.h b/src/Server/StaticRequestHandler.h
index 0a29384ad0e..56c7f5a6d44 100644
--- a/src/Server/StaticRequestHandler.h
+++ b/src/Server/StaticRequestHandler.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include <Poco/Net/HTTPRequestHandler.h>
+#include <Server/HTTP/HTTPRequestHandler.h>
 #include <common/types.h>
 
 
@@ -11,7 +11,7 @@ class IServer;
 class WriteBuffer;
 
 /// Response with custom string. Can be used for browser.
-class StaticRequestHandler : public Poco::Net::HTTPRequestHandler
+class StaticRequestHandler : public HTTPRequestHandler
 {
 private:
     IServer & server;
@@ -29,7 +29,7 @@ public:
 
     void writeResponse(WriteBuffer & out);
 
-    void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override;
+    void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
 };
 
 }
diff --git a/src/Server/WebUIRequestHandler.cpp b/src/Server/WebUIRequestHandler.cpp
index 6159a27971f..fb8ff71611e 100644
--- a/src/Server/WebUIRequestHandler.cpp
+++ b/src/Server/WebUIRequestHandler.cpp
@@ -18,18 +18,18 @@ WebUIRequestHandler::WebUIRequestHandler(IServer & server_, std::string resource
 }
 
 
-void WebUIRequestHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response)
+void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
 {
     auto keep_alive_timeout = server.config().getUInt("keep_alive_timeout", 10);
 
     response.setContentType("text/html; charset=UTF-8");
 
-    if (request.getVersion() == Poco::Net::HTTPServerRequest::HTTP_1_1)
+    if (request.getVersion() == HTTPServerRequest::HTTP_1_1)
         response.setChunkedTransferEncoding(true);
 
     setResponseDefaultHeaders(response, keep_alive_timeout);
     response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK);
-    response.send() << getResource(resource_name);
+    *response.send() << getResource(resource_name);
 }
 
 }
diff --git a/src/Server/WebUIRequestHandler.h b/src/Server/WebUIRequestHandler.h
index 3066b86b36a..1c52b626091 100644
--- a/src/Server/WebUIRequestHandler.h
+++ b/src/Server/WebUIRequestHandler.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include <Poco/Net/HTTPRequestHandler.h>
+#include <Server/HTTP/HTTPRequestHandler.h>
 
 
 namespace DB
@@ -9,14 +9,14 @@ namespace DB
 class IServer;
 
 /// Response with HTML page that allows to send queries and show results in browser.
-class WebUIRequestHandler : public Poco::Net::HTTPRequestHandler
+class WebUIRequestHandler : public HTTPRequestHandler
 {
 private:
     IServer & server;
     std::string resource_name;
 public:
     WebUIRequestHandler(IServer & server_, std::string resource_name_);
-    void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override;
+    void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
 };
 
 }
diff --git a/src/Server/ya.make b/src/Server/ya.make
index a0269e9ac84..ef5ef6d5f57 100644
--- a/src/Server/ya.make
+++ b/src/Server/ya.make
@@ -11,6 +11,14 @@ PEERDIR(
 
 SRCS(
     GRPCServer.cpp
+    HTTP/HTMLForm.cpp
+    HTTP/HTTPServer.cpp
+    HTTP/HTTPServerConnection.cpp
+    HTTP/HTTPServerConnectionFactory.cpp
+    HTTP/HTTPServerRequest.cpp
+    HTTP/HTTPServerResponse.cpp
+    HTTP/ReadHeaders.cpp
+    HTTP/WriteBufferFromHTTPServerResponse.cpp
     HTTPHandler.cpp
     HTTPHandlerFactory.cpp
     InterserverIOHTTPHandler.cpp
diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp
index e01e7793dd3..f80020991b0 100644
--- a/src/Storages/MergeTree/DataPartsExchange.cpp
+++ b/src/Storages/MergeTree/DataPartsExchange.cpp
@@ -1,17 +1,20 @@
 #include <Storages/MergeTree/DataPartsExchange.h>
+
+#include <DataStreams/NativeBlockOutputStream.h>
+#include <Disks/SingleDiskVolume.h>
+#include <Disks/createVolume.h>
+#include <IO/HTTPCommon.h>
+#include <Server/HTTP/HTMLForm.h>
+#include <Server/HTTP/HTTPServerResponse.h>
 #include <Storages/MergeTree/MergeTreeDataPartInMemory.h>
 #include <Storages/MergeTree/MergedBlockOutputStream.h>
-#include <Disks/createVolume.h>
-#include <Disks/SingleDiskVolume.h>
+#include <Storages/MergeTree/ReplicatedFetchList.h>
 #include <Common/CurrentMetrics.h>
 #include <Common/NetException.h>
-#include <DataStreams/NativeBlockOutputStream.h>
-#include <IO/HTTPCommon.h>
 #include <ext/scope_guard.h>
+
 #include <Poco/File.h>
-#include <Poco/Net/HTTPServerResponse.h>
 #include <Poco/Net/HTTPRequest.h>
-#include <Storages/MergeTree/ReplicatedFetchList.h>
 
 
 namespace CurrentMetrics
@@ -83,7 +86,7 @@ std::string Service::getId(const std::string & node_id) const
     return getEndpointId(node_id);
 }
 
-void Service::processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & /*body*/, WriteBuffer & out, Poco::Net::HTTPServerResponse & response)
+void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, WriteBuffer & out, HTTPServerResponse & response)
 {
     int client_protocol_version = parse<int>(params.get("client_protocol_version", "0"));
 
diff --git a/src/Storages/MergeTree/DataPartsExchange.h b/src/Storages/MergeTree/DataPartsExchange.h
index 0a359474d2d..834fed1182f 100644
--- a/src/Storages/MergeTree/DataPartsExchange.h
+++ b/src/Storages/MergeTree/DataPartsExchange.h
@@ -20,21 +20,19 @@ namespace DataPartsExchange
 class Service final : public InterserverIOEndpoint
 {
 public:
-    Service(MergeTreeData & data_)
-    : data(data_), log(&Poco::Logger::get(data.getLogName() + " (Replicated PartsService)")) {}
+    explicit Service(MergeTreeData & data_) : data(data_), log(&Poco::Logger::get(data.getLogName() + " (Replicated PartsService)")) {}
 
     Service(const Service &) = delete;
     Service & operator=(const Service &) = delete;
 
     std::string getId(const std::string & node_id) const override;
-    void processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & body, WriteBuffer & out, Poco::Net::HTTPServerResponse & response) override;
+    void processQuery(const HTMLForm & params, ReadBuffer & body, WriteBuffer & out, HTTPServerResponse & response) override;
 
 private:
     MergeTreeData::DataPartPtr findPart(const String & name);
     void sendPartFromMemory(const MergeTreeData::DataPartPtr & part, WriteBuffer & out);
     void sendPartFromDisk(const MergeTreeData::DataPartPtr & part, WriteBuffer & out, int client_protocol_version);
 
-private:
     /// StorageReplicatedMergeTree::shutdown() waits for all parts exchange handlers to finish,
     /// so Service will never access dangling reference to storage
     MergeTreeData & data;
@@ -43,13 +41,10 @@ private:
 
 /** Client for getting the parts from the table *MergeTree.
   */
-class Fetcher final
+class Fetcher final : private boost::noncopyable
 {
 public:
-    Fetcher(MergeTreeData & data_) : data(data_), log(&Poco::Logger::get("Fetcher")) {}
-
-    Fetcher(const Fetcher &) = delete;
-    Fetcher & operator=(const Fetcher &) = delete;
+    explicit Fetcher(MergeTreeData & data_) : data(data_), log(&Poco::Logger::get("Fetcher")) {}
 
     /// Downloads a part to tmp_directory. If to_detached - downloads to the `detached` directory.
     MergeTreeData::MutableDataPartPtr fetchPart(
@@ -75,7 +70,7 @@ private:
             bool to_detached,
             const String & tmp_prefix_,
             bool sync,
-            const ReservationPtr reservation,
+            ReservationPtr reservation,
             PooledReadWriteBufferFromHTTP & in);
 
     MergeTreeData::MutableDataPartPtr downloadPartToMemory(
diff --git a/tests/queries/query_test.py b/tests/queries/query_test.py
index 3dea639187e..417a51fe523 100644
--- a/tests/queries/query_test.py
+++ b/tests/queries/query_test.py
@@ -33,7 +33,7 @@ SKIP_LIST = [
     "01057_http_compression_prefer_brotli",
     "01080_check_for_error_incorrect_size_of_nested_column",
     "01083_expressions_in_engine_arguments",
-    "01086_odbc_roundtrip",
+    # "01086_odbc_roundtrip",
     "01088_benchmark_query_id",
     "01098_temporary_and_external_tables",
     "01099_parallel_distributed_insert_select",

From ea27c3ca32bdf9a18e90d75bf38bbc725c6db4db Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Fri, 19 Feb 2021 16:41:46 +0300
Subject: [PATCH 1176/1238] Add gdb to fasttest image

---
 docker/test/fasttest/Dockerfile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile
index 03b7b2fc53a..64be52d8e30 100644
--- a/docker/test/fasttest/Dockerfile
+++ b/docker/test/fasttest/Dockerfile
@@ -47,6 +47,7 @@ RUN apt-get update \
         expect \
         fakeroot \
         git \
+        gdb \
         gperf \
         lld-${LLVM_VERSION} \
         llvm-${LLVM_VERSION} \

From b84112a6039589c9a5e2399d4b0efc14d4adf1fc Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Fri, 19 Feb 2021 19:25:50 +0300
Subject: [PATCH 1177/1238] Function sumMap decimal fix

---
 src/AggregateFunctions/AggregateFunctionSumMap.h |  7 ++++++-
 .../queries/0_stateless/00502_sum_map.reference  |  2 ++
 tests/queries/0_stateless/00502_sum_map.sql      | 16 ++++++++++++++++
 3 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.h b/src/AggregateFunctions/AggregateFunctionSumMap.h
index 3079da36cda..f88a1468732 100644
--- a/src/AggregateFunctions/AggregateFunctionSumMap.h
+++ b/src/AggregateFunctions/AggregateFunctionSumMap.h
@@ -115,7 +115,12 @@ public:
                         "Values for {} are expected to be Numeric, Float or Decimal, passed type {}",
                         getName(), value_type->getName()};
 
-                result_type = value_type_without_nullable->promoteNumericType();
+                WhichDataType value_type_to_check(value_type);
+
+                if (value_type_to_check.isDecimal())
+                    result_type = value_type_without_nullable;
+                else
+                    result_type = value_type_without_nullable->promoteNumericType();
             }
 
             types.emplace_back(std::make_shared<DataTypeArray>(result_type));
diff --git a/tests/queries/0_stateless/00502_sum_map.reference b/tests/queries/0_stateless/00502_sum_map.reference
index 0002c43945a..c38fb2ec7d6 100644
--- a/tests/queries/0_stateless/00502_sum_map.reference
+++ b/tests/queries/0_stateless/00502_sum_map.reference
@@ -22,3 +22,5 @@
 ([1.01],[1])
 (['a','b'],[1,2])
 (['a','ab','abc'],[3,2,1])
+([1,2,3,4,5,6,7,8],[1.00000,2.00000,6.00000,8.00000,10.00000,12.00000,7.00000,8.00000])
+([1,2,3,4,5,6,7,8],[1.00000,2.00000,6.00000,8.00000,10.00000,12.00000,7.00000,8.00000])
diff --git a/tests/queries/0_stateless/00502_sum_map.sql b/tests/queries/0_stateless/00502_sum_map.sql
index 021aaf3cd3b..51007a9c78a 100644
--- a/tests/queries/0_stateless/00502_sum_map.sql
+++ b/tests/queries/0_stateless/00502_sum_map.sql
@@ -38,3 +38,19 @@ select sumMap(val, cnt) from ( SELECT [ CAST(1.01, 'Decimal(10,2)') ] as val, [1
 
 select sumMap(val, cnt) from ( SELECT [ CAST('a', 'FixedString(1)'), CAST('b', 'FixedString(1)' ) ] as val, [1, 2] as cnt );
 select sumMap(val, cnt) from ( SELECT [ CAST('abc', 'String'), CAST('ab', 'String'), CAST('a', 'String') ] as val, [1, 2, 3] as cnt );
+
+DROP TABLE IF EXISTS sum_map_decimal;
+
+CREATE TABLE sum_map_decimal(
+    statusMap Nested(
+        goal_id UInt16,
+        revenue Decimal32(5)
+    )
+) ENGINE = Log;
+
+INSERT INTO sum_map_decimal VALUES ([1, 2, 3], [1.0, 2.0, 3.0]), ([3, 4, 5], [3.0, 4.0, 5.0]), ([4, 5, 6], [4.0, 5.0, 6.0]), ([6, 7, 8], [6.0, 7.0, 8.0]);
+
+SELECT sumMap(statusMap.goal_id, statusMap.revenue) FROM sum_map_decimal;
+SELECT sumMapWithOverflow(statusMap.goal_id, statusMap.revenue) FROM sum_map_decimal;
+
+DROP TABLE sum_map_decimal;

From 0b5213c80d52595eb66ce8a992381073ac290e9a Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Fri, 19 Feb 2021 19:49:19 +0300
Subject: [PATCH 1178/1238] Added comment

---
 src/AggregateFunctions/AggregateFunctionSumMap.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.h b/src/AggregateFunctions/AggregateFunctionSumMap.h
index f88a1468732..9c2cdb41844 100644
--- a/src/AggregateFunctions/AggregateFunctionSumMap.h
+++ b/src/AggregateFunctions/AggregateFunctionSumMap.h
@@ -116,7 +116,9 @@ public:
                         getName(), value_type->getName()};
 
                 WhichDataType value_type_to_check(value_type);
-
+                
+                /// Do not promote decimal because of implementation issues of this function design
+                /// If we decide to make this function more efficient we should promote decimal type during summ
                 if (value_type_to_check.isDecimal())
                     result_type = value_type_without_nullable;
                 else

From fc03c1013cc73094ebb592623c60037acd196410 Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Fri, 19 Feb 2021 20:42:51 +0300
Subject: [PATCH 1179/1238] Fixed style check

---
 src/AggregateFunctions/AggregateFunctionSumMap.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.h b/src/AggregateFunctions/AggregateFunctionSumMap.h
index 9c2cdb41844..f6a473546f9 100644
--- a/src/AggregateFunctions/AggregateFunctionSumMap.h
+++ b/src/AggregateFunctions/AggregateFunctionSumMap.h
@@ -116,7 +116,7 @@ public:
                         getName(), value_type->getName()};
 
                 WhichDataType value_type_to_check(value_type);
-                
+
                 /// Do not promote decimal because of implementation issues of this function design
                 /// If we decide to make this function more efficient we should promote decimal type during summ
                 if (value_type_to_check.isDecimal())

From 252bcccddaed5729e2a02fbd610209e0f7de5543 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Fri, 19 Feb 2021 21:32:39 +0300
Subject: [PATCH 1180/1238] Just little better

---
 src/Interpreters/Aggregator.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp
index 8040091256c..abff6f21acf 100644
--- a/src/Interpreters/Aggregator.cpp
+++ b/src/Interpreters/Aggregator.cpp
@@ -558,7 +558,7 @@ void NO_INLINE Aggregator::executeImplBatch(
 
     /// Generic case.
 
-    PODArray<AggregateDataPtr> places(rows);
+    std::unique_ptr<AggregateDataPtr[]> places(new AggregateDataPtr[rows]);
 
     /// For all rows.
     for (size_t i = 0; i < rows; ++i)
@@ -589,9 +589,9 @@ void NO_INLINE Aggregator::executeImplBatch(
     for (AggregateFunctionInstruction * inst = aggregate_instructions; inst->that; ++inst)
     {
         if (inst->offsets)
-            inst->batch_that->addBatchArray(rows, places.data(), inst->state_offset, inst->batch_arguments, inst->offsets, aggregates_pool);
+            inst->batch_that->addBatchArray(rows, places.get(), inst->state_offset, inst->batch_arguments, inst->offsets, aggregates_pool);
         else
-            inst->batch_that->addBatch(rows, places.data(), inst->state_offset, inst->batch_arguments, aggregates_pool);
+            inst->batch_that->addBatch(rows, places.get(), inst->state_offset, inst->batch_arguments, aggregates_pool);
     }
 }
 

From 66e775ef8811f1d1bba30a4369872b8ae04e0c54 Mon Sep 17 00:00:00 2001
From: Denis Zhuravlev <denis.zhuravlov@gmail.com>
Date: Fri, 19 Feb 2021 14:53:34 -0400
Subject: [PATCH 1181/1238] test for decimal ( p , s) in dictionaries

---
 .../01721_dictionary_decimal_p_s.reference    | 10 +++
 .../01721_dictionary_decimal_p_s.sql          | 78 +++++++++++++++++++
 2 files changed, 88 insertions(+)
 create mode 100644 tests/queries/0_stateless/01721_dictionary_decimal_p_s.reference
 create mode 100644 tests/queries/0_stateless/01721_dictionary_decimal_p_s.sql

diff --git a/tests/queries/0_stateless/01721_dictionary_decimal_p_s.reference b/tests/queries/0_stateless/01721_dictionary_decimal_p_s.reference
new file mode 100644
index 00000000000..066b4bd1d97
--- /dev/null
+++ b/tests/queries/0_stateless/01721_dictionary_decimal_p_s.reference
@@ -0,0 +1,10 @@
+-------- 42 --------
+42	14.0000	14.00000000	14.00000000	14.0000000000000000618637523926765281280
+42	14.0000	14.00000000	14.00000000
+14.0000	14.00000000	14.00000000
+-------- 4999 --------
+4999	1666.3333	1666.33333333	1666.33333333	1633.3553612205046244471093725648757194800
+4999	1666.3333	1666.33333333	1666.33333333
+1666.3333	1666.33333333	1666.33333333
+-------- 5000 --------
+0.1100	0.11000000	0.11000000
diff --git a/tests/queries/0_stateless/01721_dictionary_decimal_p_s.sql b/tests/queries/0_stateless/01721_dictionary_decimal_p_s.sql
new file mode 100644
index 00000000000..0451d455009
--- /dev/null
+++ b/tests/queries/0_stateless/01721_dictionary_decimal_p_s.sql
@@ -0,0 +1,78 @@
+set allow_experimental_bigint_types=1;
+drop database if exists db_01721;
+drop table if exists db_01721.table_decimal_dict;
+drop dictionary if exists db_01721.decimal_dict;
+
+
+create database db_01721;
+
+CREATE TABLE db_01721.table_decimal_dict(
+KeyField UInt64,
+Decimal32_ Decimal(5,4),
+Decimal64_ Decimal(18,8),
+Decimal128_ Decimal(25,8),
+Decimal256_ Decimal(76,37)
+)
+ENGINE = Memory;
+
+insert into db_01721.table_decimal_dict
+select number,
+       number / 3,
+       number / 3,
+       number / 3,
+       number / 3
+from numbers(5000);
+
+
+CREATE DICTIONARY IF NOT EXISTS db_01721.decimal_dict (
+	KeyField UInt64 DEFAULT 9999999,
+	Decimal32_ Decimal(5,4) DEFAULT 0.11,
+	Decimal64_ Decimal(18,8) DEFAULT 0.11,
+	Decimal128_ Decimal(25,8) DEFAULT 0.11
+--	,Decimal256_ Decimal256(37) DEFAULT 0.11
+)
+PRIMARY KEY KeyField
+SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_decimal_dict' DB 'db_01721'))
+LIFETIME(0) LAYOUT(SPARSE_HASHED);
+
+select '-------- 42 --------';
+
+SELECT * from db_01721.table_decimal_dict where KeyField = 42;
+
+SELECT * from db_01721.decimal_dict	where KeyField = 42;
+
+SELECT dictGet('db_01721.decimal_dict', 'Decimal32_', toUInt64(42)),
+       dictGet('db_01721.decimal_dict', 'Decimal64_', toUInt64(42)),
+       dictGet('db_01721.decimal_dict', 'Decimal128_', toUInt64(42))
+       -- ,dictGet('db_01721.decimal_dict', 'Decimal256_', toUInt64(42))
+;
+
+
+select '-------- 4999 --------';
+
+SELECT * from db_01721.table_decimal_dict where KeyField = 4999;
+
+SELECT * from db_01721.decimal_dict	where KeyField = 4999;
+
+SELECT dictGet('db_01721.decimal_dict', 'Decimal32_', toUInt64(4999)),
+       dictGet('db_01721.decimal_dict', 'Decimal64_', toUInt64(4999)),
+       dictGet('db_01721.decimal_dict', 'Decimal128_', toUInt64(4999))
+       --,dictGet('db_01721.decimal_dict', 'Decimal256_', toUInt64(4999))
+;
+
+select '-------- 5000 --------';
+
+SELECT * from db_01721.table_decimal_dict where KeyField = 5000;
+
+SELECT * from db_01721.decimal_dict	where KeyField = 5000;
+
+SELECT dictGet('db_01721.decimal_dict', 'Decimal32_', toUInt64(5000)),
+       dictGet('db_01721.decimal_dict', 'Decimal64_', toUInt64(5000)),
+       dictGet('db_01721.decimal_dict', 'Decimal128_', toUInt64(5000))
+       --,dictGet('db_01721.decimal_dict', 'Decimal256_', toUInt64(5000))
+;
+
+drop table if exists table_decimal_dict;
+drop dictionary if exists cache_dict;
+drop database if exists db_01721;
+

From fba1c7fcc165b1d84907a4a1ee37c809307cbf32 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Fri, 19 Feb 2021 21:48:58 +0300
Subject: [PATCH 1182/1238] Fix uncaught exception when HTTP client goes away

Even after #20464 it was still possible, for example [1].

    2021.02.19 11:40:21.886191 [ 68373 ] {} <Trace> DynamicQueryHandler: Request URI: /?database=test_ds2d6y&log_comment=/usr/share/clickhouse-test/queries/0_stateless/01302_aggregate_state_exception_memory_leak.sh&enable_http_compression=1&http_zlib_compression_level=1

    <snip>

    2021.02.19 11:41:35.289940 [ 365 ] {} <Fatal> BaseDaemon: (version 21.3.1.6058, build id: 8D46D65205E2C8B7FE408A0B4EC76CA0483F9E92) (from thread 68373) Terminate called for uncaught exception:
    Code: 24, e.displayText() = DB::Exception: Cannot write to ostream at offset 262568, Stack trace (when copying this message, always include the lines below):

    0. ./obj-x86_64-linux-gnu/../contrib/libcxx/include/exception:0: Poco::Exception::Exception(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, int) @ 0x15b3c7db in /usr/bin/clickhouse
    1. ./obj-x86_64-linux-gnu/../src/Common/Exception.cpp:56: DB::Exception::Exception(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, int, bool) @ 0x8aba66e in /usr/bin/clickhouse
    2. ./obj-x86_64-linux-gnu/../src/IO/WriteBufferFromOStream.cpp:0: DB::WriteBufferFromOStream::nextImpl() @ 0x8b8c105 in /usr/bin/clickhouse
    3. ./obj-x86_64-linux-gnu/../src/IO/BufferBase.h:39: DB::WriteBufferFromOStream::~WriteBufferFromOStream() @ 0x8b8c537 in /usr/bin/clickhouse
    4. ./obj-x86_64-linux-gnu/../src/IO/WriteBufferFromOStream.cpp:44: DB::Write

  [1]: https://clickhouse-test-reports.s3.yandex.net/16481/5d150cce4778dd14f58dcff67435bdec1efa155b/stress_test_(thread).html#fail1

And according to this partial stacktrace it seems that the dtor of
WriteBufferFromOStream was called from
WriteBufferFromHTTPServerResponse, since the class name starts from
DB::Write*

The problem is that if first time WriteBufferFromOStream::next() fails,
it will reset position to make next write no-op, however
WriteBufferFromHTTPServerResponse::next() will set position to available
buffer back, and next() will throw again, but this time it can be from
dtor.
---
 .../HTTP/WriteBufferFromHTTPServerResponse.cpp       | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp
index 86133fc2ffe..81f8cc30468 100644
--- a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp
+++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp
@@ -168,12 +168,18 @@ void WriteBufferFromHTTPServerResponse::onProgress(const Progress & progress)
 
 void WriteBufferFromHTTPServerResponse::finalize()
 {
-    next();
-    if (out)
+    try
     {
-        out->next();
+        next();
         out.reset();
     }
+    catch (...)
+    {
+        /// Avoid calling WriteBufferFromOStream::next() from dtor
+        /// (via WriteBufferFromHTTPServerResponse::next())
+        out.reset();
+        throw;
+    }
 
     if (!offset())
     {

From 0f77b6fd9585303162c5386a5b660d5448470d26 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Fri, 19 Feb 2021 22:01:45 +0300
Subject: [PATCH 1183/1238] Even more better

---
 src/Interpreters/AggregationCommon.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/Interpreters/AggregationCommon.h b/src/Interpreters/AggregationCommon.h
index aafec9a7929..e896b0e14df 100644
--- a/src/Interpreters/AggregationCommon.h
+++ b/src/Interpreters/AggregationCommon.h
@@ -271,9 +271,13 @@ static T inline packFixedShuffle(
     size_t idx,
     const uint8_t * __restrict masks)
 {
-    __m128i res{};
+    assert(num_srcs > 0);
 
-    for (size_t i = 0; i < num_srcs; ++i)
+    __m128i res = _mm_shuffle_epi8(
+        _mm_loadu_si128(reinterpret_cast<const __m128i *>(srcs[0] + elem_sizes[0] * idx)),
+        _mm_loadu_si128(reinterpret_cast<const __m128i *>(masks)));
+
+    for (size_t i = 1; i < num_srcs; ++i)
     {
         res = _mm_xor_si128(res,
             _mm_shuffle_epi8(

From 7474a7e3ca139f1a4e88e83af011b304ebdcaf3c Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Fri, 19 Feb 2021 22:42:40 +0300
Subject: [PATCH 1184/1238] Increase buffer for uncaught exception /
 std::terminate

Use PIPE_BUF over some magic number 1024 in terminate_handler, since
according to pipe(7):

    PIPE_BUF
           POSIX.1  says  that  write(2)s of less than PIPE_BUF bytes must be atomic

Also note that 1024, is too small, especially for C++ stacktraces (and
especially for debug builds, that contains lots of non-inlined helpers
for various ptrs).
---
 base/daemon/BaseDaemon.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp
index db7019d3572..248ffdd4d10 100644
--- a/base/daemon/BaseDaemon.cpp
+++ b/base/daemon/BaseDaemon.cpp
@@ -416,7 +416,7 @@ static void sanitizerDeathCallback()
     else
         log_message = "Terminate called without an active exception";
 
-    static const size_t buf_size = 1024;
+    static const size_t buf_size = PIPE_BUF;
 
     if (log_message.size() > buf_size - 16)
         log_message.resize(buf_size - 16);

From f5893778cbf6544cb1a6b2d92d21248674bc864a Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sat, 20 Feb 2021 00:01:13 +0300
Subject: [PATCH 1185/1238] Do not use view() in 01731_async_task_queue_wait to
 fix ANTLR parser

---
 tests/queries/0_stateless/01731_async_task_queue_wait.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/01731_async_task_queue_wait.sh b/tests/queries/0_stateless/01731_async_task_queue_wait.sh
index 89d8b63d745..2f77628fc6d 100755
--- a/tests/queries/0_stateless/01731_async_task_queue_wait.sh
+++ b/tests/queries/0_stateless/01731_async_task_queue_wait.sh
@@ -7,4 +7,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # regression for 'Empty task was returned from async task queue' during query
 # cancellation with async_socket_for_remote=1 (that ignores
 # max_distributed_connections)
-$(timeout --signal=SIGINT 1 ${CLICKHOUSE_CLIENT} --max_distributed_connections=1 --max_block_size=2  --interactive_delay=900000 -q "select x  from remote('127.{2,3}', view(select number + sleep(0.3) as x from numbers(16))) settings max_block_size = 2") 2>&1 | grep "Empty task was returned from async task queue" || true
+$(timeout --signal=SIGINT 1 ${CLICKHOUSE_CLIENT} --max_distributed_connections=1 --max_block_size=2  --interactive_delay=900000 -q "select number + sleep(0.3) as x from remote('127.{2,3}', system.numbers) settings max_block_size = 2") 2>&1 | grep "Empty task was returned from async task queue" || true

From d0fe8900f980167530a0e1be56dd0cd219c6f08a Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sat, 20 Feb 2021 00:04:28 +0300
Subject: [PATCH 1186/1238] Fix bash syntax in 01731_async_task_queue_wait

---
 tests/queries/0_stateless/01731_async_task_queue_wait.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/01731_async_task_queue_wait.sh b/tests/queries/0_stateless/01731_async_task_queue_wait.sh
index 2f77628fc6d..e0babf3c6ff 100755
--- a/tests/queries/0_stateless/01731_async_task_queue_wait.sh
+++ b/tests/queries/0_stateless/01731_async_task_queue_wait.sh
@@ -7,4 +7,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # regression for 'Empty task was returned from async task queue' during query
 # cancellation with async_socket_for_remote=1 (that ignores
 # max_distributed_connections)
-$(timeout --signal=SIGINT 1 ${CLICKHOUSE_CLIENT} --max_distributed_connections=1 --max_block_size=2  --interactive_delay=900000 -q "select number + sleep(0.3) as x from remote('127.{2,3}', system.numbers) settings max_block_size = 2") 2>&1 | grep "Empty task was returned from async task queue" || true
+timeout --signal=SIGINT 1 ${CLICKHOUSE_CLIENT} --max_distributed_connections=1 --max_block_size=2  --interactive_delay=900000 -q "select number + sleep(0.3) as x from remote('127.{2,3}', system.numbers) settings max_block_size = 2" 2>&1 | grep "Empty task was returned from async task queue" || true

From 2f7d0ba92677f595b1d760af2a826cc6fa181802 Mon Sep 17 00:00:00 2001
From: M0r64n <M0r64n1635@gmail.com>
Date: Sat, 20 Feb 2021 03:27:23 +0400
Subject: [PATCH 1187/1238] Replace direct truncate with O_TRUNC flag

---
 src/Storages/StorageFile.cpp                     | 16 ++++++++++------
 .../01721_engine_file_truncate_on_insert.sql     |  4 ++--
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index 856d03ea2ce..5524569e1f0 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -475,7 +475,8 @@ public:
         std::unique_lock<std::shared_timed_mutex> && lock_,
         const CompressionMethod compression_method,
         const Context & context,
-        const std::optional<FormatSettings> & format_settings)
+        const std::optional<FormatSettings> & format_settings,
+        int & flags)
         : storage(storage_)
         , metadata_snapshot(metadata_snapshot_)
         , lock(std::move(lock_))
@@ -491,13 +492,14 @@ public:
               * INSERT data; SELECT *; last SELECT returns only insert_data
               */
             storage.table_fd_was_used = true;
-            naked_buffer = std::make_unique<WriteBufferFromFileDescriptor>(storage.table_fd);
+            naked_buffer = std::make_unique<WriteBufferFromFileDescriptor>(storage.table_fd, DBMS_DEFAULT_BUFFER_SIZE);
         }
         else
         {
             if (storage.paths.size() != 1)
                 throw Exception("Table '" + storage.getStorageID().getNameForLogs() + "' is in readonly mode because of globs in filepath", ErrorCodes::DATABASE_ACCESS_DENIED);
-            naked_buffer = std::make_unique<WriteBufferFromFile>(storage.paths[0], DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_APPEND | O_CREAT);
+            flags |= O_WRONLY | O_APPEND | O_CREAT;
+            naked_buffer = std::make_unique<WriteBufferFromFile>(storage.paths[0], DBMS_DEFAULT_BUFFER_SIZE, flags);
         }
 
         /// In case of CSVWithNames we have already written prefix.
@@ -552,10 +554,11 @@ BlockOutputStreamPtr StorageFile::write(
     if (format_name == "Distributed")
         throw Exception("Method write is not implemented for Distributed format", ErrorCodes::NOT_IMPLEMENTED);
 
+    int flags = 0;
+
     std::string path;
     if (context.getSettingsRef().engine_file_truncate_on_insert)
-        if (0 != ::truncate(paths[0].c_str(), 0))
-            throwFromErrnoWithPath("Cannot truncate file " + paths[0], paths[0], ErrorCodes::CANNOT_TRUNCATE_FILE);
+        flags |= O_TRUNC;
 
     if (!paths.empty())
     {
@@ -569,7 +572,8 @@ BlockOutputStreamPtr StorageFile::write(
         std::unique_lock{rwlock, getLockTimeout(context)},
         chooseCompressionMethod(path, compression_method),
         context,
-        format_settings);
+        format_settings,
+        flags);
 }
 
 bool StorageFile::storesDataOnDisk() const
diff --git a/tests/queries/0_stateless/01721_engine_file_truncate_on_insert.sql b/tests/queries/0_stateless/01721_engine_file_truncate_on_insert.sql
index 42d935cc0dd..079b2546a20 100644
--- a/tests/queries/0_stateless/01721_engine_file_truncate_on_insert.sql
+++ b/tests/queries/0_stateless/01721_engine_file_truncate_on_insert.sql
@@ -1,7 +1,7 @@
 DROP TABLE IF EXISTS test;
 
-INSERT INTO TABLE FUNCTION file('01718_file/test/data.TSV', 'TSV', 'id UInt32') VALUES (1);
-ATTACH TABLE test FROM '01718_file/test' (id UInt8) ENGINE=File(TSV);
+INSERT INTO TABLE FUNCTION file('01721_file/test/data.TSV', 'TSV', 'id UInt32') VALUES (1);
+ATTACH TABLE test FROM '01721_file/test' (id UInt8) ENGINE=File(TSV);
 
 INSERT INTO test VALUES (2), (3);
 INSERT INTO test VALUES (4);

From 2a36d6cb55af14b0dcf87c1b806afbf5c7dec8be Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Sat, 20 Feb 2021 02:41:58 +0300
Subject: [PATCH 1188/1238] review suggestions

---
 src/Common/ZooKeeper/ZooKeeper.h             |  2 +-
 src/Databases/DatabaseAtomic.cpp             | 11 ++-
 src/Databases/DatabaseFactory.cpp            | 15 +++-
 src/Databases/DatabaseReplicated.cpp         | 79 ++++++++++----------
 src/Databases/DatabaseReplicated.h           |  9 ++-
 src/Databases/DatabaseReplicatedSettings.cpp | 23 ++++++
 src/Databases/DatabaseReplicatedSettings.h   | 26 +++++++
 src/Databases/DatabaseReplicatedWorker.cpp   | 13 ++--
 src/Databases/DatabaseReplicatedWorker.h     | 12 +++
 src/Databases/DatabaseWithDictionaries.cpp   |  4 +-
 src/Databases/ya.make                        |  1 +
 src/Interpreters/Context.cpp                 |  4 +-
 src/Interpreters/Context.h                   | 12 +--
 src/Interpreters/DDLTask.cpp                 | 26 +++----
 src/Interpreters/DDLTask.h                   | 41 ++++++++--
 src/Interpreters/DDLWorker.cpp               | 47 ++++++------
 src/Interpreters/DDLWorker.h                 |  4 +-
 src/Interpreters/InterpreterAlterQuery.cpp   |  2 +-
 src/Interpreters/InterpreterCreateQuery.cpp  |  4 +-
 src/Interpreters/InterpreterDropQuery.cpp    |  4 +-
 src/Interpreters/InterpreterRenameQuery.cpp  |  2 +-
 src/Storages/StorageMaterializedView.cpp     |  4 +-
 src/Storages/StorageReplicatedMergeTree.cpp  | 10 +--
 tests/queries/skip_list.json                 |  1 +
 24 files changed, 232 insertions(+), 124 deletions(-)
 create mode 100644 src/Databases/DatabaseReplicatedSettings.cpp
 create mode 100644 src/Databases/DatabaseReplicatedSettings.h

diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h
index fbe1bede91a..5b37e4d6024 100644
--- a/src/Common/ZooKeeper/ZooKeeper.h
+++ b/src/Common/ZooKeeper/ZooKeeper.h
@@ -315,7 +315,7 @@ public:
         return std::make_shared<EphemeralNodeHolder>(path, zookeeper, false, false, "");
     }
 
-    void reset()
+    void setAlreadyRemoved()
     {
         need_remove = false;
     }
diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp
index 2065e036863..71e0effb2d2 100644
--- a/src/Databases/DatabaseAtomic.cpp
+++ b/src/Databases/DatabaseAtomic.cpp
@@ -115,11 +115,14 @@ void DatabaseAtomic::dropTable(const Context & context, const String & table_nam
         std::unique_lock lock(mutex);
         table = getTableUnlocked(table_name, lock);
         table_metadata_path_drop = DatabaseCatalog::instance().getPathForDroppedMetadata(table->getStorageID());
-        auto txn = context.getMetadataTransaction();
+        auto txn = context.getZooKeeperMetadataTransaction();
         if (txn && !context.isInternalSubquery())
             txn->commit();      /// Commit point (a sort of) for Replicated database
 
         /// NOTE: replica will be lost if server crashes before the following rename
+        /// We apply changes in ZooKeeper before applying changes in local metadata file
+        /// to reduce probability of failures between these operations
+        /// (it's more likely to lost connection, than to fail before applying local changes).
         /// TODO better detection and recovery
 
         Poco::File(table_metadata_path).renameTo(table_metadata_path_drop);    /// Mark table as dropped
@@ -241,7 +244,7 @@ void DatabaseAtomic::renameTable(const Context & context, const String & table_n
     }
 
     /// Table renaming actually begins here
-    auto txn = context.getMetadataTransaction();
+    auto txn = context.getZooKeeperMetadataTransaction();
     if (txn && !context.isInternalSubquery())
         txn->commit();     /// Commit point (a sort of) for Replicated database
 
@@ -302,7 +305,7 @@ void DatabaseAtomic::commitCreateTable(const ASTCreateQuery & query, const Stora
         DatabaseCatalog::instance().addUUIDMapping(query.uuid);
         locked_uuid = true;
 
-        auto txn = query_context.getMetadataTransaction();
+        auto txn = query_context.getZooKeeperMetadataTransaction();
         if (txn && !query_context.isInternalSubquery())
             txn->commit();     /// Commit point (a sort of) for Replicated database
 
@@ -337,7 +340,7 @@ void DatabaseAtomic::commitAlterTable(const StorageID & table_id, const String &
     if (table_id.uuid != actual_table_id.uuid)
         throw Exception("Cannot alter table because it was renamed", ErrorCodes::CANNOT_ASSIGN_ALTER);
 
-    auto txn = query_context.getMetadataTransaction();
+    auto txn = query_context.getZooKeeperMetadataTransaction();
     if (txn && !query_context.isInternalSubquery())
         txn->commit();      /// Commit point (a sort of) for Replicated database
 
diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp
index ca2b9bb083e..cd0143556c9 100644
--- a/src/Databases/DatabaseFactory.cpp
+++ b/src/Databases/DatabaseFactory.cpp
@@ -103,8 +103,11 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
     if (engine_define->engine->arguments && !engine_may_have_arguments)
         throw Exception("Database engine " + engine_name + " cannot have arguments", ErrorCodes::BAD_ARGUMENTS);
 
-    if (engine_define->engine->parameters || engine_define->partition_by || engine_define->primary_key || engine_define->order_by ||
-        engine_define->sample_by || (!endsWith(engine_name, "MySQL") && engine_define->settings))
+    bool has_unexpected_element = engine_define->engine->parameters || engine_define->partition_by ||
+                                  engine_define->primary_key || engine_define->order_by ||
+                                  engine_define->sample_by;
+    bool may_have_settings = endsWith(engine_name, "MySQL") || engine_name == "Replicated";
+    if (has_unexpected_element || (!may_have_settings && engine_define->settings))
         throw Exception("Database engine " + engine_name + " cannot have parameters, primary_key, order_by, sample_by, settings",
                         ErrorCodes::UNKNOWN_ELEMENT_IN_AST);
 
@@ -205,7 +208,13 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
         shard_name = context.getMacros()->expand(shard_name);
         replica_name = context.getMacros()->expand(replica_name);
 
-        return std::make_shared<DatabaseReplicated>(database_name, metadata_path, uuid, zookeeper_path, shard_name, replica_name, context);
+        DatabaseReplicatedSettings database_replicated_settings{};
+        if (engine_define->settings)
+            database_replicated_settings.loadFromQuery(*engine_define);
+
+        return std::make_shared<DatabaseReplicated>(database_name, metadata_path, uuid,
+                                                    zookeeper_path, shard_name, replica_name,
+                                                    std::move(database_replicated_settings), context);
     }
 
 #if USE_LIBPQXX
diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp
index 441880ae616..12cff3407d3 100644
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@@ -63,11 +63,13 @@ DatabaseReplicated::DatabaseReplicated(
     const String & zookeeper_path_,
     const String & shard_name_,
     const String & replica_name_,
+    DatabaseReplicatedSettings db_settings_,
     const Context & context_)
     : DatabaseAtomic(name_, metadata_path_, uuid, "DatabaseReplicated (" + name_ + ")", context_)
     , zookeeper_path(zookeeper_path_)
     , shard_name(shard_name_)
     , replica_name(replica_name_)
+    , db_settings(std::move(db_settings_))
 {
     if (zookeeper_path.empty() || shard_name.empty() || replica_name.empty())
         throw Exception("ZooKeeper path, shard and replica names must be non-empty", ErrorCodes::BAD_ARGUMENTS);
@@ -141,7 +143,8 @@ ClusterPtr DatabaseReplicated::getCluster() const
             break;
     }
     if (!success)
-        throw Exception(ErrorCodes::ALL_CONNECTION_TRIES_FAILED, "Cannot get consistent cluster snapshot");
+        throw Exception(ErrorCodes::ALL_CONNECTION_TRIES_FAILED, "Cannot get consistent cluster snapshot,"
+                                                                 "because replicas are created or removed concurrently");
 
     assert(!hosts.empty());
     assert(hosts.size() == host_ids.size());
@@ -172,7 +175,7 @@ ClusterPtr DatabaseReplicated::getCluster() const
     return std::make_shared<Cluster>(global_context.getSettingsRef(), shards, username, password, global_context.getTCPPort(), false);
 }
 
-void DatabaseReplicated::tryConnectToZooKeeper(bool force_attach)
+void DatabaseReplicated::tryConnectToZooKeeperAndInitDatabase(bool force_attach)
 {
     try
     {
@@ -228,6 +231,9 @@ bool DatabaseReplicated::createDatabaseNodesInZooKeeper(const zkutil::ZooKeeperP
     ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/log", "", zkutil::CreateMode::Persistent));
     ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/replicas", "", zkutil::CreateMode::Persistent));
     ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/counter", "", zkutil::CreateMode::Persistent));
+    /// We create and remove counter/cnt- node to increment sequential number of counter/ node and make log entry numbers start from 1.
+    /// New replicas are created with log pointer equal to 0 and log pointer is a number of the last executed entry.
+    /// It means that we cannot have log entry with number 0.
     ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/counter/cnt-", "", zkutil::CreateMode::Persistent));
     ops.emplace_back(zkutil::makeRemoveRequest(zookeeper_path + "/counter/cnt-", -1));
     ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/metadata", "", zkutil::CreateMode::Persistent));
@@ -253,10 +259,7 @@ void DatabaseReplicated::createReplicaNodesInZooKeeper(const zkutil::ZooKeeperPt
     auto host_id = getHostID(global_context, db_uuid);
 
     /// On replica creation add empty entry to log. Can be used to trigger some actions on other replicas (e.g. update cluster info).
-    DDLLogEntry entry;
-    entry.hosts = {};
-    entry.query = {};
-    entry.initiator = {};
+    DDLLogEntry entry{};
 
     String query_path_prefix = zookeeper_path + "/log/query-";
     String counter_prefix = zookeeper_path + "/counter/cnt-";
@@ -273,7 +276,7 @@ void DatabaseReplicated::createReplicaNodesInZooKeeper(const zkutil::ZooKeeperPt
 
 void DatabaseReplicated::loadStoredObjects(Context & context, bool has_force_restore_data_flag, bool force_attach)
 {
-    tryConnectToZooKeeper(force_attach);
+    tryConnectToZooKeeperAndInitDatabase(force_attach);
 
     DatabaseAtomic::loadStoredObjects(context, has_force_restore_data_flag, force_attach);
 
@@ -281,7 +284,7 @@ void DatabaseReplicated::loadStoredObjects(Context & context, bool has_force_res
     ddl_worker->startup();
 }
 
-BlockIO DatabaseReplicated::propose(const ASTPtr & query, const Context & query_context)
+BlockIO DatabaseReplicated::tryEnqueueReplicatedDDL(const ASTPtr & query, const Context & query_context)
 {
     if (is_readonly)
         throw Exception(ErrorCodes::NO_ZOOKEEPER, "Database is in readonly mode, because it cannot connect to ZooKeeper");
@@ -405,7 +408,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
 
     String db_name = getDatabaseName();
     String to_db_name = getDatabaseName() + BROKEN_TABLES_SUFFIX;
-    if (total_tables < tables_to_detach.size() * 2)
+    if (total_tables * db_settings.max_broken_tables_ratio < tables_to_detach.size())
         throw Exception(ErrorCodes::DATABASE_REPLICATION_FAILED, "Too many tables to recreate: {} of {}", tables_to_detach.size(), total_tables);
     else if (!tables_to_detach.empty())
     {
@@ -594,12 +597,12 @@ void DatabaseReplicated::shutdown()
 
 void DatabaseReplicated::dropTable(const Context & context, const String & table_name, bool no_delay)
 {
-    auto txn = context.getMetadataTransaction();
+    auto txn = context.getZooKeeperMetadataTransaction();
     assert(!ddl_worker->isCurrentlyActive() || txn);
-    if (txn && txn->is_initial_query)
+    if (txn && txn->isInitialQuery())
     {
         String metadata_zk_path = zookeeper_path + "/metadata/" + escapeForFileName(table_name);
-        txn->ops.emplace_back(zkutil::makeRemoveRequest(metadata_zk_path, -1));
+        txn->addOp(zkutil::makeRemoveRequest(metadata_zk_path, -1));
     }
     DatabaseAtomic::dropTable(context, table_name, no_delay);
 }
@@ -607,10 +610,10 @@ void DatabaseReplicated::dropTable(const Context & context, const String & table
 void DatabaseReplicated::renameTable(const Context & context, const String & table_name, IDatabase & to_database,
                                      const String & to_table_name, bool exchange, bool dictionary)
 {
-    auto txn = context.getMetadataTransaction();
+    auto txn = context.getZooKeeperMetadataTransaction();
     assert(txn);
 
-    if (txn->is_initial_query)
+    if (txn->isInitialQuery())
     {
         if (this != &to_database)
             throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Moving tables between databases is not supported for Replicated engine");
@@ -622,16 +625,16 @@ void DatabaseReplicated::renameTable(const Context & context, const String & tab
             throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} does not exist", to_table_name);
 
         String statement = readMetadataFile(table_name);
-        String metadata_zk_path = txn->zookeeper_path + "/metadata/" + escapeForFileName(table_name);
-        String metadata_zk_path_to = txn->zookeeper_path + "/metadata/" + escapeForFileName(to_table_name);
-        txn->ops.emplace_back(zkutil::makeRemoveRequest(metadata_zk_path, -1));
+        String metadata_zk_path = zookeeper_path + "/metadata/" + escapeForFileName(table_name);
+        String metadata_zk_path_to = zookeeper_path + "/metadata/" + escapeForFileName(to_table_name);
+        txn->addOp(zkutil::makeRemoveRequest(metadata_zk_path, -1));
         if (exchange)
         {
             String statement_to = readMetadataFile(to_table_name);
-            txn->ops.emplace_back(zkutil::makeRemoveRequest(metadata_zk_path_to, -1));
-            txn->ops.emplace_back(zkutil::makeCreateRequest(metadata_zk_path, statement_to, zkutil::CreateMode::Persistent));
+            txn->addOp(zkutil::makeRemoveRequest(metadata_zk_path_to, -1));
+            txn->addOp(zkutil::makeCreateRequest(metadata_zk_path, statement_to, zkutil::CreateMode::Persistent));
         }
-        txn->ops.emplace_back(zkutil::makeCreateRequest(metadata_zk_path_to, statement, zkutil::CreateMode::Persistent));
+        txn->addOp(zkutil::makeCreateRequest(metadata_zk_path_to, statement, zkutil::CreateMode::Persistent));
     }
 
     DatabaseAtomic::renameTable(context, table_name, to_database, to_table_name, exchange, dictionary);
@@ -641,14 +644,14 @@ void DatabaseReplicated::commitCreateTable(const ASTCreateQuery & query, const S
                        const String & table_metadata_tmp_path, const String & table_metadata_path,
                        const Context & query_context)
 {
-    auto txn = query_context.getMetadataTransaction();
+    auto txn = query_context.getZooKeeperMetadataTransaction();
     assert(!ddl_worker->isCurrentlyActive() || txn);
-    if (txn && txn->is_initial_query)
+    if (txn && txn->isInitialQuery())
     {
-        String metadata_zk_path = txn->zookeeper_path + "/metadata/" + escapeForFileName(query.table);
+        String metadata_zk_path = zookeeper_path + "/metadata/" + escapeForFileName(query.table);
         String statement = getObjectDefinitionFromCreateQuery(query.clone());
         /// zk::multi(...) will throw if `metadata_zk_path` exists
-        txn->ops.emplace_back(zkutil::makeCreateRequest(metadata_zk_path, statement, zkutil::CreateMode::Persistent));
+        txn->addOp(zkutil::makeCreateRequest(metadata_zk_path, statement, zkutil::CreateMode::Persistent));
     }
     DatabaseAtomic::commitCreateTable(query, table, table_metadata_tmp_path, table_metadata_path, query_context);
 }
@@ -657,11 +660,11 @@ void DatabaseReplicated::commitAlterTable(const StorageID & table_id,
                                           const String & table_metadata_tmp_path, const String & table_metadata_path,
                                           const String & statement, const Context & query_context)
 {
-    auto txn = query_context.getMetadataTransaction();
-    if (txn && txn->is_initial_query)
+    auto txn = query_context.getZooKeeperMetadataTransaction();
+    if (txn && txn->isInitialQuery())
     {
-        String metadata_zk_path = txn->zookeeper_path + "/metadata/" + escapeForFileName(table_id.table_name);
-        txn->ops.emplace_back(zkutil::makeSetRequest(metadata_zk_path, statement, -1));
+        String metadata_zk_path = zookeeper_path + "/metadata/" + escapeForFileName(table_id.table_name);
+        txn->addOp(zkutil::makeSetRequest(metadata_zk_path, statement, -1));
     }
     DatabaseAtomic::commitAlterTable(table_id, table_metadata_tmp_path, table_metadata_path, statement, query_context);
 }
@@ -670,37 +673,37 @@ void DatabaseReplicated::createDictionary(const Context & context,
                                           const String & dictionary_name,
                                           const ASTPtr & query)
 {
-    auto txn = context.getMetadataTransaction();
+    auto txn = context.getZooKeeperMetadataTransaction();
     assert(!ddl_worker->isCurrentlyActive() || txn);
-    if (txn && txn->is_initial_query)
+    if (txn && txn->isInitialQuery())
     {
-        String metadata_zk_path = txn->zookeeper_path + "/metadata/" + escapeForFileName(dictionary_name);
+        String metadata_zk_path = zookeeper_path + "/metadata/" + escapeForFileName(dictionary_name);
         String statement = getObjectDefinitionFromCreateQuery(query->clone());
-        txn->ops.emplace_back(zkutil::makeCreateRequest(metadata_zk_path, statement, zkutil::CreateMode::Persistent));
+        txn->addOp(zkutil::makeCreateRequest(metadata_zk_path, statement, zkutil::CreateMode::Persistent));
     }
     DatabaseAtomic::createDictionary(context, dictionary_name, query);
 }
 
 void DatabaseReplicated::removeDictionary(const Context & context, const String & dictionary_name)
 {
-    auto txn = context.getMetadataTransaction();
+    auto txn = context.getZooKeeperMetadataTransaction();
     assert(!ddl_worker->isCurrentlyActive() || txn);
-    if (txn && txn->is_initial_query)
+    if (txn && txn->isInitialQuery())
     {
         String metadata_zk_path = zookeeper_path + "/metadata/" + escapeForFileName(dictionary_name);
-        txn->ops.emplace_back(zkutil::makeRemoveRequest(metadata_zk_path, -1));
+        txn->addOp(zkutil::makeRemoveRequest(metadata_zk_path, -1));
     }
     DatabaseAtomic::removeDictionary(context, dictionary_name);
 }
 
 void DatabaseReplicated::detachTablePermanently(const Context & context, const String & table_name)
 {
-    auto txn = context.getMetadataTransaction();
+    auto txn = context.getZooKeeperMetadataTransaction();
     assert(!ddl_worker->isCurrentlyActive() || txn);
-    if (txn && txn->is_initial_query)
+    if (txn && txn->isInitialQuery())
     {
         String metadata_zk_path = zookeeper_path + "/metadata/" + escapeForFileName(table_name);
-        txn->ops.emplace_back(zkutil::makeRemoveRequest(metadata_zk_path, -1));
+        txn->addOp(zkutil::makeRemoveRequest(metadata_zk_path, -1));
     }
     DatabaseAtomic::detachTablePermanently(context, table_name);
 }
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index a3a53e02ee4..fde53cf2c29 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <Databases/DatabaseAtomic.h>
+#include <Databases/DatabaseReplicatedSettings.h>
 #include <Common/ZooKeeper/ZooKeeper.h>
 #include <Core/BackgroundSchedulePool.h>
 #include <DataStreams/BlockIO.h>
@@ -22,13 +23,14 @@ class DatabaseReplicated : public DatabaseAtomic
 public:
     DatabaseReplicated(const String & name_, const String & metadata_path_, UUID uuid,
                        const String & zookeeper_path_, const String & shard_name_, const String & replica_name_,
+                       DatabaseReplicatedSettings db_settings_,
                        const Context & context);
 
     ~DatabaseReplicated() override;
 
     String getEngineName() const override { return "Replicated"; }
 
-    /// If current query is initial, then the following methods add metadata updating ZooKeeper operations to current MetadataTransaction.
+    /// If current query is initial, then the following methods add metadata updating ZooKeeper operations to current ZooKeeperMetadataTransaction.
     void dropTable(const Context &, const String & table_name, bool no_delay) override;
     void renameTable(const Context & context, const String & table_name, IDatabase & to_database,
                      const String & to_table_name, bool exchange, bool dictionary) override;
@@ -46,7 +48,7 @@ public:
 
     /// Try to execute DLL query on current host as initial query. If query is succeed,
     /// then it will be executed on all replicas.
-    BlockIO propose(const ASTPtr & query, const Context & query_context);
+    BlockIO tryEnqueueReplicatedDDL(const ASTPtr & query, const Context & query_context);
 
     void stopReplication();
 
@@ -64,7 +66,7 @@ public:
     friend struct DatabaseReplicatedTask;
     friend class DatabaseReplicatedDDLWorker;
 private:
-    void tryConnectToZooKeeper(bool force_attach);
+    void tryConnectToZooKeeperAndInitDatabase(bool force_attach);
     bool createDatabaseNodesInZooKeeper(const ZooKeeperPtr & current_zookeeper);
     void createReplicaNodesInZooKeeper(const ZooKeeperPtr & current_zookeeper);
 
@@ -78,6 +80,7 @@ private:
     String shard_name;
     String replica_name;
     String replica_path;
+    DatabaseReplicatedSettings db_settings;
 
     zkutil::ZooKeeperPtr getZooKeeper() const;
 
diff --git a/src/Databases/DatabaseReplicatedSettings.cpp b/src/Databases/DatabaseReplicatedSettings.cpp
new file mode 100644
index 00000000000..61febcf2810
--- /dev/null
+++ b/src/Databases/DatabaseReplicatedSettings.cpp
@@ -0,0 +1,23 @@
+#include <Databases/DatabaseReplicatedSettings.h>
+#include <Parsers/ASTFunction.h>
+#include <Parsers/ASTCreateQuery.h>
+
+namespace DB
+{
+
+IMPLEMENT_SETTINGS_TRAITS(DatabaseReplicatedSettingsTraits, LIST_OF_DATABASE_REPLICATED_SETTINGS)
+
+void DatabaseReplicatedSettings::loadFromQuery(ASTStorage & storage_def)
+{
+    if (storage_def.settings)
+    {
+        applyChanges(storage_def.settings->changes);
+        return;
+    }
+
+    auto settings_ast = std::make_shared<ASTSetQuery>();
+    settings_ast->is_standalone = false;
+    storage_def.set(storage_def.settings, settings_ast);
+}
+
+}
diff --git a/src/Databases/DatabaseReplicatedSettings.h b/src/Databases/DatabaseReplicatedSettings.h
new file mode 100644
index 00000000000..11d5b3820e4
--- /dev/null
+++ b/src/Databases/DatabaseReplicatedSettings.h
@@ -0,0 +1,26 @@
+#pragma once
+#include <Core/Defines.h>
+#include <Core/BaseSettings.h>
+
+namespace DB
+{
+
+class ASTStorage;
+
+#define LIST_OF_DATABASE_REPLICATED_SETTINGS(M) \
+    M(Float, max_broken_tables_ratio, 0.5, "Do not recover replica automatically if the ratio of staled tables to all tables is greater", 0) \
+    M(UInt64, max_replication_lag_to_enqueue, 10, "Replica will throw exception on attempt to execute query if its replication lag greater", 0) \
+    M(UInt64, wait_entry_commited_timeout_sec, 3600, "Replicas will try to cancel query if timeout exceed, but initiator host has not executed it yet", 0) \
+
+DECLARE_SETTINGS_TRAITS(DatabaseReplicatedSettingsTraits, LIST_OF_DATABASE_REPLICATED_SETTINGS)
+
+
+/** Settings for the MaterializeMySQL database engine.
+  * Could be loaded from a CREATE DATABASE query (SETTINGS clause).
+  */
+struct DatabaseReplicatedSettings : public BaseSettings<DatabaseReplicatedSettingsTraits>
+{
+    void loadFromQuery(ASTStorage & storage_def);
+};
+
+}
diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp
index ff15878b136..e0c5717711c 100644
--- a/src/Databases/DatabaseReplicatedWorker.cpp
+++ b/src/Databases/DatabaseReplicatedWorker.cpp
@@ -30,7 +30,7 @@ void DatabaseReplicatedDDLWorker::initializeMainThread()
         {
             auto zookeeper = getAndSetZooKeeper();
             if (database->is_readonly)
-                database->tryConnectToZooKeeper(false);
+                database->tryConnectToZooKeeperAndInitDatabase(false);
             initializeReplication();
             initialized = true;
             return;
@@ -98,8 +98,7 @@ String DatabaseReplicatedDDLWorker::tryEnqueueAndExecuteEntry(DDLLogEntry & entr
     UInt32 our_log_ptr = parse<UInt32>(zookeeper->get(database->replica_path + "/log_ptr"));
     UInt32 max_log_ptr = parse<UInt32>(zookeeper->get(database->zookeeper_path + "/max_log_ptr"));
     assert(our_log_ptr <= max_log_ptr);
-    constexpr UInt32 max_replication_lag = 16;
-    if (max_replication_lag < max_log_ptr - our_log_ptr)
+    if (database->db_settings.max_replication_lag_to_enqueue < max_log_ptr - our_log_ptr)
         throw Exception(ErrorCodes::NOT_A_LEADER, "Cannot enqueue query on this replica, "
                         "because it has replication lag of {} queries. Try other replica.", max_log_ptr - our_log_ptr);
 
@@ -131,7 +130,7 @@ String DatabaseReplicatedDDLWorker::tryEnqueueAndExecuteEntry(DDLLogEntry & entr
     if (zookeeper->expired() || stop_flag)
         throw Exception(ErrorCodes::DATABASE_REPLICATION_FAILED, "ZooKeeper session expired or replication stopped, try again");
 
-    processTask(*task);
+    processTask(*task, zookeeper);
 
     if (!task->was_executed)
     {
@@ -139,7 +138,7 @@ String DatabaseReplicatedDDLWorker::tryEnqueueAndExecuteEntry(DDLLogEntry & entr
                         task->execution_status.code, task->execution_status.message);
     }
 
-    try_node->reset();
+    try_node->setAlreadyRemoved();
 
     return entry_path;
 }
@@ -178,7 +177,7 @@ DDLTaskPtr DatabaseReplicatedDDLWorker::initAndCheckTask(const String & entry_na
         /// Query is not committed yet. We cannot just skip it and execute next one, because reordering may break replication.
         LOG_TRACE(log, "Waiting for initiator {} to commit or rollback entry {}", initiator_name, entry_path);
         constexpr size_t wait_time_ms = 1000;
-        constexpr size_t max_iterations = 3600;
+        size_t max_iterations = database->db_settings.wait_entry_commited_timeout_sec;
         size_t iteration = 0;
 
         while (!wait_committed_or_failed->tryWait(wait_time_ms))
@@ -194,7 +193,7 @@ DDLTaskPtr DatabaseReplicatedDDLWorker::initAndCheckTask(const String & entry_na
             if (max_iterations <= ++iteration)
             {
                 /// What can we do if initiator hangs for some reason? Seems like we can remove /try node.
-                /// Initiator will fail to commit entry to ZK (including ops for replicated table) if /try does not exist.
+                /// Initiator will fail to commit ZooKeeperMetadataTransaction (including ops for replicated table) if /try does not exist.
                 /// But it's questionable.
 
                 /// We use tryRemove(...) because multiple hosts (including initiator) may try to do it concurrently.
diff --git a/src/Databases/DatabaseReplicatedWorker.h b/src/Databases/DatabaseReplicatedWorker.h
index 1eafe2489e7..6dd8dc408d7 100644
--- a/src/Databases/DatabaseReplicatedWorker.h
+++ b/src/Databases/DatabaseReplicatedWorker.h
@@ -6,6 +6,18 @@ namespace DB
 
 class DatabaseReplicated;
 
+/// It's similar to DDLWorker, but has the following differences:
+/// 1. DDL queue in ZooKeeper is not shared between multiple clusters and databases,
+///    each DatabaseReplicated has its own queue in ZooKeeper and DatabaseReplicatedDDLWorker object.
+/// 2. Shards and replicas are identified by shard_name and replica_name arguments of database engine,
+///    not by address:port pairs. Cluster (of multiple database replicas) is identified by its zookeeper_path.
+/// 3. After creation of an entry in DDL queue initiator tries to execute the entry locally
+///    and other hosts wait for query to finish on initiator host.
+///    If query succeed on initiator, then all hosts must execute it, so they will retry until query succeed.
+///    We assume that cluster is homogenous, so if replicas are in consistent state and query succeed on one host,
+///    then all hosts can execute it (maybe after several retries).
+/// 4. Each database replica stores its log pointer in ZooKeeper. Cleanup thread removes old entry
+///    if its number < max_log_ptr - logs_to_keep.
 class DatabaseReplicatedDDLWorker : public DDLWorker
 {
 public:
diff --git a/src/Databases/DatabaseWithDictionaries.cpp b/src/Databases/DatabaseWithDictionaries.cpp
index 7ce5de56b64..d92f0f1897e 100644
--- a/src/Databases/DatabaseWithDictionaries.cpp
+++ b/src/Databases/DatabaseWithDictionaries.cpp
@@ -194,7 +194,7 @@ void DatabaseWithDictionaries::createDictionary(const Context & context, const S
             detachDictionary(dictionary_name);
     });
 
-    auto txn = context.getMetadataTransaction();
+    auto txn = context.getZooKeeperMetadataTransaction();
     if (txn && !context.isInternalSubquery())
         txn->commit();      /// Commit point (a sort of) for Replicated database
 
@@ -219,7 +219,7 @@ void DatabaseWithDictionaries::removeDictionary(const Context & context, const S
     {
         String dictionary_metadata_path = getObjectMetadataPath(dictionary_name);
 
-        auto txn = context.getMetadataTransaction();
+        auto txn = context.getZooKeeperMetadataTransaction();
         if (txn && !context.isInternalSubquery())
             txn->commit();      /// Commit point (a sort of) for Replicated database
 
diff --git a/src/Databases/ya.make b/src/Databases/ya.make
index 38f79532080..8bd3f291a64 100644
--- a/src/Databases/ya.make
+++ b/src/Databases/ya.make
@@ -17,6 +17,7 @@ SRCS(
     DatabaseOnDisk.cpp
     DatabaseOrdinary.cpp
     DatabaseReplicated.cpp
+    DatabaseReplicatedSettings.cpp
     DatabaseReplicatedWorker.cpp
     DatabaseWithDictionaries.cpp
     DatabasesCommon.cpp
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 766b14dea42..98e4a87fba3 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -2553,14 +2553,14 @@ StorageID Context::resolveStorageIDImpl(StorageID storage_id, StorageNamespace w
     return StorageID::createEmpty();
 }
 
-void Context::initMetadataTransaction(MetadataTransactionPtr txn, [[maybe_unused]] bool attach_existing)
+void Context::initZooKeeperMetadataTransaction(ZooKeeperMetadataTransactionPtr txn, [[maybe_unused]] bool attach_existing)
 {
     assert(!metadata_transaction);
     assert(attach_existing || query_context == this);
     metadata_transaction = std::move(txn);
 }
 
-MetadataTransactionPtr Context::getMetadataTransaction() const
+ZooKeeperMetadataTransactionPtr Context::getZooKeeperMetadataTransaction() const
 {
     assert(!metadata_transaction || hasQueryContext());
     return metadata_transaction;
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index 24d0eb4b0de..563fb172488 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -117,8 +117,8 @@ using VolumePtr = std::shared_ptr<IVolume>;
 struct NamedSession;
 struct BackgroundTaskSchedulingSettings;
 
-struct MetadataTransaction;
-using MetadataTransactionPtr = std::shared_ptr<MetadataTransaction>;
+class ZooKeeperMetadataTransaction;
+using ZooKeeperMetadataTransactionPtr = std::shared_ptr<ZooKeeperMetadataTransaction>;
 
 #if USE_EMBEDDED_COMPILER
 class CompiledExpressionCache;
@@ -281,7 +281,7 @@ private:
                                    /// to be customized in HTTP and TCP servers by overloading the customizeContext(DB::Context&)
                                    /// methods.
 
-    MetadataTransactionPtr metadata_transaction;    /// Distributed DDL context. I'm not sure if it's a suitable place for this,
+    ZooKeeperMetadataTransactionPtr metadata_transaction;    /// Distributed DDL context. I'm not sure if it's a suitable place for this,
                                                     /// but it's the easiest way to pass this through the whole stack from executeQuery(...)
                                                     /// to DatabaseOnDisk::commitCreateTable(...) or IStorage::alter(...) without changing
                                                     /// thousands of signatures.
@@ -746,8 +746,10 @@ public:
     IHostContextPtr & getHostContext();
     const IHostContextPtr & getHostContext() const;
 
-    void initMetadataTransaction(MetadataTransactionPtr txn, bool attach_existing = false);
-    MetadataTransactionPtr getMetadataTransaction() const;
+    /// Initialize context of distributed DDL query with Replicated database.
+    void initZooKeeperMetadataTransaction(ZooKeeperMetadataTransactionPtr txn, bool attach_existing = false);
+    /// Returns context of current distributed DDL query or nullptr.
+    ZooKeeperMetadataTransactionPtr getZooKeeperMetadataTransaction() const;
 
     struct MySQLWireContext
     {
diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp
index 7f47f0a6659..4be465d3de4 100644
--- a/src/Interpreters/DDLTask.cpp
+++ b/src/Interpreters/DDLTask.cpp
@@ -96,7 +96,7 @@ void DDLTaskBase::parseQueryFromEntry(const Context & context)
     query = parseQuery(parser_query, begin, end, description, 0, context.getSettingsRef().max_parser_depth);
 }
 
-std::unique_ptr<Context> DDLTaskBase::makeQueryContext(Context & from_context)
+std::unique_ptr<Context> DDLTaskBase::makeQueryContext(Context & from_context, const ZooKeeperPtr & /*zookeeper*/)
 {
     auto query_context = std::make_unique<Context>(from_context);
     query_context->makeQueryContext();
@@ -293,28 +293,26 @@ String DatabaseReplicatedTask::getShardID() const
     return database->shard_name;
 }
 
-std::unique_ptr<Context> DatabaseReplicatedTask::makeQueryContext(Context & from_context)
+std::unique_ptr<Context> DatabaseReplicatedTask::makeQueryContext(Context & from_context, const ZooKeeperPtr & zookeeper)
 {
-    auto query_context = DDLTaskBase::makeQueryContext(from_context);
+    auto query_context = DDLTaskBase::makeQueryContext(from_context, zookeeper);
     query_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY;
     query_context->setCurrentDatabase(database->getDatabaseName());
 
-    auto txn = std::make_shared<MetadataTransaction>();
-    query_context->initMetadataTransaction(txn);
-    txn->current_zookeeper = from_context.getZooKeeper();
-    txn->zookeeper_path = database->zookeeper_path;
-    txn->is_initial_query = is_initial_query;
+    auto txn = std::make_shared<ZooKeeperMetadataTransaction>(zookeeper, database->zookeeper_path, is_initial_query);
+    query_context->initZooKeeperMetadataTransaction(txn);
 
     if (is_initial_query)
     {
-        txn->ops.emplace_back(zkutil::makeRemoveRequest(entry_path + "/try", -1));
-        txn->ops.emplace_back(zkutil::makeCreateRequest(entry_path + "/committed", host_id_str, zkutil::CreateMode::Persistent));
-        txn->ops.emplace_back(zkutil::makeSetRequest(database->zookeeper_path + "/max_log_ptr", toString(getLogEntryNumber(entry_name)), -1));
+        txn->addOp(zkutil::makeRemoveRequest(entry_path + "/try", -1));
+        txn->addOp(zkutil::makeCreateRequest(entry_path + "/committed", host_id_str, zkutil::CreateMode::Persistent));
+        txn->addOp(zkutil::makeSetRequest(database->zookeeper_path + "/max_log_ptr", toString(getLogEntryNumber(entry_name)), -1));
     }
 
-    txn->ops.emplace_back(zkutil::makeSetRequest(database->replica_path + "/log_ptr", toString(getLogEntryNumber(entry_name)), -1));
+    txn->addOp(zkutil::makeSetRequest(database->replica_path + "/log_ptr", toString(getLogEntryNumber(entry_name)), -1));
 
-    std::move(ops.begin(), ops.end(), std::back_inserter(txn->ops));
+    for (auto & op : ops)
+        txn->addOp(std::move(op));
     ops.clear();
 
     return query_context;
@@ -335,7 +333,7 @@ UInt32 DDLTaskBase::getLogEntryNumber(const String & log_entry_name)
     return parse<UInt32>(log_entry_name.substr(strlen(name)));
 }
 
-void MetadataTransaction::commit()
+void ZooKeeperMetadataTransaction::commit()
 {
     assert(state == CREATED);
     state = FAILED;
diff --git a/src/Interpreters/DDLTask.h b/src/Interpreters/DDLTask.h
index f02e17103aa..18c1f4c80cd 100644
--- a/src/Interpreters/DDLTask.h
+++ b/src/Interpreters/DDLTask.h
@@ -20,8 +20,8 @@ class ASTQueryWithOnCluster;
 using ZooKeeperPtr = std::shared_ptr<zkutil::ZooKeeper>;
 class DatabaseReplicated;
 
-struct MetadataTransaction;
-using MetadataTransactionPtr = std::shared_ptr<MetadataTransaction>;
+class ZooKeeperMetadataTransaction;
+using ZooKeeperMetadataTransactionPtr = std::shared_ptr<ZooKeeperMetadataTransaction>;
 
 struct HostID
 {
@@ -95,7 +95,7 @@ struct DDLTaskBase
 
     virtual String getShardID() const = 0;
 
-    virtual std::unique_ptr<Context> makeQueryContext(Context & from_context);
+    virtual std::unique_ptr<Context> makeQueryContext(Context & from_context, const ZooKeeperPtr & zookeeper);
 
     inline String getActiveNodePath() const { return entry_path + "/active/" + host_id_str; }
     inline String getFinishedNodePath() const { return entry_path + "/finished/" + host_id_str; }
@@ -132,13 +132,19 @@ struct DatabaseReplicatedTask : public DDLTaskBase
     DatabaseReplicatedTask(const String & name, const String & path, DatabaseReplicated * database_);
 
     String getShardID() const override;
-    std::unique_ptr<Context> makeQueryContext(Context & from_context) override;
+    std::unique_ptr<Context> makeQueryContext(Context & from_context, const ZooKeeperPtr & zookeeper) override;
 
     DatabaseReplicated * database;
 };
 
-
-struct MetadataTransaction
+/// The main purpose of ZooKeeperMetadataTransaction is to execute all zookeeper operation related to query
+/// in a single transaction when we performed all required checks and ready to "commit" changes.
+/// For example, create ALTER_METADATA entry in ReplicatedMergeTree log,
+/// create path/to/entry/finished/host_id node in distributed DDL queue to mark query as executed and
+/// update metadata in path/to/replicated_database/metadata/table_name
+/// It's used for DatabaseReplicated.
+/// TODO we can also use it for ordinary ON CLUSTER queries
+class ZooKeeperMetadataTransaction
 {
     enum State
     {
@@ -153,8 +159,29 @@ struct MetadataTransaction
     bool is_initial_query;
     Coordination::Requests ops;
 
+public:
+    ZooKeeperMetadataTransaction(const ZooKeeperPtr & current_zookeeper_, const String & zookeeper_path_, bool is_initial_query_)
+    : current_zookeeper(current_zookeeper_)
+    , zookeeper_path(zookeeper_path_)
+    , is_initial_query(is_initial_query_)
+    {
+    }
+
+    bool isInitialQuery() const { return is_initial_query; }
+
+    bool isExecuted() const { return state != CREATED; }
+
+    String getDatabaseZooKeeperPath() const { return zookeeper_path; }
+
+    void addOp(Coordination::RequestPtr && op)
+    {
+        assert(!isExecuted());
+        ops.emplace_back(op);
+    }
+
     void moveOpsTo(Coordination::Requests & other_ops)
     {
+        assert(!isExecuted());
         std::move(ops.begin(), ops.end(), std::back_inserter(other_ops));
         ops.clear();
         state = COMMITTED;
@@ -162,7 +189,7 @@ struct MetadataTransaction
 
     void commit();
 
-    ~MetadataTransaction() { assert(state != CREATED || std::uncaught_exception()); }
+    ~ZooKeeperMetadataTransaction() { assert(isExecuted() || std::uncaught_exception()); }
 };
 
 }
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 12fd03b3b70..67f716c235c 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -195,16 +195,15 @@ void DDLWorker::startup()
 
 void DDLWorker::shutdown()
 {
-    stop_flag = true;
-    queue_updated_event->set();
-    cleanup_event->set();
-
-    if (main_thread.joinable())
+    bool prev_stop_flag = stop_flag.exchange(true);
+    if (!prev_stop_flag)
+    {
+        queue_updated_event->set();
+        cleanup_event->set();
         main_thread.join();
-    if (cleanup_thread.joinable())
         cleanup_thread.join();
-
-    worker_pool.reset();
+        worker_pool.reset();
+    }
 }
 
 DDLWorker::~DDLWorker()
@@ -267,6 +266,8 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r
     }
 
     /// Stage 2: resolve host_id and check if we should execute query or not
+    /// Multiple clusters can use single DDL queue path in ZooKeeper,
+    /// So we should skip task if we cannot find current host in cluster hosts list.
     if (!task->findCurrentHostID(context, log))
     {
         out_reason = "There is no a local address in host list";
@@ -317,7 +318,7 @@ void DDLWorker::scheduleTasks()
         bool status_written = zookeeper->exists(task->getFinishedNodePath());
         if (task->was_executed && !status_written && task_still_exists)
         {
-            processTask(*task);
+            processTask(*task, zookeeper);
         }
     }
 
@@ -364,15 +365,15 @@ void DDLWorker::scheduleTasks()
 
         if (worker_pool)
         {
-            worker_pool->scheduleOrThrowOnError([this, &saved_task]()
+            worker_pool->scheduleOrThrowOnError([this, &saved_task, &zookeeper]()
             {
                 setThreadName("DDLWorkerExec");
-                processTask(saved_task);
+                processTask(saved_task, zookeeper);
             });
         }
         else
         {
-            processTask(saved_task);
+            processTask(saved_task, zookeeper);
         }
     }
 }
@@ -385,7 +386,7 @@ DDLTaskBase & DDLWorker::saveTask(DDLTaskPtr && task)
     return *current_tasks.back();
 }
 
-bool DDLWorker::tryExecuteQuery(const String & query, DDLTaskBase & task)
+bool DDLWorker::tryExecuteQuery(const String & query, DDLTaskBase & task, const ZooKeeperPtr & zookeeper)
 {
     /// Add special comment at the start of query to easily identify DDL-produced queries in query_log
     String query_prefix = "/* ddl_entry=" + task.entry_name + " */ ";
@@ -398,14 +399,16 @@ bool DDLWorker::tryExecuteQuery(const String & query, DDLTaskBase & task)
 
     try
     {
-        auto query_context = task.makeQueryContext(context);
+        auto query_context = task.makeQueryContext(context, zookeeper);
         if (!task.is_initial_query)
             query_scope.emplace(*query_context);
         executeQuery(istr, ostr, !task.is_initial_query, *query_context, {});
 
-        if (auto txn = query_context->getMetadataTransaction())
+        if (auto txn = query_context->getZooKeeperMetadataTransaction())
         {
-            if (txn->state == MetadataTransaction::CREATED)
+            /// Most queries commit changes to ZooKeeper right before applying local changes,
+            /// but some queries does not support it, so we have to do it here.
+            if (!txn->isExecuted())
                 txn->commit();
         }
     }
@@ -463,10 +466,8 @@ void DDLWorker::updateMaxDDLEntryID(const String & entry_name)
     }
 }
 
-void DDLWorker::processTask(DDLTaskBase & task)
+void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper)
 {
-    auto zookeeper = tryGetZooKeeper();
-
     LOG_DEBUG(log, "Processing task {} ({})", task.entry_name, task.entry.query);
 
     String active_node_path = task.getActiveNodePath();
@@ -541,7 +542,7 @@ void DDLWorker::processTask(DDLTaskBase & task)
             else
             {
                 storage.reset();
-                tryExecuteQuery(rewritten_query, task);
+                tryExecuteQuery(rewritten_query, task, zookeeper);
             }
         }
         catch (const Coordination::Exception &)
@@ -565,7 +566,7 @@ void DDLWorker::processTask(DDLTaskBase & task)
             }
             else
             {
-                /// task.ops where not executed by table or database engine, se DDLWorker is responsible for
+                /// task.ops where not executed by table or database engine, so DDLWorker is responsible for
                 /// writing query execution status into ZooKeeper.
                 task.ops.emplace_back(zkutil::makeSetRequest(finished_node_path, task.execution_status.serializeText(), -1));
             }
@@ -589,7 +590,7 @@ void DDLWorker::processTask(DDLTaskBase & task)
     }
 
     /// Active node was removed in multi ops
-    active_node->reset();
+    active_node->setAlreadyRemoved();
 
     task.completely_processed = true;
 }
@@ -712,7 +713,7 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
 
             /// If the leader will unexpectedly changed this method will return false
             /// and on the next iteration new leader will take lock
-            if (tryExecuteQuery(rewritten_query, task))
+            if (tryExecuteQuery(rewritten_query, task, zookeeper))
             {
                 executed_by_us = true;
                 break;
diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h
index c39a832c098..8b0a8f038a0 100644
--- a/src/Interpreters/DDLWorker.h
+++ b/src/Interpreters/DDLWorker.h
@@ -77,7 +77,7 @@ protected:
     /// Returns non-empty DDLTaskPtr if entry parsed and the check is passed
     virtual DDLTaskPtr initAndCheckTask(const String & entry_name, String & out_reason, const ZooKeeperPtr & zookeeper);
 
-    void processTask(DDLTaskBase & task);
+    void processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper);
     void updateMaxDDLEntryID(const String & entry_name);
 
     /// Check that query should be executed on leader replica only
@@ -95,7 +95,7 @@ protected:
         const String & node_path,
         const ZooKeeperPtr & zookeeper);
 
-    bool tryExecuteQuery(const String & query, DDLTaskBase & task);
+    bool tryExecuteQuery(const String & query, DDLTaskBase & task, const ZooKeeperPtr & zookeeper);
 
     /// Checks and cleanups queue's nodes
     void cleanupQueue(Int64 current_time_seconds, const ZooKeeperPtr & zookeeper);
diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp
index 402f05895bc..bf624507574 100644
--- a/src/Interpreters/InterpreterAlterQuery.cpp
+++ b/src/Interpreters/InterpreterAlterQuery.cpp
@@ -54,7 +54,7 @@ BlockIO InterpreterAlterQuery::execute()
     {
         auto guard = DatabaseCatalog::instance().getDDLGuard(table_id.database_name, table_id.table_name);
         guard->releaseTableLock();
-        return typeid_cast<DatabaseReplicated *>(database.get())->propose(query_ptr, context);
+        return typeid_cast<DatabaseReplicated *>(database.get())->tryEnqueueReplicatedDDL(query_ptr, context);
     }
 
     StoragePtr table = DatabaseCatalog::instance().getTable(table_id, context);
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index 2021c1f1d60..2b1dddde78c 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -880,7 +880,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
         {
             assertOrSetUUID(create, database);
             guard->releaseTableLock();
-            return typeid_cast<DatabaseReplicated *>(database.get())->propose(query_ptr, context);
+            return typeid_cast<DatabaseReplicated *>(database.get())->tryEnqueueReplicatedDDL(query_ptr, context);
         }
     }
 
@@ -1092,7 +1092,7 @@ BlockIO InterpreterCreateQuery::createDictionary(ASTCreateQuery & create)
         if (!create.attach)
             assertOrSetUUID(create, database);
         guard->releaseTableLock();
-        return typeid_cast<DatabaseReplicated *>(database.get())->propose(query_ptr, context);
+        return typeid_cast<DatabaseReplicated *>(database.get())->tryEnqueueReplicatedDDL(query_ptr, context);
     }
 
     if (database->isDictionaryExist(dictionary_name))
diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp
index 9e63c647f71..33e93a79c41 100644
--- a/src/Interpreters/InterpreterDropQuery.cpp
+++ b/src/Interpreters/InterpreterDropQuery.cpp
@@ -146,7 +146,7 @@ BlockIO InterpreterDropQuery::executeToTableImpl(const ASTDropQuery & query, Dat
 
             ddl_guard->releaseTableLock();
             table.reset();
-            return typeid_cast<DatabaseReplicated *>(database.get())->propose(query.clone(), context);
+            return typeid_cast<DatabaseReplicated *>(database.get())->tryEnqueueReplicatedDDL(query.clone(), context);
         }
 
         if (query.kind == ASTDropQuery::Kind::Detach)
@@ -231,7 +231,7 @@ BlockIO InterpreterDropQuery::executeToDictionary(
         context.checkAccess(AccessType::DROP_DICTIONARY, database_name, dictionary_name);
 
         ddl_guard->releaseTableLock();
-        return typeid_cast<DatabaseReplicated *>(database.get())->propose(query_ptr, context);
+        return typeid_cast<DatabaseReplicated *>(database.get())->tryEnqueueReplicatedDDL(query_ptr, context);
     }
 
     if (!database || !database->isDictionaryExist(dictionary_name))
diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp
index b9d7faac73c..923a342d9ea 100644
--- a/src/Interpreters/InterpreterRenameQuery.cpp
+++ b/src/Interpreters/InterpreterRenameQuery.cpp
@@ -90,7 +90,7 @@ BlockIO InterpreterRenameQuery::executeToTables(const ASTRenameQuery & rename, c
             UniqueTableName to(elem.to_database_name, elem.to_table_name);
             ddl_guards[from]->releaseTableLock();
             ddl_guards[to]->releaseTableLock();
-            return typeid_cast<DatabaseReplicated *>(database.get())->propose(query_ptr, context);
+            return typeid_cast<DatabaseReplicated *>(database.get())->tryEnqueueReplicatedDDL(query_ptr, context);
         }
         else
         {
diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp
index 32317968fe5..325bf3d2f74 100644
--- a/src/Storages/StorageMaterializedView.cpp
+++ b/src/Storages/StorageMaterializedView.cpp
@@ -212,11 +212,11 @@ static void executeDropQuery(ASTDropQuery::Kind kind, const Context & global_con
         /// looks like expected behaviour and we have tests for it.
         auto drop_context = Context(global_context);
         drop_context.getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY;
-        if (auto txn = current_context.getMetadataTransaction())
+        if (auto txn = current_context.getZooKeeperMetadataTransaction())
         {
             /// For Replicated database
             drop_context.setQueryContext(const_cast<Context &>(current_context));
-            drop_context.initMetadataTransaction(txn, true);
+            drop_context.initZooKeeperMetadataTransaction(txn, true);
         }
         InterpreterDropQuery drop_interpreter(ast_drop_query, drop_context);
         drop_interpreter.execute();
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index ff39bf91fbb..f2c88cdedd9 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -4282,12 +4282,12 @@ void StorageReplicatedMergeTree::alter(
                 zkutil::makeCreateRequest(mutations_path + "/", mutation_entry.toString(), zkutil::CreateMode::PersistentSequential));
         }
 
-        if (auto txn = query_context.getMetadataTransaction())
+        if (auto txn = query_context.getZooKeeperMetadataTransaction())
         {
             txn->moveOpsTo(ops);
             /// NOTE: IDatabase::alterTable(...) is called when executing ALTER_METADATA queue entry without query context,
             /// so we have to update metadata of DatabaseReplicated here.
-            String metadata_zk_path = txn->zookeeper_path + "/metadata/" + escapeForFileName(table_id.table_name);
+            String metadata_zk_path = txn->getDatabaseZooKeeperPath() + "/metadata/" + escapeForFileName(table_id.table_name);
             auto ast = DatabaseCatalog::instance().getDatabase(table_id.database_name)->getCreateTableQuery(table_id.table_name, query_context);
             applyMetadataChangesToCreateQuery(ast, future_metadata);
             ops.emplace_back(zkutil::makeSetRequest(metadata_zk_path, getObjectDefinitionFromCreateQuery(ast), -1));
@@ -5262,7 +5262,7 @@ void StorageReplicatedMergeTree::mutate(const MutationCommands & commands, const
         requests.emplace_back(zkutil::makeCreateRequest(
             mutations_path + "/", mutation_entry.toString(), zkutil::CreateMode::PersistentSequential));
 
-        if (auto txn = query_context.getMetadataTransaction())
+        if (auto txn = query_context.getZooKeeperMetadataTransaction())
             txn->moveOpsTo(requests);
 
         Coordination::Responses responses;
@@ -5766,7 +5766,7 @@ void StorageReplicatedMergeTree::replacePartitionFrom(
             }
         }
 
-        if (auto txn = context.getMetadataTransaction())
+        if (auto txn = context.getZooKeeperMetadataTransaction())
             txn->moveOpsTo(ops);
 
         ops.emplace_back(zkutil::makeSetRequest(zookeeper_path + "/log", "", -1));  /// Just update version
@@ -6269,7 +6269,7 @@ bool StorageReplicatedMergeTree::dropAllPartsInPartition(
     Coordination::Requests ops;
     ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/log/log-", entry.toString(), zkutil::CreateMode::PersistentSequential));
     ops.emplace_back(zkutil::makeSetRequest(zookeeper_path + "/log", "", -1));  /// Just update version.
-    if (auto txn = query_context.getMetadataTransaction())
+    if (auto txn = query_context.getZooKeeperMetadataTransaction())
         txn->moveOpsTo(ops);
     Coordination::Responses responses = zookeeper.multi(ops);
 
diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json
index f08a41e32b8..e6bb3747fb0 100644
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@@ -108,6 +108,7 @@
         "memory_tracking",
         "memory_usage",
         "live_view",
+        "00825_protobuf_format_map",
         "00152_insert_different_granularity",
         "01715_background_checker_blather_zookeeper",
         "01714_alter_drop_version",

From e8583ddfe2f03b20d86e9ce85a8215e7ee46d0f4 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Sat, 20 Feb 2021 09:10:15 +0300
Subject: [PATCH 1189/1238] Update BaseDaemon.cpp

---
 base/daemon/BaseDaemon.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp
index 248ffdd4d10..83384038b7c 100644
--- a/base/daemon/BaseDaemon.cpp
+++ b/base/daemon/BaseDaemon.cpp
@@ -416,7 +416,9 @@ static void sanitizerDeathCallback()
     else
         log_message = "Terminate called without an active exception";
 
-    static const size_t buf_size = PIPE_BUF;
+    /// POSIX.1 says that write(2)s of less than PIPE_BUF bytes must be atomic - man 7 pipe
+    /// And the buffer should not be too small because our exception messages can be large.
+    static constexpr size_t buf_size = PIPE_BUF;
 
     if (log_message.size() > buf_size - 16)
         log_message.resize(buf_size - 16);

From 487fb09ff670a379deddc953b2bd1f52d3c77a39 Mon Sep 17 00:00:00 2001
From: Amos Bird <amosbird@gmail.com>
Date: Sat, 20 Feb 2021 14:11:01 +0800
Subject: [PATCH 1190/1238] Suppress signed overflow in
 AggregateFunctionGroupArrayMoving 2

---
 src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h | 6 +++---
 tests/queries/0_stateless/01177_group_array_moving.sql     | 2 ++
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h
index 2a713f3aed2..3bab831d316 100644
--- a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h
+++ b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h
@@ -52,7 +52,7 @@ struct MovingSumData : public MovingData<T>
 {
     static constexpr auto name = "groupArrayMovingSum";
 
-    T get(size_t idx, UInt64 window_size) const
+    T NO_SANITIZE_UNDEFINED get(size_t idx, UInt64 window_size) const
     {
         if (idx < window_size)
             return this->value[idx];
@@ -66,7 +66,7 @@ struct MovingAvgData : public MovingData<T>
 {
     static constexpr auto name = "groupArrayMovingAvg";
 
-    T get(size_t idx, UInt64 window_size) const
+    T NO_SANITIZE_UNDEFINED get(size_t idx, UInt64 window_size) const
     {
         if (idx < window_size)
             return this->value[idx] / window_size;
@@ -114,7 +114,7 @@ public:
             return std::make_shared<DataTypeArray>(std::make_shared<DataTypeResult>());
     }
 
-    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
+    void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
     {
         auto value = static_cast<const ColumnSource &>(*columns[0]).getData()[row_num];
         this->data(place).add(static_cast<ResultT>(value), arena);
diff --git a/tests/queries/0_stateless/01177_group_array_moving.sql b/tests/queries/0_stateless/01177_group_array_moving.sql
index b1969e204fc..5689cd95f75 100644
--- a/tests/queries/0_stateless/01177_group_array_moving.sql
+++ b/tests/queries/0_stateless/01177_group_array_moving.sql
@@ -1,2 +1,4 @@
 SELECT groupArrayMovingSum(257)(-9223372036854775808), groupArrayMovingSum(1048575)(18446744073709551615), groupArrayMovingSum(9223372036854775807)(number * 9223372036854775807) FROM remote('127.0.0.{1..2}', numbers(3));
 SELECT groupArrayMovingAvg(257)(-9223372036854775808), groupArrayMovingAvg(1048575)(18446744073709551615), groupArrayMovingAvg(9223372036854775807)(number * 9223372036854775807) FROM remote('127.0.0.{1..2}', numbers(3));
+
+SELECT groupArrayMovingSum(257)(-9223372036854775808), groupArrayMovingSum(1)(10.000100135803223, [NULL, NULL], NULL), groupArrayMovingSum(NULL)(NULL) FROM numbers(1023) FORMAT Null;

From 7c04f15c8031a63f20573b9948dd18005f860f26 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sat, 20 Feb 2021 09:11:42 +0300
Subject: [PATCH 1191/1238] Add log message when stacktrace cannot be obtained
 for thread

This is to provide better diagnostics for 01051_system_stack_trace
failure [1].

  [1]: https://clickhouse-test-reports.s3.yandex.net/20881/866dfaec793f764dc9ba167d3ac9f6521b9b3381/functional_stateless_tests_(release,_wide_parts_enabled).html#fail1
---
 src/Storages/System/StorageSystemStackTrace.cpp | 4 ++++
 src/Storages/System/StorageSystemStackTrace.h   | 6 ++++++
 2 files changed, 10 insertions(+)

diff --git a/src/Storages/System/StorageSystemStackTrace.cpp b/src/Storages/System/StorageSystemStackTrace.cpp
index abb2fdf54ed..e74d56108ad 100644
--- a/src/Storages/System/StorageSystemStackTrace.cpp
+++ b/src/Storages/System/StorageSystemStackTrace.cpp
@@ -16,6 +16,7 @@
 #include <Common/PipeFDs.h>
 #include <Common/CurrentThread.h>
 #include <common/getThreadId.h>
+#include <common/logger_useful.h>
 
 
 namespace DB
@@ -150,6 +151,7 @@ namespace
 
 StorageSystemStackTrace::StorageSystemStackTrace(const StorageID & table_id_)
     : IStorageSystemOneBlock<StorageSystemStackTrace>(table_id_)
+    , log(&Poco::Logger::get("StorageSystemStackTrace"))
 {
     notification_pipe.open();
 
@@ -229,6 +231,8 @@ void StorageSystemStackTrace::fillData(MutableColumns & res_columns, const Conte
         }
         else
         {
+            LOG_DEBUG(log, "Cannot obtain a stack trace for thread {}", tid);
+
             /// Cannot obtain a stack trace. But create a record in result nevertheless.
 
             res_columns[0]->insert(tid);
diff --git a/src/Storages/System/StorageSystemStackTrace.h b/src/Storages/System/StorageSystemStackTrace.h
index a389f02eb09..582618d2ecd 100644
--- a/src/Storages/System/StorageSystemStackTrace.h
+++ b/src/Storages/System/StorageSystemStackTrace.h
@@ -6,6 +6,10 @@
 #include <ext/shared_ptr_helper.h>
 #include <Storages/System/IStorageSystemOneBlock.h>
 
+namespace Poco
+{
+class Logger;
+}
 
 namespace DB
 {
@@ -30,6 +34,8 @@ protected:
     void fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo & query_info) const override;
 
     mutable std::mutex mutex;
+
+    Poco::Logger * log;
 };
 
 }

From 4390cb3d73f8672269fe030a709899ca119909a9 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Sat, 20 Feb 2021 09:49:02 +0300
Subject: [PATCH 1192/1238] Update config.xml

---
 programs/server/config.xml | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/programs/server/config.xml b/programs/server/config.xml
index fe2a068787b..ba9b8b04b05 100644
--- a/programs/server/config.xml
+++ b/programs/server/config.xml
@@ -285,10 +285,9 @@
          Cache is used when 'use_uncompressed_cache' user setting turned on (off by default).
          Uncompressed cache is advantageous only for very short queries and in rare cases.
 
-         Note: uncompressed cache is pointless for lz4, because memory bandwidth is slower than multi-core decompression.
-         Enabling it will only make queries slower.
-         If number of CPU cores is in order of 100 and memory bandwidth is in range of 100-200 GB/sec,
-         there is a chance it is also being pointless for zstd.
+         Note: uncompressed cache can be pointless for lz4, because memory bandwidth
+         is slower than multi-core decompression on some server configurations.
+         Enabling it can sometimes paradoxically make queries slower.
       -->
     <uncompressed_cache_size>8589934592</uncompressed_cache_size>
 

From 5d36ceaaee50c1442dfef55a3d98c240ee2f7bd6 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sat, 20 Feb 2021 08:31:05 +0300
Subject: [PATCH 1193/1238] Fix WriteBufferFromHTTPServerResponse usage in
 odbc-bridge

---
 programs/odbc-bridge/ColumnInfoHandler.cpp    | 10 ++++++++-
 .../odbc-bridge/IdentifierQuoteHandler.cpp    | 10 ++++++++-
 programs/odbc-bridge/MainHandler.cpp          | 22 +++++++++++++++++--
 programs/odbc-bridge/SchemaAllowedHandler.cpp | 10 ++++++++-
 4 files changed, 47 insertions(+), 5 deletions(-)

diff --git a/programs/odbc-bridge/ColumnInfoHandler.cpp b/programs/odbc-bridge/ColumnInfoHandler.cpp
index 5aef7f1ac38..14fa734f246 100644
--- a/programs/odbc-bridge/ColumnInfoHandler.cpp
+++ b/programs/odbc-bridge/ColumnInfoHandler.cpp
@@ -160,7 +160,15 @@ void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServ
         }
 
         WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
-        writeStringBinary(columns.toString(), out);
+        try
+        {
+            writeStringBinary(columns.toString(), out);
+            out.finalize();
+        }
+        catch (...)
+        {
+            out.finalize();
+        }
     }
     catch (...)
     {
diff --git a/programs/odbc-bridge/IdentifierQuoteHandler.cpp b/programs/odbc-bridge/IdentifierQuoteHandler.cpp
index ec4e4493d61..5060d37c479 100644
--- a/programs/odbc-bridge/IdentifierQuoteHandler.cpp
+++ b/programs/odbc-bridge/IdentifierQuoteHandler.cpp
@@ -50,7 +50,15 @@ void IdentifierQuoteHandler::handleRequest(HTTPServerRequest & request, HTTPServ
         auto identifier = getIdentifierQuote(hdbc);
 
         WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
-        writeStringBinary(identifier, out);
+        try
+        {
+            writeStringBinary(identifier, out);
+            out.finalize();
+        }
+        catch (...)
+        {
+            out.finalize();
+        }
     }
     catch (...)
     {
diff --git a/programs/odbc-bridge/MainHandler.cpp b/programs/odbc-bridge/MainHandler.cpp
index b9670397878..4fcc9deea6a 100644
--- a/programs/odbc-bridge/MainHandler.cpp
+++ b/programs/odbc-bridge/MainHandler.cpp
@@ -187,9 +187,27 @@ void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse
         auto message = getCurrentExceptionMessage(true);
         response.setStatusAndReason(
                 Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); // can't call process_error, because of too soon response sending
-        writeStringBinary(message, out);
-        tryLogCurrentException(log);
 
+        try
+        {
+            writeStringBinary(message, out);
+            out.finalize();
+        }
+        catch (...)
+        {
+            tryLogCurrentException(log);
+        }
+
+        tryLogCurrentException(log);
+    }
+
+    try
+    {
+        out.finalize();
+    }
+    catch (...)
+    {
+        tryLogCurrentException(log);
     }
 }
 
diff --git a/programs/odbc-bridge/SchemaAllowedHandler.cpp b/programs/odbc-bridge/SchemaAllowedHandler.cpp
index 48744b6d2ca..d4a70db61f4 100644
--- a/programs/odbc-bridge/SchemaAllowedHandler.cpp
+++ b/programs/odbc-bridge/SchemaAllowedHandler.cpp
@@ -61,7 +61,15 @@ void SchemaAllowedHandler::handleRequest(HTTPServerRequest & request, HTTPServer
         bool result = isSchemaAllowed(hdbc);
 
         WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
-        writeBoolText(result, out);
+        try
+        {
+            writeBoolText(result, out);
+            out.finalize();
+        }
+        catch (...)
+        {
+            out.finalize();
+        }
     }
     catch (...)
     {

From 1ccb333ac50e1e62d9507e424c3daeee465e14f9 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sat, 20 Feb 2021 08:28:47 +0300
Subject: [PATCH 1194/1238] Fix WriteBufferFromHTTPServerResponse usage in
 other places (add missing finalize())

Since I saw the following:

    0. DB::WriteBufferFromOStream::nextImpl()
    1. DB::WriteBufferFromHTTPServerResponse::nextImpl()
    2. DB::WriteBufferFromHTTPServerResponse::finalize()
    3. DB::WriteBufferFromHTTPServerResponse::~WriteBufferFromHTTPServerResponse()
    4. DB::StaticRequestHandler::handleRequest(Poco::Net::HTTPServerRequest&, Poco::Net::HTTPServerResponse&)
    5. Poco::Net::HTTPServerConnection::run()
    6. Poco::Net::TCPServerConnection::start()
---
 src/Server/InterserverIOHTTPHandler.cpp | 26 +++++++++++++++++++------
 src/Server/PrometheusRequestHandler.cpp | 13 ++++++++++---
 src/Server/StaticRequestHandler.cpp     |  2 ++
 3 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/src/Server/InterserverIOHTTPHandler.cpp b/src/Server/InterserverIOHTTPHandler.cpp
index 3296da94578..740072e8e9f 100644
--- a/src/Server/InterserverIOHTTPHandler.cpp
+++ b/src/Server/InterserverIOHTTPHandler.cpp
@@ -94,6 +94,23 @@ void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPSe
     used_output.out = std::make_shared<WriteBufferFromHTTPServerResponse>(
         response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
 
+    auto write_response = [&](const std::string & message)
+    {
+        if (response.sent())
+            return;
+
+        auto & out = *used_output.out;
+        try
+        {
+            writeString(message, out);
+            out.finalize();
+        }
+        catch (...)
+        {
+            out.finalize();
+        }
+    };
+
     try
     {
         if (auto [message, success] = checkAuthentication(request); success)
@@ -104,8 +121,7 @@ void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPSe
         else
         {
             response.setStatusAndReason(HTTPServerResponse::HTTP_UNAUTHORIZED);
-            if (!response.sent())
-                writeString(message, *used_output.out);
+            write_response(message);
             LOG_WARNING(log, "Query processing failed request: '{}' authentication failed", request.getURI());
         }
     }
@@ -120,8 +136,7 @@ void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPSe
         bool is_real_error = e.code() != ErrorCodes::ABORTED;
 
         std::string message = getCurrentExceptionMessage(is_real_error);
-        if (!response.sent())
-            writeString(message, *used_output.out);
+        write_response(message);
 
         if (is_real_error)
             LOG_ERROR(log, message);
@@ -132,8 +147,7 @@ void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPSe
     {
         response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR);
         std::string message = getCurrentExceptionMessage(false);
-        if (!response.sent())
-            writeString(message, *used_output.out);
+        write_response(message);
 
         LOG_ERROR(log, message);
     }
diff --git a/src/Server/PrometheusRequestHandler.cpp b/src/Server/PrometheusRequestHandler.cpp
index 83cb8e85a9e..bf78a37166a 100644
--- a/src/Server/PrometheusRequestHandler.cpp
+++ b/src/Server/PrometheusRequestHandler.cpp
@@ -24,9 +24,16 @@ void PrometheusRequestHandler::handleRequest(HTTPServerRequest & request, HTTPSe
 
         response.setContentType("text/plain; version=0.0.4; charset=UTF-8");
 
-        auto wb = WriteBufferFromHTTPServerResponse(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
-        metrics_writer.write(wb);
-        wb.finalize();
+        WriteBufferFromHTTPServerResponse wb(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
+        try
+        {
+            metrics_writer.write(wb);
+            wb.finalize();
+        }
+        catch (...)
+        {
+            wb.finalize();
+        }
     }
     catch (...)
     {
diff --git a/src/Server/StaticRequestHandler.cpp b/src/Server/StaticRequestHandler.cpp
index f3f564c1cf8..9f959239be9 100644
--- a/src/Server/StaticRequestHandler.cpp
+++ b/src/Server/StaticRequestHandler.cpp
@@ -126,6 +126,8 @@ void StaticRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServer
         std::string exception_message = getCurrentExceptionMessage(false, true);
         trySendExceptionToClient(exception_message, exception_code, request, response, *out);
     }
+
+    out->finalize();
 }
 
 void StaticRequestHandler::writeResponse(WriteBuffer & out)

From 2ab37d025a62f650d4b90f5fafa23f4076ab3844 Mon Sep 17 00:00:00 2001
From: Amos Bird <amosbird@gmail.com>
Date: Sat, 20 Feb 2021 16:14:38 +0800
Subject: [PATCH 1195/1238] Skip non-parallel tests

---
 tests/queries/skip_list.json | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json
index fdb845b7e72..1164d7b0004 100644
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@@ -364,6 +364,7 @@
         "00626_replace_partition_from_table",
         "00626_replace_partition_from_table_zookeeper",
         "00633_materialized_view_and_too_many_parts_zookeeper",
+        "00643_cast_zookeeper",
         "00652_mergetree_mutations",
         "00652_replicated_mutations_zookeeper",
         "00682_empty_parts_merge",
@@ -577,10 +578,11 @@
         "01602_show_create_view",
         "01603_rename_overwrite_bug",
         "01646_system_restart_replicas_smoke", // system restart replicas is a global query
-        "01676_dictget_in_default_expression",
-        "01715_background_checker_blather_zookeeper",
-        "01700_system_zookeeper_path_in",
+        "01656_test_query_log_factories_info",
         "01669_columns_declaration_serde",
+        "01676_dictget_in_default_expression",
+        "01700_system_zookeeper_path_in",
+        "01715_background_checker_blather_zookeeper",
         "attach",
         "ddl_dictionaries",
         "dictionary",

From d947dbc185beee7a78bf73ba2aceeb81e664e013 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 20 Feb 2021 11:44:35 +0300
Subject: [PATCH 1196/1238] Add test to skip list

---
 tests/queries/0_stateless/arcadia_skip_list.txt | 1 +
 tests/queries/skip_list.json                    | 9 ++++++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/tests/queries/0_stateless/arcadia_skip_list.txt b/tests/queries/0_stateless/arcadia_skip_list.txt
index 5466fb4bfb8..4e523545938 100644
--- a/tests/queries/0_stateless/arcadia_skip_list.txt
+++ b/tests/queries/0_stateless/arcadia_skip_list.txt
@@ -206,3 +206,4 @@
 01683_dist_INSERT_block_structure_mismatch
 01702_bitmap_native_integers
 01686_event_time_microseconds_part_log
+01017_uniqCombined_memory_usage
diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json
index fdb845b7e72..70963190125 100644
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@@ -17,7 +17,8 @@
         "functions_bad_arguments", /// Too long for TSan
         "01603_read_with_backoff_bug", /// Too long for TSan
         "01646_system_restart_replicas_smoke", /// RESTART REPLICAS can acquire too much locks, while only 64 is possible from one thread under TSan
-        "01641_memory_tracking_insert_optimize" /// INSERT lots of rows is too heavy for TSan
+        "01641_memory_tracking_insert_optimize", /// INSERT lots of rows is too heavy for TSan
+        "01017_uniqCombined_memory_usage" /// Fine thresholds on memory usage
     ],
     "address-sanitizer": [
         "00877",
@@ -27,7 +28,8 @@
         "01103_check_cpu_instructions_at_startup",
         "01473_event_time_microseconds",
         "01526_max_untracked_memory", /// requires TraceCollector, does not available under sanitizers
-        "01193_metadata_loading"
+        "01193_metadata_loading",
+        "01017_uniqCombined_memory_usage" /// Fine thresholds on memory usage
     ],
     "ub-sanitizer": [
         "capnproto",
@@ -48,7 +50,8 @@
         "00877_memory_limit_for_new_delete", /// memory limits don't work correctly under msan because it replaces malloc/free
         "01473_event_time_microseconds",
         "01526_max_untracked_memory", /// requires TraceCollector, does not available under sanitizers
-        "01193_metadata_loading"
+        "01193_metadata_loading",
+        "01017_uniqCombined_memory_usage" /// Fine thresholds on memory usage
     ],
     "debug-build": [
         "query_profiler",

From f37631830f8139a68c42111c11584956f992630a Mon Sep 17 00:00:00 2001
From: Amos Bird <amosbird@gmail.com>
Date: Sat, 20 Feb 2021 16:45:25 +0800
Subject: [PATCH 1197/1238] Comments

---
 src/Interpreters/FunctionNameNormalizer.cpp | 4 ++++
 src/Interpreters/ya.make                    | 1 +
 src/Server/TCPHandler.cpp                   | 4 +++-
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/Interpreters/FunctionNameNormalizer.cpp b/src/Interpreters/FunctionNameNormalizer.cpp
index 36ccc9340ea..255f4d8c6bb 100644
--- a/src/Interpreters/FunctionNameNormalizer.cpp
+++ b/src/Interpreters/FunctionNameNormalizer.cpp
@@ -14,6 +14,8 @@ void FunctionNameNormalizer::visit(IAST * ast)
     if (!ast)
         return;
 
+    // Normalize only selected children. Avoid normalizing engine clause because some engine might
+    // have the same name as function, e.g. Log.
     if (auto * node_storage = ast->as<ASTStorage>())
     {
         visit(node_storage->partition_by);
@@ -24,6 +26,8 @@ void FunctionNameNormalizer::visit(IAST * ast)
         return;
     }
 
+    // Normalize only selected children. Avoid normalizing type clause because some type might
+    // have the same name as function, e.g. Date.
     if (auto * node_decl = ast->as<ASTColumnDeclaration>())
     {
         visit(node_decl->default_expression.get());
diff --git a/src/Interpreters/ya.make b/src/Interpreters/ya.make
index cd4980927e4..e7882ec8d98 100644
--- a/src/Interpreters/ya.make
+++ b/src/Interpreters/ya.make
@@ -58,6 +58,7 @@ SRCS(
     ExternalModelsLoader.cpp
     ExtractExpressionInfoVisitor.cpp
     FillingRow.cpp
+    FunctionNameNormalizer.cpp
     HashJoin.cpp
     IExternalLoadable.cpp
     IInterpreter.cpp
diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp
index 9794a86d3e3..d2ce2a409a9 100644
--- a/src/Server/TCPHandler.cpp
+++ b/src/Server/TCPHandler.cpp
@@ -1133,7 +1133,9 @@ void TCPHandler::receiveQuery()
     }
     query_context->applySettingsChanges(settings_changes);
 
-    /// Disable function name normalization it's a secondary query.
+    /// Disable function name normalization when it's a secondary query, because queries are either
+    /// already normalized on initiator node, or not normalized and should remain unnormalized for
+    /// compatibility.
     if (client_info.query_kind == ClientInfo::QueryKind::SECONDARY_QUERY)
     {
         query_context->setSetting("normalize_function_names", Field(0));

From 863c0992540c68b781b393a35d8c8f47dddbdd20 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <avtokmakov@yandex-team.ru>
Date: Sat, 20 Feb 2021 15:56:28 +0300
Subject: [PATCH 1198/1238] fix

---
 docker/test/fasttest/run.sh                                   | 2 +-
 src/Databases/DatabaseReplicatedWorker.h                      | 2 +-
 ...ference => 01541_max_memory_usage_for_user_long.reference} | 0
 ...ge_for_user.sh => 01541_max_memory_usage_for_user_long.sh} | 0
 tests/queries/skip_list.json                                  | 4 ++--
 5 files changed, 4 insertions(+), 4 deletions(-)
 rename tests/queries/0_stateless/{01541_max_memory_usage_for_user.reference => 01541_max_memory_usage_for_user_long.reference} (100%)
 rename tests/queries/0_stateless/{01541_max_memory_usage_for_user.sh => 01541_max_memory_usage_for_user_long.sh} (100%)

diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh
index 1c5f62a9e46..c9c8cb1382d 100755
--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@@ -326,7 +326,7 @@ function run_tests
         # Look at DistributedFilesToInsert, so cannot run in parallel.
         01460_DistributedFilesToInsert
 
-        01541_max_memory_usage_for_user
+        01541_max_memory_usage_for_user_long
 
         # Require python libraries like scipy, pandas and numpy
         01322_ttest_scipy
diff --git a/src/Databases/DatabaseReplicatedWorker.h b/src/Databases/DatabaseReplicatedWorker.h
index 6dd8dc408d7..6ba46a98bca 100644
--- a/src/Databases/DatabaseReplicatedWorker.h
+++ b/src/Databases/DatabaseReplicatedWorker.h
@@ -14,7 +14,7 @@ class DatabaseReplicated;
 /// 3. After creation of an entry in DDL queue initiator tries to execute the entry locally
 ///    and other hosts wait for query to finish on initiator host.
 ///    If query succeed on initiator, then all hosts must execute it, so they will retry until query succeed.
-///    We assume that cluster is homogenous, so if replicas are in consistent state and query succeed on one host,
+///    We assume that cluster is homogeneous, so if replicas are in consistent state and query succeed on one host,
 ///    then all hosts can execute it (maybe after several retries).
 /// 4. Each database replica stores its log pointer in ZooKeeper. Cleanup thread removes old entry
 ///    if its number < max_log_ptr - logs_to_keep.
diff --git a/tests/queries/0_stateless/01541_max_memory_usage_for_user.reference b/tests/queries/0_stateless/01541_max_memory_usage_for_user_long.reference
similarity index 100%
rename from tests/queries/0_stateless/01541_max_memory_usage_for_user.reference
rename to tests/queries/0_stateless/01541_max_memory_usage_for_user_long.reference
diff --git a/tests/queries/0_stateless/01541_max_memory_usage_for_user.sh b/tests/queries/0_stateless/01541_max_memory_usage_for_user_long.sh
similarity index 100%
rename from tests/queries/0_stateless/01541_max_memory_usage_for_user.sh
rename to tests/queries/0_stateless/01541_max_memory_usage_for_user_long.sh
diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json
index e6bb3747fb0..77c4d487082 100644
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@@ -440,7 +440,7 @@
         "01530_drop_database_atomic_sync",
         "01532_execute_merges_on_single_replica",
         "01532_primary_key_without_order_by_zookeeper",
-        "01541_max_memory_usage_for_user",
+        "01541_max_memory_usage_for_user_long",
         "01551_mergetree_read_in_order_spread",
         "01552_dict_fixedstring",
         "01554_bloom_filter_index_big_integer_uuid",
@@ -717,7 +717,7 @@
         "01527_clickhouse_local_optimize",
         "01527_dist_sharding_key_dictGet_reload",
         "01530_drop_database_atomic_sync",
-        "01541_max_memory_usage_for_user",
+        "01541_max_memory_usage_for_user_long",
         "01542_dictionary_load_exception_race",
         "01575_disable_detach_table_of_dictionary",
         "01593_concurrent_alter_mutations_kill",

From fe159de141bd47ae1915fea24ad520d71ae6a9a3 Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@yandex-team.ru>
Date: Sat, 20 Feb 2021 19:30:27 +0300
Subject: [PATCH 1199/1238] Update version_date.tsv after release 21.2.4.6

---
 utils/list-versions/version_date.tsv | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index d0d782e77ec..f7035ebb506 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -1,3 +1,4 @@
+v21.2.4.6-stable	2021-02-20
 v21.2.3.15-stable	2021-02-14
 v21.2.2.8-stable	2021-02-07
 v21.1.4.46-stable	2021-02-14

From 4fa822dd287cb699e170da2941effb3c89c7f0ea Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@yandex-team.ru>
Date: Sat, 20 Feb 2021 20:21:55 +0300
Subject: [PATCH 1200/1238] Update version_date.tsv after release 21.1.5.4

---
 utils/list-versions/version_date.tsv | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index f7035ebb506..1ccf3c66580 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -1,6 +1,7 @@
 v21.2.4.6-stable	2021-02-20
 v21.2.3.15-stable	2021-02-14
 v21.2.2.8-stable	2021-02-07
+v21.1.5.4-stable	2021-02-20
 v21.1.4.46-stable	2021-02-14
 v21.1.3.32-stable	2021-02-03
 v21.1.2.15-stable	2021-01-18

From e49d90405cac621c35698443d69b8a2de887a9da Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@yandex-team.ru>
Date: Sat, 20 Feb 2021 20:39:18 +0300
Subject: [PATCH 1201/1238] Update version_date.tsv after release 20.12.7.3

---
 utils/list-versions/version_date.tsv | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index 1ccf3c66580..b0abdaab087 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -5,6 +5,7 @@ v21.1.5.4-stable	2021-02-20
 v21.1.4.46-stable	2021-02-14
 v21.1.3.32-stable	2021-02-03
 v21.1.2.15-stable	2021-01-18
+v20.12.7.3-stable	2021-02-20
 v20.12.6.29-stable	2021-02-14
 v20.12.5.18-stable	2021-02-03
 v20.12.5.14-stable	2020-12-28

From 64e76a4a8da87adb374ffeb571fe76eac4850ae8 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sat, 20 Feb 2021 21:13:36 +0300
Subject: [PATCH 1202/1238] Minor changes in Decimal

---
 src/Core/DecimalComparison.h                  |  2 +-
 src/Core/DecimalFunctions.h                   | 24 +++++++++----------
 src/Core/MySQL/MySQLReplication.cpp           |  6 ++---
 src/DataTypes/DataTypeDateTime64.cpp          |  4 ++--
 src/DataTypes/DataTypeDecimalBase.h           | 10 ++++----
 src/DataTypes/DataTypesDecimal.cpp            |  2 +-
 src/DataTypes/DataTypesDecimal.h              |  2 +-
 src/DataTypes/convertMySQLDataType.cpp        |  6 ++---
 .../fetchPostgreSQLTableStructure.cpp         |  8 +++----
 src/Functions/array/arrayAggregation.cpp      |  2 +-
 src/Functions/array/arrayCumSum.cpp           |  2 +-
 .../array/arrayCumSumNonNegative.cpp          |  2 +-
 src/Functions/isDecimalOverflow.cpp           |  2 +-
 src/IO/WriteHelpers.h                         | 20 ++++++++--------
 14 files changed, 46 insertions(+), 46 deletions(-)

diff --git a/src/Core/DecimalComparison.h b/src/Core/DecimalComparison.h
index 8279d01d35a..486c2c1f8f4 100644
--- a/src/Core/DecimalComparison.h
+++ b/src/Core/DecimalComparison.h
@@ -78,7 +78,7 @@ public:
 
     static bool compare(A a, B b, UInt32 scale_a, UInt32 scale_b)
     {
-        static const UInt32 max_scale = DecimalUtils::maxPrecision<Decimal256>();
+        static const UInt32 max_scale = DecimalUtils::max_precision<Decimal256>;
         if (scale_a > max_scale || scale_b > max_scale)
             throw Exception("Bad scale of decimal field", ErrorCodes::DECIMAL_OVERFLOW);
 
diff --git a/src/Core/DecimalFunctions.h b/src/Core/DecimalFunctions.h
index 2b916cbf538..355cf1d378a 100644
--- a/src/Core/DecimalFunctions.h
+++ b/src/Core/DecimalFunctions.h
@@ -24,13 +24,13 @@ namespace ErrorCodes
 namespace DecimalUtils
 {
 
-static constexpr size_t minPrecision() { return 1; }
-template <typename T> static constexpr size_t maxPrecision() { return 0; }
-template <> constexpr size_t maxPrecision<Decimal32>() { return 9; }
-template <> constexpr size_t maxPrecision<Decimal64>() { return 18; }
-template <> constexpr size_t maxPrecision<DateTime64>() { return 18; }
-template <> constexpr size_t maxPrecision<Decimal128>() { return 38; }
-template <> constexpr size_t maxPrecision<Decimal256>() { return 76; }
+inline constexpr size_t min_precision = 1;
+template <typename T> inline constexpr size_t max_precision = 0;
+template <> inline constexpr size_t max_precision<Decimal32> = 9;
+template <> inline constexpr size_t max_precision<Decimal64> = 18;
+template <> inline constexpr size_t max_precision<DateTime64> = 18;
+template <> inline constexpr size_t max_precision<Decimal128> = 38;
+template <> inline constexpr size_t max_precision<Decimal256> = 76;
 
 template <typename T>
 inline auto scaleMultiplier(UInt32 scale)
@@ -87,7 +87,7 @@ struct DataTypeDecimalTrait
   *
   * Sign of `whole` controls sign of result: negative whole => negative result, positive whole => positive result.
   * Sign of `fractional` is expected to be positive, otherwise result is undefined.
-  * If `scale` is to big (scale > maxPrecision<DecimalType::NativeType>), result is undefined.
+  * If `scale` is to big (scale > max_precision<DecimalType::NativeType>), result is undefined.
   */
 template <typename DecimalType>
 inline DecimalType decimalFromComponentsWithMultiplier(
@@ -287,21 +287,21 @@ inline auto binaryOpResult(const DecimalType<T> & tx, const DecimalType<U> & ty)
         scale = (tx.getScale() > ty.getScale() ? tx.getScale() : ty.getScale());
 
     if constexpr (sizeof(T) < sizeof(U))
-        return DataTypeDecimalTrait<U>(DecimalUtils::maxPrecision<U>(), scale);
+        return DataTypeDecimalTrait<U>(DecimalUtils::max_precision<U>, scale);
     else
-        return DataTypeDecimalTrait<T>(DecimalUtils::maxPrecision<T>(), scale);
+        return DataTypeDecimalTrait<T>(DecimalUtils::max_precision<T>, scale);
 }
 
 template <bool, bool, typename T, typename U, template <typename> typename DecimalType>
 inline const DataTypeDecimalTrait<T> binaryOpResult(const DecimalType<T> & tx, const DataTypeNumber<U> &)
 {
-    return DataTypeDecimalTrait<T>(DecimalUtils::maxPrecision<T>(), tx.getScale());
+    return DataTypeDecimalTrait<T>(DecimalUtils::max_precision<T>, tx.getScale());
 }
 
 template <bool, bool, typename T, typename U, template <typename> typename DecimalType>
 inline const DataTypeDecimalTrait<U> binaryOpResult(const DataTypeNumber<T> &, const DecimalType<U> & ty)
 {
-    return DataTypeDecimalTrait<U>(DecimalUtils::maxPrecision<U>(), ty.getScale());
+    return DataTypeDecimalTrait<U>(DecimalUtils::max_precision<U>, ty.getScale());
 }
 
 }
diff --git a/src/Core/MySQL/MySQLReplication.cpp b/src/Core/MySQL/MySQLReplication.cpp
index 8fdf337c849..1b202c4edb4 100644
--- a/src/Core/MySQL/MySQLReplication.cpp
+++ b/src/Core/MySQL/MySQLReplication.cpp
@@ -475,11 +475,11 @@ namespace MySQLReplication
                     {
                         const auto & dispatch = [](const size_t & precision, const size_t & scale, const auto & function) -> Field
                         {
-                            if (precision <= DecimalUtils::maxPrecision<Decimal32>())
+                            if (precision <= DecimalUtils::max_precision<Decimal32>)
                                 return Field(function(precision, scale, Decimal32()));
-                            else if (precision <= DecimalUtils::maxPrecision<Decimal64>())
+                            else if (precision <= DecimalUtils::max_precision<Decimal64>)
                                 return Field(function(precision, scale, Decimal64()));
-                            else if (precision <= DecimalUtils::maxPrecision<Decimal128>())
+                            else if (precision <= DecimalUtils::max_precision<Decimal128>)
                                 return Field(function(precision, scale, Decimal128()));
 
                             return Field(function(precision, scale, Decimal256()));
diff --git a/src/DataTypes/DataTypeDateTime64.cpp b/src/DataTypes/DataTypeDateTime64.cpp
index 09e39c2de1a..17b94e871bf 100644
--- a/src/DataTypes/DataTypeDateTime64.cpp
+++ b/src/DataTypes/DataTypeDateTime64.cpp
@@ -28,7 +28,7 @@ namespace ErrorCodes
 static constexpr UInt32 max_scale = 9;
 
 DataTypeDateTime64::DataTypeDateTime64(UInt32 scale_, const std::string & time_zone_name)
-    : DataTypeDecimalBase<DateTime64>(DecimalUtils::maxPrecision<DateTime64>(), scale_),
+    : DataTypeDecimalBase<DateTime64>(DecimalUtils::max_precision<DateTime64>, scale_),
       TimezoneMixin(time_zone_name)
 {
     if (scale > max_scale)
@@ -37,7 +37,7 @@ DataTypeDateTime64::DataTypeDateTime64(UInt32 scale_, const std::string & time_z
 }
 
 DataTypeDateTime64::DataTypeDateTime64(UInt32 scale_, const TimezoneMixin & time_zone_info)
-    : DataTypeDecimalBase<DateTime64>(DecimalUtils::maxPrecision<DateTime64>(), scale_),
+    : DataTypeDecimalBase<DateTime64>(DecimalUtils::max_precision<DateTime64>, scale_),
       TimezoneMixin(time_zone_info)
 {
     if (scale > max_scale)
diff --git a/src/DataTypes/DataTypeDecimalBase.h b/src/DataTypes/DataTypeDecimalBase.h
index c861b3bcac0..d9079166fa7 100644
--- a/src/DataTypes/DataTypeDecimalBase.h
+++ b/src/DataTypes/DataTypeDecimalBase.h
@@ -65,7 +65,7 @@ public:
 
     static constexpr bool is_parametric = true;
 
-    static constexpr size_t maxPrecision() { return DecimalUtils::maxPrecision<T>(); }
+    static constexpr size_t maxPrecision() { return DecimalUtils::max_precision<T>; }
 
     DataTypeDecimalBase(UInt32 precision_, UInt32 scale_)
     :   precision(precision_),
@@ -197,17 +197,17 @@ inline const DecimalType<U> decimalResultType(const DataTypeNumber<T> & tx, cons
 template <template <typename> typename DecimalType>
 inline DataTypePtr createDecimal(UInt64 precision_value, UInt64 scale_value)
 {
-    if (precision_value < DecimalUtils::minPrecision() || precision_value > DecimalUtils::maxPrecision<Decimal256>())
+    if (precision_value < DecimalUtils::min_precision || precision_value > DecimalUtils::max_precision<Decimal256>)
         throw Exception("Wrong precision", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
 
     if (static_cast<UInt64>(scale_value) > precision_value)
         throw Exception("Negative scales and scales larger than precision are not supported", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
 
-    if (precision_value <= DecimalUtils::maxPrecision<Decimal32>())
+    if (precision_value <= DecimalUtils::max_precision<Decimal32>)
         return std::make_shared<DecimalType<Decimal32>>(precision_value, scale_value);
-    else if (precision_value <= DecimalUtils::maxPrecision<Decimal64>())
+    else if (precision_value <= DecimalUtils::max_precision<Decimal64>)
         return std::make_shared<DecimalType<Decimal64>>(precision_value, scale_value);
-    else if (precision_value <= DecimalUtils::maxPrecision<Decimal128>())
+    else if (precision_value <= DecimalUtils::max_precision<Decimal128>)
        return std::make_shared<DecimalType<Decimal128>>(precision_value, scale_value);
     return std::make_shared<DecimalType<Decimal256>>(precision_value, scale_value);
 }
diff --git a/src/DataTypes/DataTypesDecimal.cpp b/src/DataTypes/DataTypesDecimal.cpp
index e174a242462..160e09d92d8 100644
--- a/src/DataTypes/DataTypesDecimal.cpp
+++ b/src/DataTypes/DataTypesDecimal.cpp
@@ -141,7 +141,7 @@ static DataTypePtr createExact(const ASTPtr & arguments)
     if (!scale_arg || !(scale_arg->value.getType() == Field::Types::Int64 || scale_arg->value.getType() == Field::Types::UInt64))
         throw Exception("Decimal data type family must have a two numbers as its arguments", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
 
-    UInt64 precision = DecimalUtils::maxPrecision<T>();
+    UInt64 precision = DecimalUtils::max_precision<T>;
     UInt64 scale = scale_arg->value.get<UInt64>();
 
     return createDecimal<DataTypeDecimal>(precision, scale);
diff --git a/src/DataTypes/DataTypesDecimal.h b/src/DataTypes/DataTypesDecimal.h
index 08f44c60c41..2b708b53be0 100644
--- a/src/DataTypes/DataTypesDecimal.h
+++ b/src/DataTypes/DataTypesDecimal.h
@@ -270,7 +270,7 @@ tryConvertToDecimal(const typename FromDataType::FieldType & value, UInt32 scale
 template <typename T>
 inline DataTypePtr createDecimalMaxPrecision(UInt64 scale)
 {
-    return std::make_shared<DataTypeDecimal<T>>(DecimalUtils::maxPrecision<T>(), scale);
+    return std::make_shared<DataTypeDecimal<T>>(DecimalUtils::max_precision<T>, scale);
 }
 
 }
diff --git a/src/DataTypes/convertMySQLDataType.cpp b/src/DataTypes/convertMySQLDataType.cpp
index b266a9f035f..5684503e34a 100644
--- a/src/DataTypes/convertMySQLDataType.cpp
+++ b/src/DataTypes/convertMySQLDataType.cpp
@@ -103,11 +103,11 @@ DataTypePtr convertMySQLDataType(MultiEnum<MySQLDataTypesSupport> type_support,
     }
     else if (type_support.isSet(MySQLDataTypesSupport::DECIMAL) && (type_name == "numeric" || type_name == "decimal"))
     {
-        if (precision <= DecimalUtils::maxPrecision<Decimal32>())
+        if (precision <= DecimalUtils::max_precision<Decimal32>)
             res = std::make_shared<DataTypeDecimal<Decimal32>>(precision, scale);
-        else if (precision <= DecimalUtils::maxPrecision<Decimal64>())
+        else if (precision <= DecimalUtils::max_precision<Decimal64>)
             res = std::make_shared<DataTypeDecimal<Decimal64>>(precision, scale);
-        else if (precision <= DecimalUtils::maxPrecision<Decimal128>())
+        else if (precision <= DecimalUtils::max_precision<Decimal128>)
             res = std::make_shared<DataTypeDecimal<Decimal128>>(precision, scale);
     }
 
diff --git a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp
index ec23cfc8794..a6e5ded3efd 100644
--- a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp
+++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp
@@ -59,13 +59,13 @@ static DataTypePtr convertPostgreSQLDataType(std::string & type, bool is_nullabl
         uint32_t precision = getDecimalPrecision(*res);
         uint32_t scale = getDecimalScale(*res);
 
-        if (precision <= DecimalUtils::maxPrecision<Decimal32>())
+        if (precision <= DecimalUtils::max_precision<Decimal32>)
             res = std::make_shared<DataTypeDecimal<Decimal32>>(precision, scale);
-        else if (precision <= DecimalUtils::maxPrecision<Decimal64>())
+        else if (precision <= DecimalUtils::max_precision<Decimal64>)
             res = std::make_shared<DataTypeDecimal<Decimal64>>(precision, scale);
-        else if (precision <= DecimalUtils::maxPrecision<Decimal128>())
+        else if (precision <= DecimalUtils::max_precision<Decimal128>)
             res = std::make_shared<DataTypeDecimal<Decimal128>>(precision, scale);
-        else if (precision <= DecimalUtils::maxPrecision<Decimal256>())
+        else if (precision <= DecimalUtils::max_precision<Decimal256>)
             res = std::make_shared<DataTypeDecimal<Decimal256>>(precision, scale);
     }
 
diff --git a/src/Functions/array/arrayAggregation.cpp b/src/Functions/array/arrayAggregation.cpp
index 40afd657abb..e0e246b8af4 100644
--- a/src/Functions/array/arrayAggregation.cpp
+++ b/src/Functions/array/arrayAggregation.cpp
@@ -103,7 +103,7 @@ struct ArrayAggregateImpl
             {
                 using DecimalReturnType = ArrayAggregateResult<typename DataType::FieldType, aggregate_operation>;
                 UInt32 scale = getDecimalScale(*expression_return);
-                result = std::make_shared<DataTypeDecimal<DecimalReturnType>>(DecimalUtils::maxPrecision<DecimalReturnType>(), scale);
+                result = std::make_shared<DataTypeDecimal<DecimalReturnType>>(DecimalUtils::max_precision<DecimalReturnType>, scale);
 
                 return true;
             }
diff --git a/src/Functions/array/arrayCumSum.cpp b/src/Functions/array/arrayCumSum.cpp
index 96001901a6e..9a6eafb8822 100644
--- a/src/Functions/array/arrayCumSum.cpp
+++ b/src/Functions/array/arrayCumSum.cpp
@@ -37,7 +37,7 @@ struct ArrayCumSumImpl
         if (which.isDecimal())
         {
             UInt32 scale = getDecimalScale(*expression_return);
-            DataTypePtr nested = std::make_shared<DataTypeDecimal<Decimal128>>(DecimalUtils::maxPrecision<Decimal128>(), scale);
+            DataTypePtr nested = std::make_shared<DataTypeDecimal<Decimal128>>(DecimalUtils::max_precision<Decimal128>, scale);
             return std::make_shared<DataTypeArray>(nested);
         }
 
diff --git a/src/Functions/array/arrayCumSumNonNegative.cpp b/src/Functions/array/arrayCumSumNonNegative.cpp
index 148d4283701..2c7362a1605 100644
--- a/src/Functions/array/arrayCumSumNonNegative.cpp
+++ b/src/Functions/array/arrayCumSumNonNegative.cpp
@@ -40,7 +40,7 @@ struct ArrayCumSumNonNegativeImpl
         if (which.isDecimal())
         {
             UInt32 scale = getDecimalScale(*expression_return);
-            DataTypePtr nested = std::make_shared<DataTypeDecimal<Decimal128>>(DecimalUtils::maxPrecision<Decimal128>(), scale);
+            DataTypePtr nested = std::make_shared<DataTypeDecimal<Decimal128>>(DecimalUtils::max_precision<Decimal128>, scale);
             return std::make_shared<DataTypeArray>(nested);
         }
 
diff --git a/src/Functions/isDecimalOverflow.cpp b/src/Functions/isDecimalOverflow.cpp
index c4c02e3763e..9104b1f1dee 100644
--- a/src/Functions/isDecimalOverflow.cpp
+++ b/src/Functions/isDecimalOverflow.cpp
@@ -133,7 +133,7 @@ private:
         static_assert(IsDecimalNumber<T>);
         using NativeT = typename T::NativeType;
 
-        if (precision > DecimalUtils::maxPrecision<T>())
+        if (precision > DecimalUtils::max_precision<T>)
             return false;
 
         NativeT pow10 = intExp10OfSize<NativeT>(precision);
diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h
index a37a5b5ddc6..39666936c68 100644
--- a/src/IO/WriteHelpers.h
+++ b/src/IO/WriteHelpers.h
@@ -709,7 +709,7 @@ inline void writeUUIDText(const UUID & uuid, WriteBuffer & buf)
 template<typename DecimalType>
 inline void writeDecimalTypeFractionalText(typename DecimalType::NativeType fractional, UInt32 scale, WriteBuffer & buf)
 {
-    static constexpr UInt32 MaxScale = DecimalUtils::maxPrecision<DecimalType>();
+    static constexpr UInt32 MaxScale = DecimalUtils::max_precision<DecimalType>;
 
     char data[20] = {'0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0'};
     static_assert(sizeof(data) >= MaxScale);
@@ -831,19 +831,19 @@ inline void writeDateTimeText(time_t datetime, WriteBuffer & buf, const DateLUTI
 template <char date_delimeter = '-', char time_delimeter = ':', char between_date_time_delimiter = ' ', char fractional_time_delimiter = '.'>
 inline void writeDateTimeText(DateTime64 datetime64, UInt32 scale, WriteBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance())
 {
-    static constexpr UInt32 MaxScale = DecimalUtils::maxPrecision<DateTime64>();
+    static constexpr UInt32 MaxScale = DecimalUtils::max_precision<DateTime64>;
     scale = scale > MaxScale ? MaxScale : scale;
 
-    auto c = DecimalUtils::split(datetime64, scale);
-    const auto & values = date_lut.getValues(c.whole);
+    auto components = DecimalUtils::split(datetime64, scale);
+    const auto & values = date_lut.getValues(components.whole);
     writeDateTimeText<date_delimeter, time_delimeter, between_date_time_delimiter>(
         LocalDateTime(values.year, values.month, values.day_of_month,
-            date_lut.toHour(c.whole), date_lut.toMinute(c.whole), date_lut.toSecond(c.whole)), buf);
+            date_lut.toHour(components.whole), date_lut.toMinute(components.whole), date_lut.toSecond(components.whole)), buf);
 
     if (scale > 0)
     {
         buf.write(fractional_time_delimiter);
-        writeDecimalTypeFractionalText<DateTime64>(c.fractional, scale, buf);
+        writeDecimalTypeFractionalText<DateTime64>(components.fractional, scale, buf);
     }
 }
 
@@ -887,16 +887,16 @@ inline void writeDateTimeTextISO(DateTime64 datetime64, UInt32 scale, WriteBuffe
 
 inline void writeDateTimeUnixTimestamp(DateTime64 datetime64, UInt32 scale, WriteBuffer & buf)
 {
-    static constexpr UInt32 MaxScale = DecimalUtils::maxPrecision<DateTime64>();
+    static constexpr UInt32 MaxScale = DecimalUtils::max_precision<DateTime64>;
     scale = scale > MaxScale ? MaxScale : scale;
 
-    auto c = DecimalUtils::split(datetime64, scale);
-    writeIntText(c.whole, buf);
+    auto components = DecimalUtils::split(datetime64, scale);
+    writeIntText(components.whole, buf);
 
     if (scale > 0)
     {
         buf.write('.');
-        writeDecimalTypeFractionalText<DateTime64>(c.fractional, scale, buf);
+        writeDecimalTypeFractionalText<DateTime64>(components.fractional, scale, buf);
     }
 }
 

From 3248bf4e2fe4ac469f6f9689946ca3b998615620 Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@yandex-team.ru>
Date: Sat, 20 Feb 2021 21:46:27 +0300
Subject: [PATCH 1203/1238] Update version_date.tsv after release 20.8.13.15

---
 utils/list-versions/version_date.tsv | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index b0abdaab087..4ba92864020 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -30,6 +30,7 @@ v20.9.5.5-stable	2020-11-13
 v20.9.4.76-stable	2020-10-29
 v20.9.3.45-stable	2020-10-09
 v20.9.2.20-stable	2020-09-22
+v20.8.13.15-lts	2021-02-20
 v20.8.12.2-lts	2021-01-16
 v20.8.11.17-lts	2020-12-25
 v20.8.10.13-lts	2020-12-24

From 6873ad19839e61f0233de80dab7bd52da6fd0254 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sat, 20 Feb 2021 21:04:19 +0300
Subject: [PATCH 1204/1238] tests/integration: fix yamllint issues in
 docker-compose.yaml for mysql

---
 .../docker_compose_mysql_5_7_for_materialize_mysql.yml    | 7 +++++--
 .../docker_compose_mysql_8_0_for_materialize_mysql.yml    | 8 ++++++--
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_5_7_for_materialize_mysql.yml b/docker/test/integration/runner/compose/docker_compose_mysql_5_7_for_materialize_mysql.yml
index e7d762203ee..f238c1aad56 100644
--- a/docker/test/integration/runner/compose/docker_compose_mysql_5_7_for_materialize_mysql.yml
+++ b/docker/test/integration/runner/compose/docker_compose_mysql_5_7_for_materialize_mysql.yml
@@ -6,5 +6,8 @@ services:
         environment:
             MYSQL_ROOT_PASSWORD: clickhouse
         ports:
-          - 3308:3306
-        command: --server_id=100 --log-bin='mysql-bin-1.log' --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency
+            - 3308:3306
+        command: --server_id=100 --log-bin='mysql-bin-1.log'
+            --default-time-zone='+3:00'
+            --gtid-mode="ON"
+            --enforce-gtid-consistency
diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_8_0_for_materialize_mysql.yml b/docker/test/integration/runner/compose/docker_compose_mysql_8_0_for_materialize_mysql.yml
index 918a2b5f80f..4e4648ee960 100644
--- a/docker/test/integration/runner/compose/docker_compose_mysql_8_0_for_materialize_mysql.yml
+++ b/docker/test/integration/runner/compose/docker_compose_mysql_8_0_for_materialize_mysql.yml
@@ -6,5 +6,9 @@ services:
         environment:
             MYSQL_ROOT_PASSWORD: clickhouse
         ports:
-          - 33308:3306
-        command: --server_id=100 --log-bin='mysql-bin-1.log' --default_authentication_plugin='mysql_native_password' --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency
+            - 33308:3306
+        command: --server_id=100 --log-bin='mysql-bin-1.log'
+            --default_authentication_plugin='mysql_native_password'
+            --default-time-zone='+3:00'
+            --gtid-mode="ON"
+            --enforce-gtid-consistency

From 80f448a466c2d450e253443004f905c55c8b9907 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sat, 20 Feb 2021 21:04:19 +0300
Subject: [PATCH 1205/1238] tests/integration: preserve mysql docker logs in
 test_materialize_mysql_database

---
 tests/integration/helpers/cluster.py             |  4 ++--
 .../test_materialize_mysql_database/test.py      | 16 ++++++++++++++++
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py
index aaba3a34555..16ceb823f2e 100644
--- a/tests/integration/helpers/cluster.py
+++ b/tests/integration/helpers/cluster.py
@@ -44,8 +44,8 @@ def _create_env_file(path, variables, fname=DEFAULT_ENV_NAME):
             f.write("=".join([var, value]) + "\n")
     return full_path
 
-def run_and_check(args, env=None, shell=False):
-    res = subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env, shell=shell)
+def run_and_check(args, env=None, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE):
+    res = subprocess.run(args, stdout=stdout, stderr=stderr, env=env, shell=shell)
     if res.returncode != 0:
         # check_call(...) from subprocess does not print stderr, so we do it manually
         print('Stderr:\n{}\n'.format(res.stderr.decode('utf-8')))
diff --git a/tests/integration/test_materialize_mysql_database/test.py b/tests/integration/test_materialize_mysql_database/test.py
index e55772d9e1d..58b20ad3139 100644
--- a/tests/integration/test_materialize_mysql_database/test.py
+++ b/tests/integration/test_materialize_mysql_database/test.py
@@ -37,6 +37,12 @@ class MySQLNodeInstance:
         self.docker_compose = docker_compose
         self.project_name = project_name
 
+        self.base_dir = p.dirname(__file__)
+        self.instances_dir = p.join(self.base_dir, '_instances_mysql')
+        if not os.path.exists(self.instances_dir):
+            os.mkdir(self.instances_dir)
+        self.docker_logs_path = p.join(self.instances_dir, 'docker_mysql.log')
+
 
     def alloc_connection(self):
         if self.mysql_connection is None:
@@ -75,6 +81,16 @@ class MySQLNodeInstance:
         if self.mysql_connection is not None:
             self.mysql_connection.close()
 
+        with open(self.docker_logs_path, "w+") as f:
+            try:
+                run_and_check([
+                    'docker-compose',
+                    '-p', cluster.project_name,
+                    '-f', self.docker_compose, 'logs',
+                ], stdout=f)
+            except Exception as e:
+                print("Unable to get logs from docker mysql.")
+
     def wait_mysql_to_start(self, timeout=60):
         start = time.time()
         while time.time() - start < timeout:

From d343060dd5ccd521329af546d4157775ab01bfb6 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sat, 20 Feb 2021 21:04:19 +0300
Subject: [PATCH 1206/1238] tests/integration: add start_and_wait() in
 test_materialize_mysql_database

---
 .../test_materialize_mysql_database/test.py      | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/tests/integration/test_materialize_mysql_database/test.py b/tests/integration/test_materialize_mysql_database/test.py
index 58b20ad3139..ced9a978d02 100644
--- a/tests/integration/test_materialize_mysql_database/test.py
+++ b/tests/integration/test_materialize_mysql_database/test.py
@@ -77,6 +77,14 @@ class MySQLNodeInstance:
             cursor.execute(executio_query)
             return cursor.fetchall()
 
+    def start_and_wait(self):
+        run_and_check(['docker-compose',
+            '-p', cluster.project_name,
+            '-f', self.docker_compose,
+            'up', '--no-recreate', '-d',
+        ])
+        self.wait_mysql_to_start(120)
+
     def close(self):
         if self.mysql_connection is not None:
             self.mysql_connection.close()
@@ -111,9 +119,7 @@ def started_mysql_5_7():
     mysql_node = MySQLNodeInstance('root', 'clickhouse', '127.0.0.1', 3308, docker_compose)
 
     try:
-        run_and_check(
-            ['docker-compose', '-p', cluster.project_name, '-f', docker_compose, 'up', '--no-recreate', '-d'])
-        mysql_node.wait_mysql_to_start(120)
+        mysql_node.start_and_wait()
         yield mysql_node
     finally:
         mysql_node.close()
@@ -127,9 +133,7 @@ def started_mysql_8_0():
     mysql_node = MySQLNodeInstance('root', 'clickhouse', '127.0.0.1', 33308, docker_compose)
 
     try:
-        run_and_check(
-            ['docker-compose', '-p', cluster.project_name, '-f', docker_compose, 'up', '--no-recreate', '-d'])
-        mysql_node.wait_mysql_to_start(120)
+        mysql_node.start_and_wait()
         yield mysql_node
     finally:
         mysql_node.close()

From 4784d09d1701c44781c3441efed5506f0d14a9dc Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sat, 20 Feb 2021 21:04:20 +0300
Subject: [PATCH 1207/1238] tests/integration: increase verbosity for
 test_materialize_mysql_database

---
 .../compose/docker_compose_mysql_5_7_for_materialize_mysql.yml   | 1 +
 .../compose/docker_compose_mysql_8_0_for_materialize_mysql.yml   | 1 +
 2 files changed, 2 insertions(+)

diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_5_7_for_materialize_mysql.yml b/docker/test/integration/runner/compose/docker_compose_mysql_5_7_for_materialize_mysql.yml
index f238c1aad56..5aa13ba91c7 100644
--- a/docker/test/integration/runner/compose/docker_compose_mysql_5_7_for_materialize_mysql.yml
+++ b/docker/test/integration/runner/compose/docker_compose_mysql_5_7_for_materialize_mysql.yml
@@ -11,3 +11,4 @@ services:
             --default-time-zone='+3:00'
             --gtid-mode="ON"
             --enforce-gtid-consistency
+            --log-error-verbosity=3
diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_8_0_for_materialize_mysql.yml b/docker/test/integration/runner/compose/docker_compose_mysql_8_0_for_materialize_mysql.yml
index 4e4648ee960..7c8a930c84e 100644
--- a/docker/test/integration/runner/compose/docker_compose_mysql_8_0_for_materialize_mysql.yml
+++ b/docker/test/integration/runner/compose/docker_compose_mysql_8_0_for_materialize_mysql.yml
@@ -12,3 +12,4 @@ services:
             --default-time-zone='+3:00'
             --gtid-mode="ON"
             --enforce-gtid-consistency
+            --log-error-verbosity=3

From bde3439859cc7c566df036dacd5adfa63a20d828 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sat, 20 Feb 2021 21:04:19 +0300
Subject: [PATCH 1208/1238] Add yamllint configuration

---
 .yamllint | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 .yamllint

diff --git a/.yamllint b/.yamllint
new file mode 100644
index 00000000000..ea04e9981d9
--- /dev/null
+++ b/.yamllint
@@ -0,0 +1,17 @@
+# vi: ft=yaml
+extends: default
+
+rules:
+    indentation:
+        level: warning
+        indent-sequences: consistent
+    line-length:
+        # there are some bash -c "", so this is OK
+        max: 300
+        level: warning
+    comments:
+        min-spaces-from-content: 1
+    document-start:
+        present: false
+    truthy:
+        check-keys: false

From d909adc8b930b6589f2c1e38ca929761c35b8402 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sat, 20 Feb 2021 22:03:33 +0300
Subject: [PATCH 1209/1238] Add yamllint into style docker image

---
 docker/test/style/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile
index 74af8eafc17..f97087a782a 100644
--- a/docker/test/style/Dockerfile
+++ b/docker/test/style/Dockerfile
@@ -1,7 +1,7 @@
 # docker build -t yandex/clickhouse-style-test .
 FROM ubuntu:20.04
 
-RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes shellcheck libxml2-utils git python3-pip pylint && pip3 install codespell
+RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes shellcheck libxml2-utils git python3-pip pylint yamllint && pip3 install codespell
 
 
 # For |& syntax

From e1bc8bb5873a4446e4d0179dc319f1bff46a9cf4 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sat, 20 Feb 2021 22:18:49 +0300
Subject: [PATCH 1210/1238] Wrap lines in Dockerfile for style image

---
 docker/test/style/Dockerfile | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile
index f97087a782a..e70f9e05679 100644
--- a/docker/test/style/Dockerfile
+++ b/docker/test/style/Dockerfile
@@ -1,7 +1,14 @@
 # docker build -t yandex/clickhouse-style-test .
 FROM ubuntu:20.04
 
-RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes shellcheck libxml2-utils git python3-pip pylint yamllint && pip3 install codespell
+RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
+    shellcheck \
+    libxml2-utils \
+    git \
+    python3-pip \
+    pylint \
+    yamllint \
+    && pip3 install codespell
 
 
 # For |& syntax

From 2db3a6af4962331c5ad2f4e43725ed6ff068b996 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sat, 20 Feb 2021 21:52:44 +0300
Subject: [PATCH 1211/1238] Add yamllint into style check

---
 utils/check-style/check-style | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/utils/check-style/check-style b/utils/check-style/check-style
index f62c7ca5849..9336bca2474 100755
--- a/utils/check-style/check-style
+++ b/utils/check-style/check-style
@@ -72,6 +72,10 @@ find $ROOT_PATH/{src,base,programs,utils} -name '*.xml' |
 # FIXME: for now only clickhouse-test
 pylint --rcfile=$ROOT_PATH/.pylintrc --score=n $ROOT_PATH/tests/clickhouse-test
 
+find $ROOT_PATH -name '*.yaml' -or -name '*.yml' |
+    grep -vP $EXCLUDE_DIRS |
+    xargs yamllint --config-file=$ROOT_PATH/.yamllint
+
 # Machine translation to Russian is strictly prohibited
 find $ROOT_PATH/docs/ru -name '*.md' |
     grep -vP $EXCLUDE_DIRS |

From e9226904530afa0262a4c478ddd6d7d7d82293cf Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sat, 20 Feb 2021 21:59:45 +0300
Subject: [PATCH 1212/1238] Fix yamllint issues

---
 .github/codecov.yml                           |  4 +-
 .github/workflows/anchore-analysis.yml        | 36 +++++------
 .potato.yml                                   | 16 ++---
 .../compose/docker_compose_cassandra.yml      |  2 +-
 .../runner/compose/docker_compose_hdfs.yml    |  4 +-
 .../runner/compose/docker_compose_kafka.yml   | 44 ++++++-------
 .../docker_compose_kerberized_hdfs.yml        | 14 ++--
 .../docker_compose_kerberized_kafka.yml       | 64 +++++++++----------
 .../runner/compose/docker_compose_mongo.yml   |  2 +-
 .../runner/compose/docker_compose_mysql.yml   |  2 +-
 ...ompose_mysql_5_7_for_materialize_mysql.yml |  2 +-
 ...ompose_mysql_8_0_for_materialize_mysql.yml |  2 +-
 .../compose/docker_compose_mysql_client.yml   |  2 +-
 .../compose/docker_compose_postgesql.yml      |  2 +-
 .../compose/docker_compose_postgres.yml       |  8 +--
 .../runner/compose/docker_compose_redis.yml   |  2 +-
 .../docker-compose/docker-compose.yml         |  2 +-
 .../example/docker-compose/docker-compose.yml |  2 +-
 .../docker-compose/docker-compose.yml         |  2 +-
 .../docker-compose/openldap-service.yml       |  3 +-
 .../docker-compose/docker-compose.yml         |  2 +-
 .../docker-compose/openldap-service.yml       |  3 +-
 .../docker-compose/docker-compose.yml         |  2 +-
 .../docker-compose/openldap-service.yml       |  3 +-
 .../rbac/docker-compose/docker-compose.yml    |  2 +-
 25 files changed, 112 insertions(+), 115 deletions(-)

diff --git a/.github/codecov.yml b/.github/codecov.yml
index 9b75efc791d..f185c5e2dcc 100644
--- a/.github/codecov.yml
+++ b/.github/codecov.yml
@@ -1,5 +1,5 @@
 codecov:
-  max_report_age: off
+  max_report_age: "off"
   strict_yaml_branch: "master"
 
 ignore:
@@ -14,4 +14,4 @@ ignore:
 comment: false
 
 github_checks:
-  annotations: false
\ No newline at end of file
+  annotations: false
diff --git a/.github/workflows/anchore-analysis.yml b/.github/workflows/anchore-analysis.yml
index 50eaf45e2ef..105c05f470c 100644
--- a/.github/workflows/anchore-analysis.yml
+++ b/.github/workflows/anchore-analysis.yml
@@ -10,7 +10,7 @@ name: Docker Container Scan (clickhouse-server)
 
 on:
   pull_request:
-    paths: 
+    paths:
       - docker/server/Dockerfile
       - .github/workflows/anchore-analysis.yml
   schedule:
@@ -20,20 +20,20 @@ jobs:
   Anchore-Build-Scan:
     runs-on: ubuntu-latest
     steps:
-    - name: Checkout the code
-      uses: actions/checkout@v2
-    - name: Build the Docker image
-      run: |
-        cd docker/server
-        perl -pi -e 's|=\$version||g' Dockerfile
-        docker build . --file Dockerfile --tag localbuild/testimage:latest      
-    - name: Run the local Anchore scan action itself with GitHub Advanced Security code scanning integration enabled
-      uses: anchore/scan-action@v2
-      id: scan
-      with:
-        image: "localbuild/testimage:latest"
-        acs-report-enable: true
-    - name: Upload Anchore Scan Report
-      uses: github/codeql-action/upload-sarif@v1
-      with:
-        sarif_file: ${{ steps.scan.outputs.sarif }}
+      - name: Checkout the code
+        uses: actions/checkout@v2
+      - name: Build the Docker image
+        run: |
+          cd docker/server
+          perl -pi -e 's|=\$version||g' Dockerfile
+          docker build . --file Dockerfile --tag localbuild/testimage:latest
+      - name: Run the local Anchore scan action itself with GitHub Advanced Security code scanning integration enabled
+        uses: anchore/scan-action@v2
+        id: scan
+        with:
+          image: "localbuild/testimage:latest"
+          acs-report-enable: true
+      - name: Upload Anchore Scan Report
+        uses: github/codeql-action/upload-sarif@v1
+        with:
+          sarif_file: ${{ steps.scan.outputs.sarif }}
diff --git a/.potato.yml b/.potato.yml
index 113bdacbdde..7cb87c58bd1 100644
--- a/.potato.yml
+++ b/.potato.yml
@@ -14,14 +14,14 @@ handlers:
         # The trigger for creating the Yandex.Tracker issue. When the specified event occurs, it transfers PR data to Yandex.Tracker.
         github:pullRequest:labeled:
           data:
-              # The Yandex.Tracker queue to create the issue in. Each issue in Tracker belongs to one of the project queues.
-          	queue: CLICKHOUSEDOCS
-              # The issue title.
-          	summary: '[Potato] Pull Request #{{pullRequest.number}}'
-              # The issue description.
-          	description: >
+          # The Yandex.Tracker queue to create the issue in. Each issue in Tracker belongs to one of the project queues.
+          queue: CLICKHOUSEDOCS
+          # The issue title.
+          summary: '[Potato] Pull Request #{{pullRequest.number}}'
+          # The issue description.
+          description: >
                 {{pullRequest.description}}
 
                 Ссылка на Pull Request: {{pullRequest.webUrl}}
-            # The condition for creating the Yandex.Tracker issue.
-        	condition: eventPayload.labels.filter(label => ['pr-feature'].includes(label.name)).length
+          # The condition for creating the Yandex.Tracker issue.
+          condition: eventPayload.labels.filter(label => ['pr-feature'].includes(label.name)).length
diff --git a/docker/test/integration/runner/compose/docker_compose_cassandra.yml b/docker/test/integration/runner/compose/docker_compose_cassandra.yml
index 6567a352027..c5cdfac5ce7 100644
--- a/docker/test/integration/runner/compose/docker_compose_cassandra.yml
+++ b/docker/test/integration/runner/compose/docker_compose_cassandra.yml
@@ -4,4 +4,4 @@ services:
         image: cassandra
         restart: always
         ports:
-          - 9043:9042
+            - 9043:9042
diff --git a/docker/test/integration/runner/compose/docker_compose_hdfs.yml b/docker/test/integration/runner/compose/docker_compose_hdfs.yml
index b8cd7f64273..43dd1aa43d3 100644
--- a/docker/test/integration/runner/compose/docker_compose_hdfs.yml
+++ b/docker/test/integration/runner/compose/docker_compose_hdfs.yml
@@ -5,6 +5,6 @@ services:
         hostname: hdfs1
         restart: always
         ports:
-          - 50075:50075
-          - 50070:50070
+            - 50075:50075
+            - 50070:50070
         entrypoint: /etc/bootstrap.sh -d
diff --git a/docker/test/integration/runner/compose/docker_compose_kafka.yml b/docker/test/integration/runner/compose/docker_compose_kafka.yml
index 219d977ffd9..b77542f7e11 100644
--- a/docker/test/integration/runner/compose/docker_compose_kafka.yml
+++ b/docker/test/integration/runner/compose/docker_compose_kafka.yml
@@ -5,42 +5,42 @@ services:
     image: zookeeper:3.4.9
     hostname: kafka_zookeeper
     environment:
-        ZOO_MY_ID: 1
-        ZOO_PORT: 2181
-        ZOO_SERVERS: server.1=kafka_zookeeper:2888:3888
+      ZOO_MY_ID: 1
+      ZOO_PORT: 2181
+      ZOO_SERVERS: server.1=kafka_zookeeper:2888:3888
     security_opt:
-        - label:disable
+      - label:disable
 
   kafka1:
     image: confluentinc/cp-kafka:5.2.0
     hostname: kafka1
     ports:
-        - "9092:9092"
+      - "9092:9092"
     environment:
-        KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:9092,OUTSIDE://kafka1:19092
-        KAFKA_LISTENERS: INSIDE://:9092,OUTSIDE://:19092
-        KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT
-        KAFKA_INTER_BROKER_LISTENER_NAME: INSIDE
-        KAFKA_BROKER_ID: 1
-        KAFKA_ZOOKEEPER_CONNECT: "kafka_zookeeper:2181"
-        KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO"
-        KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
+      KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:9092,OUTSIDE://kafka1:19092
+      KAFKA_LISTENERS: INSIDE://:9092,OUTSIDE://:19092
+      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT
+      KAFKA_INTER_BROKER_LISTENER_NAME: INSIDE
+      KAFKA_BROKER_ID: 1
+      KAFKA_ZOOKEEPER_CONNECT: "kafka_zookeeper:2181"
+      KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO"
+      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
     depends_on:
-        - kafka_zookeeper
+      - kafka_zookeeper
     security_opt:
-        - label:disable
+      - label:disable
 
   schema-registry:
     image: confluentinc/cp-schema-registry:5.2.0
     hostname: schema-registry
     ports:
-        - "8081:8081"
+      - "8081:8081"
     environment:
-        SCHEMA_REGISTRY_HOST_NAME: schema-registry
-        SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL: PLAINTEXT
-        SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092
+      SCHEMA_REGISTRY_HOST_NAME: schema-registry
+      SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL: PLAINTEXT
+      SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092
     depends_on:
-        - kafka_zookeeper
-        - kafka1
+      - kafka_zookeeper
+      - kafka1
     security_opt:
-        - label:disable
+      - label:disable
diff --git a/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml b/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml
index f2a659bce58..e2e15975e22 100644
--- a/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml
+++ b/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml
@@ -8,22 +8,22 @@ services:
     hostname: kerberizedhdfs1
     restart: always
     volumes:
-        - ${KERBERIZED_HDFS_DIR}/../../hdfs_configs/bootstrap.sh:/etc/bootstrap.sh:ro
-        - ${KERBERIZED_HDFS_DIR}/secrets:/usr/local/hadoop/etc/hadoop/conf
-        - ${KERBERIZED_HDFS_DIR}/secrets/krb_long.conf:/etc/krb5.conf:ro
+      - ${KERBERIZED_HDFS_DIR}/../../hdfs_configs/bootstrap.sh:/etc/bootstrap.sh:ro
+      - ${KERBERIZED_HDFS_DIR}/secrets:/usr/local/hadoop/etc/hadoop/conf
+      - ${KERBERIZED_HDFS_DIR}/secrets/krb_long.conf:/etc/krb5.conf:ro
     ports:
       - 1006:1006
       - 50070:50070
       - 9010:9010
     depends_on:
-        - hdfskerberos
+      - hdfskerberos
     entrypoint: /etc/bootstrap.sh -d
 
   hdfskerberos:
     image: yandex/clickhouse-kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG}
     hostname: hdfskerberos
     volumes:
-        - ${KERBERIZED_HDFS_DIR}/secrets:/tmp/keytab
-        - ${KERBERIZED_HDFS_DIR}/../../kerberos_image_config.sh:/config.sh
-        - /dev/urandom:/dev/random
+      - ${KERBERIZED_HDFS_DIR}/secrets:/tmp/keytab
+      - ${KERBERIZED_HDFS_DIR}/../../kerberos_image_config.sh:/config.sh
+      - /dev/urandom:/dev/random
     ports: [88, 749]
diff --git a/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml b/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml
index 6e1e11344bb..64a3ef3e956 100644
--- a/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml
+++ b/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml
@@ -6,54 +6,54 @@ services:
     # restart: always
     hostname: kafka_kerberized_zookeeper
     environment:
-        ZOOKEEPER_SERVER_ID: 1
-        ZOOKEEPER_CLIENT_PORT: 2181
-        ZOOKEEPER_SERVERS: "kafka_kerberized_zookeeper:2888:3888"
-        KAFKA_OPTS: "-Djava.security.auth.login.config=/etc/kafka/secrets/zookeeper_jaas.conf -Djava.security.krb5.conf=/etc/kafka/secrets/krb.conf -Dzookeeper.authProvider.1=org.apache.zookeeper.server.auth.SASLAuthenticationProvider -Dsun.security.krb5.debug=true"
+      ZOOKEEPER_SERVER_ID: 1
+      ZOOKEEPER_CLIENT_PORT: 2181
+      ZOOKEEPER_SERVERS: "kafka_kerberized_zookeeper:2888:3888"
+      KAFKA_OPTS: "-Djava.security.auth.login.config=/etc/kafka/secrets/zookeeper_jaas.conf -Djava.security.krb5.conf=/etc/kafka/secrets/krb.conf -Dzookeeper.authProvider.1=org.apache.zookeeper.server.auth.SASLAuthenticationProvider -Dsun.security.krb5.debug=true"
     volumes:
-        - ${KERBERIZED_KAFKA_DIR}/secrets:/etc/kafka/secrets
-        - /dev/urandom:/dev/random
+      - ${KERBERIZED_KAFKA_DIR}/secrets:/etc/kafka/secrets
+      - /dev/urandom:/dev/random
     depends_on:
-        - kafka_kerberos
+      - kafka_kerberos
     security_opt:
-        - label:disable
+      - label:disable
 
   kerberized_kafka1:
     image: confluentinc/cp-kafka:5.2.0
     # restart: always
     hostname: kerberized_kafka1
     ports:
-        - "9092:9092"
-        - "9093:9093"
+      - "9092:9092"
+      - "9093:9093"
     environment:
-        KAFKA_LISTENERS: OUTSIDE://:19092,UNSECURED_OUTSIDE://:19093,UNSECURED_INSIDE://:9093
-        KAFKA_ADVERTISED_LISTENERS: OUTSIDE://kerberized_kafka1:19092,UNSECURED_OUTSIDE://kerberized_kafka1:19093,UNSECURED_INSIDE://localhost:9093
-        # KAFKA_LISTENERS: INSIDE://kerberized_kafka1:9092,OUTSIDE://kerberized_kafka1:19092
-        # KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:9092,OUTSIDE://kerberized_kafka1:19092
-        KAFKA_SASL_MECHANISM_INTER_BROKER_PROTOCOL: GSSAPI
-        KAFKA_SASL_ENABLED_MECHANISMS: GSSAPI
-        KAFKA_SASL_KERBEROS_SERVICE_NAME: kafka
-        KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: OUTSIDE:SASL_PLAINTEXT,UNSECURED_OUTSIDE:PLAINTEXT,UNSECURED_INSIDE:PLAINTEXT,
-        KAFKA_INTER_BROKER_LISTENER_NAME: OUTSIDE
-        KAFKA_BROKER_ID: 1
-        KAFKA_ZOOKEEPER_CONNECT: "kafka_kerberized_zookeeper:2181"
-        KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO"
-        KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
-        KAFKA_OPTS: "-Djava.security.auth.login.config=/etc/kafka/secrets/broker_jaas.conf -Djava.security.krb5.conf=/etc/kafka/secrets/krb.conf -Dsun.security.krb5.debug=true"
+      KAFKA_LISTENERS: OUTSIDE://:19092,UNSECURED_OUTSIDE://:19093,UNSECURED_INSIDE://:9093
+      KAFKA_ADVERTISED_LISTENERS: OUTSIDE://kerberized_kafka1:19092,UNSECURED_OUTSIDE://kerberized_kafka1:19093,UNSECURED_INSIDE://localhost:9093
+      # KAFKA_LISTENERS: INSIDE://kerberized_kafka1:9092,OUTSIDE://kerberized_kafka1:19092
+      # KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:9092,OUTSIDE://kerberized_kafka1:19092
+      KAFKA_SASL_MECHANISM_INTER_BROKER_PROTOCOL: GSSAPI
+      KAFKA_SASL_ENABLED_MECHANISMS: GSSAPI
+      KAFKA_SASL_KERBEROS_SERVICE_NAME: kafka
+      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: OUTSIDE:SASL_PLAINTEXT,UNSECURED_OUTSIDE:PLAINTEXT,UNSECURED_INSIDE:PLAINTEXT,
+      KAFKA_INTER_BROKER_LISTENER_NAME: OUTSIDE
+      KAFKA_BROKER_ID: 1
+      KAFKA_ZOOKEEPER_CONNECT: "kafka_kerberized_zookeeper:2181"
+      KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO"
+      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
+      KAFKA_OPTS: "-Djava.security.auth.login.config=/etc/kafka/secrets/broker_jaas.conf -Djava.security.krb5.conf=/etc/kafka/secrets/krb.conf -Dsun.security.krb5.debug=true"
     volumes:
-        - ${KERBERIZED_KAFKA_DIR}/secrets:/etc/kafka/secrets
-        - /dev/urandom:/dev/random
+      - ${KERBERIZED_KAFKA_DIR}/secrets:/etc/kafka/secrets
+      - /dev/urandom:/dev/random
     depends_on:
-        - kafka_kerberized_zookeeper
-        - kafka_kerberos
+      - kafka_kerberized_zookeeper
+      - kafka_kerberos
     security_opt:
-        - label:disable
+      - label:disable
 
   kafka_kerberos:
     image: yandex/clickhouse-kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG:-latest}
     hostname: kafka_kerberos
     volumes:
-        - ${KERBERIZED_KAFKA_DIR}/secrets:/tmp/keytab
-        - ${KERBERIZED_KAFKA_DIR}/../../kerberos_image_config.sh:/config.sh
-        - /dev/urandom:/dev/random
+      - ${KERBERIZED_KAFKA_DIR}/secrets:/tmp/keytab
+      - ${KERBERIZED_KAFKA_DIR}/../../kerberos_image_config.sh:/config.sh
+      - /dev/urandom:/dev/random
     ports: [88, 749]
diff --git a/docker/test/integration/runner/compose/docker_compose_mongo.yml b/docker/test/integration/runner/compose/docker_compose_mongo.yml
index 8c54544ed88..6c98fde2303 100644
--- a/docker/test/integration/runner/compose/docker_compose_mongo.yml
+++ b/docker/test/integration/runner/compose/docker_compose_mongo.yml
@@ -7,5 +7,5 @@ services:
             MONGO_INITDB_ROOT_USERNAME: root
             MONGO_INITDB_ROOT_PASSWORD: clickhouse
         ports:
-          - 27018:27017
+            - 27018:27017
         command: --profile=2 --verbose
diff --git a/docker/test/integration/runner/compose/docker_compose_mysql.yml b/docker/test/integration/runner/compose/docker_compose_mysql.yml
index 90daf8a4238..5b15d517f37 100644
--- a/docker/test/integration/runner/compose/docker_compose_mysql.yml
+++ b/docker/test/integration/runner/compose/docker_compose_mysql.yml
@@ -6,5 +6,5 @@ services:
         environment:
             MYSQL_ROOT_PASSWORD: clickhouse
         ports:
-          - 3308:3306
+            - 3308:3306
         command: --server_id=100 --log-bin='mysql-bin-1.log' --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency
diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_5_7_for_materialize_mysql.yml b/docker/test/integration/runner/compose/docker_compose_mysql_5_7_for_materialize_mysql.yml
index e7d762203ee..509e04ef9c3 100644
--- a/docker/test/integration/runner/compose/docker_compose_mysql_5_7_for_materialize_mysql.yml
+++ b/docker/test/integration/runner/compose/docker_compose_mysql_5_7_for_materialize_mysql.yml
@@ -6,5 +6,5 @@ services:
         environment:
             MYSQL_ROOT_PASSWORD: clickhouse
         ports:
-          - 3308:3306
+            - 3308:3306
         command: --server_id=100 --log-bin='mysql-bin-1.log' --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency
diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_8_0_for_materialize_mysql.yml b/docker/test/integration/runner/compose/docker_compose_mysql_8_0_for_materialize_mysql.yml
index 918a2b5f80f..72813552b54 100644
--- a/docker/test/integration/runner/compose/docker_compose_mysql_8_0_for_materialize_mysql.yml
+++ b/docker/test/integration/runner/compose/docker_compose_mysql_8_0_for_materialize_mysql.yml
@@ -6,5 +6,5 @@ services:
         environment:
             MYSQL_ROOT_PASSWORD: clickhouse
         ports:
-          - 33308:3306
+            - 33308:3306
         command: --server_id=100 --log-bin='mysql-bin-1.log' --default_authentication_plugin='mysql_native_password' --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency
diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_client.yml b/docker/test/integration/runner/compose/docker_compose_mysql_client.yml
index 802151c4d7b..5e4565d64c3 100644
--- a/docker/test/integration/runner/compose/docker_compose_mysql_client.yml
+++ b/docker/test/integration/runner/compose/docker_compose_mysql_client.yml
@@ -7,7 +7,7 @@ services:
       MYSQL_ALLOW_EMPTY_PASSWORD: 1
     command: --federated --socket /var/run/mysqld/mysqld.sock
     healthcheck:
-      test: ["CMD", "mysqladmin" ,"ping", "-h", "localhost"]
+      test: ["CMD", "mysqladmin", "ping", "-h", "localhost"]
       interval: 1s
       timeout: 2s
       retries: 100
diff --git a/docker/test/integration/runner/compose/docker_compose_postgesql.yml b/docker/test/integration/runner/compose/docker_compose_postgesql.yml
index 984f5f97384..90764188ddd 100644
--- a/docker/test/integration/runner/compose/docker_compose_postgesql.yml
+++ b/docker/test/integration/runner/compose/docker_compose_postgesql.yml
@@ -11,4 +11,4 @@ services:
     ports:
       - "5433:5433"
     environment:
-      POSTGRES_HOST_AUTH_METHOD: "trust"
\ No newline at end of file
+      POSTGRES_HOST_AUTH_METHOD: "trust"
diff --git a/docker/test/integration/runner/compose/docker_compose_postgres.yml b/docker/test/integration/runner/compose/docker_compose_postgres.yml
index fff4fb1fa42..5657352e1b3 100644
--- a/docker/test/integration/runner/compose/docker_compose_postgres.yml
+++ b/docker/test/integration/runner/compose/docker_compose_postgres.yml
@@ -6,8 +6,8 @@ services:
         environment:
             POSTGRES_PASSWORD: mysecretpassword
         ports:
-          - 5432:5432
+            - 5432:5432
         networks:
-          default:
-            aliases:
-              - postgre-sql.local
+            default:
+                aliases:
+                    - postgre-sql.local
diff --git a/docker/test/integration/runner/compose/docker_compose_redis.yml b/docker/test/integration/runner/compose/docker_compose_redis.yml
index 72df99ec59b..3d834aadaa4 100644
--- a/docker/test/integration/runner/compose/docker_compose_redis.yml
+++ b/docker/test/integration/runner/compose/docker_compose_redis.yml
@@ -4,5 +4,5 @@ services:
         image: redis
         restart: always
         ports:
-          - 6380:6379
+            - 6380:6379
         command: redis-server --requirepass "clickhouse" --databases 32
diff --git a/tests/testflows/aes_encryption/docker-compose/docker-compose.yml b/tests/testflows/aes_encryption/docker-compose/docker-compose.yml
index 04a51ad7ec0..124b53bf502 100644
--- a/tests/testflows/aes_encryption/docker-compose/docker-compose.yml
+++ b/tests/testflows/aes_encryption/docker-compose/docker-compose.yml
@@ -56,7 +56,7 @@ services:
       zookeeper:
         condition: service_healthy
 
-  # dummy service which does nothing, but allows to postpone 
+  # dummy service which does nothing, but allows to postpone
   # 'docker-compose up -d' till all dependecies will go healthy
   all_services_ready:
     image: hello-world
diff --git a/tests/testflows/example/docker-compose/docker-compose.yml b/tests/testflows/example/docker-compose/docker-compose.yml
index e7e57386dc4..4edb415824f 100644
--- a/tests/testflows/example/docker-compose/docker-compose.yml
+++ b/tests/testflows/example/docker-compose/docker-compose.yml
@@ -20,7 +20,7 @@ services:
       zookeeper:
         condition: service_healthy
 
-  # dummy service which does nothing, but allows to postpone 
+  # dummy service which does nothing, but allows to postpone
   # 'docker-compose up -d' till all dependecies will go healthy
   all_services_ready:
     image: hello-world
diff --git a/tests/testflows/ldap/authentication/docker-compose/docker-compose.yml b/tests/testflows/ldap/authentication/docker-compose/docker-compose.yml
index c8ff683df58..36e25ef766e 100644
--- a/tests/testflows/ldap/authentication/docker-compose/docker-compose.yml
+++ b/tests/testflows/ldap/authentication/docker-compose/docker-compose.yml
@@ -135,7 +135,7 @@ services:
       zookeeper:
         condition: service_healthy
 
-  # dummy service which does nothing, but allows to postpone 
+  # dummy service which does nothing, but allows to postpone
   # 'docker-compose up -d' till all dependecies will go healthy
   all_services_ready:
     image: hello-world
diff --git a/tests/testflows/ldap/authentication/docker-compose/openldap-service.yml b/tests/testflows/ldap/authentication/docker-compose/openldap-service.yml
index 139907c513c..e489637b8c9 100644
--- a/tests/testflows/ldap/authentication/docker-compose/openldap-service.yml
+++ b/tests/testflows/ldap/authentication/docker-compose/openldap-service.yml
@@ -28,7 +28,7 @@ services:
     environment:
       PHPLDAPADMIN_HTTPS=false:
     ports:
-      - "8080:80"      
+      - "8080:80"
     healthcheck:
       test: echo 1
       interval: 10s
@@ -37,4 +37,3 @@ services:
       start_period: 300s
     security_opt:
       - label:disable
-
diff --git a/tests/testflows/ldap/external_user_directory/docker-compose/docker-compose.yml b/tests/testflows/ldap/external_user_directory/docker-compose/docker-compose.yml
index c8ff683df58..36e25ef766e 100644
--- a/tests/testflows/ldap/external_user_directory/docker-compose/docker-compose.yml
+++ b/tests/testflows/ldap/external_user_directory/docker-compose/docker-compose.yml
@@ -135,7 +135,7 @@ services:
       zookeeper:
         condition: service_healthy
 
-  # dummy service which does nothing, but allows to postpone 
+  # dummy service which does nothing, but allows to postpone
   # 'docker-compose up -d' till all dependecies will go healthy
   all_services_ready:
     image: hello-world
diff --git a/tests/testflows/ldap/external_user_directory/docker-compose/openldap-service.yml b/tests/testflows/ldap/external_user_directory/docker-compose/openldap-service.yml
index 139907c513c..e489637b8c9 100644
--- a/tests/testflows/ldap/external_user_directory/docker-compose/openldap-service.yml
+++ b/tests/testflows/ldap/external_user_directory/docker-compose/openldap-service.yml
@@ -28,7 +28,7 @@ services:
     environment:
       PHPLDAPADMIN_HTTPS=false:
     ports:
-      - "8080:80"      
+      - "8080:80"
     healthcheck:
       test: echo 1
       interval: 10s
@@ -37,4 +37,3 @@ services:
       start_period: 300s
     security_opt:
       - label:disable
-
diff --git a/tests/testflows/ldap/role_mapping/docker-compose/docker-compose.yml b/tests/testflows/ldap/role_mapping/docker-compose/docker-compose.yml
index c8ff683df58..36e25ef766e 100644
--- a/tests/testflows/ldap/role_mapping/docker-compose/docker-compose.yml
+++ b/tests/testflows/ldap/role_mapping/docker-compose/docker-compose.yml
@@ -135,7 +135,7 @@ services:
       zookeeper:
         condition: service_healthy
 
-  # dummy service which does nothing, but allows to postpone 
+  # dummy service which does nothing, but allows to postpone
   # 'docker-compose up -d' till all dependecies will go healthy
   all_services_ready:
     image: hello-world
diff --git a/tests/testflows/ldap/role_mapping/docker-compose/openldap-service.yml b/tests/testflows/ldap/role_mapping/docker-compose/openldap-service.yml
index 139907c513c..e489637b8c9 100644
--- a/tests/testflows/ldap/role_mapping/docker-compose/openldap-service.yml
+++ b/tests/testflows/ldap/role_mapping/docker-compose/openldap-service.yml
@@ -28,7 +28,7 @@ services:
     environment:
       PHPLDAPADMIN_HTTPS=false:
     ports:
-      - "8080:80"      
+      - "8080:80"
     healthcheck:
       test: echo 1
       interval: 10s
@@ -37,4 +37,3 @@ services:
       start_period: 300s
     security_opt:
       - label:disable
-
diff --git a/tests/testflows/rbac/docker-compose/docker-compose.yml b/tests/testflows/rbac/docker-compose/docker-compose.yml
index a3f5144c9ed..29f2ef52470 100755
--- a/tests/testflows/rbac/docker-compose/docker-compose.yml
+++ b/tests/testflows/rbac/docker-compose/docker-compose.yml
@@ -57,4 +57,4 @@ services:
       clickhouse3:
         condition: service_healthy
       zookeeper:
-        condition: service_healthy
\ No newline at end of file
+        condition: service_healthy

From d8b9a7052eb883b40e223a7ecc4aba92a8f2999f Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sun, 21 Feb 2021 06:15:12 +0300
Subject: [PATCH 1213/1238] Drop truthy.check-keys from yamllint (does not
 supported on CI)

https://clickhouse-test-reports.s3.yandex.net/21019/e9226904530afa0262a4c478ddd6d7d7d82293cf/style_check.html#fail1
---
 .github/workflows/anchore-analysis.yml | 2 +-
 .yamllint                              | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/.github/workflows/anchore-analysis.yml b/.github/workflows/anchore-analysis.yml
index 105c05f470c..1005c8f6c38 100644
--- a/.github/workflows/anchore-analysis.yml
+++ b/.github/workflows/anchore-analysis.yml
@@ -8,7 +8,7 @@
 
 name: Docker Container Scan (clickhouse-server)
 
-on:
+"on":
   pull_request:
     paths:
       - docker/server/Dockerfile
diff --git a/.yamllint b/.yamllint
index ea04e9981d9..fe161e71849 100644
--- a/.yamllint
+++ b/.yamllint
@@ -13,5 +13,3 @@ rules:
         min-spaces-from-content: 1
     document-start:
         present: false
-    truthy:
-        check-keys: false

From a33183ee1e12e1d4072054f0908732955683a0e6 Mon Sep 17 00:00:00 2001
From: feng lv <fenglv15@mails.ucas.ac.cn>
Date: Sun, 14 Feb 2021 16:07:20 +0000
Subject: [PATCH 1214/1238] fix transform with floating point key

better

update test
---
 src/Functions/transform.cpp                   | 102 +++++++++++-------
 .../01704_transform_with_float_key.reference  |  30 ++++++
 .../01704_transform_with_float_key.sql        |   3 +
 3 files changed, 94 insertions(+), 41 deletions(-)
 create mode 100644 tests/queries/0_stateless/01704_transform_with_float_key.reference
 create mode 100644 tests/queries/0_stateless/01704_transform_with_float_key.sql

diff --git a/src/Functions/transform.cpp b/src/Functions/transform.cpp
index ab589be9c07..07fbd5a7b96 100644
--- a/src/Functions/transform.cpp
+++ b/src/Functions/transform.cpp
@@ -1,4 +1,6 @@
 #include <mutex>
+#include <ext/bit_cast.h>
+
 #include <Common/FieldVisitors.h>
 #include <DataTypes/DataTypeArray.h>
 #include <Columns/ColumnString.h>
@@ -13,6 +15,7 @@
 #include <Functions/FunctionHelpers.h>
 #include <Functions/FunctionFactory.h>
 #include <DataTypes/getLeastSupertype.h>
+#include <Interpreters/convertFieldToType.h>
 
 
 namespace DB
@@ -491,7 +494,7 @@ private:
         dst.resize(size);
         for (size_t i = 0; i < size; ++i)
         {
-            auto it = table.find(src[i]);
+            const auto * it = table.find(ext::bit_cast<UInt64>(src[i]));
             if (it)
                 memcpy(&dst[i], &it->getMapped(), sizeof(dst[i]));    /// little endian.
             else
@@ -507,7 +510,7 @@ private:
         dst.resize(size);
         for (size_t i = 0; i < size; ++i)
         {
-            auto it = table.find(src[i]);
+            const auto * it = table.find(ext::bit_cast<UInt64>(src[i]));
             if (it)
                 memcpy(&dst[i], &it->getMapped(), sizeof(dst[i]));    /// little endian.
             else
@@ -523,7 +526,7 @@ private:
         dst.resize(size);
         for (size_t i = 0; i < size; ++i)
         {
-            auto it = table.find(src[i]);
+            const auto * it = table.find(ext::bit_cast<UInt64>(src[i]));
             if (it)
                 memcpy(&dst[i], &it->getMapped(), sizeof(dst[i]));
             else
@@ -541,7 +544,7 @@ private:
         ColumnString::Offset current_dst_offset = 0;
         for (size_t i = 0; i < size; ++i)
         {
-            auto it = table.find(src[i]);
+            const auto * it = table.find(ext::bit_cast<UInt64>(src[i]));
             StringRef ref = it ? it->getMapped() : dst_default;
             dst_data.resize(current_dst_offset + ref.size);
             memcpy(&dst_data[current_dst_offset], ref.data, ref.size);
@@ -562,7 +565,8 @@ private:
         ColumnString::Offset current_dst_default_offset = 0;
         for (size_t i = 0; i < size; ++i)
         {
-            auto it = table.find(src[i]);
+            Field key = src[i];
+            const auto * it = table.find(key.reinterpret<UInt64>());
             StringRef ref;
 
             if (it)
@@ -778,50 +782,66 @@ private:
 
         /// Note: Doesn't check the duplicates in the `from` array.
 
-        if (from[0].getType() != Field::Types::String && to[0].getType() != Field::Types::String)
+        const IDataType & from_type = *arguments[0].type;
+
+        if (from[0].getType() != Field::Types::String)
         {
-            cache.table_num_to_num = std::make_unique<Cache::NumToNum>();
-            auto & table = *cache.table_num_to_num;
-            for (size_t i = 0; i < size; ++i)
+            if (to[0].getType() != Field::Types::String)
             {
-                // Field may be of Float type, but for the purpose of bitwise
-                // equality we can treat them as UInt64, hence the reinterpret().
-                table[from[i].reinterpret<UInt64>()] = (*used_to)[i].reinterpret<UInt64>();
+                cache.table_num_to_num = std::make_unique<Cache::NumToNum>();
+                auto & table = *cache.table_num_to_num;
+                for (size_t i = 0; i < size; ++i)
+                {
+                    Field key = convertFieldToType(from[i], from_type);
+                    if (key.isNull())
+                        continue;
+
+                    // Field may be of Float type, but for the purpose of bitwise
+                    // equality we can treat them as UInt64, hence the reinterpret().
+                    table[key.reinterpret<UInt64>()] = (*used_to)[i].reinterpret<UInt64>();
+                }
+            }
+            else
+            {
+                cache.table_num_to_string = std::make_unique<Cache::NumToString>();
+                auto & table = *cache.table_num_to_string;
+                for (size_t i = 0; i < size; ++i)
+                {
+                    Field key = convertFieldToType(from[i], from_type);
+                    if (key.isNull())
+                        continue;
+
+                    const String & str_to = to[i].get<const String &>();
+                    StringRef ref{cache.string_pool.insert(str_to.data(), str_to.size() + 1), str_to.size() + 1};
+                    table[key.reinterpret<UInt64>()] = ref;
+                }
             }
         }
-        else if (from[0].getType() != Field::Types::String && to[0].getType() == Field::Types::String)
+        else
         {
-            cache.table_num_to_string = std::make_unique<Cache::NumToString>();
-            auto & table = *cache.table_num_to_string;
-            for (size_t i = 0; i < size; ++i)
+            if (to[0].getType() != Field::Types::String)
             {
-                const String & str_to = to[i].get<const String &>();
-                StringRef ref{cache.string_pool.insert(str_to.data(), str_to.size() + 1), str_to.size() + 1};
-                table[from[i].reinterpret<UInt64>()] = ref;
+                cache.table_string_to_num = std::make_unique<Cache::StringToNum>();
+                auto & table = *cache.table_string_to_num;
+                for (size_t i = 0; i < size; ++i)
+                {
+                    const String & str_from = from[i].get<const String &>();
+                    StringRef ref{cache.string_pool.insert(str_from.data(), str_from.size() + 1), str_from.size() + 1};
+                    table[ref] = (*used_to)[i].reinterpret<UInt64>();
+                }
             }
-        }
-        else if (from[0].getType() == Field::Types::String && to[0].getType() != Field::Types::String)
-        {
-            cache.table_string_to_num = std::make_unique<Cache::StringToNum>();
-            auto & table = *cache.table_string_to_num;
-            for (size_t i = 0; i < size; ++i)
+            else
             {
-                const String & str_from = from[i].get<const String &>();
-                StringRef ref{cache.string_pool.insert(str_from.data(), str_from.size() + 1), str_from.size() + 1};
-                table[ref] = (*used_to)[i].reinterpret<UInt64>();
-            }
-        }
-        else if (from[0].getType() == Field::Types::String && to[0].getType() == Field::Types::String)
-        {
-            cache.table_string_to_string = std::make_unique<Cache::StringToString>();
-            auto & table = *cache.table_string_to_string;
-            for (size_t i = 0; i < size; ++i)
-            {
-                const String & str_from = from[i].get<const String &>();
-                const String & str_to = to[i].get<const String &>();
-                StringRef ref_from{cache.string_pool.insert(str_from.data(), str_from.size() + 1), str_from.size() + 1};
-                StringRef ref_to{cache.string_pool.insert(str_to.data(), str_to.size() + 1), str_to.size() + 1};
-                table[ref_from] = ref_to;
+                cache.table_string_to_string = std::make_unique<Cache::StringToString>();
+                auto & table = *cache.table_string_to_string;
+                for (size_t i = 0; i < size; ++i)
+                {
+                    const String & str_from = from[i].get<const String &>();
+                    const String & str_to = to[i].get<const String &>();
+                    StringRef ref_from{cache.string_pool.insert(str_from.data(), str_from.size() + 1), str_from.size() + 1};
+                    StringRef ref_to{cache.string_pool.insert(str_to.data(), str_to.size() + 1), str_to.size() + 1};
+                    table[ref_from] = ref_to;
+                }
             }
         }
 
diff --git a/tests/queries/0_stateless/01704_transform_with_float_key.reference b/tests/queries/0_stateless/01704_transform_with_float_key.reference
new file mode 100644
index 00000000000..761e15c903c
--- /dev/null
+++ b/tests/queries/0_stateless/01704_transform_with_float_key.reference
@@ -0,0 +1,30 @@
+-
+Hello
+-
+World
+-
+-
+-
+-
+-
+-
+-
+-
+Hello
+-
+World
+-
+-
+-
+-
+-
+-
+-
+Hello
+-
+World
+-
+-
+-
+-
+-
diff --git a/tests/queries/0_stateless/01704_transform_with_float_key.sql b/tests/queries/0_stateless/01704_transform_with_float_key.sql
new file mode 100644
index 00000000000..690c73ee28a
--- /dev/null
+++ b/tests/queries/0_stateless/01704_transform_with_float_key.sql
@@ -0,0 +1,3 @@
+SELECT transform(number / 2, [0.5, 1.5], ['Hello', 'World'], '-') FROM numbers(10);
+SELECT transform(number / 2, [1.0, 2.0], ['Hello', 'World'], '-') FROM numbers(10);
+SELECT transform(number / 2, [1, 2], ['Hello', 'World'], '-') FROM numbers(10);

From b736f4fdeed4cff47ffe59827c68800b15ae5907 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 21 Feb 2021 10:21:59 +0300
Subject: [PATCH 1215/1238] Suppression for PVS-Studio (looks like a false
 positive)

---
 src/IO/WriteHelpers.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h
index 39666936c68..a382ae13cdd 100644
--- a/src/IO/WriteHelpers.h
+++ b/src/IO/WriteHelpers.h
@@ -893,7 +893,7 @@ inline void writeDateTimeUnixTimestamp(DateTime64 datetime64, UInt32 scale, Writ
     auto components = DecimalUtils::split(datetime64, scale);
     writeIntText(components.whole, buf);
 
-    if (scale > 0)
+    if (scale > 0) //-V547
     {
         buf.write('.');
         writeDecimalTypeFractionalText<DateTime64>(components.fractional, scale, buf);

From b9cb8fdbad9268e35babecd1bf74e645a09a93e8 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sun, 21 Feb 2021 10:45:05 +0300
Subject: [PATCH 1216/1238] Exclude contrib for yamllint check

---
 utils/check-style/check-style | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils/check-style/check-style b/utils/check-style/check-style
index 9336bca2474..f8926a9af2f 100755
--- a/utils/check-style/check-style
+++ b/utils/check-style/check-style
@@ -72,7 +72,7 @@ find $ROOT_PATH/{src,base,programs,utils} -name '*.xml' |
 # FIXME: for now only clickhouse-test
 pylint --rcfile=$ROOT_PATH/.pylintrc --score=n $ROOT_PATH/tests/clickhouse-test
 
-find $ROOT_PATH -name '*.yaml' -or -name '*.yml' |
+find $ROOT_PATH -not -path $ROOT_PATH'/contrib*' \( -name '*.yaml' -or -name '*.yml' \) -type f |
     grep -vP $EXCLUDE_DIRS |
     xargs yamllint --config-file=$ROOT_PATH/.yamllint
 

From 9a264091473bc303fdff39e90d67cb9c52c24b3d Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sun, 21 Feb 2021 10:56:48 +0300
Subject: [PATCH 1217/1238] Fix performance tests (by avoid sharding status
 file for right and left server)

Since cp -al (hard links):

    2021.02.21 01:09:09.991771 [ 243 ] {} <Information> StatusFile: Status file right/db/status already exists - unclean restart. Contents:
    PID: 241
    Started at: 2021-02-21 01:09:09
    Revision: 54448

    2021.02.21 01:09:09.992007 [ 243 ] {} <Error> Application: DB::Exception: Cannot lock file right/db/status. Another server instance in same directory is already running.
---
 docker/test/performance-comparison/compare.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh
index 9a0d8093a55..2b19a5e75a8 100755
--- a/docker/test/performance-comparison/compare.sh
+++ b/docker/test/performance-comparison/compare.sh
@@ -97,6 +97,7 @@ function configure
     rm -r right/db ||:
     rm -r db0/preprocessed_configs ||:
     rm -r db0/{data,metadata}/system ||:
+    rm db0/status ||:
     cp -al db0/ left/db/
     cp -al db0/ right/db/
 }

From 00e6b6232fd5952b84bc318f1f6f1b0a357f92f6 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 21 Feb 2021 12:49:50 +0300
Subject: [PATCH 1218/1238] Revert "optimize aggfunc column data copy (#19407)"

This reverts commit 7e3186c826e5e673368a953a76b14c8369414956.
---
 src/Columns/ColumnAggregateFunction.cpp     | 52 ++-------------------
 src/Columns/ColumnAggregateFunction.h       | 15 ------
 src/Common/HashTable/HashTable.h            |  9 +---
 tests/performance/aggfunc_col_data_copy.xml | 24 ----------
 tests/performance/reinterpret_as.xml        | 20 ++++----
 5 files changed, 17 insertions(+), 103 deletions(-)
 delete mode 100644 tests/performance/aggfunc_col_data_copy.xml

diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp
index 9562dc647c9..d0a5e120a07 100644
--- a/src/Columns/ColumnAggregateFunction.cpp
+++ b/src/Columns/ColumnAggregateFunction.cpp
@@ -75,28 +75,8 @@ void ColumnAggregateFunction::set(const AggregateFunctionPtr & func_)
 ColumnAggregateFunction::~ColumnAggregateFunction()
 {
     if (!func->hasTrivialDestructor() && !src)
-    {
-        if (copiedDataInfo.empty())
-        {
-            for (auto * val : data)
-            {
-                func->destroy(val);
-            }
-        }
-        else
-        {
-            size_t pos;
-            for (Map::iterator it = copiedDataInfo.begin(), it_end = copiedDataInfo.end(); it != it_end; ++it)
-            {
-                pos = it->getValue().second;
-                if (data[pos] != nullptr)
-                {
-                    func->destroy(data[pos]);
-                    data[pos] = nullptr;
-                }
-            }
-        }
-    }
+        for (auto * val : data)
+            func->destroy(val);
 }
 
 void ColumnAggregateFunction::addArena(ConstArenaPtr arena_)
@@ -475,37 +455,14 @@ void ColumnAggregateFunction::insertFrom(const IColumn & from, size_t n)
     ///  (only as a whole, see comment above).
     ensureOwnership();
     insertDefault();
-    insertCopyFrom(assert_cast<const ColumnAggregateFunction &>(from).data[n]);
+    insertMergeFrom(from, n);
 }
 
 void ColumnAggregateFunction::insertFrom(ConstAggregateDataPtr place)
 {
     ensureOwnership();
     insertDefault();
-    insertCopyFrom(place);
-}
-
-void ColumnAggregateFunction::insertCopyFrom(ConstAggregateDataPtr place)
-{
-    Map::LookupResult result;
-    result = copiedDataInfo.find(place);
-    if (result == nullptr)
-    {
-        copiedDataInfo[place] = data.size()-1;
-        func->merge(data.back(), place, &createOrGetArena());
-    }
-    else
-    {
-        size_t pos = result->getValue().second;
-        if (pos != data.size() - 1)
-        {
-            data[data.size() - 1] = data[pos];
-        }
-        else /// insert same data to same pos, merge them.
-        {
-            func->merge(data.back(), place, &createOrGetArena());
-        }
-    }
+    insertMergeFrom(place);
 }
 
 void ColumnAggregateFunction::insertMergeFrom(ConstAggregateDataPtr place)
@@ -740,4 +697,5 @@ MutableColumnPtr ColumnAggregateFunction::cloneResized(size_t size) const
         return cloned_col;
     }
 }
+
 }
diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h
index a1aa9e29a39..cd45cf583a0 100644
--- a/src/Columns/ColumnAggregateFunction.h
+++ b/src/Columns/ColumnAggregateFunction.h
@@ -13,8 +13,6 @@
 
 #include <Functions/FunctionHelpers.h>
 
-#include <Common/HashTable/HashMap.h>
-
 namespace DB
 {
 
@@ -84,17 +82,6 @@ private:
     /// Name of the type to distinguish different aggregation states.
     String type_string;
 
-    /// MergedData records, used to avoid duplicated data copy.
-    ///key: src pointer, val:  pos in current column.
-    using Map = HashMap<
-        ConstAggregateDataPtr,
-        size_t,
-        DefaultHash<ConstAggregateDataPtr>,
-        HashTableGrower<3>,
-        HashTableAllocatorWithStackMemory<sizeof(std::pair<ConstAggregateDataPtr, size_t>) * (1 << 3)>>;
-
-    Map copiedDataInfo;
-
     ColumnAggregateFunction() {}
 
     /// Create a new column that has another column as a source.
@@ -153,8 +140,6 @@ public:
 
     void insertFrom(ConstAggregateDataPtr place);
 
-    void insertCopyFrom(ConstAggregateDataPtr place);
-
     /// Merge state at last row with specified state in another column.
     void insertMergeFrom(ConstAggregateDataPtr place);
 
diff --git a/src/Common/HashTable/HashTable.h b/src/Common/HashTable/HashTable.h
index 892bd0b2ba9..809d0691049 100644
--- a/src/Common/HashTable/HashTable.h
+++ b/src/Common/HashTable/HashTable.h
@@ -69,16 +69,11 @@ namespace ZeroTraits
 {
 
 template <typename T>
-inline bool check(const T x) { return x == 0; }
+bool check(const T x) { return x == 0; }
 
 template <typename T>
-inline void set(T & x) { x = 0; }
+void set(T & x) { x = 0; }
 
-template <>
-inline bool check(const char * x) { return x == nullptr; }
-
-template <>
-inline void set(const char *& x){ x = nullptr; }
 }
 
 
diff --git a/tests/performance/aggfunc_col_data_copy.xml b/tests/performance/aggfunc_col_data_copy.xml
deleted file mode 100644
index 111f7959d58..00000000000
--- a/tests/performance/aggfunc_col_data_copy.xml
+++ /dev/null
@@ -1,24 +0,0 @@
-<test max_ignored_relative_change="0.2">
-  <create_query>drop table if EXISTS test_bm2;</create_query>  
-  <create_query>drop table if EXISTS test_bm_join2;</create_query>  
-  <create_query>create table test_bm2(
-        dim UInt64,
-        id UInt64)
-        ENGINE = MergeTree()
-        ORDER BY( dim )
-        SETTINGS index_granularity = 8192;
-  </create_query>  
-  <create_query>  
-   create table test_bm_join2(
-     dim UInt64,
-     ids AggregateFunction(groupBitmap, UInt64) ) 
-   ENGINE = MergeTree()
-   ORDER BY(dim)
-   SETTINGS index_granularity = 8192;
-  </create_query>  
-  <fill_query>insert into test_bm2 SELECT 1,number FROM numbers(0, 1000)</fill_query>  
-  <fill_query>insert into test_bm_join2 SELECT 1, bitmapBuild(range(toUInt64(0),toUInt64(11000000)))</fill_query>  
-  <query>select a.dim,bitmapCardinality(b.ids) from test_bm2 a left join test_bm_join2 b using(dim)</query>  
-  <drop_query>drop table if exists test_bm2</drop_query> 
-  <drop_query>drop table if exists test_bm_join2</drop_query> 
-</test>
diff --git a/tests/performance/reinterpret_as.xml b/tests/performance/reinterpret_as.xml
index 17c5fd9da11..b9b6fec2084 100644
--- a/tests/performance/reinterpret_as.xml
+++ b/tests/performance/reinterpret_as.xml
@@ -19,7 +19,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(20000000)
+        FROM numbers_mt(200000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -38,7 +38,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(20000000)
+        FROM numbers_mt(200000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -76,7 +76,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(20000000)
+        FROM numbers_mt(200000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -115,7 +115,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(20000000)
+        FROM numbers_mt(200000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -134,7 +134,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(20000000)
+        FROM numbers_mt(200000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -153,7 +153,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(20000000)
+        FROM numbers_mt(200000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -172,7 +172,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(20000000)
+        FROM numbers_mt(200000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -191,7 +191,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(20000000)
+        FROM numbers_mt(200000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -230,7 +230,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(2000000)
+        FROM numbers_mt(20000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -249,7 +249,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(20000000)
+        FROM numbers_mt(100000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>

From 9cb972e9ed740de4f33f55da96518106b121332a Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 21 Feb 2021 13:23:18 +0300
Subject: [PATCH 1219/1238] Fix UBSan report in Decimal arithmetic #19432

---
 base/common/arithmeticOverflow.h              | 50 ++++++++++++-------
 .../0_stateless/01732_bigint_ubsan.reference  |  0
 .../0_stateless/01732_bigint_ubsan.sql        | 11 ++++
 3 files changed, 42 insertions(+), 19 deletions(-)
 create mode 100644 tests/queries/0_stateless/01732_bigint_ubsan.reference
 create mode 100644 tests/queries/0_stateless/01732_bigint_ubsan.sql

diff --git a/base/common/arithmeticOverflow.h b/base/common/arithmeticOverflow.h
index fd557fd5b2d..a92fe56b9cb 100644
--- a/base/common/arithmeticOverflow.h
+++ b/base/common/arithmeticOverflow.h
@@ -6,6 +6,25 @@
 
 namespace common
 {
+    /// Multiply and ignore overflow.
+    template <typename T1, typename T2>
+    inline auto NO_SANITIZE_UNDEFINED mulIgnoreOverflow(T1 x, T2 y)
+    {
+        return x * y;
+    }
+
+    template <typename T1, typename T2>
+    inline auto NO_SANITIZE_UNDEFINED addIgnoreOverflow(T1 x, T2 y)
+    {
+        return x + y;
+    }
+
+    template <typename T1, typename T2>
+    inline auto NO_SANITIZE_UNDEFINED subIgnoreOverflow(T1 x, T2 y)
+    {
+        return x - y;
+    }
+
     template <typename T>
     inline bool addOverflow(T x, T y, T & res)
     {
@@ -35,14 +54,14 @@ namespace common
     {
         static constexpr __int128 min_int128 = minInt128();
         static constexpr __int128 max_int128 = maxInt128();
-        res = x + y;
+        res = addIgnoreOverflow(x, y);
         return (y > 0 && x > max_int128 - y) || (y < 0 && x < min_int128 - y);
     }
 
     template <>
     inline bool addOverflow(wInt256 x, wInt256 y, wInt256 & res)
     {
-        res = x + y;
+        res = addIgnoreOverflow(x, y);
         return (y > 0 && x > std::numeric_limits<wInt256>::max() - y) ||
             (y < 0 && x < std::numeric_limits<wInt256>::min() - y);
     }
@@ -50,7 +69,7 @@ namespace common
     template <>
     inline bool addOverflow(wUInt256 x, wUInt256 y, wUInt256 & res)
     {
-        res = x + y;
+        res = addIgnoreOverflow(x, y);
         return x > std::numeric_limits<wUInt256>::max() - y;
     }
 
@@ -83,14 +102,14 @@ namespace common
     {
         static constexpr __int128 min_int128 = minInt128();
         static constexpr __int128 max_int128 = maxInt128();
-        res = x - y;
+        res = subIgnoreOverflow(x, y);
         return (y < 0 && x > max_int128 + y) || (y > 0 && x < min_int128 + y);
     }
 
     template <>
     inline bool subOverflow(wInt256 x, wInt256 y, wInt256 & res)
     {
-        res = x - y;
+        res = subIgnoreOverflow(x, y);
         return (y < 0 && x > std::numeric_limits<wInt256>::max() + y) ||
             (y > 0 && x < std::numeric_limits<wInt256>::min() + y);
     }
@@ -98,7 +117,7 @@ namespace common
     template <>
     inline bool subOverflow(wUInt256 x, wUInt256 y, wUInt256 & res)
     {
-        res = x - y;
+        res = subIgnoreOverflow(x, y);
         return x < y;
     }
 
@@ -129,40 +148,33 @@ namespace common
     template <>
     inline bool mulOverflow(__int128 x, __int128 y, __int128 & res)
     {
-        res = static_cast<unsigned __int128>(x) * static_cast<unsigned __int128>(y);    /// Avoid signed integer overflow.
+        res = mulIgnoreOverflow(x, y);
         if (!x || !y)
             return false;
 
         unsigned __int128 a = (x > 0) ? x : -x;
         unsigned __int128 b = (y > 0) ? y : -y;
-        return (a * b) / b != a;
+        return mulIgnoreOverflow(a, b) / b != a;
     }
 
     template <>
     inline bool mulOverflow(wInt256 x, wInt256 y, wInt256 & res)
     {
-        res = x * y;
+        res = mulIgnoreOverflow(x, y);
         if (!x || !y)
             return false;
 
         wInt256 a = (x > 0) ? x : -x;
         wInt256 b = (y > 0) ? y : -y;
-        return (a * b) / b != a;
+        return mulIgnoreOverflow(a, b) / b != a;
     }
 
     template <>
     inline bool mulOverflow(wUInt256 x, wUInt256 y, wUInt256 & res)
     {
-        res = x * y;
+        res = mulIgnoreOverflow(x, y);
         if (!x || !y)
             return false;
-        return (x * y) / y != x;
-    }
-
-    /// Multiply and ignore overflow.
-    template <typename T1, typename T2>
-    inline auto NO_SANITIZE_UNDEFINED mulIgnoreOverflow(T1 x, T2 y)
-    {
-        return x * y;
+        return res / y != x;
     }
 }
diff --git a/tests/queries/0_stateless/01732_bigint_ubsan.reference b/tests/queries/0_stateless/01732_bigint_ubsan.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/01732_bigint_ubsan.sql b/tests/queries/0_stateless/01732_bigint_ubsan.sql
new file mode 100644
index 00000000000..238a5d99d30
--- /dev/null
+++ b/tests/queries/0_stateless/01732_bigint_ubsan.sql
@@ -0,0 +1,11 @@
+CREATE TEMPORARY TABLE decimal
+(
+    f dec(38, 38)
+);
+
+INSERT INTO decimal VALUES (0);
+INSERT INTO decimal VALUES (0.42);
+INSERT INTO decimal VALUES (-0.42);
+
+SELECT f + 1048575, f - 21, f - 84, f * 21, f * -21, f / 21, f / 84 FROM decimal WHERE f > 0; -- { serverError 407 }
+SELECT f + -2, f - 21, f - 84, f * 21, f * -21, f / 9223372036854775807, f / 84 FROM decimal WHERE f > 0; -- { serverError 407 }

From af2f4ceb0e267041aed3f28a9a2e4de39a40c716 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 21 Feb 2021 15:34:04 +0300
Subject: [PATCH 1220/1238] Add disambiguation while parsing already scaled
 DateTime64 #13194

---
 src/IO/ReadHelpers.cpp | 17 ++++++++++-------
 src/IO/ReadHelpers.h   | 18 ++++++++++++------
 2 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp
index fe563021d2e..72ffd74a42d 100644
--- a/src/IO/ReadHelpers.cpp
+++ b/src/IO/ReadHelpers.cpp
@@ -831,14 +831,18 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D
     static constexpr auto date_time_broken_down_length = 19;
     /// YYYY-MM-DD
     static constexpr auto date_broken_down_length = 10;
-    /// unix timestamp max length
-    static constexpr auto unix_timestamp_max_length = 10;
 
     char s[date_time_broken_down_length];
     char * s_pos = s;
 
-    /// A piece similar to unix timestamp.
-    while (s_pos < s + unix_timestamp_max_length && !buf.eof() && isNumericASCII(*buf.position()))
+    /** Read characters, that could represent unix timestamp.
+      * Only unix timestamp of at least 5 characters is supported.
+      * Then look at 5th character. If it is a number - treat whole as unix timestamp.
+      * If it is not a number - then parse datetime in YYYY-MM-DD hh:mm:ss or YYYY-MM-DD format.
+      */
+
+    /// A piece similar to unix timestamp, maybe scaled to subsecond precision.
+    while (s_pos < s + date_time_broken_down_length && !buf.eof() && isNumericASCII(*buf.position()))
     {
         *s_pos = *buf.position();
         ++s_pos;
@@ -846,7 +850,7 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D
     }
 
     /// 2015-01-01 01:02:03 or 2015-01-01
-    if (s_pos == s + 4 && !buf.eof() && (*buf.position() < '0' || *buf.position() > '9'))
+    if (s_pos == s + 4 && !buf.eof() && !isNumericASCII(*buf.position()))
     {
         const auto already_read_length = s_pos - s;
         const size_t remaining_date_time_size = date_time_broken_down_length - already_read_length;
@@ -885,8 +889,7 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D
     }
     else
     {
-        /// Only unix timestamp of 5-10 characters is supported. For consistency. See readDateTimeTextImpl.
-        if (s_pos - s >= 5 && s_pos - s <= 10)
+        if (s_pos - s >= 5)
         {
             /// Not very efficient.
             datetime = 0;
diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h
index d203bd7bbee..e33de04f322 100644
--- a/src/IO/ReadHelpers.h
+++ b/src/IO/ReadHelpers.h
@@ -703,12 +703,6 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D
 template <typename ReturnType = void>
 inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut)
 {
-    /** Read 10 characters, that could represent unix timestamp.
-      * Only unix timestamp of 5-10 characters is supported.
-      * Then look at 5th character. If it is a number - treat whole as unix timestamp.
-      * If it is not a number - then parse datetime in YYYY-MM-DD hh:mm:ss or YYYY-MM-DD format.
-      */
-
     /// Optimistic path, when whole value is in buffer.
     const char * s = buf.position();
 
@@ -779,6 +773,18 @@ inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, Re
         while (!buf.eof() && isNumericASCII(*buf.position()))
             ++buf.position();
     }
+    else if (scale && (whole >= 1000000000LL * scale))
+    {
+        /// Unix timestamp with subsecond precision, already scaled to integer.
+        /// For disambiguation we support only time since 2001-09-09 01:46:40 UTC and less than 30 000 years in future.
+
+        for (size_t i = 0; i < scale; ++i)
+        {
+            components.fractional *= 10;
+            components.fractional += components.whole % 10;
+            components.whole /= 10;
+        }
+    }
 
     datetime64 = DecimalUtils::decimalFromComponents<DateTime64>(components, scale);
 

From 4085782b05657f8f7ad670dd563ca2d75a63bce5 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 21 Feb 2021 15:34:48 +0300
Subject: [PATCH 1221/1238] More gentle exception messages

---
 src/Functions/FunctionsConversion.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h
index b95d4ea9790..74d8d853dcb 100644
--- a/src/Functions/FunctionsConversion.h
+++ b/src/Functions/FunctionsConversion.h
@@ -1313,7 +1313,7 @@ public:
             else if constexpr (std::is_same_v<Name, NameToDecimal256>)
                 return createDecimalMaxPrecision<Decimal256>(scale);
 
-            throw Exception("Something wrong with toDecimalNN()", ErrorCodes::LOGICAL_ERROR);
+            throw Exception("Unexpected branch in code of conversion function: it is a bug.", ErrorCodes::LOGICAL_ERROR);
         }
         else
         {
@@ -1337,7 +1337,7 @@ public:
             if constexpr (std::is_same_v<ToDataType, DataTypeDateTime>)
                 return std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, timezone_arg_position, 0));
             else if constexpr (std::is_same_v<ToDataType, DataTypeDateTime64>)
-                throw Exception("LOGICAL ERROR: It is a bug.", ErrorCodes::LOGICAL_ERROR);
+                throw Exception("Unexpected branch in code of conversion function: it is a bug.", ErrorCodes::LOGICAL_ERROR);
             else
                 return std::make_shared<ToDataType>();
         }

From 75edfd0549e80223371a87807b90a7006f02b7f3 Mon Sep 17 00:00:00 2001
From: Michael Monashev <mmb@ya.ru>
Date: Sun, 21 Feb 2021 15:56:47 +0300
Subject: [PATCH 1222/1238] Update lowcardinality.md

---
 docs/ru/sql-reference/data-types/lowcardinality.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/sql-reference/data-types/lowcardinality.md b/docs/ru/sql-reference/data-types/lowcardinality.md
index d94cedd29ce..1e83cdbc58c 100644
--- a/docs/ru/sql-reference/data-types/lowcardinality.md
+++ b/docs/ru/sql-reference/data-types/lowcardinality.md
@@ -23,7 +23,7 @@ LowCardinality(data_type)
 
 Эффективность использования типа данных `LowCarditality` зависит от разнообразия данных. Если словарь содержит менее 10 000 различных значений, ClickHouse в основном показывает более высокую эффективность чтения и хранения данных. Если же словарь содержит более 100 000 различных значений, ClickHouse может работать хуже, чем при использовании обычных типов данных.
 
-При работе со строками, использование `LowCardinality` вместо [Enum](enum.md). `LowCardinality` обеспечивает большую гибкость в использовании и часто показывает такую же или более высокую эффективность.
+При работе со строками, используйте `LowCardinality` вместо [Enum](enum.md). `LowCardinality` обеспечивает большую гибкость в использовании и часто показывает такую же или более высокую эффективность.
 
 ## Пример
 

From f93b450da9ea6f80f7cb50e6bf374eda2d73498a Mon Sep 17 00:00:00 2001
From: Michael Monashev <mmb@ya.ru>
Date: Sun, 21 Feb 2021 16:00:52 +0300
Subject: [PATCH 1223/1238] Update lowcardinality.md

---
 docs/ru/sql-reference/data-types/lowcardinality.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/sql-reference/data-types/lowcardinality.md b/docs/ru/sql-reference/data-types/lowcardinality.md
index 1e83cdbc58c..3b5b337d731 100644
--- a/docs/ru/sql-reference/data-types/lowcardinality.md
+++ b/docs/ru/sql-reference/data-types/lowcardinality.md
@@ -23,7 +23,7 @@ LowCardinality(data_type)
 
 Эффективность использования типа данных `LowCarditality` зависит от разнообразия данных. Если словарь содержит менее 10 000 различных значений, ClickHouse в основном показывает более высокую эффективность чтения и хранения данных. Если же словарь содержит более 100 000 различных значений, ClickHouse может работать хуже, чем при использовании обычных типов данных.
 
-При работе со строками, используйте `LowCardinality` вместо [Enum](enum.md). `LowCardinality` обеспечивает большую гибкость в использовании и часто показывает такую же или более высокую эффективность.
+При работе со строками используйте `LowCardinality` вместо [Enum](enum.md). `LowCardinality` обеспечивает большую гибкость в использовании и часто показывает такую же или более высокую эффективность.
 
 ## Пример
 

From 46f67fa8ac9ffb506efb089359dea17c33fde894 Mon Sep 17 00:00:00 2001
From: Vitaliy Zakaznikov <vzakaznikov@protonmail.com>
Date: Sun, 21 Feb 2021 09:45:20 -0500
Subject: [PATCH 1224/1238] Adding examples to live view docs.

---
 .../sql-reference/statements/create/view.md   | 95 ++++++++++++++++++-
 docs/en/sql-reference/statements/watch.md     | 54 ++++++++++-
 2 files changed, 146 insertions(+), 3 deletions(-)

diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md
index 662a4b54754..7a1200b2d94 100644
--- a/docs/en/sql-reference/statements/create/view.md
+++ b/docs/en/sql-reference/statements/create/view.md
@@ -41,7 +41,6 @@ SELECT a, b, c FROM (SELECT ...)
 CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]name] [ENGINE = engine] [POPULATE] AS SELECT ...
 ```
 
-
 Materialized views store data transformed by the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query.
 
 When creating a materialized view without `TO [db].[table]`, you must specify `ENGINE` – the table engine for storing data.
@@ -95,12 +94,63 @@ You can watch for changes in the live view query result using the [WATCH](../../
 WATCH [db.]live_view
 ```
 
+**Example:**
+
+```sql
+CREATE TABLE mt (x Int8) Engine = MergeTree ORDER BY x;
+CREATE LIVE VIEW lv AS SELECT sum(x) FROM mt;
+```
+
+Watch a live view while doing a parallel insert into the source table.
+
+```sql
+WATCH lv
+```
+
+```bash
+┌─sum(x)─┬─_version─┐
+│      1 │        1 │
+└────────┴──────────┘
+┌─sum(x)─┬─_version─┐
+│      2 │        2 │
+└────────┴──────────┘
+┌─sum(x)─┬─_version─┐
+│      6 │        3 │
+└────────┴──────────┘
+...
+```
+
+```sql
+INSERT INTO mt VALUES (1);
+INSERT INTO mt VALUES (2);
+INSERT INTO mt VALUES (3);
+```
+
 or add [EVENTS](../../../sql-reference/statements/watch.md#events-clause) clause to just get change events.
 
 ```sql
 WATCH [db.]live_view EVENTS
 ```
 
+**Example:**
+
+```sql
+WATCH lv EVENTS
+```
+
+```bash
+┌─version─┐
+│       1 │
+└─────────┘
+┌─version─┐
+│       2 │
+└─────────┘
+┌─version─┐
+│       3 │
+└─────────┘
+...
+```
+
 You can execute [SELECT](../../../sql-reference/statements/select/index.md) query on a live view in the same way as for any regular view or a table. If the query result is cached it will return the result immediately without running the stored query on the underlying tables.
 
 ```sql
@@ -121,6 +171,13 @@ CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AS SELECT ...
 
 If the timeout value is not specified then the value specified by the `temporary_live_view_timeout` setting is used.
 
+**Example:**
+
+```sql
+CREATE TABLE mt (x Int8) Engine = MergeTree ORDER BY x;
+CREATE LIVE VIEW lv WITH TIMEOUT 15 AS SELECT sum(x) FROM mt;
+```
+
 ### With Refresh {#live-view-with-refresh}
 
 When a live view is created with a `WITH REFRESH` clause then it will be automatically refreshed after the specified number of seconds elapse since the last refresh or trigger.
@@ -131,11 +188,47 @@ CREATE LIVE VIEW [db.]table_name WITH REFRESH [value_in_sec] AS SELECT ...
 
 If the refresh value is not specified then the value specified by the `periodic_live_view_refresh` setting is used.
 
+**Example:**
+
+```sql
+CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now();
+WATCH lv
+```
+
+```bash
+┌───────────────now()─┬─_version─┐
+│ 2021-02-21 08:47:05 │        1 │
+└─────────────────────┴──────────┘
+┌───────────────now()─┬─_version─┐
+│ 2021-02-21 08:47:10 │        2 │
+└─────────────────────┴──────────┘
+┌───────────────now()─┬─_version─┐
+│ 2021-02-21 08:47:15 │        3 │
+└─────────────────────┴──────────┘
+```
+
 You can combine `WITH TIMEOUT` and `WITH REFRESH` clauses using an `AND` clause. 
 
 ```sql
 CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AND REFRESH [value_in_sec] AS SELECT ...
 ```
+
+**Example:**
+
+```sql
+CREATE LIVE VIEW lv WITH TIMEOUT 15 AND REFRESH 5 AS SELECT now();
+```
+
+After 15 sec the live view will be automatically dropped if there are no active `WATCH` queries.
+
+```sql
+WATCH lv
+```
+
+```
+Code: 60. DB::Exception: Received from localhost:9000. DB::Exception: Table default.lv doesn't exist.. 
+```
+
 ### Usage
 
 Most common uses of live view tables include:
diff --git a/docs/en/sql-reference/statements/watch.md b/docs/en/sql-reference/statements/watch.md
index 07b050d4c4e..761bc8a041e 100644
--- a/docs/en/sql-reference/statements/watch.md
+++ b/docs/en/sql-reference/statements/watch.md
@@ -25,6 +25,26 @@ WATCH [db.]live_view
 
 The virtual `_version` column in the query result indicates the current result version.
 
+**Example:**
+
+```sql
+CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now();
+WATCH lv
+```
+
+```bash
+┌───────────────now()─┬─_version─┐
+│ 2021-02-21 09:17:21 │        1 │
+└─────────────────────┴──────────┘
+┌───────────────now()─┬─_version─┐
+│ 2021-02-21 09:17:26 │        2 │
+└─────────────────────┴──────────┘
+┌───────────────now()─┬─_version─┐
+│ 2021-02-21 09:17:31 │        3 │
+└─────────────────────┴──────────┘
+...
+```
+
 By default, the requested data is returned to the client, while in conjunction with [INSERT INTO](../../sql-reference/statements/insert-into.md) it can be forwarded to a different table.
 
 ```sql
@@ -36,7 +56,24 @@ INSERT INTO [db.]table WATCH [db.]live_view ...
 The `EVENTS` clause can be used to obtain a short form of the `WATCH` query where instead of the query result you will just get the latest query result version.
 
 ```sql
-WATCH [db.]live_view EVENTS LIMIT 1
+WATCH [db.]live_view EVENTS
+```
+
+**Example:**
+
+```sql
+CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now();
+WATCH lv EVENTS
+```
+
+```bash
+┌─version─┐
+│       1 │
+└─────────┘
+┌─version─┐
+│       2 │
+└─────────┘
+...
 ```
 
 ## LIMIT Clause {#limit-clause}
@@ -44,7 +81,20 @@ WATCH [db.]live_view EVENTS LIMIT 1
 The `LIMIT n` clause species the number of updates the `WATCH` query should wait for before terminating. By default there is no limit on the number of updates and therefore the query will not terminate. The value of `0` indicates that the `WATCH` query should not wait for any new query results and therefore will return immediately once query is evaluated.
 
 ```sql
-WATCH [db.]live_view LIMIT 2
+WATCH [db.]live_view LIMIT 1
+```
+
+**Example:**
+
+```sql
+CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now();
+WATCH lv EVENTS LIMIT 1
+```
+
+```bash
+┌─version─┐
+│       1 │
+└─────────┘
 ```
 
 ## FORMAT Clause {#format-clause}

From 11dd0a4ba5b11d00a867d2a22df2803d96fc2d8e Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sun, 21 Feb 2021 21:01:41 +0300
Subject: [PATCH 1225/1238] Trigger CI


From d2a9e5842a0a6258ac04f22f88a45770f767371c Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 21 Feb 2021 11:04:13 +0300
Subject: [PATCH 1226/1238] Fix wrong formatting of overflowed DateTime64

---
 base/common/DateLUTImpl.h                     | 36 +++++++++----------
 ..._toDateTime_from_string_clamping.reference |  9 +++++
 .../01702_toDateTime_from_string_clamping.sql |  5 +++
 3 files changed, 31 insertions(+), 19 deletions(-)
 create mode 100644 tests/queries/0_stateless/01702_toDateTime_from_string_clamping.reference
 create mode 100644 tests/queries/0_stateless/01702_toDateTime_from_string_clamping.sql

diff --git a/base/common/DateLUTImpl.h b/base/common/DateLUTImpl.h
index 0538d3a9ab8..064787fb64e 100644
--- a/base/common/DateLUTImpl.h
+++ b/base/common/DateLUTImpl.h
@@ -7,6 +7,7 @@
 #include <ctime>
 #include <string>
 
+
 #define DATE_LUT_MAX (0xFFFFFFFFU - 86400)
 #define DATE_LUT_MAX_DAY_NUM (0xFFFFFFFFU / 86400)
 /// Table size is bigger than DATE_LUT_MAX_DAY_NUM to fill all indices within UInt16 range: this allows to remove extra check.
@@ -249,7 +250,7 @@ public:
     {
         DayNum index = findIndex(t);
 
-        if (unlikely(index == 0))
+        if (unlikely(index == 0 || index > DATE_LUT_MAX_DAY_NUM))
             return t + offset_at_start_of_epoch;
 
         time_t res = t - lut[index].date;
@@ -264,18 +265,18 @@ public:
     {
         DayNum index = findIndex(t);
 
-        /// If it is not 1970 year (findIndex found nothing appropriate),
-        ///  than limit number of hours to avoid insane results like 1970-01-01 89:28:15
-        if (unlikely(index == 0))
+        /// If it is overflow case,
+        ///  then limit number of hours to avoid insane results like 1970-01-01 89:28:15
+        if (unlikely(index == 0 || index > DATE_LUT_MAX_DAY_NUM))
             return static_cast<unsigned>((t + offset_at_start_of_epoch) / 3600) % 24;
 
-        time_t res = t - lut[index].date;
+        time_t time = t - lut[index].date;
 
-        /// Data is cleaned to avoid possibility of underflow.
-        if (res >= lut[index].time_at_offset_change)
-            res += lut[index].amount_of_offset_change;
+        if (time >= lut[index].time_at_offset_change)
+            time += lut[index].amount_of_offset_change;
 
-        return res / 3600;
+        unsigned res = time / 3600;
+        return res <= 23 ? res : 0;
     }
 
     /** Calculating offset from UTC in seconds.
@@ -314,12 +315,12 @@ public:
       *  each minute, with added or subtracted leap second, spans exactly 60 unix timestamps.
       */
 
-    inline unsigned toSecond(time_t t) const { return t % 60; }
+    inline unsigned toSecond(time_t t) const { return UInt32(t) % 60; }
 
     inline unsigned toMinute(time_t t) const
     {
         if (offset_is_whole_number_of_hours_everytime)
-            return (t / 60) % 60;
+            return (UInt32(t) / 60) % 60;
 
         UInt32 date = find(t).date;
         return (UInt32(t) - date) / 60 % 60;
@@ -555,9 +556,7 @@ public:
         }
     }
 
-    /*
-     * check and change mode to effective
-     */
+    /// Check and change mode to effective.
     inline UInt8 check_week_mode(UInt8 mode) const
     {
         UInt8 week_format = (mode & 7);
@@ -566,10 +565,9 @@ public:
         return week_format;
     }
 
-    /*
-     * Calc weekday from d
-     * Returns 0 for monday, 1 for tuesday ...
-     */
+    /** Calculate weekday from d.
+      * Returns 0 for monday, 1 for tuesday...
+      */
     inline unsigned calc_weekday(DayNum d, bool sunday_first_day_of_week) const
     {
         if (!sunday_first_day_of_week)
@@ -578,7 +576,7 @@ public:
             return toDayOfWeek(DayNum(d + 1)) - 1;
     }
 
-    /* Calc days in one year. */
+    /// Calculate days in one year.
     inline unsigned calc_days_in_year(UInt16 year) const
     {
         return ((year & 3) == 0 && (year % 100 || (year % 400 == 0 && year)) ? 366 : 365);
diff --git a/tests/queries/0_stateless/01702_toDateTime_from_string_clamping.reference b/tests/queries/0_stateless/01702_toDateTime_from_string_clamping.reference
new file mode 100644
index 00000000000..228086615da
--- /dev/null
+++ b/tests/queries/0_stateless/01702_toDateTime_from_string_clamping.reference
@@ -0,0 +1,9 @@
+-- { echo }
+SELECT toString(toDateTime('-922337203.6854775808', 1));
+2106-02-07 15:41:33.6
+SELECT toString(toDateTime('9922337203.6854775808', 1));
+2104-12-30 00:50:11.6
+SELECT toDateTime64(CAST('10000000000.1' AS Decimal64(1)), 1);
+2106-02-07 20:50:08.1
+SELECT toDateTime64(CAST('-10000000000.1' AS Decimal64(1)), 1);
+2011-12-23 00:38:08.1
diff --git a/tests/queries/0_stateless/01702_toDateTime_from_string_clamping.sql b/tests/queries/0_stateless/01702_toDateTime_from_string_clamping.sql
new file mode 100644
index 00000000000..5c75da9bf30
--- /dev/null
+++ b/tests/queries/0_stateless/01702_toDateTime_from_string_clamping.sql
@@ -0,0 +1,5 @@
+-- { echo }
+SELECT toString(toDateTime('-922337203.6854775808', 1));
+SELECT toString(toDateTime('9922337203.6854775808', 1));
+SELECT toDateTime64(CAST('10000000000.1' AS Decimal64(1)), 1);
+SELECT toDateTime64(CAST('-10000000000.1' AS Decimal64(1)), 1);
\ No newline at end of file

From adfac060dc51fe65c50c01bec9bbdf5545f20204 Mon Sep 17 00:00:00 2001
From: alexey-milovidov <milovidov@yandex-team.ru>
Date: Sun, 21 Feb 2021 11:06:49 +0300
Subject: [PATCH 1227/1238] Update 01702_toDateTime_from_string_clamping.sql

---
 .../0_stateless/01702_toDateTime_from_string_clamping.sql       | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/01702_toDateTime_from_string_clamping.sql b/tests/queries/0_stateless/01702_toDateTime_from_string_clamping.sql
index 5c75da9bf30..d1f0416149a 100644
--- a/tests/queries/0_stateless/01702_toDateTime_from_string_clamping.sql
+++ b/tests/queries/0_stateless/01702_toDateTime_from_string_clamping.sql
@@ -2,4 +2,4 @@
 SELECT toString(toDateTime('-922337203.6854775808', 1));
 SELECT toString(toDateTime('9922337203.6854775808', 1));
 SELECT toDateTime64(CAST('10000000000.1' AS Decimal64(1)), 1);
-SELECT toDateTime64(CAST('-10000000000.1' AS Decimal64(1)), 1);
\ No newline at end of file
+SELECT toDateTime64(CAST('-10000000000.1' AS Decimal64(1)), 1);

From 7dd49b69ea3dd72f4c3c904506f649451a66285d Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 21 Feb 2021 22:36:39 +0300
Subject: [PATCH 1228/1238] Add test for #19376

---
 tests/queries/0_stateless/01733_transform_ubsan.reference | 0
 tests/queries/0_stateless/01733_transform_ubsan.sql       | 4 ++++
 2 files changed, 4 insertions(+)
 create mode 100644 tests/queries/0_stateless/01733_transform_ubsan.reference
 create mode 100644 tests/queries/0_stateless/01733_transform_ubsan.sql

diff --git a/tests/queries/0_stateless/01733_transform_ubsan.reference b/tests/queries/0_stateless/01733_transform_ubsan.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/01733_transform_ubsan.sql b/tests/queries/0_stateless/01733_transform_ubsan.sql
new file mode 100644
index 00000000000..256603e9087
--- /dev/null
+++ b/tests/queries/0_stateless/01733_transform_ubsan.sql
@@ -0,0 +1,4 @@
+SELECT arrayStringConcat(arrayMap(x -> transform(x, [1025, -9223372036854775808, 65537, 257, 1048576, 10, 7, 1048575, 65536], ['yandex', 'googlegooglegooglegoogle', 'test', '', '', 'hello', 'world', '', 'xyz'], ''), arrayMap(x -> (x % -inf), range(number))), '')
+FROM system.numbers
+LIMIT 1025
+FORMAT Null;

From 521537eed8819201a11b3ca3bf21112f4ab91da4 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sun, 21 Feb 2021 21:09:51 +0300
Subject: [PATCH 1229/1238] Fix DateTime64 from Float

---
 src/Functions/FunctionsConversion.h           | 23 +++++++++++++++++--
 .../01734_datetime64_from_float.reference     |  7 ++++++
 .../01734_datetime64_from_float.sql           |  4 ++++
 3 files changed, 32 insertions(+), 2 deletions(-)
 create mode 100644 tests/queries/0_stateless/01734_datetime64_from_float.reference
 create mode 100644 tests/queries/0_stateless/01734_datetime64_from_float.sql

diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h
index 74d8d853dcb..2e2a4ce9cfa 100644
--- a/src/Functions/FunctionsConversion.h
+++ b/src/Functions/FunctionsConversion.h
@@ -516,6 +516,25 @@ struct ToDateTime64TransformSigned
         return DecimalUtils::decimalFromComponentsWithMultiplier<DateTime64>(from, 0, scale_multiplier);
     }
 };
+template <typename FromDataType, typename FromType>
+struct ToDateTime64TransformFloat
+{
+    static constexpr auto name = "toDateTime64";
+
+    const UInt32 scale = 1;
+
+    ToDateTime64TransformFloat(UInt32 scale_ = 0)
+        : scale(scale_)
+    {}
+
+    inline NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const
+    {
+        if (from < 0)
+            return 0;
+        from = std::min<FromType>(from, FromType(0xFFFFFFFF));
+        return convertToDecimal<FromDataType, DataTypeDateTime64>(from, scale);
+    }
+};
 
 template <typename Name> struct ConvertImpl<DataTypeInt8, DataTypeDateTime64, Name>
     : DateTimeTransformImpl<DataTypeInt8, DataTypeDateTime64, ToDateTime64TransformSigned<Int8>> {};
@@ -528,9 +547,9 @@ template <typename Name> struct ConvertImpl<DataTypeInt64, DataTypeDateTime64, N
 template <typename Name> struct ConvertImpl<DataTypeUInt64, DataTypeDateTime64, Name>
     : DateTimeTransformImpl<DataTypeUInt64, DataTypeDateTime64, ToDateTime64TransformUnsigned<UInt64>> {};
 template <typename Name> struct ConvertImpl<DataTypeFloat32, DataTypeDateTime64, Name>
-    : DateTimeTransformImpl<DataTypeFloat32, DataTypeDateTime64, ToDateTime64TransformSigned<Float32>> {};
+    : DateTimeTransformImpl<DataTypeFloat32, DataTypeDateTime64, ToDateTime64TransformFloat<DataTypeFloat32, Float32>> {};
 template <typename Name> struct ConvertImpl<DataTypeFloat64, DataTypeDateTime64, Name>
-    : DateTimeTransformImpl<DataTypeFloat64, DataTypeDateTime64, ToDateTime64TransformSigned<Float64>> {};
+    : DateTimeTransformImpl<DataTypeFloat64, DataTypeDateTime64, ToDateTime64TransformFloat<DataTypeFloat64, Float64>> {};
 
 /** Conversion of DateTime64 to Date or DateTime: discards fractional part.
  */
diff --git a/tests/queries/0_stateless/01734_datetime64_from_float.reference b/tests/queries/0_stateless/01734_datetime64_from_float.reference
new file mode 100644
index 00000000000..32e7d2736c6
--- /dev/null
+++ b/tests/queries/0_stateless/01734_datetime64_from_float.reference
@@ -0,0 +1,7 @@
+-- { echo }
+SELECT CAST(1111111111.222 AS DateTime64(3));
+2005-03-18 04:58:31.222
+SELECT toDateTime(1111111111.222, 3);
+2005-03-18 04:58:31.222
+SELECT toDateTime64(1111111111.222, 3);
+2005-03-18 04:58:31.222
diff --git a/tests/queries/0_stateless/01734_datetime64_from_float.sql b/tests/queries/0_stateless/01734_datetime64_from_float.sql
new file mode 100644
index 00000000000..b6be65cb7c2
--- /dev/null
+++ b/tests/queries/0_stateless/01734_datetime64_from_float.sql
@@ -0,0 +1,4 @@
+-- { echo }
+SELECT CAST(1111111111.222 AS DateTime64(3));
+SELECT toDateTime(1111111111.222, 3);
+SELECT toDateTime64(1111111111.222, 3);

From d220fe1a4ed9ecd8e502973f8d9b0399753c7d74 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sun, 21 Feb 2021 22:33:18 +0300
Subject: [PATCH 1230/1238] Add cases with floats into 01691_DateTime64_clamp

---
 .../queries/0_stateless/01691_DateTime64_clamp.reference  | 8 ++++++++
 tests/queries/0_stateless/01691_DateTime64_clamp.sql      | 4 ++++
 2 files changed, 12 insertions(+)

diff --git a/tests/queries/0_stateless/01691_DateTime64_clamp.reference b/tests/queries/0_stateless/01691_DateTime64_clamp.reference
index de72027334c..849f8139640 100644
--- a/tests/queries/0_stateless/01691_DateTime64_clamp.reference
+++ b/tests/queries/0_stateless/01691_DateTime64_clamp.reference
@@ -9,3 +9,11 @@ SELECT CAST('2020-01-01 00:00:00.3' AS DateTime64);
 2020-01-01 00:00:00.300
 SELECT toDateTime64(bitShiftLeft(toUInt64(1),33), 2);
 2106-02-07 09:28:15.00
+SELECT toDateTime(-2., 2);
+1970-01-01 03:00:00.00
+SELECT toDateTime64(-2., 2);
+1970-01-01 03:00:00.00
+SELECT toDateTime64(toFloat32(bitShiftLeft(toUInt64(1),33)), 2);
+2106-02-07 09:28:16.00
+SELECT toDateTime64(toFloat64(bitShiftLeft(toUInt64(1),33)), 2);
+2106-02-07 09:28:15.00
diff --git a/tests/queries/0_stateless/01691_DateTime64_clamp.sql b/tests/queries/0_stateless/01691_DateTime64_clamp.sql
index 6b5a4815f37..f02d45a2cff 100644
--- a/tests/queries/0_stateless/01691_DateTime64_clamp.sql
+++ b/tests/queries/0_stateless/01691_DateTime64_clamp.sql
@@ -4,3 +4,7 @@ SELECT toDateTime64(-2, 2);
 SELECT CAST(-1 AS DateTime64);
 SELECT CAST('2020-01-01 00:00:00.3' AS DateTime64);
 SELECT toDateTime64(bitShiftLeft(toUInt64(1),33), 2);
+SELECT toDateTime(-2., 2);
+SELECT toDateTime64(-2., 2);
+SELECT toDateTime64(toFloat32(bitShiftLeft(toUInt64(1),33)), 2);
+SELECT toDateTime64(toFloat64(bitShiftLeft(toUInt64(1),33)), 2);

From 0ab14120ef311ec7ff614b08a25268fb078cc7e5 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 21 Feb 2021 23:06:31 +0300
Subject: [PATCH 1231/1238] Improve performance of trivial count query in
 presense of "distributed_aggregation_memory_efficient"

---
 src/Interpreters/InterpreterSelectQuery.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp
index 3008c55973d..da6ad7ab102 100644
--- a/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/src/Interpreters/InterpreterSelectQuery.cpp
@@ -1285,8 +1285,11 @@ void InterpreterSelectQuery::executeFetchColumns(
         const auto & desc = query_analyzer->aggregates()[0];
         const auto & func = desc.function;
         std::optional<UInt64> num_rows{};
+
         if (!query.prewhere() && !query.where())
+        {
             num_rows = storage->totalRows(settings);
+        }
         else // It's possible to optimize count() given only partition predicates
         {
             SelectQueryInfo temp_query_info;
@@ -1296,6 +1299,7 @@ void InterpreterSelectQuery::executeFetchColumns(
 
             num_rows = storage->totalRowsByPartitionPredicate(temp_query_info, *context);
         }
+
         if (num_rows)
         {
             AggregateFunctionCount & agg_count = static_cast<AggregateFunctionCount &>(*func);
@@ -1790,7 +1794,7 @@ void InterpreterSelectQuery::executeMergeAggregated(QueryPlan & query_plan, bool
     auto merging_aggregated = std::make_unique<MergingAggregatedStep>(
             query_plan.getCurrentDataStream(),
             std::move(transform_params),
-            settings.distributed_aggregation_memory_efficient,
+            settings.distributed_aggregation_memory_efficient && storage && storage->isRemote(),
             settings.max_threads,
             settings.aggregation_memory_efficient_merge_threads);
 

From d7f017c4ddfabaf0e0ba972491ba1495a17e445c Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 21 Feb 2021 23:06:31 +0300
Subject: [PATCH 1232/1238] Improve performance of trivial count query in
 presense of "distributed_aggregation_memory_efficient"

---
 src/Interpreters/InterpreterSelectQuery.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp
index 9f97160f77f..370e7224542 100644
--- a/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/src/Interpreters/InterpreterSelectQuery.cpp
@@ -1269,8 +1269,11 @@ void InterpreterSelectQuery::executeFetchColumns(
         const auto & desc = query_analyzer->aggregates()[0];
         const auto & func = desc.function;
         std::optional<UInt64> num_rows{};
+
         if (!query.prewhere() && !query.where())
+        {
             num_rows = storage->totalRows(settings);
+        }
         else // It's possible to optimize count() given only partition predicates
         {
             SelectQueryInfo temp_query_info;
@@ -1280,6 +1283,7 @@ void InterpreterSelectQuery::executeFetchColumns(
 
             num_rows = storage->totalRowsByPartitionPredicate(temp_query_info, *context);
         }
+
         if (num_rows)
         {
             AggregateFunctionCount & agg_count = static_cast<AggregateFunctionCount &>(*func);
@@ -1774,7 +1778,7 @@ void InterpreterSelectQuery::executeMergeAggregated(QueryPlan & query_plan, bool
     auto merging_aggregated = std::make_unique<MergingAggregatedStep>(
             query_plan.getCurrentDataStream(),
             std::move(transform_params),
-            settings.distributed_aggregation_memory_efficient,
+            settings.distributed_aggregation_memory_efficient && storage && storage->isRemote(),
             settings.max_threads,
             settings.aggregation_memory_efficient_merge_threads);
 

From 2a8e21773cea7aea2e5fca99b02f69438f500373 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 21 Feb 2021 23:18:52 +0300
Subject: [PATCH 1233/1238] Add skip list for parallel runs

---
 tests/queries/skip_list.json | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json
index b3f4a8d009e..76b219799c5 100644
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@@ -747,6 +747,7 @@
         "memory_leak",
         "memory_limit",
         "polygon_dicts", // they use an explicitly specified database
-        "01658_read_file_to_stringcolumn"
+        "01658_read_file_to_stringcolumn",
+        "01721_engine_file_truncate_on_insert" // It's ok to execute in parallel but not several instances of the same test.
     ]
 }

From 29730425d6bf052ba40373f428708f7ae89faf2a Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Sun, 21 Feb 2021 23:34:38 +0300
Subject: [PATCH 1234/1238] Add a test

---
 ...01732_more_consistent_datetime64_parsing.reference |  8 ++++++++
 .../01732_more_consistent_datetime64_parsing.sql      | 11 +++++++++++
 2 files changed, 19 insertions(+)
 create mode 100644 tests/queries/0_stateless/01732_more_consistent_datetime64_parsing.reference
 create mode 100644 tests/queries/0_stateless/01732_more_consistent_datetime64_parsing.sql

diff --git a/tests/queries/0_stateless/01732_more_consistent_datetime64_parsing.reference b/tests/queries/0_stateless/01732_more_consistent_datetime64_parsing.reference
new file mode 100644
index 00000000000..4f3181ecce0
--- /dev/null
+++ b/tests/queries/0_stateless/01732_more_consistent_datetime64_parsing.reference
@@ -0,0 +1,8 @@
+1	2005-03-18 01:58:31.222
+2	2005-03-18 01:58:31.222
+3	2005-03-18 01:58:31.222
+4	2005-03-18 01:58:31.222
+2005-03-18 04:58:31.222
+2005-03-18 04:58:31.222
+2005-03-18 04:58:31.222
+0
diff --git a/tests/queries/0_stateless/01732_more_consistent_datetime64_parsing.sql b/tests/queries/0_stateless/01732_more_consistent_datetime64_parsing.sql
new file mode 100644
index 00000000000..dcd874f8c45
--- /dev/null
+++ b/tests/queries/0_stateless/01732_more_consistent_datetime64_parsing.sql
@@ -0,0 +1,11 @@
+CREATE TEMPORARY TABLE t (i UInt8, x DateTime64(3, 'UTC'));
+INSERT INTO t VALUES (1, 1111111111222);
+INSERT INTO t VALUES (2, 1111111111.222);
+INSERT INTO t VALUES (3, '1111111111222');
+INSERT INTO t VALUES (4, '1111111111.222');
+SELECT * FROM t ORDER BY i;
+
+SELECT toDateTime64(1111111111.222, 3);
+SELECT toDateTime64('1111111111.222', 3);
+SELECT toDateTime64('1111111111222', 3);
+SELECT ignore(toDateTime64(1111111111222, 3)); -- This gives somewhat correct but unexpected result

From 2159344628ab2dc34451cc972377cc00b0b7b514 Mon Sep 17 00:00:00 2001
From: idfer <idfer0912@gmail.com>
Date: Mon, 22 Feb 2021 11:17:05 +0800
Subject: [PATCH 1235/1238] Fixed words typo in the setting-users doc.

---
 docs/en/operations/settings/settings-users.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/operations/settings/settings-users.md b/docs/en/operations/settings/settings-users.md
index 3e15d9e6dea..ee834dca98a 100644
--- a/docs/en/operations/settings/settings-users.md
+++ b/docs/en/operations/settings/settings-users.md
@@ -139,7 +139,7 @@ You can assign a quotas set for the user. For a detailed description of quotas c
 
 ### user_name/databases {#user-namedatabases}
 
-In this section, you can you can limit rows that are returned by ClickHouse for `SELECT` queries made by the current user, thus implementing basic row-level security.
+In this section, you can limit rows that are returned by ClickHouse for `SELECT` queries made by the current user, thus implementing basic row-level security.
 
 **Example**
 

From eb46f50993de7f585f00c5e5fa1ceabb0105c6a0 Mon Sep 17 00:00:00 2001
From: tavplubix <avtokmakov@yandex-team.ru>
Date: Mon, 22 Feb 2021 16:28:50 +0300
Subject: [PATCH 1236/1238] Update skip_list.json

---
 tests/queries/skip_list.json | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json
index 76b219799c5..39ec8bac3cf 100644
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@@ -111,6 +111,11 @@
         "memory_tracking",
         "memory_usage",
         "live_view",
+        "01413_alter_update_supertype",
+        "01149_zookeeper_mutation_stuck_after_replace_partition",
+        "00836_indices_alter_replicated_zookeeper",
+        "00652_mutations_alter_update",
+        "01715_tuple_insert_null_as_default",
         "00825_protobuf_format_map",
         "00152_insert_different_granularity",
         "01715_background_checker_blather_zookeeper",

From 871f15343350fd1ba4e065c6f47672b3944b3319 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@yandex-team.ru>
Date: Mon, 22 Feb 2021 20:44:24 +0300
Subject: [PATCH 1237/1238] Fix tests

---
 .../01691_DateTime64_clamp.reference          | 26 +++++++++----------
 .../0_stateless/01691_DateTime64_clamp.sql    | 18 ++++++-------
 2 files changed, 21 insertions(+), 23 deletions(-)

diff --git a/tests/queries/0_stateless/01691_DateTime64_clamp.reference b/tests/queries/0_stateless/01691_DateTime64_clamp.reference
index 849f8139640..3adc9a17e5c 100644
--- a/tests/queries/0_stateless/01691_DateTime64_clamp.reference
+++ b/tests/queries/0_stateless/01691_DateTime64_clamp.reference
@@ -1,19 +1,17 @@
 -- { echo }
-SELECT toDateTime(-2, 2);
+SELECT toTimeZone(toDateTime(-2, 2), 'Europe/Moscow');
 1970-01-01 03:00:00.00
-SELECT toDateTime64(-2, 2);
+SELECT toDateTime64(-2, 2, 'Europe/Moscow');
 1970-01-01 03:00:00.00
-SELECT CAST(-1 AS DateTime64);
-1970-01-01 03:00:00.000
-SELECT CAST('2020-01-01 00:00:00.3' AS DateTime64);
-2020-01-01 00:00:00.300
-SELECT toDateTime64(bitShiftLeft(toUInt64(1),33), 2);
-2106-02-07 09:28:15.00
-SELECT toDateTime(-2., 2);
+SELECT CAST(-1 AS DateTime64(0, 'Europe/Moscow'));
+1970-01-01 03:00:00
+SELECT CAST('2020-01-01 00:00:00.3' AS DateTime64(0, 'Europe/Moscow'));
+2020-01-01 00:00:00
+SELECT toDateTime64(bitShiftLeft(toUInt64(1), 33), 2, 'Europe/Moscow') FORMAT Null;
+SELECT toTimeZone(toDateTime(-2., 2), 'Europe/Moscow');
 1970-01-01 03:00:00.00
-SELECT toDateTime64(-2., 2);
+SELECT toDateTime64(-2., 2, 'Europe/Moscow');
 1970-01-01 03:00:00.00
-SELECT toDateTime64(toFloat32(bitShiftLeft(toUInt64(1),33)), 2);
-2106-02-07 09:28:16.00
-SELECT toDateTime64(toFloat64(bitShiftLeft(toUInt64(1),33)), 2);
-2106-02-07 09:28:15.00
+SELECT toDateTime64(toFloat32(bitShiftLeft(toUInt64(1),33)), 2, 'Europe/Moscow');
+2106-02-07 09:00:00.00
+SELECT toDateTime64(toFloat64(bitShiftLeft(toUInt64(1),33)), 2, 'Europe/Moscow') FORMAT Null;
diff --git a/tests/queries/0_stateless/01691_DateTime64_clamp.sql b/tests/queries/0_stateless/01691_DateTime64_clamp.sql
index f02d45a2cff..92d5a33328f 100644
--- a/tests/queries/0_stateless/01691_DateTime64_clamp.sql
+++ b/tests/queries/0_stateless/01691_DateTime64_clamp.sql
@@ -1,10 +1,10 @@
 -- { echo }
-SELECT toDateTime(-2, 2);
-SELECT toDateTime64(-2, 2);
-SELECT CAST(-1 AS DateTime64);
-SELECT CAST('2020-01-01 00:00:00.3' AS DateTime64);
-SELECT toDateTime64(bitShiftLeft(toUInt64(1),33), 2);
-SELECT toDateTime(-2., 2);
-SELECT toDateTime64(-2., 2);
-SELECT toDateTime64(toFloat32(bitShiftLeft(toUInt64(1),33)), 2);
-SELECT toDateTime64(toFloat64(bitShiftLeft(toUInt64(1),33)), 2);
+SELECT toTimeZone(toDateTime(-2, 2), 'Europe/Moscow');
+SELECT toDateTime64(-2, 2, 'Europe/Moscow');
+SELECT CAST(-1 AS DateTime64(0, 'Europe/Moscow'));
+SELECT CAST('2020-01-01 00:00:00.3' AS DateTime64(0, 'Europe/Moscow'));
+SELECT toDateTime64(bitShiftLeft(toUInt64(1), 33), 2, 'Europe/Moscow') FORMAT Null;
+SELECT toTimeZone(toDateTime(-2., 2), 'Europe/Moscow');
+SELECT toDateTime64(-2., 2, 'Europe/Moscow');
+SELECT toDateTime64(toFloat32(bitShiftLeft(toUInt64(1),33)), 2, 'Europe/Moscow');
+SELECT toDateTime64(toFloat64(bitShiftLeft(toUInt64(1),33)), 2, 'Europe/Moscow') FORMAT Null;

From 99875c23bcd7446c09c2c5e7d4e2746153e5cd91 Mon Sep 17 00:00:00 2001
From: Maksim Kita <maksim-kita@yandex-team.ru>
Date: Mon, 22 Feb 2021 21:34:23 +0300
Subject: [PATCH 1238/1238] Documentation low cardinality fix

---
 docs/ru/sql-reference/data-types/lowcardinality.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/sql-reference/data-types/lowcardinality.md b/docs/ru/sql-reference/data-types/lowcardinality.md
index 3b5b337d731..d94cedd29ce 100644
--- a/docs/ru/sql-reference/data-types/lowcardinality.md
+++ b/docs/ru/sql-reference/data-types/lowcardinality.md
@@ -23,7 +23,7 @@ LowCardinality(data_type)
 
 Эффективность использования типа данных `LowCarditality` зависит от разнообразия данных. Если словарь содержит менее 10 000 различных значений, ClickHouse в основном показывает более высокую эффективность чтения и хранения данных. Если же словарь содержит более 100 000 различных значений, ClickHouse может работать хуже, чем при использовании обычных типов данных.
 
-При работе со строками используйте `LowCardinality` вместо [Enum](enum.md). `LowCardinality` обеспечивает большую гибкость в использовании и часто показывает такую же или более высокую эффективность.
+При работе со строками, использование `LowCardinality` вместо [Enum](enum.md). `LowCardinality` обеспечивает большую гибкость в использовании и часто показывает такую же или более высокую эффективность.
 
 ## Пример